Move token related functions to common (#10942)

### What problem does this PR solve?

As title

### Type of change

- [x] Refactoring

Signed-off-by: Jin Hai <haijin.chn@gmail.com>
2026-01-31 15:45:08 +08:00 · 2025-11-03 08:50:05 +08:00
parent 44f2d6f5da
commit 360f5c1179
25 changed files with 529 additions and 78 deletions
--- a/graphrag/general/community_reports_extractor.py
+++ b/graphrag/general/community_reports_extractor.py
@@ -21,7 +21,7 @@ from graphrag.general.extractor import Extractor
 from graphrag.general.leiden import add_community_info2graph
 from rag.llm.chat_model import Base as CompletionLLM
 from graphrag.utils import perform_variable_replacements, dict_has_keys_with_types, chat_limiter
-from rag.utils import num_tokens_from_string
+from common.token_utils import num_tokens_from_string
 import trio


--- a/graphrag/general/extractor.py
+++ b/graphrag/general/extractor.py
@@ -38,7 +38,7 @@ from graphrag.utils import (
 )
 from rag.llm.chat_model import Base as CompletionLLM
 from rag.prompts.generator import message_fit_in
-from rag.utils import truncate
+from common.token_utils import truncate

 GRAPH_FIELD_SEP = "<SEP>"
 DEFAULT_ENTITY_TYPES = ["organization", "person", "geo", "event", "category"]
--- a/graphrag/general/graph_extractor.py
+++ b/graphrag/general/graph_extractor.py
@@ -16,7 +16,7 @@ from graphrag.general.graph_prompt import GRAPH_EXTRACTION_PROMPT, CONTINUE_PROM
 from graphrag.utils import ErrorHandlerFn, perform_variable_replacements, chat_limiter, split_string_by_multi_markers
 from rag.llm.chat_model import Base as CompletionLLM
 import networkx as nx
-from rag.utils import num_tokens_from_string
+from common.token_utils import num_tokens_from_string

 DEFAULT_TUPLE_DELIMITER = "<|>"
 DEFAULT_RECORD_DELIMITER = "##"
--- a/graphrag/general/index.py
+++ b/graphrag/general/index.py
@@ -165,7 +165,7 @@ async def run_graphrag_for_kb(
        return {"ok_docs": [], "failed_docs": [], "total_docs": 0, "total_chunks": 0, "seconds": 0.0}

    def load_doc_chunks(doc_id: str) -> list[str]:
-        from rag.utils import num_tokens_from_string
+        from common.token_utils import num_tokens_from_string

        chunks = []
        current_chunk = ""
--- a/graphrag/general/mind_map_extractor.py
+++ b/graphrag/general/mind_map_extractor.py
@@ -27,7 +27,7 @@ from graphrag.utils import ErrorHandlerFn, perform_variable_replacements, chat_l
 from rag.llm.chat_model import Base as CompletionLLM
 import markdown_to_json
 from functools import reduce
-from rag.utils import num_tokens_from_string
+from common.token_utils import num_tokens_from_string


@dataclass
--- a/graphrag/light/graph_extractor.py
+++ b/graphrag/light/graph_extractor.py
@@ -17,7 +17,7 @@ from graphrag.general.extractor import ENTITY_EXTRACTION_MAX_GLEANINGS, Extracto
 from graphrag.light.graph_prompt import PROMPTS
 from graphrag.utils import chat_limiter, pack_user_ass_to_openai_messages, split_string_by_multi_markers
 from rag.llm.chat_model import Base as CompletionLLM
-from rag.utils import num_tokens_from_string
+from common.token_utils import num_tokens_from_string


@dataclass
--- a/graphrag/search.py
+++ b/graphrag/search.py
@@ -24,7 +24,7 @@ import trio
 from common.misc_utils import get_uuid
 from graphrag.query_analyze_prompt import PROMPTS
 from graphrag.utils import get_entity_type2samples, get_llm_cache, set_llm_cache, get_relation
-from rag.utils import num_tokens_from_string
+from common.token_utils import num_tokens_from_string
 from rag.utils.doc_store_conn import OrderByExpr

 from rag.nlp.search import Dealer, index_name