Move token related functions to common (#10942)

### What problem does this PR solve?

As title

### Type of change

- [x] Refactoring

Signed-off-by: Jin Hai <haijin.chn@gmail.com>
This commit is contained in:
Jin Hai
2025-11-03 08:50:05 +08:00
committed by GitHub
parent 44f2d6f5da
commit 360f5c1179
25 changed files with 529 additions and 78 deletions

View File

@ -21,7 +21,7 @@ from graphrag.general.extractor import Extractor
from graphrag.general.leiden import add_community_info2graph
from rag.llm.chat_model import Base as CompletionLLM
from graphrag.utils import perform_variable_replacements, dict_has_keys_with_types, chat_limiter
from rag.utils import num_tokens_from_string
from common.token_utils import num_tokens_from_string
import trio

View File

@ -38,7 +38,7 @@ from graphrag.utils import (
)
from rag.llm.chat_model import Base as CompletionLLM
from rag.prompts.generator import message_fit_in
from rag.utils import truncate
from common.token_utils import truncate
GRAPH_FIELD_SEP = "<SEP>"
DEFAULT_ENTITY_TYPES = ["organization", "person", "geo", "event", "category"]

View File

@ -16,7 +16,7 @@ from graphrag.general.graph_prompt import GRAPH_EXTRACTION_PROMPT, CONTINUE_PROM
from graphrag.utils import ErrorHandlerFn, perform_variable_replacements, chat_limiter, split_string_by_multi_markers
from rag.llm.chat_model import Base as CompletionLLM
import networkx as nx
from rag.utils import num_tokens_from_string
from common.token_utils import num_tokens_from_string
DEFAULT_TUPLE_DELIMITER = "<|>"
DEFAULT_RECORD_DELIMITER = "##"

View File

@ -165,7 +165,7 @@ async def run_graphrag_for_kb(
return {"ok_docs": [], "failed_docs": [], "total_docs": 0, "total_chunks": 0, "seconds": 0.0}
def load_doc_chunks(doc_id: str) -> list[str]:
from rag.utils import num_tokens_from_string
from common.token_utils import num_tokens_from_string
chunks = []
current_chunk = ""

View File

@ -27,7 +27,7 @@ from graphrag.utils import ErrorHandlerFn, perform_variable_replacements, chat_l
from rag.llm.chat_model import Base as CompletionLLM
import markdown_to_json
from functools import reduce
from rag.utils import num_tokens_from_string
from common.token_utils import num_tokens_from_string
@dataclass