Move api.settings to common.settings (#11036)

### What problem does this PR solve? As title ### Type of change - [x] Refactoring --------- Signed-off-by: Jin Hai <haijin.chn@gmail.com>
2026-01-23 03:26:53 +08:00 · 2025-11-06 09:36:38 +08:00
parent 87c9a054d3
commit f98b24c9bf
68 changed files with 675 additions and 718 deletions
--- a/graphrag/general/index.py
+++ b/graphrag/general/index.py
@ -39,7 +39,7 @@ from graphrag.utils import (
 )
 from rag.nlp import rag_tokenizer, search
 from rag.utils.redis_conn import RedisDistributedLock
-from common import globals
+from common import settings


 async def run_graphrag(
@ -55,7 +55,7 @@ async def run_graphrag(
    start = trio.current_time()
    tenant_id, kb_id, doc_id = row["tenant_id"], str(row["kb_id"]), row["doc_id"]
    chunks = []
-    for d in globals.retriever.chunk_list(doc_id, tenant_id, [kb_id], fields=["content_with_weight", "doc_id"], sort_by_position=True):
+    for d in settings.retriever.chunk_list(doc_id, tenant_id, [kb_id], fields=["content_with_weight", "doc_id"], sort_by_position=True):
        chunks.append(d["content_with_weight"])

    with trio.fail_after(max(120, len(chunks) * 60 * 10) if enable_timeout_assertion else 10000000000):
@ -170,7 +170,7 @@ async def run_graphrag_for_kb(
        chunks = []
        current_chunk = ""

-        for d in globals.retriever.chunk_list(
+        for d in settings.retriever.chunk_list(
            doc_id,
            tenant_id,
            [kb_id],
@ -387,8 +387,8 @@ async def generate_subgraph(
        "removed_kwd": "N",
    }
    cid = chunk_id(chunk)
-    await trio.to_thread.run_sync(globals.docStoreConn.delete, {"knowledge_graph_kwd": "subgraph", "source_id": doc_id}, search.index_name(tenant_id), kb_id)
-    await trio.to_thread.run_sync(globals.docStoreConn.insert, [{"id": cid, **chunk}], search.index_name(tenant_id), kb_id)
+    await trio.to_thread.run_sync(settings.docStoreConn.delete, {"knowledge_graph_kwd": "subgraph", "source_id": doc_id}, search.index_name(tenant_id), kb_id)
+    await trio.to_thread.run_sync(settings.docStoreConn.insert, [{"id": cid, **chunk}], search.index_name(tenant_id), kb_id)
    now = trio.current_time()
    callback(msg=f"generated subgraph for doc {doc_id} in {now - start:.2f} seconds.")
    return subgraph
@ -496,7 +496,7 @@ async def extract_community(
        chunks.append(chunk)

    await trio.to_thread.run_sync(
-        lambda: globals.docStoreConn.delete(
+        lambda: settings.docStoreConn.delete(
            {"knowledge_graph_kwd": "community_report", "kb_id": kb_id},
            search.index_name(tenant_id),
            kb_id,
@ -504,7 +504,7 @@ async def extract_community(
    )
    es_bulk_size = 4
    for b in range(0, len(chunks), es_bulk_size):
-        doc_store_result = await trio.to_thread.run_sync(lambda: globals.docStoreConn.insert(chunks[b : b + es_bulk_size], search.index_name(tenant_id), kb_id))
+        doc_store_result = await trio.to_thread.run_sync(lambda: settings.docStoreConn.insert(chunks[b : b + es_bulk_size], search.index_name(tenant_id), kb_id))
        if doc_store_result:
            error_message = f"Insert chunk error: {doc_store_result}, please check log file and Elasticsearch/Infinity status!"
            raise Exception(error_message)
--- a/graphrag/general/smoke.py
+++ b/graphrag/general/smoke.py
@ -20,7 +20,6 @@ import logging
 import networkx as nx
 import trio

-from api import settings
 from common.constants import LLMType
 from api.db.services.document_service import DocumentService
 from api.db.services.knowledgebase_service import KnowledgebaseService
@ -28,7 +27,7 @@ from api.db.services.llm_service import LLMBundle
 from api.db.services.user_service import TenantService
 from graphrag.general.graph_extractor import GraphExtractor
 from graphrag.general.index import update_graph, with_resolution, with_community
-from common import globals
+from common import settings

 settings.init_settings()

@ -63,7 +62,7 @@ async def main():

    chunks = [
        d["content_with_weight"]
-        for d in globals.retriever.chunk_list(
+        for d in settings.retriever.chunk_list(
            args.doc_id,
            args.tenant_id,
            [kb_id],
--- a/graphrag/light/smoke.py
+++ b/graphrag/light/smoke.py
@ -16,7 +16,6 @@

 import argparse
 import json
-from api import settings
 import networkx as nx
 import logging
 import trio
@ -28,7 +27,7 @@ from api.db.services.llm_service import LLMBundle
 from api.db.services.user_service import TenantService
 from graphrag.general.index import update_graph
 from graphrag.light.graph_extractor import GraphExtractor
-from common import globals
+from common import settings

 settings.init_settings()

@ -64,7 +63,7 @@ async def main():

    chunks = [
        d["content_with_weight"]
-        for d in globals.retriever.chunk_list(
+        for d in settings.retriever.chunk_list(
            args.doc_id,
            args.tenant_id,
            [kb_id],
--- a/graphrag/search.py
+++ b/graphrag/search.py
@ -29,7 +29,7 @@ from rag.utils.doc_store_conn import OrderByExpr

 from rag.nlp.search import Dealer, index_name
 from common.float_utils import get_float
-from common import globals
+from common import settings


 class KGSearch(Dealer):
@ -314,7 +314,6 @@ class KGSearch(Dealer):


 if __name__ == "__main__":
-    from api import settings
    import argparse
    from common.constants import LLMType
    from api.db.services.knowledgebase_service import KnowledgebaseService
@ -335,6 +334,6 @@ if __name__ == "__main__":
    _, kb = KnowledgebaseService.get_by_id(kb_id)
    embed_bdl = LLMBundle(args.tenant_id, LLMType.EMBEDDING, kb.embd_id)

-    kg = KGSearch(globals.docStoreConn)
+    kg = KGSearch(settings.docStoreConn)
    print(kg.retrieval({"question": args.question, "kb_ids": [kb_id]},
                    search.index_name(kb.tenant_id), [kb_id], embed_bdl, llm_bdl))
--- a/graphrag/utils.py
+++ b/graphrag/utils.py
@ -28,7 +28,7 @@ from common.connection_utils import timeout
 from rag.nlp import rag_tokenizer, search
 from rag.utils.doc_store_conn import OrderByExpr
 from rag.utils.redis_conn import REDIS_CONN
-from common import globals
+from common import settings

 GRAPH_FIELD_SEP = "<SEP>"

@ -334,7 +334,7 @@ def get_relation(tenant_id, kb_id, from_ent_name, to_ent_name, size=1):
    ents = list(set(ents))
    conds = {"fields": ["content_with_weight"], "size": size, "from_entity_kwd": ents, "to_entity_kwd": ents, "knowledge_graph_kwd": ["relation"]}
    res = []
-    es_res = globals.retriever.search(conds, search.index_name(tenant_id), [kb_id] if isinstance(kb_id, str) else kb_id)
+    es_res = settings.retriever.search(conds, search.index_name(tenant_id), [kb_id] if isinstance(kb_id, str) else kb_id)
    for id in es_res.ids:
        try:
            if size == 1:
@ -381,8 +381,8 @@ async def does_graph_contains(tenant_id, kb_id, doc_id):
        "knowledge_graph_kwd": ["graph"],
        "removed_kwd": "N",
    }
-    res = await trio.to_thread.run_sync(lambda: globals.docStoreConn.search(fields, [], condition, [], OrderByExpr(), 0, 1, search.index_name(tenant_id), [kb_id]))
-    fields2 = globals.docStoreConn.getFields(res, fields)
+    res = await trio.to_thread.run_sync(lambda: settings.docStoreConn.search(fields, [], condition, [], OrderByExpr(), 0, 1, search.index_name(tenant_id), [kb_id]))
+    fields2 = settings.docStoreConn.getFields(res, fields)
    graph_doc_ids = set()
    for chunk_id in fields2.keys():
        graph_doc_ids = set(fields2[chunk_id]["source_id"])
@ -391,7 +391,7 @@ async def does_graph_contains(tenant_id, kb_id, doc_id):

 async def get_graph_doc_ids(tenant_id, kb_id) -> list[str]:
    conds = {"fields": ["source_id"], "removed_kwd": "N", "size": 1, "knowledge_graph_kwd": ["graph"]}
-    res = await trio.to_thread.run_sync(lambda: globals.retriever.search(conds, search.index_name(tenant_id), [kb_id]))
+    res = await trio.to_thread.run_sync(lambda: settings.retriever.search(conds, search.index_name(tenant_id), [kb_id]))
    doc_ids = []
    if res.total == 0:
        return doc_ids
@ -402,7 +402,7 @@ async def get_graph_doc_ids(tenant_id, kb_id) -> list[str]:

 async def get_graph(tenant_id, kb_id, exclude_rebuild=None):
    conds = {"fields": ["content_with_weight", "removed_kwd", "source_id"], "size": 1, "knowledge_graph_kwd": ["graph"]}
-    res = await trio.to_thread.run_sync(globals.retriever.search, conds, search.index_name(tenant_id), [kb_id])
+    res = await trio.to_thread.run_sync(settings.retriever.search, conds, search.index_name(tenant_id), [kb_id])
    if not res.total == 0:
        for id in res.ids:
            try:
@ -423,17 +423,17 @@ async def set_graph(tenant_id: str, kb_id: str, embd_mdl, graph: nx.Graph, chang
    global chat_limiter
    start = trio.current_time()

-    await trio.to_thread.run_sync(globals.docStoreConn.delete, {"knowledge_graph_kwd": ["graph", "subgraph"]}, search.index_name(tenant_id), kb_id)
+    await trio.to_thread.run_sync(settings.docStoreConn.delete, {"knowledge_graph_kwd": ["graph", "subgraph"]}, search.index_name(tenant_id), kb_id)

    if change.removed_nodes:
-        await trio.to_thread.run_sync(globals.docStoreConn.delete, {"knowledge_graph_kwd": ["entity"], "entity_kwd": sorted(change.removed_nodes)}, search.index_name(tenant_id), kb_id)
+        await trio.to_thread.run_sync(settings.docStoreConn.delete, {"knowledge_graph_kwd": ["entity"], "entity_kwd": sorted(change.removed_nodes)}, search.index_name(tenant_id), kb_id)

    if change.removed_edges:

        async def del_edges(from_node, to_node):
            async with chat_limiter:
                await trio.to_thread.run_sync(
-                    globals.docStoreConn.delete, {"knowledge_graph_kwd": ["relation"], "from_entity_kwd": from_node, "to_entity_kwd": to_node}, search.index_name(tenant_id), kb_id
+                    settings.docStoreConn.delete, {"knowledge_graph_kwd": ["relation"], "from_entity_kwd": from_node, "to_entity_kwd": to_node}, search.index_name(tenant_id), kb_id
                )

        async with trio.open_nursery() as nursery:
@ -501,7 +501,7 @@ async def set_graph(tenant_id: str, kb_id: str, embd_mdl, graph: nx.Graph, chang
    es_bulk_size = 4
    for b in range(0, len(chunks), es_bulk_size):
        with trio.fail_after(3 if enable_timeout_assertion else 30000000):
-            doc_store_result = await trio.to_thread.run_sync(lambda: globals.docStoreConn.insert(chunks[b : b + es_bulk_size], search.index_name(tenant_id), kb_id))
+            doc_store_result = await trio.to_thread.run_sync(lambda: settings.docStoreConn.insert(chunks[b : b + es_bulk_size], search.index_name(tenant_id), kb_id))
        if b % 100 == es_bulk_size and callback:
            callback(msg=f"Insert chunks: {b}/{len(chunks)}")
        if doc_store_result:
@ -555,7 +555,7 @@ def merge_tuples(list1, list2):


 async def get_entity_type2samples(idxnms, kb_ids: list):
-    es_res = await trio.to_thread.run_sync(lambda: globals.retriever.search({"knowledge_graph_kwd": "ty2ents", "kb_id": kb_ids, "size": 10000, "fields": ["content_with_weight"]}, idxnms, kb_ids))
+    es_res = await trio.to_thread.run_sync(lambda: settings.retriever.search({"knowledge_graph_kwd": "ty2ents", "kb_id": kb_ids, "size": 10000, "fields": ["content_with_weight"]}, idxnms, kb_ids))

    res = defaultdict(list)
    for id in es_res.ids:
@ -589,10 +589,10 @@ async def rebuild_graph(tenant_id, kb_id, exclude_rebuild=None):
    bs = 256
    for i in range(0, 1024 * bs, bs):
        es_res = await trio.to_thread.run_sync(
-            lambda: globals.docStoreConn.search(flds, [], {"kb_id": kb_id, "knowledge_graph_kwd": ["subgraph"]}, [], OrderByExpr(), i, bs, search.index_name(tenant_id), [kb_id])
+            lambda: settings.docStoreConn.search(flds, [], {"kb_id": kb_id, "knowledge_graph_kwd": ["subgraph"]}, [], OrderByExpr(), i, bs, search.index_name(tenant_id), [kb_id])
        )
-        # tot = globals.docStoreConn.getTotal(es_res)
-        es_res = globals.docStoreConn.getFields(es_res, flds)
+        # tot = settings.docStoreConn.getTotal(es_res)
+        es_res = settings.docStoreConn.getFields(es_res, flds)

        if len(es_res) == 0:
            break