Move api.settings to common.settings (#11036)

### What problem does this PR solve?

As title: move the `api.settings` module into `common.settings`. All imports and call sites are updated, and the handles previously exposed via `common.globals` (e.g. `docStoreConn`, `retriever`, `EMBEDDING_CFG`) as well as `rag`-level helpers such as `STORAGE_IMPL` and `get_svr_queue_name` are now accessed through `common.settings`.
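
For reviewers, a minimal before/after sketch of the recurring substitution this commit applies (the module and attribute names are taken from the hunks below; the wrapper function itself is only illustrative, not code from this PR):

```python
# Before: configuration and shared handles were split across several modules.
# from api import settings
# from common import globals
# from rag.utils.storage_factory import STORAGE_IMPL

# After: everything is reached through common.settings.
from common import settings

def drop_doc_chunks(index_name: str, kb_id: str, doc_id: str) -> None:
    # Illustrative helper: doc-store and object storage now hang off settings.
    settings.docStoreConn.delete({"doc_id": doc_id}, index_name, kb_id)
    if settings.STORAGE_IMPL.obj_exist(kb_id, doc_id):
        settings.STORAGE_IMPL.rm(kb_id, doc_id)
```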

### Type of change

- [x] Refactoring

---------

Signed-off-by: Jin Hai <haijin.chn@gmail.com>
Jin Hai authored on 2025-11-06 09:36:38 +08:00, committed by GitHub
parent 87c9a054d3
commit f98b24c9bf
68 changed files with 675 additions and 718 deletions


@@ -31,7 +31,7 @@ from peewee import InterfaceError, OperationalError, BigIntegerField, BooleanFie
 from playhouse.migrate import MySQLMigrator, PostgresqlMigrator, migrate
 from playhouse.pool import PooledMySQLDatabase, PooledPostgresqlDatabase
-from api import settings, utils
+from api import utils
 from api.db import SerializedType
 from api.utils.json_encode import json_dumps, json_loads
 from api.utils.configs import deserialize_b64, serialize_b64
@@ -39,6 +39,7 @@ from api.utils.configs import deserialize_b64, serialize_b64
 from common.time_utils import current_timestamp, timestamp_to_date, date_string_to_timestamp
 from common.decorator import singleton
 from common.constants import ParserType
+from common import settings
 CONTINUOUS_FIELD_TYPE = {IntegerField, FloatField, DateTimeField}


@@ -29,10 +29,9 @@ from api.db.services.knowledgebase_service import KnowledgebaseService
 from api.db.services.tenant_llm_service import LLMFactoriesService, TenantLLMService
 from api.db.services.llm_service import LLMService, LLMBundle, get_init_tenant_llm
 from api.db.services.user_service import TenantService, UserTenantService
-from api import settings
 from common.constants import LLMType
 from common.file_utils import get_project_base_directory
-from common import globals
+from common import settings
 from api.common.base64 import encode_to_base64
@@ -50,7 +49,7 @@ def init_superuser():
 "id": user_info["id"],
 "name": user_info["nickname"] + "s Kingdom",
 "llm_id": settings.CHAT_MDL,
-"embd_id": globals.EMBEDDING_MDL,
+"embd_id": settings.EMBEDDING_MDL,
 "asr_id": settings.ASR_MDL,
 "parser_ids": settings.PARSERS,
 "img2txt_id": settings.IMAGE2TEXT_MDL


@@ -16,7 +16,6 @@
 import logging
 import uuid
-from api import settings
 from api.utils.api_utils import group_by
 from api.db import FileType, UserTenantRole
 from api.db.services.api_service import APITokenService, API4ConversationService
@@ -35,10 +34,9 @@ from api.db.services.task_service import TaskService
 from api.db.services.tenant_llm_service import TenantLLMService
 from api.db.services.user_canvas_version import UserCanvasVersionService
 from api.db.services.user_service import TenantService, UserService, UserTenantService
-from rag.utils.storage_factory import STORAGE_IMPL
 from rag.nlp import search
 from common.constants import ActiveEnum
-from common import globals
+from common import settings
 def create_new_user(user_info: dict) -> dict:
 """
@@ -64,7 +62,7 @@ def create_new_user(user_info: dict) -> dict:
 "id": user_id,
 "name": user_info["nickname"] + "s Kingdom",
 "llm_id": settings.CHAT_MDL,
-"embd_id": globals.EMBEDDING_MDL,
+"embd_id": settings.EMBEDDING_MDL,
 "asr_id": settings.ASR_MDL,
 "parser_ids": settings.PARSERS,
 "img2txt_id": settings.IMAGE2TEXT_MDL,
@@ -159,8 +157,8 @@ def delete_user_data(user_id: str) -> dict:
 if kb_ids:
 # step1.1.1 delete files in storage, remove bucket
 for kb_id in kb_ids:
-if STORAGE_IMPL.bucket_exists(kb_id):
-STORAGE_IMPL.remove_bucket(kb_id)
+if settings.STORAGE_IMPL.bucket_exists(kb_id):
+settings.STORAGE_IMPL.remove_bucket(kb_id)
 done_msg += f"- Removed {len(kb_ids)} dataset's buckets.\n"
 # step1.1.2 delete file and document info in db
 doc_ids = DocumentService.get_all_doc_ids_by_kb_ids(kb_ids)
@@ -180,7 +178,7 @@ def delete_user_data(user_id: str) -> dict:
 )
 done_msg += f"- Deleted {file2doc_delete_res} document-file relation records.\n"
 # step1.1.3 delete chunk in es
-r = globals.docStoreConn.delete({"kb_id": kb_ids},
+r = settings.docStoreConn.delete({"kb_id": kb_ids},
 search.index_name(tenant_id), kb_ids)
 done_msg += f"- Deleted {r} chunk records.\n"
 kb_delete_res = KnowledgebaseService.delete_by_ids(kb_ids)
@@ -219,7 +217,7 @@ def delete_user_data(user_id: str) -> dict:
 if created_files:
 # step2.1.1.1 delete file in storage
 for f in created_files:
-STORAGE_IMPL.rm(f.parent_id, f.location)
+settings.STORAGE_IMPL.rm(f.parent_id, f.location)
 done_msg += f"- Deleted {len(created_files)} uploaded file.\n"
 # step2.1.1.2 delete file record
 file_delete_res = FileService.delete_by_ids([f.id for f in created_files])
@@ -238,7 +236,7 @@ def delete_user_data(user_id: str) -> dict:
 kb_doc_info = {}
 for _tenant_id, kb_doc in kb_grouped_doc.items():
 for _kb_id, docs in kb_doc.items():
-chunk_delete_res += globals.docStoreConn.delete(
+chunk_delete_res += settings.docStoreConn.delete(
 {"doc_id": [d["id"] for d in docs]},
 search.index_name(_tenant_id), _kb_id
 )


@@ -25,7 +25,6 @@ import trio
 from langfuse import Langfuse
 from peewee import fn
 from agentic_reasoning import DeepResearcher
-from api import settings
 from common.constants import LLMType, ParserType, StatusEnum
 from api.db.db_models import DB, Dialog
 from api.db.services.common_service import CommonService
@@ -44,7 +43,7 @@ from rag.prompts.generator import chunks_format, citation_prompt, cross_language
 from common.token_utils import num_tokens_from_string
 from rag.utils.tavily_conn import Tavily
 from common.string_utils import remove_redundant_spaces
-from common import globals
+from common import settings
 class DialogService(CommonService):
@@ -373,7 +372,7 @@ def chat(dialog, messages, stream=True, **kwargs):
 chat_mdl.bind_tools(toolcall_session, tools)
 bind_models_ts = timer()
-retriever = globals.retriever
+retriever = settings.retriever
 questions = [m["content"] for m in messages if m["role"] == "user"][-3:]
 attachments = kwargs["doc_ids"].split(",") if "doc_ids" in kwargs else []
 if "doc_ids" in messages[-1]:
@@ -665,7 +664,7 @@ Please write the SQL, only SQL, without any other explanations or text.
 logging.debug(f"{question} get SQL(refined): {sql}")
 tried_times += 1
-return globals.retriever.sql_retrieval(sql, format="json"), sql
+return settings.retriever.sql_retrieval(sql, format="json"), sql
 tbl, sql = get_table()
 if tbl is None:
@@ -759,7 +758,7 @@ def ask(question, kb_ids, tenant_id, chat_llm_name=None, search_config={}):
 embedding_list = list(set([kb.embd_id for kb in kbs]))
 is_knowledge_graph = all([kb.parser_id == ParserType.KG for kb in kbs])
-retriever = globals.retriever if not is_knowledge_graph else settings.kg_retriever
+retriever = settings.retriever if not is_knowledge_graph else settings.kg_retriever
 embd_mdl = LLMBundle(tenant_id, LLMType.EMBEDDING, embedding_list[0])
 chat_mdl = LLMBundle(tenant_id, LLMType.CHAT, chat_llm_name)
@@ -855,7 +854,7 @@ def gen_mindmap(question, kb_ids, tenant_id, search_config={}):
 if not doc_ids:
 doc_ids = None
-ranks = globals.retriever.retrieval(
+ranks = settings.retriever.retrieval(
 question=question,
 embd_mdl=embd_mdl,
 tenant_ids=tenant_ids,


@@ -35,13 +35,11 @@ from api.db.services.common_service import CommonService
 from api.db.services.knowledgebase_service import KnowledgebaseService
 from common.misc_utils import get_uuid
 from common.time_utils import current_timestamp, get_format_time
-from common.constants import LLMType, ParserType, StatusEnum, TaskStatus
+from common.constants import LLMType, ParserType, StatusEnum, TaskStatus, SVR_CONSUMER_GROUP_NAME
 from rag.nlp import rag_tokenizer, search
-from rag.settings import get_svr_queue_name, SVR_CONSUMER_GROUP_NAME
 from rag.utils.redis_conn import REDIS_CONN
-from rag.utils.storage_factory import STORAGE_IMPL
 from rag.utils.doc_store_conn import OrderByExpr
-from common import globals
+from common import settings
 class DocumentService(CommonService):
 model = Document
@@ -308,33 +306,33 @@ class DocumentService(CommonService):
 page_size = 1000
 all_chunk_ids = []
 while True:
-chunks = globals.docStoreConn.search(["img_id"], [], {"doc_id": doc.id}, [], OrderByExpr(),
+chunks = settings.docStoreConn.search(["img_id"], [], {"doc_id": doc.id}, [], OrderByExpr(),
 page * page_size, page_size, search.index_name(tenant_id),
 [doc.kb_id])
-chunk_ids = globals.docStoreConn.getChunkIds(chunks)
+chunk_ids = settings.docStoreConn.getChunkIds(chunks)
 if not chunk_ids:
 break
 all_chunk_ids.extend(chunk_ids)
 page += 1
 for cid in all_chunk_ids:
-if STORAGE_IMPL.obj_exist(doc.kb_id, cid):
-STORAGE_IMPL.rm(doc.kb_id, cid)
+if settings.STORAGE_IMPL.obj_exist(doc.kb_id, cid):
+settings.STORAGE_IMPL.rm(doc.kb_id, cid)
 if doc.thumbnail and not doc.thumbnail.startswith(IMG_BASE64_PREFIX):
-if STORAGE_IMPL.obj_exist(doc.kb_id, doc.thumbnail):
-STORAGE_IMPL.rm(doc.kb_id, doc.thumbnail)
-globals.docStoreConn.delete({"doc_id": doc.id}, search.index_name(tenant_id), doc.kb_id)
+if settings.STORAGE_IMPL.obj_exist(doc.kb_id, doc.thumbnail):
+settings.STORAGE_IMPL.rm(doc.kb_id, doc.thumbnail)
+settings.docStoreConn.delete({"doc_id": doc.id}, search.index_name(tenant_id), doc.kb_id)
-graph_source = globals.docStoreConn.getFields(
-globals.docStoreConn.search(["source_id"], [], {"kb_id": doc.kb_id, "knowledge_graph_kwd": ["graph"]}, [], OrderByExpr(), 0, 1, search.index_name(tenant_id), [doc.kb_id]), ["source_id"]
+graph_source = settings.docStoreConn.getFields(
+settings.docStoreConn.search(["source_id"], [], {"kb_id": doc.kb_id, "knowledge_graph_kwd": ["graph"]}, [], OrderByExpr(), 0, 1, search.index_name(tenant_id), [doc.kb_id]), ["source_id"]
 )
 if len(graph_source) > 0 and doc.id in list(graph_source.values())[0]["source_id"]:
-globals.docStoreConn.update({"kb_id": doc.kb_id, "knowledge_graph_kwd": ["entity", "relation", "graph", "subgraph", "community_report"], "source_id": doc.id},
+settings.docStoreConn.update({"kb_id": doc.kb_id, "knowledge_graph_kwd": ["entity", "relation", "graph", "subgraph", "community_report"], "source_id": doc.id},
 {"remove": {"source_id": doc.id}},
 search.index_name(tenant_id), doc.kb_id)
-globals.docStoreConn.update({"kb_id": doc.kb_id, "knowledge_graph_kwd": ["graph"]},
+settings.docStoreConn.update({"kb_id": doc.kb_id, "knowledge_graph_kwd": ["graph"]},
 {"removed_kwd": "Y"},
 search.index_name(tenant_id), doc.kb_id)
-globals.docStoreConn.delete({"kb_id": doc.kb_id, "knowledge_graph_kwd": ["entity", "relation", "graph", "subgraph", "community_report"], "must_not": {"exists": "source_id"}},
+settings.docStoreConn.delete({"kb_id": doc.kb_id, "knowledge_graph_kwd": ["entity", "relation", "graph", "subgraph", "community_report"], "must_not": {"exists": "source_id"}},
 search.index_name(tenant_id), doc.kb_id)
 except Exception:
 pass
@@ -851,12 +849,12 @@ def queue_raptor_o_graphrag_tasks(sample_doc_id, ty, priority, fake_doc_id="", d
 task["doc_id"] = fake_doc_id
 task["doc_ids"] = doc_ids
 DocumentService.begin2parse(sample_doc_id["id"])
-assert REDIS_CONN.queue_product(get_svr_queue_name(priority), message=task), "Can't access Redis. Please check the Redis' status."
+assert REDIS_CONN.queue_product(settings.get_svr_queue_name(priority), message=task), "Can't access Redis. Please check the Redis' status."
 return task["id"]
 def get_queue_length(priority):
-group_info = REDIS_CONN.queue_info(get_svr_queue_name(priority), SVR_CONSUMER_GROUP_NAME)
+group_info = REDIS_CONN.queue_info(settings.get_svr_queue_name(priority), SVR_CONSUMER_GROUP_NAME)
 if not group_info:
 return 0
 return int(group_info.get("lag", 0) or 0)
@@ -938,7 +936,7 @@ def doc_upload_and_parse(conversation_id, file_objs, user_id):
 else:
 d["image"].save(output_buffer, format='JPEG')
-STORAGE_IMPL.put(kb.id, d["id"], output_buffer.getvalue())
+settings.STORAGE_IMPL.put(kb.id, d["id"], output_buffer.getvalue())
 d["img_id"] = "{}-{}".format(kb.id, d["id"])
 d.pop("image", None)
 docs.append(d)
@@ -995,10 +993,10 @@ def doc_upload_and_parse(conversation_id, file_objs, user_id):
 d["q_%d_vec" % len(v)] = v
 for b in range(0, len(cks), es_bulk_size):
 if try_create_idx:
-if not globals.docStoreConn.indexExist(idxnm, kb_id):
-globals.docStoreConn.createIdx(idxnm, kb_id, len(vects[0]))
+if not settings.docStoreConn.indexExist(idxnm, kb_id):
+settings.docStoreConn.createIdx(idxnm, kb_id, len(vects[0]))
 try_create_idx = False
-globals.docStoreConn.insert(cks[b:b + es_bulk_size], idxnm, kb_id)
+settings.docStoreConn.insert(cks[b:b + es_bulk_size], idxnm, kb_id)
 DocumentService.increment_chunk_num(
 doc_id, kb.id, token_counts[doc_id], chunk_counts[doc_id], 0)


@@ -33,7 +33,7 @@ from api.db.services.knowledgebase_service import KnowledgebaseService
 from api.db.services.task_service import TaskService
 from api.utils.file_utils import filename_type, read_potential_broken_pdf, thumbnail_img
 from rag.llm.cv_model import GptV4
-from rag.utils.storage_factory import STORAGE_IMPL
+from common import settings
 class FileService(CommonService):
@@ -440,13 +440,13 @@ class FileService(CommonService):
 raise RuntimeError("This type of file has not been supported yet!")
 location = filename
-while STORAGE_IMPL.obj_exist(kb.id, location):
+while settings.STORAGE_IMPL.obj_exist(kb.id, location):
 location += "_"
 blob = file.read()
 if filetype == FileType.PDF.value:
 blob = read_potential_broken_pdf(blob)
-STORAGE_IMPL.put(kb.id, location, blob)
+settings.STORAGE_IMPL.put(kb.id, location, blob)
 doc_id = get_uuid()
@@ -454,7 +454,7 @@ class FileService(CommonService):
 thumbnail_location = ""
 if img is not None:
 thumbnail_location = f"thumbnail_{doc_id}.png"
-STORAGE_IMPL.put(kb.id, thumbnail_location, img)
+settings.STORAGE_IMPL.put(kb.id, thumbnail_location, img)
 doc = {
 "id": doc_id,
@@ -534,12 +534,12 @@ class FileService(CommonService):
 @staticmethod
 def get_blob(user_id, location):
 bname = f"{user_id}-downloads"
-return STORAGE_IMPL.get(bname, location)
+return settings.STORAGE_IMPL.get(bname, location)
 @staticmethod
 def put_blob(user_id, location, blob):
 bname = f"{user_id}-downloads"
-return STORAGE_IMPL.put(bname, location, blob)
+return settings.STORAGE_IMPL.put(bname, location, blob)
 @classmethod
 @DB.connection_context()
@@ -570,7 +570,7 @@ class FileService(CommonService):
 deleted_file_count = FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == f2d[0].file_id])
 File2DocumentService.delete_by_document_id(doc_id)
 if deleted_file_count > 0:
-STORAGE_IMPL.rm(b, n)
+settings.STORAGE_IMPL.rm(b, n)
 doc_parser = doc.parser_id
 if doc_parser == ParserType.TABLE:

@@ -29,15 +29,14 @@ class LLMService(CommonService):
 def get_init_tenant_llm(user_id):
-from api import settings
-from common import globals
+from common import settings
 tenant_llm = []
 seen = set()
 factory_configs = []
 for factory_config in [
 settings.CHAT_CFG,
-globals.EMBEDDING_CFG,
+settings.EMBEDDING_CFG,
 settings.ASR_CFG,
 settings.IMAGE2TEXT_CFG,
 settings.RERANK_CFG,


@@ -31,10 +31,8 @@ from common.misc_utils import get_uuid
 from common.time_utils import current_timestamp
 from common.constants import StatusEnum, TaskStatus
 from deepdoc.parser.excel_parser import RAGFlowExcelParser
-from rag.settings import get_svr_queue_name
-from rag.utils.storage_factory import STORAGE_IMPL
 from rag.utils.redis_conn import REDIS_CONN
-from common import globals
+from common import settings
 from rag.nlp import search
 CANVAS_DEBUG_DOC_ID = "dataflow_x"
@@ -359,7 +357,7 @@ def queue_tasks(doc: dict, bucket: str, name: str, priority: int):
 parse_task_array = []
 if doc["type"] == FileType.PDF.value:
-file_bin = STORAGE_IMPL.get(bucket, name)
+file_bin = settings.STORAGE_IMPL.get(bucket, name)
 do_layout = doc["parser_config"].get("layout_recognize", "DeepDOC")
 pages = PdfParser.total_page_number(doc["name"], file_bin)
 if pages is None:
@@ -381,7 +379,7 @@ def queue_tasks(doc: dict, bucket: str, name: str, priority: int):
 parse_task_array.append(task)
 elif doc["parser_id"] == "table":
-file_bin = STORAGE_IMPL.get(bucket, name)
+file_bin = settings.STORAGE_IMPL.get(bucket, name)
 rn = RAGFlowExcelParser.row_number(doc["name"], file_bin)
 for i in range(0, rn, 3000):
 task = new_task()
@@ -418,7 +416,7 @@ def queue_tasks(doc: dict, bucket: str, name: str, priority: int):
 if pre_task["chunk_ids"]:
 pre_chunk_ids.extend(pre_task["chunk_ids"].split())
 if pre_chunk_ids:
-globals.docStoreConn.delete({"id": pre_chunk_ids}, search.index_name(chunking_config["tenant_id"]),
+settings.docStoreConn.delete({"id": pre_chunk_ids}, search.index_name(chunking_config["tenant_id"]),
 chunking_config["kb_id"])
 DocumentService.update_by_id(doc["id"], {"chunk_num": ck_num})
@@ -428,7 +426,7 @@ def queue_tasks(doc: dict, bucket: str, name: str, priority: int):
 unfinished_task_array = [task for task in parse_task_array if task["progress"] < 1.0]
 for unfinished_task in unfinished_task_array:
 assert REDIS_CONN.queue_product(
-get_svr_queue_name(priority), message=unfinished_task
+settings.get_svr_queue_name(priority), message=unfinished_task
 ), "Can't access Redis. Please check the Redis' status."
@@ -518,7 +516,7 @@ def queue_dataflow(tenant_id:str, flow_id:str, task_id:str, doc_id:str=CANVAS_DE
 task["file"] = file
 if not REDIS_CONN.queue_product(
-get_svr_queue_name(priority), message=task
+settings.get_svr_queue_name(priority), message=task
 ):
 return False, "Can't access Redis. Please check the Redis' status."
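
A hedged usage sketch of the queueing change above: the per-priority queue-name helper now lives on `common.settings` instead of `rag.settings` (the `enqueue` wrapper below is hypothetical; `settings.get_svr_queue_name` and `REDIS_CONN.queue_product` are taken from this diff):

```python
from common import settings
from rag.utils.redis_conn import REDIS_CONN

def enqueue(task: dict, priority: int) -> None:
    # Hypothetical wrapper mirroring queue_tasks()/queue_dataflow() above:
    # resolve the queue name via common.settings, then push the task to Redis.
    queue_name = settings.get_svr_queue_name(priority)
    if not REDIS_CONN.queue_product(queue_name, message=task):
        raise RuntimeError("Can't access Redis. Please check the Redis' status.")
```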


@@ -16,8 +16,7 @@
 import os
 import logging
 from langfuse import Langfuse
-from api import settings
-from common import globals
+from common import settings
 from common.constants import LLMType
 from api.db.db_models import DB, LLMFactories, TenantLLM
 from api.db.services.common_service import CommonService
@@ -115,7 +114,7 @@ class TenantLLMService(CommonService):
 if model_config:
 model_config = model_config.to_dict()
 elif llm_type == LLMType.EMBEDDING and fid == 'Builtin' and "tei-" in os.getenv("COMPOSE_PROFILES", "") and mdlnm == os.getenv('TEI_MODEL', ''):
-embedding_cfg = globals.EMBEDDING_CFG
+embedding_cfg = settings.EMBEDDING_CFG
 model_config = {"llm_factory": 'Builtin', "api_key": embedding_cfg["api_key"], "llm_name": mdlnm, "api_base": embedding_cfg["base_url"]}
 else:
 raise LookupError(f"Model({mdlnm}@{fid}) not authorized")


@@ -27,7 +27,7 @@ from api.db.services.common_service import CommonService
 from common.misc_utils import get_uuid
 from common.time_utils import current_timestamp, datetime_format
 from common.constants import StatusEnum
-from common import globals
+from common import settings
 class UserService(CommonService):
@@ -221,7 +221,7 @@ class TenantService(CommonService):
 @DB.connection_context()
 def user_gateway(cls, tenant_id):
 hash_obj = hashlib.sha256(tenant_id.encode("utf-8"))
-return int(hash_obj.hexdigest(), 16)%len(globals.MINIO)
+return int(hash_obj.hexdigest(), 16)%len(settings.MINIO)
 class UserTenantService(CommonService):