Move api.settings to common.settings (#11036)

### What problem does this PR solve?

As title: move the `api.settings` module into `common.settings`. All imports and call sites are updated, and the handles previously exposed via `common.globals` (e.g. `docStoreConn`, `retriever`, `EMBEDDING_CFG`) as well as `rag`-level helpers such as `STORAGE_IMPL` and `get_svr_queue_name` are now accessed through `common.settings`.
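
For reviewers, a minimal before/after sketch of the recurring substitution this commit applies (the module and attribute names are taken from the hunks below; the wrapper function itself is only illustrative, not code from this PR):

```python
# Before: configuration and shared handles were split across several modules.
# from api import settings
# from common import globals
# from rag.utils.storage_factory import STORAGE_IMPL

# After: everything is reached through common.settings.
from common import settings

def drop_doc_chunks(index_name: str, kb_id: str, doc_id: str) -> None:
    # Illustrative helper: doc-store and object storage now hang off settings.
    settings.docStoreConn.delete({"doc_id": doc_id}, index_name, kb_id)
    if settings.STORAGE_IMPL.obj_exist(kb_id, doc_id):
        settings.STORAGE_IMPL.rm(kb_id, doc_id)
```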

### Type of change

- [x] Refactoring

---------

Signed-off-by: Jin Hai <haijin.chn@gmail.com>
Jin Hai authored on 2025-11-06 09:36:38 +08:00, committed by GitHub
parent 87c9a054d3
commit f98b24c9bf
68 changed files with 675 additions and 718 deletions


@@ -31,7 +31,7 @@ from peewee import InterfaceError, OperationalError, BigIntegerField, BooleanFie
 from playhouse.migrate import MySQLMigrator, PostgresqlMigrator, migrate
 from playhouse.pool import PooledMySQLDatabase, PooledPostgresqlDatabase
-from api import settings, utils
+from api import utils
 from api.db import SerializedType
 from api.utils.json_encode import json_dumps, json_loads
 from api.utils.configs import deserialize_b64, serialize_b64
@@ -39,6 +39,7 @@ from api.utils.configs import deserialize_b64, serialize_b64
 from common.time_utils import current_timestamp, timestamp_to_date, date_string_to_timestamp
 from common.decorator import singleton
 from common.constants import ParserType
+from common import settings
 CONTINUOUS_FIELD_TYPE = {IntegerField, FloatField, DateTimeField}


@@ -29,10 +29,9 @@ from api.db.services.knowledgebase_service import KnowledgebaseService
 from api.db.services.tenant_llm_service import LLMFactoriesService, TenantLLMService
 from api.db.services.llm_service import LLMService, LLMBundle, get_init_tenant_llm
 from api.db.services.user_service import TenantService, UserTenantService
-from api import settings
 from common.constants import LLMType
 from common.file_utils import get_project_base_directory
-from common import globals
+from common import settings
 from api.common.base64 import encode_to_base64
@@ -50,7 +49,7 @@ def init_superuser():
 "id": user_info["id"],
 "name": user_info["nickname"] + "s Kingdom",
 "llm_id": settings.CHAT_MDL,
-"embd_id": globals.EMBEDDING_MDL,
+"embd_id": settings.EMBEDDING_MDL,
 "asr_id": settings.ASR_MDL,
 "parser_ids": settings.PARSERS,
 "img2txt_id": settings.IMAGE2TEXT_MDL


@@ -16,7 +16,6 @@
 import logging
 import uuid
-from api import settings
 from api.utils.api_utils import group_by
 from api.db import FileType, UserTenantRole
 from api.db.services.api_service import APITokenService, API4ConversationService
@@ -35,10 +34,9 @@ from api.db.services.task_service import TaskService
 from api.db.services.tenant_llm_service import TenantLLMService
 from api.db.services.user_canvas_version import UserCanvasVersionService
 from api.db.services.user_service import TenantService, UserService, UserTenantService
-from rag.utils.storage_factory import STORAGE_IMPL
 from rag.nlp import search
 from common.constants import ActiveEnum
-from common import globals
+from common import settings
 def create_new_user(user_info: dict) -> dict:
 """
@@ -64,7 +62,7 @@ def create_new_user(user_info: dict) -> dict:
 "id": user_id,
 "name": user_info["nickname"] + "s Kingdom",
 "llm_id": settings.CHAT_MDL,
-"embd_id": globals.EMBEDDING_MDL,
+"embd_id": settings.EMBEDDING_MDL,
 "asr_id": settings.ASR_MDL,
 "parser_ids": settings.PARSERS,
 "img2txt_id": settings.IMAGE2TEXT_MDL,
@@ -159,8 +157,8 @@ def delete_user_data(user_id: str) -> dict:
 if kb_ids:
 # step1.1.1 delete files in storage, remove bucket
 for kb_id in kb_ids:
-if STORAGE_IMPL.bucket_exists(kb_id):
-STORAGE_IMPL.remove_bucket(kb_id)
+if settings.STORAGE_IMPL.bucket_exists(kb_id):
+settings.STORAGE_IMPL.remove_bucket(kb_id)
 done_msg += f"- Removed {len(kb_ids)} dataset's buckets.\n"
 # step1.1.2 delete file and document info in db
 doc_ids = DocumentService.get_all_doc_ids_by_kb_ids(kb_ids)
@@ -180,7 +178,7 @@ def delete_user_data(user_id: str) -> dict:
 )
 done_msg += f"- Deleted {file2doc_delete_res} document-file relation records.\n"
 # step1.1.3 delete chunk in es
-r = globals.docStoreConn.delete({"kb_id": kb_ids},
+r = settings.docStoreConn.delete({"kb_id": kb_ids},
 search.index_name(tenant_id), kb_ids)
 done_msg += f"- Deleted {r} chunk records.\n"
 kb_delete_res = KnowledgebaseService.delete_by_ids(kb_ids)
@@ -219,7 +217,7 @@ def delete_user_data(user_id: str) -> dict:
 if created_files:
 # step2.1.1.1 delete file in storage
 for f in created_files:
-STORAGE_IMPL.rm(f.parent_id, f.location)
+settings.STORAGE_IMPL.rm(f.parent_id, f.location)
 done_msg += f"- Deleted {len(created_files)} uploaded file.\n"
 # step2.1.1.2 delete file record
 file_delete_res = FileService.delete_by_ids([f.id for f in created_files])
@@ -238,7 +236,7 @@ def delete_user_data(user_id: str) -> dict:
 kb_doc_info = {}
 for _tenant_id, kb_doc in kb_grouped_doc.items():
 for _kb_id, docs in kb_doc.items():
-chunk_delete_res += globals.docStoreConn.delete(
+chunk_delete_res += settings.docStoreConn.delete(
 {"doc_id": [d["id"] for d in docs]},
 search.index_name(_tenant_id), _kb_id
 )


@@ -25,7 +25,6 @@ import trio
 from langfuse import Langfuse
 from peewee import fn
 from agentic_reasoning import DeepResearcher
-from api import settings
 from common.constants import LLMType, ParserType, StatusEnum
 from api.db.db_models import DB, Dialog
 from api.db.services.common_service import CommonService
@@ -44,7 +43,7 @@ from rag.prompts.generator import chunks_format, citation_prompt, cross_language
 from common.token_utils import num_tokens_from_string
 from rag.utils.tavily_conn import Tavily
 from common.string_utils import remove_redundant_spaces
-from common import globals
+from common import settings
 class DialogService(CommonService):
@@ -373,7 +372,7 @@ def chat(dialog, messages, stream=True, **kwargs):
 chat_mdl.bind_tools(toolcall_session, tools)
 bind_models_ts = timer()
-retriever = globals.retriever
+retriever = settings.retriever
 questions = [m["content"] for m in messages if m["role"] == "user"][-3:]
 attachments = kwargs["doc_ids"].split(",") if "doc_ids" in kwargs else []
 if "doc_ids" in messages[-1]:
@@ -665,7 +664,7 @@ Please write the SQL, only SQL, without any other explanations or text.
 logging.debug(f"{question} get SQL(refined): {sql}")
 tried_times += 1
-return globals.retriever.sql_retrieval(sql, format="json"), sql
+return settings.retriever.sql_retrieval(sql, format="json"), sql
 tbl, sql = get_table()
 if tbl is None:
@@ -759,7 +758,7 @@ def ask(question, kb_ids, tenant_id, chat_llm_name=None, search_config={}):
 embedding_list = list(set([kb.embd_id for kb in kbs]))
 is_knowledge_graph = all([kb.parser_id == ParserType.KG for kb in kbs])
-retriever = globals.retriever if not is_knowledge_graph else settings.kg_retriever
+retriever = settings.retriever if not is_knowledge_graph else settings.kg_retriever
 embd_mdl = LLMBundle(tenant_id, LLMType.EMBEDDING, embedding_list[0])
 chat_mdl = LLMBundle(tenant_id, LLMType.CHAT, chat_llm_name)
@@ -855,7 +854,7 @@ def gen_mindmap(question, kb_ids, tenant_id, search_config={}):
 if not doc_ids:
 doc_ids = None
-ranks = globals.retriever.retrieval(
+ranks = settings.retriever.retrieval(
 question=question,
 embd_mdl=embd_mdl,
 tenant_ids=tenant_ids,


@@ -35,13 +35,11 @@ from api.db.services.common_service import CommonService
 from api.db.services.knowledgebase_service import KnowledgebaseService
 from common.misc_utils import get_uuid
 from common.time_utils import current_timestamp, get_format_time
-from common.constants import LLMType, ParserType, StatusEnum, TaskStatus
+from common.constants import LLMType, ParserType, StatusEnum, TaskStatus, SVR_CONSUMER_GROUP_NAME
 from rag.nlp import rag_tokenizer, search
-from rag.settings import get_svr_queue_name, SVR_CONSUMER_GROUP_NAME
 from rag.utils.redis_conn import REDIS_CONN
-from rag.utils.storage_factory import STORAGE_IMPL
 from rag.utils.doc_store_conn import OrderByExpr
-from common import globals
+from common import settings
 class DocumentService(CommonService):
 model = Document
@@ -308,33 +306,33 @@ class DocumentService(CommonService):
 page_size = 1000
 all_chunk_ids = []
 while True:
-chunks = globals.docStoreConn.search(["img_id"], [], {"doc_id": doc.id}, [], OrderByExpr(),
+chunks = settings.docStoreConn.search(["img_id"], [], {"doc_id": doc.id}, [], OrderByExpr(),
 page * page_size, page_size, search.index_name(tenant_id),
 [doc.kb_id])
-chunk_ids = globals.docStoreConn.getChunkIds(chunks)
+chunk_ids = settings.docStoreConn.getChunkIds(chunks)
 if not chunk_ids:
 break
 all_chunk_ids.extend(chunk_ids)
 page += 1
 for cid in all_chunk_ids:
-if STORAGE_IMPL.obj_exist(doc.kb_id, cid):
-STORAGE_IMPL.rm(doc.kb_id, cid)
+if settings.STORAGE_IMPL.obj_exist(doc.kb_id, cid):
+settings.STORAGE_IMPL.rm(doc.kb_id, cid)
 if doc.thumbnail and not doc.thumbnail.startswith(IMG_BASE64_PREFIX):
-if STORAGE_IMPL.obj_exist(doc.kb_id, doc.thumbnail):
-STORAGE_IMPL.rm(doc.kb_id, doc.thumbnail)
-globals.docStoreConn.delete({"doc_id": doc.id}, search.index_name(tenant_id), doc.kb_id)
+if settings.STORAGE_IMPL.obj_exist(doc.kb_id, doc.thumbnail):
+settings.STORAGE_IMPL.rm(doc.kb_id, doc.thumbnail)
+settings.docStoreConn.delete({"doc_id": doc.id}, search.index_name(tenant_id), doc.kb_id)
-graph_source = globals.docStoreConn.getFields(
-globals.docStoreConn.search(["source_id"], [], {"kb_id": doc.kb_id, "knowledge_graph_kwd": ["graph"]}, [], OrderByExpr(), 0, 1, search.index_name(tenant_id), [doc.kb_id]), ["source_id"]
+graph_source = settings.docStoreConn.getFields(
+settings.docStoreConn.search(["source_id"], [], {"kb_id": doc.kb_id, "knowledge_graph_kwd": ["graph"]}, [], OrderByExpr(), 0, 1, search.index_name(tenant_id), [doc.kb_id]), ["source_id"]
 )
 if len(graph_source) > 0 and doc.id in list(graph_source.values())[0]["source_id"]:
-globals.docStoreConn.update({"kb_id": doc.kb_id, "knowledge_graph_kwd": ["entity", "relation", "graph", "subgraph", "community_report"], "source_id": doc.id},
+settings.docStoreConn.update({"kb_id": doc.kb_id, "knowledge_graph_kwd": ["entity", "relation", "graph", "subgraph", "community_report"], "source_id": doc.id},
 {"remove": {"source_id": doc.id}},
 search.index_name(tenant_id), doc.kb_id)
-globals.docStoreConn.update({"kb_id": doc.kb_id, "knowledge_graph_kwd": ["graph"]},
+settings.docStoreConn.update({"kb_id": doc.kb_id, "knowledge_graph_kwd": ["graph"]},
 {"removed_kwd": "Y"},
 search.index_name(tenant_id), doc.kb_id)
-globals.docStoreConn.delete({"kb_id": doc.kb_id, "knowledge_graph_kwd": ["entity", "relation", "graph", "subgraph", "community_report"], "must_not": {"exists": "source_id"}},
+settings.docStoreConn.delete({"kb_id": doc.kb_id, "knowledge_graph_kwd": ["entity", "relation", "graph", "subgraph", "community_report"], "must_not": {"exists": "source_id"}},
 search.index_name(tenant_id), doc.kb_id)
 except Exception:
 pass
@@ -851,12 +849,12 @@ def queue_raptor_o_graphrag_tasks(sample_doc_id, ty, priority, fake_doc_id="", d
 task["doc_id"] = fake_doc_id
 task["doc_ids"] = doc_ids
 DocumentService.begin2parse(sample_doc_id["id"])
-assert REDIS_CONN.queue_product(get_svr_queue_name(priority), message=task), "Can't access Redis. Please check the Redis' status."
+assert REDIS_CONN.queue_product(settings.get_svr_queue_name(priority), message=task), "Can't access Redis. Please check the Redis' status."
 return task["id"]
 def get_queue_length(priority):
-group_info = REDIS_CONN.queue_info(get_svr_queue_name(priority), SVR_CONSUMER_GROUP_NAME)
+group_info = REDIS_CONN.queue_info(settings.get_svr_queue_name(priority), SVR_CONSUMER_GROUP_NAME)
 if not group_info:
 return 0
 return int(group_info.get("lag", 0) or 0)
@@ -938,7 +936,7 @@ def doc_upload_and_parse(conversation_id, file_objs, user_id):
 else:
 d["image"].save(output_buffer, format='JPEG')
-STORAGE_IMPL.put(kb.id, d["id"], output_buffer.getvalue())
+settings.STORAGE_IMPL.put(kb.id, d["id"], output_buffer.getvalue())
 d["img_id"] = "{}-{}".format(kb.id, d["id"])
 d.pop("image", None)
 docs.append(d)
@@ -995,10 +993,10 @@ def doc_upload_and_parse(conversation_id, file_objs, user_id):
 d["q_%d_vec" % len(v)] = v
 for b in range(0, len(cks), es_bulk_size):
 if try_create_idx:
-if not globals.docStoreConn.indexExist(idxnm, kb_id):
-globals.docStoreConn.createIdx(idxnm, kb_id, len(vects[0]))
+if not settings.docStoreConn.indexExist(idxnm, kb_id):
+settings.docStoreConn.createIdx(idxnm, kb_id, len(vects[0]))
 try_create_idx = False
-globals.docStoreConn.insert(cks[b:b + es_bulk_size], idxnm, kb_id)
+settings.docStoreConn.insert(cks[b:b + es_bulk_size], idxnm, kb_id)
 DocumentService.increment_chunk_num(
 doc_id, kb.id, token_counts[doc_id], chunk_counts[doc_id], 0)


@@ -33,7 +33,7 @@ from api.db.services.knowledgebase_service import KnowledgebaseService
 from api.db.services.task_service import TaskService
 from api.utils.file_utils import filename_type, read_potential_broken_pdf, thumbnail_img
 from rag.llm.cv_model import GptV4
-from rag.utils.storage_factory import STORAGE_IMPL
+from common import settings
 class FileService(CommonService):
@@ -440,13 +440,13 @@ class FileService(CommonService):
 raise RuntimeError("This type of file has not been supported yet!")
 location = filename
-while STORAGE_IMPL.obj_exist(kb.id, location):
+while settings.STORAGE_IMPL.obj_exist(kb.id, location):
 location += "_"
 blob = file.read()
 if filetype == FileType.PDF.value:
 blob = read_potential_broken_pdf(blob)
-STORAGE_IMPL.put(kb.id, location, blob)
+settings.STORAGE_IMPL.put(kb.id, location, blob)
 doc_id = get_uuid()
@@ -454,7 +454,7 @@ class FileService(CommonService):
 thumbnail_location = ""
 if img is not None:
 thumbnail_location = f"thumbnail_{doc_id}.png"
-STORAGE_IMPL.put(kb.id, thumbnail_location, img)
+settings.STORAGE_IMPL.put(kb.id, thumbnail_location, img)
 doc = {
 "id": doc_id,
@@ -534,12 +534,12 @@ class FileService(CommonService):
 @staticmethod
 def get_blob(user_id, location):
 bname = f"{user_id}-downloads"
-return STORAGE_IMPL.get(bname, location)
+return settings.STORAGE_IMPL.get(bname, location)
 @staticmethod
 def put_blob(user_id, location, blob):
 bname = f"{user_id}-downloads"
-return STORAGE_IMPL.put(bname, location, blob)
+return settings.STORAGE_IMPL.put(bname, location, blob)
 @classmethod
 @DB.connection_context()
@@ -570,7 +570,7 @@ class FileService(CommonService):
 deleted_file_count = FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == f2d[0].file_id])
 File2DocumentService.delete_by_document_id(doc_id)
 if deleted_file_count > 0:
-STORAGE_IMPL.rm(b, n)
+settings.STORAGE_IMPL.rm(b, n)
 doc_parser = doc.parser_id
 if doc_parser == ParserType.TABLE:

@@ -29,15 +29,14 @@ class LLMService(CommonService):
 def get_init_tenant_llm(user_id):
-from api import settings
-from common import globals
+from common import settings
 tenant_llm = []
 seen = set()
 factory_configs = []
 for factory_config in [
 settings.CHAT_CFG,
-globals.EMBEDDING_CFG,
+settings.EMBEDDING_CFG,
 settings.ASR_CFG,
 settings.IMAGE2TEXT_CFG,
 settings.RERANK_CFG,


@@ -31,10 +31,8 @@ from common.misc_utils import get_uuid
 from common.time_utils import current_timestamp
 from common.constants import StatusEnum, TaskStatus
 from deepdoc.parser.excel_parser import RAGFlowExcelParser
-from rag.settings import get_svr_queue_name
-from rag.utils.storage_factory import STORAGE_IMPL
 from rag.utils.redis_conn import REDIS_CONN
-from common import globals
+from common import settings
 from rag.nlp import search
 CANVAS_DEBUG_DOC_ID = "dataflow_x"
@@ -359,7 +357,7 @@ def queue_tasks(doc: dict, bucket: str, name: str, priority: int):
 parse_task_array = []
 if doc["type"] == FileType.PDF.value:
-file_bin = STORAGE_IMPL.get(bucket, name)
+file_bin = settings.STORAGE_IMPL.get(bucket, name)
 do_layout = doc["parser_config"].get("layout_recognize", "DeepDOC")
 pages = PdfParser.total_page_number(doc["name"], file_bin)
 if pages is None:
@@ -381,7 +379,7 @@ def queue_tasks(doc: dict, bucket: str, name: str, priority: int):
 parse_task_array.append(task)
 elif doc["parser_id"] == "table":
-file_bin = STORAGE_IMPL.get(bucket, name)
+file_bin = settings.STORAGE_IMPL.get(bucket, name)
 rn = RAGFlowExcelParser.row_number(doc["name"], file_bin)
 for i in range(0, rn, 3000):
 task = new_task()
@@ -418,7 +416,7 @@ def queue_tasks(doc: dict, bucket: str, name: str, priority: int):
 if pre_task["chunk_ids"]:
 pre_chunk_ids.extend(pre_task["chunk_ids"].split())
 if pre_chunk_ids:
-globals.docStoreConn.delete({"id": pre_chunk_ids}, search.index_name(chunking_config["tenant_id"]),
+settings.docStoreConn.delete({"id": pre_chunk_ids}, search.index_name(chunking_config["tenant_id"]),
 chunking_config["kb_id"])
 DocumentService.update_by_id(doc["id"], {"chunk_num": ck_num})
@@ -428,7 +426,7 @@ def queue_tasks(doc: dict, bucket: str, name: str, priority: int):
 unfinished_task_array = [task for task in parse_task_array if task["progress"] < 1.0]
 for unfinished_task in unfinished_task_array:
 assert REDIS_CONN.queue_product(
-get_svr_queue_name(priority), message=unfinished_task
+settings.get_svr_queue_name(priority), message=unfinished_task
 ), "Can't access Redis. Please check the Redis' status."
@@ -518,7 +516,7 @@ def queue_dataflow(tenant_id:str, flow_id:str, task_id:str, doc_id:str=CANVAS_DE
 task["file"] = file
 if not REDIS_CONN.queue_product(
-get_svr_queue_name(priority), message=task
+settings.get_svr_queue_name(priority), message=task
 ):
 return False, "Can't access Redis. Please check the Redis' status."
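
A hedged usage sketch of the queueing change above: the per-priority queue-name helper now lives on `common.settings` instead of `rag.settings` (the `enqueue` wrapper below is hypothetical; `settings.get_svr_queue_name` and `REDIS_CONN.queue_product` are taken from this diff):

```python
from common import settings
from rag.utils.redis_conn import REDIS_CONN

def enqueue(task: dict, priority: int) -> None:
    # Hypothetical wrapper mirroring queue_tasks()/queue_dataflow() above:
    # resolve the queue name via common.settings, then push the task to Redis.
    queue_name = settings.get_svr_queue_name(priority)
    if not REDIS_CONN.queue_product(queue_name, message=task):
        raise RuntimeError("Can't access Redis. Please check the Redis' status.")
```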


@@ -16,8 +16,7 @@
 import os
 import logging
 from langfuse import Langfuse
-from api import settings
-from common import globals
+from common import settings
 from common.constants import LLMType
 from api.db.db_models import DB, LLMFactories, TenantLLM
 from api.db.services.common_service import CommonService
@@ -115,7 +114,7 @@ class TenantLLMService(CommonService):
 if model_config:
 model_config = model_config.to_dict()
 elif llm_type == LLMType.EMBEDDING and fid == 'Builtin' and "tei-" in os.getenv("COMPOSE_PROFILES", "") and mdlnm == os.getenv('TEI_MODEL', ''):
-embedding_cfg = globals.EMBEDDING_CFG
+embedding_cfg = settings.EMBEDDING_CFG
 model_config = {"llm_factory": 'Builtin', "api_key": embedding_cfg["api_key"], "llm_name": mdlnm, "api_base": embedding_cfg["base_url"]}
 else:
 raise LookupError(f"Model({mdlnm}@{fid}) not authorized")


@@ -27,7 +27,7 @@ from api.db.services.common_service import CommonService
 from common.misc_utils import get_uuid
 from common.time_utils import current_timestamp, datetime_format
 from common.constants import StatusEnum
-from common import globals
+from common import settings
 class UserService(CommonService):
@@ -221,7 +221,7 @@ class TenantService(CommonService):
 @DB.connection_context()
 def user_gateway(cls, tenant_id):
 hash_obj = hashlib.sha256(tenant_id.encode("utf-8"))
-return int(hash_obj.hexdigest(), 16)%len(globals.MINIO)
+return int(hash_obj.hexdigest(), 16)%len(settings.MINIO)
 class UserTenantService(CommonService):