Move api.settings to common.settings (#11036)

### What problem does this PR solve?

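As the title says, this PR consolidates runtime settings into `common.settings`. Call sites that previously used `api.settings`, `common.globals` (e.g. `retriever`, `docStoreConn`, `EMBEDDING_CFG`, `MINIO`), `rag.settings.get_svr_queue_name`, or `rag.utils.storage_factory.STORAGE_IMPL` now access them through `common.settings`, and `SVR_CONSUMER_GROUP_NAME` is imported from `common.constants`.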

### Type of change

- [x] Refactoring

---------

Signed-off-by: Jin Hai <haijin.chn@gmail.com>
This commit is contained in:
Jin Hai
2025-11-06 09:36:38 +08:00
committed by GitHub
parent 87c9a054d3
commit f98b24c9bf
68 changed files with 675 additions and 718 deletions

View File

@ -25,7 +25,6 @@ import trio
from langfuse import Langfuse
from peewee import fn
from agentic_reasoning import DeepResearcher
from api import settings
from common.constants import LLMType, ParserType, StatusEnum
from api.db.db_models import DB, Dialog
from api.db.services.common_service import CommonService
@ -44,7 +43,7 @@ from rag.prompts.generator import chunks_format, citation_prompt, cross_language
from common.token_utils import num_tokens_from_string
from rag.utils.tavily_conn import Tavily
from common.string_utils import remove_redundant_spaces
from common import globals
from common import settings
class DialogService(CommonService):
@ -373,7 +372,7 @@ def chat(dialog, messages, stream=True, **kwargs):
chat_mdl.bind_tools(toolcall_session, tools)
bind_models_ts = timer()
retriever = globals.retriever
retriever = settings.retriever
questions = [m["content"] for m in messages if m["role"] == "user"][-3:]
attachments = kwargs["doc_ids"].split(",") if "doc_ids" in kwargs else []
if "doc_ids" in messages[-1]:
@ -665,7 +664,7 @@ Please write the SQL, only SQL, without any other explanations or text.
logging.debug(f"{question} get SQL(refined): {sql}")
tried_times += 1
return globals.retriever.sql_retrieval(sql, format="json"), sql
return settings.retriever.sql_retrieval(sql, format="json"), sql
tbl, sql = get_table()
if tbl is None:
@ -759,7 +758,7 @@ def ask(question, kb_ids, tenant_id, chat_llm_name=None, search_config={}):
embedding_list = list(set([kb.embd_id for kb in kbs]))
is_knowledge_graph = all([kb.parser_id == ParserType.KG for kb in kbs])
retriever = globals.retriever if not is_knowledge_graph else settings.kg_retriever
retriever = settings.retriever if not is_knowledge_graph else settings.kg_retriever
embd_mdl = LLMBundle(tenant_id, LLMType.EMBEDDING, embedding_list[0])
chat_mdl = LLMBundle(tenant_id, LLMType.CHAT, chat_llm_name)
@ -855,7 +854,7 @@ def gen_mindmap(question, kb_ids, tenant_id, search_config={}):
if not doc_ids:
doc_ids = None
ranks = globals.retriever.retrieval(
ranks = settings.retriever.retrieval(
question=question,
embd_mdl=embd_mdl,
tenant_ids=tenant_ids,

View File

@ -35,13 +35,11 @@ from api.db.services.common_service import CommonService
from api.db.services.knowledgebase_service import KnowledgebaseService
from common.misc_utils import get_uuid
from common.time_utils import current_timestamp, get_format_time
from common.constants import LLMType, ParserType, StatusEnum, TaskStatus
from common.constants import LLMType, ParserType, StatusEnum, TaskStatus, SVR_CONSUMER_GROUP_NAME
from rag.nlp import rag_tokenizer, search
from rag.settings import get_svr_queue_name, SVR_CONSUMER_GROUP_NAME
from rag.utils.redis_conn import REDIS_CONN
from rag.utils.storage_factory import STORAGE_IMPL
from rag.utils.doc_store_conn import OrderByExpr
from common import globals
from common import settings
class DocumentService(CommonService):
model = Document
@ -308,33 +306,33 @@ class DocumentService(CommonService):
page_size = 1000
all_chunk_ids = []
while True:
chunks = globals.docStoreConn.search(["img_id"], [], {"doc_id": doc.id}, [], OrderByExpr(),
chunks = settings.docStoreConn.search(["img_id"], [], {"doc_id": doc.id}, [], OrderByExpr(),
page * page_size, page_size, search.index_name(tenant_id),
[doc.kb_id])
chunk_ids = globals.docStoreConn.getChunkIds(chunks)
chunk_ids = settings.docStoreConn.getChunkIds(chunks)
if not chunk_ids:
break
all_chunk_ids.extend(chunk_ids)
page += 1
for cid in all_chunk_ids:
if STORAGE_IMPL.obj_exist(doc.kb_id, cid):
STORAGE_IMPL.rm(doc.kb_id, cid)
if settings.STORAGE_IMPL.obj_exist(doc.kb_id, cid):
settings.STORAGE_IMPL.rm(doc.kb_id, cid)
if doc.thumbnail and not doc.thumbnail.startswith(IMG_BASE64_PREFIX):
if STORAGE_IMPL.obj_exist(doc.kb_id, doc.thumbnail):
STORAGE_IMPL.rm(doc.kb_id, doc.thumbnail)
globals.docStoreConn.delete({"doc_id": doc.id}, search.index_name(tenant_id), doc.kb_id)
if settings.STORAGE_IMPL.obj_exist(doc.kb_id, doc.thumbnail):
settings.STORAGE_IMPL.rm(doc.kb_id, doc.thumbnail)
settings.docStoreConn.delete({"doc_id": doc.id}, search.index_name(tenant_id), doc.kb_id)
graph_source = globals.docStoreConn.getFields(
globals.docStoreConn.search(["source_id"], [], {"kb_id": doc.kb_id, "knowledge_graph_kwd": ["graph"]}, [], OrderByExpr(), 0, 1, search.index_name(tenant_id), [doc.kb_id]), ["source_id"]
graph_source = settings.docStoreConn.getFields(
settings.docStoreConn.search(["source_id"], [], {"kb_id": doc.kb_id, "knowledge_graph_kwd": ["graph"]}, [], OrderByExpr(), 0, 1, search.index_name(tenant_id), [doc.kb_id]), ["source_id"]
)
if len(graph_source) > 0 and doc.id in list(graph_source.values())[0]["source_id"]:
globals.docStoreConn.update({"kb_id": doc.kb_id, "knowledge_graph_kwd": ["entity", "relation", "graph", "subgraph", "community_report"], "source_id": doc.id},
settings.docStoreConn.update({"kb_id": doc.kb_id, "knowledge_graph_kwd": ["entity", "relation", "graph", "subgraph", "community_report"], "source_id": doc.id},
{"remove": {"source_id": doc.id}},
search.index_name(tenant_id), doc.kb_id)
globals.docStoreConn.update({"kb_id": doc.kb_id, "knowledge_graph_kwd": ["graph"]},
settings.docStoreConn.update({"kb_id": doc.kb_id, "knowledge_graph_kwd": ["graph"]},
{"removed_kwd": "Y"},
search.index_name(tenant_id), doc.kb_id)
globals.docStoreConn.delete({"kb_id": doc.kb_id, "knowledge_graph_kwd": ["entity", "relation", "graph", "subgraph", "community_report"], "must_not": {"exists": "source_id"}},
settings.docStoreConn.delete({"kb_id": doc.kb_id, "knowledge_graph_kwd": ["entity", "relation", "graph", "subgraph", "community_report"], "must_not": {"exists": "source_id"}},
search.index_name(tenant_id), doc.kb_id)
except Exception:
pass
@ -851,12 +849,12 @@ def queue_raptor_o_graphrag_tasks(sample_doc_id, ty, priority, fake_doc_id="", d
task["doc_id"] = fake_doc_id
task["doc_ids"] = doc_ids
DocumentService.begin2parse(sample_doc_id["id"])
assert REDIS_CONN.queue_product(get_svr_queue_name(priority), message=task), "Can't access Redis. Please check the Redis' status."
assert REDIS_CONN.queue_product(settings.get_svr_queue_name(priority), message=task), "Can't access Redis. Please check the Redis' status."
return task["id"]
def get_queue_length(priority):
group_info = REDIS_CONN.queue_info(get_svr_queue_name(priority), SVR_CONSUMER_GROUP_NAME)
group_info = REDIS_CONN.queue_info(settings.get_svr_queue_name(priority), SVR_CONSUMER_GROUP_NAME)
if not group_info:
return 0
return int(group_info.get("lag", 0) or 0)
@ -938,7 +936,7 @@ def doc_upload_and_parse(conversation_id, file_objs, user_id):
else:
d["image"].save(output_buffer, format='JPEG')
STORAGE_IMPL.put(kb.id, d["id"], output_buffer.getvalue())
settings.STORAGE_IMPL.put(kb.id, d["id"], output_buffer.getvalue())
d["img_id"] = "{}-{}".format(kb.id, d["id"])
d.pop("image", None)
docs.append(d)
@ -995,10 +993,10 @@ def doc_upload_and_parse(conversation_id, file_objs, user_id):
d["q_%d_vec" % len(v)] = v
for b in range(0, len(cks), es_bulk_size):
if try_create_idx:
if not globals.docStoreConn.indexExist(idxnm, kb_id):
globals.docStoreConn.createIdx(idxnm, kb_id, len(vects[0]))
if not settings.docStoreConn.indexExist(idxnm, kb_id):
settings.docStoreConn.createIdx(idxnm, kb_id, len(vects[0]))
try_create_idx = False
globals.docStoreConn.insert(cks[b:b + es_bulk_size], idxnm, kb_id)
settings.docStoreConn.insert(cks[b:b + es_bulk_size], idxnm, kb_id)
DocumentService.increment_chunk_num(
doc_id, kb.id, token_counts[doc_id], chunk_counts[doc_id], 0)

View File

@ -33,7 +33,7 @@ from api.db.services.knowledgebase_service import KnowledgebaseService
from api.db.services.task_service import TaskService
from api.utils.file_utils import filename_type, read_potential_broken_pdf, thumbnail_img
from rag.llm.cv_model import GptV4
from rag.utils.storage_factory import STORAGE_IMPL
from common import settings
class FileService(CommonService):
@ -440,13 +440,13 @@ class FileService(CommonService):
raise RuntimeError("This type of file has not been supported yet!")
location = filename
while STORAGE_IMPL.obj_exist(kb.id, location):
while settings.STORAGE_IMPL.obj_exist(kb.id, location):
location += "_"
blob = file.read()
if filetype == FileType.PDF.value:
blob = read_potential_broken_pdf(blob)
STORAGE_IMPL.put(kb.id, location, blob)
settings.STORAGE_IMPL.put(kb.id, location, blob)
doc_id = get_uuid()
@ -454,7 +454,7 @@ class FileService(CommonService):
thumbnail_location = ""
if img is not None:
thumbnail_location = f"thumbnail_{doc_id}.png"
STORAGE_IMPL.put(kb.id, thumbnail_location, img)
settings.STORAGE_IMPL.put(kb.id, thumbnail_location, img)
doc = {
"id": doc_id,
@ -534,12 +534,12 @@ class FileService(CommonService):
@staticmethod
def get_blob(user_id, location):
bname = f"{user_id}-downloads"
return STORAGE_IMPL.get(bname, location)
return settings.STORAGE_IMPL.get(bname, location)
@staticmethod
def put_blob(user_id, location, blob):
bname = f"{user_id}-downloads"
return STORAGE_IMPL.put(bname, location, blob)
return settings.STORAGE_IMPL.put(bname, location, blob)
@classmethod
@DB.connection_context()
@ -570,7 +570,7 @@ class FileService(CommonService):
deleted_file_count = FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == f2d[0].file_id])
File2DocumentService.delete_by_document_id(doc_id)
if deleted_file_count > 0:
STORAGE_IMPL.rm(b, n)
settings.STORAGE_IMPL.rm(b, n)
doc_parser = doc.parser_id
if doc_parser == ParserType.TABLE:

View File

@ -29,15 +29,14 @@ class LLMService(CommonService):
def get_init_tenant_llm(user_id):
from api import settings
from common import globals
from common import settings
tenant_llm = []
seen = set()
factory_configs = []
for factory_config in [
settings.CHAT_CFG,
globals.EMBEDDING_CFG,
settings.EMBEDDING_CFG,
settings.ASR_CFG,
settings.IMAGE2TEXT_CFG,
settings.RERANK_CFG,

View File

@ -31,10 +31,8 @@ from common.misc_utils import get_uuid
from common.time_utils import current_timestamp
from common.constants import StatusEnum, TaskStatus
from deepdoc.parser.excel_parser import RAGFlowExcelParser
from rag.settings import get_svr_queue_name
from rag.utils.storage_factory import STORAGE_IMPL
from rag.utils.redis_conn import REDIS_CONN
from common import globals
from common import settings
from rag.nlp import search
CANVAS_DEBUG_DOC_ID = "dataflow_x"
@ -359,7 +357,7 @@ def queue_tasks(doc: dict, bucket: str, name: str, priority: int):
parse_task_array = []
if doc["type"] == FileType.PDF.value:
file_bin = STORAGE_IMPL.get(bucket, name)
file_bin = settings.STORAGE_IMPL.get(bucket, name)
do_layout = doc["parser_config"].get("layout_recognize", "DeepDOC")
pages = PdfParser.total_page_number(doc["name"], file_bin)
if pages is None:
@ -381,7 +379,7 @@ def queue_tasks(doc: dict, bucket: str, name: str, priority: int):
parse_task_array.append(task)
elif doc["parser_id"] == "table":
file_bin = STORAGE_IMPL.get(bucket, name)
file_bin = settings.STORAGE_IMPL.get(bucket, name)
rn = RAGFlowExcelParser.row_number(doc["name"], file_bin)
for i in range(0, rn, 3000):
task = new_task()
@ -418,7 +416,7 @@ def queue_tasks(doc: dict, bucket: str, name: str, priority: int):
if pre_task["chunk_ids"]:
pre_chunk_ids.extend(pre_task["chunk_ids"].split())
if pre_chunk_ids:
globals.docStoreConn.delete({"id": pre_chunk_ids}, search.index_name(chunking_config["tenant_id"]),
settings.docStoreConn.delete({"id": pre_chunk_ids}, search.index_name(chunking_config["tenant_id"]),
chunking_config["kb_id"])
DocumentService.update_by_id(doc["id"], {"chunk_num": ck_num})
@ -428,7 +426,7 @@ def queue_tasks(doc: dict, bucket: str, name: str, priority: int):
unfinished_task_array = [task for task in parse_task_array if task["progress"] < 1.0]
for unfinished_task in unfinished_task_array:
assert REDIS_CONN.queue_product(
get_svr_queue_name(priority), message=unfinished_task
settings.get_svr_queue_name(priority), message=unfinished_task
), "Can't access Redis. Please check the Redis' status."
@ -518,7 +516,7 @@ def queue_dataflow(tenant_id:str, flow_id:str, task_id:str, doc_id:str=CANVAS_DE
task["file"] = file
if not REDIS_CONN.queue_product(
get_svr_queue_name(priority), message=task
settings.get_svr_queue_name(priority), message=task
):
return False, "Can't access Redis. Please check the Redis' status."

View File

@ -16,8 +16,7 @@
import os
import logging
from langfuse import Langfuse
from api import settings
from common import globals
from common import settings
from common.constants import LLMType
from api.db.db_models import DB, LLMFactories, TenantLLM
from api.db.services.common_service import CommonService
@ -115,7 +114,7 @@ class TenantLLMService(CommonService):
if model_config:
model_config = model_config.to_dict()
elif llm_type == LLMType.EMBEDDING and fid == 'Builtin' and "tei-" in os.getenv("COMPOSE_PROFILES", "") and mdlnm == os.getenv('TEI_MODEL', ''):
embedding_cfg = globals.EMBEDDING_CFG
embedding_cfg = settings.EMBEDDING_CFG
model_config = {"llm_factory": 'Builtin', "api_key": embedding_cfg["api_key"], "llm_name": mdlnm, "api_base": embedding_cfg["base_url"]}
else:
raise LookupError(f"Model({mdlnm}@{fid}) not authorized")

View File

@ -27,7 +27,7 @@ from api.db.services.common_service import CommonService
from common.misc_utils import get_uuid
from common.time_utils import current_timestamp, datetime_format
from common.constants import StatusEnum
from common import globals
from common import settings
class UserService(CommonService):
@ -221,7 +221,7 @@ class TenantService(CommonService):
@DB.connection_context()
def user_gateway(cls, tenant_id):
hash_obj = hashlib.sha256(tenant_id.encode("utf-8"))
return int(hash_obj.hexdigest(), 16)%len(globals.MINIO)
return int(hash_obj.hexdigest(), 16)%len(settings.MINIO)
class UserTenantService(CommonService):