Move api.settings to common.settings (#11036)

### What problem does this PR solve?

As title

### Type of change

- [x] Refactoring

---------

Signed-off-by: Jin Hai <haijin.chn@gmail.com>
This commit is contained in:
Jin Hai
2025-11-06 09:36:38 +08:00
committed by GitHub
parent 87c9a054d3
commit f98b24c9bf
68 changed files with 675 additions and 718 deletions

View File

@ -33,7 +33,7 @@ from api.utils import commands
from flask_mail import Mail
from flask_session import Session
from flask_login import LoginManager
from api import settings
from common import settings
from api.utils.api_utils import server_error_response
from api.constants import API_VERSION

View File

@ -40,14 +40,13 @@ from api.utils.api_utils import server_error_response, get_data_error_result, ge
from api.utils.file_utils import filename_type, thumbnail
from rag.app.tag import label_question
from rag.prompts.generator import keyword_extraction
from rag.utils.storage_factory import STORAGE_IMPL
from common.time_utils import current_timestamp, datetime_format
from api.db.services.canvas_service import UserCanvasService
from agent.canvas import Canvas
from functools import partial
from pathlib import Path
from common import globals
from common import settings
@manager.route('/new_token', methods=['POST']) # noqa: F821
@ -428,10 +427,10 @@ def upload():
message="This type of file has not been supported yet!")
location = filename
while STORAGE_IMPL.obj_exist(kb_id, location):
while settings.STORAGE_IMPL.obj_exist(kb_id, location):
location += "_"
blob = request.files['file'].read()
STORAGE_IMPL.put(kb_id, location, blob)
settings.STORAGE_IMPL.put(kb_id, location, blob)
doc = {
"id": get_uuid(),
"kb_id": kb.id,
@ -538,7 +537,7 @@ def list_chunks():
)
kb_ids = KnowledgebaseService.get_kb_ids(tenant_id)
res = globals.retriever.chunk_list(doc_id, tenant_id, kb_ids)
res = settings.retriever.chunk_list(doc_id, tenant_id, kb_ids)
res = [
{
"content": res_item["content_with_weight"],
@ -564,7 +563,7 @@ def get_chunk(chunk_id):
try:
tenant_id = objs[0].tenant_id
kb_ids = KnowledgebaseService.get_kb_ids(tenant_id)
chunk = globals.docStoreConn.get(chunk_id, search.index_name(tenant_id), kb_ids)
chunk = settings.docStoreConn.get(chunk_id, search.index_name(tenant_id), kb_ids)
if chunk is None:
return server_error_response(Exception("Chunk not found"))
k = []
@ -699,7 +698,7 @@ def document_rm():
FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == f2d[0].file_id])
File2DocumentService.delete_by_document_id(doc_id)
STORAGE_IMPL.rm(b, n)
settings.STORAGE_IMPL.rm(b, n)
except Exception as e:
errors += str(e)
@ -792,7 +791,7 @@ def completion_faq():
if ans["reference"]["chunks"][chunk_idx]["img_id"]:
try:
bkt, nm = ans["reference"]["chunks"][chunk_idx]["img_id"].split("-")
response = STORAGE_IMPL.get(bkt, nm)
response = settings.STORAGE_IMPL.get(bkt, nm)
data_type_picture["url"] = base64.b64encode(response).decode('utf-8')
data.append(data_type_picture)
break
@ -837,7 +836,7 @@ def completion_faq():
if ans["reference"]["chunks"][chunk_idx]["img_id"]:
try:
bkt, nm = ans["reference"]["chunks"][chunk_idx]["img_id"].split("-")
response = STORAGE_IMPL.get(bkt, nm)
response = settings.STORAGE_IMPL.get(bkt, nm)
data_type_picture["url"] = base64.b64encode(response).decode('utf-8')
data.append(data_type_picture)
break
@ -886,7 +885,7 @@ def retrieval():
if req.get("keyword", False):
chat_mdl = LLMBundle(kbs[0].tenant_id, LLMType.CHAT)
question += keyword_extraction(chat_mdl, question)
ranks = globals.retriever.retrieval(question, embd_mdl, kbs[0].tenant_id, kb_ids, page, size,
ranks = settings.retriever.retrieval(question, embd_mdl, kbs[0].tenant_id, kb_ids, page, size,
similarity_threshold, vector_similarity_weight, top,
doc_ids, rerank_mdl=rerank_mdl, highlight= highlight,
rank_feature=label_question(question, kbs))

View File

@ -45,7 +45,7 @@ from api.utils.file_utils import filename_type, read_potential_broken_pdf
from rag.flow.pipeline import Pipeline
from rag.nlp import search
from rag.utils.redis_conn import REDIS_CONN
from common import globals
from common import settings
@manager.route('/templates', methods=['GET']) # noqa: F821
@ -192,8 +192,8 @@ def rerun():
if 0 < doc["progress"] < 1:
return get_data_error_result(message=f"`{doc['name']}` is processing...")
if globals.docStoreConn.indexExist(search.index_name(current_user.id), doc["kb_id"]):
globals.docStoreConn.delete({"doc_id": doc["id"]}, search.index_name(current_user.id), doc["kb_id"])
if settings.docStoreConn.indexExist(search.index_name(current_user.id), doc["kb_id"]):
settings.docStoreConn.delete({"doc_id": doc["id"]}, search.index_name(current_user.id), doc["kb_id"])
doc["progress_msg"] = ""
doc["chunk_num"] = 0
doc["token_num"] = 0

View File

@ -21,7 +21,6 @@ import xxhash
from flask import request
from flask_login import current_user, login_required
from api import settings
from api.db.services.dialog_service import meta_filter
from api.db.services.document_service import DocumentService
from api.db.services.knowledgebase_service import KnowledgebaseService
@ -33,10 +32,9 @@ from rag.app.qa import beAdoc, rmPrefix
from rag.app.tag import label_question
from rag.nlp import rag_tokenizer, search
from rag.prompts.generator import gen_meta_filter, cross_languages, keyword_extraction
from rag.settings import PAGERANK_FLD
from common.string_utils import remove_redundant_spaces
from common.constants import RetCode, LLMType, ParserType
from common import globals
from common.constants import RetCode, LLMType, ParserType, PAGERANK_FLD
from common import settings
@manager.route('/list', methods=['POST']) # noqa: F821
@ -61,7 +59,7 @@ def list_chunk():
}
if "available_int" in req:
query["available_int"] = int(req["available_int"])
sres = globals.retriever.search(query, search.index_name(tenant_id), kb_ids, highlight=["content_ltks"])
sres = settings.retriever.search(query, search.index_name(tenant_id), kb_ids, highlight=["content_ltks"])
res = {"total": sres.total, "chunks": [], "doc": doc.to_dict()}
for id in sres.ids:
d = {
@ -99,7 +97,7 @@ def get():
return get_data_error_result(message="Tenant not found!")
for tenant in tenants:
kb_ids = KnowledgebaseService.get_kb_ids(tenant.tenant_id)
chunk = globals.docStoreConn.get(chunk_id, search.index_name(tenant.tenant_id), kb_ids)
chunk = settings.docStoreConn.get(chunk_id, search.index_name(tenant.tenant_id), kb_ids)
if chunk:
break
if chunk is None:
@ -171,7 +169,7 @@ def set():
v, c = embd_mdl.encode([doc.name, req["content_with_weight"] if not d.get("question_kwd") else "\n".join(d["question_kwd"])])
v = 0.1 * v[0] + 0.9 * v[1] if doc.parser_id != ParserType.QA else v[1]
d["q_%d_vec" % len(v)] = v.tolist()
globals.docStoreConn.update({"id": req["chunk_id"]}, d, search.index_name(tenant_id), doc.kb_id)
settings.docStoreConn.update({"id": req["chunk_id"]}, d, search.index_name(tenant_id), doc.kb_id)
return get_json_result(data=True)
except Exception as e:
return server_error_response(e)
@ -187,7 +185,7 @@ def switch():
if not e:
return get_data_error_result(message="Document not found!")
for cid in req["chunk_ids"]:
if not globals.docStoreConn.update({"id": cid},
if not settings.docStoreConn.update({"id": cid},
{"available_int": int(req["available_int"])},
search.index_name(DocumentService.get_tenant_id(req["doc_id"])),
doc.kb_id):
@ -201,13 +199,12 @@ def switch():
@login_required
@validate_request("chunk_ids", "doc_id")
def rm():
from rag.utils.storage_factory import STORAGE_IMPL
req = request.json
try:
e, doc = DocumentService.get_by_id(req["doc_id"])
if not e:
return get_data_error_result(message="Document not found!")
if not globals.docStoreConn.delete({"id": req["chunk_ids"]},
if not settings.docStoreConn.delete({"id": req["chunk_ids"]},
search.index_name(DocumentService.get_tenant_id(req["doc_id"])),
doc.kb_id):
return get_data_error_result(message="Chunk deleting failure")
@ -215,8 +212,8 @@ def rm():
chunk_number = len(deleted_chunk_ids)
DocumentService.decrement_chunk_num(doc.id, doc.kb_id, 1, chunk_number, 0)
for cid in deleted_chunk_ids:
if STORAGE_IMPL.obj_exist(doc.kb_id, cid):
STORAGE_IMPL.rm(doc.kb_id, cid)
if settings.STORAGE_IMPL.obj_exist(doc.kb_id, cid):
settings.STORAGE_IMPL.rm(doc.kb_id, cid)
return get_json_result(data=True)
except Exception as e:
return server_error_response(e)
@ -271,7 +268,7 @@ def create():
v, c = embd_mdl.encode([doc.name, req["content_with_weight"] if not d["question_kwd"] else "\n".join(d["question_kwd"])])
v = 0.1 * v[0] + 0.9 * v[1]
d["q_%d_vec" % len(v)] = v.tolist()
globals.docStoreConn.insert([d], search.index_name(tenant_id), doc.kb_id)
settings.docStoreConn.insert([d], search.index_name(tenant_id), doc.kb_id)
DocumentService.increment_chunk_num(
doc.id, doc.kb_id, c, 1, 0)
@ -347,7 +344,7 @@ def retrieval_test():
question += keyword_extraction(chat_mdl, question)
labels = label_question(question, [kb])
ranks = globals.retriever.retrieval(question, embd_mdl, tenant_ids, kb_ids, page, size,
ranks = settings.retriever.retrieval(question, embd_mdl, tenant_ids, kb_ids, page, size,
float(req.get("similarity_threshold", 0.0)),
float(req.get("vector_similarity_weight", 0.3)),
top,
@ -386,7 +383,7 @@ def knowledge_graph():
"doc_ids": [doc_id],
"knowledge_graph_kwd": ["graph", "mind_map"]
}
sres = globals.retriever.search(req, search.index_name(tenant_id), kb_ids)
sres = settings.retriever.search(req, search.index_name(tenant_id), kb_ids)
obj = {"graph": {}, "mind_map": {}}
for id in sres.ids[:2]:
ty = sres.field[id]["knowledge_graph_kwd"]

View File

@ -47,8 +47,7 @@ from common.constants import RetCode, VALID_TASK_STATUS, ParserType, TaskStatus
from api.utils.web_utils import CONTENT_TYPE_MAP, html2pdf, is_valid_url
from deepdoc.parser.html_parser import RAGFlowHtmlParser
from rag.nlp import search, rag_tokenizer
from rag.utils.storage_factory import STORAGE_IMPL
from common import globals
from common import settings
@manager.route("/upload", methods=["POST"]) # noqa: F821
@ -119,9 +118,9 @@ def web_crawl():
raise RuntimeError("This type of file has not been supported yet!")
location = filename
while STORAGE_IMPL.obj_exist(kb_id, location):
while settings.STORAGE_IMPL.obj_exist(kb_id, location):
location += "_"
STORAGE_IMPL.put(kb_id, location, blob)
settings.STORAGE_IMPL.put(kb_id, location, blob)
doc = {
"id": get_uuid(),
"kb_id": kb.id,
@ -367,7 +366,7 @@ def change_status():
continue
status_int = int(status)
if not globals.docStoreConn.update({"doc_id": doc_id}, {"available_int": status_int}, search.index_name(kb.tenant_id), doc.kb_id):
if not settings.docStoreConn.update({"doc_id": doc_id}, {"available_int": status_int}, search.index_name(kb.tenant_id), doc.kb_id):
result[doc_id] = {"error": "Database error (docStore update)!"}
result[doc_id] = {"status": status}
except Exception as e:
@ -432,8 +431,8 @@ def run():
DocumentService.update_by_id(id, info)
if req.get("delete", False):
TaskService.filter_delete([Task.doc_id == id])
if globals.docStoreConn.indexExist(search.index_name(tenant_id), doc.kb_id):
globals.docStoreConn.delete({"doc_id": id}, search.index_name(tenant_id), doc.kb_id)
if settings.docStoreConn.indexExist(search.index_name(tenant_id), doc.kb_id):
settings.docStoreConn.delete({"doc_id": id}, search.index_name(tenant_id), doc.kb_id)
if str(req["run"]) == TaskStatus.RUNNING.value:
doc = doc.to_dict()
@ -479,8 +478,8 @@ def rename():
"title_tks": title_tks,
"title_sm_tks": rag_tokenizer.fine_grained_tokenize(title_tks),
}
if globals.docStoreConn.indexExist(search.index_name(tenant_id), doc.kb_id):
globals.docStoreConn.update(
if settings.docStoreConn.indexExist(search.index_name(tenant_id), doc.kb_id):
settings.docStoreConn.update(
{"doc_id": req["doc_id"]},
es_body,
search.index_name(tenant_id),
@ -501,7 +500,7 @@ def get(doc_id):
return get_data_error_result(message="Document not found!")
b, n = File2DocumentService.get_storage_address(doc_id=doc_id)
response = flask.make_response(STORAGE_IMPL.get(b, n))
response = flask.make_response(settings.STORAGE_IMPL.get(b, n))
ext = re.search(r"\.([^.]+)$", doc.name.lower())
ext = ext.group(1) if ext else None
@ -541,8 +540,8 @@ def change_parser():
tenant_id = DocumentService.get_tenant_id(req["doc_id"])
if not tenant_id:
return get_data_error_result(message="Tenant not found!")
if globals.docStoreConn.indexExist(search.index_name(tenant_id), doc.kb_id):
globals.docStoreConn.delete({"doc_id": doc.id}, search.index_name(tenant_id), doc.kb_id)
if settings.docStoreConn.indexExist(search.index_name(tenant_id), doc.kb_id):
settings.docStoreConn.delete({"doc_id": doc.id}, search.index_name(tenant_id), doc.kb_id)
try:
if "pipeline_id" in req and req["pipeline_id"] != "":
@ -577,7 +576,7 @@ def get_image(image_id):
if len(arr) != 2:
return get_data_error_result(message="Image not found.")
bkt, nm = image_id.split("-")
response = flask.make_response(STORAGE_IMPL.get(bkt, nm))
response = flask.make_response(settings.STORAGE_IMPL.get(bkt, nm))
response.headers.set("Content-Type", "image/JPEG")
return response
except Exception as e:

View File

@ -34,7 +34,7 @@ from api.db.services.file_service import FileService
from api.utils.api_utils import get_json_result
from api.utils.file_utils import filename_type
from api.utils.web_utils import CONTENT_TYPE_MAP
from rag.utils.storage_factory import STORAGE_IMPL
from common import settings
@manager.route('/upload', methods=['POST']) # noqa: F821
@ -95,14 +95,14 @@ def upload():
# file type
filetype = filename_type(file_obj_names[file_len - 1])
location = file_obj_names[file_len - 1]
while STORAGE_IMPL.obj_exist(last_folder.id, location):
while settings.STORAGE_IMPL.obj_exist(last_folder.id, location):
location += "_"
blob = file_obj.read()
filename = duplicate_name(
FileService.query,
name=file_obj_names[file_len - 1],
parent_id=last_folder.id)
STORAGE_IMPL.put(last_folder.id, location, blob)
settings.STORAGE_IMPL.put(last_folder.id, location, blob)
file = {
"id": get_uuid(),
"parent_id": last_folder.id,
@ -245,7 +245,7 @@ def rm():
def _delete_single_file(file):
try:
if file.location:
STORAGE_IMPL.rm(file.parent_id, file.location)
settings.STORAGE_IMPL.rm(file.parent_id, file.location)
except Exception:
logging.exception(f"Fail to remove object: {file.parent_id}/{file.location}")
@ -346,10 +346,10 @@ def get(file_id):
if not check_file_team_permission(file, current_user.id):
return get_json_result(data=False, message='No authorization.', code=RetCode.AUTHENTICATION_ERROR)
blob = STORAGE_IMPL.get(file.parent_id, file.location)
blob = settings.STORAGE_IMPL.get(file.parent_id, file.location)
if not blob:
b, n = File2DocumentService.get_storage_address(file_id=file_id)
blob = STORAGE_IMPL.get(b, n)
blob = settings.STORAGE_IMPL.get(b, n)
response = flask.make_response(blob)
ext = re.search(r"\.([^.]+)$", file.name.lower())
@ -428,11 +428,11 @@ def move():
filename = source_file_entry.name
new_location = filename
while STORAGE_IMPL.obj_exist(dest_folder.id, new_location):
while settings.STORAGE_IMPL.obj_exist(dest_folder.id, new_location):
new_location += "_"
try:
STORAGE_IMPL.move(old_parent_id, old_location, dest_folder.id, new_location)
settings.STORAGE_IMPL.move(old_parent_id, old_location, dest_folder.id, new_location)
except Exception as storage_err:
raise RuntimeError(f"Move file failed at storage layer: {str(storage_err)}")

View File

@ -37,12 +37,10 @@ from api.db.db_models import File
from api.utils.api_utils import get_json_result
from rag.nlp import search
from api.constants import DATASET_NAME_LIMIT
from rag.settings import PAGERANK_FLD
from rag.utils.redis_conn import REDIS_CONN
from rag.utils.storage_factory import STORAGE_IMPL
from rag.utils.doc_store_conn import OrderByExpr
from common.constants import RetCode, PipelineTaskType, StatusEnum, VALID_TASK_STATUS, FileSource, LLMType
from common import globals
from common.constants import RetCode, PipelineTaskType, StatusEnum, VALID_TASK_STATUS, FileSource, LLMType, PAGERANK_FLD
from common import settings
@manager.route('/create', methods=['post']) # noqa: F821
@login_required
@ -113,11 +111,11 @@ def update():
if kb.pagerank != req.get("pagerank", 0):
if req.get("pagerank", 0) > 0:
globals.docStoreConn.update({"kb_id": kb.id}, {PAGERANK_FLD: req["pagerank"]},
settings.docStoreConn.update({"kb_id": kb.id}, {PAGERANK_FLD: req["pagerank"]},
search.index_name(kb.tenant_id), kb.id)
else:
# Elasticsearch requires PAGERANK_FLD be non-zero!
globals.docStoreConn.update({"exists": PAGERANK_FLD}, {"remove": PAGERANK_FLD},
settings.docStoreConn.update({"exists": PAGERANK_FLD}, {"remove": PAGERANK_FLD},
search.index_name(kb.tenant_id), kb.id)
e, kb = KnowledgebaseService.get_by_id(kb.id)
@ -233,10 +231,10 @@ def rm():
return get_data_error_result(
message="Database error (Knowledgebase removal)!")
for kb in kbs:
globals.docStoreConn.delete({"kb_id": kb.id}, search.index_name(kb.tenant_id), kb.id)
globals.docStoreConn.deleteIdx(search.index_name(kb.tenant_id), kb.id)
if hasattr(STORAGE_IMPL, 'remove_bucket'):
STORAGE_IMPL.remove_bucket(kb.id)
settings.docStoreConn.delete({"kb_id": kb.id}, search.index_name(kb.tenant_id), kb.id)
settings.docStoreConn.deleteIdx(search.index_name(kb.tenant_id), kb.id)
if hasattr(settings.STORAGE_IMPL, 'remove_bucket'):
settings.STORAGE_IMPL.remove_bucket(kb.id)
return get_json_result(data=True)
except Exception as e:
return server_error_response(e)
@ -255,7 +253,7 @@ def list_tags(kb_id):
tenants = UserTenantService.get_tenants_by_user_id(current_user.id)
tags = []
for tenant in tenants:
tags += globals.retriever.all_tags(tenant["tenant_id"], [kb_id])
tags += settings.retriever.all_tags(tenant["tenant_id"], [kb_id])
return get_json_result(data=tags)
@ -274,7 +272,7 @@ def list_tags_from_kbs():
tenants = UserTenantService.get_tenants_by_user_id(current_user.id)
tags = []
for tenant in tenants:
tags += globals.retriever.all_tags(tenant["tenant_id"], kb_ids)
tags += settings.retriever.all_tags(tenant["tenant_id"], kb_ids)
return get_json_result(data=tags)
@ -291,7 +289,7 @@ def rm_tags(kb_id):
e, kb = KnowledgebaseService.get_by_id(kb_id)
for t in req["tags"]:
globals.docStoreConn.update({"tag_kwd": t, "kb_id": [kb_id]},
settings.docStoreConn.update({"tag_kwd": t, "kb_id": [kb_id]},
{"remove": {"tag_kwd": t}},
search.index_name(kb.tenant_id),
kb_id)
@ -310,7 +308,7 @@ def rename_tags(kb_id):
)
e, kb = KnowledgebaseService.get_by_id(kb_id)
globals.docStoreConn.update({"tag_kwd": req["from_tag"], "kb_id": [kb_id]},
settings.docStoreConn.update({"tag_kwd": req["from_tag"], "kb_id": [kb_id]},
{"remove": {"tag_kwd": req["from_tag"].strip()}, "add": {"tag_kwd": req["to_tag"]}},
search.index_name(kb.tenant_id),
kb_id)
@ -333,9 +331,9 @@ def knowledge_graph(kb_id):
}
obj = {"graph": {}, "mind_map": {}}
if not globals.docStoreConn.indexExist(search.index_name(kb.tenant_id), kb_id):
if not settings.docStoreConn.indexExist(search.index_name(kb.tenant_id), kb_id):
return get_json_result(data=obj)
sres = globals.retriever.search(req, search.index_name(kb.tenant_id), [kb_id])
sres = settings.retriever.search(req, search.index_name(kb.tenant_id), [kb_id])
if not len(sres.ids):
return get_json_result(data=obj)
@ -367,7 +365,7 @@ def delete_knowledge_graph(kb_id):
code=RetCode.AUTHENTICATION_ERROR
)
_, kb = KnowledgebaseService.get_by_id(kb_id)
globals.docStoreConn.delete({"knowledge_graph_kwd": ["graph", "subgraph", "entity", "relation"]}, search.index_name(kb.tenant_id), kb_id)
settings.docStoreConn.delete({"knowledge_graph_kwd": ["graph", "subgraph", "entity", "relation"]}, search.index_name(kb.tenant_id), kb_id)
return get_json_result(data=True)
@ -739,13 +737,13 @@ def delete_kb_task():
task_id = kb.graphrag_task_id
kb_task_finish_at = "graphrag_task_finish_at"
cancel_task(task_id)
globals.docStoreConn.delete({"knowledge_graph_kwd": ["graph", "subgraph", "entity", "relation"]}, search.index_name(kb.tenant_id), kb_id)
settings.docStoreConn.delete({"knowledge_graph_kwd": ["graph", "subgraph", "entity", "relation"]}, search.index_name(kb.tenant_id), kb_id)
case PipelineTaskType.RAPTOR:
kb_task_id_field = "raptor_task_id"
task_id = kb.raptor_task_id
kb_task_finish_at = "raptor_task_finish_at"
cancel_task(task_id)
globals.docStoreConn.delete({"raptor_kwd": ["raptor"]}, search.index_name(kb.tenant_id), kb_id)
settings.docStoreConn.delete({"raptor_kwd": ["raptor"]}, search.index_name(kb.tenant_id), kb_id)
case PipelineTaskType.MINDMAP:
kb_task_id_field = "mindmap_task_id"
task_id = kb.mindmap_task_id
@ -857,7 +855,7 @@ def check_embedding():
tenant_id = kb.tenant_id
emb_mdl = LLMBundle(tenant_id, LLMType.EMBEDDING, embd_id)
samples = sample_random_chunks_with_vectors(globals.docStoreConn, tenant_id=tenant_id, kb_id=kb_id, n=n)
samples = sample_random_chunks_with_vectors(settings.docStoreConn, tenant_id=tenant_id, kb_id=kb_id, n=n)
results, eff_sims = [], []
for ck in samples:

View File

@ -47,8 +47,8 @@ from api.utils.validation_utils import (
validate_and_parse_request_args,
)
from rag.nlp import search
from rag.settings import PAGERANK_FLD
from common import globals
from common.constants import PAGERANK_FLD
from common import settings
@manager.route("/datasets", methods=["POST"]) # noqa: F821
@ -360,11 +360,11 @@ def update(tenant_id, dataset_id):
return get_error_argument_result(message="'pagerank' can only be set when doc_engine is elasticsearch")
if req["pagerank"] > 0:
globals.docStoreConn.update({"kb_id": kb.id}, {PAGERANK_FLD: req["pagerank"]},
settings.docStoreConn.update({"kb_id": kb.id}, {PAGERANK_FLD: req["pagerank"]},
search.index_name(kb.tenant_id), kb.id)
else:
# Elasticsearch requires PAGERANK_FLD be non-zero!
globals.docStoreConn.update({"exists": PAGERANK_FLD}, {"remove": PAGERANK_FLD},
settings.docStoreConn.update({"exists": PAGERANK_FLD}, {"remove": PAGERANK_FLD},
search.index_name(kb.tenant_id), kb.id)
if not KnowledgebaseService.update_by_id(kb.id, req):
@ -493,9 +493,9 @@ def knowledge_graph(tenant_id, dataset_id):
}
obj = {"graph": {}, "mind_map": {}}
if not globals.docStoreConn.indexExist(search.index_name(kb.tenant_id), dataset_id):
if not settings.docStoreConn.indexExist(search.index_name(kb.tenant_id), dataset_id):
return get_result(data=obj)
sres = globals.retriever.search(req, search.index_name(kb.tenant_id), [dataset_id])
sres = settings.retriever.search(req, search.index_name(kb.tenant_id), [dataset_id])
if not len(sres.ids):
return get_result(data=obj)
@ -528,7 +528,7 @@ def delete_knowledge_graph(tenant_id, dataset_id):
code=RetCode.AUTHENTICATION_ERROR
)
_, kb = KnowledgebaseService.get_by_id(dataset_id)
globals.docStoreConn.delete({"knowledge_graph_kwd": ["graph", "subgraph", "entity", "relation"]},
settings.docStoreConn.delete({"knowledge_graph_kwd": ["graph", "subgraph", "entity", "relation"]},
search.index_name(kb.tenant_id), dataset_id)
return get_result(data=True)

View File

@ -20,12 +20,11 @@ from flask import request, jsonify
from api.db.services.document_service import DocumentService
from api.db.services.knowledgebase_service import KnowledgebaseService
from api.db.services.llm_service import LLMBundle
from api import settings
from api.utils.api_utils import validate_request, build_error_result, apikey_required
from rag.app.tag import label_question
from api.db.services.dialog_service import meta_filter, convert_conditions
from common.constants import RetCode, LLMType
from common import globals
from common import settings
@manager.route('/dify/retrieval', methods=['POST']) # noqa: F821
@apikey_required
@ -138,7 +137,7 @@ def retrieval(tenant_id):
# print("doc_ids", doc_ids)
if not doc_ids and metadata_condition is not None:
doc_ids = ['-999']
ranks = globals.retriever.retrieval(
ranks = settings.retriever.retrieval(
question,
embd_mdl,
kb.tenant_id,

View File

@ -24,7 +24,6 @@ from flask import request, send_file
from peewee import OperationalError
from pydantic import BaseModel, Field, validator
from api import settings
from api.constants import FILE_NAME_LEN_LIMIT
from api.db import FileType
from api.db.db_models import File, Task
@ -41,10 +40,9 @@ from rag.app.qa import beAdoc, rmPrefix
from rag.app.tag import label_question
from rag.nlp import rag_tokenizer, search
from rag.prompts.generator import cross_languages, keyword_extraction
from rag.utils.storage_factory import STORAGE_IMPL
from common.string_utils import remove_redundant_spaces
from common.constants import RetCode, LLMType, ParserType, TaskStatus, FileSource
from common import globals
from common import settings
MAXIMUM_OF_UPLOADING_FILES = 256
@ -308,7 +306,7 @@ def update_doc(tenant_id, dataset_id, document_id):
)
if not e:
return get_error_data_result(message="Document not found!")
globals.docStoreConn.delete({"doc_id": doc.id}, search.index_name(tenant_id), dataset_id)
settings.docStoreConn.delete({"doc_id": doc.id}, search.index_name(tenant_id), dataset_id)
if "enabled" in req:
status = int(req["enabled"])
@ -317,7 +315,7 @@ def update_doc(tenant_id, dataset_id, document_id):
if not DocumentService.update_by_id(doc.id, {"status": str(status)}):
return get_error_data_result(message="Database error (Document update)!")
globals.docStoreConn.update({"doc_id": doc.id}, {"available_int": status}, search.index_name(kb.tenant_id), doc.kb_id)
settings.docStoreConn.update({"doc_id": doc.id}, {"available_int": status}, search.index_name(kb.tenant_id), doc.kb_id)
return get_result(data=True)
except Exception as e:
return server_error_response(e)
@ -402,7 +400,7 @@ def download(tenant_id, dataset_id, document_id):
return get_error_data_result(message=f"The dataset not own the document {document_id}.")
# The process of downloading
doc_id, doc_location = File2DocumentService.get_storage_address(doc_id=document_id) # minio address
file_stream = STORAGE_IMPL.get(doc_id, doc_location)
file_stream = settings.STORAGE_IMPL.get(doc_id, doc_location)
if not file_stream:
return construct_json_result(message="This file is empty.", code=RetCode.DATA_ERROR)
file = BytesIO(file_stream)
@ -672,7 +670,7 @@ def delete(tenant_id, dataset_id):
)
File2DocumentService.delete_by_document_id(doc_id)
STORAGE_IMPL.rm(b, n)
settings.STORAGE_IMPL.rm(b, n)
success_count += 1
except Exception as e:
errors += str(e)
@ -756,7 +754,7 @@ def parse(tenant_id, dataset_id):
return get_error_data_result("Can't parse document that is currently being processed")
info = {"run": "1", "progress": 0, "progress_msg": "", "chunk_num": 0, "token_num": 0}
DocumentService.update_by_id(id, info)
globals.docStoreConn.delete({"doc_id": id}, search.index_name(tenant_id), dataset_id)
settings.docStoreConn.delete({"doc_id": id}, search.index_name(tenant_id), dataset_id)
TaskService.filter_delete([Task.doc_id == id])
e, doc = DocumentService.get_by_id(id)
doc = doc.to_dict()
@ -836,7 +834,7 @@ def stop_parsing(tenant_id, dataset_id):
return get_error_data_result("Can't stop parsing document with progress at 0 or 1")
info = {"run": "2", "progress": 0, "chunk_num": 0}
DocumentService.update_by_id(id, info)
globals.docStoreConn.delete({"doc_id": doc[0].id}, search.index_name(tenant_id), dataset_id)
settings.docStoreConn.delete({"doc_id": doc[0].id}, search.index_name(tenant_id), dataset_id)
success_count += 1
if duplicate_messages:
if success_count > 0:
@ -969,7 +967,7 @@ def list_chunks(tenant_id, dataset_id, document_id):
res = {"total": 0, "chunks": [], "doc": renamed_doc}
if req.get("id"):
chunk = globals.docStoreConn.get(req.get("id"), search.index_name(tenant_id), [dataset_id])
chunk = settings.docStoreConn.get(req.get("id"), search.index_name(tenant_id), [dataset_id])
if not chunk:
return get_result(message=f"Chunk not found: {dataset_id}/{req.get('id')}", code=RetCode.NOT_FOUND)
k = []
@ -996,8 +994,8 @@ def list_chunks(tenant_id, dataset_id, document_id):
res["chunks"].append(final_chunk)
_ = Chunk(**final_chunk)
elif globals.docStoreConn.indexExist(search.index_name(tenant_id), dataset_id):
sres = globals.retriever.search(query, search.index_name(tenant_id), [dataset_id], emb_mdl=None, highlight=True)
elif settings.docStoreConn.indexExist(search.index_name(tenant_id), dataset_id):
sres = settings.retriever.search(query, search.index_name(tenant_id), [dataset_id], emb_mdl=None, highlight=True)
res["total"] = sres.total
for id in sres.ids:
d = {
@ -1121,7 +1119,7 @@ def add_chunk(tenant_id, dataset_id, document_id):
v, c = embd_mdl.encode([doc.name, req["content"] if not d["question_kwd"] else "\n".join(d["question_kwd"])])
v = 0.1 * v[0] + 0.9 * v[1]
d["q_%d_vec" % len(v)] = v.tolist()
globals.docStoreConn.insert([d], search.index_name(tenant_id), dataset_id)
settings.docStoreConn.insert([d], search.index_name(tenant_id), dataset_id)
DocumentService.increment_chunk_num(doc.id, doc.kb_id, c, 1, 0)
# rename keys
@ -1202,7 +1200,7 @@ def rm_chunk(tenant_id, dataset_id, document_id):
if "chunk_ids" in req:
unique_chunk_ids, duplicate_messages = check_duplicate_ids(req["chunk_ids"], "chunk")
condition["id"] = unique_chunk_ids
chunk_number = globals.docStoreConn.delete(condition, search.index_name(tenant_id), dataset_id)
chunk_number = settings.docStoreConn.delete(condition, search.index_name(tenant_id), dataset_id)
if chunk_number != 0:
DocumentService.decrement_chunk_num(document_id, dataset_id, 1, chunk_number, 0)
if "chunk_ids" in req and chunk_number != len(unique_chunk_ids):
@ -1274,7 +1272,7 @@ def update_chunk(tenant_id, dataset_id, document_id, chunk_id):
schema:
type: object
"""
chunk = globals.docStoreConn.get(chunk_id, search.index_name(tenant_id), [dataset_id])
chunk = settings.docStoreConn.get(chunk_id, search.index_name(tenant_id), [dataset_id])
if chunk is None:
return get_error_data_result(f"Can't find this chunk {chunk_id}")
if not KnowledgebaseService.accessible(kb_id=dataset_id, user_id=tenant_id):
@ -1319,7 +1317,7 @@ def update_chunk(tenant_id, dataset_id, document_id, chunk_id):
v, c = embd_mdl.encode([doc.name, d["content_with_weight"] if not d.get("question_kwd") else "\n".join(d["question_kwd"])])
v = 0.1 * v[0] + 0.9 * v[1] if doc.parser_id != ParserType.QA else v[1]
d["q_%d_vec" % len(v)] = v.tolist()
globals.docStoreConn.update({"id": chunk_id}, d, search.index_name(tenant_id), dataset_id)
settings.docStoreConn.update({"id": chunk_id}, d, search.index_name(tenant_id), dataset_id)
return get_result()
@ -1465,7 +1463,7 @@ def retrieval_test(tenant_id):
chat_mdl = LLMBundle(kb.tenant_id, LLMType.CHAT)
question += keyword_extraction(chat_mdl, question)
ranks = globals.retriever.retrieval(
ranks = settings.retriever.retrieval(
question,
embd_mdl,
tenant_ids,

View File

@ -32,7 +32,7 @@ from api.db.services import duplicate_name
from api.db.services.file_service import FileService
from api.utils.api_utils import get_json_result
from api.utils.file_utils import filename_type
from rag.utils.storage_factory import STORAGE_IMPL
from common import settings
@manager.route('/file/upload', methods=['POST']) # noqa: F821
@ -126,7 +126,7 @@ def upload(tenant_id):
filetype = filename_type(file_obj_names[file_len - 1])
location = file_obj_names[file_len - 1]
while STORAGE_IMPL.obj_exist(last_folder.id, location):
while settings.STORAGE_IMPL.obj_exist(last_folder.id, location):
location += "_"
blob = file_obj.read()
filename = duplicate_name(FileService.query, name=file_obj_names[file_len - 1], parent_id=last_folder.id)
@ -142,7 +142,7 @@ def upload(tenant_id):
"size": len(blob),
}
file = FileService.insert(file)
STORAGE_IMPL.put(last_folder.id, location, blob)
settings.STORAGE_IMPL.put(last_folder.id, location, blob)
file_res.append(file.to_json())
return get_json_result(data=file_res)
except Exception as e:
@ -497,10 +497,10 @@ def rm(tenant_id):
e, file = FileService.get_by_id(inner_file_id)
if not e:
return get_json_result(message="File not found!", code=404)
STORAGE_IMPL.rm(file.parent_id, file.location)
settings.STORAGE_IMPL.rm(file.parent_id, file.location)
FileService.delete_folder_by_pf_id(tenant_id, file_id)
else:
STORAGE_IMPL.rm(file.parent_id, file.location)
settings.STORAGE_IMPL.rm(file.parent_id, file.location)
if not FileService.delete(file):
return get_json_result(message="Database error (File removal)!", code=500)
@ -614,10 +614,10 @@ def get(tenant_id, file_id):
if not e:
return get_json_result(message="Document not found!", code=404)
blob = STORAGE_IMPL.get(file.parent_id, file.location)
blob = settings.STORAGE_IMPL.get(file.parent_id, file.location)
if not blob:
b, n = File2DocumentService.get_storage_address(file_id=file_id)
blob = STORAGE_IMPL.get(b, n)
blob = settings.STORAGE_IMPL.get(b, n)
response = flask.make_response(blob)
ext = re.search(r"\.([^.]+)$", file.name)

View File

@ -21,7 +21,6 @@ import tiktoken
from flask import Response, jsonify, request
from agent.canvas import Canvas
from api import settings
from api.db.db_models import APIToken
from api.db.services.api_service import API4ConversationService
from api.db.services.canvas_service import UserCanvasService, completion_openai
@ -41,7 +40,7 @@ from rag.app.tag import label_question
from rag.prompts.template import load_prompt
from rag.prompts.generator import cross_languages, gen_meta_filter, keyword_extraction, chunks_format
from common.constants import RetCode, LLMType, StatusEnum
from common import globals
from common import settings
@manager.route("/chats/<chat_id>/sessions", methods=["POST"]) # noqa: F821
@token_required
@ -1016,7 +1015,7 @@ def retrieval_test_embedded():
question += keyword_extraction(chat_mdl, question)
labels = label_question(question, [kb])
ranks = globals.retriever.retrieval(
ranks = settings.retriever.retrieval(
question, embd_mdl, tenant_ids, kb_ids, page, size, similarity_threshold, vector_similarity_weight, top,
doc_ids, rerank_mdl=rerank_mdl, highlight=req.get("highlight"), rank_feature=labels
)

View File

@ -23,7 +23,6 @@ from api.db.db_models import APIToken
from api.db.services.api_service import APITokenService
from api.db.services.knowledgebase_service import KnowledgebaseService
from api.db.services.user_service import UserTenantService
from api import settings
from api.utils.api_utils import (
get_json_result,
get_data_error_result,
@ -32,13 +31,12 @@ from api.utils.api_utils import (
)
from api.versions import get_ragflow_version
from common.time_utils import current_timestamp, datetime_format
from rag.utils.storage_factory import STORAGE_IMPL, STORAGE_IMPL_TYPE
from timeit import default_timer as timer
from rag.utils.redis_conn import REDIS_CONN
from flask import jsonify
from api.utils.health_utils import run_health_checks
from common import globals
from common import settings
@manager.route("/version", methods=["GET"]) # noqa: F821
@ -101,7 +99,7 @@ def status():
res = {}
st = timer()
try:
res["doc_engine"] = globals.docStoreConn.health()
res["doc_engine"] = settings.docStoreConn.health()
res["doc_engine"]["elapsed"] = "{:.1f}".format((timer() - st) * 1000.0)
except Exception as e:
res["doc_engine"] = {
@ -113,15 +111,15 @@ def status():
st = timer()
try:
STORAGE_IMPL.health()
settings.STORAGE_IMPL.health()
res["storage"] = {
"storage": STORAGE_IMPL_TYPE.lower(),
"storage": settings.STORAGE_IMPL_TYPE.lower(),
"status": "green",
"elapsed": "{:.1f}".format((timer() - st) * 1000.0),
}
except Exception as e:
res["storage"] = {
"storage": STORAGE_IMPL_TYPE.lower(),
"storage": settings.STORAGE_IMPL_TYPE.lower(),
"status": "red",
"elapsed": "{:.1f}".format((timer() - st) * 1000.0),
"error": str(e),

View File

@ -17,7 +17,6 @@
from flask import request
from flask_login import login_required, current_user
from api import settings
from api.apps import smtp_mail_server
from api.db import UserTenantRole
from api.db.db_models import UserTenant
@ -28,6 +27,7 @@ from common.misc_utils import get_uuid
from common.time_utils import delta_seconds
from api.utils.api_utils import get_json_result, validate_request, server_error_response, get_data_error_result
from api.utils.web_utils import send_invite_email
from common import settings
@manager.route("/<tenant_id>/user/list", methods=["GET"]) # noqa: F821

View File

@ -26,7 +26,6 @@ from flask import redirect, request, session, make_response
from flask_login import current_user, login_required, login_user, logout_user
from werkzeug.security import check_password_hash, generate_password_hash
from api import settings
from api.apps.auth import get_auth_client
from api.db import FileType, UserTenantRole
from api.db.db_models import TenantLLM
@ -58,7 +57,7 @@ from api.utils.web_utils import (
hash_code,
captcha_key,
)
from common import globals
from common import settings
@manager.route("/login", methods=["POST", "GET"]) # noqa: F821
@ -624,7 +623,7 @@ def user_register(user_id, user):
"id": user_id,
"name": user["nickname"] + "s Kingdom",
"llm_id": settings.CHAT_MDL,
"embd_id": globals.EMBEDDING_MDL,
"embd_id": settings.EMBEDDING_MDL,
"asr_id": settings.ASR_MDL,
"parser_ids": settings.PARSERS,
"img2txt_id": settings.IMAGE2TEXT_MDL,