Minor tweaks (#11271)

### What problem does this PR solve?

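As the title says: minor refactoring tweaks with no intended behavior change — bare `return` statements made explicit as `return None`, a range check rewritten as a chained comparison, and typos in identifiers and comments corrected (e.g. `totbal` → `total`, `vects` → `vectors`, `unfinised` → `unfinished`).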

### Type of change

- [x] Refactoring

---------

Signed-off-by: Jin Hai <haijin.chn@gmail.com>
This commit is contained in:
Jin Hai
2025-11-16 19:29:20 +08:00
committed by GitHub
parent e841b09d63
commit 61cf430dbb
7 changed files with 58 additions and 22 deletions

View File

@ -70,7 +70,7 @@ class ConnectorService(CommonService):
def rebuild(cls, kb_id:str, connector_id: str, tenant_id:str):
e, conn = cls.get_by_id(connector_id)
if not e:
return
return None
SyncLogsService.filter_delete([SyncLogs.connector_id==connector_id, SyncLogs.kb_id==kb_id])
docs = DocumentService.query(source_type=f"{conn.source}/{conn.id}", kb_id=kb_id)
err = FileService.delete_docs([d.id for d in docs], tenant_id)
@ -125,11 +125,11 @@ class SyncLogsService(CommonService):
)
query = query.distinct().order_by(cls.model.update_time.desc())
totbal = query.count()
total = query.count()
if page_number:
query = query.paginate(page_number, items_per_page)
return list(query.dicts()), totbal
return list(query.dicts()), total
@classmethod
def start(cls, id, connector_id):

View File

@ -342,7 +342,7 @@ def chat(dialog, messages, stream=True, **kwargs):
if not dialog.kb_ids and not dialog.prompt_config.get("tavily_api_key"):
for ans in chat_solo(dialog, messages, stream):
yield ans
return
return None
chat_start_ts = timer()
@ -386,7 +386,7 @@ def chat(dialog, messages, stream=True, **kwargs):
ans = use_sql(questions[-1], field_map, dialog.tenant_id, chat_mdl, prompt_config.get("quote", True), dialog.kb_ids)
if ans:
yield ans
return
return None
for p in prompt_config["parameters"]:
if p["key"] == "knowledge":
@ -617,6 +617,8 @@ def chat(dialog, messages, stream=True, **kwargs):
res["audio_binary"] = tts(tts_mdl, answer)
yield res
return None
def use_sql(question, field_map, tenant_id, chat_mdl, quota=True, kb_ids=None):
sys_prompt = """
@ -745,7 +747,7 @@ Please write the SQL, only SQL, without any other explanations or text.
def tts(tts_mdl, text):
if not tts_mdl or not text:
return
return None
bin = b""
for chunk in tts_mdl.tts(text):
bin += chunk

View File

@ -113,7 +113,7 @@ class DocumentService(CommonService):
def check_doc_health(cls, tenant_id: str, filename):
import os
MAX_FILE_NUM_PER_USER = int(os.environ.get("MAX_FILE_NUM_PER_USER", 0))
if MAX_FILE_NUM_PER_USER > 0 and DocumentService.get_doc_count(tenant_id) >= MAX_FILE_NUM_PER_USER:
if 0 < MAX_FILE_NUM_PER_USER <= DocumentService.get_doc_count(tenant_id):
raise RuntimeError("Exceed the maximum file number of a free user!")
if len(filename.encode("utf-8")) > FILE_NAME_LEN_LIMIT:
raise RuntimeError("Exceed the maximum length of file name!")
@ -464,7 +464,7 @@ class DocumentService(CommonService):
cls.model.id == doc_id, Knowledgebase.status == StatusEnum.VALID.value)
docs = docs.dicts()
if not docs:
return
return None
return docs[0]["tenant_id"]
@classmethod
@ -473,7 +473,7 @@ class DocumentService(CommonService):
docs = cls.model.select(cls.model.kb_id).where(cls.model.id == doc_id)
docs = docs.dicts()
if not docs:
return
return None
return docs[0]["kb_id"]
@classmethod
@ -486,7 +486,7 @@ class DocumentService(CommonService):
cls.model.name == name, Knowledgebase.status == StatusEnum.VALID.value)
docs = docs.dicts()
if not docs:
return
return None
return docs[0]["tenant_id"]
@classmethod
@ -533,7 +533,7 @@ class DocumentService(CommonService):
cls.model.id == doc_id, Knowledgebase.status == StatusEnum.VALID.value)
docs = docs.dicts()
if not docs:
return
return None
return docs[0]["embd_id"]
@classmethod
@ -569,7 +569,7 @@ class DocumentService(CommonService):
.where(cls.model.name == doc_name)
doc_id = doc_id.dicts()
if not doc_id:
return
return None
return doc_id[0]["id"]
@classmethod
@ -715,7 +715,7 @@ class DocumentService(CommonService):
prg = 1
status = TaskStatus.DONE.value
# only for special task and parsed docs and unfinised
# only for special task and parsed docs and unfinished
freeze_progress = special_task_running and doc_progress >= 1 and not finished
msg = "\n".join(sorted(msg))
info = {
@ -974,13 +974,13 @@ def doc_upload_and_parse(conversation_id, file_objs, user_id):
def embedding(doc_id, cnts, batch_size=16):
nonlocal embd_mdl, chunk_counts, token_counts
vects = []
vectors = []
for i in range(0, len(cnts), batch_size):
vts, c = embd_mdl.encode(cnts[i: i + batch_size])
vects.extend(vts.tolist())
vectors.extend(vts.tolist())
chunk_counts[doc_id] += len(cnts[i:i + batch_size])
token_counts[doc_id] += c
return vects
return vectors
idxnm = search.index_name(kb.tenant_id)
try_create_idx = True
@ -1011,15 +1011,15 @@ def doc_upload_and_parse(conversation_id, file_objs, user_id):
except Exception:
logging.exception("Mind map generation error")
vects = embedding(doc_id, [c["content_with_weight"] for c in cks])
assert len(cks) == len(vects)
vectors = embedding(doc_id, [c["content_with_weight"] for c in cks])
assert len(cks) == len(vectors)
for i, d in enumerate(cks):
v = vects[i]
v = vectors[i]
d["q_%d_vec" % len(v)] = v
for b in range(0, len(cks), es_bulk_size):
if try_create_idx:
if not settings.docStoreConn.indexExist(idxnm, kb_id):
settings.docStoreConn.createIdx(idxnm, kb_id, len(vects[0]))
settings.docStoreConn.createIdx(idxnm, kb_id, len(vectors[0]))
try_create_idx = False
settings.docStoreConn.insert(cks[b:b + es_bulk_size], idxnm, kb_id)

View File

@ -424,6 +424,7 @@ class KnowledgebaseService(CommonService):
# Default parser_config (align with kb_app.create) — do not accept external overrides
payload["parser_config"] = get_parser_config(parser_id, kwargs.get("parser_config"))
return payload