mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Minor tweaks (#11271)
### What problem does this PR solve?

As title.

### Type of change

- [x] Refactoring

---------

Signed-off-by: Jin Hai <haijin.chn@gmail.com>
This commit is contained in:
@ -305,6 +305,7 @@ class RetryingPooledMySQLDatabase(PooledMySQLDatabase):
|
|||||||
time.sleep(self.retry_delay * (2 ** attempt))
|
time.sleep(self.retry_delay * (2 ** attempt))
|
||||||
else:
|
else:
|
||||||
raise
|
raise
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
class RetryingPooledPostgresqlDatabase(PooledPostgresqlDatabase):
|
class RetryingPooledPostgresqlDatabase(PooledPostgresqlDatabase):
|
||||||
@ -772,7 +773,7 @@ class Document(DataBaseModel):
|
|||||||
thumbnail = TextField(null=True, help_text="thumbnail base64 string")
|
thumbnail = TextField(null=True, help_text="thumbnail base64 string")
|
||||||
kb_id = CharField(max_length=256, null=False, index=True)
|
kb_id = CharField(max_length=256, null=False, index=True)
|
||||||
parser_id = CharField(max_length=32, null=False, help_text="default parser ID", index=True)
|
parser_id = CharField(max_length=32, null=False, help_text="default parser ID", index=True)
|
||||||
pipeline_id = CharField(max_length=32, null=True, help_text="pipleline ID", index=True)
|
pipeline_id = CharField(max_length=32, null=True, help_text="pipeline ID", index=True)
|
||||||
parser_config = JSONField(null=False, default={"pages": [[1, 1000000]]})
|
parser_config = JSONField(null=False, default={"pages": [[1, 1000000]]})
|
||||||
source_type = CharField(max_length=128, null=False, default="local", help_text="where dose this document come from", index=True)
|
source_type = CharField(max_length=128, null=False, default="local", help_text="where dose this document come from", index=True)
|
||||||
type = CharField(max_length=32, null=False, help_text="file extension", index=True)
|
type = CharField(max_length=32, null=False, help_text="file extension", index=True)
|
||||||
@ -876,7 +877,7 @@ class Dialog(DataBaseModel):
|
|||||||
class Conversation(DataBaseModel):
|
class Conversation(DataBaseModel):
|
||||||
id = CharField(max_length=32, primary_key=True)
|
id = CharField(max_length=32, primary_key=True)
|
||||||
dialog_id = CharField(max_length=32, null=False, index=True)
|
dialog_id = CharField(max_length=32, null=False, index=True)
|
||||||
name = CharField(max_length=255, null=True, help_text="converastion name", index=True)
|
name = CharField(max_length=255, null=True, help_text="conversation name", index=True)
|
||||||
message = JSONField(null=True)
|
message = JSONField(null=True)
|
||||||
reference = JSONField(null=True, default=[])
|
reference = JSONField(null=True, default=[])
|
||||||
user_id = CharField(max_length=255, null=True, help_text="user_id", index=True)
|
user_id = CharField(max_length=255, null=True, help_text="user_id", index=True)
|
||||||
|
|||||||
@ -70,7 +70,7 @@ class ConnectorService(CommonService):
|
|||||||
def rebuild(cls, kb_id:str, connector_id: str, tenant_id:str):
|
def rebuild(cls, kb_id:str, connector_id: str, tenant_id:str):
|
||||||
e, conn = cls.get_by_id(connector_id)
|
e, conn = cls.get_by_id(connector_id)
|
||||||
if not e:
|
if not e:
|
||||||
return
|
return None
|
||||||
SyncLogsService.filter_delete([SyncLogs.connector_id==connector_id, SyncLogs.kb_id==kb_id])
|
SyncLogsService.filter_delete([SyncLogs.connector_id==connector_id, SyncLogs.kb_id==kb_id])
|
||||||
docs = DocumentService.query(source_type=f"{conn.source}/{conn.id}", kb_id=kb_id)
|
docs = DocumentService.query(source_type=f"{conn.source}/{conn.id}", kb_id=kb_id)
|
||||||
err = FileService.delete_docs([d.id for d in docs], tenant_id)
|
err = FileService.delete_docs([d.id for d in docs], tenant_id)
|
||||||
@ -125,11 +125,11 @@ class SyncLogsService(CommonService):
|
|||||||
)
|
)
|
||||||
|
|
||||||
query = query.distinct().order_by(cls.model.update_time.desc())
|
query = query.distinct().order_by(cls.model.update_time.desc())
|
||||||
totbal = query.count()
|
total = query.count()
|
||||||
if page_number:
|
if page_number:
|
||||||
query = query.paginate(page_number, items_per_page)
|
query = query.paginate(page_number, items_per_page)
|
||||||
|
|
||||||
return list(query.dicts()), totbal
|
return list(query.dicts()), total
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def start(cls, id, connector_id):
|
def start(cls, id, connector_id):
|
||||||
|
|||||||
@ -342,7 +342,7 @@ def chat(dialog, messages, stream=True, **kwargs):
|
|||||||
if not dialog.kb_ids and not dialog.prompt_config.get("tavily_api_key"):
|
if not dialog.kb_ids and not dialog.prompt_config.get("tavily_api_key"):
|
||||||
for ans in chat_solo(dialog, messages, stream):
|
for ans in chat_solo(dialog, messages, stream):
|
||||||
yield ans
|
yield ans
|
||||||
return
|
return None
|
||||||
|
|
||||||
chat_start_ts = timer()
|
chat_start_ts = timer()
|
||||||
|
|
||||||
@ -386,7 +386,7 @@ def chat(dialog, messages, stream=True, **kwargs):
|
|||||||
ans = use_sql(questions[-1], field_map, dialog.tenant_id, chat_mdl, prompt_config.get("quote", True), dialog.kb_ids)
|
ans = use_sql(questions[-1], field_map, dialog.tenant_id, chat_mdl, prompt_config.get("quote", True), dialog.kb_ids)
|
||||||
if ans:
|
if ans:
|
||||||
yield ans
|
yield ans
|
||||||
return
|
return None
|
||||||
|
|
||||||
for p in prompt_config["parameters"]:
|
for p in prompt_config["parameters"]:
|
||||||
if p["key"] == "knowledge":
|
if p["key"] == "knowledge":
|
||||||
@ -617,6 +617,8 @@ def chat(dialog, messages, stream=True, **kwargs):
|
|||||||
res["audio_binary"] = tts(tts_mdl, answer)
|
res["audio_binary"] = tts(tts_mdl, answer)
|
||||||
yield res
|
yield res
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
def use_sql(question, field_map, tenant_id, chat_mdl, quota=True, kb_ids=None):
|
def use_sql(question, field_map, tenant_id, chat_mdl, quota=True, kb_ids=None):
|
||||||
sys_prompt = """
|
sys_prompt = """
|
||||||
@ -745,7 +747,7 @@ Please write the SQL, only SQL, without any other explanations or text.
|
|||||||
|
|
||||||
def tts(tts_mdl, text):
|
def tts(tts_mdl, text):
|
||||||
if not tts_mdl or not text:
|
if not tts_mdl or not text:
|
||||||
return
|
return None
|
||||||
bin = b""
|
bin = b""
|
||||||
for chunk in tts_mdl.tts(text):
|
for chunk in tts_mdl.tts(text):
|
||||||
bin += chunk
|
bin += chunk
|
||||||
|
|||||||
@ -113,7 +113,7 @@ class DocumentService(CommonService):
|
|||||||
def check_doc_health(cls, tenant_id: str, filename):
|
def check_doc_health(cls, tenant_id: str, filename):
|
||||||
import os
|
import os
|
||||||
MAX_FILE_NUM_PER_USER = int(os.environ.get("MAX_FILE_NUM_PER_USER", 0))
|
MAX_FILE_NUM_PER_USER = int(os.environ.get("MAX_FILE_NUM_PER_USER", 0))
|
||||||
if MAX_FILE_NUM_PER_USER > 0 and DocumentService.get_doc_count(tenant_id) >= MAX_FILE_NUM_PER_USER:
|
if 0 < MAX_FILE_NUM_PER_USER <= DocumentService.get_doc_count(tenant_id):
|
||||||
raise RuntimeError("Exceed the maximum file number of a free user!")
|
raise RuntimeError("Exceed the maximum file number of a free user!")
|
||||||
if len(filename.encode("utf-8")) > FILE_NAME_LEN_LIMIT:
|
if len(filename.encode("utf-8")) > FILE_NAME_LEN_LIMIT:
|
||||||
raise RuntimeError("Exceed the maximum length of file name!")
|
raise RuntimeError("Exceed the maximum length of file name!")
|
||||||
@ -464,7 +464,7 @@ class DocumentService(CommonService):
|
|||||||
cls.model.id == doc_id, Knowledgebase.status == StatusEnum.VALID.value)
|
cls.model.id == doc_id, Knowledgebase.status == StatusEnum.VALID.value)
|
||||||
docs = docs.dicts()
|
docs = docs.dicts()
|
||||||
if not docs:
|
if not docs:
|
||||||
return
|
return None
|
||||||
return docs[0]["tenant_id"]
|
return docs[0]["tenant_id"]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@ -473,7 +473,7 @@ class DocumentService(CommonService):
|
|||||||
docs = cls.model.select(cls.model.kb_id).where(cls.model.id == doc_id)
|
docs = cls.model.select(cls.model.kb_id).where(cls.model.id == doc_id)
|
||||||
docs = docs.dicts()
|
docs = docs.dicts()
|
||||||
if not docs:
|
if not docs:
|
||||||
return
|
return None
|
||||||
return docs[0]["kb_id"]
|
return docs[0]["kb_id"]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@ -486,7 +486,7 @@ class DocumentService(CommonService):
|
|||||||
cls.model.name == name, Knowledgebase.status == StatusEnum.VALID.value)
|
cls.model.name == name, Knowledgebase.status == StatusEnum.VALID.value)
|
||||||
docs = docs.dicts()
|
docs = docs.dicts()
|
||||||
if not docs:
|
if not docs:
|
||||||
return
|
return None
|
||||||
return docs[0]["tenant_id"]
|
return docs[0]["tenant_id"]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@ -533,7 +533,7 @@ class DocumentService(CommonService):
|
|||||||
cls.model.id == doc_id, Knowledgebase.status == StatusEnum.VALID.value)
|
cls.model.id == doc_id, Knowledgebase.status == StatusEnum.VALID.value)
|
||||||
docs = docs.dicts()
|
docs = docs.dicts()
|
||||||
if not docs:
|
if not docs:
|
||||||
return
|
return None
|
||||||
return docs[0]["embd_id"]
|
return docs[0]["embd_id"]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@ -569,7 +569,7 @@ class DocumentService(CommonService):
|
|||||||
.where(cls.model.name == doc_name)
|
.where(cls.model.name == doc_name)
|
||||||
doc_id = doc_id.dicts()
|
doc_id = doc_id.dicts()
|
||||||
if not doc_id:
|
if not doc_id:
|
||||||
return
|
return None
|
||||||
return doc_id[0]["id"]
|
return doc_id[0]["id"]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@ -715,7 +715,7 @@ class DocumentService(CommonService):
|
|||||||
prg = 1
|
prg = 1
|
||||||
status = TaskStatus.DONE.value
|
status = TaskStatus.DONE.value
|
||||||
|
|
||||||
# only for special task and parsed docs and unfinised
|
# only for special task and parsed docs and unfinished
|
||||||
freeze_progress = special_task_running and doc_progress >= 1 and not finished
|
freeze_progress = special_task_running and doc_progress >= 1 and not finished
|
||||||
msg = "\n".join(sorted(msg))
|
msg = "\n".join(sorted(msg))
|
||||||
info = {
|
info = {
|
||||||
@ -974,13 +974,13 @@ def doc_upload_and_parse(conversation_id, file_objs, user_id):
|
|||||||
|
|
||||||
def embedding(doc_id, cnts, batch_size=16):
|
def embedding(doc_id, cnts, batch_size=16):
|
||||||
nonlocal embd_mdl, chunk_counts, token_counts
|
nonlocal embd_mdl, chunk_counts, token_counts
|
||||||
vects = []
|
vectors = []
|
||||||
for i in range(0, len(cnts), batch_size):
|
for i in range(0, len(cnts), batch_size):
|
||||||
vts, c = embd_mdl.encode(cnts[i: i + batch_size])
|
vts, c = embd_mdl.encode(cnts[i: i + batch_size])
|
||||||
vects.extend(vts.tolist())
|
vectors.extend(vts.tolist())
|
||||||
chunk_counts[doc_id] += len(cnts[i:i + batch_size])
|
chunk_counts[doc_id] += len(cnts[i:i + batch_size])
|
||||||
token_counts[doc_id] += c
|
token_counts[doc_id] += c
|
||||||
return vects
|
return vectors
|
||||||
|
|
||||||
idxnm = search.index_name(kb.tenant_id)
|
idxnm = search.index_name(kb.tenant_id)
|
||||||
try_create_idx = True
|
try_create_idx = True
|
||||||
@ -1011,15 +1011,15 @@ def doc_upload_and_parse(conversation_id, file_objs, user_id):
|
|||||||
except Exception:
|
except Exception:
|
||||||
logging.exception("Mind map generation error")
|
logging.exception("Mind map generation error")
|
||||||
|
|
||||||
vects = embedding(doc_id, [c["content_with_weight"] for c in cks])
|
vectors = embedding(doc_id, [c["content_with_weight"] for c in cks])
|
||||||
assert len(cks) == len(vects)
|
assert len(cks) == len(vectors)
|
||||||
for i, d in enumerate(cks):
|
for i, d in enumerate(cks):
|
||||||
v = vects[i]
|
v = vectors[i]
|
||||||
d["q_%d_vec" % len(v)] = v
|
d["q_%d_vec" % len(v)] = v
|
||||||
for b in range(0, len(cks), es_bulk_size):
|
for b in range(0, len(cks), es_bulk_size):
|
||||||
if try_create_idx:
|
if try_create_idx:
|
||||||
if not settings.docStoreConn.indexExist(idxnm, kb_id):
|
if not settings.docStoreConn.indexExist(idxnm, kb_id):
|
||||||
settings.docStoreConn.createIdx(idxnm, kb_id, len(vects[0]))
|
settings.docStoreConn.createIdx(idxnm, kb_id, len(vectors[0]))
|
||||||
try_create_idx = False
|
try_create_idx = False
|
||||||
settings.docStoreConn.insert(cks[b:b + es_bulk_size], idxnm, kb_id)
|
settings.docStoreConn.insert(cks[b:b + es_bulk_size], idxnm, kb_id)
|
||||||
|
|
||||||
|
|||||||
@ -424,6 +424,7 @@ class KnowledgebaseService(CommonService):
|
|||||||
|
|
||||||
# Default parser_config (align with kb_app.create) — do not accept external overrides
|
# Default parser_config (align with kb_app.create) — do not accept external overrides
|
||||||
payload["parser_config"] = get_parser_config(parser_id, kwargs.get("parser_config"))
|
payload["parser_config"] = get_parser_config(parser_id, kwargs.get("parser_config"))
|
||||||
|
|
||||||
return payload
|
return payload
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -1,3 +1,19 @@
|
|||||||
|
#
|
||||||
|
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Reusable HTML email templates and registry.
|
Reusable HTML email templates and registry.
|
||||||
"""
|
"""
|
||||||
|
|||||||
@ -1,3 +1,19 @@
|
|||||||
|
#
|
||||||
|
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
|
||||||
import datetime
|
import datetime
|
||||||
import json
|
import json
|
||||||
from enum import Enum, IntEnum
|
from enum import Enum, IntEnum
|
||||||
|
|||||||
Reference in New Issue
Block a user