diff --git a/api/db/db_models.py b/api/db/db_models.py index 68bf37ce4..2b4c4a0ef 100644 --- a/api/db/db_models.py +++ b/api/db/db_models.py @@ -305,6 +305,7 @@ class RetryingPooledMySQLDatabase(PooledMySQLDatabase): time.sleep(self.retry_delay * (2 ** attempt)) else: raise + return None class RetryingPooledPostgresqlDatabase(PooledPostgresqlDatabase): @@ -772,7 +773,7 @@ class Document(DataBaseModel): thumbnail = TextField(null=True, help_text="thumbnail base64 string") kb_id = CharField(max_length=256, null=False, index=True) parser_id = CharField(max_length=32, null=False, help_text="default parser ID", index=True) - pipeline_id = CharField(max_length=32, null=True, help_text="pipleline ID", index=True) + pipeline_id = CharField(max_length=32, null=True, help_text="pipeline ID", index=True) parser_config = JSONField(null=False, default={"pages": [[1, 1000000]]}) source_type = CharField(max_length=128, null=False, default="local", help_text="where dose this document come from", index=True) type = CharField(max_length=32, null=False, help_text="file extension", index=True) @@ -876,7 +877,7 @@ class Dialog(DataBaseModel): class Conversation(DataBaseModel): id = CharField(max_length=32, primary_key=True) dialog_id = CharField(max_length=32, null=False, index=True) - name = CharField(max_length=255, null=True, help_text="converastion name", index=True) + name = CharField(max_length=255, null=True, help_text="conversation name", index=True) message = JSONField(null=True) reference = JSONField(null=True, default=[]) user_id = CharField(max_length=255, null=True, help_text="user_id", index=True) diff --git a/api/db/services/connector_service.py b/api/db/services/connector_service.py index 3e65c87da..2f29c3324 100644 --- a/api/db/services/connector_service.py +++ b/api/db/services/connector_service.py @@ -70,7 +70,7 @@ class ConnectorService(CommonService): def rebuild(cls, kb_id:str, connector_id: str, tenant_id:str): e, conn = cls.get_by_id(connector_id) if not e: - return + return None SyncLogsService.filter_delete([SyncLogs.connector_id==connector_id, SyncLogs.kb_id==kb_id]) docs = DocumentService.query(source_type=f"{conn.source}/{conn.id}", kb_id=kb_id) err = FileService.delete_docs([d.id for d in docs], tenant_id) @@ -125,11 +125,11 @@ class SyncLogsService(CommonService): ) query = query.distinct().order_by(cls.model.update_time.desc()) - totbal = query.count() + total = query.count() if page_number: query = query.paginate(page_number, items_per_page) - return list(query.dicts()), totbal + return list(query.dicts()), total @classmethod def start(cls, id, connector_id): diff --git a/api/db/services/dialog_service.py b/api/db/services/dialog_service.py index f54ebf709..d2f3b9bc1 100644 --- a/api/db/services/dialog_service.py +++ b/api/db/services/dialog_service.py @@ -342,7 +342,7 @@ def chat(dialog, messages, stream=True, **kwargs): if not dialog.kb_ids and not dialog.prompt_config.get("tavily_api_key"): for ans in chat_solo(dialog, messages, stream): yield ans - return + return None chat_start_ts = timer() @@ -386,7 +386,7 @@ def chat(dialog, messages, stream=True, **kwargs): ans = use_sql(questions[-1], field_map, dialog.tenant_id, chat_mdl, prompt_config.get("quote", True), dialog.kb_ids) if ans: yield ans - return + return None for p in prompt_config["parameters"]: if p["key"] == "knowledge": @@ -617,6 +617,8 @@ def chat(dialog, messages, stream=True, **kwargs): res["audio_binary"] = tts(tts_mdl, answer) yield res + return None + def use_sql(question, field_map, tenant_id, chat_mdl, quota=True, kb_ids=None): sys_prompt = """ @@ -745,7 +747,7 @@ Please write the SQL, only SQL, without any other explanations or text. def tts(tts_mdl, text): if not tts_mdl or not text: - return + return None bin = b"" for chunk in tts_mdl.tts(text): bin += chunk diff --git a/api/db/services/document_service.py b/api/db/services/document_service.py index 530133164..0abf1b1f3 100644 --- a/api/db/services/document_service.py +++ b/api/db/services/document_service.py @@ -113,7 +113,7 @@ class DocumentService(CommonService): def check_doc_health(cls, tenant_id: str, filename): import os MAX_FILE_NUM_PER_USER = int(os.environ.get("MAX_FILE_NUM_PER_USER", 0)) - if MAX_FILE_NUM_PER_USER > 0 and DocumentService.get_doc_count(tenant_id) >= MAX_FILE_NUM_PER_USER: + if 0 < MAX_FILE_NUM_PER_USER <= DocumentService.get_doc_count(tenant_id): raise RuntimeError("Exceed the maximum file number of a free user!") if len(filename.encode("utf-8")) > FILE_NAME_LEN_LIMIT: raise RuntimeError("Exceed the maximum length of file name!") @@ -464,7 +464,7 @@ class DocumentService(CommonService): cls.model.id == doc_id, Knowledgebase.status == StatusEnum.VALID.value) docs = docs.dicts() if not docs: - return + return None return docs[0]["tenant_id"] @classmethod @@ -473,7 +473,7 @@ class DocumentService(CommonService): docs = cls.model.select(cls.model.kb_id).where(cls.model.id == doc_id) docs = docs.dicts() if not docs: - return + return None return docs[0]["kb_id"] @classmethod @@ -486,7 +486,7 @@ class DocumentService(CommonService): cls.model.name == name, Knowledgebase.status == StatusEnum.VALID.value) docs = docs.dicts() if not docs: - return + return None return docs[0]["tenant_id"] @classmethod @@ -533,7 +533,7 @@ class DocumentService(CommonService): cls.model.id == doc_id, Knowledgebase.status == StatusEnum.VALID.value) docs = docs.dicts() if not docs: - return + return None return docs[0]["embd_id"] @classmethod @@ -569,7 +569,7 @@ class DocumentService(CommonService): .where(cls.model.name == doc_name) doc_id = doc_id.dicts() if not doc_id: - return + return None return doc_id[0]["id"] @classmethod @@ -715,7 +715,7 @@ class DocumentService(CommonService): prg = 1 status = TaskStatus.DONE.value - # only for special task and parsed docs and unfinised + # only for special task and parsed docs and unfinished freeze_progress = special_task_running and doc_progress >= 1 and not finished msg = "\n".join(sorted(msg)) info = { @@ -974,13 +974,13 @@ def doc_upload_and_parse(conversation_id, file_objs, user_id): def embedding(doc_id, cnts, batch_size=16): nonlocal embd_mdl, chunk_counts, token_counts - vects = [] + vectors = [] for i in range(0, len(cnts), batch_size): vts, c = embd_mdl.encode(cnts[i: i + batch_size]) - vects.extend(vts.tolist()) + vectors.extend(vts.tolist()) chunk_counts[doc_id] += len(cnts[i:i + batch_size]) token_counts[doc_id] += c - return vects + return vectors idxnm = search.index_name(kb.tenant_id) try_create_idx = True @@ -1011,15 +1011,15 @@ def doc_upload_and_parse(conversation_id, file_objs, user_id): except Exception: logging.exception("Mind map generation error") - vects = embedding(doc_id, [c["content_with_weight"] for c in cks]) - assert len(cks) == len(vects) + vectors = embedding(doc_id, [c["content_with_weight"] for c in cks]) + assert len(cks) == len(vectors) for i, d in enumerate(cks): - v = vects[i] + v = vectors[i] d["q_%d_vec" % len(v)] = v for b in range(0, len(cks), es_bulk_size): if try_create_idx: if not settings.docStoreConn.indexExist(idxnm, kb_id): - settings.docStoreConn.createIdx(idxnm, kb_id, len(vects[0])) + settings.docStoreConn.createIdx(idxnm, kb_id, len(vectors[0])) try_create_idx = False settings.docStoreConn.insert(cks[b:b + es_bulk_size], idxnm, kb_id) diff --git a/api/db/services/knowledgebase_service.py b/api/db/services/knowledgebase_service.py index 03179da49..ca30ca074 100644 --- a/api/db/services/knowledgebase_service.py +++ b/api/db/services/knowledgebase_service.py @@ -424,6 +424,7 @@ class KnowledgebaseService(CommonService): # Default parser_config (align with kb_app.create) — do not accept external overrides payload["parser_config"] = get_parser_config(parser_id, kwargs.get("parser_config")) + return payload diff --git a/api/utils/email_templates.py b/api/utils/email_templates.py index 10473908a..34201ee38 100644 --- a/api/utils/email_templates.py +++ b/api/utils/email_templates.py @@ -1,3 +1,19 @@ +# +# Copyright 2025 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + """ Reusable HTML email templates and registry. """ diff --git a/api/utils/json_encode.py b/api/utils/json_encode.py index b21addd4f..fa5ea973a 100644 --- a/api/utils/json_encode.py +++ b/api/utils/json_encode.py @@ -1,3 +1,19 @@ +# +# Copyright 2025 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + import datetime import json from enum import Enum, IntEnum