add use layout or not option (#145)

* add use layout or not option * trivial
2026-01-23 11:36:38 +08:00 · 2024-03-22 19:21:09 +08:00
parent 2f4c71b4b4
commit f6aee7f230
18 changed files with 238 additions and 140 deletions
--- a/api/apps/conversation_app.py
+++ b/api/apps/conversation_app.py
@ -196,7 +196,10 @@ def chat(dialog, messages, **kwargs):

    for _ in range(len(questions)//2):
        questions.append(questions[-1])
-    kbinfos = retrievaler.retrieval(" ".join(questions), embd_mdl, dialog.tenant_id, dialog.kb_ids, 1, dialog.top_n,
+    if "knowledge" not in [p["key"] for p in prompt_config["parameters"]]:
+        kbinfos = {"total":0, "chunks":[],"doc_aggs":[]}
+    else:
+        kbinfos = retrievaler.retrieval(" ".join(questions), embd_mdl, dialog.tenant_id, dialog.kb_ids, 1, dialog.top_n,
                                    dialog.similarity_threshold,
                                    dialog.vector_similarity_weight, top=1024, aggs=False)
    knowledges = [ck["content_with_weight"] for ck in kbinfos["chunks"]]
--- a/api/apps/document_app.py
+++ b/api/apps/document_app.py
@ -310,7 +310,10 @@ def change_parser():
        if not e:
            return get_data_error_result(retmsg="Document not found!")
        if doc.parser_id.lower() == req["parser_id"].lower():
-            return get_json_result(data=True)
+            if "parser_config" in req:
+                if req["parser_config"] == doc.parser_config:
+                    return get_json_result(data=True)
+            else: return get_json_result(data=True)

        if doc.type == FileType.VISUAL or re.search(r"\.(ppt|pptx|pages)$", doc.name):
            return get_data_error_result(retmsg="Not supported yet!")
@ -319,6 +322,8 @@ def change_parser():
                                         {"parser_id": req["parser_id"], "progress": 0, "progress_msg": "", "run": "0"})
        if not e:
            return get_data_error_result(retmsg="Document not found!")
+        if "parser_config" in req:
+            DocumentService.update_parser_config(doc.id, req["parser_config"])
        if doc.token_num > 0:
            e = DocumentService.increment_chunk_num(doc.id, doc.kb_id, doc.token_num * -1, doc.chunk_num * -1,
                                                    doc.process_duation * -1)
--- a/api/db/init_data.py
+++ b/api/db/init_data.py
@ -276,7 +276,7 @@ def init_llm_factory():
    drop table llm_factories;
    update tenant_llm set llm_factory='Tongyi-Qianwen' where llm_factory='通义千问';
    update tenant_llm set llm_factory='ZHIPU-AI' where llm_factory='智谱AI';
-    update tenant set parser_ids='naive:General,one:One,qa:Q&A,resume:Resume,table:Table,laws:Laws,manual:Manual,book:Book,paper:Paper,presentation:Presentation,picture:Picture';
+    update tenant set parser_ids='naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One';
    alter table knowledgebase modify avatar longtext;
    alter table user modify avatar longtext;
    alter table dialog modify icon longtext;
@ -297,5 +297,4 @@ def init_web_data():

 if __name__ == '__main__':
    init_web_db()
-    init_web_data()
-    add_tenant_llm()
+    init_web_data()
--- a/api/db/services/document_service.py
+++ b/api/db/services/document_service.py
@ -118,9 +118,25 @@ class DocumentService(CommonService):
        if not docs:return
        return docs[0]["tenant_id"]

-
    @classmethod
    @DB.connection_context()
    def get_thumbnails(cls, docids):
        fields = [cls.model.id, cls.model.thumbnail]
        return list(cls.model.select(*fields).where(cls.model.id.in_(docids)).dicts())
+
+    @classmethod
+    @DB.connection_context()
+    def update_parser_config(cls, id, config):  # Merge `config` into the parser_config of document `id`; raises LookupError if the document is not found.
+        e, d = cls.get_by_id(id)  # e: falsy when the lookup fails; d: the document record
+        if not e:raise LookupError(f"Document({id}) not found.")
+        def dfs_update(old, new):  # recursively merge `new` into `old`, mutating `old` in place
+            for k,v in new.items():
+                if k not in old:  # key not present yet: take the new value as-is
+                    old[k] = v
+                    continue
+                if isinstance(v, dict):  # nested dict: merge key by key instead of replacing the whole sub-dict
+                    assert isinstance(old[k], dict)  # NOTE(review): assert is skipped under `python -O`; consider an explicit raise for a dict-vs-non-dict mismatch
+                    dfs_update(old[k], v)
+                else: old[k] = v  # non-dict value overwrites the existing entry
+        dfs_update(d.parser_config, config)  # d.parser_config is modified in place
+        cls.update_by_id(id, {"parser_config": d.parser_config})  # hand the merged config to update_by_id
--- a/api/settings.py
+++ b/api/settings.py
@ -94,7 +94,7 @@ ASR_MDL = default_llm[LLM_FACTORY]["asr_model"]
 IMAGE2TEXT_MDL = default_llm[LLM_FACTORY]["image2text_model"]

 API_KEY = LLM.get("api_key", "")
-PARSERS = LLM.get("parsers", "naive:General,one:One,qa:Q&A,resume:Resume,table:Table,laws:Laws,manual:Manual,book:Book,paper:Paper,presentation:Presentation,picture:Picture")
+PARSERS = LLM.get("parsers", "naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One")

 # distribution
 DEPENDENT_DISTRIBUTION = get_base_config("dependent_distribution", False)