Add 'One' chunk method (#137)

2026-01-30 07:06:39 +08:00 · 2024-03-20 18:57:22 +08:00
parent fce14ee187
commit 5875c8ba08
11 changed files with 143 additions and 24 deletions
--- a/api/db/init.py
+++ b/api/db/init.py
@ -79,3 +79,4 @@ class ParserType(StrEnum):
    TABLE = "table"
    NAIVE = "naive"
    PICTURE = "picture"
+    ONE = "one"
--- a/api/db/init_data.py
+++ b/api/db/init_data.py
@ -79,12 +79,12 @@ factory_infos = [{
        "tags": "LLM,TEXT EMBEDDING,SPEECH2TEXT,MODERATION",
        "status": "1",
    },{
-        "name": "通义千问",
+        "name": "Tongyi-Qianwen",
        "logo": "",
        "tags": "LLM,TEXT EMBEDDING,SPEECH2TEXT,MODERATION",
        "status": "1",
    },{
-        "name": "智谱AI",
+        "name": "ZHIPU-AI",
        "logo": "",
        "tags": "LLM,TEXT EMBEDDING,SPEECH2TEXT,MODERATION",
        "status": "1",
@ -270,6 +270,14 @@ def init_llm_factory():
        except Exception as e:
            pass

+    """
+    drop table llm;
+    drop table factories;
+    update tenant_llm set llm_factory='Tongyi-Qianwen' where llm_factory='通义千问';
+    update tenant_llm set llm_factory='ZHIPU-AI' where llm_factory='智谱AI';
+    update tenant set parser_ids='naive:General,one:One,qa:Q&A,resume:Resume,table:Table,laws:Laws,manual:Manual,book:Book,paper:Paper,presentation:Presentation,picture:Picture';
+    """
+

 def init_web_data():
    start_time = time.time()
--- a/api/settings.py
+++ b/api/settings.py
@ -52,7 +52,7 @@ REQUEST_MAX_WAIT_SEC = 300
 USE_REGISTRY = get_base_config("use_registry")

 default_llm = {
-    "通义千问": {
+    "Tongyi-Qianwen": {
        "chat_model": "qwen-plus",
        "embedding_model": "text-embedding-v2",
        "image2text_model": "qwen-vl-max",
@ -64,7 +64,7 @@ default_llm = {
        "image2text_model": "gpt-4-vision-preview",
        "asr_model": "whisper-1",
    },
-    "智谱AI": {
+    "ZHIPU-AI": {
        "chat_model": "glm-3-turbo",
        "embedding_model": "embedding-2",
        "image2text_model": "glm-4v",
@ -84,17 +84,17 @@ default_llm = {
    }
 }
 LLM = get_base_config("user_default_llm", {})
-LLM_FACTORY = LLM.get("factory", "通义千问")
+LLM_FACTORY = LLM.get("factory", "Tongyi-Qianwen")
 if LLM_FACTORY not in default_llm:
-    print("\33[91m【ERROR】\33[0m:", f"LLM factory {LLM_FACTORY} has not supported yet, switch to '通义千问/QWen' automatically, and please check the API_KEY in service_conf.yaml.")
-    LLM_FACTORY = "通义千问"
+    print("\33[91m【ERROR】\33[0m:", f"LLM factory {LLM_FACTORY} has not supported yet, switch to 'Tongyi-Qianwen/QWen' automatically, and please check the API_KEY in service_conf.yaml.")
+    LLM_FACTORY = "Tongyi-Qianwen"
 CHAT_MDL = default_llm[LLM_FACTORY]["chat_model"]
 EMBEDDING_MDL = default_llm[LLM_FACTORY]["embedding_model"]
 ASR_MDL = default_llm[LLM_FACTORY]["asr_model"]
 IMAGE2TEXT_MDL = default_llm[LLM_FACTORY]["image2text_model"]

 API_KEY = LLM.get("api_key", "")
-PARSERS = LLM.get("parsers", "naive:General,qa:Q&A,resume:Resume,table:Table,laws:Laws,manual:Manual,book:Book,paper:Paper,presentation:Presentation,picture:Picture")
+PARSERS = LLM.get("parsers", "naive:General,one:One,qa:Q&A,resume:Resume,table:Table,laws:Laws,manual:Manual,book:Book,paper:Paper,presentation:Presentation,picture:Picture")

 # distribution
 DEPENDENT_DISTRIBUTION = get_base_config("dependent_distribution", False)