Refine resume parts and fix bugs in retrieval using SQL (#66)

This commit is contained in:
KevinHuSh
2024-02-19 19:22:17 +08:00
committed by GitHub
parent 452020d33a
commit a8294f2168
29 changed files with 302 additions and 158 deletions

View File

@ -465,7 +465,8 @@ class Knowledgebase(DataBaseModel):
tenant_id = CharField(max_length=32, null=False)
name = CharField(max_length=128, null=False, help_text="KB name", index=True)
description = TextField(null=True, help_text="KB description")
permission = CharField(max_length=16, null=False, help_text="me|team")
embd_id = CharField(max_length=128, null=False, help_text="default embedding model ID")
permission = CharField(max_length=16, null=False, help_text="me|team", default="me")
created_by = CharField(max_length=32, null=False)
doc_num = IntegerField(default=0)
token_num = IntegerField(default=0)

View File

@ -46,11 +46,6 @@ def init_llm_factory():
"logo": "",
"tags": "LLM,TEXT EMBEDDING,SPEECH2TEXT,MODERATION",
"status": "1",
},{
"name": "Infiniflow",
"logo": "",
"tags": "LLM,TEXT EMBEDDING,SPEECH2TEXT,MODERATION",
"status": "1",
},{
"name": "智普AI",
"logo": "",
@ -135,59 +130,33 @@ def init_llm_factory():
"model_type": LLMType.SPEECH2TEXT.value
},{
"fid": factory_infos[1]["name"],
"llm_name": "qwen_vl_chat_v1",
"tags": "LLM,CHAT,IMAGE2TEXT",
"max_tokens": 765,
"model_type": LLMType.IMAGE2TEXT.value
},
# ----------------------- Infiniflow -----------------------
{
"fid": factory_infos[2]["name"],
"llm_name": "gpt-3.5-turbo",
"tags": "LLM,CHAT,4K",
"max_tokens": 4096,
"model_type": LLMType.CHAT.value
},{
"fid": factory_infos[2]["name"],
"llm_name": "text-embedding-ada-002",
"tags": "TEXT EMBEDDING,8K",
"max_tokens": 8191,
"model_type": LLMType.EMBEDDING.value
},{
"fid": factory_infos[2]["name"],
"llm_name": "whisper-1",
"tags": "SPEECH2TEXT",
"max_tokens": 25*1024*1024,
"model_type": LLMType.SPEECH2TEXT.value
},{
"fid": factory_infos[2]["name"],
"llm_name": "gpt-4-vision-preview",
"llm_name": "qwen-vl-max",
"tags": "LLM,CHAT,IMAGE2TEXT",
"max_tokens": 765,
"model_type": LLMType.IMAGE2TEXT.value
},
# ---------------------- ZhipuAI ----------------------
{
"fid": factory_infos[3]["name"],
"fid": factory_infos[2]["name"],
"llm_name": "glm-3-turbo",
"tags": "LLM,CHAT,",
"max_tokens": 128 * 1000,
"model_type": LLMType.CHAT.value
}, {
"fid": factory_infos[3]["name"],
"fid": factory_infos[2]["name"],
"llm_name": "glm-4",
"tags": "LLM,CHAT,",
"max_tokens": 128 * 1000,
"model_type": LLMType.CHAT.value
}, {
"fid": factory_infos[3]["name"],
"fid": factory_infos[2]["name"],
"llm_name": "glm-4v",
"tags": "LLM,CHAT,IMAGE2TEXT",
"max_tokens": 2000,
"model_type": LLMType.IMAGE2TEXT.value
},
{
"fid": factory_infos[3]["name"],
"fid": factory_infos[2]["name"],
"llm_name": "embedding-2",
"tags": "TEXT EMBEDDING",
"max_tokens": 512,

View File

@ -77,9 +77,12 @@ class KnowledgebaseService(CommonService):
if isinstance(v, dict):
assert isinstance(old[k], dict)
dfs_update(old[k], v)
if isinstance(v, list):
assert isinstance(old[k], list)
old[k] = list(set(old[k]+v))
else: old[k] = v
dfs_update(m.parser_config, config)
cls.update_by_id(id, m.parser_config)
cls.update_by_id(id, {"parser_config": m.parser_config})
@classmethod
@ -88,6 +91,6 @@ class KnowledgebaseService(CommonService):
conf = {}
for k in cls.get_by_ids(ids):
if k.parser_config and "field_map" in k.parser_config:
conf.update(k.parser_config)
conf.update(k.parser_config["field_map"])
return conf