Add an option to use layout or not (#145)

* Add an option to use layout or not

* trivial
This commit is contained in:
KevinHuSh
2024-03-22 19:21:09 +08:00
committed by GitHub
parent 2f4c71b4b4
commit f6aee7f230
18 changed files with 238 additions and 140 deletions

View File

@ -196,7 +196,10 @@ def chat(dialog, messages, **kwargs):
for _ in range(len(questions)//2):
questions.append(questions[-1])
kbinfos = retrievaler.retrieval(" ".join(questions), embd_mdl, dialog.tenant_id, dialog.kb_ids, 1, dialog.top_n,
if "knowledge" not in [p["key"] for p in prompt_config["parameters"]]:
kbinfos = {"total":0, "chunks":[],"doc_aggs":[]}
else:
kbinfos = retrievaler.retrieval(" ".join(questions), embd_mdl, dialog.tenant_id, dialog.kb_ids, 1, dialog.top_n,
dialog.similarity_threshold,
dialog.vector_similarity_weight, top=1024, aggs=False)
knowledges = [ck["content_with_weight"] for ck in kbinfos["chunks"]]

View File

@ -310,7 +310,10 @@ def change_parser():
if not e:
return get_data_error_result(retmsg="Document not found!")
if doc.parser_id.lower() == req["parser_id"].lower():
return get_json_result(data=True)
if "parser_config" in req:
if req["parser_config"] == doc.parser_config:
return get_json_result(data=True)
else: return get_json_result(data=True)
if doc.type == FileType.VISUAL or re.search(r"\.(ppt|pptx|pages)$", doc.name):
return get_data_error_result(retmsg="Not supported yet!")
@ -319,6 +322,8 @@ def change_parser():
{"parser_id": req["parser_id"], "progress": 0, "progress_msg": "", "run": "0"})
if not e:
return get_data_error_result(retmsg="Document not found!")
if "parser_config" in req:
DocumentService.update_parser_config(doc.id, req["parser_config"])
if doc.token_num > 0:
e = DocumentService.increment_chunk_num(doc.id, doc.kb_id, doc.token_num * -1, doc.chunk_num * -1,
doc.process_duation * -1)

View File

@ -276,7 +276,7 @@ def init_llm_factory():
drop table llm_factories;
update tenant_llm set llm_factory='Tongyi-Qianwen' where llm_factory='通义千问';
update tenant_llm set llm_factory='ZHIPU-AI' where llm_factory='智谱AI';
update tenant set parser_ids='naive:General,one:One,qa:Q&A,resume:Resume,table:Table,laws:Laws,manual:Manual,book:Book,paper:Paper,presentation:Presentation,picture:Picture';
update tenant set parser_ids='naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One';
alter table knowledgebase modify avatar longtext;
alter table user modify avatar longtext;
alter table dialog modify icon longtext;
@ -297,5 +297,4 @@ def init_web_data():
if __name__ == '__main__':
init_web_db()
init_web_data()
add_tenant_llm()
init_web_data()

View File

@ -118,9 +118,25 @@ class DocumentService(CommonService):
if not docs:return
return docs[0]["tenant_id"]
@classmethod
@DB.connection_context()
def get_thumbnails(cls, docids):
    """Return the ``id`` and ``thumbnail`` columns for the given documents.

    Args:
        docids: Collection of document ids to look up.

    Returns:
        A list of dicts, one per matching row, holding only the selected
        ``id`` and ``thumbnail`` fields.
    """
    query = cls.model.select(cls.model.id, cls.model.thumbnail)
    matching = query.where(cls.model.id.in_(docids))
    return list(matching.dicts())
@classmethod
@DB.connection_context()
def update_parser_config(cls, id, config):
    """Deep-merge ``config`` into a document's stored parser config and save it.

    Keys in ``config`` overwrite or extend the stored ``parser_config``.
    Nested dicts are merged recursively instead of being replaced wholesale,
    and stored keys that ``config`` does not mention are left untouched.

    Args:
        id: Identifier of the document whose parser config is updated.
        config: Mapping of parser settings to merge into the stored config.

    Raises:
        LookupError: If no document with ``id`` is found.
        TypeError: If ``config`` supplies a dict for a key whose stored
            value is not a dict, so the two cannot be merged.
    """
    found, doc = cls.get_by_id(id)
    if not found:
        raise LookupError(f"Document({id}) not found.")

    def dfs_update(old, new):
        # Recursively fold `new` into `old`, mutating `old` in place.
        for key, value in new.items():
            if key in old and isinstance(value, dict):
                # Explicit check rather than `assert`: assertions are removed
                # under `python -O`, and `config` may come straight from a
                # request payload, so the validation must always run.
                if not isinstance(old[key], dict):
                    raise TypeError(
                        f"Cannot merge dict into non-dict parser config "
                        f"value for key {key!r}."
                    )
                dfs_update(old[key], value)
            else:
                # New key, or a non-dict value: plain overwrite.
                old[key] = value

    dfs_update(doc.parser_config, config)
    cls.update_by_id(id, {"parser_config": doc.parser_config})

View File

@ -94,7 +94,7 @@ ASR_MDL = default_llm[LLM_FACTORY]["asr_model"]
# Image-to-text model name for the configured LLM factory.
IMAGE2TEXT_MDL = default_llm[LLM_FACTORY]["image2text_model"]
# API key from the LLM settings; falls back to an empty string when unset.
API_KEY = LLM.get("api_key", "")
# Parser list from the LLM settings. The default is a comma-separated string
# of `key:Label` pairs (presumably parser id and display name — confirm).
# NOTE(review): PARSERS is assigned twice in a row, so only the second
# assignment takes effect. Both defaults list the same parsers; they differ
# only in order.
PARSERS = LLM.get("parsers", "naive:General,one:One,qa:Q&A,resume:Resume,table:Table,laws:Laws,manual:Manual,book:Book,paper:Paper,presentation:Presentation,picture:Picture")
PARSERS = LLM.get("parsers", "naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One")
# distribution
# Read from the base config key "dependent_distribution"; defaults to False.
DEPENDENT_DISTRIBUTION = get_base_config("dependent_distribution", False)