refine manual parser (#140)

This commit is contained in:
KevinHuSh
2024-03-21 18:17:32 +08:00
committed by GitHub
parent f4ec7cfa76
commit 6c6b144de2
11 changed files with 77 additions and 47 deletions

View File

@ -118,14 +118,13 @@ def message_fit_in(msg, max_length=4000):
c = count()
if c < max_length: return c, msg
msg = [m for m in msg if m.role in ["system", "user"]]
c = count()
if c < max_length: return c, msg
msg_ = [m for m in msg[:-1] if m.role == "system"]
msg_.append(msg[-1])
msg = msg_
c = count()
if c < max_length: return c, msg
ll = num_tokens_from_string(msg_[0].content)
l = num_tokens_from_string(msg_[-1].content)
if ll / (ll + l) > 0.8:

View File

@ -218,7 +218,7 @@ def rm():
ELASTICSEARCH.deleteByQuery(Q("match", doc_id=doc.id), idxnm=search.index_name(tenant_id))
DocumentService.increment_chunk_num(doc.id, doc.kb_id, doc.token_num * -1, doc.chunk_num * -1, 0)
if not DocumentService.delete_by_id(req["doc_id"]):
if not DocumentService.delete(doc):
return get_data_error_result(
retmsg="Database error (Document removal)!")

View File

@ -353,7 +353,7 @@ class User(DataBaseModel, UserMixin):
email = CharField(max_length=255, null=False, help_text="email", index=True)
avatar = TextField(null=True, help_text="avatar base64 string")
language = CharField(max_length=32, null=True, help_text="English|Chinese", default="Chinese")
color_schema = CharField(max_length=32, null=True, help_text="Bright|Dark", default="Dark")
color_schema = CharField(max_length=32, null=True, help_text="Bright|Dark", default="Bright")
timezone = CharField(max_length=64, null=True, help_text="Timezone", default="UTC+8\tAsia/Shanghai")
last_login_time = DateTimeField(null=True)
is_authenticated = CharField(max_length=1, null=False, default="1")

View File

@ -223,7 +223,7 @@ def init_llm_factory():
"fid": factory_infos[3]["name"],
"llm_name": "qwen-14B-chat",
"tags": "LLM,CHAT,",
"max_tokens": 8191,
"max_tokens": 4096,
"model_type": LLMType.CHAT.value
}, {
"fid": factory_infos[3]["name"],
@ -271,11 +271,15 @@ def init_llm_factory():
pass
"""
modify service_config
drop table llm;
drop table factories;
drop table llm_factories;
update tenant_llm set llm_factory='Tongyi-Qianwen' where llm_factory='通义千问';
update tenant_llm set llm_factory='ZHIPU-AI' where llm_factory='智谱AI';
update tenant set parser_ids='naive:General,one:One,qa:Q&A,resume:Resume,table:Table,laws:Laws,manual:Manual,book:Book,paper:Paper,presentation:Presentation,picture:Picture';
alter table knowledgebase modify avatar longtext;
alter table user modify avatar longtext;
alter table dialog modify icon longtext;
"""

View File

@ -60,6 +60,15 @@ class DocumentService(CommonService):
raise RuntimeError("Database error (Knowledgebase)!")
return doc
@classmethod
@DB.connection_context()
def delete(cls, doc):
    """Delete a document and decrement its knowledgebase's document count.

    Args:
        doc: Document record; its ``kb_id`` and ``id`` attributes are read.

    Returns:
        Whatever ``cls.delete_by_id(doc.id)`` returns.

    Raises:
        RuntimeError: If the owning knowledgebase cannot be loaded, or if
            its ``doc_num`` counter cannot be updated.
    """
    # NOTE(review): the first tuple element looks like a success flag --
    # confirm. Guarding on the record itself is enough to avoid the
    # AttributeError that `kb.id` raised when the lookup found nothing.
    _, kb = KnowledgebaseService.get_by_id(doc.kb_id)
    if kb is None:
        raise RuntimeError("Database error (Knowledgebase)!")

    # Keep the knowledgebase counter in step before removing the document.
    updated = KnowledgebaseService.update_by_id(
        kb.id, {"doc_num": kb.doc_num - 1})
    if not updated:
        raise RuntimeError("Database error (Knowledgebase)!")

    return cls.delete_by_id(doc.id)
@classmethod
@DB.connection_context()
def get_newly_uploaded(cls, tm, mod=0, comm=1, items_per_page=64):