init README of deepdoc, add picture processer. (#71)

* init README of deepdoc, add picture processer.

* add resume parsing
This commit is contained in:
KevinHuSh
2024-02-23 18:28:12 +08:00
committed by GitHub
parent d32322c081
commit 7fd1eca582
42 changed files with 58319 additions and 350 deletions

View File

@ -57,7 +57,7 @@ class TenantLLMService(CommonService):
@classmethod
@DB.connection_context()
def model_instance(cls, tenant_id, llm_type, llm_name=None):
def model_instance(cls, tenant_id, llm_type, llm_name=None, lang="Chinese"):
e, tenant = TenantService.get_by_id(tenant_id)
if not e:
raise LookupError("Tenant not found")
@ -87,7 +87,7 @@ class TenantLLMService(CommonService):
if model_config["llm_factory"] not in CvModel:
return
return CvModel[model_config["llm_factory"]](
model_config["api_key"], model_config["llm_name"])
model_config["api_key"], model_config["llm_name"], lang)
if llm_type == LLMType.CHAT.value:
if model_config["llm_factory"] not in ChatModel:
@ -120,11 +120,11 @@ class TenantLLMService(CommonService):
class LLMBundle(object):
def __init__(self, tenant_id, llm_type, llm_name=None):
def __init__(self, tenant_id, llm_type, llm_name=None, lang="Chinese"):
self.tenant_id = tenant_id
self.llm_type = llm_type
self.llm_name = llm_name
self.mdl = TenantLLMService.model_instance(tenant_id, llm_type, llm_name)
self.mdl = TenantLLMService.model_instance(tenant_id, llm_type, llm_name, lang=lang)
assert self.mdl, "Can't find mole for {}/{}/{}".format(tenant_id, llm_type, llm_name)
def encode(self, texts: list, batch_size=32):

View File

@ -27,7 +27,24 @@ class TaskService(CommonService):
@classmethod
@DB.connection_context()
def get_tasks(cls, tm, mod=0, comm=1, items_per_page=64):
fields = [cls.model.id, cls.model.doc_id, cls.model.from_page,cls.model.to_page, Document.kb_id, Document.parser_id, Document.parser_config, Document.name, Document.type, Document.location, Document.size, Knowledgebase.tenant_id, Tenant.embd_id, Tenant.img2txt_id, Tenant.asr_id, cls.model.update_time]
fields = [
cls.model.id,
cls.model.doc_id,
cls.model.from_page,
cls.model.to_page,
Document.kb_id,
Document.parser_id,
Document.parser_config,
Document.name,
Document.type,
Document.location,
Document.size,
Knowledgebase.tenant_id,
Knowledgebase.language,
Tenant.embd_id,
Tenant.img2txt_id,
Tenant.asr_id,
cls.model.update_time]
docs = cls.model.select(*fields) \
.join(Document, on=(cls.model.doc_id == Document.id)) \
.join(Knowledgebase, on=(Document.kb_id == Knowledgebase.id)) \
@ -42,7 +59,6 @@ class TaskService(CommonService):
.paginate(1, items_per_page)
return list(docs.dicts())
@classmethod
@DB.connection_context()
def do_cancel(cls, id):
@ -54,12 +70,11 @@ class TaskService(CommonService):
pass
return True
@classmethod
@DB.connection_context()
def update_progress(cls, id, info):
cls.model.update(progress_msg=cls.model.progress_msg + "\n"+info["progress_msg"]).where(
cls.model.update(progress_msg=cls.model.progress_msg + "\n" + info["progress_msg"]).where(
cls.model.id == id).execute()
if "progress" in info:
cls.model.update(progress=info["progress"]).where(
cls.model.id == id).execute()
cls.model.id == id).execute()