Try to reuse existing chunks (#3983)

### What problem does this PR solve?

Try to reuse existing chunks. Close #3793
### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
Zhichang Yu
2024-12-12 16:38:03 +08:00
committed by GitHub
parent 835fd7abcd
commit 301f95837c
7 changed files with 242 additions and 85 deletions

View File

@ -282,6 +282,31 @@ class DocumentService(CommonService):
return
return docs[0]["embd_id"]
@classmethod
@DB.connection_context()
def get_chunking_config(cls, doc_id):
configs = (
cls.model.select(
cls.model.id,
cls.model.kb_id,
cls.model.parser_id,
cls.model.parser_config,
Knowledgebase.language,
Knowledgebase.embd_id,
Tenant.id.alias("tenant_id"),
Tenant.img2txt_id,
Tenant.asr_id,
Tenant.llm_id,
)
.join(Knowledgebase, on=(cls.model.kb_id == Knowledgebase.id))
.join(Tenant, on=(Knowledgebase.tenant_id == Tenant.id))
.where(cls.model.id == doc_id)
)
configs = configs.dicts()
if not configs:
return None
return configs[0]
@classmethod
@DB.connection_context()
def get_doc_id_by_doc_name(cls, doc_name):