Upgrade the laws parser for docx files (#1332)

### What problem does this PR solve?


### Type of change

- [x] Refactoring
This commit is contained in:
KevinHuSh
2024-07-01 15:50:24 +08:00
committed by GitHub
parent 5eb21b9c7c
commit 92e9320657
4 changed files with 56 additions and 53 deletions

View File

@@ -20,7 +20,7 @@ from flask_login import login_required, current_user
from elasticsearch_dsl import Q
from rag.app.qa import rmPrefix, beAdoc
from rag.nlp import search, rag_tokenizer
from rag.nlp import search, rag_tokenizer, keyword_extraction
from rag.utils.es_conn import ELASTICSEARCH
from rag.utils import rmSpace
from api.db import LLMType, ParserType
@@ -268,6 +268,10 @@ def retrieval_test():
rerank_mdl = TenantLLMService.model_instance(
kb.tenant_id, LLMType.RERANK.value, llm_name=req["rerank_id"])
if req.get("keyword", False):
chat_mdl = TenantLLMService.model_instance(kb.tenant_id, LLMType.CHAT)
question += keyword_extraction(chat_mdl, question)
ranks = retrievaler.retrieval(question, embd_mdl, kb.tenant_id, [kb_id], page, size,
similarity_threshold, vector_similarity_weight, top,
doc_ids, rerank_mdl=rerank_mdl)