mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-01-23 11:36:38 +08:00
Refa: async retrieval process. (#12629)
### Type of change - [x] Refactoring - [x] Performance Improvement
This commit is contained in:
@ -61,7 +61,7 @@ async def list_chunk():
|
||||
}
|
||||
if "available_int" in req:
|
||||
query["available_int"] = int(req["available_int"])
|
||||
sres = settings.retriever.search(query, search.index_name(tenant_id), kb_ids, highlight=["content_ltks"])
|
||||
sres = await settings.retriever.search(query, search.index_name(tenant_id), kb_ids, highlight=["content_ltks"])
|
||||
res = {"total": sres.total, "chunks": [], "doc": doc.to_dict()}
|
||||
for id in sres.ids:
|
||||
d = {
|
||||
@ -371,20 +371,20 @@ async def retrieval_test():
|
||||
_question += await keyword_extraction(chat_mdl, _question)
|
||||
|
||||
labels = label_question(_question, [kb])
|
||||
ranks = await asyncio.to_thread(settings.retriever.retrieval,
|
||||
_question,
|
||||
embd_mdl,
|
||||
tenant_ids,
|
||||
kb_ids,
|
||||
page,
|
||||
size,
|
||||
float(req.get("similarity_threshold", 0.0)),
|
||||
float(req.get("vector_similarity_weight", 0.3)),
|
||||
doc_ids=local_doc_ids,
|
||||
top=top,
|
||||
rerank_mdl=rerank_mdl,
|
||||
rank_feature=labels,
|
||||
)
|
||||
ranks = await settings.retriever.retrieval(
|
||||
_question,
|
||||
embd_mdl,
|
||||
tenant_ids,
|
||||
kb_ids,
|
||||
page,
|
||||
size,
|
||||
float(req.get("similarity_threshold", 0.0)),
|
||||
float(req.get("vector_similarity_weight", 0.3)),
|
||||
doc_ids=local_doc_ids,
|
||||
top=top,
|
||||
rerank_mdl=rerank_mdl,
|
||||
rank_feature=labels
|
||||
)
|
||||
|
||||
if use_kg:
|
||||
ck = await settings.kg_retriever.retrieval(_question,
|
||||
@ -413,7 +413,7 @@ async def retrieval_test():
|
||||
|
||||
@manager.route('/knowledge_graph', methods=['GET']) # noqa: F821
|
||||
@login_required
|
||||
def knowledge_graph():
|
||||
async def knowledge_graph():
|
||||
doc_id = request.args["doc_id"]
|
||||
tenant_id = DocumentService.get_tenant_id(doc_id)
|
||||
kb_ids = KnowledgebaseService.get_kb_ids(tenant_id)
|
||||
@ -421,7 +421,7 @@ def knowledge_graph():
|
||||
"doc_ids": [doc_id],
|
||||
"knowledge_graph_kwd": ["graph", "mind_map"]
|
||||
}
|
||||
sres = settings.retriever.search(req, search.index_name(tenant_id), kb_ids)
|
||||
sres = await settings.retriever.search(req, search.index_name(tenant_id), kb_ids)
|
||||
obj = {"graph": {}, "mind_map": {}}
|
||||
for id in sres.ids[:2]:
|
||||
ty = sres.field[id]["knowledge_graph_kwd"]
|
||||
|
||||
@ -373,7 +373,7 @@ async def rename_tags(kb_id):
|
||||
|
||||
@manager.route('/<kb_id>/knowledge_graph', methods=['GET']) # noqa: F821
|
||||
@login_required
|
||||
def knowledge_graph(kb_id):
|
||||
async def knowledge_graph(kb_id):
|
||||
if not KnowledgebaseService.accessible(kb_id, current_user.id):
|
||||
return get_json_result(
|
||||
data=False,
|
||||
@ -389,7 +389,7 @@ def knowledge_graph(kb_id):
|
||||
obj = {"graph": {}, "mind_map": {}}
|
||||
if not settings.docStoreConn.index_exist(search.index_name(kb.tenant_id), kb_id):
|
||||
return get_json_result(data=obj)
|
||||
sres = settings.retriever.search(req, search.index_name(kb.tenant_id), [kb_id])
|
||||
sres = await settings.retriever.search(req, search.index_name(kb.tenant_id), [kb_id])
|
||||
if not len(sres.ids):
|
||||
return get_json_result(data=obj)
|
||||
|
||||
|
||||
@ -481,7 +481,7 @@ def list_datasets(tenant_id):
|
||||
|
||||
@manager.route('/datasets/<dataset_id>/knowledge_graph', methods=['GET']) # noqa: F821
|
||||
@token_required
|
||||
def knowledge_graph(tenant_id, dataset_id):
|
||||
async def knowledge_graph(tenant_id, dataset_id):
|
||||
if not KnowledgebaseService.accessible(dataset_id, tenant_id):
|
||||
return get_result(
|
||||
data=False,
|
||||
@ -497,7 +497,7 @@ def knowledge_graph(tenant_id, dataset_id):
|
||||
obj = {"graph": {}, "mind_map": {}}
|
||||
if not settings.docStoreConn.index_exist(search.index_name(kb.tenant_id), dataset_id):
|
||||
return get_result(data=obj)
|
||||
sres = settings.retriever.search(req, search.index_name(kb.tenant_id), [dataset_id])
|
||||
sres = await settings.retriever.search(req, search.index_name(kb.tenant_id), [dataset_id])
|
||||
if not len(sres.ids):
|
||||
return get_result(data=obj)
|
||||
|
||||
|
||||
@ -135,7 +135,7 @@ async def retrieval(tenant_id):
|
||||
doc_ids.extend(meta_filter(metas, convert_conditions(metadata_condition), metadata_condition.get("logic", "and")))
|
||||
if not doc_ids and metadata_condition:
|
||||
doc_ids = ["-999"]
|
||||
ranks = settings.retriever.retrieval(
|
||||
ranks = await settings.retriever.retrieval(
|
||||
question,
|
||||
embd_mdl,
|
||||
kb.tenant_id,
|
||||
|
||||
@ -935,7 +935,7 @@ async def stop_parsing(tenant_id, dataset_id):
|
||||
|
||||
@manager.route("/datasets/<dataset_id>/documents/<document_id>/chunks", methods=["GET"]) # noqa: F821
|
||||
@token_required
|
||||
def list_chunks(tenant_id, dataset_id, document_id):
|
||||
async def list_chunks(tenant_id, dataset_id, document_id):
|
||||
"""
|
||||
List chunks of a document.
|
||||
---
|
||||
@ -1081,7 +1081,7 @@ def list_chunks(tenant_id, dataset_id, document_id):
|
||||
_ = Chunk(**final_chunk)
|
||||
|
||||
elif settings.docStoreConn.index_exist(search.index_name(tenant_id), dataset_id):
|
||||
sres = settings.retriever.search(query, search.index_name(tenant_id), [dataset_id], emb_mdl=None, highlight=True)
|
||||
sres = await settings.retriever.search(query, search.index_name(tenant_id), [dataset_id], emb_mdl=None, highlight=True)
|
||||
res["total"] = sres.total
|
||||
for id in sres.ids:
|
||||
d = {
|
||||
@ -1559,7 +1559,7 @@ async def retrieval_test(tenant_id):
|
||||
chat_mdl = LLMBundle(kb.tenant_id, LLMType.CHAT)
|
||||
question += await keyword_extraction(chat_mdl, question)
|
||||
|
||||
ranks = settings.retriever.retrieval(
|
||||
ranks = await settings.retriever.retrieval(
|
||||
question,
|
||||
embd_mdl,
|
||||
tenant_ids,
|
||||
|
||||
@ -1098,7 +1098,7 @@ async def retrieval_test_embedded():
|
||||
_question += await keyword_extraction(chat_mdl, _question)
|
||||
|
||||
labels = label_question(_question, [kb])
|
||||
ranks = settings.retriever.retrieval(
|
||||
ranks = await settings.retriever.retrieval(
|
||||
_question, embd_mdl, tenant_ids, kb_ids, page, size, similarity_threshold, vector_similarity_weight, top,
|
||||
local_doc_ids, rerank_mdl=rerank_mdl, highlight=req.get("highlight"), rank_feature=labels
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user