mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-02-03 00:55:10 +08:00
shortcut metadata_condition if there is none (#12835)
### What problem does this PR solve?

If no `metadata_condition` parameter is given, don't load the metadata of all documents into memory. Instead, just pass `doc_ids` as `None` to the `retrieval()` method, which means that all documents of the given datasets are used. This is relevant if you have *a lot* of documents!

### Type of change

- [x] Performance Improvement
This commit is contained in:
committed by
GitHub
parent
c8338dec57
commit
b36d9744ae
@@ -1526,14 +1526,18 @@ async def retrieval_test(tenant_id):
|
|||||||
if doc_id not in doc_ids_list:
|
if doc_id not in doc_ids_list:
|
||||||
return get_error_data_result(f"The datasets don't own the document {doc_id}")
|
return get_error_data_result(f"The datasets don't own the document {doc_id}")
|
||||||
if not doc_ids:
|
if not doc_ids:
|
||||||
metadata_condition = req.get("metadata_condition", {}) or {}
|
metadata_condition = req.get("metadata_condition")
|
||||||
metas = DocumentService.get_meta_by_kbs(kb_ids)
|
if metadata_condition:
|
||||||
doc_ids = meta_filter(metas, convert_conditions(metadata_condition), metadata_condition.get("logic", "and"))
|
metas = DocumentService.get_meta_by_kbs(kb_ids)
|
||||||
# If metadata_condition has conditions but no docs match, return empty result
|
doc_ids = meta_filter(metas, convert_conditions(metadata_condition), metadata_condition.get("logic", "and"))
|
||||||
if not doc_ids and metadata_condition.get("conditions"):
|
# If metadata_condition has conditions but no docs match, return empty result
|
||||||
return get_result(data={"total": 0, "chunks": [], "doc_aggs": {}})
|
if not doc_ids and metadata_condition.get("conditions"):
|
||||||
if metadata_condition and not doc_ids:
|
return get_result(data={"total": 0, "chunks": [], "doc_aggs": {}})
|
||||||
doc_ids = ["-999"]
|
if metadata_condition and not doc_ids:
|
||||||
|
doc_ids = ["-999"]
|
||||||
|
else:
|
||||||
|
# If doc_ids is None all documents of the datasets are used
|
||||||
|
doc_ids = None
|
||||||
similarity_threshold = float(req.get("similarity_threshold", 0.2))
|
similarity_threshold = float(req.get("similarity_threshold", 0.2))
|
||||||
vector_similarity_weight = float(req.get("vector_similarity_weight", 0.3))
|
vector_similarity_weight = float(req.get("vector_similarity_weight", 0.3))
|
||||||
top = int(req.get("top_k", 1024))
|
top = int(req.get("top_k", 1024))
|
||||||
|
|||||||
Reference in New Issue
Block a user