mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-01-28 14:16:34 +08:00
shortcut metadata_condition if there is none (#12835)
### What problem does this PR solve? If no `metadata_condition` parameter is given then don't load the metadata of all documents into memory. Instead just pass `doc_ids` as `None` to the `retrieval()` method, which means to use all documents of the given datasets. This is relevant if you have *a lot* of documents! ### Type of change - [x] Performance Improvement
This commit is contained in:
committed by
GitHub
parent
c8338dec57
commit
b36d9744ae
@ -1526,14 +1526,18 @@ async def retrieval_test(tenant_id):
|
||||
if doc_id not in doc_ids_list:
|
||||
return get_error_data_result(f"The datasets don't own the document {doc_id}")
|
||||
if not doc_ids:
|
||||
metadata_condition = req.get("metadata_condition", {}) or {}
|
||||
metas = DocumentService.get_meta_by_kbs(kb_ids)
|
||||
doc_ids = meta_filter(metas, convert_conditions(metadata_condition), metadata_condition.get("logic", "and"))
|
||||
# If metadata_condition has conditions but no docs match, return empty result
|
||||
if not doc_ids and metadata_condition.get("conditions"):
|
||||
return get_result(data={"total": 0, "chunks": [], "doc_aggs": {}})
|
||||
if metadata_condition and not doc_ids:
|
||||
doc_ids = ["-999"]
|
||||
metadata_condition = req.get("metadata_condition")
|
||||
if metadata_condition:
|
||||
metas = DocumentService.get_meta_by_kbs(kb_ids)
|
||||
doc_ids = meta_filter(metas, convert_conditions(metadata_condition), metadata_condition.get("logic", "and"))
|
||||
# If metadata_condition has conditions but no docs match, return empty result
|
||||
if not doc_ids and metadata_condition.get("conditions"):
|
||||
return get_result(data={"total": 0, "chunks": [], "doc_aggs": {}})
|
||||
if metadata_condition and not doc_ids:
|
||||
doc_ids = ["-999"]
|
||||
else:
|
||||
# If doc_ids is None all documents of the datasets are used
|
||||
doc_ids = None
|
||||
similarity_threshold = float(req.get("similarity_threshold", 0.2))
|
||||
vector_similarity_weight = float(req.get("vector_similarity_weight", 0.3))
|
||||
top = int(req.get("top_k", 1024))
|
||||
|
||||
Reference in New Issue
Block a user