mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-25 16:26:51 +08:00
Feat: deduplicate metadata lists during updates (#12125)
### What problem does this PR solve? Deduplicate metadata lists during updates. ### Type of change - [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
@ -151,6 +151,18 @@ async def apply_meta_data_filter(
|
||||
return doc_ids
|
||||
|
||||
|
||||
def dedupe_list(values: list) -> list:
|
||||
seen = set()
|
||||
deduped = []
|
||||
for item in values:
|
||||
key = str(item)
|
||||
if key in seen:
|
||||
continue
|
||||
seen.add(key)
|
||||
deduped.append(item)
|
||||
return deduped
|
||||
|
||||
|
||||
def update_metadata_to(metadata, meta):
|
||||
if not meta:
|
||||
return metadata
|
||||
@ -162,11 +174,13 @@ def update_metadata_to(metadata, meta):
|
||||
return metadata
|
||||
if not isinstance(meta, dict):
|
||||
return metadata
|
||||
|
||||
for k, v in meta.items():
|
||||
if isinstance(v, list):
|
||||
v = [vv for vv in v if isinstance(vv, str)]
|
||||
if not v:
|
||||
continue
|
||||
v = dedupe_list(v)
|
||||
if not isinstance(v, list) and not isinstance(v, str):
|
||||
continue
|
||||
if k not in metadata:
|
||||
@ -177,6 +191,7 @@ def update_metadata_to(metadata, meta):
|
||||
metadata[k].extend(v)
|
||||
else:
|
||||
metadata[k].append(v)
|
||||
metadata[k] = dedupe_list(metadata[k])
|
||||
else:
|
||||
metadata[k] = v
|
||||
|
||||
@ -208,4 +223,4 @@ def metadata_schema(metadata: list|None) -> Dict[str, Any]:
|
||||
}
|
||||
|
||||
json_schema["additionalProperties"] = False
|
||||
return json_schema
|
||||
return json_schema
|
||||
|
||||
Reference in New Issue
Block a user