mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-24 23:46:52 +08:00
Feat: deduplicate metadata lists during updates (#12125)
### What problem does this PR solve? Deduplicate metadata lists during updates. ### Type of change - [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
@ -33,6 +33,7 @@ from api.db.db_models import DB, Document, Knowledgebase, Task, Tenant, UserTena
|
||||
from api.db.db_utils import bulk_insert_into_db
|
||||
from api.db.services.common_service import CommonService
|
||||
from api.db.services.knowledgebase_service import KnowledgebaseService
|
||||
from common.metadata_utils import dedupe_list
|
||||
from common.misc_utils import get_uuid
|
||||
from common.time_utils import current_timestamp, get_format_time
|
||||
from common.constants import LLMType, ParserType, StatusEnum, TaskStatus, SVR_CONSUMER_GROUP_NAME
|
||||
@ -799,7 +800,10 @@ class DocumentService(CommonService):
|
||||
match_provided = "match" in upd
|
||||
if isinstance(meta[key], list):
|
||||
if not match_provided:
|
||||
meta[key] = new_value
|
||||
if isinstance(new_value, list):
|
||||
meta[key] = dedupe_list(new_value)
|
||||
else:
|
||||
meta[key] = new_value
|
||||
changed = True
|
||||
else:
|
||||
match_value = upd.get("match")
|
||||
@ -812,7 +816,7 @@ class DocumentService(CommonService):
|
||||
else:
|
||||
new_list.append(item)
|
||||
if replaced:
|
||||
meta[key] = new_list
|
||||
meta[key] = dedupe_list(new_list)
|
||||
changed = True
|
||||
else:
|
||||
if not match_provided:
|
||||
|
||||
@ -151,6 +151,18 @@ async def apply_meta_data_filter(
|
||||
return doc_ids
|
||||
|
||||
|
||||
def dedupe_list(values: list) -> list:
|
||||
seen = set()
|
||||
deduped = []
|
||||
for item in values:
|
||||
key = str(item)
|
||||
if key in seen:
|
||||
continue
|
||||
seen.add(key)
|
||||
deduped.append(item)
|
||||
return deduped
|
||||
|
||||
|
||||
def update_metadata_to(metadata, meta):
|
||||
if not meta:
|
||||
return metadata
|
||||
@ -162,11 +174,13 @@ def update_metadata_to(metadata, meta):
|
||||
return metadata
|
||||
if not isinstance(meta, dict):
|
||||
return metadata
|
||||
|
||||
for k, v in meta.items():
|
||||
if isinstance(v, list):
|
||||
v = [vv for vv in v if isinstance(vv, str)]
|
||||
if not v:
|
||||
continue
|
||||
v = dedupe_list(v)
|
||||
if not isinstance(v, list) and not isinstance(v, str):
|
||||
continue
|
||||
if k not in metadata:
|
||||
@ -177,6 +191,7 @@ def update_metadata_to(metadata, meta):
|
||||
metadata[k].extend(v)
|
||||
else:
|
||||
metadata[k].append(v)
|
||||
metadata[k] = dedupe_list(metadata[k])
|
||||
else:
|
||||
metadata[k] = v
|
||||
|
||||
@ -208,4 +223,4 @@ def metadata_schema(metadata: list|None) -> Dict[str, Any]:
|
||||
}
|
||||
|
||||
json_schema["additionalProperties"] = False
|
||||
return json_schema
|
||||
return json_schema
|
||||
|
||||
Reference in New Issue
Block a user