Feat: deduplicate metadata lists during updates (#12125)

### What problem does this PR solve?

Deduplicate metadata lists during updates.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
Yongteng Lei
2025-12-24 09:32:55 +08:00
committed by GitHub
parent d72debf0db
commit c987d33649
2 changed files with 22 additions and 3 deletions

View File

@ -33,6 +33,7 @@ from api.db.db_models import DB, Document, Knowledgebase, Task, Tenant, UserTena
from api.db.db_utils import bulk_insert_into_db
from api.db.services.common_service import CommonService
from api.db.services.knowledgebase_service import KnowledgebaseService
from common.metadata_utils import dedupe_list
from common.misc_utils import get_uuid
from common.time_utils import current_timestamp, get_format_time
from common.constants import LLMType, ParserType, StatusEnum, TaskStatus, SVR_CONSUMER_GROUP_NAME
@ -799,7 +800,10 @@ class DocumentService(CommonService):
match_provided = "match" in upd
if isinstance(meta[key], list):
if not match_provided:
meta[key] = new_value
if isinstance(new_value, list):
meta[key] = dedupe_list(new_value)
else:
meta[key] = new_value
changed = True
else:
match_value = upd.get("match")
@ -812,7 +816,7 @@ class DocumentService(CommonService):
else:
new_list.append(item)
if replaced:
meta[key] = new_list
meta[key] = dedupe_list(new_list)
changed = True
else:
if not match_provided: