Feat: Add Duplicate ID Check and Update Deletion Logic (#6376)

- Introduce the `check_duplicate_ids` function in `dataset.py` and
`doc.py` to check for and handle duplicate IDs.
- Update the deletion operation to ensure that when deleting datasets
and documents, error messages regarding duplicate IDs can be returned.
- Implement the `check_duplicate_ids` function in `api_utils.py` to
return unique IDs and error messages for duplicate IDs.


### What problem does this PR solve?

Close https://github.com/infiniflow/ragflow/issues/6234

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

---------

Co-authored-by: wenju.li <wenju.li@deepctr.cn>
Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>
This commit is contained in:
liwenju0
2025-03-21 14:05:17 +08:00
committed by GitHub
parent 7cc5603a82
commit efdfb39a33
3 changed files with 86 additions and 10 deletions

View File

@ -30,7 +30,7 @@ from api.utils.api_utils import (
token_required,
get_error_data_result,
valid,
get_parser_config, valid_parser_config, dataset_readonly_fields,
get_parser_config, valid_parser_config, dataset_readonly_fields,check_duplicate_ids
)
@ -237,7 +237,7 @@ def delete(tenant_id):
if not req:
ids = None
else:
ids = set(req.get("ids"))
ids = req.get("ids")
if not ids:
id_list = []
kbs = KnowledgebaseService.query(tenant_id=tenant_id)
@ -245,6 +245,9 @@ def delete(tenant_id):
id_list.append(kb.id)
else:
id_list = ids
unique_id_list, duplicate_messages = check_duplicate_ids(id_list, "dataset")
id_list = unique_id_list
for id in id_list:
kbs = KnowledgebaseService.query(id=id, tenant_id=tenant_id)
if not kbs:
@ -276,6 +279,11 @@ def delete(tenant_id):
)
else:
return get_error_data_result(message="; ".join(errors))
if duplicate_messages:
if success_count > 0:
return get_result(message=f"Partially deleted {success_count} datasets with {len(duplicate_messages)} errors", data={"success_count": success_count, "errors": duplicate_messages},)
else:
return get_error_data_result(message=";".join(duplicate_messages))
return get_result(code=settings.RetCode.SUCCESS)