mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Feat: Add Duplicate ID Check and Update Deletion Logic (#6376)
- Introduce the `check_duplicate_ids` function in `dataset.py` and `doc.py` to check for and handle duplicate IDs.
- Update the deletion operation to ensure that when deleting datasets and documents, error messages regarding duplicate IDs can be returned.
- Implement the `check_duplicate_ids` function in `api_utils.py` to return unique IDs and error messages for duplicate IDs.

### What problem does this PR solve?

Close https://github.com/infiniflow/ragflow/issues/6234

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

---------

Co-authored-by: wenju.li <wenju.li@deepctr.cn>
Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>
This commit is contained in:
@ -411,3 +411,34 @@ def valid_parser_config(parser_config):
|
||||
assert 0 <= parser_config.get("topn_tags", 0) < 10, "topn_tags should be in range from 0 to 10"
|
||||
assert isinstance(parser_config.get("html4excel", False), bool), "html4excel should be True or False"
|
||||
assert isinstance(parser_config.get("delimiter", ""), str), "delimiter should be str"
|
||||
|
||||
|
||||
def check_duplicate_ids(ids, id_type="item"):
    """
    Check for duplicate IDs in a list and return unique IDs and error messages.

    Args:
        ids (list): List of hashable IDs to check for duplicates. ``None`` is
            treated as an empty list.
        id_type (str): Type of ID for error messages (e.g., 'document', 'dataset', 'chunk')

    Returns:
        tuple: (unique_ids, error_messages)
            - unique_ids (list): Unique IDs, in order of first occurrence
            - error_messages (list): One "Duplicate <id_type> ids: <id>" message
              per ID that occurs more than once, in order of first occurrence
    """
    if ids is None:
        return [], []

    # Count occurrences of each ID. A dict keeps insertion order, so its keys
    # double as the de-duplicated list in first-occurrence order.
    id_count = {}
    for id_value in ids:
        id_count[id_value] = id_count.get(id_value, 0) + 1

    duplicate_messages = [
        f"Duplicate {id_type} ids: {id_value}"
        for id_value, count in id_count.items()
        if count > 1
    ]

    # Return the dict keys rather than list(set(ids)): set iteration order is
    # arbitrary (and varies between runs for strings), which made the result
    # order non-deterministic for callers.
    return list(id_count), duplicate_messages
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user