## Summary

Fixes #12520: deleted chunks should not appear in retrieval/reference results.

## Changes

### Core Fix

- **api/apps/chunk_app.py**: include `doc_id` in the delete condition to properly scope the delete operation

### Improved Error Handling

- **api/db/services/document_service.py**: better separation of concerns, with individual try/except blocks and proper logging for each cleanup operation

### Doc Store Updates

- **rag/utils/es_conn.py**: updated delete query construction to support compound conditions
- **rag/utils/opensearch_conn.py**: the same updates for OpenSearch compatibility

### Tests

- **test/testcases/.../test_retrieval_chunks.py**: added a `TestDeletedChunksNotRetrievable` class with regression tests
- **test/unit/test_delete_query_construction.py**: unit tests for delete query construction

## Testing

- Added regression tests verifying that deleted chunks are not returned by the retrieval API (see the sketch below)
- Tests cover both single-chunk and batch deletion scenarios
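As a rough illustration of what the regression tests assert, here is a minimal pytest sketch. The `client` fixture and its `add_chunk`/`delete_chunks`/`retrieval` helpers are hypothetical stand-ins for the project's actual test utilities, not the real SDK API:

```python
# Minimal pytest sketch of the regression being tested. The fixtures
# and helper methods below are hypothetical stand-ins for the test
# scaffolding used in test_retrieval_chunks.py.
class TestDeletedChunksNotRetrievable:
    def test_single_chunk_deletion(self, client, dataset, document):
        # Add a chunk, delete it, then confirm retrieval no longer sees it.
        chunk = client.add_chunk(document, content="needle-12520")
        client.delete_chunks(document, chunk_ids=[chunk.id])

        results = client.retrieval(dataset, question="needle-12520")
        assert chunk.id not in [c.id for c in results]

    def test_batch_deletion(self, client, dataset, document):
        # Same invariant for a batch delete of several chunks at once.
        chunks = [client.add_chunk(document, content=f"needle-{i}") for i in range(3)]
        client.delete_chunks(document, chunk_ids=[c.id for c in chunks])

        results = client.retrieval(dataset, question="needle")
        deleted_ids = {c.id for c in chunks}
        assert deleted_ids.isdisjoint({c.id for c in results})
```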
**api/apps/chunk_app.py**

```diff
@@ -223,7 +223,9 @@ async def rm():
     e, doc = DocumentService.get_by_id(req["doc_id"])
     if not e:
         return get_data_error_result(message="Document not found!")
-    if not settings.docStoreConn.delete({"id": req["chunk_ids"]},
+    # Include doc_id in condition to properly scope the delete
+    condition = {"id": req["chunk_ids"], "doc_id": req["doc_id"]}
+    if not settings.docStoreConn.delete(condition,
                                         search.index_name(DocumentService.get_tenant_id(req["doc_id"])),
                                         doc.kb_id):
         return get_data_error_result(message="Chunk deleting failure")
```
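This passes a compound condition (`id` plus `doc_id`) to `docStoreConn.delete`, which is what the **rag/utils/es_conn.py** and **rag/utils/opensearch_conn.py** updates enable. A minimal sketch of how such a condition might be translated into an Elasticsearch-style delete query (an assumed shape for illustration, not the actual es_conn.py code):

```python
# Sketch: turning a condition dict like {"id": [...], "doc_id": "..."}
# into an Elasticsearch delete_by_query body. List values become
# "terms" filters, scalar values become "term" filters. This mirrors
# the intent of the es_conn.py change, not its exact implementation.
def build_delete_query(condition: dict) -> dict:
    filters = []
    for field, value in condition.items():
        if isinstance(value, list):
            filters.append({"terms": {field: value}})
        else:
            filters.append({"term": {field: value}})
    return {"query": {"bool": {"filter": filters}}}


# For {"id": ["c1", "c2"], "doc_id": "d42"} this yields a query that
# matches only the listed chunks *and* the owning document:
# {"query": {"bool": {"filter": [{"terms": {"id": ["c1", "c2"]}},
#                                {"term": {"doc_id": "d42"}}]}}}
```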
**api/db/services/document_service.py**

```diff
@@ -340,14 +340,35 @@ class DocumentService(CommonService):
     def remove_document(cls, doc, tenant_id):
         from api.db.services.task_service import TaskService
         cls.clear_chunk_num(doc.id)
+
+        # Delete tasks first
         try:
             TaskService.filter_delete([Task.doc_id == doc.id])
+        except Exception as e:
+            logging.warning(f"Failed to delete tasks for document {doc.id}: {e}")
+
+        # Delete chunk images (non-critical, log and continue)
+        try:
             cls.delete_chunk_images(doc, tenant_id)
+        except Exception as e:
+            logging.warning(f"Failed to delete chunk images for document {doc.id}: {e}")
+
+        # Delete thumbnail (non-critical, log and continue)
+        try:
             if doc.thumbnail and not doc.thumbnail.startswith(IMG_BASE64_PREFIX):
                 if settings.STORAGE_IMPL.obj_exist(doc.kb_id, doc.thumbnail):
                     settings.STORAGE_IMPL.rm(doc.kb_id, doc.thumbnail)
-            settings.docStoreConn.delete({"doc_id": doc.id}, search.index_name(tenant_id), doc.kb_id)
+        except Exception as e:
+            logging.warning(f"Failed to delete thumbnail for document {doc.id}: {e}")
+
+        # Delete chunks from doc store - this is critical, log errors
+        try:
+            settings.docStoreConn.delete({"doc_id": doc.id}, search.index_name(tenant_id), doc.kb_id)
+        except Exception as e:
+            logging.error(f"Failed to delete chunks from doc store for document {doc.id}: {e}")

+        # Cleanup knowledge graph references (non-critical, log and continue)
+        try:
             graph_source = settings.docStoreConn.get_fields(
                 settings.docStoreConn.search(["source_id"], [], {"kb_id": doc.kb_id, "knowledge_graph_kwd": ["graph"]}, [], OrderByExpr(), 0, 1, search.index_name(tenant_id), [doc.kb_id]), ["source_id"]
             )
```
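The one-line `docStoreConn.search(...)` call above packs nine positional arguments. For readability, here is the same call with keyword names, which are an assumption about the doc-store connection interface and are shown purely as annotation:

```python
# Keyword-annotated version of the positional search call above.
# Parameter names are assumed, not taken from the actual interface.
settings.docStoreConn.search(
    selectFields=["source_id"],                    # fields to return
    highlightFields=[],                            # no highlighting
    condition={"kb_id": doc.kb_id,
               "knowledge_graph_kwd": ["graph"]},  # filter to the KB's graph doc
    matchExprs=[],                                 # no full-text matching
    orderBy=OrderByExpr(),                         # default ordering
    offset=0, limit=1,                             # fetch at most one hit
    indexNames=search.index_name(tenant_id),
    knowledgebaseIds=[doc.kb_id],
)
```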
```diff
@@ -360,8 +381,9 @@ class DocumentService(CommonService):
                                          search.index_name(tenant_id), doc.kb_id)
             settings.docStoreConn.delete({"kb_id": doc.kb_id, "knowledge_graph_kwd": ["entity", "relation", "graph", "subgraph", "community_report"], "must_not": {"exists": "source_id"}},
                                          search.index_name(tenant_id), doc.kb_id)
-        except Exception:
-            pass
+        except Exception as e:
+            logging.warning(f"Failed to cleanup knowledge graph for document {doc.id}: {e}")
+
         return cls.delete_by_id(doc.id)

     @classmethod
```
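Since each cleanup step is now wrapped individually, a failure in one step no longer aborts the rest. A minimal unit-test sketch of that property (patch targets and mocks are assumptions; a real test would also need to account for the DB connection context and the project's actual test scaffolding):

```python
# Sketch: a failure in an early, non-critical cleanup step must not
# prevent the critical chunk delete from running. Patch targets are
# illustrative and may need adjusting to the real module paths.
from unittest.mock import MagicMock, patch

from api.db.services.document_service import DocumentService


def test_cleanup_steps_are_independent():
    doc = MagicMock(id="d42", kb_id="kb1", thumbnail=None)
    with patch("api.db.services.task_service.TaskService.filter_delete",
               side_effect=RuntimeError("boom")), \
         patch.object(DocumentService, "clear_chunk_num"), \
         patch.object(DocumentService, "delete_chunk_images"), \
         patch.object(DocumentService, "delete_by_id"), \
         patch("api.db.services.document_service.settings") as mock_settings:
        DocumentService.remove_document(doc, tenant_id="t1")

    # Despite the task-delete failure, the chunk delete still happened.
    conditions = [c.args[0] for c in mock_settings.docStoreConn.delete.call_args_list]
    assert {"doc_id": "d42"} in conditions
```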