Put document metadata in ES/Infinity (#12826)

### What problem does this PR solve?

Put document metadata in ES/Infinity.

Index name of meta data: ragflow_doc_meta_{tenant_id}

### Type of change

- [x] Refactoring
This commit is contained in:
qinling0210
2026-01-28 13:29:34 +08:00
committed by GitHub
parent fd11aca8e5
commit 9a5208976c
24 changed files with 1529 additions and 304 deletions

View File

@ -23,6 +23,7 @@ from api.db.services.canvas_service import UserCanvasService
from api.db.services.conversation_service import ConversationService
from api.db.services.dialog_service import DialogService
from api.db.services.document_service import DocumentService
from api.db.services.doc_metadata_service import DocMetadataService
from api.db.services.file2document_service import File2DocumentService
from api.db.services.knowledgebase_service import KnowledgebaseService
from api.db.services.langfuse_service import TenantLangfuseService
@ -107,6 +108,11 @@ def create_new_user(user_info: dict) -> dict:
except Exception as create_error:
logging.exception(create_error)
# rollback
try:
metadata_index_name = DocMetadataService._get_doc_meta_index_name(user_id)
settings.docStoreConn.delete_idx(metadata_index_name, "")
except Exception as e:
logging.exception(e)
try:
TenantService.delete_by_id(user_id)
except Exception as e:
@ -165,6 +171,12 @@ def delete_user_data(user_id: str) -> dict:
# step1.1.2 delete file and document info in db
doc_ids = DocumentService.get_all_doc_ids_by_kb_ids(kb_ids)
if doc_ids:
for doc in doc_ids:
try:
DocMetadataService.delete_document_metadata(doc["id"], skip_empty_check=True)
except Exception as e:
logging.warning(f"Failed to delete metadata for document {doc['id']}: {e}")
doc_delete_res = DocumentService.delete_by_ids([i["id"] for i in doc_ids])
done_msg += f"- Deleted {doc_delete_res} document records.\n"
task_delete_res = TaskService.delete_by_doc_ids([i["id"] for i in doc_ids])
@ -202,6 +214,13 @@ def delete_user_data(user_id: str) -> dict:
done_msg += f"- Deleted {llm_delete_res} tenant-LLM records.\n"
langfuse_delete_res = TenantLangfuseService.delete_ty_tenant_id(tenant_id)
done_msg += f"- Deleted {langfuse_delete_res} langfuse records.\n"
try:
metadata_index_name = DocMetadataService._get_doc_meta_index_name(tenant_id)
settings.docStoreConn.delete_idx(metadata_index_name, "")
done_msg += f"- Deleted metadata table {metadata_index_name}.\n"
except Exception as e:
logging.warning(f"Failed to delete metadata table for tenant {tenant_id}: {e}")
done_msg += "- Warning: Failed to delete metadata table (continuing).\n"
# step1.3 delete memory and messages
user_memory = MemoryService.get_by_tenant_id(tenant_id)
if user_memory:
@ -269,6 +288,11 @@ def delete_user_data(user_id: str) -> dict:
# step2.1.5 delete document record
doc_delete_res = DocumentService.delete_by_ids([d['id'] for d in created_documents])
done_msg += f"- Deleted {doc_delete_res} documents.\n"
for doc in created_documents:
try:
DocMetadataService.delete_document_metadata(doc['id'])
except Exception as e:
logging.warning(f"Failed to delete metadata for document {doc['id']}: {e}")
# step2.1.6 update dataset doc&chunk&token cnt
for kb_id, doc_num in kb_doc_info.items():
KnowledgebaseService.decrease_document_num_in_delete(kb_id, doc_num)