mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-02-01 16:15:07 +08:00
Fix metadata in get_list() (#12906)
### What problem does this PR solve? test_update_document.py failed as metadata is not included in the response of get_list(), fix the issue. ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
@ -28,6 +28,8 @@ from typing import Dict, List, Optional
|
|||||||
from api.db.db_models import DB, Document
|
from api.db.db_models import DB, Document
|
||||||
from common import settings
|
from common import settings
|
||||||
from common.metadata_utils import dedupe_list
|
from common.metadata_utils import dedupe_list
|
||||||
|
from api.db.db_models import Knowledgebase
|
||||||
|
from common.doc_store.doc_store_base import OrderByExpr
|
||||||
|
|
||||||
|
|
||||||
class DocMetadataService:
|
class DocMetadataService:
|
||||||
@ -57,7 +59,7 @@ class DocMetadataService:
|
|||||||
Returns:
|
Returns:
|
||||||
Simple metadata dictionary
|
Simple metadata dictionary
|
||||||
"""
|
"""
|
||||||
if not flat_meta:
|
if not flat_meta or not isinstance(flat_meta, dict):
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
meta_fields = flat_meta.get('meta_fields')
|
meta_fields = flat_meta.get('meta_fields')
|
||||||
@ -158,9 +160,6 @@ class DocMetadataService:
|
|||||||
Returns:
|
Returns:
|
||||||
Search results from ES/Infinity
|
Search results from ES/Infinity
|
||||||
"""
|
"""
|
||||||
from api.db.db_models import Knowledgebase
|
|
||||||
from common.doc_store.doc_store_base import OrderByExpr
|
|
||||||
|
|
||||||
kb = Knowledgebase.get_by_id(kb_id)
|
kb = Knowledgebase.get_by_id(kb_id)
|
||||||
if not kb:
|
if not kb:
|
||||||
return None
|
return None
|
||||||
@ -247,8 +246,6 @@ class DocMetadataService:
|
|||||||
True if successful, False otherwise
|
True if successful, False otherwise
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
from api.db.db_models import Knowledgebase
|
|
||||||
|
|
||||||
# Get document with tenant_id (need to join with Knowledgebase)
|
# Get document with tenant_id (need to join with Knowledgebase)
|
||||||
doc_query = Document.select(Document, Knowledgebase.tenant_id).join(
|
doc_query = Document.select(Document, Knowledgebase.tenant_id).join(
|
||||||
Knowledgebase, on=(Knowledgebase.id == Document.kb_id)
|
Knowledgebase, on=(Knowledgebase.id == Document.kb_id)
|
||||||
@ -304,6 +301,13 @@ class DocMetadataService:
|
|||||||
if result:
|
if result:
|
||||||
logging.error(f"Failed to insert metadata for document {doc_id}: {result}")
|
logging.error(f"Failed to insert metadata for document {doc_id}: {result}")
|
||||||
return False
|
return False
|
||||||
|
# Force ES refresh to make metadata immediately available for search
|
||||||
|
if not settings.DOC_ENGINE_INFINITY:
|
||||||
|
try:
|
||||||
|
settings.docStoreConn.es.indices.refresh(index=index_name)
|
||||||
|
logging.debug(f"Refreshed metadata index: {index_name}")
|
||||||
|
except Exception as e:
|
||||||
|
logging.warning(f"Failed to refresh metadata index {index_name}: {e}")
|
||||||
|
|
||||||
logging.debug(f"Successfully inserted metadata for document {doc_id}")
|
logging.debug(f"Successfully inserted metadata for document {doc_id}")
|
||||||
return True
|
return True
|
||||||
@ -329,8 +333,6 @@ class DocMetadataService:
|
|||||||
True if successful, False otherwise
|
True if successful, False otherwise
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
from api.db.db_models import Knowledgebase
|
|
||||||
|
|
||||||
# Get document with tenant_id
|
# Get document with tenant_id
|
||||||
doc_query = Document.select(Document, Knowledgebase.tenant_id).join(
|
doc_query = Document.select(Document, Knowledgebase.tenant_id).join(
|
||||||
Knowledgebase, on=(Knowledgebase.id == Document.kb_id)
|
Knowledgebase, on=(Knowledgebase.id == Document.kb_id)
|
||||||
@ -394,10 +396,7 @@ class DocMetadataService:
|
|||||||
True if successful (or no metadata to delete), False otherwise
|
True if successful (or no metadata to delete), False otherwise
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
from api.db.db_models import Knowledgebase
|
|
||||||
|
|
||||||
logging.debug(f"[METADATA DELETE] Starting metadata deletion for document: {doc_id}")
|
logging.debug(f"[METADATA DELETE] Starting metadata deletion for document: {doc_id}")
|
||||||
|
|
||||||
# Get document with tenant_id
|
# Get document with tenant_id
|
||||||
doc_query = Document.select(Document, Knowledgebase.tenant_id).join(
|
doc_query = Document.select(Document, Knowledgebase.tenant_id).join(
|
||||||
Knowledgebase, on=(Knowledgebase.id == Document.kb_id)
|
Knowledgebase, on=(Knowledgebase.id == Document.kb_id)
|
||||||
@ -494,8 +493,6 @@ class DocMetadataService:
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.warning(f"[DROP EMPTY TABLE] Count API failed, falling back to search: {e}")
|
logging.warning(f"[DROP EMPTY TABLE] Count API failed, falling back to search: {e}")
|
||||||
# Fallback to search if count fails
|
# Fallback to search if count fails
|
||||||
from common.doc_store.doc_store_base import OrderByExpr
|
|
||||||
|
|
||||||
results = settings.docStoreConn.search(
|
results = settings.docStoreConn.search(
|
||||||
select_fields=["id"],
|
select_fields=["id"],
|
||||||
highlight_fields=[],
|
highlight_fields=[],
|
||||||
@ -564,8 +561,6 @@ class DocMetadataService:
|
|||||||
Metadata dictionary, empty dict if not found
|
Metadata dictionary, empty dict if not found
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
from api.db.db_models import Knowledgebase
|
|
||||||
|
|
||||||
# Get document with tenant_id
|
# Get document with tenant_id
|
||||||
doc_query = Document.select(Document, Knowledgebase.tenant_id).join(
|
doc_query = Document.select(Document, Knowledgebase.tenant_id).join(
|
||||||
Knowledgebase, on=(Knowledgebase.id == Document.kb_id)
|
Knowledgebase, on=(Knowledgebase.id == Document.kb_id)
|
||||||
@ -618,9 +613,6 @@ class DocMetadataService:
|
|||||||
Metadata dictionary in format: {field_name: {value: [doc_ids]}}
|
Metadata dictionary in format: {field_name: {value: [doc_ids]}}
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
from api.db.db_models import Knowledgebase
|
|
||||||
from common.doc_store.doc_store_base import OrderByExpr
|
|
||||||
|
|
||||||
# Get tenant_id from first KB
|
# Get tenant_id from first KB
|
||||||
kb = Knowledgebase.get_by_id(kb_ids[0])
|
kb = Knowledgebase.get_by_id(kb_ids[0])
|
||||||
if not kb:
|
if not kb:
|
||||||
@ -697,9 +689,6 @@ class DocMetadataService:
|
|||||||
Metadata dictionary in format: {field_name: {value: [doc_ids]}}
|
Metadata dictionary in format: {field_name: {value: [doc_ids]}}
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
from api.db.db_models import Knowledgebase
|
|
||||||
from common.doc_store.doc_store_base import OrderByExpr
|
|
||||||
|
|
||||||
# Get tenant_id from first KB
|
# Get tenant_id from first KB
|
||||||
kb = Knowledgebase.get_by_id(kb_ids[0])
|
kb = Knowledgebase.get_by_id(kb_ids[0])
|
||||||
if not kb:
|
if not kb:
|
||||||
|
|||||||
@ -109,7 +109,12 @@ class DocumentService(CommonService):
|
|||||||
|
|
||||||
count = docs.count()
|
count = docs.count()
|
||||||
docs = docs.paginate(page_number, items_per_page)
|
docs = docs.paginate(page_number, items_per_page)
|
||||||
return list(docs.dicts()), count
|
|
||||||
|
docs_list = list(docs.dicts())
|
||||||
|
metadata_map = DocMetadataService.get_metadata_for_documents(None, kb_id)
|
||||||
|
for doc in docs_list:
|
||||||
|
doc["meta_fields"] = metadata_map.get(doc["id"], {})
|
||||||
|
return docs_list, count
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@DB.connection_context()
|
@DB.connection_context()
|
||||||
|
|||||||
Reference in New Issue
Block a user