mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-01-31 15:45:08 +08:00
Put document metadata in ES/Infinity (#12826)
### What problem does this PR solve?
Store document metadata in ES/Infinity.
Index name for the metadata: `ragflow_doc_meta_{tenant_id}`
### Type of change
- [x] Refactoring
This commit is contained in:
@ -61,6 +61,7 @@ import numpy as np
|
||||
from peewee import DoesNotExist
|
||||
from common.constants import LLMType, ParserType, PipelineTaskType
|
||||
from api.db.services.document_service import DocumentService
|
||||
from api.db.services.doc_metadata_service import DocMetadataService
|
||||
from api.db.services.llm_service import LLMBundle
|
||||
from api.db.services.task_service import TaskService, has_canceled, CANVAS_DEBUG_DOC_ID, GRAPH_RAPTOR_FAKE_DOC_ID
|
||||
from api.db.services.file2document_service import File2DocumentService
|
||||
@ -438,12 +439,10 @@ async def build_chunks(task, progress_callback):
|
||||
metadata = update_metadata_to(metadata, doc["metadata_obj"])
|
||||
del doc["metadata_obj"]
|
||||
if metadata:
|
||||
e, doc = DocumentService.get_by_id(task["doc_id"])
|
||||
if e:
|
||||
if isinstance(doc.meta_fields, str):
|
||||
doc.meta_fields = json.loads(doc.meta_fields)
|
||||
metadata = update_metadata_to(metadata, doc.meta_fields)
|
||||
DocumentService.update_by_id(task["doc_id"], {"meta_fields": metadata})
|
||||
existing_meta = DocMetadataService.get_document_metadata(task["doc_id"])
|
||||
existing_meta = existing_meta if isinstance(existing_meta, dict) else {}
|
||||
metadata = update_metadata_to(metadata, existing_meta)
|
||||
DocMetadataService.update_document_metadata(task["doc_id"], metadata)
|
||||
progress_callback(msg="Question generation {} chunks completed in {:.2f}s".format(len(docs), timer() - st))
|
||||
|
||||
if task["kb_parser_config"].get("tag_kb_ids", []):
|
||||
@ -735,12 +734,10 @@ async def run_dataflow(task: dict):
|
||||
del ck["positions"]
|
||||
|
||||
if metadata:
|
||||
e, doc = DocumentService.get_by_id(doc_id)
|
||||
if e:
|
||||
if isinstance(doc.meta_fields, str):
|
||||
doc.meta_fields = json.loads(doc.meta_fields)
|
||||
metadata = update_metadata_to(metadata, doc.meta_fields)
|
||||
DocumentService.update_by_id(doc_id, {"meta_fields": metadata})
|
||||
existing_meta = DocMetadataService.get_document_metadata(doc_id)
|
||||
existing_meta = existing_meta if isinstance(existing_meta, dict) else {}
|
||||
metadata = update_metadata_to(metadata, existing_meta)
|
||||
DocMetadataService.update_document_metadata(doc_id, metadata)
|
||||
|
||||
start_ts = timer()
|
||||
set_progress(task_id, prog=0.82, msg="[DOC Engine]:\nStart to index...")
|
||||
|
||||
Reference in New Issue
Block a user