mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-02-01 08:05:07 +08:00
Put document metadata in ES/Infinity (#12826)
### What problem does this PR solve?
Put document metadata in ES/Infinity.
Index name for document metadata: ragflow_doc_meta_{tenant_id}
### Type of change
- [x] Refactoring
This commit is contained in:
@ -285,8 +285,65 @@ class InfinityConnectionBase(DocStoreConnection):
|
||||
self.logger.info(f"INFINITY created table {table_name}, vector size {vector_size}")
|
||||
return True
|
||||
|
||||
def create_doc_meta_idx(self, index_name: str):
    """
    Create a document metadata table.

    Table name pattern: ragflow_doc_meta_{tenant_id}
    - Per-tenant metadata table for storing document metadata fields

    The table schema is loaded from ``conf/doc_meta_infinity_mapping.json``
    under the project base directory. After the table is created, secondary
    indexes on ``id`` and ``kb_id`` are attempted; a failure to create either
    index is logged as a warning and does not fail the call.

    Args:
        index_name: Used verbatim as the table name.

    Returns:
        True when table creation completed without raising, False when the
        mapping file is missing or an error occurred while creating the table.

    Raises:
        Any exception raised by ``create_database`` propagates to the caller
        (the pooled connection is still released).
    """
    table_name = index_name
    inf_conn = self.connPool.get_conn()
    # The finally clause guarantees the pooled connection is released on every
    # exit path, including the early "mapping file not found" return and a
    # failure inside create_database.
    try:
        inf_db = inf_conn.create_database(self.dbName, ConflictType.Ignore)
        try:
            fp_mapping = os.path.join(get_project_base_directory(), "conf", "doc_meta_infinity_mapping.json")
            if not os.path.exists(fp_mapping):
                self.logger.error(f"Document metadata mapping file not found at {fp_mapping}")
                return False

            # Context manager so the file handle is closed deterministically.
            with open(fp_mapping) as mapping_file:
                schema = json.load(mapping_file)

            inf_db.create_table(
                table_name,
                schema,
                ConflictType.Ignore,
            )

            # Create secondary indexes on id and kb_id for better query performance
            inf_table = inf_db.get_table(table_name)
            for column in ("id", "kb_id"):
                # Best effort: an index failure must not fail table creation.
                try:
                    inf_table.create_index(
                        f"idx_{table_name}_{column}",
                        IndexInfo(column, IndexType.Secondary),
                        ConflictType.Ignore,
                    )
                    self.logger.debug(f"INFINITY created secondary index on {column} for table {table_name}")
                except Exception as e:
                    self.logger.warning(f"Failed to create index on {column} for {table_name}: {e}")

            self.logger.debug(f"INFINITY created document metadata table {table_name} with secondary indexes")
            return True

        except Exception as e:
            self.logger.exception(f"Error creating document metadata table {table_name}: {e}")
            return False
    finally:
        self.connPool.release_conn(inf_conn)
|
||||
|
||||
def delete_idx(self, index_name: str, dataset_id: str):
|
||||
table_name = f"{index_name}_{dataset_id}"
|
||||
if index_name.startswith("ragflow_doc_meta_"):
|
||||
table_name = index_name
|
||||
else:
|
||||
table_name = f"{index_name}_{dataset_id}"
|
||||
inf_conn = self.connPool.get_conn()
|
||||
db_instance = inf_conn.get_database(self.dbName)
|
||||
db_instance.drop_table(table_name, ConflictType.Ignore)
|
||||
@ -294,7 +351,10 @@ class InfinityConnectionBase(DocStoreConnection):
|
||||
self.logger.info(f"INFINITY dropped table {table_name}")
|
||||
|
||||
def index_exist(self, index_name: str, dataset_id: str) -> bool:
|
||||
table_name = f"{index_name}_{dataset_id}"
|
||||
if index_name.startswith("ragflow_doc_meta_"):
|
||||
table_name = index_name
|
||||
else:
|
||||
table_name = f"{index_name}_{dataset_id}"
|
||||
try:
|
||||
inf_conn = self.connPool.get_conn()
|
||||
db_instance = inf_conn.get_database(self.dbName)
|
||||
@ -341,7 +401,10 @@ class InfinityConnectionBase(DocStoreConnection):
|
||||
def delete(self, condition: dict, index_name: str, dataset_id: str) -> int:
|
||||
inf_conn = self.connPool.get_conn()
|
||||
db_instance = inf_conn.get_database(self.dbName)
|
||||
table_name = f"{index_name}_{dataset_id}"
|
||||
if index_name.startswith("ragflow_doc_meta_"):
|
||||
table_name = index_name
|
||||
else:
|
||||
table_name = f"{index_name}_{dataset_id}"
|
||||
try:
|
||||
table_instance = db_instance.get_table(table_name)
|
||||
except Exception:
|
||||
|
||||
Reference in New Issue
Block a user