mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-01-31 23:55:06 +08:00
Add dataset with table parser type for Infinity and answer question in chat using SQL (#12541)
### What problem does this PR solve? 1) Create dataset using table parser for infinity 2) Answer questions in chat using SQL ### Type of change - [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
@ -317,7 +317,18 @@ class InfinityConnection(InfinityConnectionBase):
|
||||
break
|
||||
if vector_size == 0:
|
||||
raise ValueError("Cannot infer vector size from documents")
|
||||
self.create_idx(index_name, knowledgebase_id, vector_size)
|
||||
|
||||
# Determine parser_id from document structure
|
||||
# Table parser documents have 'chunk_data' field
|
||||
parser_id = None
|
||||
if "chunk_data" in documents[0] and isinstance(documents[0].get("chunk_data"), dict):
|
||||
from common.constants import ParserType
|
||||
parser_id = ParserType.TABLE.value
|
||||
self.logger.debug("Detected TABLE parser from document structure")
|
||||
|
||||
# Fallback: Create table with base schema (shouldn't normally happen as init_kb() creates it)
|
||||
self.logger.debug(f"Fallback: Creating table {table_name} with base schema, parser_id: {parser_id}")
|
||||
self.create_idx(index_name, knowledgebase_id, vector_size, parser_id)
|
||||
table_instance = db_instance.get_table(table_name)
|
||||
|
||||
# embedding fields can't have a default value....
|
||||
@ -378,6 +389,12 @@ class InfinityConnection(InfinityConnectionBase):
|
||||
d[k] = v
|
||||
elif re.search(r"_feas$", k):
|
||||
d[k] = json.dumps(v)
|
||||
elif k == "chunk_data":
|
||||
# Convert data dict to JSON string for storage
|
||||
if isinstance(v, dict):
|
||||
d[k] = json.dumps(v)
|
||||
else:
|
||||
d[k] = v
|
||||
elif k == "kb_id":
|
||||
if isinstance(d[k], list):
|
||||
d[k] = d[k][0] # since d[k] is a list, but we need a str
|
||||
@ -586,6 +603,9 @@ class InfinityConnection(InfinityConnectionBase):
|
||||
res2[column] = res2[column].apply(lambda v: [kwd for kwd in v.split("###") if kwd])
|
||||
elif re.search(r"_feas$", k):
|
||||
res2[column] = res2[column].apply(lambda v: json.loads(v) if v else {})
|
||||
elif k == "chunk_data":
|
||||
# Parse JSON data back to dict for table parser fields
|
||||
res2[column] = res2[column].apply(lambda v: json.loads(v) if v and isinstance(v, str) else v)
|
||||
elif k == "position_int":
|
||||
def to_position_int(v):
|
||||
if v:
|
||||
|
||||
Reference in New Issue
Block a user