Add dataset with table parser type for Infinity and answer question in chat using SQL (#12541)

### What problem does this PR solve?

1) Create  dataset using table parser for infinity
2) Answer questions in chat using SQL

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
qinling0210
2026-01-19 19:35:14 +08:00
committed by GitHub
parent 05da2a5872
commit b40d639fdb
19 changed files with 1003 additions and 101 deletions

View File

@ -317,7 +317,18 @@ class InfinityConnection(InfinityConnectionBase):
break
if vector_size == 0:
raise ValueError("Cannot infer vector size from documents")
self.create_idx(index_name, knowledgebase_id, vector_size)
# Determine parser_id from document structure
# Table parser documents have 'chunk_data' field
parser_id = None
if "chunk_data" in documents[0] and isinstance(documents[0].get("chunk_data"), dict):
from common.constants import ParserType
parser_id = ParserType.TABLE.value
self.logger.debug("Detected TABLE parser from document structure")
# Fallback: Create table with base schema (shouldn't normally happen as init_kb() creates it)
self.logger.debug(f"Fallback: Creating table {table_name} with base schema, parser_id: {parser_id}")
self.create_idx(index_name, knowledgebase_id, vector_size, parser_id)
table_instance = db_instance.get_table(table_name)
# embedding fields can't have a default value....
@ -378,6 +389,12 @@ class InfinityConnection(InfinityConnectionBase):
d[k] = v
elif re.search(r"_feas$", k):
d[k] = json.dumps(v)
elif k == "chunk_data":
# Convert data dict to JSON string for storage
if isinstance(v, dict):
d[k] = json.dumps(v)
else:
d[k] = v
elif k == "kb_id":
if isinstance(d[k], list):
d[k] = d[k][0] # since d[k] is a list, but we need a str
@ -586,6 +603,9 @@ class InfinityConnection(InfinityConnectionBase):
res2[column] = res2[column].apply(lambda v: [kwd for kwd in v.split("###") if kwd])
elif re.search(r"_feas$", k):
res2[column] = res2[column].apply(lambda v: json.loads(v) if v else {})
elif k == "chunk_data":
# Parse JSON data back to dict for table parser fields
res2[column] = res2[column].apply(lambda v: json.loads(v) if v and isinstance(v, str) else v)
elif k == "position_int":
def to_position_int(v):
if v: