feature:Add OceanBase Storage Support for Table Parser (#12923)

### What problem does this PR solve? close #12770 This PR adds OceanBase as a storage backend for the Table Parser. It enables dynamic table schema storage via JSON and implements OceanBase SQL execution for text-to-SQL retrieval. ### Type of change - [ ] Bug Fix (non-breaking change which fixes an issue) - [x] New Feature (non-breaking change which adds functionality) - [ ] Documentation Update - [ ] Refactoring - [ ] Performance Improvement - [ ] Other (please describe): ### Changes - Table Parser stores row data into `chunk_data` when doc engine is OceanBase. (table.py) - OceanBase table schema adds `chunk_data` JSON column and migrates if needed. - Implemented OceanBase `sql()` to execute text-to-SQL results. (ob_conn.py) - Add `DOC_ENGINE_OCEANBASE` flag for engine detection (setting.py) ### Test 1. Set `DOC_ENGINE=oceanbase` (e.g. in `docker/.env`) <img width="1290" height="783" alt="doc_engine_ob" src="https://github.com/user-attachments/assets/7d1c609f-7bf2-4b2e-b4cc-4243e72ad4f1" /> 2. Upload an Excel file to Knowledge Base.(for test, we use as below) <img width="786" height="930" alt="excel" src="https://github.com/user-attachments/assets/bedf82f2-cd00-426b-8f4d-6978a151231a" /> 3. Choose **Table** as parsing method. <img width="2550" height="1134" alt="parse_excel" src="https://github.com/user-attachments/assets/aba11769-02be-4905-97e1-e24485e24cd0" /> 4.Ask a natural language query in chat. <img width="2550" height="1134" alt="query" src="https://github.com/user-attachments/assets/26a910a6-e503-4ac7-b66a-f5754bbb0e91" />
2026-02-01 16:15:07 +08:00 · 2026-01-31 15:11:54 +08:00
parent ee23b9eb63
commit 23bdf25a1f
4 changed files with 115 additions and 15 deletions
--- a/api/db/services/dialog_service.py
+++ b/api/db/services/dialog_service.py
@ -586,7 +586,12 @@ async def use_sql(question, field_map, tenant_id, chat_mdl, quota=True, kb_ids=N
    logging.debug(f"use_sql: Question: {question}")

    # Determine which document engine we're using
-    doc_engine = "infinity" if settings.DOC_ENGINE_INFINITY else "es"
+    if settings.DOC_ENGINE_INFINITY:
+        doc_engine = "infinity"
+    elif settings.DOC_ENGINE_OCEANBASE:
+        doc_engine = "oceanbase"
+    else:
+        doc_engine = "es"

    # Construct the full table name
    # For Elasticsearch: ragflow_{tenant_id} (kb_id is in WHERE clause)
@ -633,6 +638,37 @@ Write SQL using json_extract_string() with exact field names. Include doc_id, do
            "\n".join([f"  - {field}" for field in json_field_names]),
            question
        )
+    elif doc_engine == "oceanbase":
+        # Build OceanBase prompts with JSON extraction context
+        json_field_names = list(field_map.keys())
+        sys_prompt = """You are a Database Administrator. Write SQL for a table with JSON 'chunk_data' column.
+
+JSON Extraction: json_extract_string(chunk_data, '$.FieldName')
+Numeric Cast: CAST(json_extract_string(chunk_data, '$.FieldName') AS INTEGER/FLOAT)
+NULL Check: json_extract_isnull(chunk_data, '$.FieldName') == false
+
+RULES:
+1. Use EXACT field names (case-sensitive) from the list below
+2. For SELECT: include doc_id, docnm_kwd, and json_extract_string() for requested fields
+3. For COUNT: use COUNT(*) or COUNT(DISTINCT json_extract_string(...))
+4. Add AS alias for extracted field names
+5. DO NOT select 'content' field
+6. Only add NULL check (json_extract_isnull() == false) in WHERE clause when:
+   - Question asks to "show me" or "display" specific columns
+   - Question mentions "not null" or "excluding null"
+   - Add NULL check for count specific column
+   - DO NOT add NULL check for COUNT(*) queries (COUNT(*) counts all rows including nulls)
+7. Output ONLY the SQL, no explanations"""
+        user_prompt = """Table: {}
+Fields (EXACT case): {}
+{}
+Question: {}
+Write SQL using json_extract_string() with exact field names. Include doc_id, docnm_kwd for data queries. Only SQL.""".format(
+            table_name,
+            ", ".join(json_field_names),
+            "\n".join([f"  - {field}" for field in json_field_names]),
+            question
+        )
    else:
        # Build ES/OS prompts with direct field access
        sys_prompt = """You are a Database Administrator. Write SQL queries.
@ -703,7 +739,7 @@ Write SQL using exact field names above. Include doc_id, docnm_kwd for data quer
    except Exception as e:
        logging.warning(f"use_sql: Initial SQL execution FAILED with error: {e}")
        # Build retry prompt with error information
-        if doc_engine == "infinity":
+        if doc_engine in ("infinity", "oceanbase"):
            # Build Infinity error retry prompt
            json_field_names = list(field_map.keys())
            user_prompt = """