From 8af769de410af2b7b2d08e77e75902f5519ba623 Mon Sep 17 00:00:00 2001
From: Liu An <asiro@qq.com>
Date: Thu, 16 Oct 2025 12:47:24 +0800
Subject: [PATCH] Fix: add toc_kwd field and update page_num_int type (#10596)

### What problem does this PR solve?

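- Added a new field, `toc_kwd`, to `conf/infinity_mapping.json` for table of
contents keyword support.
- Changed the `page_num_int` value set in `build_TOC`
(`rag/svr/task_executor.py`) from a scalar integer to a single-element list
(array type), so the field can handle multiple page numbers.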

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
---
 conf/infinity_mapping.json | 5 ++---
 rag/svr/task_executor.py   | 2 +-
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/conf/infinity_mapping.json b/conf/infinity_mapping.json
index 3e39044a7..e44766447 100644
--- a/conf/infinity_mapping.json
+++ b/conf/infinity_mapping.json
@@ -31,7 +31,6 @@
 	"entities_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"},
 	"pagerank_fea": {"type": "integer", "default":  0},
 	"tag_feas": {"type": "varchar", "default": "", "analyzer": "rankfeatures"},
-
 	"from_entity_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"},
 	"to_entity_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"},
 	"entity_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"},
@@ -39,6 +38,6 @@
 	"source_id": {"type": "varchar", "default": "", "analyzer": "whitespace-#"},
 	"n_hop_with_weight": {"type": "varchar", "default": ""},
 	"removed_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"},
-
-	"doc_type_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"}
+	"doc_type_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"},
+	"toc_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"}
 }
diff --git a/rag/svr/task_executor.py b/rag/svr/task_executor.py
index 14a0b5cda..7674952ff 100644
--- a/rag/svr/task_executor.py
+++ b/rag/svr/task_executor.py
@@ -447,7 +447,7 @@ def build_TOC(task, docs, progress_callback):
         d["content_with_weight"] = json.dumps(toc, ensure_ascii=False)
         d["toc_kwd"] = "toc"
         d["available_int"] = 0
-        d["page_num_int"] = 100000000
+        d["page_num_int"] = [100000000]
         d["id"] = xxhash.xxh64((d["content_with_weight"] + str(d["doc_id"])).encode("utf-8", "surrogatepass")).hexdigest()
         return d