From 8af769de410af2b7b2d08e77e75902f5519ba623 Mon Sep 17 00:00:00 2001 From: Liu An Date: Thu, 16 Oct 2025 12:47:24 +0800 Subject: [PATCH] Fix: add toc_kwd field and update page_num_int type (#10596) ### What problem does this PR solve? - Added new field 'toc_kwd' to infinity_mapping.json for table of contents keyword support - Changed page_num_int from integer to array type in task_executor.py to handle multiple page numbers ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- conf/infinity_mapping.json | 5 ++--- rag/svr/task_executor.py | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/conf/infinity_mapping.json b/conf/infinity_mapping.json index 3e39044a7..e44766447 100644 --- a/conf/infinity_mapping.json +++ b/conf/infinity_mapping.json @@ -31,7 +31,6 @@ "entities_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"}, "pagerank_fea": {"type": "integer", "default": 0}, "tag_feas": {"type": "varchar", "default": "", "analyzer": "rankfeatures"}, - "from_entity_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"}, "to_entity_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"}, "entity_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"}, @@ -39,6 +38,6 @@ "source_id": {"type": "varchar", "default": "", "analyzer": "whitespace-#"}, "n_hop_with_weight": {"type": "varchar", "default": ""}, "removed_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"}, - - "doc_type_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"} + "doc_type_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"}, + "toc_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"} } diff --git a/rag/svr/task_executor.py b/rag/svr/task_executor.py index 14a0b5cda..7674952ff 100644 --- a/rag/svr/task_executor.py +++ b/rag/svr/task_executor.py @@ -447,7 +447,7 @@ def build_TOC(task, docs, progress_callback): d["content_with_weight"] = json.dumps(toc, ensure_ascii=False) d["toc_kwd"] = "toc" d["available_int"] = 0 - d["page_num_int"] = 100000000 + d["page_num_int"] = [100000000] d["id"] = xxhash.xxh64((d["content_with_weight"] + str(d["doc_id"])).encode("utf-8", "surrogatepass")).hexdigest() return d