From 2ea8dddef6a5c8815f00a32df586e6a75cbfe701 Mon Sep 17 00:00:00 2001 From: liuxiaoyusky <49766325+liuxiaoyusky@users.noreply.github.com> Date: Thu, 15 Jan 2026 15:32:40 +0800 Subject: [PATCH] =?UTF-8?q?fix(infinity):=20Use=20comma=20separator=20for?= =?UTF-8?q?=20important=5Fkwd=20to=20preserve=20mult=E2=80=A6=20(#12618)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Problem The `important_kwd` field in the Infinity connector was using mismatched separators: - **Storage**: `list2str(v)` uses space as default separator - **Reading**: `v.split()` splits by all whitespace This causes multi-word keywords like `"Senior Fund Manager"` to be incorrectly split into `["Senior", "Fund", "Manager"]`. ## Solution Use comma `,` as separator for both storing and reading, consistent with: 1. The LLM output format in `keyword_prompt.md` ("delimited by ENGLISH COMMA") 2. The `cached.split(",")` in `task_executor.py` ## Changes - `insert()`: `list2str(v)` → `list2str(v, ",")` - `update()`: `list2str(v)` → `list2str(v, ",")` - `get_fields()`: `v.split()` → `v.split(",") if v else []` ## Impact This bug affects: - Python-level reranking weight calculation (`important_kwd * 5`) - API response keyword display - Search precision due to fragmented keywords --- rag/utils/infinity_conn.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rag/utils/infinity_conn.py b/rag/utils/infinity_conn.py index 79f871e80..ac5129735 100644 --- a/rag/utils/infinity_conn.py +++ b/rag/utils/infinity_conn.py @@ -340,7 +340,7 @@ class InfinityConnection(InfinityConnectionBase): if not d.get("docnm_kwd"): d["docnm"] = self.list2str(v) elif k == "important_kwd": - d["important_keywords"] = self.list2str(v) + d["important_keywords"] = self.list2str(v, ",") elif k == "important_tks": if not d.get("important_kwd"): d["important_keywords"] = v @@ -429,7 +429,7 @@ class 
InfinityConnection(InfinityConnectionBase): if not new_value.get("docnm_kwd"): new_value["docnm"] = v elif k == "important_kwd": - new_value["important_keywords"] = self.list2str(v) + new_value["important_keywords"] = self.list2str(v, ",") elif k == "important_tks": if not new_value.get("important_kwd"): new_value["important_keywords"] = v @@ -532,7 +532,7 @@ class InfinityConnection(InfinityConnectionBase): res[field] = res["docnm"] if "important_keywords" in res.columns: if "important_kwd" in fields_all: - res["important_kwd"] = res["important_keywords"].apply(lambda v: v.split()) + res["important_kwd"] = res["important_keywords"].apply(lambda v: v.split(",") if v else []) if "important_tks" in fields_all: res["important_tks"] = res["important_keywords"] if "questions" in res.columns: