Feat: Support knowledge base type input in agent flow debugger (#7471)

### What problem does this PR solve?

This is a follow-up to #7088. It adds a knowledge base type input to the `Begin` component and a knowledge base selector to the agent flow debug input panel:

![image](https://github.com/user-attachments/assets/e4cd35f1-1c8e-4f69-bed4-5d613b96d148)

You can then select one or more knowledge bases when testing the agent:

![image](https://github.com/user-attachments/assets/724b547e-4790-4cd8-83d3-67e02f2e76d8)

Note: the lines changed in `agent/component/retrieval.py` after line 94 were modified by `ruff format` from the `pre-commit` hooks; there is no functional change.

### Type of change

- [ ] Bug Fix (non-breaking change which fixes an issue)
- [x] New Feature (non-breaking change which adds functionality)
- [ ] Documentation Update
- [ ] Refactoring
- [ ] Performance Improvement
- [ ] Other (please describe):
2026-01-30 15:16:45 +08:00 · 2025-05-06 19:30:27 +08:00
parent 75b24ba02a
commit bc3160f75a
4 changed files with 46 additions and 22 deletions
--- a/agent/component/retrieval.py
+++ b/agent/component/retrieval.py
@@ -30,10 +30,10 @@ from rag.utils.tavily_conn import Tavily


 class RetrievalParam(ComponentParamBase):
-
    """
    Define the Retrieval component parameters.
    """
+
    def __init__(self):
        super().__init__()
        self.similarity_threshold = 0.2
@@ -67,7 +67,10 @@ class Retrieval(ComponentBase, ABC):
        if len(kb_vars) > 0:
            for kb_var in kb_vars:
                if len(kb_var) == 1:
-                    kb_ids.append(str(kb_var["content"][0]))
+                    kb_var_value = str(kb_var["content"][0])
+
+                    for v in kb_var_value.split(","):
+                        kb_ids.append(v)
                else:
                    for v in kb_var.to_dict("records"):
                        kb_ids.append(v["content"])
@@ -91,20 +94,24 @@ class Retrieval(ComponentBase, ABC):
            rerank_mdl = LLMBundle(kbs[0].tenant_id, LLMType.RERANK, self._param.rerank_id)

        if kbs:
-            kbinfos = settings.retrievaler.retrieval(query, embd_mdl, kbs[0].tenant_id, filtered_kb_ids,
-                                        1, self._param.top_n,
-                                        self._param.similarity_threshold, 1 - self._param.keywords_similarity_weight,
-                                        aggs=False, rerank_mdl=rerank_mdl,
-                                        rank_feature=label_question(query, kbs))
+            kbinfos = settings.retrievaler.retrieval(
+                query,
+                embd_mdl,
+                kbs[0].tenant_id,
+                filtered_kb_ids,
+                1,
+                self._param.top_n,
+                self._param.similarity_threshold,
+                1 - self._param.keywords_similarity_weight,
+                aggs=False,
+                rerank_mdl=rerank_mdl,
+                rank_feature=label_question(query, kbs),
+            )
        else:
            kbinfos = {"chunks": [], "doc_aggs": []}

        if self._param.use_kg and kbs:
-            ck = settings.kg_retrievaler.retrieval(query,
-                                                   [kbs[0].tenant_id],
-                                                   filtered_kb_ids,
-                                                   embd_mdl,
-                                                   LLMBundle(kbs[0].tenant_id, LLMType.CHAT))
+            ck = settings.kg_retrievaler.retrieval(query, [kbs[0].tenant_id], filtered_kb_ids, embd_mdl, LLMBundle(kbs[0].tenant_id, LLMType.CHAT))
            if ck["content_with_weight"]:
                kbinfos["chunks"].insert(0, ck)

@@ -123,5 +130,3 @@ class Retrieval(ComponentBase, ABC):
        df = pd.DataFrame({"content": kb_prompt(kbinfos, 200000), "chunks": json.dumps(kbinfos["chunks"])})
        logging.debug("{} {}".format(query, df))
        return df.dropna()
-
-