Mirror of https://github.com/infiniflow/ragflow.git (synced 2025-12-08 20:42:30 +08:00)
Feat: Support metadata auto filter for Search. (#9524)
### What problem does this PR solve?

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
```diff
@@ -484,7 +484,7 @@ class Canvas:
                 threads.append(exe.submit(FileService.parse, file["name"], FileService.get_blob(file["created_by"], file["id"]), True, file["created_by"]))
             return [th.result() for th in threads]
 
-    def tool_use_callback(self, agent_id: str, func_name: str, params: dict, result: Any):
+    def tool_use_callback(self, agent_id: str, func_name: str, params: dict, result: Any, elapsed_time=None):
         agent_ids = agent_id.split("-->")
         agent_name = self.get_component_name(agent_ids[0])
         path = agent_name if len(agent_ids) < 2 else agent_name+"-->"+"-->".join(agent_ids[1:])
```
```diff
@@ -493,16 +493,16 @@ class Canvas:
             if bin:
                 obj = json.loads(bin.encode("utf-8"))
                 if obj[-1]["component_id"] == agent_ids[0]:
-                    obj[-1]["trace"].append({"path": path, "tool_name": func_name, "arguments": params, "result": result})
+                    obj[-1]["trace"].append({"path": path, "tool_name": func_name, "arguments": params, "result": result, "elapsed_time": elapsed_time})
                 else:
                     obj.append({
                         "component_id": agent_ids[0],
-                        "trace": [{"path": path, "tool_name": func_name, "arguments": params, "result": result}]
+                        "trace": [{"path": path, "tool_name": func_name, "arguments": params, "result": result, "elapsed_time": elapsed_time}]
                     })
             else:
                 obj = [{
                     "component_id": agent_ids[0],
-                    "trace": [{"path": path, "tool_name": func_name, "arguments": params, "result": result}]
+                    "trace": [{"path": path, "tool_name": func_name, "arguments": params, "result": result, "elapsed_time": elapsed_time}]
                 }]
             REDIS_CONN.set_obj(f"{self.task_id}-{self.message_id}-logs", obj, 60*10)
         except Exception as e:
```
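The two hunks above extend `Canvas.tool_use_callback` so that every tool-trace entry carries an `elapsed_time` field before the list is written back to Redis under the `{task_id}-{message_id}-logs` key with a 10-minute TTL. Below is a minimal sketch of the structure being accumulated; the `record` helper, agent name, and sample arguments are hypothetical and only illustrate the shape of the data, not the project's code.

```python
# Sketch of the per-agent trace list that tool_use_callback builds up.
# Field names mirror the diff above; everything else is illustrative.
import json

logs = []  # stand-in for the JSON blob stored under f"{task_id}-{message_id}-logs"

def record(component_id, path, tool_name, arguments, result, elapsed_time=None):
    entry = {"path": path, "tool_name": tool_name, "arguments": arguments,
             "result": result, "elapsed_time": elapsed_time}
    if logs and logs[-1]["component_id"] == component_id:
        logs[-1]["trace"].append(entry)  # same agent: extend its trace
    else:
        logs.append({"component_id": component_id, "trace": [entry]})  # new agent block

record("agent_0", "Researcher", "web_search", {"q": "ragflow"}, "ok", elapsed_time=1.42)
record("agent_0", "Researcher", "retrieval", {"kb": "docs"}, "ok", elapsed_time=0.31)
print(json.dumps(logs, ensure_ascii=False, indent=2))
```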
```diff
@@ -22,7 +22,7 @@ from functools import partial
 from typing import Any
 
 import json_repair
-
+from timeit import default_timer as timer
 from agent.tools.base import LLMToolPluginCallSession, ToolParamBase, ToolBase, ToolMeta
 from api.db.services.llm_service import LLMBundle
 from api.db.services.tenant_llm_service import TenantLLMService
```
```diff
@@ -215,8 +215,9 @@ class Agent(LLM, ToolBase):
         hist = deepcopy(history)
         last_calling = ""
         if len(hist) > 3:
+            st = timer()
             user_request = full_question(messages=history, chat_mdl=self.chat_mdl)
-            self.callback("Multi-turn conversation optimization", {}, user_request)
+            self.callback("Multi-turn conversation optimization", {}, user_request, elapsed_time=timer()-st)
         else:
             user_request = history[-1]["content"]
```
```diff
@@ -263,12 +264,13 @@ class Agent(LLM, ToolBase):
             if not need2cite or cited:
                 return
 
+            st = timer()
             txt = ""
             for delta_ans in self._gen_citations(entire_txt):
                 yield delta_ans, 0
                 txt += delta_ans
 
-            self.callback("gen_citations", {}, txt)
+            self.callback("gen_citations", {}, txt, elapsed_time=timer()-st)
 
         def append_user_content(hist, content):
             if hist[-1]["role"] == "user":
```
```diff
@@ -276,8 +278,9 @@ class Agent(LLM, ToolBase):
             else:
                 hist.append({"role": "user", "content": content})
 
+        st = timer()
         task_desc = analyze_task(self.chat_mdl, prompt, user_request, tool_metas)
-        self.callback("analyze_task", {}, task_desc)
+        self.callback("analyze_task", {}, task_desc, elapsed_time=timer()-st)
         for _ in range(self._param.max_rounds + 1):
             response, tk = next_step(self.chat_mdl, hist, tool_metas, task_desc)
             # self.callback("next_step", {}, str(response)[:256]+"...")
```
```diff
@@ -303,9 +306,10 @@ class Agent(LLM, ToolBase):
 
                     thr.append(executor.submit(use_tool, name, args))
 
+                st = timer()
                 reflection = reflect(self.chat_mdl, hist, [th.result() for th in thr])
                 append_user_content(hist, reflection)
-                self.callback("reflection", {}, str(reflection))
+                self.callback("reflection", {}, str(reflection), elapsed_time=timer()-st)
 
             except Exception as e:
                 logging.exception(msg=f"Wrong JSON argument format in LLM ReAct response: {e}")
```
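The `Agent` hunks all repeat the same pattern: capture `st = timer()` before a step and pass `elapsed_time=timer()-st` into `self.callback(...)`. The sketch below shows one way the same measurement could be expressed with a context manager; `timed_callback` and `demo_callback` are hypothetical and not part of this PR, which inlines the timing at each call site.

```python
# Hedged alternative to the repeated st = timer() / elapsed_time=timer()-st pattern.
from contextlib import contextmanager
from timeit import default_timer as timer

@contextmanager
def timed_callback(callback, event: str, params: dict):
    # Measure wall-clock time around the body and forward it as elapsed_time.
    st = timer()
    out = {}
    try:
        yield out  # the caller stores its result in out["result"]
    finally:
        callback(event, params, out.get("result"), elapsed_time=timer() - st)

# Usage, assuming a callback with the signature used in the diff:
def demo_callback(event, params, result, elapsed_time=None):
    print(f"{event}: {elapsed_time:.4f}s")

with timed_callback(demo_callback, "analyze_task", {}) as out:
    out["result"] = sum(range(100_000))  # stand-in for analyze_task(...)
```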
```diff
@@ -24,6 +24,7 @@ from api.utils import hash_str2int
 from rag.llm.chat_model import ToolCallSession
 from rag.prompts.prompts import kb_prompt
 from rag.utils.mcp_tool_call_conn import MCPToolCallSession
+from timeit import default_timer as timer
 
 
 class ToolParameter(TypedDict):
```
```diff
@@ -49,12 +50,13 @@ class LLMToolPluginCallSession(ToolCallSession):
 
     def tool_call(self, name: str, arguments: dict[str, Any]) -> Any:
         assert name in self.tools_map, f"LLM tool {name} does not exist"
+        st = timer()
         if isinstance(self.tools_map[name], MCPToolCallSession):
             resp = self.tools_map[name].tool_call(name, arguments, 60)
         else:
             resp = self.tools_map[name].invoke(**arguments)
 
-        self.callback(name, arguments, resp)
+        self.callback(name, arguments, resp, elapsed_time=timer()-st)
         return resp
 
     def get_tool_obj(self, name):
```
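In `LLMToolPluginCallSession.tool_call` the timing wraps both dispatch branches (an `MCPToolCallSession` versus a direct `invoke` on the tool object). The self-contained sketch below shows how `elapsed_time` reaches the callback for the non-MCP branch; `FakeTool`, `log_callback`, and the local `tool_call` function are made-up stand-ins, not the project's API.

```python
# Illustrative flow of elapsed_time from a tool invocation to the callback.
from timeit import default_timer as timer
import time

class FakeTool:
    """Made-up stand-in for a plugin tool object exposing invoke()."""
    def invoke(self, **kwargs):
        time.sleep(0.05)
        return {"rows": 3}

def log_callback(name, arguments, result, elapsed_time=None):
    print(f"{name} took {elapsed_time:.3f}s -> {result}")

tools_map = {"exesql": FakeTool()}

def tool_call(name, arguments):
    # Mirrors the non-MCP branch of the diff: time the call, then report it.
    st = timer()
    resp = tools_map[name].invoke(**arguments)
    log_callback(name, arguments, resp, elapsed_time=timer() - st)
    return resp

tool_call("exesql", {"sql": "SELECT 1"})
```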
```diff
@@ -79,6 +79,17 @@ class ExeSQL(ToolBase, ABC):
 
     @timeout(os.environ.get("COMPONENT_EXEC_TIMEOUT", 60))
     def _invoke(self, **kwargs):
+
+        def convert_decimals(obj):
+            from decimal import Decimal
+            if isinstance(obj, Decimal):
+                return float(obj)  # or str(obj)
+            elif isinstance(obj, dict):
+                return {k: convert_decimals(v) for k, v in obj.items()}
+            elif isinstance(obj, list):
+                return [convert_decimals(item) for item in obj]
+            return obj
+
         sql = kwargs.get("sql")
         if not sql:
             raise Exception("SQL for `ExeSQL` MUST not be empty.")
```
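The nested `convert_decimals` helper exists because `json` cannot serialize the `decimal.Decimal` values that database drivers typically return for NUMERIC columns. Below is a standalone copy of the same helper with a usage example; the sample rows are illustrative only.

```python
# Why convert_decimals is needed: Decimal values break json.dumps.
import json
from decimal import Decimal

def convert_decimals(obj):
    if isinstance(obj, Decimal):
        return float(obj)  # or str(obj) to avoid float rounding
    elif isinstance(obj, dict):
        return {k: convert_decimals(v) for k, v in obj.items()}
    elif isinstance(obj, list):
        return [convert_decimals(item) for item in obj]
    return obj

rows = [{"price": Decimal("19.99"), "tags": [Decimal("1"), "a"]}]
# json.dumps(rows)  # would raise TypeError: Object of type Decimal is not JSON serializable
print(json.dumps(convert_decimals(rows)))  # [{"price": 19.99, "tags": [1.0, "a"]}]
```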
```diff
@@ -122,7 +133,11 @@ class ExeSQL(ToolBase, ABC):
                 single_res = pd.DataFrame([i for i in cursor.fetchmany(self._param.max_records)])
                 single_res.columns = [i[0] for i in cursor.description]
 
-                sql_res.append(single_res.to_dict(orient='records'))
+                for col in single_res.columns:
+                    if pd.api.types.is_datetime64_any_dtype(single_res[col]):
+                        single_res[col] = single_res[col].dt.strftime('%Y-%m-%d')
+
+                sql_res.append(convert_decimals(single_res.to_dict(orient='records')))
                 formalized_content.append(single_res.to_markdown(index=False, floatfmt=".6f"))
 
         self.set_output("json", sql_res)
```
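Datetime columns are normalized to `YYYY-MM-DD` strings before the records are serialized, since pandas `Timestamp` objects are not JSON-friendly either. A small sketch with an illustrative DataFrame:

```python
# Demonstrates the datetime normalization step from the hunk above.
import pandas as pd

single_res = pd.DataFrame({
    "created_at": pd.to_datetime(["2025-01-05", "2025-02-10"]),
    "amount": [1.5, 2.0],
})

for col in single_res.columns:
    if pd.api.types.is_datetime64_any_dtype(single_res[col]):
        # Render timestamps as plain date strings so the records serialize cleanly.
        single_res[col] = single_res[col].dt.strftime('%Y-%m-%d')

print(single_res.to_dict(orient='records'))
# [{'created_at': '2025-01-05', 'amount': 1.5}, {'created_at': '2025-02-10', 'amount': 2.0}]
```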
```diff
@@ -130,4 +145,4 @@ class ExeSQL(ToolBase, ABC):
         return self.output("formalized_content")
 
     def thoughts(self) -> str:
-        return "Query sent—waiting for the data."
+        return "Query sent—waiting for the data."
```