From 7a344a32f9f83529e12ca12f40f2657eb79fe811 Mon Sep 17 00:00:00 2001 From: Yongteng Lei Date: Tue, 25 Nov 2025 14:35:41 +0800 Subject: [PATCH] Fix: code exec component vulnerability and add support for nested list and dict object (#11504) ### What problem does this PR solve? Fix code exec component vulnerability and add support for nested list and dict object. image Return a single value: image ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) - [x] New Feature (non-breaking change which adds functionality) --- agent/canvas.py | 20 +++- agent/tools/code_exec.py | 217 +++++++++++++++++++++++++++++---------- 2 files changed, 180 insertions(+), 57 deletions(-) diff --git a/agent/canvas.py b/agent/canvas.py index 667e457f5..3e15814aa 100644 --- a/agent/canvas.py +++ b/agent/canvas.py @@ -206,17 +206,28 @@ class Graph: for key in path.split('.'): if cur is None: return None + if isinstance(cur, str): try: cur = json.loads(cur) except Exception: return None + if isinstance(cur, dict): cur = cur.get(key) - else: - cur = getattr(cur, key, None) + continue + + if isinstance(cur, (list, tuple)): + try: + idx = int(key) + cur = cur[idx] + except Exception: + return None + continue + + cur = getattr(cur, key, None) return cur - + def set_variable_value(self, exp: str,value): exp = exp.strip("{").strip("}").strip(" ").strip("{").strip("}") if exp.find("@") < 0: @@ -440,7 +451,7 @@ class Canvas(Graph): if isinstance(cpn_obj.output("attachment"), tuple): yield decorate("message", {"attachment": cpn_obj.output("attachment")}) - + yield decorate("message_end", {"reference": self.get_reference() if cite else None}) while partials: @@ -647,4 +658,3 @@ class Canvas(Graph): def get_component_thoughts(self, cpn_id) -> str: return self.components.get(cpn_id)["obj"].thoughts() - diff --git a/agent/tools/code_exec.py b/agent/tools/code_exec.py index adba4168e..e62cace5e 100644 --- a/agent/tools/code_exec.py +++ b/agent/tools/code_exec.py @@ -13,16 +13,20 @@ # See the License for the specific language governing permissions and # limitations under the License. # +import ast import base64 +import json import logging import os from abc import ABC -from strenum import StrEnum from typing import Optional + from pydantic import BaseModel, Field, field_validator -from agent.tools.base import ToolParamBase, ToolBase, ToolMeta -from common.connection_utils import timeout +from strenum import StrEnum + +from agent.tools.base import ToolBase, ToolMeta, ToolParamBase from common import settings +from common.connection_utils import timeout class Language(StrEnum): @@ -62,7 +66,7 @@ class CodeExecParam(ToolParamBase): """ def __init__(self): - self.meta:ToolMeta = { + self.meta: ToolMeta = { "name": "execute_code", "description": """ This tool has a sandbox that can execute code written in 'Python'/'Javascript'. It recieves a piece of code and return a Json string. @@ -99,16 +103,12 @@ module.exports = { main }; "enum": ["python", "javascript"], "required": True, }, - "script": { - "type": "string", - "description": "A piece of code in right format. There MUST be main function.", - "required": True - } - } + "script": {"type": "string", "description": "A piece of code in right format. There MUST be main function.", "required": True}, + }, } super().__init__() self.lang = Language.PYTHON.value - self.script = "def main(arg1: str, arg2: str) -> dict: return {\"result\": arg1 + arg2}" + self.script = 'def main(arg1: str, arg2: str) -> dict: return {"result": arg1 + arg2}' self.arguments = {} self.outputs = {"result": {"value": "", "type": "string"}} @@ -119,17 +119,14 @@ module.exports = { main }; def get_input_form(self) -> dict[str, dict]: res = {} for k, v in self.arguments.items(): - res[k] = { - "type": "line", - "name": k - } + res[k] = {"type": "line", "name": k} return res class CodeExec(ToolBase, ABC): component_name = "CodeExec" - @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10*60))) + @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10 * 60))) def _invoke(self, **kwargs): if self.check_if_canceled("CodeExec processing"): return @@ -138,17 +135,12 @@ class CodeExec(ToolBase, ABC): script = kwargs.get("script", self._param.script) arguments = {} for k, v in self._param.arguments.items(): - if kwargs.get(k): arguments[k] = kwargs[k] continue arguments[k] = self._canvas.get_variable_value(v) if v else None - self._execute_code( - language=lang, - code=script, - arguments=arguments - ) + self._execute_code(language=lang, code=script, arguments=arguments) def _execute_code(self, language: str, code: str, arguments: dict): import requests @@ -169,7 +161,7 @@ class CodeExec(ToolBase, ABC): if self.check_if_canceled("CodeExec execution"): return "Task has been canceled" - resp = requests.post(url=f"http://{settings.SANDBOX_HOST}:9385/run", json=code_req, timeout=int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10*60))) + resp = requests.post(url=f"http://{settings.SANDBOX_HOST}:9385/run", json=code_req, timeout=int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10 * 60))) logging.info(f"http://{settings.SANDBOX_HOST}:9385/run, code_req: {code_req}, resp.status_code {resp.status_code}:") if self.check_if_canceled("CodeExec execution"): @@ -183,35 +175,10 @@ class CodeExec(ToolBase, ABC): if stderr: self.set_output("_ERROR", stderr) return - try: - rt = eval(body.get("stdout", "")) - except Exception: - rt = body.get("stdout", "") - logging.info(f"http://{settings.SANDBOX_HOST}:9385/run -> {rt}") - if isinstance(rt, tuple): - for i, (k, o) in enumerate(self._param.outputs.items()): - if self.check_if_canceled("CodeExec execution"): - return - - if k.find("_") == 0: - continue - o["value"] = rt[i] - elif isinstance(rt, dict): - for i, (k, o) in enumerate(self._param.outputs.items()): - if self.check_if_canceled("CodeExec execution"): - return - - if k not in rt or k.find("_") == 0: - continue - o["value"] = rt[k] - else: - for i, (k, o) in enumerate(self._param.outputs.items()): - if self.check_if_canceled("CodeExec execution"): - return - - if k.find("_") == 0: - continue - o["value"] = rt + raw_stdout = body.get("stdout", "") + parsed_stdout = self._deserialize_stdout(raw_stdout) + logging.info(f"[CodeExec]: http://{settings.SANDBOX_HOST}:9385/run -> {parsed_stdout}") + self._populate_outputs(parsed_stdout, raw_stdout) else: self.set_output("_ERROR", "There is no response from sandbox") @@ -228,3 +195,149 @@ class CodeExec(ToolBase, ABC): def thoughts(self) -> str: return "Running a short script to process data." + + def _deserialize_stdout(self, stdout: str): + text = str(stdout).strip() + if not text: + return "" + for loader in (json.loads, ast.literal_eval): + try: + return loader(text) + except Exception: + continue + return text + + def _coerce_output_value(self, value, expected_type: Optional[str]): + if expected_type is None: + return value + + etype = expected_type.strip().lower() + inner_type = None + if etype.startswith("array<") and etype.endswith(">"): + inner_type = etype[6:-1].strip() + etype = "array" + + try: + if etype == "string": + return "" if value is None else str(value) + + if etype == "number": + if value is None or value == "": + return None + if isinstance(value, (int, float)): + return value + if isinstance(value, str): + try: + return float(value) + except Exception: + return value + return float(value) + + if etype == "boolean": + if isinstance(value, bool): + return value + if isinstance(value, str): + lv = value.lower() + if lv in ("true", "1", "yes", "y", "on"): + return True + if lv in ("false", "0", "no", "n", "off"): + return False + return bool(value) + + if etype == "array": + candidate = value + if isinstance(candidate, str): + parsed = self._deserialize_stdout(candidate) + candidate = parsed + if isinstance(candidate, tuple): + candidate = list(candidate) + if not isinstance(candidate, list): + candidate = [] if candidate is None else [candidate] + + if inner_type == "string": + return ["" if v is None else str(v) for v in candidate] + if inner_type == "number": + coerced = [] + for v in candidate: + try: + if v is None or v == "": + coerced.append(None) + elif isinstance(v, (int, float)): + coerced.append(v) + else: + coerced.append(float(v)) + except Exception: + coerced.append(v) + return coerced + return candidate + + if etype == "object": + if isinstance(value, dict): + return value + if isinstance(value, str): + parsed = self._deserialize_stdout(value) + if isinstance(parsed, dict): + return parsed + return value + except Exception: + return value + + return value + + def _populate_outputs(self, parsed_stdout, raw_stdout: str): + outputs_items = list(self._param.outputs.items()) + logging.info(f"[CodeExec]: outputs schema keys: {[k for k, _ in outputs_items]}") + if not outputs_items: + return + + if isinstance(parsed_stdout, dict): + for key, meta in outputs_items: + if key.startswith("_"): + continue + val = self._get_by_path(parsed_stdout, key) + coerced = self._coerce_output_value(val, meta.get("type")) + logging.info(f"[CodeExec]: populate dict key='{key}' raw='{val}' coerced='{coerced}'") + self.set_output(key, coerced) + return + + if isinstance(parsed_stdout, (list, tuple)): + for idx, (key, meta) in enumerate(outputs_items): + if key.startswith("_"): + continue + val = parsed_stdout[idx] if idx < len(parsed_stdout) else None + coerced = self._coerce_output_value(val, meta.get("type")) + logging.info(f"[CodeExec]: populate list key='{key}' raw='{val}' coerced='{coerced}'") + self.set_output(key, coerced) + return + + default_val = parsed_stdout if parsed_stdout is not None else raw_stdout + for idx, (key, meta) in enumerate(outputs_items): + if key.startswith("_"): + continue + val = default_val if idx == 0 else None + coerced = self._coerce_output_value(val, meta.get("type")) + logging.info(f"[CodeExec]: populate scalar key='{key}' raw='{val}' coerced='{coerced}'") + self.set_output(key, coerced) + + def _get_by_path(self, data, path: str): + if not path: + return None + cur = data + for part in path.split("."): + part = part.strip() + if not part: + return None + if isinstance(cur, dict): + cur = cur.get(part) + elif isinstance(cur, list): + try: + idx = int(part) + cur = cur[idx] + except Exception: + return None + else: + return None + if cur is None: + return None + logging.info(f"[CodeExec]: resolve path '{path}' -> {cur}") + return cur