Fix: code exec component vulnerability and add support for nested list and dict object (#11504)

### What problem does this PR solve? Fix code exec component vulnerability and add support for nested list and dict object. <img width="1491" height="952" alt="image" src="https://github.com/user-attachments/assets/ec2de4e3-0919-413d-abe6-d19431292f14" /> Return a single value: <img width="1156" height="719" alt="image" src="https://github.com/user-attachments/assets/baa35caa-e27c-4064-a9f9-4c0af9a3d5b8" /> ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) - [x] New Feature (non-breaking change which adds functionality)
2025-12-08 20:42:30 +08:00 · 2025-11-25 14:35:41 +08:00
parent 8c1ee3845a
commit 7a344a32f9
2 changed files with 180 additions and 57 deletions
--- a/agent/canvas.py
+++ b/agent/canvas.py
@ -206,15 +206,26 @@ class Graph:
        for key in path.split('.'):
            if cur is None:
                return None
            if isinstance(cur, str):
                try:
                    cur = json.loads(cur)
                except Exception:
                    return None
            if isinstance(cur, dict):
                cur = cur.get(key)
-            else:
+                continue
-                cur = getattr(cur, key, None)
+
            if isinstance(cur, (list, tuple)):
                try:
                    idx = int(key)
                    cur = cur[idx]
                except Exception:
                    return None
                continue
            cur = getattr(cur, key, None)
        return cur
    def set_variable_value(self, exp: str,value):
@ -647,4 +658,3 @@ class Canvas(Graph):
    def get_component_thoughts(self, cpn_id) -> str:
        return self.components.get(cpn_id)["obj"].thoughts()
--- a/agent/tools/code_exec.py
+++ b/agent/tools/code_exec.py
@ -13,16 +13,20 @@
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
 #
 import ast
 import base64
 import json
 import logging
 import os
 from abc import ABC
 from strenum import StrEnum
 from typing import Optional
 from pydantic import BaseModel, Field, field_validator
-from agent.tools.base import ToolParamBase, ToolBase, ToolMeta
+from strenum import StrEnum
-from common.connection_utils import timeout
+
 from agent.tools.base import ToolBase, ToolMeta, ToolParamBase
 from common import settings
 from common.connection_utils import timeout
 class Language(StrEnum):
@ -62,7 +66,7 @@ class CodeExecParam(ToolParamBase):
    """
    def __init__(self):
-        self.meta:ToolMeta = {
+        self.meta: ToolMeta = {
            "name": "execute_code",
            "description": """
 This tool has a sandbox that can execute code written in 'Python'/'Javascript'. It recieves a piece of code and return a Json string.
@ -99,16 +103,12 @@ module.exports = { main };
                    "enum": ["python", "javascript"],
                    "required": True,
                },
-                "script": {
+                "script": {"type": "string", "description": "A piece of code in right format. There MUST be main function.", "required": True},
-                    "type": "string",
+            },
                    "description": "A piece of code in right format. There MUST be main function.",
                    "required": True
                }
            }
        }
        super().__init__()
        self.lang = Language.PYTHON.value
-        self.script = "def main(arg1: str, arg2: str) -> dict: return {\"result\": arg1 + arg2}"
+        self.script = 'def main(arg1: str, arg2: str) -> dict: return {"result": arg1 + arg2}'
        self.arguments = {}
        self.outputs = {"result": {"value": "", "type": "string"}}
@ -119,17 +119,14 @@ module.exports = { main };
    def get_input_form(self) -> dict[str, dict]:
        res = {}
        for k, v in self.arguments.items():
-            res[k] = {
+            res[k] = {"type": "line", "name": k}
                "type": "line",
                "name": k
            }
        return res
 class CodeExec(ToolBase, ABC):
    component_name = "CodeExec"
-    @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10*60)))
+    @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10 * 60)))
    def _invoke(self, **kwargs):
        if self.check_if_canceled("CodeExec processing"):
            return
@ -138,17 +135,12 @@ class CodeExec(ToolBase, ABC):
        script = kwargs.get("script", self._param.script)
        arguments = {}
        for k, v in self._param.arguments.items():
            if kwargs.get(k):
                arguments[k] = kwargs[k]
                continue
            arguments[k] = self._canvas.get_variable_value(v) if v else None
-        self._execute_code(
+        self._execute_code(language=lang, code=script, arguments=arguments)
            language=lang,
            code=script,
            arguments=arguments
        )
    def _execute_code(self, language: str, code: str, arguments: dict):
        import requests
@ -169,7 +161,7 @@ class CodeExec(ToolBase, ABC):
            if self.check_if_canceled("CodeExec execution"):
                return "Task has been canceled"
-            resp = requests.post(url=f"http://{settings.SANDBOX_HOST}:9385/run", json=code_req, timeout=int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10*60)))
+            resp = requests.post(url=f"http://{settings.SANDBOX_HOST}:9385/run", json=code_req, timeout=int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10 * 60)))
            logging.info(f"http://{settings.SANDBOX_HOST}:9385/run,  code_req: {code_req}, resp.status_code {resp.status_code}:")
            if self.check_if_canceled("CodeExec execution"):
@ -183,35 +175,10 @@ class CodeExec(ToolBase, ABC):
                if stderr:
                    self.set_output("_ERROR", stderr)
                    return
-                try:
+                raw_stdout = body.get("stdout", "")
-                    rt = eval(body.get("stdout", ""))
+                parsed_stdout = self._deserialize_stdout(raw_stdout)
-                except Exception:
+                logging.info(f"[CodeExec]: http://{settings.SANDBOX_HOST}:9385/run -> {parsed_stdout}")
-                    rt = body.get("stdout", "")
+                self._populate_outputs(parsed_stdout, raw_stdout)
                logging.info(f"http://{settings.SANDBOX_HOST}:9385/run -> {rt}")
                if isinstance(rt, tuple):
                    for i, (k, o) in enumerate(self._param.outputs.items()):
                        if self.check_if_canceled("CodeExec execution"):
                            return
                        if k.find("_") == 0:
                            continue
                        o["value"] = rt[i]
                elif isinstance(rt, dict):
                    for i, (k, o) in enumerate(self._param.outputs.items()):
                        if self.check_if_canceled("CodeExec execution"):
                            return
                        if k not in rt or k.find("_") == 0:
                            continue
                        o["value"] = rt[k]
                else:
                    for i, (k, o) in enumerate(self._param.outputs.items()):
                        if self.check_if_canceled("CodeExec execution"):
                            return
                        if k.find("_") == 0:
                            continue
                        o["value"] = rt
            else:
                self.set_output("_ERROR", "There is no response from sandbox")
@ -228,3 +195,149 @@ class CodeExec(ToolBase, ABC):
    def thoughts(self) -> str:
        return "Running a short script to process data."
    def _deserialize_stdout(self, stdout: str):
        text = str(stdout).strip()
        if not text:
            return ""
        for loader in (json.loads, ast.literal_eval):
            try:
                return loader(text)
            except Exception:
                continue
        return text
    def _coerce_output_value(self, value, expected_type: Optional[str]):
        if expected_type is None:
            return value
        etype = expected_type.strip().lower()
        inner_type = None
        if etype.startswith("array<") and etype.endswith(">"):
            inner_type = etype[6:-1].strip()
            etype = "array"
        try:
            if etype == "string":
                return "" if value is None else str(value)
            if etype == "number":
                if value is None or value == "":
                    return None
                if isinstance(value, (int, float)):
                    return value
                if isinstance(value, str):
                    try:
                        return float(value)
                    except Exception:
                        return value
                return float(value)
            if etype == "boolean":
                if isinstance(value, bool):
                    return value
                if isinstance(value, str):
                    lv = value.lower()
                    if lv in ("true", "1", "yes", "y", "on"):
                        return True
                    if lv in ("false", "0", "no", "n", "off"):
                        return False
                return bool(value)
            if etype == "array":
                candidate = value
                if isinstance(candidate, str):
                    parsed = self._deserialize_stdout(candidate)
                    candidate = parsed
                if isinstance(candidate, tuple):
                    candidate = list(candidate)
                if not isinstance(candidate, list):
                    candidate = [] if candidate is None else [candidate]
                if inner_type == "string":
                    return ["" if v is None else str(v) for v in candidate]
                if inner_type == "number":
                    coerced = []
                    for v in candidate:
                        try:
                            if v is None or v == "":
                                coerced.append(None)
                            elif isinstance(v, (int, float)):
                                coerced.append(v)
                            else:
                                coerced.append(float(v))
                        except Exception:
                            coerced.append(v)
                    return coerced
                return candidate
            if etype == "object":
                if isinstance(value, dict):
                    return value
                if isinstance(value, str):
                    parsed = self._deserialize_stdout(value)
                    if isinstance(parsed, dict):
                        return parsed
                return value
        except Exception:
            return value
        return value
    def _populate_outputs(self, parsed_stdout, raw_stdout: str):
        outputs_items = list(self._param.outputs.items())
        logging.info(f"[CodeExec]: outputs schema keys: {[k for k, _ in outputs_items]}")
        if not outputs_items:
            return
        if isinstance(parsed_stdout, dict):
            for key, meta in outputs_items:
                if key.startswith("_"):
                    continue
                val = self._get_by_path(parsed_stdout, key)
                coerced = self._coerce_output_value(val, meta.get("type"))
                logging.info(f"[CodeExec]: populate dict key='{key}' raw='{val}' coerced='{coerced}'")
                self.set_output(key, coerced)
            return
        if isinstance(parsed_stdout, (list, tuple)):
            for idx, (key, meta) in enumerate(outputs_items):
                if key.startswith("_"):
                    continue
                val = parsed_stdout[idx] if idx < len(parsed_stdout) else None
                coerced = self._coerce_output_value(val, meta.get("type"))
                logging.info(f"[CodeExec]: populate list key='{key}' raw='{val}' coerced='{coerced}'")
                self.set_output(key, coerced)
            return
        default_val = parsed_stdout if parsed_stdout is not None else raw_stdout
        for idx, (key, meta) in enumerate(outputs_items):
            if key.startswith("_"):
                continue
            val = default_val if idx == 0 else None
            coerced = self._coerce_output_value(val, meta.get("type"))
            logging.info(f"[CodeExec]: populate scalar key='{key}' raw='{val}' coerced='{coerced}'")
            self.set_output(key, coerced)
    def _get_by_path(self, data, path: str):
        if not path:
            return None
        cur = data
        for part in path.split("."):
            part = part.strip()
            if not part:
                return None
            if isinstance(cur, dict):
                cur = cur.get(part)
            elif isinstance(cur, list):
                try:
                    idx = int(part)
                    cur = cur[idx]
                except Exception:
                    return None
            else:
                return None
            if cur is None:
                return None
        logging.info(f"[CodeExec]: resolve path '{path}' -> {cur}")
        return cur