From 7a344a32f9f83529e12ca12f40f2657eb79fe811 Mon Sep 17 00:00:00 2001
From: Yongteng Lei <yongtengrey@outlook.com>
Date: Tue, 25 Nov 2025 14:35:41 +0800
Subject: [PATCH] Fix: code exec component vulnerability and add support for
 nested list and dict object (#11504)

### What problem does this PR solve?

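Fix the code exec component vulnerability (sandbox stdout was parsed with
`eval`; it is now parsed with `json.loads` / `ast.literal_eval`) and add
support for nested list and dict objects.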

<img width="1491" height="952" alt="image"
src="https://github.com/user-attachments/assets/ec2de4e3-0919-413d-abe6-d19431292f14"
/>

Return a single value:

<img width="1156" height="719" alt="image"
src="https://github.com/user-attachments/assets/baa35caa-e27c-4064-a9f9-4c0af9a3d5b8"
/>


### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
- [x] New Feature (non-breaking change which adds functionality)
---
 agent/canvas.py          |  20 +++-
 agent/tools/code_exec.py | 217 +++++++++++++++++++++++++++++----------
 2 files changed, 180 insertions(+), 57 deletions(-)
diff --git a/agent/canvas.py b/agent/canvas.py
index 667e457f5..3e15814aa 100644
--- a/agent/canvas.py
+++ b/agent/canvas.py
@@ -206,17 +206,28 @@ class Graph:
         for key in path.split('.'):
             if cur is None:
                 return None
+
             if isinstance(cur, str):
                 try:
                     cur = json.loads(cur)
                 except Exception:
                     return None
+
             if isinstance(cur, dict):
                 cur = cur.get(key)
-            else:
-                cur = getattr(cur, key, None)
+                continue
+
+            if isinstance(cur, (list, tuple)):
+                try:
+                    idx = int(key)
+                    cur = cur[idx]
+                except Exception:
+                    return None
+                continue
+
+            cur = getattr(cur, key, None)
         return cur
-    
+
     def set_variable_value(self, exp: str,value):
         exp = exp.strip("{").strip("}").strip(" ").strip("{").strip("}")
         if exp.find("@") < 0:
@@ -440,7 +451,7 @@ class Canvas(Graph):
 
                     if isinstance(cpn_obj.output("attachment"), tuple):
                         yield decorate("message", {"attachment": cpn_obj.output("attachment")})
-                        
+
                     yield decorate("message_end", {"reference": self.get_reference() if cite else None})
 
                     while partials:
@@ -647,4 +658,3 @@ class Canvas(Graph):
 
     def get_component_thoughts(self, cpn_id) -> str:
         return self.components.get(cpn_id)["obj"].thoughts()
-
diff --git a/agent/tools/code_exec.py b/agent/tools/code_exec.py
index adba4168e..e62cace5e 100644
--- a/agent/tools/code_exec.py
+++ b/agent/tools/code_exec.py
@@ -13,16 +13,20 @@
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
 #
+import ast
 import base64
+import json
 import logging
 import os
 from abc import ABC
-from strenum import StrEnum
 from typing import Optional
+
 from pydantic import BaseModel, Field, field_validator
-from agent.tools.base import ToolParamBase, ToolBase, ToolMeta
-from common.connection_utils import timeout
+from strenum import StrEnum
+
+from agent.tools.base import ToolBase, ToolMeta, ToolParamBase
 from common import settings
+from common.connection_utils import timeout
 
 
 class Language(StrEnum):
@@ -62,7 +66,7 @@ class CodeExecParam(ToolParamBase):
     """
 
     def __init__(self):
-        self.meta:ToolMeta = {
+        self.meta: ToolMeta = {
             "name": "execute_code",
             "description": """
 This tool has a sandbox that can execute code written in 'Python'/'Javascript'. It recieves a piece of code and return a Json string.
@@ -99,16 +103,12 @@ module.exports = { main };
                     "enum": ["python", "javascript"],
                     "required": True,
                 },
-                "script": {
-                    "type": "string",
-                    "description": "A piece of code in right format. There MUST be main function.",
-                    "required": True
-                }
-            }
+                "script": {"type": "string", "description": "A piece of code in right format. There MUST be main function.", "required": True},
+            },
         }
         super().__init__()
         self.lang = Language.PYTHON.value
-        self.script = "def main(arg1: str, arg2: str) -> dict: return {\"result\": arg1 + arg2}"
+        self.script = 'def main(arg1: str, arg2: str) -> dict: return {"result": arg1 + arg2}'
         self.arguments = {}
         self.outputs = {"result": {"value": "", "type": "string"}}
 
@@ -119,17 +119,14 @@ module.exports = { main };
     def get_input_form(self) -> dict[str, dict]:
         res = {}
         for k, v in self.arguments.items():
-            res[k] = {
-                "type": "line",
-                "name": k
-            }
+            res[k] = {"type": "line", "name": k}
         return res
 
 
 class CodeExec(ToolBase, ABC):
     component_name = "CodeExec"
 
-    @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10*60)))
+    @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10 * 60)))
     def _invoke(self, **kwargs):
         if self.check_if_canceled("CodeExec processing"):
             return
@@ -138,17 +135,12 @@ class CodeExec(ToolBase, ABC):
         script = kwargs.get("script", self._param.script)
         arguments = {}
         for k, v in self._param.arguments.items():
-
             if kwargs.get(k):
                 arguments[k] = kwargs[k]
                 continue
             arguments[k] = self._canvas.get_variable_value(v) if v else None
 
-        self._execute_code(
-            language=lang,
-            code=script,
-            arguments=arguments
-        )
+        self._execute_code(language=lang, code=script, arguments=arguments)
 
     def _execute_code(self, language: str, code: str, arguments: dict):
         import requests
@@ -169,7 +161,7 @@ class CodeExec(ToolBase, ABC):
             if self.check_if_canceled("CodeExec execution"):
                 return "Task has been canceled"
 
-            resp = requests.post(url=f"http://{settings.SANDBOX_HOST}:9385/run", json=code_req, timeout=int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10*60)))
+            resp = requests.post(url=f"http://{settings.SANDBOX_HOST}:9385/run", json=code_req, timeout=int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10 * 60)))
             logging.info(f"http://{settings.SANDBOX_HOST}:9385/run,  code_req: {code_req}, resp.status_code {resp.status_code}:")
 
             if self.check_if_canceled("CodeExec execution"):
@@ -183,35 +175,10 @@ class CodeExec(ToolBase, ABC):
                 if stderr:
                     self.set_output("_ERROR", stderr)
                     return
-                try:
-                    rt = eval(body.get("stdout", ""))
-                except Exception:
-                    rt = body.get("stdout", "")
-                logging.info(f"http://{settings.SANDBOX_HOST}:9385/run -> {rt}")
-                if isinstance(rt, tuple):
-                    for i, (k, o) in enumerate(self._param.outputs.items()):
-                        if self.check_if_canceled("CodeExec execution"):
-                            return
-
-                        if k.find("_") == 0:
-                            continue
-                        o["value"] = rt[i]
-                elif isinstance(rt, dict):
-                    for i, (k, o) in enumerate(self._param.outputs.items()):
-                        if self.check_if_canceled("CodeExec execution"):
-                            return
-
-                        if k not in rt or k.find("_") == 0:
-                            continue
-                        o["value"] = rt[k]
-                else:
-                    for i, (k, o) in enumerate(self._param.outputs.items()):
-                        if self.check_if_canceled("CodeExec execution"):
-                            return
-
-                        if k.find("_") == 0:
-                            continue
-                        o["value"] = rt
+                raw_stdout = body.get("stdout", "")
+                parsed_stdout = self._deserialize_stdout(raw_stdout)
+                logging.info(f"[CodeExec]: http://{settings.SANDBOX_HOST}:9385/run -> {parsed_stdout}")
+                self._populate_outputs(parsed_stdout, raw_stdout)
             else:
                 self.set_output("_ERROR", "There is no response from sandbox")
 
@@ -228,3 +195,149 @@ class CodeExec(ToolBase, ABC):
 
     def thoughts(self) -> str:
         return "Running a short script to process data."
+
+    def _deserialize_stdout(self, stdout: str):
+        """Parse sandbox stdout as JSON, then as a Python literal; otherwise return the stripped text."""
+        stripped = str(stdout).strip()
+        if stripped:
+            for parse in (json.loads, ast.literal_eval):
+                try:
+                    return parse(stripped)
+                except Exception:
+                    continue
+        return stripped
+
+    def _coerce_output_value(self, value, expected_type: Optional[str]):
+        """Best-effort conversion of `value` to the declared output type.
+
+        String values are parsed with `_deserialize_stdout` before being used as "array" or
+        "object"; for "array<string>" and "array<number>" every element is coerced as well.
+
+        Args:
+            value: Value taken from the parsed sandbox result.
+            expected_type: Declared type name, matched case-insensitively: "string", "number",
+                "boolean", "object", "array" or "array<inner>". None disables coercion.
+
+        Returns:
+            The converted value; unknown types and values that cannot be converted are
+            returned unchanged instead of raising.
+        """
+        if expected_type is None:
+            return value
+
+        etype = expected_type.strip().lower()
+        inner_type = None
+        if etype.startswith("array<") and etype.endswith(">"):
+            inner_type = etype[6:-1].strip()
+            etype = "array"
+
+        try:
+            if etype == "string":
+                return "" if value is None else str(value)
+
+            if etype == "number":
+                if value is None or value == "":
+                    return None
+                if isinstance(value, (int, float)):
+                    return value
+                if isinstance(value, str):
+                    try:
+                        return float(value)
+                    except ValueError:
+                        return value
+                return float(value)
+
+            if etype == "boolean":
+                if isinstance(value, bool):
+                    return value
+                if isinstance(value, str):
+                    # Strip first: "false\n" must not fall through to bool(), where any
+                    # non-empty string is truthy.
+                    lv = value.strip().lower()
+                    if lv in ("true", "1", "yes", "y", "on"):
+                        return True
+                    if lv in ("false", "0", "no", "n", "off"):
+                        return False
+                return bool(value)
+
+            if etype == "array":
+                candidate = self._deserialize_stdout(value) if isinstance(value, str) else value
+                if isinstance(candidate, tuple):
+                    candidate = list(candidate)
+                if not isinstance(candidate, list):
+                    candidate = [] if candidate is None else [candidate]
+                if inner_type in ("string", "number"):
+                    # Elements follow the same rules as the scalar types above.
+                    return [self._coerce_output_value(v, inner_type) for v in candidate]
+                return candidate
+
+            if etype == "object":
+                if isinstance(value, dict):
+                    return value
+                if isinstance(value, str):
+                    parsed = self._deserialize_stdout(value)
+                    if isinstance(parsed, dict):
+                        return parsed
+                return value
+        except Exception:
+            return value
+
+        return value
+
+    def _populate_outputs(self, parsed_stdout, raw_stdout: str):
+        """Distribute the parsed sandbox result over the declared outputs.
+
+        A dict is read per output key via `_get_by_path`; a list/tuple is assigned by position;
+        any other value goes to the first output and the remaining outputs get None. Keys that
+        start with "_" are skipped and do not consume a position. Every value is passed through
+        `_coerce_output_value` with the output's declared type before `set_output` is called.
+
+        Args:
+            parsed_stdout: Value to distribute (normally the result of `_deserialize_stdout`).
+            raw_stdout: Original stdout text, used only when `parsed_stdout` is None.
+        """
+        outputs_items = list(self._param.outputs.items())
+        logging.info(f"[CodeExec]: outputs schema keys: {[k for k, _ in outputs_items]}")
+
+        # Private keys must not shift the positional mapping of list/scalar results.
+        public_items = [(key, meta) for key, meta in outputs_items if not key.startswith("_")]
+        if isinstance(parsed_stdout, dict):
+            mode = "dict"
+        elif isinstance(parsed_stdout, (list, tuple)):
+            mode = "list"
+        else:
+            # A scalar is handled as a one-element sequence so it shares the positional logic.
+            mode = "scalar"
+            parsed_stdout = [parsed_stdout if parsed_stdout is not None else raw_stdout]
+
+        for pos, (key, meta) in enumerate(public_items):
+            if mode == "dict":
+                val = self._get_by_path(parsed_stdout, key)
+            else:
+                val = parsed_stdout[pos] if pos < len(parsed_stdout) else None
+            coerced = self._coerce_output_value(val, meta.get("type"))
+            logging.info(f"[CodeExec]: populate {mode} key='{key}' raw='{val}' coerced='{coerced}'")
+            self.set_output(key, coerced)
+
+    def _get_by_path(self, data, path: str):
+        """Resolve a dotted `path` (e.g. "a.0.b") through nested dicts, lists and tuples.
+
+        Returns None for an empty path or segment, or when any step is missing or not indexable.
+        """
+        if not path:
+            return None
+        cur = data
+        for part in (p.strip() for p in path.split(".")):
+            if part and isinstance(cur, dict):
+                cur = cur.get(part)
+            elif part and isinstance(cur, (list, tuple)):  # literal_eval output may nest tuples
+                try:
+                    cur = cur[int(part)]
+                except (ValueError, IndexError):
+                    return None
+            else:
+                return None
+            if cur is None:
+                return None
+        logging.info(f"[CodeExec]: resolve path '{path}' -> {cur}")
+        return cur