Fix: code exec component vulnerability and add support for nested list and dict objects (#11504)

### What problem does this PR solve?

Fix a vulnerability in the code exec component and add support for nested list and dict objects. The sandbox's stdout was previously passed to `eval`; it is now parsed with `json.loads`, falling back to `ast.literal_eval`.

<img width="1491" height="952" alt="image" src="https://github.com/user-attachments/assets/ec2de4e3-0919-413d-abe6-d19431292f14" />

Return a single value:

<img width="1156" height="719" alt="image" src="https://github.com/user-attachments/assets/baa35caa-e27c-4064-a9f9-4c0af9a3d5b8" />

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
- [x] New Feature (non-breaking change which adds functionality)
2025-12-08 12:32:30 +08:00 · 2025-11-25 14:35:41 +08:00
parent 8c1ee3845a
commit 7a344a32f9
2 changed files with 180 additions and 57 deletions
--- a/agent/canvas.py
+++ b/agent/canvas.py
@ -206,17 +206,28 @@ class Graph:
        for key in path.split('.'):
            if cur is None:
                return None
+
            if isinstance(cur, str):
                try:
                    cur = json.loads(cur)
                except Exception:
                    return None
+
            if isinstance(cur, dict):
                cur = cur.get(key)
-            else:
-                cur = getattr(cur, key, None)
+                continue
+
+            if isinstance(cur, (list, tuple)):
+                try:
+                    idx = int(key)
+                    cur = cur[idx]
+                except Exception:
+                    return None
+                continue
+
+            cur = getattr(cur, key, None)
        return cur
-    
+
    def set_variable_value(self, exp: str,value):
        exp = exp.strip("{").strip("}").strip(" ").strip("{").strip("}")
        if exp.find("@") < 0:
@ -440,7 +451,7 @@ class Canvas(Graph):

                    if isinstance(cpn_obj.output("attachment"), tuple):
                        yield decorate("message", {"attachment": cpn_obj.output("attachment")})
-                        
+
                    yield decorate("message_end", {"reference": self.get_reference() if cite else None})

                    while partials:
@ -647,4 +658,3 @@ class Canvas(Graph):

    def get_component_thoughts(self, cpn_id) -> str:
        return self.components.get(cpn_id)["obj"].thoughts()
-
--- a/agent/tools/code_exec.py
+++ b/agent/tools/code_exec.py
@ -13,16 +13,20 @@
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
 #
+import ast
 import base64
+import json
 import logging
 import os
 from abc import ABC
-from strenum import StrEnum
 from typing import Optional
+
 from pydantic import BaseModel, Field, field_validator
-from agent.tools.base import ToolParamBase, ToolBase, ToolMeta
-from common.connection_utils import timeout
+from strenum import StrEnum
+
+from agent.tools.base import ToolBase, ToolMeta, ToolParamBase
 from common import settings
+from common.connection_utils import timeout


 class Language(StrEnum):
@ -62,7 +66,7 @@ class CodeExecParam(ToolParamBase):
    """

    def __init__(self):
-        self.meta:ToolMeta = {
+        self.meta: ToolMeta = {
            "name": "execute_code",
            "description": """
 This tool has a sandbox that can execute code written in 'Python'/'Javascript'. It recieves a piece of code and return a Json string.
@ -99,16 +103,12 @@ module.exports = { main };
                    "enum": ["python", "javascript"],
                    "required": True,
                },
-                "script": {
-                    "type": "string",
-                    "description": "A piece of code in right format. There MUST be main function.",
-                    "required": True
-                }
-            }
+                "script": {"type": "string", "description": "A piece of code in right format. There MUST be main function.", "required": True},
+            },
        }
        super().__init__()
        self.lang = Language.PYTHON.value
-        self.script = "def main(arg1: str, arg2: str) -> dict: return {\"result\": arg1 + arg2}"
+        self.script = 'def main(arg1: str, arg2: str) -> dict: return {"result": arg1 + arg2}'
        self.arguments = {}
        self.outputs = {"result": {"value": "", "type": "string"}}

@ -119,17 +119,14 @@ module.exports = { main };
    def get_input_form(self) -> dict[str, dict]:
        res = {}
        for k, v in self.arguments.items():
-            res[k] = {
-                "type": "line",
-                "name": k
-            }
+            res[k] = {"type": "line", "name": k}
        return res


 class CodeExec(ToolBase, ABC):
    component_name = "CodeExec"

-    @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10*60)))
+    @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10 * 60)))
    def _invoke(self, **kwargs):
        if self.check_if_canceled("CodeExec processing"):
            return
@ -138,17 +135,12 @@ class CodeExec(ToolBase, ABC):
        script = kwargs.get("script", self._param.script)
        arguments = {}
        for k, v in self._param.arguments.items():
-
            if kwargs.get(k):
                arguments[k] = kwargs[k]
                continue
            arguments[k] = self._canvas.get_variable_value(v) if v else None

-        self._execute_code(
-            language=lang,
-            code=script,
-            arguments=arguments
-        )
+        self._execute_code(language=lang, code=script, arguments=arguments)

    def _execute_code(self, language: str, code: str, arguments: dict):
        import requests
@ -169,7 +161,7 @@ class CodeExec(ToolBase, ABC):
            if self.check_if_canceled("CodeExec execution"):
                return "Task has been canceled"

-            resp = requests.post(url=f"http://{settings.SANDBOX_HOST}:9385/run", json=code_req, timeout=int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10*60)))
+            resp = requests.post(url=f"http://{settings.SANDBOX_HOST}:9385/run", json=code_req, timeout=int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10 * 60)))
            logging.info(f"http://{settings.SANDBOX_HOST}:9385/run,  code_req: {code_req}, resp.status_code {resp.status_code}:")

            if self.check_if_canceled("CodeExec execution"):
@ -183,35 +175,10 @@ class CodeExec(ToolBase, ABC):
                if stderr:
                    self.set_output("_ERROR", stderr)
                    return
-                try:
-                    rt = eval(body.get("stdout", ""))
-                except Exception:
-                    rt = body.get("stdout", "")
-                logging.info(f"http://{settings.SANDBOX_HOST}:9385/run -> {rt}")
-                if isinstance(rt, tuple):
-                    for i, (k, o) in enumerate(self._param.outputs.items()):
-                        if self.check_if_canceled("CodeExec execution"):
-                            return
-
-                        if k.find("_") == 0:
-                            continue
-                        o["value"] = rt[i]
-                elif isinstance(rt, dict):
-                    for i, (k, o) in enumerate(self._param.outputs.items()):
-                        if self.check_if_canceled("CodeExec execution"):
-                            return
-
-                        if k not in rt or k.find("_") == 0:
-                            continue
-                        o["value"] = rt[k]
-                else:
-                    for i, (k, o) in enumerate(self._param.outputs.items()):
-                        if self.check_if_canceled("CodeExec execution"):
-                            return
-
-                        if k.find("_") == 0:
-                            continue
-                        o["value"] = rt
+                raw_stdout = body.get("stdout", "")
+                parsed_stdout = self._deserialize_stdout(raw_stdout)
+                logging.info(f"[CodeExec]: http://{settings.SANDBOX_HOST}:9385/run -> {parsed_stdout}")
+                self._populate_outputs(parsed_stdout, raw_stdout)
            else:
                self.set_output("_ERROR", "There is no response from sandbox")

@ -228,3 +195,149 @@ class CodeExec(ToolBase, ABC):

    def thoughts(self) -> str:
        return "Running a short script to process data."
+
+    def _deserialize_stdout(self, stdout: str):
+        text = str(stdout).strip()
+        if not text:
+            return ""
+        for loader in (json.loads, ast.literal_eval):
+            try:
+                return loader(text)
+            except Exception:
+                continue
+        return text
+
+    def _coerce_output_value(self, value, expected_type: Optional[str]):
+        if expected_type is None:
+            return value
+
+        etype = expected_type.strip().lower()
+        inner_type = None
+        if etype.startswith("array<") and etype.endswith(">"):
+            inner_type = etype[6:-1].strip()
+            etype = "array"
+
+        try:
+            if etype == "string":
+                return "" if value is None else str(value)
+
+            if etype == "number":
+                if value is None or value == "":
+                    return None
+                if isinstance(value, (int, float)):
+                    return value
+                if isinstance(value, str):
+                    try:
+                        return float(value)
+                    except Exception:
+                        return value
+                return float(value)
+
+            if etype == "boolean":
+                if isinstance(value, bool):
+                    return value
+                if isinstance(value, str):
+                    lv = value.lower()
+                    if lv in ("true", "1", "yes", "y", "on"):
+                        return True
+                    if lv in ("false", "0", "no", "n", "off"):
+                        return False
+                return bool(value)
+
+            if etype == "array":
+                candidate = value
+                if isinstance(candidate, str):
+                    parsed = self._deserialize_stdout(candidate)
+                    candidate = parsed
+                if isinstance(candidate, tuple):
+                    candidate = list(candidate)
+                if not isinstance(candidate, list):
+                    candidate = [] if candidate is None else [candidate]
+
+                if inner_type == "string":
+                    return ["" if v is None else str(v) for v in candidate]
+                if inner_type == "number":
+                    coerced = []
+                    for v in candidate:
+                        try:
+                            if v is None or v == "":
+                                coerced.append(None)
+                            elif isinstance(v, (int, float)):
+                                coerced.append(v)
+                            else:
+                                coerced.append(float(v))
+                        except Exception:
+                            coerced.append(v)
+                    return coerced
+                return candidate
+
+            if etype == "object":
+                if isinstance(value, dict):
+                    return value
+                if isinstance(value, str):
+                    parsed = self._deserialize_stdout(value)
+                    if isinstance(parsed, dict):
+                        return parsed
+                return value
+        except Exception:
+            return value
+
+        return value
+
+    def _populate_outputs(self, parsed_stdout, raw_stdout: str):
+        outputs_items = list(self._param.outputs.items())
+        logging.info(f"[CodeExec]: outputs schema keys: {[k for k, _ in outputs_items]}")
+        if not outputs_items:
+            return
+
+        if isinstance(parsed_stdout, dict):
+            for key, meta in outputs_items:
+                if key.startswith("_"):
+                    continue
+                val = self._get_by_path(parsed_stdout, key)
+                coerced = self._coerce_output_value(val, meta.get("type"))
+                logging.info(f"[CodeExec]: populate dict key='{key}' raw='{val}' coerced='{coerced}'")
+                self.set_output(key, coerced)
+            return
+
+        if isinstance(parsed_stdout, (list, tuple)):
+            for idx, (key, meta) in enumerate(outputs_items):
+                if key.startswith("_"):
+                    continue
+                val = parsed_stdout[idx] if idx < len(parsed_stdout) else None
+                coerced = self._coerce_output_value(val, meta.get("type"))
+                logging.info(f"[CodeExec]: populate list key='{key}' raw='{val}' coerced='{coerced}'")
+                self.set_output(key, coerced)
+            return
+
+        default_val = parsed_stdout if parsed_stdout is not None else raw_stdout
+        for idx, (key, meta) in enumerate(outputs_items):
+            if key.startswith("_"):
+                continue
+            val = default_val if idx == 0 else None
+            coerced = self._coerce_output_value(val, meta.get("type"))
+            logging.info(f"[CodeExec]: populate scalar key='{key}' raw='{val}' coerced='{coerced}'")
+            self.set_output(key, coerced)
+
+    def _get_by_path(self, data, path: str):
+        if not path:
+            return None
+        cur = data
+        for part in path.split("."):
+            part = part.strip()
+            if not part:
+                return None
+            if isinstance(cur, dict):
+                cur = cur.get(part)
+            elif isinstance(cur, list):
+                try:
+                    idx = int(part)
+                    cur = cur[idx]
+                except Exception:
+                    return None
+            else:
+                return None
+            if cur is None:
+                return None
+        logging.info(f"[CodeExec]: resolve path '{path}' -> {cur}")
+        return cur