Fix: code exec component vulnerability and add support for nested list and dict object (#11504)

### What problem does this PR solve?

Fix code exec component vulnerability and add support for nested list
and dict object.

<img width="1491" height="952" alt="image"
src="https://github.com/user-attachments/assets/ec2de4e3-0919-413d-abe6-d19431292f14"
/>

Return a single value:

<img width="1156" height="719" alt="image"
src="https://github.com/user-attachments/assets/baa35caa-e27c-4064-a9f9-4c0af9a3d5b8"
/>


### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
Yongteng Lei
2025-11-25 14:35:41 +08:00
committed by GitHub
parent 8c1ee3845a
commit 7a344a32f9
2 changed files with 180 additions and 57 deletions

View File

@ -206,15 +206,26 @@ class Graph:
for key in path.split('.'): for key in path.split('.'):
if cur is None: if cur is None:
return None return None
if isinstance(cur, str): if isinstance(cur, str):
try: try:
cur = json.loads(cur) cur = json.loads(cur)
except Exception: except Exception:
return None return None
if isinstance(cur, dict): if isinstance(cur, dict):
cur = cur.get(key) cur = cur.get(key)
else: continue
cur = getattr(cur, key, None)
if isinstance(cur, (list, tuple)):
try:
idx = int(key)
cur = cur[idx]
except Exception:
return None
continue
cur = getattr(cur, key, None)
return cur return cur
def set_variable_value(self, exp: str,value): def set_variable_value(self, exp: str,value):
@ -647,4 +658,3 @@ class Canvas(Graph):
def get_component_thoughts(self, cpn_id) -> str: def get_component_thoughts(self, cpn_id) -> str:
return self.components.get(cpn_id)["obj"].thoughts() return self.components.get(cpn_id)["obj"].thoughts()

View File

@ -13,16 +13,20 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import ast
import base64 import base64
import json
import logging import logging
import os import os
from abc import ABC from abc import ABC
from strenum import StrEnum
from typing import Optional from typing import Optional
from pydantic import BaseModel, Field, field_validator from pydantic import BaseModel, Field, field_validator
from agent.tools.base import ToolParamBase, ToolBase, ToolMeta from strenum import StrEnum
from common.connection_utils import timeout
from agent.tools.base import ToolBase, ToolMeta, ToolParamBase
from common import settings from common import settings
from common.connection_utils import timeout
class Language(StrEnum): class Language(StrEnum):
@ -62,7 +66,7 @@ class CodeExecParam(ToolParamBase):
""" """
def __init__(self): def __init__(self):
self.meta:ToolMeta = { self.meta: ToolMeta = {
"name": "execute_code", "name": "execute_code",
"description": """ "description": """
This tool has a sandbox that can execute code written in 'Python'/'Javascript'. It recieves a piece of code and return a Json string. This tool has a sandbox that can execute code written in 'Python'/'Javascript'. It recieves a piece of code and return a Json string.
@ -99,16 +103,12 @@ module.exports = { main };
"enum": ["python", "javascript"], "enum": ["python", "javascript"],
"required": True, "required": True,
}, },
"script": { "script": {"type": "string", "description": "A piece of code in right format. There MUST be main function.", "required": True},
"type": "string", },
"description": "A piece of code in right format. There MUST be main function.",
"required": True
}
}
} }
super().__init__() super().__init__()
self.lang = Language.PYTHON.value self.lang = Language.PYTHON.value
self.script = "def main(arg1: str, arg2: str) -> dict: return {\"result\": arg1 + arg2}" self.script = 'def main(arg1: str, arg2: str) -> dict: return {"result": arg1 + arg2}'
self.arguments = {} self.arguments = {}
self.outputs = {"result": {"value": "", "type": "string"}} self.outputs = {"result": {"value": "", "type": "string"}}
@ -119,17 +119,14 @@ module.exports = { main };
def get_input_form(self) -> dict[str, dict]: def get_input_form(self) -> dict[str, dict]:
res = {} res = {}
for k, v in self.arguments.items(): for k, v in self.arguments.items():
res[k] = { res[k] = {"type": "line", "name": k}
"type": "line",
"name": k
}
return res return res
class CodeExec(ToolBase, ABC): class CodeExec(ToolBase, ABC):
component_name = "CodeExec" component_name = "CodeExec"
@timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10*60))) @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10 * 60)))
def _invoke(self, **kwargs): def _invoke(self, **kwargs):
if self.check_if_canceled("CodeExec processing"): if self.check_if_canceled("CodeExec processing"):
return return
@ -138,17 +135,12 @@ class CodeExec(ToolBase, ABC):
script = kwargs.get("script", self._param.script) script = kwargs.get("script", self._param.script)
arguments = {} arguments = {}
for k, v in self._param.arguments.items(): for k, v in self._param.arguments.items():
if kwargs.get(k): if kwargs.get(k):
arguments[k] = kwargs[k] arguments[k] = kwargs[k]
continue continue
arguments[k] = self._canvas.get_variable_value(v) if v else None arguments[k] = self._canvas.get_variable_value(v) if v else None
self._execute_code( self._execute_code(language=lang, code=script, arguments=arguments)
language=lang,
code=script,
arguments=arguments
)
def _execute_code(self, language: str, code: str, arguments: dict): def _execute_code(self, language: str, code: str, arguments: dict):
import requests import requests
@ -169,7 +161,7 @@ class CodeExec(ToolBase, ABC):
if self.check_if_canceled("CodeExec execution"): if self.check_if_canceled("CodeExec execution"):
return "Task has been canceled" return "Task has been canceled"
resp = requests.post(url=f"http://{settings.SANDBOX_HOST}:9385/run", json=code_req, timeout=int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10*60))) resp = requests.post(url=f"http://{settings.SANDBOX_HOST}:9385/run", json=code_req, timeout=int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10 * 60)))
logging.info(f"http://{settings.SANDBOX_HOST}:9385/run, code_req: {code_req}, resp.status_code {resp.status_code}:") logging.info(f"http://{settings.SANDBOX_HOST}:9385/run, code_req: {code_req}, resp.status_code {resp.status_code}:")
if self.check_if_canceled("CodeExec execution"): if self.check_if_canceled("CodeExec execution"):
@ -183,35 +175,10 @@ class CodeExec(ToolBase, ABC):
if stderr: if stderr:
self.set_output("_ERROR", stderr) self.set_output("_ERROR", stderr)
return return
try: raw_stdout = body.get("stdout", "")
rt = eval(body.get("stdout", "")) parsed_stdout = self._deserialize_stdout(raw_stdout)
except Exception: logging.info(f"[CodeExec]: http://{settings.SANDBOX_HOST}:9385/run -> {parsed_stdout}")
rt = body.get("stdout", "") self._populate_outputs(parsed_stdout, raw_stdout)
logging.info(f"http://{settings.SANDBOX_HOST}:9385/run -> {rt}")
if isinstance(rt, tuple):
for i, (k, o) in enumerate(self._param.outputs.items()):
if self.check_if_canceled("CodeExec execution"):
return
if k.find("_") == 0:
continue
o["value"] = rt[i]
elif isinstance(rt, dict):
for i, (k, o) in enumerate(self._param.outputs.items()):
if self.check_if_canceled("CodeExec execution"):
return
if k not in rt or k.find("_") == 0:
continue
o["value"] = rt[k]
else:
for i, (k, o) in enumerate(self._param.outputs.items()):
if self.check_if_canceled("CodeExec execution"):
return
if k.find("_") == 0:
continue
o["value"] = rt
else: else:
self.set_output("_ERROR", "There is no response from sandbox") self.set_output("_ERROR", "There is no response from sandbox")
@ -228,3 +195,149 @@ class CodeExec(ToolBase, ABC):
def thoughts(self) -> str: def thoughts(self) -> str:
return "Running a short script to process data." return "Running a short script to process data."
def _deserialize_stdout(self, stdout: str):
text = str(stdout).strip()
if not text:
return ""
for loader in (json.loads, ast.literal_eval):
try:
return loader(text)
except Exception:
continue
return text
def _coerce_output_value(self, value, expected_type: Optional[str]):
if expected_type is None:
return value
etype = expected_type.strip().lower()
inner_type = None
if etype.startswith("array<") and etype.endswith(">"):
inner_type = etype[6:-1].strip()
etype = "array"
try:
if etype == "string":
return "" if value is None else str(value)
if etype == "number":
if value is None or value == "":
return None
if isinstance(value, (int, float)):
return value
if isinstance(value, str):
try:
return float(value)
except Exception:
return value
return float(value)
if etype == "boolean":
if isinstance(value, bool):
return value
if isinstance(value, str):
lv = value.lower()
if lv in ("true", "1", "yes", "y", "on"):
return True
if lv in ("false", "0", "no", "n", "off"):
return False
return bool(value)
if etype == "array":
candidate = value
if isinstance(candidate, str):
parsed = self._deserialize_stdout(candidate)
candidate = parsed
if isinstance(candidate, tuple):
candidate = list(candidate)
if not isinstance(candidate, list):
candidate = [] if candidate is None else [candidate]
if inner_type == "string":
return ["" if v is None else str(v) for v in candidate]
if inner_type == "number":
coerced = []
for v in candidate:
try:
if v is None or v == "":
coerced.append(None)
elif isinstance(v, (int, float)):
coerced.append(v)
else:
coerced.append(float(v))
except Exception:
coerced.append(v)
return coerced
return candidate
if etype == "object":
if isinstance(value, dict):
return value
if isinstance(value, str):
parsed = self._deserialize_stdout(value)
if isinstance(parsed, dict):
return parsed
return value
except Exception:
return value
return value
def _populate_outputs(self, parsed_stdout, raw_stdout: str):
outputs_items = list(self._param.outputs.items())
logging.info(f"[CodeExec]: outputs schema keys: {[k for k, _ in outputs_items]}")
if not outputs_items:
return
if isinstance(parsed_stdout, dict):
for key, meta in outputs_items:
if key.startswith("_"):
continue
val = self._get_by_path(parsed_stdout, key)
coerced = self._coerce_output_value(val, meta.get("type"))
logging.info(f"[CodeExec]: populate dict key='{key}' raw='{val}' coerced='{coerced}'")
self.set_output(key, coerced)
return
if isinstance(parsed_stdout, (list, tuple)):
for idx, (key, meta) in enumerate(outputs_items):
if key.startswith("_"):
continue
val = parsed_stdout[idx] if idx < len(parsed_stdout) else None
coerced = self._coerce_output_value(val, meta.get("type"))
logging.info(f"[CodeExec]: populate list key='{key}' raw='{val}' coerced='{coerced}'")
self.set_output(key, coerced)
return
default_val = parsed_stdout if parsed_stdout is not None else raw_stdout
for idx, (key, meta) in enumerate(outputs_items):
if key.startswith("_"):
continue
val = default_val if idx == 0 else None
coerced = self._coerce_output_value(val, meta.get("type"))
logging.info(f"[CodeExec]: populate scalar key='{key}' raw='{val}' coerced='{coerced}'")
self.set_output(key, coerced)
def _get_by_path(self, data, path: str):
if not path:
return None
cur = data
for part in path.split("."):
part = part.strip()
if not part:
return None
if isinstance(cur, dict):
cur = cur.get(part)
elif isinstance(cur, list):
try:
idx = int(part)
cur = cur[idx]
except Exception:
return None
else:
return None
if cur is None:
return None
logging.info(f"[CodeExec]: resolve path '{path}' -> {cur}")
return cur