Feat: Redesign and refactor agent module (#9113)

### What problem does this PR solve?

#9082 #6365

<u> **WARNING: it's not compatible with the older version of `Agent`
module, which means that `Agent` from older versions can not work
anymore.**</u>

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
Kevin Hu
2025-07-30 19:41:09 +08:00
committed by GitHub
parent 07e37560fc
commit d9fe279dde
124 changed files with 7744 additions and 18226 deletions

View File

@ -13,14 +13,21 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
import logging
import base64
import json
import logging
import time
from concurrent.futures import ThreadPoolExecutor
from copy import deepcopy
from functools import partial
import pandas as pd
from typing import Any, Union, Tuple
from agent.component import component_class
from agent.component.base import ComponentBase
from api.db.services.file_service import FileService
from api.utils import get_uuid, hash_str2int
from rag.prompts.prompts import chunks_format
from rag.utils.redis_conn import REDIS_CONN
class Canvas:
@ -35,14 +42,6 @@ class Canvas:
"downstream": ["answer_0"],
"upstream": [],
},
"answer_0": {
"obj": {
"component_name": "Answer",
"params": {}
},
"downstream": ["retrieval_0"],
"upstream": ["begin", "generate_0"],
},
"retrieval_0": {
"obj": {
"component_name": "Retrieval",
@ -61,19 +60,28 @@ class Canvas:
}
},
"history": [],
"messages": [],
"reference": [],
"path": [["begin"]],
"answer": []
"path": ["begin"],
"retrieval": {"chunks": [], "doc_aggs": []},
"globals": {
"sys.query": "",
"sys.user_id": tenant_id,
"sys.conversation_turns": 0,
"sys.files": []
}
}
"""
def __init__(self, dsl: str, tenant_id=None):
def __init__(self, dsl: str, tenant_id=None, task_id=None):
self.path = []
self.history = []
self.messages = []
self.answer = []
self.components = {}
self.error = ""
self.globals = {
"sys.query": "",
"sys.user_id": tenant_id,
"sys.conversation_turns": 0,
"sys.files": []
}
self.dsl = json.loads(dsl) if dsl else {
"components": {
"begin": {
@ -89,13 +97,17 @@ class Canvas:
}
},
"history": [],
"messages": [],
"reference": [],
"path": [],
"answer": []
"retrieval": [],
"globals": {
"sys.query": "",
"sys.user_id": "",
"sys.conversation_turns": 0,
"sys.files": []
}
}
self._tenant_id = tenant_id
self._embed_id = ""
self.task_id = task_id if task_id else get_uuid()
self.load()
def load(self):
@ -105,33 +117,31 @@ class Canvas:
cpn_nms.add(cpn["obj"]["component_name"])
assert "Begin" in cpn_nms, "There have to be an 'Begin' component."
assert "Answer" in cpn_nms, "There have to be an 'Answer' component."
for k, cpn in self.components.items():
cpn_nms.add(cpn["obj"]["component_name"])
param = component_class(cpn["obj"]["component_name"] + "Param")()
param.update(cpn["obj"]["params"])
param.check()
try:
param.check()
except Exception as e:
raise ValueError(self.get_component_name(k) + f": {e}")
cpn["obj"] = component_class(cpn["obj"]["component_name"])(self, k, param)
if cpn["obj"].component_name == "Categorize":
for _, desc in param.category_description.items():
if desc["to"] not in cpn["downstream"]:
cpn["downstream"].append(desc["to"])
self.path = self.dsl["path"]
self.history = self.dsl["history"]
self.messages = self.dsl["messages"]
self.answer = self.dsl["answer"]
self.reference = self.dsl["reference"]
self._embed_id = self.dsl.get("embed_id", "")
self.globals = self.dsl["globals"]
self.retrieval = self.dsl["retrieval"]
self.memory = self.dsl.get("memory", [])
def __str__(self):
self.dsl["path"] = self.path
self.dsl["history"] = self.history
self.dsl["messages"] = self.messages
self.dsl["answer"] = self.answer
self.dsl["reference"] = self.reference
self.dsl["embed_id"] = self._embed_id
self.dsl["globals"] = self.globals
self.dsl["task_id"] = self.task_id
self.dsl["retrieval"] = self.retrieval
self.dsl["memory"] = self.memory
dsl = {
"components": {}
}
@ -150,161 +160,245 @@ class Canvas:
dsl["components"][k][c] = deepcopy(cpn[c])
return json.dumps(dsl, ensure_ascii=False)
def reset(self):
def reset(self, mem=False):
self.path = []
self.history = []
self.messages = []
self.answer = []
self.reference = []
if not mem:
self.history = []
self.retrieval = []
self.memory = []
for k, cpn in self.components.items():
self.components[k]["obj"].reset()
self._embed_id = ""
for k in self.globals.keys():
if isinstance(self.globals[k], str):
self.globals[k] = ""
elif isinstance(self.globals[k], int):
self.globals[k] = 0
elif isinstance(self.globals[k], float):
self.globals[k] = 0
elif isinstance(self.globals[k], list):
self.globals[k] = []
elif isinstance(self.globals[k], dict):
self.globals[k] = {}
else:
self.globals[k] = None
try:
REDIS_CONN.delete(f"{self.task_id}-logs")
except Exception as e:
logging.exception(e)
def get_component_name(self, cid):
for n in self.dsl["graph"]["nodes"]:
for n in self.dsl.get("graph", {}).get("nodes", []):
if cid == n["id"]:
return n["data"]["name"]
return ""
def run(self, running_hint_text = "is running...🕞", **kwargs):
if not running_hint_text or not isinstance(running_hint_text, str):
running_hint_text = "is running...🕞"
bypass_begin = bool(kwargs.get("bypass_begin", False))
def run(self, **kwargs):
st = time.perf_counter()
self.message_id = get_uuid()
created_at = int(time.time())
self.add_user_input(kwargs.get("query"))
if self.answer:
cpn_id = self.answer[0]
self.answer.pop(0)
try:
ans = self.components[cpn_id]["obj"].run(self.history, **kwargs)
except Exception as e:
ans = ComponentBase.be_output(str(e))
self.path[-1].append(cpn_id)
if kwargs.get("stream"):
for an in ans():
yield an
else:
yield ans
return
if not self.path:
self.components["begin"]["obj"].run(self.history, **kwargs)
self.path.append(["begin"])
if bypass_begin:
cpn = self.get_component("begin")
downstream = cpn["downstream"]
self.path.append(downstream)
self.path.append([])
ran = -1
waiting = []
without_dependent_checking = []
def prepare2run(cpns):
nonlocal ran, ans
for c in cpns:
if self.path[-1] and c == self.path[-1][-1]:
continue
cpn = self.components[c]["obj"]
if cpn.component_name == "Answer":
self.answer.append(c)
for k in kwargs.keys():
if k in ["query", "user_id", "files"] and kwargs[k]:
if k == "files":
self.globals[f"sys.{k}"] = self.get_files(kwargs[k])
else:
logging.debug(f"Canvas.prepare2run: {c}")
if c not in without_dependent_checking:
cpids = cpn.get_dependent_components()
if any([cc not in self.path[-1] for cc in cpids]):
if c not in waiting:
waiting.append(c)
continue
yield "*'{}'* {}".format(self.get_component_name(c), running_hint_text)
self.globals[f"sys.{k}"] = kwargs[k]
if not self.globals["sys.conversation_turns"] :
self.globals["sys.conversation_turns"] = 0
self.globals["sys.conversation_turns"] += 1
if cpn.component_name.lower() == "iteration":
st_cpn = cpn.get_start()
assert st_cpn, "Start component not found for Iteration."
if not st_cpn["obj"].end():
cpn = st_cpn["obj"]
c = cpn._id
def decorate(event, dt):
nonlocal created_at
return {
"event": event,
#"conversation_id": "f3cc152b-24b0-4258-a1a1-7d5e9fc8a115",
"message_id": self.message_id,
"created_at": created_at,
"task_id": self.task_id,
"data": dt
}
try:
ans = cpn.run(self.history, **kwargs)
except Exception as e:
logging.exception(f"Canvas.run got exception: {e}")
self.path[-1].append(c)
ran += 1
raise e
self.path[-1].append(c)
if not self.path or self.path[-1].lower().find("userfillup") < 0:
self.path.append("begin")
self.retrieval.append({"chunks": [], "doc_aggs": []})
ran += 1
yield decorate("workflow_started", {"inputs": kwargs.get("inputs")})
self.retrieval.append({"chunks": {}, "doc_aggs": {}})
downstream = self.components[self.path[-2][-1]]["downstream"]
if not downstream and self.components[self.path[-2][-1]].get("parent_id"):
cid = self.path[-2][-1]
pid = self.components[cid]["parent_id"]
o, _ = self.components[cid]["obj"].output(allow_partial=False)
oo, _ = self.components[pid]["obj"].output(allow_partial=False)
self.components[pid]["obj"].set_output(pd.concat([oo, o], ignore_index=True).dropna())
downstream = [pid]
def _run_batch(f, t):
with ThreadPoolExecutor(max_workers=5) as executor:
thr = []
for i in range(f, t):
cpn = self.get_component_obj(self.path[i])
if cpn.component_name.lower() in ["begin", "userfillup"]:
thr.append(executor.submit(cpn.invoke, inputs=kwargs.get("inputs", {})))
else:
thr.append(executor.submit(cpn.invoke, **cpn.get_input()))
for t in thr:
t.result()
for m in prepare2run(downstream):
yield {"content": m, "running_status": True}
def _node_finished(cpn_obj):
return decorate("node_finished",{
"inputs": cpn_obj.get_input_values(),
"outputs": cpn_obj.output(),
"component_id": cpn_obj._id,
"component_name": self.get_component_name(cpn_obj._id),
"component_type": self.get_component_type(cpn_obj._id),
"error": cpn_obj.error(),
"elapsed_time": time.perf_counter() - cpn_obj.output("_created_time"),
"created_at": cpn_obj.output("_created_time"),
})
while 0 <= ran < len(self.path[-1]):
logging.debug(f"Canvas.run: {ran} {self.path}")
cpn_id = self.path[-1][ran]
cpn = self.get_component(cpn_id)
if not any([cpn["downstream"], cpn.get("parent_id"), waiting]):
def _append_path(cpn_id):
if self.path[-1] == cpn_id:
return
self.path.append(cpn_id)
def _extend_path(cpn_ids):
for cpn_id in cpn_ids:
_append_path(cpn_id)
self.error = ""
idx = len(self.path) - 1
partials = []
while idx < len(self.path):
to = len(self.path)
for i in range(idx, to):
yield decorate("node_started", {
"inputs": None, "created_at": int(time.time()),
"component_id": self.path[i],
"component_name": self.get_component_name(self.path[i]),
"component_type": self.get_component_type(self.path[i]),
})
_run_batch(idx, to)
# post processing of components invocation
for i in range(idx, to):
cpn = self.get_component(self.path[i])
if cpn["obj"].component_name.lower() == "message":
if isinstance(cpn["obj"].output("content"), partial):
_m = ""
for m in cpn["obj"].output("content")():
if not m:
continue
if m == "<think>":
yield decorate("message", {"content": "", "start_to_think": True})
elif m == "</think>":
yield decorate("message", {"content": "", "end_to_think": True})
else:
yield decorate("message", {"content": m})
_m += m
cpn["obj"].set_output("content", _m)
else:
yield decorate("message", {"content": cpn["obj"].output("content")})
yield decorate("message_end", {"reference": self.get_reference()})
while partials:
_cpn = self.get_component(partials[0])
if isinstance(_cpn["obj"].output("content"), partial):
break
yield _node_finished(_cpn["obj"])
partials.pop(0)
if cpn["obj"].error():
ex = cpn["obj"].exception_handler()
if ex and ex["comment"]:
yield decorate("message", {"content": ex["comment"]})
yield decorate("message_end", {})
if ex and ex["goto"]:
self.path.append(ex["goto"])
elif not ex or not ex["default_value"]:
self.error = cpn["obj"].error()
if cpn["obj"].component_name.lower() != "iteration":
if isinstance(cpn["obj"].output("content"), partial):
if self.error:
cpn["obj"].set_output("content", None)
yield _node_finished(cpn["obj"])
else:
partials.append(self.path[i])
else:
yield _node_finished(cpn["obj"])
if cpn["obj"].component_name.lower() == "iterationitem" and cpn["obj"].end():
iter = cpn["obj"].get_parent()
yield _node_finished(iter)
_extend_path(self.get_component(cpn["parent_id"])["downstream"])
elif cpn["obj"].component_name.lower() in ["categorize", "switch"]:
_extend_path(cpn["obj"].output("_next"))
elif cpn["obj"].component_name.lower() == "iteration":
_append_path(cpn["obj"].get_start())
elif not cpn["downstream"] and cpn["obj"].get_parent():
_append_path(cpn["obj"].get_parent().get_start())
else:
_extend_path(cpn["downstream"])
if self.error:
logging.error(f"Runtime Error: {self.error}")
break
idx = to
loop = self._find_loop()
if loop:
raise OverflowError(f"Too much loops: {loop}")
if any([self.get_component(c)["obj"].component_name.lower() == "userfillup" for c in self.path[idx:]]):
path = [c for c in self.path[idx:] if self.get_component(c)["obj"].component_name.lower() == "userfillup"]
path.extend([c for c in self.path[idx:] if self.get_component(c)["obj"].component_name.lower() != "userfillup"])
another_inputs = {}
tips = ""
for c in path:
o = self.get_component(c)["obj"]
if o.component_name.lower() == "userfillup":
another_inputs.update(o.get_input_elements())
if o.get_param("enable_tips"):
tips = o.get_param("tips")
self.path = path
yield decorate("user_inputs", {"inputs": another_inputs, "tips": tips})
return
downstream = []
if cpn["obj"].component_name.lower() in ["switch", "categorize", "relevant"]:
switch_out = cpn["obj"].output()[1].iloc[0, 0]
assert switch_out in self.components, \
"{}'s output: {} not valid.".format(cpn_id, switch_out)
downstream = [switch_out]
else:
downstream = cpn["downstream"]
self.path = self.path[:idx]
if not self.error:
yield decorate("workflow_finished",
{
"inputs": kwargs.get("inputs"),
"outputs": self.get_component_obj(self.path[-1]).output(),
"elapsed_time": time.perf_counter() - st,
"created_at": st,
})
self.history.append(("assistant", self.get_component_obj(self.path[-1]).output()))
if not downstream and cpn.get("parent_id"):
pid = cpn["parent_id"]
_, o = cpn["obj"].output(allow_partial=False)
_, oo = self.components[pid]["obj"].output(allow_partial=False)
self.components[pid]["obj"].set_output(pd.concat([oo.dropna(axis=1), o.dropna(axis=1)], ignore_index=True).dropna())
downstream = [pid]
def get_component(self, cpn_id) -> Union[None, dict[str, Any]]:
return self.components.get(cpn_id)
for m in prepare2run(downstream):
yield {"content": m, "running_status": True}
def get_component_obj(self, cpn_id) -> ComponentBase:
return self.components.get(cpn_id)["obj"]
if ran >= len(self.path[-1]) and waiting:
without_dependent_checking = waiting
waiting = []
for m in prepare2run(without_dependent_checking):
yield {"content": m, "running_status": True}
without_dependent_checking = []
ran -= 1
def get_component_type(self, cpn_id) -> str:
return self.components.get(cpn_id)["obj"].component_name
if self.answer:
cpn_id = self.answer[0]
self.answer.pop(0)
ans = self.components[cpn_id]["obj"].run(self.history, **kwargs)
self.path[-1].append(cpn_id)
if kwargs.get("stream"):
assert isinstance(ans, partial)
for an in ans():
yield an
else:
yield ans
def get_component_input_form(self, cpn_id) -> dict:
return self.components.get(cpn_id)["obj"].get_input_form()
else:
raise Exception("The dialog flow has no way to interact with you. Please add an 'Interact' component to the end of the flow.")
def is_reff(self, exp: str) -> bool:
exp = exp.strip("{").strip("}")
if exp.find("@") < 0:
return exp in self.globals
arr = exp.split("@")
if len(arr) != 2:
return False
if self.get_component(arr[0]) is None:
return False
return True
def get_component(self, cpn_id):
return self.components[cpn_id]
def get_variable_value(self, exp: str) -> Any:
exp = exp.strip("{").strip("}").strip(" ").strip("{").strip("}")
if exp.find("@") < 0:
return self.globals[exp]
cpn_id, var_nm = exp.split("@")
cpn = self.get_component(cpn_id)
if not cpn:
raise Exception(f"Can't find variable: '{cpn_id}@{var_nm}'")
return cpn["obj"].output(var_nm)
def get_tenant_id(self):
return self._tenant_id
@ -314,8 +408,8 @@ class Canvas:
if window_size <= 0:
return convs
for role, obj in self.history[window_size * -1:]:
if isinstance(obj, list) and obj and all([isinstance(o, dict) for o in obj]):
convs.append({"role": role, "content": '\n'.join([str(s.get("content", "")) for s in obj])})
if isinstance(obj, dict):
convs.append({"role": role, "content": obj.get("content", "")})
else:
convs.append({"role": role, "content": str(obj)})
return convs
@ -323,12 +417,6 @@ class Canvas:
def add_user_input(self, question):
self.history.append(("user", question))
def set_embedding_model(self, embed_id):
self._embed_id = embed_id
def get_embedding_model(self):
return self._embed_id
def _find_loop(self, max_loops=6):
path = self.path[-1][::-1]
if len(path) < 2:
@ -363,17 +451,75 @@ class Canvas:
return self.components["begin"]["obj"]._param.prologue
def set_global_param(self, **kwargs):
for k, v in kwargs.items():
for q in self.components["begin"]["obj"]._param.query:
if k != q["key"]:
continue
q["value"] = v
self.globals.update(kwargs)
def get_preset_param(self):
return self.components["begin"]["obj"]._param.query
return self.components["begin"]["obj"]._param.inputs
def get_component_input_elements(self, cpnnm):
return self.components[cpnnm]["obj"].get_input_elements()
def set_component_infor(self, cpn_id, infor):
self.components[cpn_id]["obj"].set_infor(infor)
def get_files(self, files: Union[None, list[dict]]) -> list[str]:
if not files:
return []
def image_to_base64(file):
return "data:{};base64,{}".format(file["mime_type"],
base64.b64encode(FileService.get_blob(file["created_by"], file["id"])).decode("utf-8"))
exe = ThreadPoolExecutor(max_workers=5)
threads = []
for file in files:
if file["mime_type"].find("image") >=0:
threads.append(exe.submit(image_to_base64, file))
continue
threads.append(exe.submit(FileService.parse, file["name"], FileService.get_blob(file["created_by"], file["id"]), True, file["created_by"]))
return [th.result() for th in threads]
def tool_use_callback(self, agent_id: str, func_name: str, params: dict, result: Any):
agent_ids = agent_id.split("-->")
agent_name = self.get_component_name(agent_ids[0])
path = agent_name if len(agent_ids) < 2 else agent_name+"-->"+"-->".join(agent_ids[1:])
try:
bin = REDIS_CONN.get(f"{self.task_id}-{self.message_id}-logs")
if bin:
obj = json.loads(bin.encode("utf-8"))
if obj[-1]["component_id"] == agent_ids[0]:
obj[-1]["trace"].append({"path": path, "tool_name": func_name, "arguments": params, "result": result})
else:
obj.append({
"component_id": agent_ids[0],
"trace": [{"path": path, "tool_name": func_name, "arguments": params, "result": result}]
})
else:
obj = [{
"component_id": agent_ids[0],
"trace": [{"path": path, "tool_name": func_name, "arguments": params, "result": result}]
}]
REDIS_CONN.set_obj(f"{self.task_id}-{self.message_id}-logs", obj, 60*10)
except Exception as e:
logging.exception(e)
def add_refernce(self, chunks: list[object], doc_infos: list[object]):
if not self.retrieval:
self.retrieval = [{"chunks": {}, "doc_aggs": {}}]
r = self.retrieval[-1]
for ck in chunks_format({"chunks": chunks}):
cid = hash_str2int(ck["id"], 100)
if cid not in r:
r["chunks"][cid] = ck
for doc in doc_infos:
if doc["doc_name"] not in r:
r["doc_aggs"][doc["doc_name"]] = doc
def get_reference(self):
if not self.retrieval:
return {"chunks": {}, "doc_aggs": {}}
return self.retrieval[-1]
def add_memory(self, user:str, assist:str, summ: str):
self.memory.append((user, assist, summ))
def get_memory(self) -> list[Tuple]:
return self.memory