Feat: Redesign and refactor agent module (#9113)

### What problem does this PR solve?

#9082 #6365

<u>**WARNING: This change is not compatible with older versions of the `Agent` module, which means that agents created with older versions will no longer work.**</u>

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
2025-12-08 20:42:30 +08:00 · 2025-07-30 19:41:09 +08:00
parent 07e37560fc
commit d9fe279dde
124 changed files with 7744 additions and 18226 deletions
--- a/api/db/db_models.py
+++ b/api/db/db_models.py
@ -463,6 +463,7 @@ class DataBaseModel(BaseModel):


@DB.connection_context()
+@DB.lock("init_database_tables", 60)
 def init_database_tables(alter_fields=[]):
    members = inspect.getmembers(sys.modules[__name__], inspect.isclass)
    table_objs = []
@ -474,7 +475,7 @@ def init_database_tables(alter_fields=[]):
            if not obj.table_exists():
                logging.debug(f"start create table {obj.__name__}")
                try:
-                    obj.create_table()
+                    obj.create_table(safe=True)
                    logging.debug(f"create table success: {obj.__name__}")
                except Exception as e:
                    logging.exception(e)
@ -798,6 +799,7 @@ class API4Conversation(DataBaseModel):
    duration = FloatField(default=0, index=True)
    round = IntegerField(default=0, index=True)
    thumb_up = IntegerField(default=0, index=True)
+    errors = TextField(null=True, help_text="errors")

    class Meta:
        db_table = "api_4_conversation"
@ -1009,4 +1011,8 @@ def migrate_db():
        migrate(migrator.add_column("document", "suffix", CharField(max_length=32, null=False, default="", help_text="The real file extension suffix", index=True)))
    except Exception:
        pass
-    logging.disable(logging.NOTSET)
+    try:
+        migrate(migrator.add_column("api_4_conversation", "errors", TextField(null=True, help_text="errors")))
+    except Exception:
+        pass
+    logging.disable(logging.NOTSET)
--- a/api/db/init_data.py
+++ b/api/db/init_data.py
@ -154,6 +154,11 @@ def init_llm_factory():

 def add_graph_templates():
    dir = os.path.join(get_project_base_directory(), "agent", "templates")
+    CanvasTemplateService.filter_delete([1 == 1])
+    if not os.path.exists(dir):
+        logging.warning("Missing agent templates!")
+        return
+
    for fnm in os.listdir(dir):
        try:
            cnvs = json.load(open(os.path.join(dir, fnm), "r",encoding="utf-8"))
@ -162,7 +167,7 @@ def add_graph_templates():
            except Exception:
                CanvasTemplateService.update_by_id(cnvs["id"], cnvs)
        except Exception:
-            logging.exception("Add graph templates error: ")
+            logging.exception("Add agent templates error: ")


 def init_web_data():
--- a/api/db/services/api_service.py
+++ b/api/db/services/api_service.py
@ -43,7 +43,9 @@ class API4ConversationService(CommonService):
    @DB.connection_context()
    def get_list(cls, dialog_id, tenant_id,
                 page_number, items_per_page,
-                 orderby, desc, id, user_id=None, include_dsl=True):
+                 orderby, desc, id, user_id=None, include_dsl=True, keywords="",
+                 from_date=None, to_date=None
+                 ):
        if include_dsl:
            sessions = cls.model.select().where(cls.model.dialog_id == dialog_id)
        else:
@ -53,13 +55,20 @@ class API4ConversationService(CommonService):
            sessions = sessions.where(cls.model.id == id)
        if user_id:
            sessions = sessions.where(cls.model.user_id == user_id)
+        if keywords:
+            sessions = sessions.where(peewee.fn.LOWER(cls.model.message).contains(keywords.lower()))
+        if from_date:
+            sessions = sessions.where(cls.model.create_date >= from_date)
+        if to_date:
+            sessions = sessions.where(cls.model.create_date <= to_date)
        if desc:
            sessions = sessions.order_by(cls.model.getter_by(orderby).desc())
        else:
            sessions = sessions.order_by(cls.model.getter_by(orderby).asc())
+        count = sessions.count()
        sessions = sessions.paginate(page_number, items_per_page)

-        return list(sessions.dicts())
+        return count, list(sessions.dicts())

    @classmethod
    @DB.connection_context()
--- a/api/db/services/canvas_service.py
+++ b/api/db/services/canvas_service.py
@ -14,6 +14,7 @@
 #  limitations under the License.
 #
 import json
+import logging
 import time
 import traceback
 from uuid import uuid4
@ -22,11 +23,12 @@ from api.db import TenantPermission
 from api.db.db_models import DB, CanvasTemplate, User, UserCanvas, API4Conversation
 from api.db.services.api_service import API4ConversationService
 from api.db.services.common_service import CommonService
-from api.db.services.conversation_service import structure_answer
 from api.utils import get_uuid
 from api.utils.api_utils import get_data_openai
 import tiktoken
 from peewee import fn
+
+
 class CanvasTemplateService(CommonService):
    model = CanvasTemplate

@ -79,7 +81,7 @@ class UserCanvasService(CommonService):
            # obj = cls.model.query(id=pid)[0]
            return True, agents.dicts()[0]
        except Exception as e:
-            print(e)
+            logging.exception(e)
            return False, None
     
    @classmethod
@ -119,120 +121,58 @@ class UserCanvasService(CommonService):
        count = agents.count()
        agents = agents.paginate(page_number, items_per_page)
        return list(agents.dicts()), count
-   

-def completion(tenant_id, agent_id, question, session_id=None, stream=True, **kwargs):
-    e, cvs = UserCanvasService.get_by_id(agent_id)
-    assert e, "Agent not found."
-    assert cvs.user_id == tenant_id, "You do not own the agent."
-    if not isinstance(cvs.dsl,str):
-        cvs.dsl = json.dumps(cvs.dsl, ensure_ascii=False)
-    canvas = Canvas(cvs.dsl, tenant_id)
-    canvas.reset()
-    message_id = str(uuid4())
-    if not session_id:
-        query = canvas.get_preset_param()
-        if query:
-            for ele in query:
-                if not ele["optional"]:
-                    if not kwargs.get(ele["key"]):
-                        assert False, f"`{ele['key']}` is required"
-                    ele["value"] = kwargs[ele["key"]]
-                if ele["optional"]:
-                    if kwargs.get(ele["key"]):
-                        ele["value"] = kwargs[ele['key']]
-                    else:
-                        if "value" in ele:
-                            ele.pop("value")
-        cvs.dsl = json.loads(str(canvas))
+
+def completion(tenant_id, agent_id, session_id=None, **kwargs):
+    query = kwargs.get("query", "")
+    files = kwargs.get("files", [])
+    inputs = kwargs.get("inputs", {})
+    user_id = kwargs.get("user_id", "")
+
+    if session_id:
+        e, conv = API4ConversationService.get_by_id(session_id)
+        assert e, "Session not found!"
+        if not conv.message:
+            conv.message = []
+        canvas = Canvas(json.dumps(conv.dsl), tenant_id, session_id)
+    else:
+        e, cvs = UserCanvasService.get_by_id(agent_id)
+        assert e, "Agent not found."
+        assert cvs.user_id == tenant_id, "You do not own the agent."
+        if not isinstance(cvs.dsl, str):
+            cvs.dsl = json.dumps(cvs.dsl, ensure_ascii=False)
        session_id=get_uuid()
+        canvas = Canvas(cvs.dsl, tenant_id, session_id)
        conv = {
            "id": session_id,
            "dialog_id": cvs.id,
-            "user_id": kwargs.get("user_id", "") if isinstance(kwargs, dict) else "",
-            "message": [{"role": "assistant", "content": canvas.get_prologue(), "created_at": time.time()}],
+            "user_id": user_id,
+            "message": [],
            "source": "agent",
            "dsl": cvs.dsl
        }
        API4ConversationService.save(**conv)
        conv = API4Conversation(**conv)
-    else:
-        e, conv = API4ConversationService.get_by_id(session_id)
-        assert e, "Session not found!"
-        canvas = Canvas(json.dumps(conv.dsl), tenant_id)
-        canvas.messages.append({"role": "user", "content": question, "id": message_id})
-        canvas.add_user_input(question)
-        if not conv.message:
-            conv.message = []
-        conv.message.append({
-            "role": "user",
-            "content": question,
-            "id": message_id
-        })
-        if not conv.reference:
-            conv.reference = []
-        conv.reference.append({"chunks": [], "doc_aggs": []})

-        kwargs_changed = False
-        if kwargs:
-            query = canvas.get_preset_param()
-            if query:
-                for ele in query:
-                    if ele["key"] in kwargs:
-                        if ele["value"] != kwargs[ele["key"]]:
-                            ele["value"] = kwargs[ele["key"]]
-                            kwargs_changed = True
-        if kwargs_changed:
-            conv.dsl = json.loads(str(canvas))
-            API4ConversationService.update_by_id(session_id, {"dsl": conv.dsl})
+    message_id = str(uuid4())
+    conv.message.append({
+        "role": "user",
+        "content": query,
+        "id": message_id
+    })
+    txt = ""
+    for ans in canvas.run(query=query, files=files, user_id=user_id, inputs=inputs):
+        ans["session_id"] = session_id
+        if ans["event"] == "message":
+            txt += ans["data"]["content"]
+        yield "data:" + json.dumps(ans, ensure_ascii=False) + "\n\n"

-    final_ans = {"reference": [], "content": ""}
-    if stream:
-        try:
-            for ans in canvas.run(stream=stream):
-                if ans.get("running_status"):
-                    yield "data:" + json.dumps({"code": 0, "message": "",
-                                                "data": {"answer": ans["content"],
-                                                         "running_status": True}},
-                                               ensure_ascii=False) + "\n\n"
-                    continue
-                for k in ans.keys():
-                    final_ans[k] = ans[k]
-                ans = {"answer": ans["content"], "reference": ans.get("reference", []), "param": canvas.get_preset_param()}
-                ans = structure_answer(conv, ans, message_id, session_id)
-                yield "data:" + json.dumps({"code": 0, "message": "", "data": ans},
-                                           ensure_ascii=False) + "\n\n"
+    conv.message.append({"role": "assistant", "content": txt, "created_at": time.time(), "id": message_id})
+    conv.reference = canvas.get_reference()
+    conv.errors = canvas.error
+    API4ConversationService.append_message(conv.id, conv.to_dict())

-            canvas.messages.append({"role": "assistant", "content": final_ans["content"], "created_at": time.time(), "id": message_id})
-            canvas.history.append(("assistant", final_ans["content"]))
-            if final_ans.get("reference"):
-                canvas.reference.append(final_ans["reference"])
-            conv.dsl = json.loads(str(canvas))
-            API4ConversationService.append_message(conv.id, conv.to_dict())
-        except Exception as e:
-            traceback.print_exc()
-            conv.dsl = json.loads(str(canvas))
-            API4ConversationService.append_message(conv.id, conv.to_dict())
-            yield "data:" + json.dumps({"code": 500, "message": str(e),
-                                        "data": {"answer": "**ERROR**: " + str(e), "reference": []}},
-                                       ensure_ascii=False) + "\n\n"
-        yield "data:" + json.dumps({"code": 0, "message": "", "data": True}, ensure_ascii=False) + "\n\n"

-    else:
-        for answer in canvas.run(stream=False):
-            if answer.get("running_status"):
-                continue
-            final_ans["content"] = "\n".join(answer["content"]) if "content" in answer else ""
-            canvas.messages.append({"role": "assistant", "content": final_ans["content"], "id": message_id})
-            if final_ans.get("reference"):
-                canvas.reference.append(final_ans["reference"])
-            conv.dsl = json.loads(str(canvas))
-
-            result = {"answer": final_ans["content"], "reference": final_ans.get("reference", []) , "param": canvas.get_preset_param()}
-            result = structure_answer(conv, result, message_id, session_id)
-            API4ConversationService.append_message(conv.id, conv.to_dict())
-            yield result
-            break
 def completionOpenAI(tenant_id, agent_id, question, session_id=None, stream=True, **kwargs):
    """Main function for OpenAI-compatible completions, structured similarly to the completion function."""
    tiktokenenc = tiktoken.get_encoding("cl100k_base")
--- a/api/db/services/document_service.py
+++ b/api/db/services/document_service.py
@ -27,7 +27,7 @@ import xxhash
 from peewee import fn

 from api import settings
-from api.constants import IMG_BASE64_PREFIX
+from api.constants import IMG_BASE64_PREFIX, FILE_NAME_LEN_LIMIT
 from api.db import FileType, LLMType, ParserType, StatusEnum, TaskStatus, UserTenantRole
 from api.db.db_models import DB, Document, Knowledgebase, Task, Tenant, UserTenant, File2Document, File
 from api.db.db_utils import bulk_insert_into_db
@ -100,6 +100,17 @@ class DocumentService(CommonService):
        docs = docs.paginate(page_number, items_per_page)
        return list(docs.dicts()), count

+    @classmethod
+    @DB.connection_context()
+    def check_doc_health(cls, tenant_id: str, filename):
+        import os
+        MAX_FILE_NUM_PER_USER = int(os.environ.get("MAX_FILE_NUM_PER_USER", 0))
+        if MAX_FILE_NUM_PER_USER > 0 and DocumentService.get_doc_count(tenant_id) >= MAX_FILE_NUM_PER_USER:
+            raise RuntimeError("Exceed the maximum file number of a free user!")
+        if len(filename.encode("utf-8")) > FILE_NAME_LEN_LIMIT:
+            raise RuntimeError("Exceed the maximum length of file name!")
+        return True
+
    @classmethod
    @DB.connection_context()
    def get_by_kb_id(cls, kb_id, page_number, items_per_page,
@ -258,13 +269,13 @@ class DocumentService(CommonService):
            )
            if len(graph_source) > 0 and doc.id in list(graph_source.values())[0]["source_id"]:
                settings.docStoreConn.update({"kb_id": doc.kb_id, "knowledge_graph_kwd": ["entity", "relation", "graph", "subgraph", "community_report"], "source_id": doc.id},
-                                            {"remove": {"source_id": doc.id}},
-                                            search.index_name(tenant_id), doc.kb_id)
+                                             {"remove": {"source_id": doc.id}},
+                                             search.index_name(tenant_id), doc.kb_id)
                settings.docStoreConn.update({"kb_id": doc.kb_id, "knowledge_graph_kwd": ["graph"]},
-                                            {"removed_kwd": "Y"},
-                                            search.index_name(tenant_id), doc.kb_id)
+                                             {"removed_kwd": "Y"},
+                                             search.index_name(tenant_id), doc.kb_id)
                settings.docStoreConn.delete({"kb_id": doc.kb_id, "knowledge_graph_kwd": ["entity", "relation", "graph", "subgraph", "community_report"], "must_not": {"exists": "source_id"}},
-                                            search.index_name(tenant_id), doc.kb_id)
+                                             search.index_name(tenant_id), doc.kb_id)
        except Exception:
            pass
        return cls.delete_by_id(doc.id)
@ -323,9 +334,9 @@ class DocumentService(CommonService):
                "Document not found which is supposed to be there")
        num = Knowledgebase.update(
            token_num=Knowledgebase.token_num +
-            token_num,
+                      token_num,
            chunk_num=Knowledgebase.chunk_num +
-            chunk_num).where(
+                      chunk_num).where(
            Knowledgebase.id == kb_id).execute()
        return num

@ -341,9 +352,9 @@ class DocumentService(CommonService):
                "Document not found which is supposed to be there")
        num = Knowledgebase.update(
            token_num=Knowledgebase.token_num -
-            token_num,
+                      token_num,
            chunk_num=Knowledgebase.chunk_num -
-            chunk_num
+                      chunk_num
        ).where(
            Knowledgebase.id == kb_id).execute()
        return num
@ -356,9 +367,9 @@ class DocumentService(CommonService):

        num = Knowledgebase.update(
            token_num=Knowledgebase.token_num -
-            doc.token_num,
+                      doc.token_num,
            chunk_num=Knowledgebase.chunk_num -
-            doc.chunk_num,
+                      doc.chunk_num,
            doc_num=Knowledgebase.doc_num - 1
        ).where(
            Knowledgebase.id == doc.kb_id).execute()
@ -388,7 +399,7 @@ class DocumentService(CommonService):
        docs = cls.model.select(
            Knowledgebase.tenant_id).join(
            Knowledgebase, on=(
-                Knowledgebase.id == cls.model.kb_id)).where(
+                    Knowledgebase.id == cls.model.kb_id)).where(
            cls.model.id == doc_id, Knowledgebase.status == StatusEnum.VALID.value)
        docs = docs.dicts()
        if not docs:
@ -410,7 +421,7 @@ class DocumentService(CommonService):
        docs = cls.model.select(
            Knowledgebase.tenant_id).join(
            Knowledgebase, on=(
-                Knowledgebase.id == cls.model.kb_id)).where(
+                    Knowledgebase.id == cls.model.kb_id)).where(
            cls.model.name == name, Knowledgebase.status == StatusEnum.VALID.value)
        docs = docs.dicts()
        if not docs:
@ -423,7 +434,7 @@ class DocumentService(CommonService):
        docs = cls.model.select(
            cls.model.id).join(
            Knowledgebase, on=(
-                Knowledgebase.id == cls.model.kb_id)
+                    Knowledgebase.id == cls.model.kb_id)
        ).join(UserTenant, on=(UserTenant.tenant_id == Knowledgebase.tenant_id)
               ).where(cls.model.id == doc_id, UserTenant.user_id == user_id).paginate(0, 1)
        docs = docs.dicts()
@ -435,12 +446,12 @@ class DocumentService(CommonService):
    @DB.connection_context()
    def accessible4deletion(cls, doc_id, user_id):
        docs = cls.model.select(cls.model.id
-        ).join(
+                                ).join(
            Knowledgebase, on=(
-                Knowledgebase.id == cls.model.kb_id)
+                    Knowledgebase.id == cls.model.kb_id)
        ).join(
            UserTenant, on=(
-                (UserTenant.tenant_id == Knowledgebase.created_by) & (UserTenant.user_id == user_id))
+                    (UserTenant.tenant_id == Knowledgebase.created_by) & (UserTenant.user_id == user_id))
        ).where(
            cls.model.id == doc_id,
            UserTenant.status == StatusEnum.VALID.value,
@ -457,7 +468,7 @@ class DocumentService(CommonService):
        docs = cls.model.select(
            Knowledgebase.embd_id).join(
            Knowledgebase, on=(
-                Knowledgebase.id == cls.model.kb_id)).where(
+                    Knowledgebase.id == cls.model.kb_id)).where(
            cls.model.id == doc_id, Knowledgebase.status == StatusEnum.VALID.value)
        docs = docs.dicts()
        if not docs:
@ -499,7 +510,7 @@ class DocumentService(CommonService):
        if not doc_id:
            return
        return doc_id[0]["id"]
-    
+
    @classmethod
    @DB.connection_context()
    def get_doc_ids_by_doc_names(cls, doc_names):
@ -612,7 +623,7 @@ class DocumentService(CommonService):
                info = {
                    "process_duration": datetime.timestamp(
                        datetime.now()) -
-                    d["process_begin_at"].timestamp(),
+                                       d["process_begin_at"].timestamp(),
                    "run": status}
                if prg != 0:
                    info["progress"] = prg
--- a/api/db/services/file_service.py
+++ b/api/db/services/file_service.py
@ -14,7 +14,6 @@
 #  limitations under the License.
 #
 import logging
-import os
 import re
 from concurrent.futures import ThreadPoolExecutor
 from pathlib import Path
@ -22,7 +21,6 @@ from pathlib import Path
 from flask_login import current_user
 from peewee import fn

-from api.constants import FILE_NAME_LEN_LIMIT
 from api.db import KNOWLEDGEBASE_FOLDER_NAME, FileSource, FileType, ParserType
 from api.db.db_models import DB, Document, File, File2Document, Knowledgebase
 from api.db.services import duplicate_name
@ -31,6 +29,7 @@ from api.db.services.document_service import DocumentService
 from api.db.services.file2document_service import File2DocumentService
 from api.utils import get_uuid
 from api.utils.file_utils import filename_type, read_potential_broken_pdf, thumbnail_img
+from rag.llm.cv_model import GptV4
 from rag.utils.storage_factory import STORAGE_IMPL


@ -411,12 +410,7 @@ class FileService(CommonService):
        err, files = [], []
        for file in file_objs:
            try:
-                MAX_FILE_NUM_PER_USER = int(os.environ.get("MAX_FILE_NUM_PER_USER", 0))
-                if MAX_FILE_NUM_PER_USER > 0 and DocumentService.get_doc_count(kb.tenant_id) >= MAX_FILE_NUM_PER_USER:
-                    raise RuntimeError("Exceed the maximum file number of a free user!")
-                if len(file.filename.encode("utf-8")) > FILE_NAME_LEN_LIMIT:
-                    raise RuntimeError(f"File name must be {FILE_NAME_LEN_LIMIT} bytes or less.")
-
+                DocumentService.check_doc_health(kb.tenant_id, file.filename)
                filename = duplicate_name(DocumentService.query, name=file.filename, kb_id=kb.id)
                filetype = filename_type(filename)
                if filetype == FileType.OTHER.value:
@ -463,6 +457,19 @@ class FileService(CommonService):

    @staticmethod
    def parse_docs(file_objs, user_id):
+        exe = ThreadPoolExecutor(max_workers=12)
+        threads = []
+        for file in file_objs:
+            threads.append(exe.submit(FileService.parse, file.filename, file.read(), False))
+
+        res = []
+        for th in threads:
+            res.append(th.result())
+
+        return "\n\n".join(res)
+
+    @staticmethod
+    def parse(filename, blob, img_base64=True, tenant_id=None):
        from rag.app import audio, email, naive, picture, presentation

        def dummy(prog=None, msg=""):
@ -470,19 +477,12 @@ class FileService(CommonService):

        FACTORY = {ParserType.PRESENTATION.value: presentation, ParserType.PICTURE.value: picture, ParserType.AUDIO.value: audio, ParserType.EMAIL.value: email}
        parser_config = {"chunk_token_num": 16096, "delimiter": "\n!?;。；！？", "layout_recognize": "Plain Text"}
-        exe = ThreadPoolExecutor(max_workers=12)
-        threads = []
-        for file in file_objs:
-            kwargs = {"lang": "English", "callback": dummy, "parser_config": parser_config, "from_page": 0, "to_page": 100000, "tenant_id": user_id}
-            filetype = filename_type(file.filename)
-            blob = file.read()
-            threads.append(exe.submit(FACTORY.get(FileService.get_parser(filetype, file.filename, ""), naive).chunk, file.filename, blob, **kwargs))
-
-        res = []
-        for th in threads:
-            res.append("\n".join([ck["content_with_weight"] for ck in th.result()]))
-
-        return "\n\n".join(res)
+        kwargs = {"lang": "English", "callback": dummy, "parser_config": parser_config, "from_page": 0, "to_page": 100000, "tenant_id": current_user.id if current_user else tenant_id}
+        file_type = filename_type(filename)
+        if img_base64 and file_type == FileType.VISUAL.value:
+            return GptV4.image2base64(blob)
+        cks = FACTORY.get(FileService.get_parser(filename_type(filename), filename, ""), naive).chunk(filename, blob, **kwargs)
+        return "\n".join([ck["content_with_weight"] for ck in cks])

    @staticmethod
    def get_parser(doc_type, filename, default):
@ -495,3 +495,14 @@ class FileService(CommonService):
        if re.search(r"\.(eml)$", filename):
            return ParserType.EMAIL.value
        return default
+
+    @staticmethod
+    def get_blob(user_id, location):
+        bname = f"{user_id}-downloads"
+        return  STORAGE_IMPL.get(bname, location)
+
+    @staticmethod
+    def put_blob(user_id, location, blob):
+        bname = f"{user_id}-downloads"
+        return  STORAGE_IMPL.put(bname, location, blob)
+
--- a/api/db/services/llm_service.py
+++ b/api/db/services/llm_service.py
@ -14,6 +14,8 @@
 #  limitations under the License.
 #
 import logging
+import re
+from functools import partial

 from langfuse import Langfuse

@ -137,7 +139,7 @@ class TenantLLMService(CommonService):

    @classmethod
    @DB.connection_context()
-    def model_instance(cls, tenant_id, llm_type, llm_name=None, lang="Chinese"):
+    def model_instance(cls, tenant_id, llm_type, llm_name=None, lang="Chinese", **kwargs):
        model_config = TenantLLMService.get_model_config(tenant_id, llm_type, llm_name)
        if llm_type == LLMType.EMBEDDING.value:
            if model_config["llm_factory"] not in EmbeddingModel:
@ -152,12 +154,12 @@ class TenantLLMService(CommonService):
        if llm_type == LLMType.IMAGE2TEXT.value:
            if model_config["llm_factory"] not in CvModel:
                return
-            return CvModel[model_config["llm_factory"]](model_config["api_key"], model_config["llm_name"], lang, base_url=model_config["api_base"])
+            return CvModel[model_config["llm_factory"]](model_config["api_key"], model_config["llm_name"], lang, base_url=model_config["api_base"], **kwargs)

        if llm_type == LLMType.CHAT.value:
            if model_config["llm_factory"] not in ChatModel:
                return
-            return ChatModel[model_config["llm_factory"]](model_config["api_key"], model_config["llm_name"], base_url=model_config["api_base"])
+            return ChatModel[model_config["llm_factory"]](model_config["api_key"], model_config["llm_name"], base_url=model_config["api_base"], **kwargs)

        if llm_type == LLMType.SPEECH2TEXT:
            if model_config["llm_factory"] not in Seq2txtModel:
@ -221,20 +223,21 @@ class TenantLLMService(CommonService):
        for llm_factory in llm_factories:
            for llm in llm_factory["llm"]:
                if llm_id == llm["llm_name"]:
-                    return llm["model_type"].strip(",")[-1]
+                    return llm["model_type"].split(",")[-1]


 class LLMBundle:
-    def __init__(self, tenant_id, llm_type, llm_name=None, lang="Chinese"):
+    def __init__(self, tenant_id, llm_type, llm_name=None, lang="Chinese", **kwargs):
        self.tenant_id = tenant_id
        self.llm_type = llm_type
        self.llm_name = llm_name
-        self.mdl = TenantLLMService.model_instance(tenant_id, llm_type, llm_name, lang=lang)
+        self.mdl = TenantLLMService.model_instance(tenant_id, llm_type, llm_name, lang=lang, **kwargs)
        assert self.mdl, "Can't find model for {}/{}/{}".format(tenant_id, llm_type, llm_name)
        model_config = TenantLLMService.get_model_config(tenant_id, llm_type, llm_name)
        self.max_length = model_config.get("max_tokens", 8192)

        self.is_tools = model_config.get("is_tools", False)
+        self.verbose_tool_use = kwargs.get("verbose_tool_use")

        langfuse_keys = TenantLangfuseService.filter_by_tenant(tenant_id=tenant_id)
        if langfuse_keys:
@ -331,7 +334,7 @@ class LLMBundle:

        return txt

-    def tts(self, text):
+    def tts(self, text: str) -> None:
        if self.langfuse:
            span = self.trace.span(name="tts", input={"text": text})

@ -359,17 +362,20 @@ class LLMBundle:

        return txt[last_think_end + len("</think>") :]

-    def chat(self, system, history, gen_conf):
+    def chat(self, system: str, history: list, gen_conf: dict={}, **kwargs) -> str:
        if self.langfuse:
            generation = self.trace.generation(name="chat", model=self.llm_name, input={"system": system, "history": history})

-        chat = self.mdl.chat
+        chat_partial = partial(self.mdl.chat, system, history, gen_conf)
        if self.is_tools and self.mdl.is_tools:
-            chat = self.mdl.chat_with_tools
+            chat_partial = partial(self.mdl.chat_with_tools, system, history, gen_conf)

-        txt, used_tokens = chat(system, history, gen_conf)
+        txt, used_tokens = chat_partial(**kwargs)
        txt = self._remove_reasoning_content(txt)

+        if not self.verbose_tool_use:
+            txt = re.sub(r"<tool_call>.*?</tool_call>", "", txt, flags=re.DOTALL)
+
        if isinstance(txt, int) and not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, used_tokens, self.llm_name):
            logging.error("LLMBundle.chat can't update token usage for {}/CHAT llm_name: {}, used_tokens: {}".format(self.tenant_id, self.llm_name, used_tokens))

@ -378,17 +384,17 @@ class LLMBundle:

        return txt

-    def chat_streamly(self, system, history, gen_conf):
+    def chat_streamly(self, system: str, history: list, gen_conf: dict={}, **kwargs):
        if self.langfuse:
            generation = self.trace.generation(name="chat_streamly", model=self.llm_name, input={"system": system, "history": history})

        ans = ""
-        chat_streamly = self.mdl.chat_streamly
+        chat_partial = partial(self.mdl.chat_streamly, system, history, gen_conf)
        total_tokens = 0
        if self.is_tools and self.mdl.is_tools:
-            chat_streamly = self.mdl.chat_streamly_with_tools
+            chat_partial = partial(self.mdl.chat_streamly_with_tools, system, history, gen_conf)

-        for txt in chat_streamly(system, history, gen_conf):
+        for txt in chat_partial(**kwargs):
            if isinstance(txt, int):
                total_tokens = txt
                if self.langfuse:
@ -398,8 +404,12 @@ class LLMBundle:
            if txt.endswith("</think>"):
                ans = ans.rstrip("</think>")

+            if not self.verbose_tool_use:
+                txt = re.sub(r"<tool_call>.*?</tool_call>", "", txt, flags=re.DOTALL)
+
            ans += txt
            yield ans
+
        if total_tokens > 0:
            if not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, txt, self.llm_name):
                logging.error("LLMBundle.chat_streamly can't update token usage for {}/CHAT llm_name: {}, content: {}".format(self.tenant_id, self.llm_name, txt))