From 83771e500c6fe31788e41ed625474f7320b8c127 Mon Sep 17 00:00:00 2001
From: Yongteng Lei
Date: Tue, 12 Aug 2025 10:59:20 +0800
Subject: [PATCH] Refa: migrate chat models to LiteLLM (#9394)

### What problem does this PR solve?

All models pass the mock response tests, which means that if a model can return the correct response, everything should work as expected. However, not all models have been fully tested in a real environment with a real API_KEY. I suggest actively monitoring the refactored models over the coming period to ensure they work correctly, fixing them step by step, or waiting to merge until most of them have been tested in a practical environment.

### Type of change

- [x] Refactoring
---
 api/apps/llm_app.py            |  17 +-
 api/db/services/llm_service.py |   1 +
 pyproject.toml                 |   7 +-
 rag/llm/__init__.py            |  77 ++-
 rag/llm/chat_model.py          | 875 +++++++++++++++++++--------------
 uv.lock                        | 219 +++++----
 web/src/locales/config.ts      |   2 +-
 web/src/locales/ru.ts          |  86 ++--
 8 files changed, 738 insertions(+), 546 deletions(-)

diff --git a/api/apps/llm_app.py b/api/apps/llm_app.py
index eb9ee1c94..2ec8180cd 100644
--- a/api/apps/llm_app.py
+++ b/api/apps/llm_app.py
@@ -57,6 +57,7 @@ def set_api_key():
     # test if api key works
     chat_passed, embd_passed, rerank_passed = False, False, False
     factory = req["llm_factory"]
+    extra = {"provider": factory}
     msg = ""
     for llm in LLMService.query(fid=factory):
         if not embd_passed and llm.model_type == LLMType.EMBEDDING.value:
@@ -73,7 +74,7 @@
         elif not chat_passed and llm.model_type == LLMType.CHAT.value:
             assert factory in ChatModel, f"Chat model from {factory} is not supported yet."
             mdl = ChatModel[factory](
-                req["api_key"], llm.llm_name, base_url=req.get("base_url"))
+                req["api_key"], llm.llm_name, base_url=req.get("base_url"), **extra)
             try:
                 m, tc = mdl.chat(None, [{"role": "user", "content": "Hello! How are you doing!"}], {
                     "temperature": 0.9, 'max_tokens': 50})
@@ -204,6 +205,7 @@ def add_llm():
     msg = ""
     mdl_nm = llm["llm_name"].split("___")[0]
+    extra = {"provider": factory}
     if llm["model_type"] == LLMType.EMBEDDING.value:
         assert factory in EmbeddingModel, f"Embedding model from {factory} is not supported yet."
         mdl = EmbeddingModel[factory](
@@ -221,7 +223,8 @@
         mdl = ChatModel[factory](
             key=llm['api_key'],
             model_name=mdl_nm,
-            base_url=llm["api_base"]
+            base_url=llm["api_base"],
+            **extra,
         )
         try:
             m, tc = mdl.chat(None, [{"role": "user", "content": "Hello! 
How are you doing!"}], { @@ -312,12 +315,12 @@ def delete_factory(): def my_llms(): try: include_details = request.args.get('include_details', 'false').lower() == 'true' - + if include_details: res = {} objs = TenantLLMService.query(tenant_id=current_user.id) factories = LLMFactoriesService.query(status=StatusEnum.VALID.value) - + for o in objs: o_dict = o.to_dict() factory_tags = None @@ -325,13 +328,13 @@ def my_llms(): if f.name == o_dict["llm_factory"]: factory_tags = f.tags break - + if o_dict["llm_factory"] not in res: res[o_dict["llm_factory"]] = { "tags": factory_tags, "llm": [] } - + res[o_dict["llm_factory"]]["llm"].append({ "type": o_dict["model_type"], "name": o_dict["llm_name"], @@ -352,7 +355,7 @@ def my_llms(): "name": o["llm_name"], "used_token": o["used_tokens"] }) - + return get_json_result(data=res) except Exception as e: return server_error_response(e) diff --git a/api/db/services/llm_service.py b/api/db/services/llm_service.py index dac080b64..fbfa7d65e 100644 --- a/api/db/services/llm_service.py +++ b/api/db/services/llm_service.py @@ -141,6 +141,7 @@ class TenantLLMService(CommonService): @DB.connection_context() def model_instance(cls, tenant_id, llm_type, llm_name=None, lang="Chinese", **kwargs): model_config = TenantLLMService.get_model_config(tenant_id, llm_type, llm_name) + kwargs.update({"provider": model_config["llm_factory"]}) if llm_type == LLMType.EMBEDDING.value: if model_config["llm_factory"] not in EmbeddingModel: return diff --git a/pyproject.toml b/pyproject.toml index 51f740ca3..e0202451a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,7 @@ dependencies = [ "chardet==5.2.0", "cn2an==0.5.22", "cohere==5.6.2", - "Crawl4AI==0.3.8", + "Crawl4AI>=0.3.8", "dashscope==1.20.11", "deepl==1.18.0", "demjson3==3.0.6", @@ -43,7 +43,7 @@ dependencies = [ "groq==0.9.0", "hanziconv==0.3.2", "html-text==0.6.2", - "httpx==0.27.0", + "httpx==0.27.2", "huggingface-hub>=0.25.0,<0.26.0", "infinity-sdk==0.6.0-dev4", "infinity-emb>=0.0.66,<0.0.67", @@ -58,7 +58,7 @@ dependencies = [ "ollama==0.2.1", "onnxruntime==1.19.2; sys_platform == 'darwin' or platform_machine != 'x86_64'", "onnxruntime-gpu==1.19.2; sys_platform != 'darwin' and platform_machine == 'x86_64'", - "openai==1.45.0", + "openai>=1.45.0", "opencv-python==4.10.0.84", "opencv-python-headless==4.10.0.84", "openpyxl>=3.1.0,<4.0.0", @@ -128,6 +128,7 @@ dependencies = [ "opensearch-py==2.7.1", "pluginlib==0.9.4", "click>=8.1.8", + "litellm>=1.74.15.post1", ] [project.optional-dependencies] diff --git a/rag/llm/__init__.py b/rag/llm/__init__.py index e9542bbe8..58c8379cb 100644 --- a/rag/llm/__init__.py +++ b/rag/llm/__init__.py @@ -19,6 +19,48 @@ import importlib import inspect +from strenum import StrEnum + + +class SupportedLiteLLMProvider(StrEnum): + Tongyi_Qianwen = "Tongyi-Qianwen" + Dashscope = "Dashscope" + Bedrock = "Bedrock" + Moonshot = "Moonshot" + xAI = "xAI" + DeepInfra = "DeepInfra" + Groq = "Groq" + Cohere = "Cohere" + Gemini = "Gemini" + DeepSeek = "DeepSeek" + Nvidia = "NVIDIA" + TogetherAI = "TogetherAI" + Anthropic = "Anthropic" + + +FACTORY_DEFAULT_BASE_URL = { + SupportedLiteLLMProvider.Tongyi_Qianwen: "https://dashscope.aliyuncs.com/compatible-mode/v1", + SupportedLiteLLMProvider.Dashscope: "https://dashscope.aliyuncs.com/compatible-mode/v1", + SupportedLiteLLMProvider.Moonshot: "https://api.moonshot.cn/v1", +} + + +LITELLM_PROVIDER_PREFIX = { + SupportedLiteLLMProvider.Tongyi_Qianwen: "dashscope/", + SupportedLiteLLMProvider.Dashscope: "dashscope/", + 
SupportedLiteLLMProvider.Bedrock: "bedrock/",
+    SupportedLiteLLMProvider.Moonshot: "moonshot/",
+    SupportedLiteLLMProvider.xAI: "xai/",
+    SupportedLiteLLMProvider.DeepInfra: "deepinfra/",
+    SupportedLiteLLMProvider.Groq: "groq/",
+    SupportedLiteLLMProvider.Cohere: "",  # don't need a prefix
+    SupportedLiteLLMProvider.Gemini: "gemini/",
+    SupportedLiteLLMProvider.DeepSeek: "deepseek/",
+    SupportedLiteLLMProvider.Nvidia: "nvidia_nim/",
+    SupportedLiteLLMProvider.TogetherAI: "together_ai/",
+    SupportedLiteLLMProvider.Anthropic: "",  # don't need a prefix
+}
+
 ChatModel = globals().get("ChatModel", {})
 CvModel = globals().get("CvModel", {})
 EmbeddingModel = globals().get("EmbeddingModel", {})
@@ -26,6 +68,7 @@ RerankModel = globals().get("RerankModel", {})
 Seq2txtModel = globals().get("Seq2txtModel", {})
 TTSModel = globals().get("TTSModel", {})
 
+
 MODULE_MAPPING = {
     "chat_model": ChatModel,
     "cv_model": CvModel,
@@ -42,20 +85,30 @@ for module_name, mapping_dict in MODULE_MAPPING.items():
     module = importlib.import_module(full_module_name)
 
     base_class = None
+    lite_llm_base_class = None
     for name, obj in inspect.getmembers(module):
-        if inspect.isclass(obj) and name == "Base":
-            base_class = obj
-            break
-    if base_class is None:
-        continue
+        if inspect.isclass(obj):
+            if name == "Base":
+                base_class = obj
+            elif name == "LiteLLMBase":
+                lite_llm_base_class = obj
+                assert hasattr(obj, "_FACTORY_NAME"), "LiteLLMBase should have _FACTORY_NAME field."
+                if hasattr(obj, "_FACTORY_NAME"):
+                    if isinstance(obj._FACTORY_NAME, list):
+                        for factory_name in obj._FACTORY_NAME:
+                            mapping_dict[factory_name] = obj
+                    else:
+                        mapping_dict[obj._FACTORY_NAME] = obj
+
+    if base_class is not None:
+        for _, obj in inspect.getmembers(module):
+            if inspect.isclass(obj) and issubclass(obj, base_class) and obj is not base_class and hasattr(obj, "_FACTORY_NAME"):
+                if isinstance(obj._FACTORY_NAME, list):
+                    for factory_name in obj._FACTORY_NAME:
+                        mapping_dict[factory_name] = obj
+                else:
+                    mapping_dict[obj._FACTORY_NAME] = obj
 
-    for _, obj in inspect.getmembers(module):
-        if inspect.isclass(obj) and issubclass(obj, base_class) and obj is not base_class and hasattr(obj, "_FACTORY_NAME"):
-            if isinstance(obj._FACTORY_NAME, list):
-                for factory_name in obj._FACTORY_NAME:
-                    mapping_dict[factory_name] = obj
-            else:
-                mapping_dict[obj._FACTORY_NAME] = obj
 
 __all__ = [
     "ChatModel",
diff --git a/rag/llm/chat_model.py b/rag/llm/chat_model.py
index 1770b5b9e..c96afa12f 100644
--- a/rag/llm/chat_model.py
+++ b/rag/llm/chat_model.py
@@ -26,18 +26,20 @@ from typing import Any, Protocol
 from urllib.parse import urljoin
 
 import json_repair
+import litellm
 import openai
 import requests
-from dashscope import Generation
 from ollama import Client
 from openai import OpenAI
 from openai.lib.azure import AzureOpenAI
 from strenum import StrEnum
 from zhipuai import ZhipuAI
 
+from rag.llm import FACTORY_DEFAULT_BASE_URL, LITELLM_PROVIDER_PREFIX, SupportedLiteLLMProvider
 from rag.nlp import is_chinese, is_english
 from rag.utils import num_tokens_from_string
 
+
 # Error message constants
 class LLMErrorCode(StrEnum):
     ERROR_RATE_LIMIT = "RATE_LIMIT_EXCEEDED"
@@ -58,6 +60,7 @@ class ReActMode(StrEnum):
     FUNCTION_CALL = "function_call"
     REACT = "react"
 
+
 ERROR_PREFIX = "**ERROR**"
 LENGTH_NOTIFICATION_CN = "······\n由于大模型的上下文窗口大小限制,回答已经被大模型截断。"
 LENGTH_NOTIFICATION_EN = "...\nThe answer is truncated by your chosen LLM due to its limitation on context length."
@@ -113,7 +116,7 @@ class Base(ABC):
 
     def _chat(self, history, gen_conf, **kwargs):
         logging.info("[HISTORY]" + json.dumps(history, ensure_ascii=False, indent=2))
-        if self.model_name.lower().find("qwen3") >=0:
+        if self.model_name.lower().find("qwen3") >= 0:
             kwargs["extra_body"] = {"enable_thinking": False}
         response = self.client.chat.completions.create(model=self.model_name, messages=history, **gen_conf, **kwargs)
 
@@ -167,7 +170,7 @@ class Base(ABC):
             error_code = LLMErrorCode.ERROR_MAX_RETRIES
 
         # Check if it's a rate limit error or server error and not the last attempt
-        should_retry = (error_code == LLMErrorCode.ERROR_RATE_LIMIT or error_code == LLMErrorCode.ERROR_SERVER)
+        should_retry = error_code == LLMErrorCode.ERROR_RATE_LIMIT or error_code == LLMErrorCode.ERROR_SERVER
         if not should_retry:
             return f"{ERROR_PREFIX}: {error_code} - {str(e)}"
 
@@ -176,11 +179,7 @@ class Base(ABC):
         time.sleep(delay)
 
     def _verbose_tool_use(self, name, args, res):
-        return "<tool_call>" + json.dumps({
-            "name": name,
-            "args": args,
-            "result": res
-        }, ensure_ascii=False, indent=2) + "</tool_call>"
+        return "<tool_call>" + json.dumps({"name": name, "args": args, "result": res}, ensure_ascii=False, indent=2) + "</tool_call>"
 
     def _append_history(self, hist, tool_call, tool_res):
         hist.append(
@@ -213,7 +212,7 @@ class Base(ABC):
         self.toolcall_session = toolcall_session
         self.tools = tools
 
-    def chat_with_tools(self, system: str, history: list, gen_conf: dict={}):
+    def chat_with_tools(self, system: str, history: list, gen_conf: dict = {}):
         gen_conf = self._clean_conf(gen_conf)
         if system:
             history.insert(0, {"role": "system", "content": system})
@@ -225,7 +224,7 @@ class Base(ABC):
         for attempt in range(self.max_retries + 1):
             history = hist
             try:
-                for _ in range(self.max_rounds+1):
+                for _ in range(self.max_rounds + 1):
                     logging.info(f"{self.tools=}")
                     response = self.client.chat.completions.create(model=self.model_name, messages=history, tools=self.tools, tool_choice="auto", **gen_conf)
                     tk_count += self.total_token_count(response)
@@ -255,7 +254,7 @@ class Base(ABC):
                         history.append({"role": "tool", "tool_call_id": tool_call.id, "content": f"Tool call error: \n{tool_call}\nException:\n" + str(e)})
                         ans += self._verbose_tool_use(name, {}, str(e))
 
-                logging.warning( f"Exceed max rounds: {self.max_rounds}")
+                logging.warning(f"Exceed max rounds: {self.max_rounds}")
                 history.append({"role": "user", "content": f"Exceed max rounds: {self.max_rounds}"})
                 response, token_count = self._chat(history, gen_conf)
                 ans += response
@@ -297,7 +296,7 @@ class Base(ABC):
 
         return final_tool_calls
 
-    def chat_streamly_with_tools(self, system: str, history: list, gen_conf: dict={}):
+    def chat_streamly_with_tools(self, system: str, history: list, gen_conf: dict = {}):
         gen_conf = self._clean_conf(gen_conf)
         tools = self.tools
         if system:
@@ -309,7 +308,7 @@ class Base(ABC):
         for attempt in range(self.max_retries + 1):
             history = hist
             try:
-                for _ in range(self.max_rounds+1):
+                for _ in range(self.max_rounds + 1):
                     reasoning_start = False
                     logging.info(f"{tools=}")
                     response = self.client.chat.completions.create(model=self.model_name, messages=history, stream=True, tools=tools, tool_choice="auto", **gen_conf)
@@ -373,7 +372,7 @@ class Base(ABC):
                         history.append({"role": "tool", "tool_call_id": tool_call.id, "content": f"Tool call error: \n{tool_call}\nException:\n" + str(e)})
                         yield self._verbose_tool_use(name, {}, str(e))
 
-                logging.warning( f"Exceed max rounds: {self.max_rounds}")
+                logging.warning(f"Exceed max rounds: {self.max_rounds}")
                 history.append({"role": "user", "content": f"Exceed max 
rounds: {self.max_rounds}"}) response = self.client.chat.completions.create(model=self.model_name, messages=history, stream=True, **gen_conf) for resp in response: @@ -402,7 +401,7 @@ class Base(ABC): assert False, "Shouldn't be here." - def chat_streamly(self, system, history, gen_conf: dict={}, **kwargs): + def chat_streamly(self, system, history, gen_conf: dict = {}, **kwargs): if system: history.insert(0, {"role": "system", "content": system}) gen_conf = self._clean_conf(gen_conf) @@ -474,15 +473,6 @@ class GptTurbo(Base): super().__init__(key, model_name, base_url, **kwargs) -class MoonshotChat(Base): - _FACTORY_NAME = "Moonshot" - - def __init__(self, key, model_name="moonshot-v1-8k", base_url="https://api.moonshot.cn/v1", **kwargs): - if not base_url: - base_url = "https://api.moonshot.cn/v1" - super().__init__(key, model_name, base_url) - - class XinferenceChat(Base): _FACTORY_NAME = "Xinference" @@ -513,15 +503,6 @@ class ModelScopeChat(Base): super().__init__(key, model_name.split("___")[0], base_url, **kwargs) -class DeepSeekChat(Base): - _FACTORY_NAME = "DeepSeek" - - def __init__(self, key, model_name="deepseek-chat", base_url="https://api.deepseek.com/v1", **kwargs): - if not base_url: - base_url = "https://api.deepseek.com/v1" - super().__init__(key, model_name, base_url, **kwargs) - - class AzureChat(Base): _FACTORY_NAME = "Azure-OpenAI" @@ -608,26 +589,6 @@ class BaiChuanChat(Base): yield total_tokens -class xAIChat(Base): - _FACTORY_NAME = "xAI" - - def __init__(self, key, model_name="grok-3", base_url=None, **kwargs): - if not base_url: - base_url = "https://api.x.ai/v1" - super().__init__(key, model_name, base_url=base_url, **kwargs) - return - - -class QWenChat(Base): - _FACTORY_NAME = "Tongyi-Qianwen" - - def __init__(self, key, model_name=Generation.Models.qwen_turbo, base_url=None, **kwargs): - if not base_url: - base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1" - super().__init__(key, model_name, base_url=base_url, **kwargs) - return - - class ZhipuChat(Base): _FACTORY_NAME = "ZHIPU-AI" @@ -973,217 +934,6 @@ class MistralChat(Base): yield total_tokens -class BedrockChat(Base): - _FACTORY_NAME = "Bedrock" - - def __init__(self, key, model_name, base_url=None, **kwargs): - super().__init__(key, model_name, base_url=base_url, **kwargs) - - import boto3 - - self.bedrock_ak = json.loads(key).get("bedrock_ak", "") - self.bedrock_sk = json.loads(key).get("bedrock_sk", "") - self.bedrock_region = json.loads(key).get("bedrock_region", "") - self.model_name = model_name - - if self.bedrock_ak == "" or self.bedrock_sk == "" or self.bedrock_region == "": - # Try to create a client using the default credentials (AWS_PROFILE, AWS_DEFAULT_REGION, etc.) 
- self.client = boto3.client("bedrock-runtime") - else: - self.client = boto3.client(service_name="bedrock-runtime", region_name=self.bedrock_region, aws_access_key_id=self.bedrock_ak, aws_secret_access_key=self.bedrock_sk) - - def _clean_conf(self, gen_conf): - for k in list(gen_conf.keys()): - if k not in ["temperature"]: - del gen_conf[k] - return gen_conf - - def _chat(self, history, gen_conf={}, **kwargs): - system = history[0]["content"] if history and history[0]["role"] == "system" else "" - hist = [] - for item in history: - if item["role"] == "system": - continue - hist.append(deepcopy(item)) - if not isinstance(hist[-1]["content"], list) and not isinstance(hist[-1]["content"], tuple): - hist[-1]["content"] = [{"text": hist[-1]["content"]}] - # Send the message to the model, using a basic inference configuration. - response = self.client.converse( - modelId=self.model_name, - messages=hist, - inferenceConfig=gen_conf, - system=[{"text": (system if system else "Answer the user's message.")}], - ) - - # Extract and print the response text. - ans = response["output"]["message"]["content"][0]["text"] - return ans, num_tokens_from_string(ans) - - def chat_streamly(self, system, history, gen_conf={}, **kwargs): - from botocore.exceptions import ClientError - - for k in list(gen_conf.keys()): - if k not in ["temperature"]: - del gen_conf[k] - for item in history: - if not isinstance(item["content"], list) and not isinstance(item["content"], tuple): - item["content"] = [{"text": item["content"]}] - - if self.model_name.split(".")[0] == "ai21": - try: - response = self.client.converse(modelId=self.model_name, messages=history, inferenceConfig=gen_conf, system=[{"text": (system if system else "Answer the user's message.")}]) - ans = response["output"]["message"]["content"][0]["text"] - return ans, num_tokens_from_string(ans) - - except (ClientError, Exception) as e: - return f"ERROR: Can't invoke '{self.model_name}'. Reason: {e}", 0 - - ans = "" - try: - # Send the message to the model, using a basic inference configuration. - streaming_response = self.client.converse_stream( - modelId=self.model_name, messages=history, inferenceConfig=gen_conf, system=[{"text": (system if system else "Answer the user's message.")}] - ) - - # Extract and print the streamed response text in real-time. - for resp in streaming_response["stream"]: - if "contentBlockDelta" in resp: - ans = resp["contentBlockDelta"]["delta"]["text"] - yield ans - - except (ClientError, Exception) as e: - yield ans + f"ERROR: Can't invoke '{self.model_name}'. 
Reason: {e}" - - yield num_tokens_from_string(ans) - - -class GeminiChat(Base): - _FACTORY_NAME = "Gemini" - - def __init__(self, key, model_name, base_url=None, **kwargs): - super().__init__(key, model_name, base_url=base_url, **kwargs) - - from google.generativeai import GenerativeModel, client - - client.configure(api_key=key) - _client = client.get_default_generative_client() - self.model_name = "models/" + model_name - self.model = GenerativeModel(model_name=self.model_name) - self.model._client = _client - - def _clean_conf(self, gen_conf): - for k in list(gen_conf.keys()): - if k not in ["temperature", "top_p", "max_tokens"]: - del gen_conf[k] - # if max_tokens exists, rename it to max_output_tokens to match Gemini's API - if k == "max_tokens": - gen_conf["max_output_tokens"] = gen_conf.pop("max_tokens") - return gen_conf - - def _chat(self, history, gen_conf={}, **kwargs): - from google.generativeai.types import content_types - - system = history[0]["content"] if history and history[0]["role"] == "system" else "" - hist = [] - for item in history: - if item["role"] == "system": - continue - hist.append(deepcopy(item)) - item = hist[-1] - if "role" in item and item["role"] == "assistant": - item["role"] = "model" - if "role" in item and item["role"] == "system": - item["role"] = "user" - if "content" in item: - item["parts"] = item.pop("content") - - if system: - self.model._system_instruction = content_types.to_content(system) - retry_count = 0 - max_retries = 3 - while retry_count < max_retries: - try: - response = self.model.generate_content(hist, generation_config=gen_conf) - ans = response.text - return ans, response.usage_metadata.total_token_count - except Exception as e: - retry_count += 1 - if retry_count >= max_retries: - raise e - else: - import time - time.sleep(50) - - def chat_streamly(self, system, history, gen_conf={}, **kwargs): - from google.generativeai.types import content_types - - gen_conf = self._clean_conf(gen_conf) - if system: - self.model._system_instruction = content_types.to_content(system) - for item in history: - if "role" in item and item["role"] == "assistant": - item["role"] = "model" - if "content" in item: - item["parts"] = item.pop("content") - ans = "" - try: - response = self.model.generate_content(history, generation_config=gen_conf, stream=True) - for resp in response: - ans = resp.text - yield ans - - yield response._chunks[-1].usage_metadata.total_token_count - except Exception as e: - yield ans + "\n**ERROR**: " + str(e) - - yield 0 - - -class GroqChat(Base): - _FACTORY_NAME = "Groq" - - def __init__(self, key, model_name, base_url=None, **kwargs): - super().__init__(key, model_name, base_url=base_url, **kwargs) - - from groq import Groq - - self.client = Groq(api_key=key) - self.model_name = model_name - - def _clean_conf(self, gen_conf): - for k in list(gen_conf.keys()): - if k not in ["temperature", "top_p", "max_tokens"]: - del gen_conf[k] - return gen_conf - - def chat_streamly(self, system, history, gen_conf={}, **kwargs): - if system: - history.insert(0, {"role": "system", "content": system}) - for k in list(gen_conf.keys()): - if k not in ["temperature", "top_p", "max_tokens"]: - del gen_conf[k] - ans = "" - total_tokens = 0 - try: - response = self.client.chat.completions.create(model=self.model_name, messages=history, stream=True, **gen_conf) - for resp in response: - if not resp.choices or not resp.choices[0].delta.content: - continue - ans = resp.choices[0].delta.content - total_tokens += 1 - if resp.choices[0].finish_reason == 
"length": - if is_chinese(ans): - ans += LENGTH_NOTIFICATION_CN - else: - ans += LENGTH_NOTIFICATION_EN - yield ans - - except Exception as e: - yield ans + "\n**ERROR**: " + str(e) - - yield total_tokens - - ## openrouter class OpenRouterChat(Base): _FACTORY_NAME = "OpenRouter" @@ -1203,15 +953,6 @@ class StepFunChat(Base): super().__init__(key, model_name, base_url, **kwargs) -class NvidiaChat(Base): - _FACTORY_NAME = "NVIDIA" - - def __init__(self, key, model_name, base_url="https://integrate.api.nvidia.com/v1", **kwargs): - if not base_url: - base_url = "https://integrate.api.nvidia.com/v1" - super().__init__(key, model_name, base_url, **kwargs) - - class LmStudioChat(Base): _FACTORY_NAME = "LM-Studio" @@ -1243,83 +984,6 @@ class PPIOChat(Base): super().__init__(key, model_name, base_url, **kwargs) -class CoHereChat(Base): - _FACTORY_NAME = "Cohere" - - def __init__(self, key, model_name, base_url=None, **kwargs): - super().__init__(key, model_name, base_url=base_url, **kwargs) - - from cohere import Client - - self.client = Client(api_key=key) - self.model_name = model_name - - def _clean_conf(self, gen_conf): - if "max_tokens" in gen_conf: - del gen_conf["max_tokens"] - if "top_p" in gen_conf: - gen_conf["p"] = gen_conf.pop("top_p") - if "frequency_penalty" in gen_conf and "presence_penalty" in gen_conf: - gen_conf.pop("presence_penalty") - return gen_conf - - def _chat(self, history, gen_conf): - hist = [] - for item in history: - hist.append(deepcopy(item)) - item = hist[-1] - if "role" in item and item["role"] == "user": - item["role"] = "USER" - if "role" in item and item["role"] == "assistant": - item["role"] = "CHATBOT" - if "content" in item: - item["message"] = item.pop("content") - mes = hist.pop()["message"] - response = self.client.chat(model=self.model_name, chat_history=hist, message=mes, **gen_conf) - ans = response.text - if response.finish_reason == "MAX_TOKENS": - ans += "...\nFor the content length reason, it stopped, continue?" if is_english([ans]) else "······\n由于长度的原因,回答被截断了,要继续吗?" - return ( - ans, - response.meta.tokens.input_tokens + response.meta.tokens.output_tokens, - ) - - def chat_streamly(self, system, history, gen_conf={}, **kwargs): - if system: - history.insert(0, {"role": "system", "content": system}) - if "max_tokens" in gen_conf: - del gen_conf["max_tokens"] - if "top_p" in gen_conf: - gen_conf["p"] = gen_conf.pop("top_p") - if "frequency_penalty" in gen_conf and "presence_penalty" in gen_conf: - gen_conf.pop("presence_penalty") - for item in history: - if "role" in item and item["role"] == "user": - item["role"] = "USER" - if "role" in item and item["role"] == "assistant": - item["role"] = "CHATBOT" - if "content" in item: - item["message"] = item.pop("content") - mes = history.pop()["message"] - ans = "" - total_tokens = 0 - try: - response = self.client.chat_stream(model=self.model_name, chat_history=history, message=mes, **gen_conf) - for resp in response: - if resp.event_type == "text-generation": - ans = resp.text - total_tokens += num_tokens_from_string(resp.text) - elif resp.event_type == "stream-end": - if resp.finish_reason == "MAX_TOKENS": - ans += "...\nFor the content length reason, it stopped, continue?" if is_english([ans]) else "······\n由于长度的原因,回答被截断了,要继续吗?" 
- yield ans - - except Exception as e: - yield ans + "\n**ERROR**: " + str(e) - - yield total_tokens - - class LeptonAIChat(Base): _FACTORY_NAME = "LeptonAI" @@ -1329,15 +993,6 @@ class LeptonAIChat(Base): super().__init__(key, model_name, base_url, **kwargs) -class TogetherAIChat(Base): - _FACTORY_NAME = "TogetherAI" - - def __init__(self, key, model_name, base_url="https://api.together.xyz/v1", **kwargs): - if not base_url: - base_url = "https://api.together.xyz/v1" - super().__init__(key, model_name, base_url, **kwargs) - - class PerfXCloudChat(Base): _FACTORY_NAME = "PerfXCloud" @@ -1581,15 +1236,6 @@ class BaiduYiyanChat(Base): yield total_tokens -class AnthropicChat(Base): - _FACTORY_NAME = "Anthropic" - - def __init__(self, key, model_name, base_url="https://api.anthropic.com/v1/", **kwargs): - if not base_url: - base_url = "https://api.anthropic.com/v1/" - super().__init__(key, model_name, base_url=base_url, **kwargs) - - class GoogleChat(Base): _FACTORY_NAME = "Google Cloud" @@ -1738,14 +1384,7 @@ class GPUStackChat(Base): raise ValueError("Local llm url cannot be None") base_url = urljoin(base_url, "v1") super().__init__(key, model_name, base_url, **kwargs) -class DeepInfraChat(Base): - _FACTORY_NAME = "DeepInfra" - def __init__(self, key, model_name, base_url="https://api.deepinfra.com/v1/openai", **kwargs): - if not base_url: - base_url = "https://api.deepinfra.com/v1/openai" - super().__init__(key, model_name, base_url, **kwargs) - class Ai302Chat(Base): _FACTORY_NAME = "302.AI" @@ -1754,3 +1393,489 @@ class Ai302Chat(Base): if not base_url: base_url = "https://api.302.ai/v1" super().__init__(key, model_name, base_url, **kwargs) + + +class LiteLLMBase(ABC): + _FACTORY_NAME = ["Tongyi-Qianwen", "Bedrock", "Moonshot", "xAI", "DeepInfra", "Groq", "Cohere", "Gemini", "DeepSeek", "NVIDIA", "TogetherAI", "Anthropic"] + + def __init__(self, key, model_name, base_url=None, **kwargs): + self.timeout = int(os.environ.get("LM_TIMEOUT_SECONDS", 600)) + self.provider = kwargs.get("provider", "") + self.prefix = LITELLM_PROVIDER_PREFIX.get(self.provider, "") + self.model_name = f"{self.prefix}{model_name}" + self.api_key = key + self.base_url = base_url or FACTORY_DEFAULT_BASE_URL.get(self.provider, "") + # Configure retry parameters + self.max_retries = kwargs.get("max_retries", int(os.environ.get("LLM_MAX_RETRIES", 5))) + self.base_delay = kwargs.get("retry_interval", float(os.environ.get("LLM_BASE_DELAY", 2.0))) + self.max_rounds = kwargs.get("max_rounds", 5) + self.is_tools = False + self.tools = [] + self.toolcall_sessions = {} + + # Factory specific fields + if self.provider == SupportedLiteLLMProvider.Bedrock: + self.bedrock_ak = json.loads(key).get("bedrock_ak", "") + self.bedrock_sk = json.loads(key).get("bedrock_sk", "") + self.bedrock_region = json.loads(key).get("bedrock_region", "") + + def _get_delay(self): + """Calculate retry delay time""" + return self.base_delay * random.uniform(10, 150) + + def _classify_error(self, error): + """Classify error based on error message content""" + error_str = str(error).lower() + + keywords_mapping = [ + (["quota", "capacity", "credit", "billing", "balance", "欠费"], LLMErrorCode.ERROR_QUOTA), + (["rate limit", "429", "tpm limit", "too many requests", "requests per minute"], LLMErrorCode.ERROR_RATE_LIMIT), + (["auth", "key", "apikey", "401", "forbidden", "permission"], LLMErrorCode.ERROR_AUTHENTICATION), + (["invalid", "bad request", "400", "format", "malformed", "parameter"], LLMErrorCode.ERROR_INVALID_REQUEST), + (["server", "503", "502", 
"504", "500", "unavailable"], LLMErrorCode.ERROR_SERVER), + (["timeout", "timed out"], LLMErrorCode.ERROR_TIMEOUT), + (["connect", "network", "unreachable", "dns"], LLMErrorCode.ERROR_CONNECTION), + (["filter", "content", "policy", "blocked", "safety", "inappropriate"], LLMErrorCode.ERROR_CONTENT_FILTER), + (["model", "not found", "does not exist", "not available"], LLMErrorCode.ERROR_MODEL), + (["max rounds"], LLMErrorCode.ERROR_MODEL), + ] + for words, code in keywords_mapping: + if re.search("({})".format("|".join(words)), error_str): + return code + + return LLMErrorCode.ERROR_GENERIC + + def _clean_conf(self, gen_conf): + if "max_tokens" in gen_conf: + del gen_conf["max_tokens"] + return gen_conf + + def _chat(self, history, gen_conf, **kwargs): + logging.info("[HISTORY]" + json.dumps(history, ensure_ascii=False, indent=2)) + if self.model_name.lower().find("qwen3") >= 0: + kwargs["extra_body"] = {"enable_thinking": False} + + completion_args = self._construct_completion_args(history=history, **gen_conf) + response = litellm.completion( + **completion_args, + drop_params=True, + timeout=self.timeout, + ) + # response = self.client.chat.completions.create(model=self.model_name, messages=history, **gen_conf, **kwargs) + + if any([not response.choices, not response.choices[0].message, not response.choices[0].message.content]): + return "", 0 + ans = response.choices[0].message.content.strip() + if response.choices[0].finish_reason == "length": + ans = self._length_stop(ans) + + return ans, self.total_token_count(response) + + def _chat_streamly(self, history, gen_conf, **kwargs): + logging.info("[HISTORY STREAMLY]" + json.dumps(history, ensure_ascii=False, indent=4)) + reasoning_start = False + + completion_args = self._construct_completion_args(history=history, **gen_conf) + stop = kwargs.get("stop") + if stop: + completion_args["stop"] = stop + response = litellm.completion( + **completion_args, + drop_params=True, + timeout=self.timeout, + ) + + for resp in response: + if not hasattr(resp, "choices") or not resp.choices: + continue + + delta = resp.choices[0].delta + if not hasattr(delta, "content") or delta.content is None: + delta.content = "" + + if kwargs.get("with_reasoning", True) and hasattr(delta, "reasoning_content") and delta.reasoning_content: + ans = "" + if not reasoning_start: + reasoning_start = True + ans = "" + ans += delta.reasoning_content + "" + else: + reasoning_start = False + ans = delta.content + + tol = self.total_token_count(resp) + if not tol: + tol = num_tokens_from_string(delta.content) + + finish_reason = resp.choices[0].finish_reason if hasattr(resp.choices[0], "finish_reason") else "" + if finish_reason == "length": + if is_chinese(ans): + ans += LENGTH_NOTIFICATION_CN + else: + ans += LENGTH_NOTIFICATION_EN + + yield ans, tol + + def _length_stop(self, ans): + if is_chinese([ans]): + return ans + LENGTH_NOTIFICATION_CN + return ans + LENGTH_NOTIFICATION_EN + + def _exceptions(self, e, attempt): + logging.exception("OpenAI chat_with_tools") + # Classify the error + error_code = self._classify_error(e) + if attempt == self.max_retries: + error_code = LLMErrorCode.ERROR_MAX_RETRIES + + # Check if it's a rate limit error or server error and not the last attempt + should_retry = error_code == LLMErrorCode.ERROR_RATE_LIMIT or error_code == LLMErrorCode.ERROR_SERVER + if not should_retry: + return f"{ERROR_PREFIX}: {error_code} - {str(e)}" + + delay = self._get_delay() + logging.warning(f"Error: {error_code}. Retrying in {delay:.2f} seconds... 
(Attempt {attempt + 1}/{self.max_retries})")
+        time.sleep(delay)
+
+    def _verbose_tool_use(self, name, args, res):
+        return "<tool_call>" + json.dumps({"name": name, "args": args, "result": res}, ensure_ascii=False, indent=2) + "</tool_call>"
+
+    def _append_history(self, hist, tool_call, tool_res):
+        hist.append(
+            {
+                "role": "assistant",
+                "tool_calls": [
+                    {
+                        "index": tool_call.index,
+                        "id": tool_call.id,
+                        "function": {
+                            "name": tool_call.function.name,
+                            "arguments": tool_call.function.arguments,
+                        },
+                        "type": "function",
+                    },
+                ],
+            }
+        )
+        try:
+            if isinstance(tool_res, dict):
+                tool_res = json.dumps(tool_res, ensure_ascii=False)
+        finally:
+            hist.append({"role": "tool", "tool_call_id": tool_call.id, "content": str(tool_res)})
+        return hist
+
+    def bind_tools(self, toolcall_session, tools):
+        if not (toolcall_session and tools):
+            return
+        self.is_tools = True
+        self.toolcall_session = toolcall_session
+        self.tools = tools
+
+    def _construct_completion_args(self, history, **kwargs):
+        completion_args = {
+            "model": self.model_name,
+            "messages": history,
+            "stream": False,
+            "tools": self.tools,
+            "tool_choice": "auto",
+            "api_key": self.api_key,
+            **kwargs,
+        }
+        # Bedrock authenticates with AWS credentials instead of an API key, so it
+        # must be special-cased before the generic provider check below.
+        if self.provider == SupportedLiteLLMProvider.Bedrock:
+            completion_args.pop("api_key", None)
+            completion_args.pop("api_base", None)
+            completion_args.update(
+                {
+                    "aws_access_key_id": self.bedrock_ak,
+                    "aws_secret_access_key": self.bedrock_sk,
+                    "aws_region_name": self.bedrock_region,
+                }
+            )
+        # Test against the prefix mapping keys: `str in EnumClass` raises TypeError
+        # before Python 3.12, and every supported provider has a prefix entry.
+        elif self.provider in LITELLM_PROVIDER_PREFIX:
+            completion_args.update({"api_base": self.base_url})
+        return completion_args
+
+    def chat_with_tools(self, system: str, history: list, gen_conf: dict = {}):
+        gen_conf = self._clean_conf(gen_conf)
+        if system:
+            history.insert(0, {"role": "system", "content": system})
+
+        ans = ""
+        tk_count = 0
+        hist = deepcopy(history)
+
+        # Implement exponential backoff retry strategy
+        for attempt in range(self.max_retries + 1):
+            history = deepcopy(hist)  # deepcopy is required here
+            try:
+                for _ in range(self.max_rounds + 1):
+                    logging.info(f"{self.tools=}")
+
+                    completion_args = self._construct_completion_args(history=history, **gen_conf)
+                    response = litellm.completion(
+                        **completion_args,
+                        drop_params=True,
+                        timeout=self.timeout,
+                    )
+
+                    tk_count += self.total_token_count(response)
+
+                    if not hasattr(response, "choices") or not response.choices or not response.choices[0].message:
+                        raise Exception(f"500 response structure error. 
Response: {response}")
+
+                    message = response.choices[0].message
+
+                    if not hasattr(message, "tool_calls") or not message.tool_calls:
+                        if hasattr(message, "reasoning_content") and message.reasoning_content:
+                            ans += f"<think>{message.reasoning_content}</think>"
+                        ans += message.content or ""
+                        if response.choices[0].finish_reason == "length":
+                            ans = self._length_stop(ans)
+                        return ans, tk_count
+
+                    for tool_call in message.tool_calls:
+                        logging.info(f"Response {tool_call=}")
+                        name = tool_call.function.name
+                        try:
+                            args = json_repair.loads(tool_call.function.arguments)
+                            tool_response = self.toolcall_session.tool_call(name, args)
+                            history = self._append_history(history, tool_call, tool_response)
+                            ans += self._verbose_tool_use(name, args, tool_response)
+                        except Exception as e:
+                            logging.exception(msg=f"Wrong JSON argument format in LLM tool call response: {tool_call}")
+                            history.append({"role": "tool", "tool_call_id": tool_call.id, "content": f"Tool call error: \n{tool_call}\nException:\n" + str(e)})
+                            ans += self._verbose_tool_use(name, {}, str(e))
+
+                logging.warning(f"Exceed max rounds: {self.max_rounds}")
+                history.append({"role": "user", "content": f"Exceed max rounds: {self.max_rounds}"})
+
+                response, token_count = self._chat(history, gen_conf)
+                ans += response
+                tk_count += token_count
+                return ans, tk_count
+
+            except Exception as e:
+                e = self._exceptions(e, attempt)
+                if e:
+                    return e, tk_count
+
+        assert False, "Shouldn't be here."
+
+    def chat(self, system, history, gen_conf={}, **kwargs):
+        if system:
+            history.insert(0, {"role": "system", "content": system})
+        gen_conf = self._clean_conf(gen_conf)
+
+        # Implement exponential backoff retry strategy
+        for attempt in range(self.max_retries + 1):
+            try:
+                response = self._chat(history, gen_conf, **kwargs)
+                return response
+            except Exception as e:
+                e = self._exceptions(e, attempt)
+                if e:
+                    return e, 0
+        assert False, "Shouldn't be here."
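+    # Illustrative sketch (editorial comment, not executed): how a factory name is
+    # resolved into a LiteLLM route by LITELLM_PROVIDER_PREFIX and
+    # FACTORY_DEFAULT_BASE_URL in rag/llm/__init__.py. Model names are placeholders:
+    #
+    #     LiteLLMBase("sk-...", "qwen-turbo", provider="Tongyi-Qianwen")
+    #     #   model_name -> "dashscope/qwen-turbo"
+    #     #   base_url   -> "https://dashscope.aliyuncs.com/compatible-mode/v1"
+    #
+    #     LiteLLMBase("sk-...", "claude-3-5-sonnet", provider="Anthropic")
+    #     #   model_name -> "claude-3-5-sonnet"  (Anthropic has an empty prefix)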
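+    # Minimal end-to-end usage sketch (editorial comment), mirroring how
+    # api/apps/llm_app.py instantiates a refactored model; the factory name,
+    # model name, and key below are placeholders, not defaults:
+    #
+    #     from rag.llm import ChatModel
+    #
+    #     mdl = ChatModel["DeepSeek"](key="sk-...", model_name="deepseek-chat",
+    #                                 base_url=None, provider="DeepSeek")
+    #     ans, tokens = mdl.chat("You are a terse assistant.",
+    #                            [{"role": "user", "content": "Hello!"}],
+    #                            {"temperature": 0.9})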
+
+    def _wrap_toolcall_message(self, stream):
+        final_tool_calls = {}
+
+        for chunk in stream:
+            for tool_call in chunk.choices[0].delta.tool_calls or []:
+                index = tool_call.index
+
+                if index not in final_tool_calls:
+                    final_tool_calls[index] = tool_call
+
+                final_tool_calls[index].function.arguments += tool_call.function.arguments
+
+        return final_tool_calls
+
+    def chat_streamly_with_tools(self, system: str, history: list, gen_conf: dict = {}):
+        gen_conf = self._clean_conf(gen_conf)
+        tools = self.tools
+        if system:
+            history.insert(0, {"role": "system", "content": system})
+
+        total_tokens = 0
+        hist = deepcopy(history)
+
+        # Implement exponential backoff retry strategy
+        for attempt in range(self.max_retries + 1):
+            history = deepcopy(hist)  # deepcopy is required here
+            try:
+                for _ in range(self.max_rounds + 1):
+                    reasoning_start = False
+                    logging.info(f"{tools=}")
+
+                    completion_args = self._construct_completion_args(history=history, stream=True, **gen_conf)
+                    response = litellm.completion(
+                        **completion_args,
+                        drop_params=True,
+                        timeout=self.timeout,
+                    )
+
+                    final_tool_calls = {}
+                    answer = ""
+
+                    for resp in response:
+                        if not hasattr(resp, "choices") or not resp.choices:
+                            continue
+
+                        delta = resp.choices[0].delta
+
+                        if hasattr(delta, "tool_calls") and delta.tool_calls:
+                            for tool_call in delta.tool_calls:
+                                index = tool_call.index
+                                if index not in final_tool_calls:
+                                    if not tool_call.function.arguments:
+                                        tool_call.function.arguments = ""
+                                    final_tool_calls[index] = tool_call
+                                else:
+                                    final_tool_calls[index].function.arguments += tool_call.function.arguments or ""
+                            continue
+
+                        if not hasattr(delta, "content") or delta.content is None:
+                            delta.content = ""
+
+                        if hasattr(delta, "reasoning_content") and delta.reasoning_content:
+                            ans = ""
+                            if not reasoning_start:
+                                reasoning_start = True
+                                ans = "<think>"
+                            ans += delta.reasoning_content + "</think>"
+                            yield ans
+                        else:
+                            reasoning_start = False
+                            answer += delta.content
+                            yield delta.content
+
+                        tol = self.total_token_count(resp)
+                        if not tol:
+                            total_tokens += num_tokens_from_string(delta.content)
+                        else:
+                            total_tokens += tol
+
+                        finish_reason = getattr(resp.choices[0], "finish_reason", "")
+                        if finish_reason == "length":
+                            yield self._length_stop("")
+
+                    if answer:
+                        yield total_tokens
+                        return
+
+                    for tool_call in final_tool_calls.values():
+                        name = tool_call.function.name
+                        try:
+                            args = json_repair.loads(tool_call.function.arguments)
+                            yield self._verbose_tool_use(name, args, "Begin to call...")
+                            tool_response = self.toolcall_session.tool_call(name, args)
+                            history = self._append_history(history, tool_call, tool_response)
+                            yield self._verbose_tool_use(name, args, tool_response)
+                        except Exception as e:
+                            logging.exception(msg=f"Wrong JSON argument format in LLM tool call response: {tool_call}")
+                            history.append(
+                                {
+                                    "role": "tool",
+                                    "tool_call_id": tool_call.id,
+                                    "content": f"Tool call error: \n{tool_call}\nException:\n{str(e)}",
+                                }
+                            )
+                            yield self._verbose_tool_use(name, {}, str(e))
+
+                logging.warning(f"Exceed max rounds: {self.max_rounds}")
+                history.append({"role": "user", "content": f"Exceed max rounds: {self.max_rounds}"})
+
+                completion_args = self._construct_completion_args(history=history, stream=True, **gen_conf)
+                response = litellm.completion(
+                    **completion_args,
+                    drop_params=True,
+                    timeout=self.timeout,
+                )
+
+                for resp in response:
+                    if not hasattr(resp, "choices") or not resp.choices:
+                        continue
+                    delta = resp.choices[0].delta
+                    if not hasattr(delta, "content") or delta.content is None:
+                        continue
+                    tol = 
self.total_token_count(resp) + if not tol: + total_tokens += num_tokens_from_string(delta.content) + else: + total_tokens += tol + yield delta.content + + yield total_tokens + return + + except Exception as e: + e = self._exceptions(e, attempt) + if e: + yield e + yield total_tokens + return + + assert False, "Shouldn't be here." + + def chat_streamly(self, system, history, gen_conf: dict = {}, **kwargs): + if system: + history.insert(0, {"role": "system", "content": system}) + gen_conf = self._clean_conf(gen_conf) + ans = "" + total_tokens = 0 + try: + for delta_ans, tol in self._chat_streamly(history, gen_conf, **kwargs): + yield delta_ans + total_tokens += tol + except openai.APIError as e: + yield ans + "\n**ERROR**: " + str(e) + + yield total_tokens + + def total_token_count(self, resp): + try: + return resp.usage.total_tokens + except Exception: + pass + try: + return resp["usage"]["total_tokens"] + except Exception: + pass + return 0 + + def _calculate_dynamic_ctx(self, history): + """Calculate dynamic context window size""" + + def count_tokens(text): + """Calculate token count for text""" + # Simple calculation: 1 token per ASCII character + # 2 tokens for non-ASCII characters (Chinese, Japanese, Korean, etc.) + total = 0 + for char in text: + if ord(char) < 128: # ASCII characters + total += 1 + else: # Non-ASCII characters (Chinese, Japanese, Korean, etc.) + total += 2 + return total + + # Calculate total tokens for all messages + total_tokens = 0 + for message in history: + content = message.get("content", "") + # Calculate content tokens + content_tokens = count_tokens(content) + # Add role marker token overhead + role_tokens = 4 + total_tokens += content_tokens + role_tokens + + # Apply 1.2x buffer ratio + total_tokens_with_buffer = int(total_tokens * 1.2) + + if total_tokens_with_buffer <= 8192: + ctx_size = 8192 + else: + ctx_multiplier = (total_tokens_with_buffer // 8192) + 1 + ctx_size = ctx_multiplier * 8192 + + return ctx_size diff --git a/uv.lock b/uv.lock index 731e47c3f..0ea2819a9 100644 --- a/uv.lock +++ b/uv.lock @@ -1,4 +1,5 @@ version = 1 +revision = 1 requires-python = ">=3.10, <3.13" resolution-markers = [ "python_full_version >= '3.12' and sys_platform == 'darwin'", @@ -30,6 +31,15 @@ wheels = [ { url = "https://mirrors.aliyun.com/pypi/packages/9f/1c/a17fb513aeb684fb83bef5f395910f53103ab30308bbdd77fd66d6698c46/accelerate-1.9.0-py3-none-any.whl", hash = "sha256:c24739a97ade1d54af4549a65f8b6b046adc87e2b3e4d6c66516e32c53d5a8f1" }, ] +[[package]] +name = "aiofiles" +version = "24.1.0" +source = { registry = "https://mirrors.aliyun.com/pypi/simple" } +sdist = { url = "https://mirrors.aliyun.com/pypi/packages/0b/03/a88171e277e8caa88a4c77808c20ebb04ba74cc4681bf1e9416c862de237/aiofiles-24.1.0.tar.gz", hash = "sha256:22a075c9e5a3810f0c2e48f3008c94d68c65d763b9b03857924c99e57355166c" } +wheels = [ + { url = "https://mirrors.aliyun.com/pypi/packages/a5/45/30bb92d442636f570cb5651bc661f52b610e2eec3f891a5dc3a4c3667db0/aiofiles-24.1.0-py3-none-any.whl", hash = "sha256:b4ec55f4195e3eb5d7abd1bf7e061763e864dd4954231fb8539a0ef8bb8260e5" }, +] + [[package]] name = "aiohappyeyeballs" version = "2.6.1" @@ -1028,24 +1038,29 @@ wheels = [ [[package]] name = "crawl4ai" -version = "0.3.8" +version = "0.3.745" source = { registry = "https://mirrors.aliyun.com/pypi/simple" } dependencies = [ + { name = "aiofiles" }, { name = "aiosqlite" }, { name = "beautifulsoup4" }, + { name = "colorama" }, { name = "html2text" }, { name = "litellm" }, { name = "lxml" }, { name = "numpy" }, { name = 
"pillow" }, { name = "playwright" }, - { name = "playwright-stealth" }, { name = "python-dotenv" }, + { name = "rank-bm25" }, { name = "requests" }, + { name = "snowballstemmer" }, + { name = "tf-playwright-stealth" }, + { name = "xxhash" }, ] -sdist = { url = "https://mirrors.aliyun.com/pypi/packages/1c/31/327598a0c2cc3cd13dcb786ab41e9638c4c100db1940c9345b1e4d953f39/crawl4ai-0.3.8.tar.gz", hash = "sha256:bacc97509ddbfa5e328e299538a27a4c7fc2317e3fd5ad707b04677e4fc23fc6" } +sdist = { url = "https://mirrors.aliyun.com/pypi/packages/02/5a/919e64ff2977d7aa1b2cda4d45f16ff8996cd2c2dc1f55936fb6cd214222/crawl4ai-0.3.745.tar.gz", hash = "sha256:990396d57e10ae7ccabf35c34a317dbd8c59a3ceca475eac75320a8808334438" } wheels = [ - { url = "https://mirrors.aliyun.com/pypi/packages/af/03/4d69b8d64b39096a721808a349199ca5d7989acf2177e270d15e6f82c356/Crawl4AI-0.3.8-py3-none-any.whl", hash = "sha256:aa19165440c32b667b7325c166d68b00a99375b09e3a7db929d3873064d5ef4f" }, + { url = "https://mirrors.aliyun.com/pypi/packages/ed/7e/ebe351a457140330b20b6d8289b8f243b21de6e6bce505cd15b230a83bcb/Crawl4AI-0.3.745-py3-none-any.whl", hash = "sha256:763e6aba80959e60e1fe70cb9d954a4cf257eb230af30f51fcd99ff641a7a88d" }, ] [[package]] @@ -1175,9 +1190,6 @@ name = "datrie" version = "0.8.2" source = { registry = "https://mirrors.aliyun.com/pypi/simple" } sdist = { url = "https://mirrors.aliyun.com/pypi/packages/9d/fe/db74bd405d515f06657f11ad529878fd389576dca4812bea6f98d9b31574/datrie-0.8.2.tar.gz", hash = "sha256:525b08f638d5cf6115df6ccd818e5a01298cd230b2dac91c8ff2e6499d18765d" } -wheels = [ - { url = "https://mirrors.aliyun.com/pypi/packages/44/02/53f0cf0bf0cd629ba6c2cc13f2f9db24323459e9c19463783d890a540a96/datrie-0.8.2-pp273-pypy_73-win32.whl", hash = "sha256:b07bd5fdfc3399a6dab86d6e35c72b1dbd598e80c97509c7c7518ab8774d3fda" }, -] [[package]] name = "debugpy" @@ -1423,6 +1435,14 @@ wheels = [ { url = "https://mirrors.aliyun.com/pypi/packages/36/f4/c6e662dade71f56cd2f3735141b265c3c79293c109549c1e6933b0651ffc/exceptiongroup-1.3.0-py3-none-any.whl", hash = "sha256:4d111e6e0c13d0644cad6ddaa7ed0261a0b36971f6d23e7ec9b4b9097da78a10" }, ] +[[package]] +name = "fake-http-header" +version = "0.3.5" +source = { registry = "https://mirrors.aliyun.com/pypi/simple" } +wheels = [ + { url = "https://mirrors.aliyun.com/pypi/packages/e3/0b/2849c87d9f13766e29c0a2f4d31681aa72e035016b251ab19d99bde7b592/fake_http_header-0.3.5-py3-none-any.whl", hash = "sha256:cd05f4bebf1b7e38b5f5c03d7fb820c0c17e87d9614fbee0afa39c32c7a2ad3c" }, +] + [[package]] name = "fake-useragent" version = "1.5.1" @@ -1486,17 +1506,17 @@ name = "fastembed-gpu" version = "0.3.6" source = { registry = "https://mirrors.aliyun.com/pypi/simple" } dependencies = [ - { name = "huggingface-hub" }, - { name = "loguru" }, - { name = "mmh3" }, - { name = "numpy" }, - { name = "onnxruntime-gpu" }, - { name = "pillow" }, - { name = "pystemmer" }, - { name = "requests" }, - { name = "snowballstemmer" }, - { name = "tokenizers" }, - { name = "tqdm" }, + { name = "huggingface-hub", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, + { name = "loguru", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, + { name = "mmh3", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, + { name = "numpy", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') 
or (sys_platform != 'darwin' and sys_platform != 'linux')" }, + { name = "onnxruntime-gpu", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, + { name = "pillow", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, + { name = "pystemmer", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, + { name = "requests", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, + { name = "snowballstemmer", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, + { name = "tokenizers", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, + { name = "tqdm", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, ] sdist = { url = "https://mirrors.aliyun.com/pypi/packages/da/07/7336c7f3d7ee47f33b407eeb50f5eeb152889de538a52a8f1cc637192816/fastembed_gpu-0.3.6.tar.gz", hash = "sha256:ee2de8918b142adbbf48caaffec0c492f864d73c073eea5a3dcd0e8c1041c50d" } wheels = [ @@ -2142,37 +2162,37 @@ wheels = [ [[package]] name = "greenlet" -version = "3.0.3" +version = "3.2.3" source = { registry = "https://mirrors.aliyun.com/pypi/simple" } -sdist = { url = "https://mirrors.aliyun.com/pypi/packages/17/14/3bddb1298b9a6786539ac609ba4b7c9c0842e12aa73aaa4d8d73ec8f8185/greenlet-3.0.3.tar.gz", hash = "sha256:43374442353259554ce33599da8b692d5aa96f8976d567d4badf263371fbe491" } +sdist = { url = "https://mirrors.aliyun.com/pypi/packages/c9/92/bb85bd6e80148a4d2e0c59f7c0c2891029f8fd510183afc7d8d2feeed9b6/greenlet-3.2.3.tar.gz", hash = "sha256:8b0dd8ae4c0d6f5e54ee55ba935eeb3d735a9b58a8a1e5b5cbab64e01a39f365" } wheels = [ - { url = "https://mirrors.aliyun.com/pypi/packages/a6/64/bea53c592e3e45799f7c8039a8ee7d6883c518eafef1fcae60beb776070f/greenlet-3.0.3-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:9da2bd29ed9e4f15955dd1595ad7bc9320308a3b766ef7f837e23ad4b4aac31a" }, - { url = "https://mirrors.aliyun.com/pypi/packages/a6/d6/408ad9603339db28ce334021b1403dfcfbcb7501a435d49698408d928de7/greenlet-3.0.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d353cadd6083fdb056bb46ed07e4340b0869c305c8ca54ef9da3421acbdf6881" }, - { url = "https://mirrors.aliyun.com/pypi/packages/6c/90/5b14670653f7363fb3e1665f8da6d64bd4c31d53a796d09ef69f48be7273/greenlet-3.0.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dca1e2f3ca00b84a396bc1bce13dd21f680f035314d2379c4160c98153b2059b" }, - { url = "https://mirrors.aliyun.com/pypi/packages/ef/17/e8e72cabfb5a906c0d976d7fbcc88310df292beea0f816efbefdaf694284/greenlet-3.0.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3ed7fb269f15dc662787f4119ec300ad0702fa1b19d2135a37c2c4de6fadfd4a" }, - { url = "https://mirrors.aliyun.com/pypi/packages/1c/2f/64628f6ae48e05f585e0eb3fb7399b52e240ef99f602107b445bf6be23ef/greenlet-3.0.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd4f49ae60e10adbc94b45c0b5e6a179acc1736cf7a90160b404076ee283cf83" }, - { url = 
"https://mirrors.aliyun.com/pypi/packages/24/35/945d5b10648fec9b20bcc6df8952d20bb3bba76413cd71c1fdbee98f5616/greenlet-3.0.3-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:73a411ef564e0e097dbe7e866bb2dda0f027e072b04da387282b02c308807405" }, - { url = "https://mirrors.aliyun.com/pypi/packages/74/00/27e2da76b926e9b5a2c97d3f4c0baf1b7d8181209d3026c0171f621ae6c0/greenlet-3.0.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:7f362975f2d179f9e26928c5b517524e89dd48530a0202570d55ad6ca5d8a56f" }, - { url = "https://mirrors.aliyun.com/pypi/packages/e1/65/506e0a80931170b0dac1a03d36b7fc299f3fa3576235b916718602fff2c3/greenlet-3.0.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:649dde7de1a5eceb258f9cb00bdf50e978c9db1b996964cd80703614c86495eb" }, - { url = "https://mirrors.aliyun.com/pypi/packages/a6/76/e1ee9f290bb0d46b09704c2fb0e609cae329eb308ad404c0ee6fa1ecb8a5/greenlet-3.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:68834da854554926fbedd38c76e60c4a2e3198c6fbed520b106a8986445caaf9" }, - { url = "https://mirrors.aliyun.com/pypi/packages/6e/20/68a278a6f93fa36e21cfc3d7599399a8a831225644eb3b6b18755cd3d6fc/greenlet-3.0.3-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:b1b5667cced97081bf57b8fa1d6bfca67814b0afd38208d52538316e9422fc61" }, - { url = "https://mirrors.aliyun.com/pypi/packages/21/b4/90e06e07c78513ab03855768200bdb35c8e764e805b3f14fb488e56f82dc/greenlet-3.0.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:52f59dd9c96ad2fc0d5724107444f76eb20aaccb675bf825df6435acb7703559" }, - { url = "https://mirrors.aliyun.com/pypi/packages/f6/a2/0ed21078039072f9dc738bbf3af12b103a84106b1385ac4723841f846ce7/greenlet-3.0.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:afaff6cf5200befd5cec055b07d1c0a5a06c040fe5ad148abcd11ba6ab9b114e" }, - { url = "https://mirrors.aliyun.com/pypi/packages/42/11/42ad6b1104c357826bbee7d7b9e4f24dbd9fde94899a03efb004aab62963/greenlet-3.0.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fe754d231288e1e64323cfad462fcee8f0288654c10bdf4f603a39ed923bef33" }, - { url = "https://mirrors.aliyun.com/pypi/packages/bb/6b/384dee7e0121cbd1757bdc1824a5ee28e43d8d4e3f99aa59521f629442fe/greenlet-3.0.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2797aa5aedac23af156bbb5a6aa2cd3427ada2972c828244eb7d1b9255846379" }, - { url = "https://mirrors.aliyun.com/pypi/packages/c6/1f/12d5a6cc26e8b483c2e7975f9c22e088ac735c0d8dcb8a8f72d31a4e5f04/greenlet-3.0.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b7f009caad047246ed379e1c4dbcb8b020f0a390667ea74d2387be2998f58a22" }, - { url = "https://mirrors.aliyun.com/pypi/packages/c7/ec/85b647e59e0f137c7792a809156f413e38379cf7f3f2e1353c37f4be4026/greenlet-3.0.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:c5e1536de2aad7bf62e27baf79225d0d64360d4168cf2e6becb91baf1ed074f3" }, - { url = "https://mirrors.aliyun.com/pypi/packages/94/ed/1e5f4bca691a81700e5a88e86d6f0e538acb10188cd2cc17140e523255ef/greenlet-3.0.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:894393ce10ceac937e56ec00bb71c4c2f8209ad516e96033e4b3b1de270e200d" }, - { url = "https://mirrors.aliyun.com/pypi/packages/47/79/26d54d7d700ef65b689fc2665a40846d13e834da0486674a8d4f0f371a47/greenlet-3.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:1ea188d4f49089fc6fb283845ab18a2518d279c7cd9da1065d7a84e991748728" }, - { url = 
"https://mirrors.aliyun.com/pypi/packages/a2/2f/461615adc53ba81e99471303b15ac6b2a6daa8d2a0f7f77fd15605e16d5b/greenlet-3.0.3-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:70fb482fdf2c707765ab5f0b6655e9cfcf3780d8d87355a063547b41177599be" }, - { url = "https://mirrors.aliyun.com/pypi/packages/e9/55/2c3cfa3cdbb940cf7321fbcf544f0e9c74898eed43bf678abf416812d132/greenlet-3.0.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d4d1ac74f5c0c0524e4a24335350edad7e5f03b9532da7ea4d3c54d527784f2e" }, - { url = "https://mirrors.aliyun.com/pypi/packages/38/77/efb21ab402651896c74f24a172eb4d7479f9f53898bd5e56b9e20bb24ffd/greenlet-3.0.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:149e94a2dd82d19838fe4b2259f1b6b9957d5ba1b25640d2380bea9c5df37676" }, - { url = "https://mirrors.aliyun.com/pypi/packages/74/3a/92f188ace0190f0066dca3636cf1b09481d0854c46e92ec5e29c7cefe5b1/greenlet-3.0.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:15d79dd26056573940fcb8c7413d84118086f2ec1a8acdfa854631084393efcc" }, - { url = "https://mirrors.aliyun.com/pypi/packages/63/0f/847ed02cdfce10f0e6e3425cd054296bddb11a17ef1b34681fa01a055187/greenlet-3.0.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:881b7db1ebff4ba09aaaeae6aa491daeb226c8150fc20e836ad00041bcb11230" }, - { url = "https://mirrors.aliyun.com/pypi/packages/bd/37/56b0da468a85e7704f3b2bc045015301bdf4be2184a44868c71f6dca6fe2/greenlet-3.0.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fcd2469d6a2cf298f198f0487e0a5b1a47a42ca0fa4dfd1b6862c999f018ebbf" }, - { url = "https://mirrors.aliyun.com/pypi/packages/7c/68/b5f4084c0a252d7e9c0d95fc1cfc845d08622037adb74e05be3a49831186/greenlet-3.0.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:1f672519db1796ca0d8753f9e78ec02355e862d0998193038c7073045899f305" }, - { url = "https://mirrors.aliyun.com/pypi/packages/a4/fa/31e22345518adcd69d1d6ab5087a12c178aa7f3c51103f6d5d702199d243/greenlet-3.0.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2516a9957eed41dd8f1ec0c604f1cdc86758b587d964668b5b196a9db5bfcde6" }, - { url = "https://mirrors.aliyun.com/pypi/packages/53/80/3d94d5999b4179d91bcc93745d1b0815b073d61be79dd546b840d17adb18/greenlet-3.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:bba5387a6975598857d86de9eac14210a49d554a77eb8261cc68b7d082f78ce2" }, + { url = "https://mirrors.aliyun.com/pypi/packages/92/db/b4c12cff13ebac2786f4f217f06588bccd8b53d260453404ef22b121fc3a/greenlet-3.2.3-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:1afd685acd5597349ee6d7a88a8bec83ce13c106ac78c196ee9dde7c04fe87be" }, + { url = "https://mirrors.aliyun.com/pypi/packages/52/61/75b4abd8147f13f70986df2801bf93735c1bd87ea780d70e3b3ecda8c165/greenlet-3.2.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:761917cac215c61e9dc7324b2606107b3b292a8349bdebb31503ab4de3f559ac" }, + { url = "https://mirrors.aliyun.com/pypi/packages/35/aa/6894ae299d059d26254779a5088632874b80ee8cf89a88bca00b0709d22f/greenlet-3.2.3-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:a433dbc54e4a37e4fff90ef34f25a8c00aed99b06856f0119dcf09fbafa16392" }, + { url = "https://mirrors.aliyun.com/pypi/packages/30/64/e01a8261d13c47f3c082519a5e9dbf9e143cc0498ed20c911d04e54d526c/greenlet-3.2.3-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:72e77ed69312bab0434d7292316d5afd6896192ac4327d44f3d613ecb85b037c" }, + { url = 
"https://mirrors.aliyun.com/pypi/packages/47/48/ff9ca8ba9772d083a4f5221f7b4f0ebe8978131a9ae0909cf202f94cd879/greenlet-3.2.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:68671180e3849b963649254a882cd544a3c75bfcd2c527346ad8bb53494444db" }, + { url = "https://mirrors.aliyun.com/pypi/packages/e9/45/626e974948713bc15775b696adb3eb0bd708bec267d6d2d5c47bb47a6119/greenlet-3.2.3-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:49c8cfb18fb419b3d08e011228ef8a25882397f3a859b9fe1436946140b6756b" }, + { url = "https://mirrors.aliyun.com/pypi/packages/b1/8e/8b6f42c67d5df7db35b8c55c9a850ea045219741bb14416255616808c690/greenlet-3.2.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:efc6dc8a792243c31f2f5674b670b3a95d46fa1c6a912b8e310d6f542e7b0712" }, + { url = "https://mirrors.aliyun.com/pypi/packages/05/46/ab58828217349500a7ebb81159d52ca357da747ff1797c29c6023d79d798/greenlet-3.2.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:731e154aba8e757aedd0781d4b240f1225b075b4409f1bb83b05ff410582cf00" }, + { url = "https://mirrors.aliyun.com/pypi/packages/68/7f/d1b537be5080721c0f0089a8447d4ef72839039cdb743bdd8ffd23046e9a/greenlet-3.2.3-cp310-cp310-win_amd64.whl", hash = "sha256:96c20252c2f792defe9a115d3287e14811036d51e78b3aaddbee23b69b216302" }, + { url = "https://mirrors.aliyun.com/pypi/packages/fc/2e/d4fcb2978f826358b673f779f78fa8a32ee37df11920dc2bb5589cbeecef/greenlet-3.2.3-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:784ae58bba89fa1fa5733d170d42486580cab9decda3484779f4759345b29822" }, + { url = "https://mirrors.aliyun.com/pypi/packages/16/24/929f853e0202130e4fe163bc1d05a671ce8dcd604f790e14896adac43a52/greenlet-3.2.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0921ac4ea42a5315d3446120ad48f90c3a6b9bb93dd9b3cf4e4d84a66e42de83" }, + { url = "https://mirrors.aliyun.com/pypi/packages/d1/b2/0320715eb61ae70c25ceca2f1d5ae620477d246692d9cc284c13242ec31c/greenlet-3.2.3-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:d2971d93bb99e05f8c2c0c2f4aa9484a18d98c4c3bd3c62b65b7e6ae33dfcfaf" }, + { url = "https://mirrors.aliyun.com/pypi/packages/bd/49/445fd1a210f4747fedf77615d941444349c6a3a4a1135bba9701337cd966/greenlet-3.2.3-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:c667c0bf9d406b77a15c924ef3285e1e05250948001220368e039b6aa5b5034b" }, + { url = "https://mirrors.aliyun.com/pypi/packages/7e/c8/ca19760cf6eae75fa8dc32b487e963d863b3ee04a7637da77b616703bc37/greenlet-3.2.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:592c12fb1165be74592f5de0d70f82bc5ba552ac44800d632214b76089945147" }, + { url = "https://mirrors.aliyun.com/pypi/packages/65/89/77acf9e3da38e9bcfca881e43b02ed467c1dedc387021fc4d9bd9928afb8/greenlet-3.2.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:29e184536ba333003540790ba29829ac14bb645514fbd7e32af331e8202a62a5" }, + { url = "https://mirrors.aliyun.com/pypi/packages/97/c6/ae244d7c95b23b7130136e07a9cc5aadd60d59b5951180dc7dc7e8edaba7/greenlet-3.2.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:93c0bb79844a367782ec4f429d07589417052e621aa39a5ac1fb99c5aa308edc" }, + { url = "https://mirrors.aliyun.com/pypi/packages/89/5f/b16dec0cbfd3070658e0d744487919740c6d45eb90946f6787689a7efbce/greenlet-3.2.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:751261fc5ad7b6705f5f76726567375bb2104a059454e0226e1eef6c756748ba" }, + { url = 
"https://mirrors.aliyun.com/pypi/packages/66/77/d48fb441b5a71125bcac042fc5b1494c806ccb9a1432ecaa421e72157f77/greenlet-3.2.3-cp311-cp311-win_amd64.whl", hash = "sha256:83a8761c75312361aa2b5b903b79da97f13f556164a7dd2d5448655425bd4c34" }, + { url = "https://mirrors.aliyun.com/pypi/packages/f3/94/ad0d435f7c48debe960c53b8f60fb41c2026b1d0fa4a99a1cb17c3461e09/greenlet-3.2.3-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:25ad29caed5783d4bd7a85c9251c651696164622494c00802a139c00d639242d" }, + { url = "https://mirrors.aliyun.com/pypi/packages/93/5d/7c27cf4d003d6e77749d299c7c8f5fd50b4f251647b5c2e97e1f20da0ab5/greenlet-3.2.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:88cd97bf37fe24a6710ec6a3a7799f3f81d9cd33317dcf565ff9950c83f55e0b" }, + { url = "https://mirrors.aliyun.com/pypi/packages/c6/7e/807e1e9be07a125bb4c169144937910bf59b9d2f6d931578e57f0bce0ae2/greenlet-3.2.3-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:baeedccca94880d2f5666b4fa16fc20ef50ba1ee353ee2d7092b383a243b0b0d" }, + { url = "https://mirrors.aliyun.com/pypi/packages/9d/ab/158c1a4ea1068bdbc78dba5a3de57e4c7aeb4e7fa034320ea94c688bfb61/greenlet-3.2.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:be52af4b6292baecfa0f397f3edb3c6092ce071b499dd6fe292c9ac9f2c8f264" }, + { url = "https://mirrors.aliyun.com/pypi/packages/cc/0d/93729068259b550d6a0288da4ff72b86ed05626eaf1eb7c0d3466a2571de/greenlet-3.2.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0cc73378150b8b78b0c9fe2ce56e166695e67478550769536a6742dca3651688" }, + { url = "https://mirrors.aliyun.com/pypi/packages/f6/f6/c82ac1851c60851302d8581680573245c8fc300253fc1ff741ae74a6c24d/greenlet-3.2.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:706d016a03e78df129f68c4c9b4c4f963f7d73534e48a24f5f5a7101ed13dbbb" }, + { url = "https://mirrors.aliyun.com/pypi/packages/98/82/d022cf25ca39cf1200650fc58c52af32c90f80479c25d1cbf57980ec3065/greenlet-3.2.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:419e60f80709510c343c57b4bb5a339d8767bf9aef9b8ce43f4f143240f88b7c" }, + { url = "https://mirrors.aliyun.com/pypi/packages/f5/e1/25297f70717abe8104c20ecf7af0a5b82d2f5a980eb1ac79f65654799f9f/greenlet-3.2.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:93d48533fade144203816783373f27a97e4193177ebaaf0fc396db19e5d61163" }, + { url = "https://mirrors.aliyun.com/pypi/packages/1f/8f/8f9e56c5e82eb2c26e8cde787962e66494312dc8cb261c460e1f3a9c88bc/greenlet-3.2.3-cp312-cp312-win_amd64.whl", hash = "sha256:7454d37c740bb27bdeddfc3f358f26956a07d5220818ceb467a483197d84f849" }, ] [[package]] @@ -2375,7 +2395,7 @@ wheels = [ [[package]] name = "httpx" -version = "0.27.0" +version = "0.27.2" source = { registry = "https://mirrors.aliyun.com/pypi/simple" } dependencies = [ { name = "anyio" }, @@ -2384,9 +2404,9 @@ dependencies = [ { name = "idna" }, { name = "sniffio" }, ] -sdist = { url = "https://mirrors.aliyun.com/pypi/packages/5c/2d/3da5bdf4408b8b2800061c339f240c1802f2e82d55e50bd39c5a881f47f0/httpx-0.27.0.tar.gz", hash = "sha256:a0cb88a46f32dc874e04ee956e4c2764aba2aa228f650b06788ba6bda2962ab5" } +sdist = { url = "https://mirrors.aliyun.com/pypi/packages/78/82/08f8c936781f67d9e6b9eeb8a0c8b4e406136ea4c3d1f89a5db71d42e0e6/httpx-0.27.2.tar.gz", hash = "sha256:f7c2be1d2f3c3c3160d441802406b206c2b76f5947b11115e6df10c6c65e66c2" } wheels = [ - { url = 
"https://mirrors.aliyun.com/pypi/packages/41/7b/ddacf6dcebb42466abd03f368782142baa82e08fc0c1f8eaa05b4bae87d5/httpx-0.27.0-py3-none-any.whl", hash = "sha256:71d5465162c13681bff01ad59b2cc68dd838ea1f10e51574bac27103f00c91a5" }, + { url = "https://mirrors.aliyun.com/pypi/packages/56/95/9377bcb415797e44274b51d46e3249eba641711cf3348050f76ee7b15ffc/httpx-0.27.2-py3-none-any.whl", hash = "sha256:7bb2708e112d8fdd7829cd4243970f0c223274051cb35ee80c03301ee29a3df0" }, ] [[package]] @@ -2857,24 +2877,24 @@ wheels = [ [[package]] name = "litellm" -version = "1.48.0" +version = "1.75.0" source = { registry = "https://mirrors.aliyun.com/pypi/simple" } dependencies = [ { name = "aiohttp" }, { name = "click" }, + { name = "httpx" }, { name = "importlib-metadata" }, { name = "jinja2" }, { name = "jsonschema" }, { name = "openai" }, { name = "pydantic" }, { name = "python-dotenv" }, - { name = "requests" }, { name = "tiktoken" }, { name = "tokenizers" }, ] -sdist = { url = "https://mirrors.aliyun.com/pypi/packages/85/cf/ec69c348c6f16148a55657f3bd63215e965028441c0f322ae8edf9c1210a/litellm-1.48.0.tar.gz", hash = "sha256:31a9b8a25a9daf44c24ddc08bf74298da920f2c5cea44135e5061278d0aa6fc9" } +sdist = { url = "https://mirrors.aliyun.com/pypi/packages/1b/28/50837cb0246c42a8caac45610572883de7f478543cf4d143e84f099c0234/litellm-1.75.0.tar.gz", hash = "sha256:ec7fbfe79e1b9cd4a2b36ca9e71e71959d8fc43305b222e5f257aced1a0d1d63" } wheels = [ - { url = "https://mirrors.aliyun.com/pypi/packages/37/2b/6a42747557dc557e71d1e0664c4d5a814b08cda0589213921bb51c64c5e4/litellm-1.48.0-py3-none-any.whl", hash = "sha256:7765e8a92069778f5fc66aacfabd0e2f8ec8d74fb117f5e475567d89b0d376b9" }, + { url = "https://mirrors.aliyun.com/pypi/packages/db/43/e10905870d42e927de3b095a9248f2764156c7eb45ec172d72be35cd2bb4/litellm-1.75.0-py3-none-any.whl", hash = "sha256:1657472f37d291b366050dd2035e3640eebd96142d6fa0f935ceb290a0e1d5ad" }, ] [[package]] @@ -3765,12 +3785,12 @@ name = "onnxruntime-gpu" version = "1.19.2" source = { registry = "https://mirrors.aliyun.com/pypi/simple" } dependencies = [ - { name = "coloredlogs" }, - { name = "flatbuffers" }, - { name = "numpy" }, - { name = "packaging" }, - { name = "protobuf" }, - { name = "sympy" }, + { name = "coloredlogs", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, + { name = "flatbuffers", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, + { name = "numpy", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, + { name = "packaging", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, + { name = "protobuf", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, + { name = "sympy", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, ] wheels = [ { url = "https://mirrors.aliyun.com/pypi/packages/d0/9c/3fa310e0730643051eb88e884f19813a6c8b67d0fbafcda610d960e589db/onnxruntime_gpu-1.19.2-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a49740e079e7c5215830d30cde3df792e903df007aa0b0fd7aa797937061b27a" }, @@ -3783,7 +3803,7 @@ wheels = [ [[package]] name = "openai" -version = "1.45.0" +version = "1.99.1" source 
= { registry = "https://mirrors.aliyun.com/pypi/simple" } dependencies = [ { name = "anyio" }, @@ -3795,9 +3815,9 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = "https://mirrors.aliyun.com/pypi/packages/70/cd/5ec65b9a56999370c032af7933433143f78239d44a8c03a5ba34159af945/openai-1.45.0.tar.gz", hash = "sha256:731207d10637335413aa3c0955f8f8df30d7636a4a0f9c381f2209d32cf8de97" } +sdist = { url = "https://mirrors.aliyun.com/pypi/packages/03/30/f0fb7907a77e733bb801c7bdcde903500b31215141cdb261f04421e6fbec/openai-1.99.1.tar.gz", hash = "sha256:2c9d8e498c298f51bb94bcac724257a3a6cac6139ccdfc1186c6708f7a93120f" } wheels = [ - { url = "https://mirrors.aliyun.com/pypi/packages/d4/2a/97e80a4551346efc9cd937e11adb640207acc5045fdf4e06786eac55bfb1/openai-1.45.0-py3-none-any.whl", hash = "sha256:2f1f7b7cf90f038a9f1c24f0d26c0f1790c102ec5acd07ffd70a9b7feac1ff4e" }, + { url = "https://mirrors.aliyun.com/pypi/packages/54/15/9c85154ffd283abfc43309ff3aaa63c3fd02f7767ee684e73670f6c5ade2/openai-1.99.1-py3-none-any.whl", hash = "sha256:8eeccc69e0ece1357b51ca0d9fb21324afee09b20c3e5b547d02445ca18a4e03" }, ] [[package]] @@ -4242,32 +4262,21 @@ wheels = [ [[package]] name = "playwright" -version = "1.47.0" +version = "1.54.0" source = { registry = "https://mirrors.aliyun.com/pypi/simple" } dependencies = [ { name = "greenlet" }, { name = "pyee" }, ] wheels = [ - { url = "https://mirrors.aliyun.com/pypi/packages/f8/70/01cad1d41861cd939fe66bff725771dd03f2de39b7c25b4479de2f583ce0/playwright-1.47.0-py3-none-macosx_10_13_x86_64.whl", hash = "sha256:f205df24edb925db1a4ab62f1ab0da06f14bb69e382efecfb0deedc4c7f4b8cd" }, - { url = "https://mirrors.aliyun.com/pypi/packages/42/17/2300e578b434b56ebfc3d56a5e0fe6dc5e99d6ff43a88fa492b881f3b7e3/playwright-1.47.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:7fc820faf6885f69a52ba4ec94124e575d3c4a4003bf29200029b4a4f2b2d0ab" }, - { url = "https://mirrors.aliyun.com/pypi/packages/5a/6a/3cff2abfa4b4c52e1fa34fa8b71bf09cc2a89b03b7417733e5138f1be61d/playwright-1.47.0-py3-none-macosx_11_0_universal2.whl", hash = "sha256:8e212dc472ff19c7d46ed7e900191c7a786ce697556ac3f1615986ec3aa00341" }, - { url = "https://mirrors.aliyun.com/pypi/packages/80/a6/c5152c817db664d75c439c2bd99d51f906a31c1df4a04e673ef51008b12f/playwright-1.47.0-py3-none-manylinux1_x86_64.whl", hash = "sha256:a1935672531963e4b2a321de5aa59b982fb92463ee6e1032dd7326378e462955" }, - { url = "https://mirrors.aliyun.com/pypi/packages/d6/50/b573c13d3748a1ab94ed45f2faeb868c63263df0055f57028c4cc775419f/playwright-1.47.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e0a1b61473d6f7f39c5d77d4800b3cbefecb03344c90b98f3fbcae63294ad249" }, - { url = "https://mirrors.aliyun.com/pypi/packages/7d/6c/34225ee5707db5e34bffa77f05d152c797c0e0b9bf3d3a5b426d99160f8f/playwright-1.47.0-py3-none-win32.whl", hash = "sha256:1b977ed81f6bba5582617684a21adab9bad5676d90a357ebf892db7bdf4a9974" }, - { url = "https://mirrors.aliyun.com/pypi/packages/cb/88/9a3c77025702e506fe04275e677676246ff0b2e6964de5d2527dfdab3416/playwright-1.47.0-py3-none-win_amd64.whl", hash = "sha256:0ec1056042d2e86088795a503347407570bffa32cbe20748e5d4c93dba085280" }, -] - -[[package]] -name = "playwright-stealth" -version = "1.0.6" -source = { registry = "https://mirrors.aliyun.com/pypi/simple" } -dependencies = [ - { name = "playwright" }, -] -sdist = { url = "https://mirrors.aliyun.com/pypi/packages/e5/dc/4e88b517e4c9cfb63f1b0b67d59adddcef2dc2fe0883b90e07119d15895a/playwright-stealth-1.0.6.tar.gz", hash = 
"sha256:b504d951d00fac755c7d13665a29611d415180510bd7d23f14ebc89439ba2043" } -wheels = [ - { url = "https://mirrors.aliyun.com/pypi/packages/34/10/60981cb8d8e22487061b98a0803313c4fb519cc95ab1421516304a0cfcd0/playwright_stealth-1.0.6-py3-none-any.whl", hash = "sha256:b1b2bcf58eb6859aa53d42c49b91c4e27b74a6d13fc3d0c85eea513dd55efda3" }, + { url = "https://mirrors.aliyun.com/pypi/packages/f3/09/33d5bfe393a582d8dac72165a9e88b274143c9df411b65ece1cc13f42988/playwright-1.54.0-py3-none-macosx_10_13_x86_64.whl", hash = "sha256:bf3b845af744370f1bd2286c2a9536f474cc8a88dc995b72ea9a5be714c9a77d" }, + { url = "https://mirrors.aliyun.com/pypi/packages/e1/7b/51882dc584f7aa59f446f2bb34e33c0e5f015de4e31949e5b7c2c10e54f0/playwright-1.54.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:780928b3ca2077aea90414b37e54edd0c4bbb57d1aafc42f7aa0b3fd2c2fac02" }, + { url = "https://mirrors.aliyun.com/pypi/packages/73/a1/7aa8ae175b240c0ec8849fcf000e078f3c693f9aa2ffd992da6550ea0dff/playwright-1.54.0-py3-none-macosx_11_0_universal2.whl", hash = "sha256:81d0b6f28843b27f288cfe438af0a12a4851de57998009a519ea84cee6fbbfb9" }, + { url = "https://mirrors.aliyun.com/pypi/packages/34/a9/45084fd23b6206f954198296ce39b0acf50debfdf3ec83a593e4d73c9c8a/playwright-1.54.0-py3-none-manylinux1_x86_64.whl", hash = "sha256:09919f45cc74c64afb5432646d7fef0d19fff50990c862cb8d9b0577093f40cc" }, + { url = "https://mirrors.aliyun.com/pypi/packages/02/d4/6a692f4c6db223adc50a6e53af405b45308db39270957a6afebddaa80ea2/playwright-1.54.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:13ae206c55737e8e3eae51fb385d61c0312eeef31535643bb6232741b41b6fdc" }, + { url = "https://mirrors.aliyun.com/pypi/packages/72/7a/4ee60a1c3714321db187bebbc40d52cea5b41a856925156325058b5fca5a/playwright-1.54.0-py3-none-win32.whl", hash = "sha256:0b108622ffb6906e28566f3f31721cd57dda637d7e41c430287804ac01911f56" }, + { url = "https://mirrors.aliyun.com/pypi/packages/aa/77/8f8fae05a242ef639de963d7ae70a69d0da61d6d72f1207b8bbf74ffd3e7/playwright-1.54.0-py3-none-win_amd64.whl", hash = "sha256:9e5aee9ae5ab1fdd44cd64153313a2045b136fcbcfb2541cc0a3d909132671a2" }, + { url = "https://mirrors.aliyun.com/pypi/packages/33/ff/99a6f4292a90504f2927d34032a4baf6adb498dc3f7cf0f3e0e22899e310/playwright-1.54.0-py3-none-win_arm64.whl", hash = "sha256:a975815971f7b8dca505c441a4c56de1aeb56a211290f8cc214eeef5524e8d75" }, ] [[package]] @@ -4653,8 +4662,6 @@ wheels = [ { url = "https://mirrors.aliyun.com/pypi/packages/59/fe/aae679b64363eb78326c7fdc9d06ec3de18bac68be4b612fc1fe8902693c/pycryptodome-3.23.0-cp37-abi3-win32.whl", hash = "sha256:507dbead45474b62b2bbe318eb1c4c8ee641077532067fec9c1aa82c31f84886" }, { url = "https://mirrors.aliyun.com/pypi/packages/54/2f/e97a1b8294db0daaa87012c24a7bb714147c7ade7656973fd6c736b484ff/pycryptodome-3.23.0-cp37-abi3-win_amd64.whl", hash = "sha256:c75b52aacc6c0c260f204cbdd834f76edc9fb0d8e0da9fbf8352ef58202564e2" }, { url = "https://mirrors.aliyun.com/pypi/packages/18/3d/f9441a0d798bf2b1e645adc3265e55706aead1255ccdad3856dbdcffec14/pycryptodome-3.23.0-cp37-abi3-win_arm64.whl", hash = "sha256:11eeeb6917903876f134b56ba11abe95c0b0fd5e3330def218083c7d98bbcb3c" }, - { url = "https://mirrors.aliyun.com/pypi/packages/9f/7c/f5b0556590e7b4e710509105e668adb55aa9470a9f0e4dea9c40a4a11ce1/pycryptodome-3.23.0-pp27-pypy_73-manylinux2010_x86_64.whl", hash = "sha256:350ebc1eba1da729b35ab7627a833a1a355ee4e852d8ba0447fafe7b14504d56" }, - { url = 
"https://mirrors.aliyun.com/pypi/packages/33/38/dcc795578d610ea1aaffef4b148b8cafcfcf4d126b1e58231ddc4e475c70/pycryptodome-3.23.0-pp27-pypy_73-win32.whl", hash = "sha256:93837e379a3e5fd2bb00302a47aee9fdf7940d83595be3915752c74033d17ca7" }, { url = "https://mirrors.aliyun.com/pypi/packages/d9/12/e33935a0709c07de084d7d58d330ec3f4daf7910a18e77937affdb728452/pycryptodome-3.23.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:ddb95b49df036ddd264a0ad246d1be5b672000f12d6961ea2c267083a5e19379" }, { url = "https://mirrors.aliyun.com/pypi/packages/22/0b/aa8f9419f25870889bebf0b26b223c6986652bdf071f000623df11212c90/pycryptodome-3.23.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8e95564beb8782abfd9e431c974e14563a794a4944c29d6d3b7b5ea042110b4" }, { url = "https://mirrors.aliyun.com/pypi/packages/d4/5e/63f5cbde2342b7f70a39e591dbe75d9809d6338ce0b07c10406f1a140cdc/pycryptodome-3.23.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:14e15c081e912c4b0d75632acd8382dfce45b258667aa3c67caf7a4d4c13f630" }, @@ -4678,8 +4685,6 @@ wheels = [ { url = "https://mirrors.aliyun.com/pypi/packages/48/7d/0f2b09490b98cc6a902ac15dda8760c568b9c18cfe70e0ef7a16de64d53a/pycryptodomex-3.20.0-cp35-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:7a7a8f33a1f1fb762ede6cc9cbab8f2a9ba13b196bfaf7bc6f0b39d2ba315a43" }, { url = "https://mirrors.aliyun.com/pypi/packages/b0/1c/375adb14b71ee1c8d8232904e928b3e7af5bbbca7c04e4bec94fe8e90c3d/pycryptodomex-3.20.0-cp35-abi3-win32.whl", hash = "sha256:c39778fd0548d78917b61f03c1fa8bfda6cfcf98c767decf360945fe6f97461e" }, { url = "https://mirrors.aliyun.com/pypi/packages/b2/e8/1b92184ab7e5595bf38000587e6f8cf9556ebd1bf0a583619bee2057afbd/pycryptodomex-3.20.0-cp35-abi3-win_amd64.whl", hash = "sha256:2a47bcc478741b71273b917232f521fd5704ab4b25d301669879e7273d3586cc" }, - { url = "https://mirrors.aliyun.com/pypi/packages/e7/c5/9140bb867141d948c8e242013ec8a8011172233c898dfdba0a2417c3169a/pycryptodomex-3.20.0-pp27-pypy_73-manylinux2010_x86_64.whl", hash = "sha256:1be97461c439a6af4fe1cf8bf6ca5936d3db252737d2f379cc6b2e394e12a458" }, - { url = "https://mirrors.aliyun.com/pypi/packages/5e/6a/04acb4978ce08ab16890c70611ebc6efd251681341617bbb9e53356dee70/pycryptodomex-3.20.0-pp27-pypy_73-win32.whl", hash = "sha256:19764605feea0df966445d46533729b645033f134baeb3ea26ad518c9fdf212c" }, { url = "https://mirrors.aliyun.com/pypi/packages/eb/df/3f1ea084e43b91e6d2b6b3493cc948864c17ea5d93ff1261a03812fbfd1a/pycryptodomex-3.20.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:f2e497413560e03421484189a6b65e33fe800d3bd75590e6d78d4dfdb7accf3b" }, { url = "https://mirrors.aliyun.com/pypi/packages/c9/f3/83ffbdfa0c8f9154bcd8866895f6cae5a3ec749da8b0840603cf936c4412/pycryptodomex-3.20.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e48217c7901edd95f9f097feaa0388da215ed14ce2ece803d3f300b4e694abea" }, { url = "https://mirrors.aliyun.com/pypi/packages/c9/9d/c113e640aaf02af5631ae2686b742aac5cd0e1402b9d6512b1c7ec5ef05d/pycryptodomex-3.20.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d00fe8596e1cc46b44bf3907354e9377aa030ec4cd04afbbf6e899fc1e2a7781" }, @@ -4792,14 +4797,14 @@ wheels = [ [[package]] name = "pyee" -version = "12.0.0" +version = "13.0.0" source = { registry = "https://mirrors.aliyun.com/pypi/simple" } dependencies = [ { name = "typing-extensions" }, ] -sdist = { url = 
"https://mirrors.aliyun.com/pypi/packages/d2/a7/8faaa62a488a2a1e0d56969757f087cbd2729e9bcfa508c230299f366b4c/pyee-12.0.0.tar.gz", hash = "sha256:c480603f4aa2927d4766eb41fa82793fe60a82cbfdb8d688e0d08c55a534e145" } +sdist = { url = "https://mirrors.aliyun.com/pypi/packages/95/03/1fd98d5841cd7964a27d729ccf2199602fe05eb7a405c1462eb7277945ed/pyee-13.0.0.tar.gz", hash = "sha256:b391e3c5a434d1f5118a25615001dbc8f669cf410ab67d04c4d4e07c55481c37" } wheels = [ - { url = "https://mirrors.aliyun.com/pypi/packages/1d/0d/95993c08c721ec68892547f2117e8f9dfbcef2ca71e098533541b4a54d5f/pyee-12.0.0-py3-none-any.whl", hash = "sha256:7b14b74320600049ccc7d0e0b1becd3b4bd0a03c745758225e31a59f4095c990" }, + { url = "https://mirrors.aliyun.com/pypi/packages/9b/4d/b9add7c84060d4c1906abe9a7e5359f2a60f7a9a4f67268b2766673427d8/pyee-13.0.0-py3-none-any.whl", hash = "sha256:48195a3cddb3b1515ce0695ed76036b5ccc2ef3a9f963ff9f77aec0139845498" }, ] [[package]] @@ -5239,6 +5244,7 @@ dependencies = [ { name = "itsdangerous" }, { name = "json-repair" }, { name = "langfuse" }, + { name = "litellm" }, { name = "markdown" }, { name = "markdown-to-json" }, { name = "mcp" }, @@ -5357,7 +5363,7 @@ requires-dist = [ { name = "click", specifier = ">=8.1.8" }, { name = "cn2an", specifier = "==0.5.22" }, { name = "cohere", specifier = "==5.6.2" }, - { name = "crawl4ai", specifier = "==0.3.8" }, + { name = "crawl4ai", specifier = ">=0.3.8" }, { name = "dashscope", specifier = "==1.20.11" }, { name = "datrie", specifier = "==0.8.2" }, { name = "debugpy", specifier = ">=1.8.13" }, @@ -5384,13 +5390,14 @@ requires-dist = [ { name = "groq", specifier = "==0.9.0" }, { name = "hanziconv", specifier = "==0.3.2" }, { name = "html-text", specifier = "==0.6.2" }, - { name = "httpx", specifier = "==0.27.0" }, + { name = "httpx", specifier = "==0.27.2" }, { name = "huggingface-hub", specifier = ">=0.25.0,<0.26.0" }, { name = "infinity-emb", specifier = ">=0.0.66,<0.0.67" }, { name = "infinity-sdk", specifier = "==0.6.0.dev4" }, { name = "itsdangerous", specifier = "==2.1.2" }, { name = "json-repair", specifier = "==0.35.0" }, { name = "langfuse", specifier = ">=2.60.0" }, + { name = "litellm", specifier = ">=1.74.15.post1" }, { name = "markdown", specifier = "==3.6" }, { name = "markdown-to-json", specifier = "==2.1.1" }, { name = "mcp", specifier = ">=1.9.4" }, @@ -5402,7 +5409,7 @@ requires-dist = [ { name = "ollama", specifier = "==0.2.1" }, { name = "onnxruntime", marker = "platform_machine != 'x86_64' or sys_platform == 'darwin'", specifier = "==1.19.2" }, { name = "onnxruntime-gpu", marker = "platform_machine == 'x86_64' and sys_platform != 'darwin'", specifier = "==1.19.2" }, - { name = "openai", specifier = "==1.45.0" }, + { name = "openai", specifier = ">=1.45.0" }, { name = "opencv-python", specifier = "==4.10.0.84" }, { name = "opencv-python-headless", specifier = "==4.10.0.84" }, { name = "opendal", specifier = ">=0.45.0,<0.46.0" }, @@ -5467,6 +5474,7 @@ requires-dist = [ { name = "yfinance", specifier = "==0.2.65" }, { name = "zhipuai", specifier = "==2.0.1" }, ] +provides-extras = ["full"] [package.metadata.requires-dev] test = [ @@ -5481,6 +5489,18 @@ test = [ { name = "requests-toolbelt", specifier = ">=1.0.0" }, ] +[[package]] +name = "rank-bm25" +version = "0.2.2" +source = { registry = "https://mirrors.aliyun.com/pypi/simple" } +dependencies = [ + { name = "numpy" }, +] +sdist = { url = "https://mirrors.aliyun.com/pypi/packages/fc/0a/f9579384aa017d8b4c15613f86954b92a95a93d641cc849182467cf0bb3b/rank_bm25-0.2.2.tar.gz", hash = 
"sha256:096ccef76f8188563419aaf384a02f0ea459503fdf77901378d4fd9d87e5e51d" } +wheels = [ + { url = "https://mirrors.aliyun.com/pypi/packages/2a/21/f691fb2613100a62b3fa91e9988c991e9ca5b89ea31c0d3152a3210344f9/rank_bm25-0.2.2-py3-none-any.whl", hash = "sha256:7bd4a95571adadfc271746fa146a4bcfd89c0cf731e49c3d1ad863290adbe8ae" }, +] + [[package]] name = "ranx" version = "0.3.20" @@ -6423,6 +6443,19 @@ wheels = [ { url = "https://mirrors.aliyun.com/pypi/packages/55/08/98090d1a139e8995053ed22e099b43aa4dea8cffe056f8f0bc5178aeecbd/tencentcloud_sdk_python-3.0.1215-py2.py3-none-any.whl", hash = "sha256:899ced749baf74846f1eabf452f74aa0e48d1965f0ca7828a8b73b446f76f5f2" }, ] +[[package]] +name = "tf-playwright-stealth" +version = "1.2.0" +source = { registry = "https://mirrors.aliyun.com/pypi/simple" } +dependencies = [ + { name = "fake-http-header" }, + { name = "playwright" }, +] +sdist = { url = "https://mirrors.aliyun.com/pypi/packages/d6/6b/32bb58c65991f91aeaaf7473b650175d9d4af5dd383983d177d49ccba08d/tf_playwright_stealth-1.2.0.tar.gz", hash = "sha256:7bb8d32d3e60324fbf6b9eeae540b8cd9f3b9e07baeb33b025dbc98ad47658ba" } +wheels = [ + { url = "https://mirrors.aliyun.com/pypi/packages/11/3d/2653f4cf49660bb44eeac8270617cc4c0287d61716f249f55053f0af0724/tf_playwright_stealth-1.2.0-py3-none-any.whl", hash = "sha256:26ee47ee89fa0f43c606fe37c188ea3ccd36f96ea90c01d167b768df457e7886" }, +] + [[package]] name = "threadpoolctl" version = "3.6.0" diff --git a/web/src/locales/config.ts b/web/src/locales/config.ts index cd8d928f5..65d85d718 100644 --- a/web/src/locales/config.ts +++ b/web/src/locales/config.ts @@ -10,9 +10,9 @@ import translation_fr from './fr'; import translation_id from './id'; import translation_ja from './ja'; import translation_pt_br from './pt-br'; +import translation_ru from './ru'; import { createTranslationTable, flattenObject } from './until'; import translation_vi from './vi'; -import translation_ru from './ru'; import translation_zh from './zh'; import translation_zh_traditional from './zh-traditional'; diff --git a/web/src/locales/ru.ts b/web/src/locales/ru.ts index 21d798dea..4cf23cd62 100644 --- a/web/src/locales/ru.ts +++ b/web/src/locales/ru.ts @@ -246,8 +246,7 @@ export default { methodExamples: 'Примеры', methodExamplesDescription: 'Скриншоты для пояснения:', dialogueExamplesTitle: 'просмотр', - methodEmpty: - 'Здесь будет визуальное объяснение категорий баз знаний', + methodEmpty: 'Здесь будет визуальное объяснение категорий баз знаний', book: `
Поддерживаемые форматы: DOCX, PDF, TXT.
Для PDF укажите диапазон страниц.
`, laws: `
Поддерживаемые форматы: DOCX, PDF, TXT.
@@ -316,21 +315,19 @@ export default {
В столбце тегов используйте запятую для разделения тегов.
`, useRaptor: 'Использовать RAPTOR', - useRaptorTip: - 'Включите RAPTOR для многошаговых вопросно-ответных задач.', + useRaptorTip: 'Включите RAPTOR для многошаговых вопросно-ответных задач.', prompt: 'Промпт', - promptTip: - 'Опишите задачу для LLM, укажите формат ответа и требования.', + promptTip: 'Опишите задачу для LLM, укажите формат ответа и требования.', promptMessage: 'Требуется промпт', promptText: `Пожалуйста, обобщите следующие абзацы. Будьте внимательны с числами, не выдумывайте. Абзацы: {cluster_content} Выше представлен контент для обобщения.`, maxToken: 'Макс. токенов', - maxTokenTip: 'Максимальное количество токенов на суммаризирующий фрагмент.', + maxTokenTip: + 'Максимальное количество токенов на суммаризирующий фрагмент.', maxTokenMessage: 'Требуется макс. токенов', threshold: 'Порог', - thresholdTip: - 'Минимальное сходство для группировки фрагментов в RAPTOR.', + thresholdTip: 'Минимальное сходство для группировки фрагментов в RAPTOR.', thresholdMessage: 'Требуется порог', maxCluster: 'Макс. кластеров', maxClusterTip: 'Максимальное количество кластеров.', @@ -463,8 +460,7 @@ export default { 'Устанавливает порог для выбора наиболее вероятных слов (ядерная выборка).', presencePenalty: 'Штраф за присутствие', presencePenaltyMessage: 'Требуется штраф за присутствие', - presencePenaltyTip: - 'Штрафует слова, уже появившиеся в диалоге.', + presencePenaltyTip: 'Штрафует слова, уже появившиеся в диалоге.', frequencyPenalty: 'Штраф за частоту', frequencyPenaltyMessage: 'Требуется штраф за частоту', frequencyPenaltyTip: @@ -553,8 +549,7 @@ export default { maxTokensInvalidMessage: 'Введите корректное число для Макс. токенов.', maxTokensMinMessage: 'Макс. токенов не может быть меньше 0.', password: 'Пароль', - passwordDescription: - 'Введите текущий пароль для изменения пароля.', + passwordDescription: 'Введите текущий пароль для изменения пароля.', model: 'Провайдеры моделей', modelDescription: 'Настройте параметры моделей и API KEY.', team: 'Команда', @@ -584,17 +579,14 @@ export default { 'Ваш новый пароль должен быть длиннее 8 символов.', confirmPassword: 'Подтвердите новый пароль', confirmPasswordMessage: 'Подтвердите пароль!', - confirmPasswordNonMatchMessage: - 'Новые пароли не совпадают!', + confirmPasswordNonMatchMessage: 'Новые пароли не совпадают!', cancel: 'Отмена', addedModels: 'Добавленные модели', modelsToBeAdded: 'Модели для добавления', addTheModel: 'Добавить модель', apiKey: 'API-Ключ', - apiKeyMessage: - 'Введите API ключ (для локальных моделей игнорируйте).', - apiKeyTip: - 'API ключ можно получить у поставщика LLM.', + apiKeyMessage: 'Введите API ключ (для локальных моделей игнорируйте).', + apiKeyTip: 'API ключ можно получить у поставщика LLM.', showMoreModels: 'Показать модели', hideModels: 'Скрыть модели', baseUrl: 'Базовый URL', @@ -603,22 +595,18 @@ export default { modify: 'Изменить', systemModelSettings: 'Установить модели по умолчанию', chatModel: 'Модель чата', - chatModelTip: - 'Модель чата по умолчанию для новых баз знаний.', + chatModelTip: 'Модель чата по умолчанию для новых баз знаний.', embeddingModel: 'Модель эмбеддинга', - embeddingModelTip: - 'Модель эмбеддинга по умолчанию для новых баз знаний.', + embeddingModelTip: 'Модель эмбеддинга по умолчанию для новых баз знаний.', img2txtModel: 'Модель Img2txt', - img2txtModelTip: - 'Модель описания изображений/видео по умолчанию.', + img2txtModelTip: 'Модель описания изображений/видео по умолчанию.', sequence2txtModel: 'Модель Speech2txt', sequence2txtModelTip: 'Модель ASR по умолчанию для 
преобразования речи в текст.', rerankModel: 'Модель реранкинга', rerankModelTip: `Модель реранкинга фрагментов по умолчанию.`, ttsModel: 'Модель TTS', - ttsModelTip: - 'Модель преобразования текста в речь по умолчанию.', + ttsModelTip: 'Модель преобразования текста в речь по умолчанию.', workspace: 'Рабочее пространство', upgrade: 'Обновить', addLlmTitle: 'Добавить LLM', @@ -677,8 +665,7 @@ export default { yiyanAKMessage: 'Введите ваш API KEY', addyiyanSK: 'yiyan Secret KEY', yiyanSKMessage: 'Введите ваш Secret KEY', - FishAudioModelNameMessage: - 'Дайте имя вашей модели синтеза речи', + FishAudioModelNameMessage: 'Дайте имя вашей модели синтеза речи', addFishAudioAK: 'Fish Audio API KEY', addFishAudioAKMessage: 'Введите ваш API KEY', addFishAudioRefID: 'FishAudio Reference ID', @@ -715,7 +702,7 @@ export default { configuration: 'Конфигурация', langfuseDescription: 'Трассировка, оценка, управление промптами и метрики для отладки и улучшения вашего LLM-приложения.', - viewLangfuseSDocumentation: "Документация Langfuse", + viewLangfuseSDocumentation: 'Документация Langfuse', view: 'Просмотр', modelsToBeAddedTooltip: 'Если ваш провайдер не указан, но заявляет о "совместимости с OpenAI API", выберите соответствующую карточку.', @@ -776,8 +763,7 @@ export default { s3: 'S3 загрузки', preview: 'Просмотр', fileError: 'Ошибка файла', - uploadLimit: - 'Каждый файл ≤10MB, всего файлов ≤128.', + uploadLimit: 'Каждый файл ≤10MB, всего файлов ≤128.', destinationFolder: 'Целевая папка', }, flow: { @@ -844,8 +830,7 @@ export default { baidu: 'Baidu', baiduDescription: `Ищет на baidu.com.`, duckDuckGo: 'DuckDuckGo', - duckDuckGoDescription: - 'Ищет на duckduckgo.com.', + duckDuckGoDescription: 'Ищет на duckduckgo.com.', channel: 'Канал', channelTip: `Текстовый или новостной поиск`, text: 'Текст', @@ -855,14 +840,11 @@ export default { 'Количество сообщений истории, видимых LLM. 
Учитывайте ограничение токенов модели.', wikipedia: 'Wikipedia', pubMed: 'PubMed', - pubMedDescription: - 'Ищет на https://pubmed.ncbi.nlm.nih.gov/.', + pubMedDescription: 'Ищет на https://pubmed.ncbi.nlm.nih.gov/.', email: 'Email', - emailTip: - 'Email обязателен.', + emailTip: 'Email обязателен.', arXiv: 'ArXiv', - arXivDescription: - 'Ищет на https://arxiv.org/.', + arXivDescription: 'Ищет на https://arxiv.org/.', sortBy: 'Сортировать по', submittedDate: 'Дата отправки', lastUpdatedDate: 'Дата обновления', @@ -877,24 +859,20 @@ export default { country: 'Страна и регион', language: 'Язык', googleScholar: 'Google Scholar', - googleScholarDescription: - 'Ищет на https://scholar.google.com/.', + googleScholarDescription: 'Ищет на https://scholar.google.com/.', yearLow: 'Год от', yearHigh: 'Год до', patents: 'Патенты', data: 'Данные', deepL: 'DeepL', - deepLDescription: - 'Перевод с помощью https://www.deepl.com/.', + deepLDescription: 'Перевод с помощью https://www.deepl.com/.', authKey: 'Ключ авторизации', sourceLang: 'Исходный язык', targetLang: 'Целевой язык', gitHub: 'GitHub', - gitHubDescription: - 'Ищет репозитории на https://github.com/.', + gitHubDescription: 'Ищет репозитории на https://github.com/.', baiduFanyi: 'BaiduFanyi', - baiduFanyiDescription: - 'Перевод с помощью https://fanyi.baidu.com/.', + baiduFanyiDescription: 'Перевод с помощью https://fanyi.baidu.com/.', appid: 'App ID', secretKey: 'Секретный ключ', domain: 'Домен', @@ -1062,8 +1040,7 @@ export default { yahooFinanceDescription: 'Запрашивает информацию о публичной компании по тикеру.', crawler: 'Веб-краулер', - crawlerDescription: - 'Скачивает HTML-код с указанного URL.', + crawlerDescription: 'Скачивает HTML-код с указанного URL.', proxy: 'Прокси', crawlerResultOptions: { html: 'Html', @@ -1077,8 +1054,7 @@ export default { balanceSheet: 'Баланс', cashFlowStatement: 'Отчет о движении денежных средств', jin10: 'Jin10', - jin10Description: - 'Получает финансовую информацию с Jin10 Open Platform.', + jin10Description: 'Получает финансовую информацию с Jin10 Open Platform.', flashType: 'Тип новости', filter: 'Фильтр', contain: 'Содержит', @@ -1265,13 +1241,13 @@ export default { 'Выберите базы знаний для ассистента или переменные с ID баз знаний.', knowledgeBaseVars: 'Переменные базы знаний', code: 'Код', - codeDescription: 'Позволяет разработчикам писать пользовательскую логику на Python.', + codeDescription: + 'Позволяет разработчикам писать пользовательскую логику на Python.', inputVariables: 'Входные переменные', runningHintText: 'выполняется...🕞', openingSwitch: 'Приветствие', openingCopy: 'Приветственное сообщение', - openingSwitchTip: - 'Пользователи увидят это приветствие в начале.', + openingSwitchTip: 'Пользователи увидят это приветствие в начале.', modeTip: 'Режим определяет, как запускается рабочий процесс.', beginInputTip: 'Определите входные параметры для доступа в последующих процессах.',