diff --git a/api/apps/llm_app.py b/api/apps/llm_app.py
index eb9ee1c94..2ec8180cd 100644
--- a/api/apps/llm_app.py
+++ b/api/apps/llm_app.py
@@ -57,6 +57,7 @@ def set_api_key():
# test if api key works
chat_passed, embd_passed, rerank_passed = False, False, False
factory = req["llm_factory"]
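+    # The factory name is forwarded so LiteLLM-backed chat models can resolve their provider prefix and default base URL.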
+ extra = {"provider": factory}
msg = ""
for llm in LLMService.query(fid=factory):
if not embd_passed and llm.model_type == LLMType.EMBEDDING.value:
@@ -73,7 +74,7 @@ def set_api_key():
elif not chat_passed and llm.model_type == LLMType.CHAT.value:
assert factory in ChatModel, f"Chat model from {factory} is not supported yet."
mdl = ChatModel[factory](
- req["api_key"], llm.llm_name, base_url=req.get("base_url"))
+ req["api_key"], llm.llm_name, base_url=req.get("base_url"), **extra)
try:
m, tc = mdl.chat(None, [{"role": "user", "content": "Hello! How are you doing!"}],
{"temperature": 0.9, 'max_tokens': 50})
@@ -204,6 +205,7 @@ def add_llm():
msg = ""
mdl_nm = llm["llm_name"].split("___")[0]
+ extra = {"provider": factory}
if llm["model_type"] == LLMType.EMBEDDING.value:
assert factory in EmbeddingModel, f"Embedding model from {factory} is not supported yet."
mdl = EmbeddingModel[factory](
@@ -221,7 +223,8 @@ def add_llm():
mdl = ChatModel[factory](
key=llm['api_key'],
model_name=mdl_nm,
- base_url=llm["api_base"]
+ base_url=llm["api_base"],
+ **extra,
)
try:
m, tc = mdl.chat(None, [{"role": "user", "content": "Hello! How are you doing!"}], {
@@ -312,12 +315,12 @@ def delete_factory():
def my_llms():
try:
include_details = request.args.get('include_details', 'false').lower() == 'true'
-
+
if include_details:
res = {}
objs = TenantLLMService.query(tenant_id=current_user.id)
factories = LLMFactoriesService.query(status=StatusEnum.VALID.value)
-
+
for o in objs:
o_dict = o.to_dict()
factory_tags = None
@@ -325,13 +328,13 @@ def my_llms():
if f.name == o_dict["llm_factory"]:
factory_tags = f.tags
break
-
+
if o_dict["llm_factory"] not in res:
res[o_dict["llm_factory"]] = {
"tags": factory_tags,
"llm": []
}
-
+
res[o_dict["llm_factory"]]["llm"].append({
"type": o_dict["model_type"],
"name": o_dict["llm_name"],
@@ -352,7 +355,7 @@ def my_llms():
"name": o["llm_name"],
"used_token": o["used_tokens"]
})
-
+
return get_json_result(data=res)
except Exception as e:
return server_error_response(e)
diff --git a/api/db/services/llm_service.py b/api/db/services/llm_service.py
index dac080b64..fbfa7d65e 100644
--- a/api/db/services/llm_service.py
+++ b/api/db/services/llm_service.py
@@ -141,6 +141,7 @@ class TenantLLMService(CommonService):
@DB.connection_context()
def model_instance(cls, tenant_id, llm_type, llm_name=None, lang="Chinese", **kwargs):
model_config = TenantLLMService.get_model_config(tenant_id, llm_type, llm_name)
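+        # Forward the factory name so LiteLLM-backed model classes can pick the right provider prefix.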
+ kwargs.update({"provider": model_config["llm_factory"]})
if llm_type == LLMType.EMBEDDING.value:
if model_config["llm_factory"] not in EmbeddingModel:
return
diff --git a/pyproject.toml b/pyproject.toml
index 51f740ca3..e0202451a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -24,7 +24,7 @@ dependencies = [
"chardet==5.2.0",
"cn2an==0.5.22",
"cohere==5.6.2",
- "Crawl4AI==0.3.8",
+ "Crawl4AI>=0.3.8",
"dashscope==1.20.11",
"deepl==1.18.0",
"demjson3==3.0.6",
@@ -43,7 +43,7 @@ dependencies = [
"groq==0.9.0",
"hanziconv==0.3.2",
"html-text==0.6.2",
- "httpx==0.27.0",
+ "httpx==0.27.2",
"huggingface-hub>=0.25.0,<0.26.0",
"infinity-sdk==0.6.0-dev4",
"infinity-emb>=0.0.66,<0.0.67",
@@ -58,7 +58,7 @@ dependencies = [
"ollama==0.2.1",
"onnxruntime==1.19.2; sys_platform == 'darwin' or platform_machine != 'x86_64'",
"onnxruntime-gpu==1.19.2; sys_platform != 'darwin' and platform_machine == 'x86_64'",
- "openai==1.45.0",
+ "openai>=1.45.0",
"opencv-python==4.10.0.84",
"opencv-python-headless==4.10.0.84",
"openpyxl>=3.1.0,<4.0.0",
@@ -128,6 +128,7 @@ dependencies = [
"opensearch-py==2.7.1",
"pluginlib==0.9.4",
"click>=8.1.8",
+ "litellm>=1.74.15.post1",
]
[project.optional-dependencies]
diff --git a/rag/llm/__init__.py b/rag/llm/__init__.py
index e9542bbe8..58c8379cb 100644
--- a/rag/llm/__init__.py
+++ b/rag/llm/__init__.py
@@ -19,6 +19,48 @@
import importlib
import inspect
+from strenum import StrEnum
+
+
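+# Factories listed below are served through the shared LiteLLMBase wrapper in
+# rag/llm/chat_model.py instead of dedicated per-provider SDK classes.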
+class SupportedLiteLLMProvider(StrEnum):
+ Tongyi_Qianwen = "Tongyi-Qianwen"
+ Dashscope = "Dashscope"
+ Bedrock = "Bedrock"
+ Moonshot = "Moonshot"
+ xAI = "xAI"
+ DeepInfra = "DeepInfra"
+ Groq = "Groq"
+ Cohere = "Cohere"
+ Gemini = "Gemini"
+ DeepSeek = "DeepSeek"
+ Nvidia = "NVIDIA"
+ TogetherAI = "TogetherAI"
+ Anthropic = "Anthropic"
+
+
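+# Fallback base URLs, applied when a tenant configuration leaves base_url empty.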
+FACTORY_DEFAULT_BASE_URL = {
+ SupportedLiteLLMProvider.Tongyi_Qianwen: "https://dashscope.aliyuncs.com/compatible-mode/v1",
+ SupportedLiteLLMProvider.Dashscope: "https://dashscope.aliyuncs.com/compatible-mode/v1",
+ SupportedLiteLLMProvider.Moonshot: "https://api.moonshot.cn/v1",
+}
+
+
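+# LiteLLM routes requests by model-name prefix, e.g. "dashscope/qwen-turbo".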
+LITELLM_PROVIDER_PREFIX = {
+ SupportedLiteLLMProvider.Tongyi_Qianwen: "dashscope/",
+ SupportedLiteLLMProvider.Dashscope: "dashscope/",
+ SupportedLiteLLMProvider.Bedrock: "bedrock/",
+ SupportedLiteLLMProvider.Moonshot: "moonshot/",
+ SupportedLiteLLMProvider.xAI: "xai/",
+ SupportedLiteLLMProvider.DeepInfra: "deepinfra/",
+ SupportedLiteLLMProvider.Groq: "groq/",
+ SupportedLiteLLMProvider.Cohere: "", # don't need a prefix
+ SupportedLiteLLMProvider.Gemini: "gemini/",
+ SupportedLiteLLMProvider.DeepSeek: "deepseek/",
+ SupportedLiteLLMProvider.Nvidia: "nvidia_nim/",
+ SupportedLiteLLMProvider.TogetherAI: "together_ai/",
+ SupportedLiteLLMProvider.Anthropic: "", # don't need a prefix
+}
+
ChatModel = globals().get("ChatModel", {})
CvModel = globals().get("CvModel", {})
EmbeddingModel = globals().get("EmbeddingModel", {})
@@ -26,6 +68,7 @@ RerankModel = globals().get("RerankModel", {})
Seq2txtModel = globals().get("Seq2txtModel", {})
TTSModel = globals().get("TTSModel", {})
+
MODULE_MAPPING = {
"chat_model": ChatModel,
"cv_model": CvModel,
@@ -42,20 +85,30 @@ for module_name, mapping_dict in MODULE_MAPPING.items():
module = importlib.import_module(full_module_name)
base_class = None
+ lite_llm_base_class = None
for name, obj in inspect.getmembers(module):
- if inspect.isclass(obj) and name == "Base":
- base_class = obj
- break
- if base_class is None:
- continue
+ if inspect.isclass(obj):
+ if name == "Base":
+ base_class = obj
+ elif name == "LiteLLMBase":
+ lite_llm_base_class = obj
+                    assert hasattr(obj, "_FACTORY_NAME"), "LiteLLMBase should have a _FACTORY_NAME field."
+ if hasattr(obj, "_FACTORY_NAME"):
+ if isinstance(obj._FACTORY_NAME, list):
+ for factory_name in obj._FACTORY_NAME:
+ mapping_dict[factory_name] = obj
+ else:
+ mapping_dict[obj._FACTORY_NAME] = obj
+
+ if base_class is not None:
+ for _, obj in inspect.getmembers(module):
+ if inspect.isclass(obj) and issubclass(obj, base_class) and obj is not base_class and hasattr(obj, "_FACTORY_NAME"):
+ if isinstance(obj._FACTORY_NAME, list):
+ for factory_name in obj._FACTORY_NAME:
+ mapping_dict[factory_name] = obj
+ else:
+ mapping_dict[obj._FACTORY_NAME] = obj
- for _, obj in inspect.getmembers(module):
- if inspect.isclass(obj) and issubclass(obj, base_class) and obj is not base_class and hasattr(obj, "_FACTORY_NAME"):
- if isinstance(obj._FACTORY_NAME, list):
- for factory_name in obj._FACTORY_NAME:
- mapping_dict[factory_name] = obj
- else:
- mapping_dict[obj._FACTORY_NAME] = obj
__all__ = [
"ChatModel",
diff --git a/rag/llm/chat_model.py b/rag/llm/chat_model.py
index 1770b5b9e..c96afa12f 100644
--- a/rag/llm/chat_model.py
+++ b/rag/llm/chat_model.py
@@ -26,18 +26,20 @@ from typing import Any, Protocol
from urllib.parse import urljoin
import json_repair
+import litellm
import openai
import requests
-from dashscope import Generation
from ollama import Client
from openai import OpenAI
from openai.lib.azure import AzureOpenAI
from strenum import StrEnum
from zhipuai import ZhipuAI
+from rag.llm import FACTORY_DEFAULT_BASE_URL, LITELLM_PROVIDER_PREFIX, SupportedLiteLLMProvider
from rag.nlp import is_chinese, is_english
from rag.utils import num_tokens_from_string
+
# Error message constants
class LLMErrorCode(StrEnum):
ERROR_RATE_LIMIT = "RATE_LIMIT_EXCEEDED"
@@ -58,6 +60,7 @@ class ReActMode(StrEnum):
FUNCTION_CALL = "function_call"
REACT = "react"
+
ERROR_PREFIX = "**ERROR**"
LENGTH_NOTIFICATION_CN = "······\n由于大模型的上下文窗口大小限制,回答已经被大模型截断。"
LENGTH_NOTIFICATION_EN = "...\nThe answer is truncated by your chosen LLM due to its limitation on context length."
@@ -113,7 +116,7 @@ class Base(ABC):
def _chat(self, history, gen_conf, **kwargs):
logging.info("[HISTORY]" + json.dumps(history, ensure_ascii=False, indent=2))
- if self.model_name.lower().find("qwen3") >=0:
+ if self.model_name.lower().find("qwen3") >= 0:
kwargs["extra_body"] = {"enable_thinking": False}
response = self.client.chat.completions.create(model=self.model_name, messages=history, **gen_conf, **kwargs)
@@ -167,7 +170,7 @@ class Base(ABC):
error_code = LLMErrorCode.ERROR_MAX_RETRIES
# Check if it's a rate limit error or server error and not the last attempt
- should_retry = (error_code == LLMErrorCode.ERROR_RATE_LIMIT or error_code == LLMErrorCode.ERROR_SERVER)
+ should_retry = error_code == LLMErrorCode.ERROR_RATE_LIMIT or error_code == LLMErrorCode.ERROR_SERVER
if not should_retry:
return f"{ERROR_PREFIX}: {error_code} - {str(e)}"
@@ -176,11 +179,7 @@ class Base(ABC):
time.sleep(delay)
def _verbose_tool_use(self, name, args, res):
-        return "<tool_call>" + json.dumps({
-            "name": name,
-            "args": args,
-            "result": res
-        }, ensure_ascii=False, indent=2) + "</tool_call>"
+        return "<tool_call>" + json.dumps({"name": name, "args": args, "result": res}, ensure_ascii=False, indent=2) + "</tool_call>"
def _append_history(self, hist, tool_call, tool_res):
hist.append(
@@ -213,7 +212,7 @@ class Base(ABC):
self.toolcall_session = toolcall_session
self.tools = tools
- def chat_with_tools(self, system: str, history: list, gen_conf: dict={}):
+ def chat_with_tools(self, system: str, history: list, gen_conf: dict = {}):
gen_conf = self._clean_conf(gen_conf)
if system:
history.insert(0, {"role": "system", "content": system})
@@ -225,7 +224,7 @@ class Base(ABC):
for attempt in range(self.max_retries + 1):
history = hist
try:
- for _ in range(self.max_rounds+1):
+ for _ in range(self.max_rounds + 1):
logging.info(f"{self.tools=}")
response = self.client.chat.completions.create(model=self.model_name, messages=history, tools=self.tools, tool_choice="auto", **gen_conf)
tk_count += self.total_token_count(response)
@@ -255,7 +254,7 @@ class Base(ABC):
history.append({"role": "tool", "tool_call_id": tool_call.id, "content": f"Tool call error: \n{tool_call}\nException:\n" + str(e)})
ans += self._verbose_tool_use(name, {}, str(e))
- logging.warning( f"Exceed max rounds: {self.max_rounds}")
+ logging.warning(f"Exceed max rounds: {self.max_rounds}")
history.append({"role": "user", "content": f"Exceed max rounds: {self.max_rounds}"})
response, token_count = self._chat(history, gen_conf)
ans += response
@@ -297,7 +296,7 @@ class Base(ABC):
return final_tool_calls
- def chat_streamly_with_tools(self, system: str, history: list, gen_conf: dict={}):
+ def chat_streamly_with_tools(self, system: str, history: list, gen_conf: dict = {}):
gen_conf = self._clean_conf(gen_conf)
tools = self.tools
if system:
@@ -309,7 +308,7 @@ class Base(ABC):
for attempt in range(self.max_retries + 1):
history = hist
try:
- for _ in range(self.max_rounds+1):
+ for _ in range(self.max_rounds + 1):
reasoning_start = False
logging.info(f"{tools=}")
response = self.client.chat.completions.create(model=self.model_name, messages=history, stream=True, tools=tools, tool_choice="auto", **gen_conf)
@@ -373,7 +372,7 @@ class Base(ABC):
history.append({"role": "tool", "tool_call_id": tool_call.id, "content": f"Tool call error: \n{tool_call}\nException:\n" + str(e)})
yield self._verbose_tool_use(name, {}, str(e))
- logging.warning( f"Exceed max rounds: {self.max_rounds}")
+ logging.warning(f"Exceed max rounds: {self.max_rounds}")
history.append({"role": "user", "content": f"Exceed max rounds: {self.max_rounds}"})
response = self.client.chat.completions.create(model=self.model_name, messages=history, stream=True, **gen_conf)
for resp in response:
@@ -402,7 +401,7 @@ class Base(ABC):
assert False, "Shouldn't be here."
- def chat_streamly(self, system, history, gen_conf: dict={}, **kwargs):
+ def chat_streamly(self, system, history, gen_conf: dict = {}, **kwargs):
if system:
history.insert(0, {"role": "system", "content": system})
gen_conf = self._clean_conf(gen_conf)
@@ -474,15 +473,6 @@ class GptTurbo(Base):
super().__init__(key, model_name, base_url, **kwargs)
-class MoonshotChat(Base):
- _FACTORY_NAME = "Moonshot"
-
- def __init__(self, key, model_name="moonshot-v1-8k", base_url="https://api.moonshot.cn/v1", **kwargs):
- if not base_url:
- base_url = "https://api.moonshot.cn/v1"
- super().__init__(key, model_name, base_url)
-
-
class XinferenceChat(Base):
_FACTORY_NAME = "Xinference"
@@ -513,15 +503,6 @@ class ModelScopeChat(Base):
super().__init__(key, model_name.split("___")[0], base_url, **kwargs)
-class DeepSeekChat(Base):
- _FACTORY_NAME = "DeepSeek"
-
- def __init__(self, key, model_name="deepseek-chat", base_url="https://api.deepseek.com/v1", **kwargs):
- if not base_url:
- base_url = "https://api.deepseek.com/v1"
- super().__init__(key, model_name, base_url, **kwargs)
-
-
class AzureChat(Base):
_FACTORY_NAME = "Azure-OpenAI"
@@ -608,26 +589,6 @@ class BaiChuanChat(Base):
yield total_tokens
-class xAIChat(Base):
- _FACTORY_NAME = "xAI"
-
- def __init__(self, key, model_name="grok-3", base_url=None, **kwargs):
- if not base_url:
- base_url = "https://api.x.ai/v1"
- super().__init__(key, model_name, base_url=base_url, **kwargs)
- return
-
-
-class QWenChat(Base):
- _FACTORY_NAME = "Tongyi-Qianwen"
-
- def __init__(self, key, model_name=Generation.Models.qwen_turbo, base_url=None, **kwargs):
- if not base_url:
- base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1"
- super().__init__(key, model_name, base_url=base_url, **kwargs)
- return
-
-
class ZhipuChat(Base):
_FACTORY_NAME = "ZHIPU-AI"
@@ -973,217 +934,6 @@ class MistralChat(Base):
yield total_tokens
-class BedrockChat(Base):
- _FACTORY_NAME = "Bedrock"
-
- def __init__(self, key, model_name, base_url=None, **kwargs):
- super().__init__(key, model_name, base_url=base_url, **kwargs)
-
- import boto3
-
- self.bedrock_ak = json.loads(key).get("bedrock_ak", "")
- self.bedrock_sk = json.loads(key).get("bedrock_sk", "")
- self.bedrock_region = json.loads(key).get("bedrock_region", "")
- self.model_name = model_name
-
- if self.bedrock_ak == "" or self.bedrock_sk == "" or self.bedrock_region == "":
- # Try to create a client using the default credentials (AWS_PROFILE, AWS_DEFAULT_REGION, etc.)
- self.client = boto3.client("bedrock-runtime")
- else:
- self.client = boto3.client(service_name="bedrock-runtime", region_name=self.bedrock_region, aws_access_key_id=self.bedrock_ak, aws_secret_access_key=self.bedrock_sk)
-
- def _clean_conf(self, gen_conf):
- for k in list(gen_conf.keys()):
- if k not in ["temperature"]:
- del gen_conf[k]
- return gen_conf
-
- def _chat(self, history, gen_conf={}, **kwargs):
- system = history[0]["content"] if history and history[0]["role"] == "system" else ""
- hist = []
- for item in history:
- if item["role"] == "system":
- continue
- hist.append(deepcopy(item))
- if not isinstance(hist[-1]["content"], list) and not isinstance(hist[-1]["content"], tuple):
- hist[-1]["content"] = [{"text": hist[-1]["content"]}]
- # Send the message to the model, using a basic inference configuration.
- response = self.client.converse(
- modelId=self.model_name,
- messages=hist,
- inferenceConfig=gen_conf,
- system=[{"text": (system if system else "Answer the user's message.")}],
- )
-
- # Extract and print the response text.
- ans = response["output"]["message"]["content"][0]["text"]
- return ans, num_tokens_from_string(ans)
-
- def chat_streamly(self, system, history, gen_conf={}, **kwargs):
- from botocore.exceptions import ClientError
-
- for k in list(gen_conf.keys()):
- if k not in ["temperature"]:
- del gen_conf[k]
- for item in history:
- if not isinstance(item["content"], list) and not isinstance(item["content"], tuple):
- item["content"] = [{"text": item["content"]}]
-
- if self.model_name.split(".")[0] == "ai21":
- try:
- response = self.client.converse(modelId=self.model_name, messages=history, inferenceConfig=gen_conf, system=[{"text": (system if system else "Answer the user's message.")}])
- ans = response["output"]["message"]["content"][0]["text"]
- return ans, num_tokens_from_string(ans)
-
- except (ClientError, Exception) as e:
- return f"ERROR: Can't invoke '{self.model_name}'. Reason: {e}", 0
-
- ans = ""
- try:
- # Send the message to the model, using a basic inference configuration.
- streaming_response = self.client.converse_stream(
- modelId=self.model_name, messages=history, inferenceConfig=gen_conf, system=[{"text": (system if system else "Answer the user's message.")}]
- )
-
- # Extract and print the streamed response text in real-time.
- for resp in streaming_response["stream"]:
- if "contentBlockDelta" in resp:
- ans = resp["contentBlockDelta"]["delta"]["text"]
- yield ans
-
- except (ClientError, Exception) as e:
- yield ans + f"ERROR: Can't invoke '{self.model_name}'. Reason: {e}"
-
- yield num_tokens_from_string(ans)
-
-
-class GeminiChat(Base):
- _FACTORY_NAME = "Gemini"
-
- def __init__(self, key, model_name, base_url=None, **kwargs):
- super().__init__(key, model_name, base_url=base_url, **kwargs)
-
- from google.generativeai import GenerativeModel, client
-
- client.configure(api_key=key)
- _client = client.get_default_generative_client()
- self.model_name = "models/" + model_name
- self.model = GenerativeModel(model_name=self.model_name)
- self.model._client = _client
-
- def _clean_conf(self, gen_conf):
- for k in list(gen_conf.keys()):
- if k not in ["temperature", "top_p", "max_tokens"]:
- del gen_conf[k]
- # if max_tokens exists, rename it to max_output_tokens to match Gemini's API
- if k == "max_tokens":
- gen_conf["max_output_tokens"] = gen_conf.pop("max_tokens")
- return gen_conf
-
- def _chat(self, history, gen_conf={}, **kwargs):
- from google.generativeai.types import content_types
-
- system = history[0]["content"] if history and history[0]["role"] == "system" else ""
- hist = []
- for item in history:
- if item["role"] == "system":
- continue
- hist.append(deepcopy(item))
- item = hist[-1]
- if "role" in item and item["role"] == "assistant":
- item["role"] = "model"
- if "role" in item and item["role"] == "system":
- item["role"] = "user"
- if "content" in item:
- item["parts"] = item.pop("content")
-
- if system:
- self.model._system_instruction = content_types.to_content(system)
- retry_count = 0
- max_retries = 3
- while retry_count < max_retries:
- try:
- response = self.model.generate_content(hist, generation_config=gen_conf)
- ans = response.text
- return ans, response.usage_metadata.total_token_count
- except Exception as e:
- retry_count += 1
- if retry_count >= max_retries:
- raise e
- else:
- import time
- time.sleep(50)
-
- def chat_streamly(self, system, history, gen_conf={}, **kwargs):
- from google.generativeai.types import content_types
-
- gen_conf = self._clean_conf(gen_conf)
- if system:
- self.model._system_instruction = content_types.to_content(system)
- for item in history:
- if "role" in item and item["role"] == "assistant":
- item["role"] = "model"
- if "content" in item:
- item["parts"] = item.pop("content")
- ans = ""
- try:
- response = self.model.generate_content(history, generation_config=gen_conf, stream=True)
- for resp in response:
- ans = resp.text
- yield ans
-
- yield response._chunks[-1].usage_metadata.total_token_count
- except Exception as e:
- yield ans + "\n**ERROR**: " + str(e)
-
- yield 0
-
-
-class GroqChat(Base):
- _FACTORY_NAME = "Groq"
-
- def __init__(self, key, model_name, base_url=None, **kwargs):
- super().__init__(key, model_name, base_url=base_url, **kwargs)
-
- from groq import Groq
-
- self.client = Groq(api_key=key)
- self.model_name = model_name
-
- def _clean_conf(self, gen_conf):
- for k in list(gen_conf.keys()):
- if k not in ["temperature", "top_p", "max_tokens"]:
- del gen_conf[k]
- return gen_conf
-
- def chat_streamly(self, system, history, gen_conf={}, **kwargs):
- if system:
- history.insert(0, {"role": "system", "content": system})
- for k in list(gen_conf.keys()):
- if k not in ["temperature", "top_p", "max_tokens"]:
- del gen_conf[k]
- ans = ""
- total_tokens = 0
- try:
- response = self.client.chat.completions.create(model=self.model_name, messages=history, stream=True, **gen_conf)
- for resp in response:
- if not resp.choices or not resp.choices[0].delta.content:
- continue
- ans = resp.choices[0].delta.content
- total_tokens += 1
- if resp.choices[0].finish_reason == "length":
- if is_chinese(ans):
- ans += LENGTH_NOTIFICATION_CN
- else:
- ans += LENGTH_NOTIFICATION_EN
- yield ans
-
- except Exception as e:
- yield ans + "\n**ERROR**: " + str(e)
-
- yield total_tokens
-
-
## openrouter
class OpenRouterChat(Base):
_FACTORY_NAME = "OpenRouter"
@@ -1203,15 +953,6 @@ class StepFunChat(Base):
super().__init__(key, model_name, base_url, **kwargs)
-class NvidiaChat(Base):
- _FACTORY_NAME = "NVIDIA"
-
- def __init__(self, key, model_name, base_url="https://integrate.api.nvidia.com/v1", **kwargs):
- if not base_url:
- base_url = "https://integrate.api.nvidia.com/v1"
- super().__init__(key, model_name, base_url, **kwargs)
-
-
class LmStudioChat(Base):
_FACTORY_NAME = "LM-Studio"
@@ -1243,83 +984,6 @@ class PPIOChat(Base):
super().__init__(key, model_name, base_url, **kwargs)
-class CoHereChat(Base):
- _FACTORY_NAME = "Cohere"
-
- def __init__(self, key, model_name, base_url=None, **kwargs):
- super().__init__(key, model_name, base_url=base_url, **kwargs)
-
- from cohere import Client
-
- self.client = Client(api_key=key)
- self.model_name = model_name
-
- def _clean_conf(self, gen_conf):
- if "max_tokens" in gen_conf:
- del gen_conf["max_tokens"]
- if "top_p" in gen_conf:
- gen_conf["p"] = gen_conf.pop("top_p")
- if "frequency_penalty" in gen_conf and "presence_penalty" in gen_conf:
- gen_conf.pop("presence_penalty")
- return gen_conf
-
- def _chat(self, history, gen_conf):
- hist = []
- for item in history:
- hist.append(deepcopy(item))
- item = hist[-1]
- if "role" in item and item["role"] == "user":
- item["role"] = "USER"
- if "role" in item and item["role"] == "assistant":
- item["role"] = "CHATBOT"
- if "content" in item:
- item["message"] = item.pop("content")
- mes = hist.pop()["message"]
- response = self.client.chat(model=self.model_name, chat_history=hist, message=mes, **gen_conf)
- ans = response.text
- if response.finish_reason == "MAX_TOKENS":
- ans += "...\nFor the content length reason, it stopped, continue?" if is_english([ans]) else "······\n由于长度的原因,回答被截断了,要继续吗?"
- return (
- ans,
- response.meta.tokens.input_tokens + response.meta.tokens.output_tokens,
- )
-
- def chat_streamly(self, system, history, gen_conf={}, **kwargs):
- if system:
- history.insert(0, {"role": "system", "content": system})
- if "max_tokens" in gen_conf:
- del gen_conf["max_tokens"]
- if "top_p" in gen_conf:
- gen_conf["p"] = gen_conf.pop("top_p")
- if "frequency_penalty" in gen_conf and "presence_penalty" in gen_conf:
- gen_conf.pop("presence_penalty")
- for item in history:
- if "role" in item and item["role"] == "user":
- item["role"] = "USER"
- if "role" in item and item["role"] == "assistant":
- item["role"] = "CHATBOT"
- if "content" in item:
- item["message"] = item.pop("content")
- mes = history.pop()["message"]
- ans = ""
- total_tokens = 0
- try:
- response = self.client.chat_stream(model=self.model_name, chat_history=history, message=mes, **gen_conf)
- for resp in response:
- if resp.event_type == "text-generation":
- ans = resp.text
- total_tokens += num_tokens_from_string(resp.text)
- elif resp.event_type == "stream-end":
- if resp.finish_reason == "MAX_TOKENS":
- ans += "...\nFor the content length reason, it stopped, continue?" if is_english([ans]) else "······\n由于长度的原因,回答被截断了,要继续吗?"
- yield ans
-
- except Exception as e:
- yield ans + "\n**ERROR**: " + str(e)
-
- yield total_tokens
-
-
class LeptonAIChat(Base):
_FACTORY_NAME = "LeptonAI"
@@ -1329,15 +993,6 @@ class LeptonAIChat(Base):
super().__init__(key, model_name, base_url, **kwargs)
-class TogetherAIChat(Base):
- _FACTORY_NAME = "TogetherAI"
-
- def __init__(self, key, model_name, base_url="https://api.together.xyz/v1", **kwargs):
- if not base_url:
- base_url = "https://api.together.xyz/v1"
- super().__init__(key, model_name, base_url, **kwargs)
-
-
class PerfXCloudChat(Base):
_FACTORY_NAME = "PerfXCloud"
@@ -1581,15 +1236,6 @@ class BaiduYiyanChat(Base):
yield total_tokens
-class AnthropicChat(Base):
- _FACTORY_NAME = "Anthropic"
-
- def __init__(self, key, model_name, base_url="https://api.anthropic.com/v1/", **kwargs):
- if not base_url:
- base_url = "https://api.anthropic.com/v1/"
- super().__init__(key, model_name, base_url=base_url, **kwargs)
-
-
class GoogleChat(Base):
_FACTORY_NAME = "Google Cloud"
@@ -1738,14 +1384,7 @@ class GPUStackChat(Base):
raise ValueError("Local llm url cannot be None")
base_url = urljoin(base_url, "v1")
super().__init__(key, model_name, base_url, **kwargs)
-class DeepInfraChat(Base):
- _FACTORY_NAME = "DeepInfra"
- def __init__(self, key, model_name, base_url="https://api.deepinfra.com/v1/openai", **kwargs):
- if not base_url:
- base_url = "https://api.deepinfra.com/v1/openai"
- super().__init__(key, model_name, base_url, **kwargs)
-
class Ai302Chat(Base):
_FACTORY_NAME = "302.AI"
@@ -1754,3 +1393,489 @@ class Ai302Chat(Base):
if not base_url:
base_url = "https://api.302.ai/v1"
super().__init__(key, model_name, base_url, **kwargs)
+
+
+class LiteLLMBase(ABC):
+ _FACTORY_NAME = ["Tongyi-Qianwen", "Bedrock", "Moonshot", "xAI", "DeepInfra", "Groq", "Cohere", "Gemini", "DeepSeek", "NVIDIA", "TogetherAI", "Anthropic"]
+
+ def __init__(self, key, model_name, base_url=None, **kwargs):
+ self.timeout = int(os.environ.get("LM_TIMEOUT_SECONDS", 600))
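+        # Resolve LiteLLM's routing prefix from the factory name supplied by the caller (e.g. "Moonshot" -> "moonshot/").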
+ self.provider = kwargs.get("provider", "")
+ self.prefix = LITELLM_PROVIDER_PREFIX.get(self.provider, "")
+ self.model_name = f"{self.prefix}{model_name}"
+ self.api_key = key
+ self.base_url = base_url or FACTORY_DEFAULT_BASE_URL.get(self.provider, "")
+ # Configure retry parameters
+ self.max_retries = kwargs.get("max_retries", int(os.environ.get("LLM_MAX_RETRIES", 5)))
+ self.base_delay = kwargs.get("retry_interval", float(os.environ.get("LLM_BASE_DELAY", 2.0)))
+ self.max_rounds = kwargs.get("max_rounds", 5)
+ self.is_tools = False
+ self.tools = []
+ self.toolcall_sessions = {}
+
+ # Factory specific fields
+ if self.provider == SupportedLiteLLMProvider.Bedrock:
+ self.bedrock_ak = json.loads(key).get("bedrock_ak", "")
+ self.bedrock_sk = json.loads(key).get("bedrock_sk", "")
+ self.bedrock_region = json.loads(key).get("bedrock_region", "")
+
+ def _get_delay(self):
+ """Calculate retry delay time"""
+ return self.base_delay * random.uniform(10, 150)
+
+ def _classify_error(self, error):
+ """Classify error based on error message content"""
+ error_str = str(error).lower()
+
+ keywords_mapping = [
+ (["quota", "capacity", "credit", "billing", "balance", "欠费"], LLMErrorCode.ERROR_QUOTA),
+ (["rate limit", "429", "tpm limit", "too many requests", "requests per minute"], LLMErrorCode.ERROR_RATE_LIMIT),
+ (["auth", "key", "apikey", "401", "forbidden", "permission"], LLMErrorCode.ERROR_AUTHENTICATION),
+ (["invalid", "bad request", "400", "format", "malformed", "parameter"], LLMErrorCode.ERROR_INVALID_REQUEST),
+ (["server", "503", "502", "504", "500", "unavailable"], LLMErrorCode.ERROR_SERVER),
+ (["timeout", "timed out"], LLMErrorCode.ERROR_TIMEOUT),
+ (["connect", "network", "unreachable", "dns"], LLMErrorCode.ERROR_CONNECTION),
+ (["filter", "content", "policy", "blocked", "safety", "inappropriate"], LLMErrorCode.ERROR_CONTENT_FILTER),
+ (["model", "not found", "does not exist", "not available"], LLMErrorCode.ERROR_MODEL),
+ (["max rounds"], LLMErrorCode.ERROR_MODEL),
+ ]
+ for words, code in keywords_mapping:
+ if re.search("({})".format("|".join(words)), error_str):
+ return code
+
+ return LLMErrorCode.ERROR_GENERIC
+
+ def _clean_conf(self, gen_conf):
+ if "max_tokens" in gen_conf:
+ del gen_conf["max_tokens"]
+ return gen_conf
+
+ def _chat(self, history, gen_conf, **kwargs):
+ logging.info("[HISTORY]" + json.dumps(history, ensure_ascii=False, indent=2))
+ if self.model_name.lower().find("qwen3") >= 0:
+ kwargs["extra_body"] = {"enable_thinking": False}
+
+        completion_args = self._construct_completion_args(history=history, **gen_conf, **kwargs)
+        response = litellm.completion(
+            **completion_args,
+            drop_params=True,
+            timeout=self.timeout,
+        )
+
+        if not response.choices or not response.choices[0].message or not response.choices[0].message.content:
+ return "", 0
+ ans = response.choices[0].message.content.strip()
+ if response.choices[0].finish_reason == "length":
+ ans = self._length_stop(ans)
+
+ return ans, self.total_token_count(response)
+
+ def _chat_streamly(self, history, gen_conf, **kwargs):
+ logging.info("[HISTORY STREAMLY]" + json.dumps(history, ensure_ascii=False, indent=4))
+ reasoning_start = False
+
+        completion_args = self._construct_completion_args(history=history, stream=True, **gen_conf)
+ stop = kwargs.get("stop")
+ if stop:
+ completion_args["stop"] = stop
+ response = litellm.completion(
+ **completion_args,
+ drop_params=True,
+ timeout=self.timeout,
+ )
+
+ for resp in response:
+ if not hasattr(resp, "choices") or not resp.choices:
+ continue
+
+ delta = resp.choices[0].delta
+ if not hasattr(delta, "content") or delta.content is None:
+ delta.content = ""
+
+ if kwargs.get("with_reasoning", True) and hasattr(delta, "reasoning_content") and delta.reasoning_content:
+                ans = ""
+                if not reasoning_start:
+                    reasoning_start = True
+                    ans = "<think>"
+                ans += delta.reasoning_content + "</think>"
+ else:
+ reasoning_start = False
+ ans = delta.content
+
+ tol = self.total_token_count(resp)
+ if not tol:
+ tol = num_tokens_from_string(delta.content)
+
+ finish_reason = resp.choices[0].finish_reason if hasattr(resp.choices[0], "finish_reason") else ""
+ if finish_reason == "length":
+ if is_chinese(ans):
+ ans += LENGTH_NOTIFICATION_CN
+ else:
+ ans += LENGTH_NOTIFICATION_EN
+
+ yield ans, tol
+
+ def _length_stop(self, ans):
+        if is_chinese(ans):
+ return ans + LENGTH_NOTIFICATION_CN
+ return ans + LENGTH_NOTIFICATION_EN
+
+ def _exceptions(self, e, attempt):
+        logging.exception("LiteLLM chat failure")
+ # Classify the error
+ error_code = self._classify_error(e)
+ if attempt == self.max_retries:
+ error_code = LLMErrorCode.ERROR_MAX_RETRIES
+
+ # Check if it's a rate limit error or server error and not the last attempt
+ should_retry = error_code == LLMErrorCode.ERROR_RATE_LIMIT or error_code == LLMErrorCode.ERROR_SERVER
+ if not should_retry:
+ return f"{ERROR_PREFIX}: {error_code} - {str(e)}"
+
+ delay = self._get_delay()
+ logging.warning(f"Error: {error_code}. Retrying in {delay:.2f} seconds... (Attempt {attempt + 1}/{self.max_retries})")
+ time.sleep(delay)
+
+ def _verbose_tool_use(self, name, args, res):
+        return "<tool_call>" + json.dumps({"name": name, "args": args, "result": res}, ensure_ascii=False, indent=2) + "</tool_call>"
+
+ def _append_history(self, hist, tool_call, tool_res):
+ hist.append(
+ {
+ "role": "assistant",
+ "tool_calls": [
+ {
+ "index": tool_call.index,
+ "id": tool_call.id,
+ "function": {
+ "name": tool_call.function.name,
+ "arguments": tool_call.function.arguments,
+ },
+ "type": "function",
+ },
+ ],
+ }
+ )
+ try:
+ if isinstance(tool_res, dict):
+ tool_res = json.dumps(tool_res, ensure_ascii=False)
+ finally:
+ hist.append({"role": "tool", "tool_call_id": tool_call.id, "content": str(tool_res)})
+ return hist
+
+ def bind_tools(self, toolcall_session, tools):
+ if not (toolcall_session and tools):
+ return
+ self.is_tools = True
+ self.toolcall_session = toolcall_session
+ self.tools = tools
+
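+    # Assemble the keyword arguments passed to litellm.completion().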
+    def _construct_completion_args(self, history, stream=False, **kwargs):
+        completion_args = {
+            "model": self.model_name,
+            "messages": history,
+            "stream": stream,
+            "tools": self.tools,
+            "tool_choice": "auto",
+            "api_key": self.api_key,
+            **kwargs,
+        }
+        if self.provider == SupportedLiteLLMProvider.Bedrock:
+            # Bedrock authenticates with AWS credentials rather than an API key/base URL.
+            completion_args.pop("api_key", None)
+            completion_args.update(
+                {
+                    "aws_access_key_id": self.bedrock_ak,
+                    "aws_secret_access_key": self.bedrock_sk,
+                    "aws_region_name": self.bedrock_region,
+                }
+            )
+        elif self.base_url:
+            completion_args.update({"api_base": self.base_url})
+        return completion_args
+
+ def chat_with_tools(self, system: str, history: list, gen_conf: dict = {}):
+ gen_conf = self._clean_conf(gen_conf)
+ if system:
+ history.insert(0, {"role": "system", "content": system})
+
+ ans = ""
+ tk_count = 0
+ hist = deepcopy(history)
+
+ # Implement exponential backoff retry strategy
+ for attempt in range(self.max_retries + 1):
+ history = deepcopy(hist) # deepcopy is required here
+ try:
+ for _ in range(self.max_rounds + 1):
+ logging.info(f"{self.tools=}")
+
+ completion_args = self._construct_completion_args(history=history, **gen_conf)
+ response = litellm.completion(
+ **completion_args,
+ drop_params=True,
+ timeout=self.timeout,
+ )
+
+ tk_count += self.total_token_count(response)
+
+ if not hasattr(response, "choices") or not response.choices or not response.choices[0].message:
+ raise Exception(f"500 response structure error. Response: {response}")
+
+ message = response.choices[0].message
+
+ if not hasattr(message, "tool_calls") or not message.tool_calls:
+ if hasattr(message, "reasoning_content") and message.reasoning_content:
+                            ans += f"<think>{message.reasoning_content}</think>"
+ ans += message.content or ""
+ if response.choices[0].finish_reason == "length":
+ ans = self._length_stop(ans)
+ return ans, tk_count
+
+ for tool_call in message.tool_calls:
+ logging.info(f"Response {tool_call=}")
+ name = tool_call.function.name
+ try:
+ args = json_repair.loads(tool_call.function.arguments)
+ tool_response = self.toolcall_session.tool_call(name, args)
+ history = self._append_history(history, tool_call, tool_response)
+ ans += self._verbose_tool_use(name, args, tool_response)
+ except Exception as e:
+ logging.exception(msg=f"Wrong JSON argument format in LLM tool call response: {tool_call}")
+ history.append({"role": "tool", "tool_call_id": tool_call.id, "content": f"Tool call error: \n{tool_call}\nException:\n" + str(e)})
+ ans += self._verbose_tool_use(name, {}, str(e))
+
+ logging.warning(f"Exceed max rounds: {self.max_rounds}")
+ history.append({"role": "user", "content": f"Exceed max rounds: {self.max_rounds}"})
+
+ response, token_count = self._chat(history, gen_conf)
+ ans += response
+ tk_count += token_count
+ return ans, tk_count
+
+ except Exception as e:
+ e = self._exceptions(e, attempt)
+ if e:
+ return e, tk_count
+
+ assert False, "Shouldn't be here."
+
+ def chat(self, system, history, gen_conf={}, **kwargs):
+ if system:
+ history.insert(0, {"role": "system", "content": system})
+ gen_conf = self._clean_conf(gen_conf)
+
+ # Implement exponential backoff retry strategy
+ for attempt in range(self.max_retries + 1):
+ try:
+ response = self._chat(history, gen_conf, **kwargs)
+ return response
+ except Exception as e:
+ e = self._exceptions(e, attempt)
+ if e:
+ return e, 0
+ assert False, "Shouldn't be here."
+
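+    # Merge streamed tool-call deltas into complete calls, keyed by tool-call index.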
+ def _wrap_toolcall_message(self, stream):
+ final_tool_calls = {}
+
+ for chunk in stream:
+ for tool_call in chunk.choices[0].delta.tool_calls or []:
+ index = tool_call.index
+
+                if index not in final_tool_calls:
+                    final_tool_calls[index] = tool_call
+                else:
+                    final_tool_calls[index].function.arguments += tool_call.function.arguments
+
+ return final_tool_calls
+
+ def chat_streamly_with_tools(self, system: str, history: list, gen_conf: dict = {}):
+ gen_conf = self._clean_conf(gen_conf)
+ tools = self.tools
+ if system:
+ history.insert(0, {"role": "system", "content": system})
+
+ total_tokens = 0
+ hist = deepcopy(history)
+
+ # Implement exponential backoff retry strategy
+ for attempt in range(self.max_retries + 1):
+ history = deepcopy(hist) # deepcopy is required here
+ try:
+ for _ in range(self.max_rounds + 1):
+ reasoning_start = False
+ logging.info(f"{tools=}")
+
+                    completion_args = self._construct_completion_args(history=history, stream=True, **gen_conf)
+ response = litellm.completion(
+ **completion_args,
+ drop_params=True,
+ timeout=self.timeout,
+ )
+
+ final_tool_calls = {}
+ answer = ""
+
+ for resp in response:
+ if not hasattr(resp, "choices") or not resp.choices:
+ continue
+
+ delta = resp.choices[0].delta
+
+ if hasattr(delta, "tool_calls") and delta.tool_calls:
+ for tool_call in delta.tool_calls:
+ index = tool_call.index
+ if index not in final_tool_calls:
+ if not tool_call.function.arguments:
+ tool_call.function.arguments = ""
+ final_tool_calls[index] = tool_call
+ else:
+ final_tool_calls[index].function.arguments += tool_call.function.arguments or ""
+ continue
+
+ if not hasattr(delta, "content") or delta.content is None:
+ delta.content = ""
+
+ if hasattr(delta, "reasoning_content") and delta.reasoning_content:
+                            ans = ""
+                            if not reasoning_start:
+                                reasoning_start = True
+                                ans = "<think>"
+                            ans += delta.reasoning_content + "</think>"
+ yield ans
+ else:
+ reasoning_start = False
+ answer += delta.content
+ yield delta.content
+
+ tol = self.total_token_count(resp)
+ if not tol:
+ total_tokens += num_tokens_from_string(delta.content)
+ else:
+ total_tokens += tol
+
+ finish_reason = getattr(resp.choices[0], "finish_reason", "")
+ if finish_reason == "length":
+ yield self._length_stop("")
+
+ if answer:
+ yield total_tokens
+ return
+
+ for tool_call in final_tool_calls.values():
+ name = tool_call.function.name
+ try:
+ args = json_repair.loads(tool_call.function.arguments)
+ yield self._verbose_tool_use(name, args, "Begin to call...")
+ tool_response = self.toolcall_session.tool_call(name, args)
+ history = self._append_history(history, tool_call, tool_response)
+ yield self._verbose_tool_use(name, args, tool_response)
+ except Exception as e:
+ logging.exception(msg=f"Wrong JSON argument format in LLM tool call response: {tool_call}")
+ history.append(
+ {
+ "role": "tool",
+ "tool_call_id": tool_call.id,
+ "content": f"Tool call error: \n{tool_call}\nException:\n{str(e)}",
+ }
+ )
+ yield self._verbose_tool_use(name, {}, str(e))
+
+ logging.warning(f"Exceed max rounds: {self.max_rounds}")
+ history.append({"role": "user", "content": f"Exceed max rounds: {self.max_rounds}"})
+
+                completion_args = self._construct_completion_args(history=history, stream=True, **gen_conf)
+ response = litellm.completion(
+ **completion_args,
+ drop_params=True,
+ timeout=self.timeout,
+ )
+
+ for resp in response:
+ if not hasattr(resp, "choices") or not resp.choices:
+ continue
+ delta = resp.choices[0].delta
+ if not hasattr(delta, "content") or delta.content is None:
+ continue
+ tol = self.total_token_count(resp)
+ if not tol:
+ total_tokens += num_tokens_from_string(delta.content)
+ else:
+ total_tokens += tol
+ yield delta.content
+
+ yield total_tokens
+ return
+
+ except Exception as e:
+ e = self._exceptions(e, attempt)
+ if e:
+ yield e
+ yield total_tokens
+ return
+
+ assert False, "Shouldn't be here."
+
+ def chat_streamly(self, system, history, gen_conf: dict = {}, **kwargs):
+ if system:
+ history.insert(0, {"role": "system", "content": system})
+ gen_conf = self._clean_conf(gen_conf)
+ ans = ""
+ total_tokens = 0
+ try:
+ for delta_ans, tol in self._chat_streamly(history, gen_conf, **kwargs):
+ yield delta_ans
+ total_tokens += tol
+ except openai.APIError as e:
+ yield ans + "\n**ERROR**: " + str(e)
+
+ yield total_tokens
+
+ def total_token_count(self, resp):
+ try:
+ return resp.usage.total_tokens
+ except Exception:
+ pass
+ try:
+ return resp["usage"]["total_tokens"]
+ except Exception:
+ pass
+ return 0
+
+ def _calculate_dynamic_ctx(self, history):
+ """Calculate dynamic context window size"""
+
+ def count_tokens(text):
+ """Calculate token count for text"""
+ # Simple calculation: 1 token per ASCII character
+ # 2 tokens for non-ASCII characters (Chinese, Japanese, Korean, etc.)
+ total = 0
+ for char in text:
+ if ord(char) < 128: # ASCII characters
+ total += 1
+ else: # Non-ASCII characters (Chinese, Japanese, Korean, etc.)
+ total += 2
+ return total
+
+ # Calculate total tokens for all messages
+ total_tokens = 0
+ for message in history:
+ content = message.get("content", "")
+ # Calculate content tokens
+ content_tokens = count_tokens(content)
+ # Add role marker token overhead
+ role_tokens = 4
+ total_tokens += content_tokens + role_tokens
+
+ # Apply 1.2x buffer ratio
+ total_tokens_with_buffer = int(total_tokens * 1.2)
+
+ if total_tokens_with_buffer <= 8192:
+ ctx_size = 8192
+ else:
+ ctx_multiplier = (total_tokens_with_buffer // 8192) + 1
+ ctx_size = ctx_multiplier * 8192
+
+ return ctx_size
diff --git a/uv.lock b/uv.lock
index 731e47c3f..0ea2819a9 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1,4 +1,5 @@
version = 1
+revision = 1
requires-python = ">=3.10, <3.13"
resolution-markers = [
"python_full_version >= '3.12' and sys_platform == 'darwin'",
@@ -30,6 +31,15 @@ wheels = [
{ url = "https://mirrors.aliyun.com/pypi/packages/9f/1c/a17fb513aeb684fb83bef5f395910f53103ab30308bbdd77fd66d6698c46/accelerate-1.9.0-py3-none-any.whl", hash = "sha256:c24739a97ade1d54af4549a65f8b6b046adc87e2b3e4d6c66516e32c53d5a8f1" },
]
+[[package]]
+name = "aiofiles"
+version = "24.1.0"
+source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
+sdist = { url = "https://mirrors.aliyun.com/pypi/packages/0b/03/a88171e277e8caa88a4c77808c20ebb04ba74cc4681bf1e9416c862de237/aiofiles-24.1.0.tar.gz", hash = "sha256:22a075c9e5a3810f0c2e48f3008c94d68c65d763b9b03857924c99e57355166c" }
+wheels = [
+ { url = "https://mirrors.aliyun.com/pypi/packages/a5/45/30bb92d442636f570cb5651bc661f52b610e2eec3f891a5dc3a4c3667db0/aiofiles-24.1.0-py3-none-any.whl", hash = "sha256:b4ec55f4195e3eb5d7abd1bf7e061763e864dd4954231fb8539a0ef8bb8260e5" },
+]
+
[[package]]
name = "aiohappyeyeballs"
version = "2.6.1"
@@ -1028,24 +1038,29 @@ wheels = [
[[package]]
name = "crawl4ai"
-version = "0.3.8"
+version = "0.3.745"
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
dependencies = [
+ { name = "aiofiles" },
{ name = "aiosqlite" },
{ name = "beautifulsoup4" },
+ { name = "colorama" },
{ name = "html2text" },
{ name = "litellm" },
{ name = "lxml" },
{ name = "numpy" },
{ name = "pillow" },
{ name = "playwright" },
- { name = "playwright-stealth" },
{ name = "python-dotenv" },
+ { name = "rank-bm25" },
{ name = "requests" },
+ { name = "snowballstemmer" },
+ { name = "tf-playwright-stealth" },
+ { name = "xxhash" },
]
-sdist = { url = "https://mirrors.aliyun.com/pypi/packages/1c/31/327598a0c2cc3cd13dcb786ab41e9638c4c100db1940c9345b1e4d953f39/crawl4ai-0.3.8.tar.gz", hash = "sha256:bacc97509ddbfa5e328e299538a27a4c7fc2317e3fd5ad707b04677e4fc23fc6" }
+sdist = { url = "https://mirrors.aliyun.com/pypi/packages/02/5a/919e64ff2977d7aa1b2cda4d45f16ff8996cd2c2dc1f55936fb6cd214222/crawl4ai-0.3.745.tar.gz", hash = "sha256:990396d57e10ae7ccabf35c34a317dbd8c59a3ceca475eac75320a8808334438" }
wheels = [
- { url = "https://mirrors.aliyun.com/pypi/packages/af/03/4d69b8d64b39096a721808a349199ca5d7989acf2177e270d15e6f82c356/Crawl4AI-0.3.8-py3-none-any.whl", hash = "sha256:aa19165440c32b667b7325c166d68b00a99375b09e3a7db929d3873064d5ef4f" },
+ { url = "https://mirrors.aliyun.com/pypi/packages/ed/7e/ebe351a457140330b20b6d8289b8f243b21de6e6bce505cd15b230a83bcb/Crawl4AI-0.3.745-py3-none-any.whl", hash = "sha256:763e6aba80959e60e1fe70cb9d954a4cf257eb230af30f51fcd99ff641a7a88d" },
]
[[package]]
@@ -1175,9 +1190,6 @@ name = "datrie"
version = "0.8.2"
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/9d/fe/db74bd405d515f06657f11ad529878fd389576dca4812bea6f98d9b31574/datrie-0.8.2.tar.gz", hash = "sha256:525b08f638d5cf6115df6ccd818e5a01298cd230b2dac91c8ff2e6499d18765d" }
-wheels = [
- { url = "https://mirrors.aliyun.com/pypi/packages/44/02/53f0cf0bf0cd629ba6c2cc13f2f9db24323459e9c19463783d890a540a96/datrie-0.8.2-pp273-pypy_73-win32.whl", hash = "sha256:b07bd5fdfc3399a6dab86d6e35c72b1dbd598e80c97509c7c7518ab8774d3fda" },
-]
[[package]]
name = "debugpy"
@@ -1423,6 +1435,14 @@ wheels = [
{ url = "https://mirrors.aliyun.com/pypi/packages/36/f4/c6e662dade71f56cd2f3735141b265c3c79293c109549c1e6933b0651ffc/exceptiongroup-1.3.0-py3-none-any.whl", hash = "sha256:4d111e6e0c13d0644cad6ddaa7ed0261a0b36971f6d23e7ec9b4b9097da78a10" },
]
+[[package]]
+name = "fake-http-header"
+version = "0.3.5"
+source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
+wheels = [
+ { url = "https://mirrors.aliyun.com/pypi/packages/e3/0b/2849c87d9f13766e29c0a2f4d31681aa72e035016b251ab19d99bde7b592/fake_http_header-0.3.5-py3-none-any.whl", hash = "sha256:cd05f4bebf1b7e38b5f5c03d7fb820c0c17e87d9614fbee0afa39c32c7a2ad3c" },
+]
+
[[package]]
name = "fake-useragent"
version = "1.5.1"
@@ -1486,17 +1506,17 @@ name = "fastembed-gpu"
version = "0.3.6"
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
dependencies = [
- { name = "huggingface-hub" },
- { name = "loguru" },
- { name = "mmh3" },
- { name = "numpy" },
- { name = "onnxruntime-gpu" },
- { name = "pillow" },
- { name = "pystemmer" },
- { name = "requests" },
- { name = "snowballstemmer" },
- { name = "tokenizers" },
- { name = "tqdm" },
+ { name = "huggingface-hub", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
+ { name = "loguru", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
+ { name = "mmh3", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
+ { name = "numpy", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
+ { name = "onnxruntime-gpu", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
+ { name = "pillow", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
+ { name = "pystemmer", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
+ { name = "requests", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
+ { name = "snowballstemmer", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
+ { name = "tokenizers", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
+ { name = "tqdm", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
]
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/da/07/7336c7f3d7ee47f33b407eeb50f5eeb152889de538a52a8f1cc637192816/fastembed_gpu-0.3.6.tar.gz", hash = "sha256:ee2de8918b142adbbf48caaffec0c492f864d73c073eea5a3dcd0e8c1041c50d" }
wheels = [
@@ -2142,37 +2162,37 @@ wheels = [
[[package]]
name = "greenlet"
-version = "3.0.3"
+version = "3.2.3"
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
-sdist = { url = "https://mirrors.aliyun.com/pypi/packages/17/14/3bddb1298b9a6786539ac609ba4b7c9c0842e12aa73aaa4d8d73ec8f8185/greenlet-3.0.3.tar.gz", hash = "sha256:43374442353259554ce33599da8b692d5aa96f8976d567d4badf263371fbe491" }
+sdist = { url = "https://mirrors.aliyun.com/pypi/packages/c9/92/bb85bd6e80148a4d2e0c59f7c0c2891029f8fd510183afc7d8d2feeed9b6/greenlet-3.2.3.tar.gz", hash = "sha256:8b0dd8ae4c0d6f5e54ee55ba935eeb3d735a9b58a8a1e5b5cbab64e01a39f365" }
wheels = [
- { url = "https://mirrors.aliyun.com/pypi/packages/a6/64/bea53c592e3e45799f7c8039a8ee7d6883c518eafef1fcae60beb776070f/greenlet-3.0.3-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:9da2bd29ed9e4f15955dd1595ad7bc9320308a3b766ef7f837e23ad4b4aac31a" },
- { url = "https://mirrors.aliyun.com/pypi/packages/a6/d6/408ad9603339db28ce334021b1403dfcfbcb7501a435d49698408d928de7/greenlet-3.0.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d353cadd6083fdb056bb46ed07e4340b0869c305c8ca54ef9da3421acbdf6881" },
- { url = "https://mirrors.aliyun.com/pypi/packages/6c/90/5b14670653f7363fb3e1665f8da6d64bd4c31d53a796d09ef69f48be7273/greenlet-3.0.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dca1e2f3ca00b84a396bc1bce13dd21f680f035314d2379c4160c98153b2059b" },
- { url = "https://mirrors.aliyun.com/pypi/packages/ef/17/e8e72cabfb5a906c0d976d7fbcc88310df292beea0f816efbefdaf694284/greenlet-3.0.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3ed7fb269f15dc662787f4119ec300ad0702fa1b19d2135a37c2c4de6fadfd4a" },
- { url = "https://mirrors.aliyun.com/pypi/packages/1c/2f/64628f6ae48e05f585e0eb3fb7399b52e240ef99f602107b445bf6be23ef/greenlet-3.0.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd4f49ae60e10adbc94b45c0b5e6a179acc1736cf7a90160b404076ee283cf83" },
- { url = "https://mirrors.aliyun.com/pypi/packages/24/35/945d5b10648fec9b20bcc6df8952d20bb3bba76413cd71c1fdbee98f5616/greenlet-3.0.3-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:73a411ef564e0e097dbe7e866bb2dda0f027e072b04da387282b02c308807405" },
- { url = "https://mirrors.aliyun.com/pypi/packages/74/00/27e2da76b926e9b5a2c97d3f4c0baf1b7d8181209d3026c0171f621ae6c0/greenlet-3.0.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:7f362975f2d179f9e26928c5b517524e89dd48530a0202570d55ad6ca5d8a56f" },
- { url = "https://mirrors.aliyun.com/pypi/packages/e1/65/506e0a80931170b0dac1a03d36b7fc299f3fa3576235b916718602fff2c3/greenlet-3.0.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:649dde7de1a5eceb258f9cb00bdf50e978c9db1b996964cd80703614c86495eb" },
- { url = "https://mirrors.aliyun.com/pypi/packages/a6/76/e1ee9f290bb0d46b09704c2fb0e609cae329eb308ad404c0ee6fa1ecb8a5/greenlet-3.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:68834da854554926fbedd38c76e60c4a2e3198c6fbed520b106a8986445caaf9" },
- { url = "https://mirrors.aliyun.com/pypi/packages/6e/20/68a278a6f93fa36e21cfc3d7599399a8a831225644eb3b6b18755cd3d6fc/greenlet-3.0.3-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:b1b5667cced97081bf57b8fa1d6bfca67814b0afd38208d52538316e9422fc61" },
- { url = "https://mirrors.aliyun.com/pypi/packages/21/b4/90e06e07c78513ab03855768200bdb35c8e764e805b3f14fb488e56f82dc/greenlet-3.0.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:52f59dd9c96ad2fc0d5724107444f76eb20aaccb675bf825df6435acb7703559" },
- { url = "https://mirrors.aliyun.com/pypi/packages/f6/a2/0ed21078039072f9dc738bbf3af12b103a84106b1385ac4723841f846ce7/greenlet-3.0.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:afaff6cf5200befd5cec055b07d1c0a5a06c040fe5ad148abcd11ba6ab9b114e" },
- { url = "https://mirrors.aliyun.com/pypi/packages/42/11/42ad6b1104c357826bbee7d7b9e4f24dbd9fde94899a03efb004aab62963/greenlet-3.0.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fe754d231288e1e64323cfad462fcee8f0288654c10bdf4f603a39ed923bef33" },
- { url = "https://mirrors.aliyun.com/pypi/packages/bb/6b/384dee7e0121cbd1757bdc1824a5ee28e43d8d4e3f99aa59521f629442fe/greenlet-3.0.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2797aa5aedac23af156bbb5a6aa2cd3427ada2972c828244eb7d1b9255846379" },
- { url = "https://mirrors.aliyun.com/pypi/packages/c6/1f/12d5a6cc26e8b483c2e7975f9c22e088ac735c0d8dcb8a8f72d31a4e5f04/greenlet-3.0.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b7f009caad047246ed379e1c4dbcb8b020f0a390667ea74d2387be2998f58a22" },
- { url = "https://mirrors.aliyun.com/pypi/packages/c7/ec/85b647e59e0f137c7792a809156f413e38379cf7f3f2e1353c37f4be4026/greenlet-3.0.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:c5e1536de2aad7bf62e27baf79225d0d64360d4168cf2e6becb91baf1ed074f3" },
- { url = "https://mirrors.aliyun.com/pypi/packages/94/ed/1e5f4bca691a81700e5a88e86d6f0e538acb10188cd2cc17140e523255ef/greenlet-3.0.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:894393ce10ceac937e56ec00bb71c4c2f8209ad516e96033e4b3b1de270e200d" },
- { url = "https://mirrors.aliyun.com/pypi/packages/47/79/26d54d7d700ef65b689fc2665a40846d13e834da0486674a8d4f0f371a47/greenlet-3.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:1ea188d4f49089fc6fb283845ab18a2518d279c7cd9da1065d7a84e991748728" },
- { url = "https://mirrors.aliyun.com/pypi/packages/a2/2f/461615adc53ba81e99471303b15ac6b2a6daa8d2a0f7f77fd15605e16d5b/greenlet-3.0.3-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:70fb482fdf2c707765ab5f0b6655e9cfcf3780d8d87355a063547b41177599be" },
- { url = "https://mirrors.aliyun.com/pypi/packages/e9/55/2c3cfa3cdbb940cf7321fbcf544f0e9c74898eed43bf678abf416812d132/greenlet-3.0.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d4d1ac74f5c0c0524e4a24335350edad7e5f03b9532da7ea4d3c54d527784f2e" },
- { url = "https://mirrors.aliyun.com/pypi/packages/38/77/efb21ab402651896c74f24a172eb4d7479f9f53898bd5e56b9e20bb24ffd/greenlet-3.0.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:149e94a2dd82d19838fe4b2259f1b6b9957d5ba1b25640d2380bea9c5df37676" },
- { url = "https://mirrors.aliyun.com/pypi/packages/74/3a/92f188ace0190f0066dca3636cf1b09481d0854c46e92ec5e29c7cefe5b1/greenlet-3.0.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:15d79dd26056573940fcb8c7413d84118086f2ec1a8acdfa854631084393efcc" },
- { url = "https://mirrors.aliyun.com/pypi/packages/63/0f/847ed02cdfce10f0e6e3425cd054296bddb11a17ef1b34681fa01a055187/greenlet-3.0.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:881b7db1ebff4ba09aaaeae6aa491daeb226c8150fc20e836ad00041bcb11230" },
- { url = "https://mirrors.aliyun.com/pypi/packages/bd/37/56b0da468a85e7704f3b2bc045015301bdf4be2184a44868c71f6dca6fe2/greenlet-3.0.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fcd2469d6a2cf298f198f0487e0a5b1a47a42ca0fa4dfd1b6862c999f018ebbf" },
- { url = "https://mirrors.aliyun.com/pypi/packages/7c/68/b5f4084c0a252d7e9c0d95fc1cfc845d08622037adb74e05be3a49831186/greenlet-3.0.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:1f672519db1796ca0d8753f9e78ec02355e862d0998193038c7073045899f305" },
- { url = "https://mirrors.aliyun.com/pypi/packages/a4/fa/31e22345518adcd69d1d6ab5087a12c178aa7f3c51103f6d5d702199d243/greenlet-3.0.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2516a9957eed41dd8f1ec0c604f1cdc86758b587d964668b5b196a9db5bfcde6" },
- { url = "https://mirrors.aliyun.com/pypi/packages/53/80/3d94d5999b4179d91bcc93745d1b0815b073d61be79dd546b840d17adb18/greenlet-3.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:bba5387a6975598857d86de9eac14210a49d554a77eb8261cc68b7d082f78ce2" },
+ { url = "https://mirrors.aliyun.com/pypi/packages/92/db/b4c12cff13ebac2786f4f217f06588bccd8b53d260453404ef22b121fc3a/greenlet-3.2.3-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:1afd685acd5597349ee6d7a88a8bec83ce13c106ac78c196ee9dde7c04fe87be" },
+ { url = "https://mirrors.aliyun.com/pypi/packages/52/61/75b4abd8147f13f70986df2801bf93735c1bd87ea780d70e3b3ecda8c165/greenlet-3.2.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:761917cac215c61e9dc7324b2606107b3b292a8349bdebb31503ab4de3f559ac" },
+ { url = "https://mirrors.aliyun.com/pypi/packages/35/aa/6894ae299d059d26254779a5088632874b80ee8cf89a88bca00b0709d22f/greenlet-3.2.3-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:a433dbc54e4a37e4fff90ef34f25a8c00aed99b06856f0119dcf09fbafa16392" },
+ { url = "https://mirrors.aliyun.com/pypi/packages/30/64/e01a8261d13c47f3c082519a5e9dbf9e143cc0498ed20c911d04e54d526c/greenlet-3.2.3-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:72e77ed69312bab0434d7292316d5afd6896192ac4327d44f3d613ecb85b037c" },
+ { url = "https://mirrors.aliyun.com/pypi/packages/47/48/ff9ca8ba9772d083a4f5221f7b4f0ebe8978131a9ae0909cf202f94cd879/greenlet-3.2.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:68671180e3849b963649254a882cd544a3c75bfcd2c527346ad8bb53494444db" },
+ { url = "https://mirrors.aliyun.com/pypi/packages/e9/45/626e974948713bc15775b696adb3eb0bd708bec267d6d2d5c47bb47a6119/greenlet-3.2.3-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:49c8cfb18fb419b3d08e011228ef8a25882397f3a859b9fe1436946140b6756b" },
+ { url = "https://mirrors.aliyun.com/pypi/packages/b1/8e/8b6f42c67d5df7db35b8c55c9a850ea045219741bb14416255616808c690/greenlet-3.2.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:efc6dc8a792243c31f2f5674b670b3a95d46fa1c6a912b8e310d6f542e7b0712" },
+ { url = "https://mirrors.aliyun.com/pypi/packages/05/46/ab58828217349500a7ebb81159d52ca357da747ff1797c29c6023d79d798/greenlet-3.2.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:731e154aba8e757aedd0781d4b240f1225b075b4409f1bb83b05ff410582cf00" },
+ { url = "https://mirrors.aliyun.com/pypi/packages/68/7f/d1b537be5080721c0f0089a8447d4ef72839039cdb743bdd8ffd23046e9a/greenlet-3.2.3-cp310-cp310-win_amd64.whl", hash = "sha256:96c20252c2f792defe9a115d3287e14811036d51e78b3aaddbee23b69b216302" },
+ { url = "https://mirrors.aliyun.com/pypi/packages/fc/2e/d4fcb2978f826358b673f779f78fa8a32ee37df11920dc2bb5589cbeecef/greenlet-3.2.3-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:784ae58bba89fa1fa5733d170d42486580cab9decda3484779f4759345b29822" },
+ { url = "https://mirrors.aliyun.com/pypi/packages/16/24/929f853e0202130e4fe163bc1d05a671ce8dcd604f790e14896adac43a52/greenlet-3.2.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0921ac4ea42a5315d3446120ad48f90c3a6b9bb93dd9b3cf4e4d84a66e42de83" },
+ { url = "https://mirrors.aliyun.com/pypi/packages/d1/b2/0320715eb61ae70c25ceca2f1d5ae620477d246692d9cc284c13242ec31c/greenlet-3.2.3-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:d2971d93bb99e05f8c2c0c2f4aa9484a18d98c4c3bd3c62b65b7e6ae33dfcfaf" },
+ { url = "https://mirrors.aliyun.com/pypi/packages/bd/49/445fd1a210f4747fedf77615d941444349c6a3a4a1135bba9701337cd966/greenlet-3.2.3-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:c667c0bf9d406b77a15c924ef3285e1e05250948001220368e039b6aa5b5034b" },
+ { url = "https://mirrors.aliyun.com/pypi/packages/7e/c8/ca19760cf6eae75fa8dc32b487e963d863b3ee04a7637da77b616703bc37/greenlet-3.2.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:592c12fb1165be74592f5de0d70f82bc5ba552ac44800d632214b76089945147" },
+ { url = "https://mirrors.aliyun.com/pypi/packages/65/89/77acf9e3da38e9bcfca881e43b02ed467c1dedc387021fc4d9bd9928afb8/greenlet-3.2.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:29e184536ba333003540790ba29829ac14bb645514fbd7e32af331e8202a62a5" },
+ { url = "https://mirrors.aliyun.com/pypi/packages/97/c6/ae244d7c95b23b7130136e07a9cc5aadd60d59b5951180dc7dc7e8edaba7/greenlet-3.2.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:93c0bb79844a367782ec4f429d07589417052e621aa39a5ac1fb99c5aa308edc" },
+ { url = "https://mirrors.aliyun.com/pypi/packages/89/5f/b16dec0cbfd3070658e0d744487919740c6d45eb90946f6787689a7efbce/greenlet-3.2.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:751261fc5ad7b6705f5f76726567375bb2104a059454e0226e1eef6c756748ba" },
+ { url = "https://mirrors.aliyun.com/pypi/packages/66/77/d48fb441b5a71125bcac042fc5b1494c806ccb9a1432ecaa421e72157f77/greenlet-3.2.3-cp311-cp311-win_amd64.whl", hash = "sha256:83a8761c75312361aa2b5b903b79da97f13f556164a7dd2d5448655425bd4c34" },
+ { url = "https://mirrors.aliyun.com/pypi/packages/f3/94/ad0d435f7c48debe960c53b8f60fb41c2026b1d0fa4a99a1cb17c3461e09/greenlet-3.2.3-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:25ad29caed5783d4bd7a85c9251c651696164622494c00802a139c00d639242d" },
+ { url = "https://mirrors.aliyun.com/pypi/packages/93/5d/7c27cf4d003d6e77749d299c7c8f5fd50b4f251647b5c2e97e1f20da0ab5/greenlet-3.2.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:88cd97bf37fe24a6710ec6a3a7799f3f81d9cd33317dcf565ff9950c83f55e0b" },
+ { url = "https://mirrors.aliyun.com/pypi/packages/c6/7e/807e1e9be07a125bb4c169144937910bf59b9d2f6d931578e57f0bce0ae2/greenlet-3.2.3-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:baeedccca94880d2f5666b4fa16fc20ef50ba1ee353ee2d7092b383a243b0b0d" },
+ { url = "https://mirrors.aliyun.com/pypi/packages/9d/ab/158c1a4ea1068bdbc78dba5a3de57e4c7aeb4e7fa034320ea94c688bfb61/greenlet-3.2.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:be52af4b6292baecfa0f397f3edb3c6092ce071b499dd6fe292c9ac9f2c8f264" },
+ { url = "https://mirrors.aliyun.com/pypi/packages/cc/0d/93729068259b550d6a0288da4ff72b86ed05626eaf1eb7c0d3466a2571de/greenlet-3.2.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0cc73378150b8b78b0c9fe2ce56e166695e67478550769536a6742dca3651688" },
+ { url = "https://mirrors.aliyun.com/pypi/packages/f6/f6/c82ac1851c60851302d8581680573245c8fc300253fc1ff741ae74a6c24d/greenlet-3.2.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:706d016a03e78df129f68c4c9b4c4f963f7d73534e48a24f5f5a7101ed13dbbb" },
+ { url = "https://mirrors.aliyun.com/pypi/packages/98/82/d022cf25ca39cf1200650fc58c52af32c90f80479c25d1cbf57980ec3065/greenlet-3.2.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:419e60f80709510c343c57b4bb5a339d8767bf9aef9b8ce43f4f143240f88b7c" },
+ { url = "https://mirrors.aliyun.com/pypi/packages/f5/e1/25297f70717abe8104c20ecf7af0a5b82d2f5a980eb1ac79f65654799f9f/greenlet-3.2.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:93d48533fade144203816783373f27a97e4193177ebaaf0fc396db19e5d61163" },
+ { url = "https://mirrors.aliyun.com/pypi/packages/1f/8f/8f9e56c5e82eb2c26e8cde787962e66494312dc8cb261c460e1f3a9c88bc/greenlet-3.2.3-cp312-cp312-win_amd64.whl", hash = "sha256:7454d37c740bb27bdeddfc3f358f26956a07d5220818ceb467a483197d84f849" },
]

[[package]]
@@ -2375,7 +2395,7 @@ wheels = [

[[package]]
name = "httpx"
-version = "0.27.0"
+version = "0.27.2"
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
dependencies = [
{ name = "anyio" },
@@ -2384,9 +2404,9 @@ dependencies = [
{ name = "idna" },
{ name = "sniffio" },
]
-sdist = { url = "https://mirrors.aliyun.com/pypi/packages/5c/2d/3da5bdf4408b8b2800061c339f240c1802f2e82d55e50bd39c5a881f47f0/httpx-0.27.0.tar.gz", hash = "sha256:a0cb88a46f32dc874e04ee956e4c2764aba2aa228f650b06788ba6bda2962ab5" }
+sdist = { url = "https://mirrors.aliyun.com/pypi/packages/78/82/08f8c936781f67d9e6b9eeb8a0c8b4e406136ea4c3d1f89a5db71d42e0e6/httpx-0.27.2.tar.gz", hash = "sha256:f7c2be1d2f3c3c3160d441802406b206c2b76f5947b11115e6df10c6c65e66c2" }
wheels = [
- { url = "https://mirrors.aliyun.com/pypi/packages/41/7b/ddacf6dcebb42466abd03f368782142baa82e08fc0c1f8eaa05b4bae87d5/httpx-0.27.0-py3-none-any.whl", hash = "sha256:71d5465162c13681bff01ad59b2cc68dd838ea1f10e51574bac27103f00c91a5" },
+ { url = "https://mirrors.aliyun.com/pypi/packages/56/95/9377bcb415797e44274b51d46e3249eba641711cf3348050f76ee7b15ffc/httpx-0.27.2-py3-none-any.whl", hash = "sha256:7bb2708e112d8fdd7829cd4243970f0c223274051cb35ee80c03301ee29a3df0" },
]

[[package]]
@@ -2857,24 +2877,24 @@ wheels = [

[[package]]
name = "litellm"
-version = "1.48.0"
+version = "1.75.0"
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
dependencies = [
{ name = "aiohttp" },
{ name = "click" },
+ { name = "httpx" },
{ name = "importlib-metadata" },
{ name = "jinja2" },
{ name = "jsonschema" },
{ name = "openai" },
{ name = "pydantic" },
{ name = "python-dotenv" },
- { name = "requests" },
{ name = "tiktoken" },
{ name = "tokenizers" },
]
-sdist = { url = "https://mirrors.aliyun.com/pypi/packages/85/cf/ec69c348c6f16148a55657f3bd63215e965028441c0f322ae8edf9c1210a/litellm-1.48.0.tar.gz", hash = "sha256:31a9b8a25a9daf44c24ddc08bf74298da920f2c5cea44135e5061278d0aa6fc9" }
+sdist = { url = "https://mirrors.aliyun.com/pypi/packages/1b/28/50837cb0246c42a8caac45610572883de7f478543cf4d143e84f099c0234/litellm-1.75.0.tar.gz", hash = "sha256:ec7fbfe79e1b9cd4a2b36ca9e71e71959d8fc43305b222e5f257aced1a0d1d63" }
wheels = [
- { url = "https://mirrors.aliyun.com/pypi/packages/37/2b/6a42747557dc557e71d1e0664c4d5a814b08cda0589213921bb51c64c5e4/litellm-1.48.0-py3-none-any.whl", hash = "sha256:7765e8a92069778f5fc66aacfabd0e2f8ec8d74fb117f5e475567d89b0d376b9" },
+ { url = "https://mirrors.aliyun.com/pypi/packages/db/43/e10905870d42e927de3b095a9248f2764156c7eb45ec172d72be35cd2bb4/litellm-1.75.0-py3-none-any.whl", hash = "sha256:1657472f37d291b366050dd2035e3640eebd96142d6fa0f935ceb290a0e1d5ad" },
]

[[package]]
@@ -3765,12 +3785,12 @@ name = "onnxruntime-gpu"
version = "1.19.2"
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
dependencies = [
- { name = "coloredlogs" },
- { name = "flatbuffers" },
- { name = "numpy" },
- { name = "packaging" },
- { name = "protobuf" },
- { name = "sympy" },
+ { name = "coloredlogs", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
+ { name = "flatbuffers", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
+ { name = "numpy", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
+ { name = "packaging", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
+ { name = "protobuf", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
+ { name = "sympy", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
]
wheels = [
{ url = "https://mirrors.aliyun.com/pypi/packages/d0/9c/3fa310e0730643051eb88e884f19813a6c8b67d0fbafcda610d960e589db/onnxruntime_gpu-1.19.2-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a49740e079e7c5215830d30cde3df792e903df007aa0b0fd7aa797937061b27a" },
@@ -3783,7 +3803,7 @@ wheels = [

[[package]]
name = "openai"
-version = "1.45.0"
+version = "1.99.1"
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
dependencies = [
{ name = "anyio" },
@@ -3795,9 +3815,9 @@ dependencies = [
{ name = "tqdm" },
{ name = "typing-extensions" },
]
-sdist = { url = "https://mirrors.aliyun.com/pypi/packages/70/cd/5ec65b9a56999370c032af7933433143f78239d44a8c03a5ba34159af945/openai-1.45.0.tar.gz", hash = "sha256:731207d10637335413aa3c0955f8f8df30d7636a4a0f9c381f2209d32cf8de97" }
+sdist = { url = "https://mirrors.aliyun.com/pypi/packages/03/30/f0fb7907a77e733bb801c7bdcde903500b31215141cdb261f04421e6fbec/openai-1.99.1.tar.gz", hash = "sha256:2c9d8e498c298f51bb94bcac724257a3a6cac6139ccdfc1186c6708f7a93120f" }
wheels = [
- { url = "https://mirrors.aliyun.com/pypi/packages/d4/2a/97e80a4551346efc9cd937e11adb640207acc5045fdf4e06786eac55bfb1/openai-1.45.0-py3-none-any.whl", hash = "sha256:2f1f7b7cf90f038a9f1c24f0d26c0f1790c102ec5acd07ffd70a9b7feac1ff4e" },
+ { url = "https://mirrors.aliyun.com/pypi/packages/54/15/9c85154ffd283abfc43309ff3aaa63c3fd02f7767ee684e73670f6c5ade2/openai-1.99.1-py3-none-any.whl", hash = "sha256:8eeccc69e0ece1357b51ca0d9fb21324afee09b20c3e5b547d02445ca18a4e03" },
]

[[package]]
@@ -4242,32 +4262,21 @@ wheels = [

[[package]]
name = "playwright"
-version = "1.47.0"
+version = "1.54.0"
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
dependencies = [
{ name = "greenlet" },
{ name = "pyee" },
]
wheels = [
- { url = "https://mirrors.aliyun.com/pypi/packages/f8/70/01cad1d41861cd939fe66bff725771dd03f2de39b7c25b4479de2f583ce0/playwright-1.47.0-py3-none-macosx_10_13_x86_64.whl", hash = "sha256:f205df24edb925db1a4ab62f1ab0da06f14bb69e382efecfb0deedc4c7f4b8cd" },
- { url = "https://mirrors.aliyun.com/pypi/packages/42/17/2300e578b434b56ebfc3d56a5e0fe6dc5e99d6ff43a88fa492b881f3b7e3/playwright-1.47.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:7fc820faf6885f69a52ba4ec94124e575d3c4a4003bf29200029b4a4f2b2d0ab" },
- { url = "https://mirrors.aliyun.com/pypi/packages/5a/6a/3cff2abfa4b4c52e1fa34fa8b71bf09cc2a89b03b7417733e5138f1be61d/playwright-1.47.0-py3-none-macosx_11_0_universal2.whl", hash = "sha256:8e212dc472ff19c7d46ed7e900191c7a786ce697556ac3f1615986ec3aa00341" },
- { url = "https://mirrors.aliyun.com/pypi/packages/80/a6/c5152c817db664d75c439c2bd99d51f906a31c1df4a04e673ef51008b12f/playwright-1.47.0-py3-none-manylinux1_x86_64.whl", hash = "sha256:a1935672531963e4b2a321de5aa59b982fb92463ee6e1032dd7326378e462955" },
- { url = "https://mirrors.aliyun.com/pypi/packages/d6/50/b573c13d3748a1ab94ed45f2faeb868c63263df0055f57028c4cc775419f/playwright-1.47.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e0a1b61473d6f7f39c5d77d4800b3cbefecb03344c90b98f3fbcae63294ad249" },
- { url = "https://mirrors.aliyun.com/pypi/packages/7d/6c/34225ee5707db5e34bffa77f05d152c797c0e0b9bf3d3a5b426d99160f8f/playwright-1.47.0-py3-none-win32.whl", hash = "sha256:1b977ed81f6bba5582617684a21adab9bad5676d90a357ebf892db7bdf4a9974" },
- { url = "https://mirrors.aliyun.com/pypi/packages/cb/88/9a3c77025702e506fe04275e677676246ff0b2e6964de5d2527dfdab3416/playwright-1.47.0-py3-none-win_amd64.whl", hash = "sha256:0ec1056042d2e86088795a503347407570bffa32cbe20748e5d4c93dba085280" },
-]
-
-[[package]]
-name = "playwright-stealth"
-version = "1.0.6"
-source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
-dependencies = [
- { name = "playwright" },
-]
-sdist = { url = "https://mirrors.aliyun.com/pypi/packages/e5/dc/4e88b517e4c9cfb63f1b0b67d59adddcef2dc2fe0883b90e07119d15895a/playwright-stealth-1.0.6.tar.gz", hash = "sha256:b504d951d00fac755c7d13665a29611d415180510bd7d23f14ebc89439ba2043" }
-wheels = [
- { url = "https://mirrors.aliyun.com/pypi/packages/34/10/60981cb8d8e22487061b98a0803313c4fb519cc95ab1421516304a0cfcd0/playwright_stealth-1.0.6-py3-none-any.whl", hash = "sha256:b1b2bcf58eb6859aa53d42c49b91c4e27b74a6d13fc3d0c85eea513dd55efda3" },
+ { url = "https://mirrors.aliyun.com/pypi/packages/f3/09/33d5bfe393a582d8dac72165a9e88b274143c9df411b65ece1cc13f42988/playwright-1.54.0-py3-none-macosx_10_13_x86_64.whl", hash = "sha256:bf3b845af744370f1bd2286c2a9536f474cc8a88dc995b72ea9a5be714c9a77d" },
+ { url = "https://mirrors.aliyun.com/pypi/packages/e1/7b/51882dc584f7aa59f446f2bb34e33c0e5f015de4e31949e5b7c2c10e54f0/playwright-1.54.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:780928b3ca2077aea90414b37e54edd0c4bbb57d1aafc42f7aa0b3fd2c2fac02" },
+ { url = "https://mirrors.aliyun.com/pypi/packages/73/a1/7aa8ae175b240c0ec8849fcf000e078f3c693f9aa2ffd992da6550ea0dff/playwright-1.54.0-py3-none-macosx_11_0_universal2.whl", hash = "sha256:81d0b6f28843b27f288cfe438af0a12a4851de57998009a519ea84cee6fbbfb9" },
+ { url = "https://mirrors.aliyun.com/pypi/packages/34/a9/45084fd23b6206f954198296ce39b0acf50debfdf3ec83a593e4d73c9c8a/playwright-1.54.0-py3-none-manylinux1_x86_64.whl", hash = "sha256:09919f45cc74c64afb5432646d7fef0d19fff50990c862cb8d9b0577093f40cc" },
+ { url = "https://mirrors.aliyun.com/pypi/packages/02/d4/6a692f4c6db223adc50a6e53af405b45308db39270957a6afebddaa80ea2/playwright-1.54.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:13ae206c55737e8e3eae51fb385d61c0312eeef31535643bb6232741b41b6fdc" },
+ { url = "https://mirrors.aliyun.com/pypi/packages/72/7a/4ee60a1c3714321db187bebbc40d52cea5b41a856925156325058b5fca5a/playwright-1.54.0-py3-none-win32.whl", hash = "sha256:0b108622ffb6906e28566f3f31721cd57dda637d7e41c430287804ac01911f56" },
+ { url = "https://mirrors.aliyun.com/pypi/packages/aa/77/8f8fae05a242ef639de963d7ae70a69d0da61d6d72f1207b8bbf74ffd3e7/playwright-1.54.0-py3-none-win_amd64.whl", hash = "sha256:9e5aee9ae5ab1fdd44cd64153313a2045b136fcbcfb2541cc0a3d909132671a2" },
+ { url = "https://mirrors.aliyun.com/pypi/packages/33/ff/99a6f4292a90504f2927d34032a4baf6adb498dc3f7cf0f3e0e22899e310/playwright-1.54.0-py3-none-win_arm64.whl", hash = "sha256:a975815971f7b8dca505c441a4c56de1aeb56a211290f8cc214eeef5524e8d75" },
]

[[package]]
@@ -4653,8 +4662,6 @@ wheels = [
{ url = "https://mirrors.aliyun.com/pypi/packages/59/fe/aae679b64363eb78326c7fdc9d06ec3de18bac68be4b612fc1fe8902693c/pycryptodome-3.23.0-cp37-abi3-win32.whl", hash = "sha256:507dbead45474b62b2bbe318eb1c4c8ee641077532067fec9c1aa82c31f84886" },
{ url = "https://mirrors.aliyun.com/pypi/packages/54/2f/e97a1b8294db0daaa87012c24a7bb714147c7ade7656973fd6c736b484ff/pycryptodome-3.23.0-cp37-abi3-win_amd64.whl", hash = "sha256:c75b52aacc6c0c260f204cbdd834f76edc9fb0d8e0da9fbf8352ef58202564e2" },
{ url = "https://mirrors.aliyun.com/pypi/packages/18/3d/f9441a0d798bf2b1e645adc3265e55706aead1255ccdad3856dbdcffec14/pycryptodome-3.23.0-cp37-abi3-win_arm64.whl", hash = "sha256:11eeeb6917903876f134b56ba11abe95c0b0fd5e3330def218083c7d98bbcb3c" },
- { url = "https://mirrors.aliyun.com/pypi/packages/9f/7c/f5b0556590e7b4e710509105e668adb55aa9470a9f0e4dea9c40a4a11ce1/pycryptodome-3.23.0-pp27-pypy_73-manylinux2010_x86_64.whl", hash = "sha256:350ebc1eba1da729b35ab7627a833a1a355ee4e852d8ba0447fafe7b14504d56" },
- { url = "https://mirrors.aliyun.com/pypi/packages/33/38/dcc795578d610ea1aaffef4b148b8cafcfcf4d126b1e58231ddc4e475c70/pycryptodome-3.23.0-pp27-pypy_73-win32.whl", hash = "sha256:93837e379a3e5fd2bb00302a47aee9fdf7940d83595be3915752c74033d17ca7" },
{ url = "https://mirrors.aliyun.com/pypi/packages/d9/12/e33935a0709c07de084d7d58d330ec3f4daf7910a18e77937affdb728452/pycryptodome-3.23.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:ddb95b49df036ddd264a0ad246d1be5b672000f12d6961ea2c267083a5e19379" },
{ url = "https://mirrors.aliyun.com/pypi/packages/22/0b/aa8f9419f25870889bebf0b26b223c6986652bdf071f000623df11212c90/pycryptodome-3.23.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8e95564beb8782abfd9e431c974e14563a794a4944c29d6d3b7b5ea042110b4" },
{ url = "https://mirrors.aliyun.com/pypi/packages/d4/5e/63f5cbde2342b7f70a39e591dbe75d9809d6338ce0b07c10406f1a140cdc/pycryptodome-3.23.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:14e15c081e912c4b0d75632acd8382dfce45b258667aa3c67caf7a4d4c13f630" },
@@ -4678,8 +4685,6 @@ wheels = [
{ url = "https://mirrors.aliyun.com/pypi/packages/48/7d/0f2b09490b98cc6a902ac15dda8760c568b9c18cfe70e0ef7a16de64d53a/pycryptodomex-3.20.0-cp35-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:7a7a8f33a1f1fb762ede6cc9cbab8f2a9ba13b196bfaf7bc6f0b39d2ba315a43" },
{ url = "https://mirrors.aliyun.com/pypi/packages/b0/1c/375adb14b71ee1c8d8232904e928b3e7af5bbbca7c04e4bec94fe8e90c3d/pycryptodomex-3.20.0-cp35-abi3-win32.whl", hash = "sha256:c39778fd0548d78917b61f03c1fa8bfda6cfcf98c767decf360945fe6f97461e" },
{ url = "https://mirrors.aliyun.com/pypi/packages/b2/e8/1b92184ab7e5595bf38000587e6f8cf9556ebd1bf0a583619bee2057afbd/pycryptodomex-3.20.0-cp35-abi3-win_amd64.whl", hash = "sha256:2a47bcc478741b71273b917232f521fd5704ab4b25d301669879e7273d3586cc" },
- { url = "https://mirrors.aliyun.com/pypi/packages/e7/c5/9140bb867141d948c8e242013ec8a8011172233c898dfdba0a2417c3169a/pycryptodomex-3.20.0-pp27-pypy_73-manylinux2010_x86_64.whl", hash = "sha256:1be97461c439a6af4fe1cf8bf6ca5936d3db252737d2f379cc6b2e394e12a458" },
- { url = "https://mirrors.aliyun.com/pypi/packages/5e/6a/04acb4978ce08ab16890c70611ebc6efd251681341617bbb9e53356dee70/pycryptodomex-3.20.0-pp27-pypy_73-win32.whl", hash = "sha256:19764605feea0df966445d46533729b645033f134baeb3ea26ad518c9fdf212c" },
{ url = "https://mirrors.aliyun.com/pypi/packages/eb/df/3f1ea084e43b91e6d2b6b3493cc948864c17ea5d93ff1261a03812fbfd1a/pycryptodomex-3.20.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:f2e497413560e03421484189a6b65e33fe800d3bd75590e6d78d4dfdb7accf3b" },
{ url = "https://mirrors.aliyun.com/pypi/packages/c9/f3/83ffbdfa0c8f9154bcd8866895f6cae5a3ec749da8b0840603cf936c4412/pycryptodomex-3.20.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e48217c7901edd95f9f097feaa0388da215ed14ce2ece803d3f300b4e694abea" },
{ url = "https://mirrors.aliyun.com/pypi/packages/c9/9d/c113e640aaf02af5631ae2686b742aac5cd0e1402b9d6512b1c7ec5ef05d/pycryptodomex-3.20.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d00fe8596e1cc46b44bf3907354e9377aa030ec4cd04afbbf6e899fc1e2a7781" },
@@ -4792,14 +4797,14 @@ wheels = [

[[package]]
name = "pyee"
-version = "12.0.0"
+version = "13.0.0"
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
dependencies = [
{ name = "typing-extensions" },
]
-sdist = { url = "https://mirrors.aliyun.com/pypi/packages/d2/a7/8faaa62a488a2a1e0d56969757f087cbd2729e9bcfa508c230299f366b4c/pyee-12.0.0.tar.gz", hash = "sha256:c480603f4aa2927d4766eb41fa82793fe60a82cbfdb8d688e0d08c55a534e145" }
+sdist = { url = "https://mirrors.aliyun.com/pypi/packages/95/03/1fd98d5841cd7964a27d729ccf2199602fe05eb7a405c1462eb7277945ed/pyee-13.0.0.tar.gz", hash = "sha256:b391e3c5a434d1f5118a25615001dbc8f669cf410ab67d04c4d4e07c55481c37" }
wheels = [
- { url = "https://mirrors.aliyun.com/pypi/packages/1d/0d/95993c08c721ec68892547f2117e8f9dfbcef2ca71e098533541b4a54d5f/pyee-12.0.0-py3-none-any.whl", hash = "sha256:7b14b74320600049ccc7d0e0b1becd3b4bd0a03c745758225e31a59f4095c990" },
+ { url = "https://mirrors.aliyun.com/pypi/packages/9b/4d/b9add7c84060d4c1906abe9a7e5359f2a60f7a9a4f67268b2766673427d8/pyee-13.0.0-py3-none-any.whl", hash = "sha256:48195a3cddb3b1515ce0695ed76036b5ccc2ef3a9f963ff9f77aec0139845498" },
]

[[package]]
@@ -5239,6 +5244,7 @@ dependencies = [
{ name = "itsdangerous" },
{ name = "json-repair" },
{ name = "langfuse" },
+ { name = "litellm" },
{ name = "markdown" },
{ name = "markdown-to-json" },
{ name = "mcp" },
@@ -5357,7 +5363,7 @@ requires-dist = [
{ name = "click", specifier = ">=8.1.8" },
{ name = "cn2an", specifier = "==0.5.22" },
{ name = "cohere", specifier = "==5.6.2" },
- { name = "crawl4ai", specifier = "==0.3.8" },
+ { name = "crawl4ai", specifier = ">=0.3.8" },
{ name = "dashscope", specifier = "==1.20.11" },
{ name = "datrie", specifier = "==0.8.2" },
{ name = "debugpy", specifier = ">=1.8.13" },
@@ -5384,13 +5390,14 @@ requires-dist = [
{ name = "groq", specifier = "==0.9.0" },
{ name = "hanziconv", specifier = "==0.3.2" },
{ name = "html-text", specifier = "==0.6.2" },
- { name = "httpx", specifier = "==0.27.0" },
+ { name = "httpx", specifier = "==0.27.2" },
{ name = "huggingface-hub", specifier = ">=0.25.0,<0.26.0" },
{ name = "infinity-emb", specifier = ">=0.0.66,<0.0.67" },
{ name = "infinity-sdk", specifier = "==0.6.0.dev4" },
{ name = "itsdangerous", specifier = "==2.1.2" },
{ name = "json-repair", specifier = "==0.35.0" },
{ name = "langfuse", specifier = ">=2.60.0" },
+ { name = "litellm", specifier = ">=1.74.15.post1" },
{ name = "markdown", specifier = "==3.6" },
{ name = "markdown-to-json", specifier = "==2.1.1" },
{ name = "mcp", specifier = ">=1.9.4" },
@@ -5402,7 +5409,7 @@ requires-dist = [
{ name = "ollama", specifier = "==0.2.1" },
{ name = "onnxruntime", marker = "platform_machine != 'x86_64' or sys_platform == 'darwin'", specifier = "==1.19.2" },
{ name = "onnxruntime-gpu", marker = "platform_machine == 'x86_64' and sys_platform != 'darwin'", specifier = "==1.19.2" },
- { name = "openai", specifier = "==1.45.0" },
+ { name = "openai", specifier = ">=1.45.0" },
{ name = "opencv-python", specifier = "==4.10.0.84" },
{ name = "opencv-python-headless", specifier = "==4.10.0.84" },
{ name = "opendal", specifier = ">=0.45.0,<0.46.0" },
@@ -5467,6 +5474,7 @@ requires-dist = [
{ name = "yfinance", specifier = "==0.2.65" },
{ name = "zhipuai", specifier = "==2.0.1" },
]
+provides-extras = ["full"]

[package.metadata.requires-dev]
test = [
@@ -5481,6 +5489,18 @@ test = [
{ name = "requests-toolbelt", specifier = ">=1.0.0" },
]

+[[package]]
+name = "rank-bm25"
+version = "0.2.2"
+source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
+dependencies = [
+ { name = "numpy" },
+]
+sdist = { url = "https://mirrors.aliyun.com/pypi/packages/fc/0a/f9579384aa017d8b4c15613f86954b92a95a93d641cc849182467cf0bb3b/rank_bm25-0.2.2.tar.gz", hash = "sha256:096ccef76f8188563419aaf384a02f0ea459503fdf77901378d4fd9d87e5e51d" }
+wheels = [
+ { url = "https://mirrors.aliyun.com/pypi/packages/2a/21/f691fb2613100a62b3fa91e9988c991e9ca5b89ea31c0d3152a3210344f9/rank_bm25-0.2.2-py3-none-any.whl", hash = "sha256:7bd4a95571adadfc271746fa146a4bcfd89c0cf731e49c3d1ad863290adbe8ae" },
+]
+
[[package]]
name = "ranx"
version = "0.3.20"
@@ -6423,6 +6443,19 @@ wheels = [
{ url = "https://mirrors.aliyun.com/pypi/packages/55/08/98090d1a139e8995053ed22e099b43aa4dea8cffe056f8f0bc5178aeecbd/tencentcloud_sdk_python-3.0.1215-py2.py3-none-any.whl", hash = "sha256:899ced749baf74846f1eabf452f74aa0e48d1965f0ca7828a8b73b446f76f5f2" },
]

+[[package]]
+name = "tf-playwright-stealth"
+version = "1.2.0"
+source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
+dependencies = [
+ { name = "fake-http-header" },
+ { name = "playwright" },
+]
+sdist = { url = "https://mirrors.aliyun.com/pypi/packages/d6/6b/32bb58c65991f91aeaaf7473b650175d9d4af5dd383983d177d49ccba08d/tf_playwright_stealth-1.2.0.tar.gz", hash = "sha256:7bb8d32d3e60324fbf6b9eeae540b8cd9f3b9e07baeb33b025dbc98ad47658ba" }
+wheels = [
+ { url = "https://mirrors.aliyun.com/pypi/packages/11/3d/2653f4cf49660bb44eeac8270617cc4c0287d61716f249f55053f0af0724/tf_playwright_stealth-1.2.0-py3-none-any.whl", hash = "sha256:26ee47ee89fa0f43c606fe37c188ea3ccd36f96ea90c01d167b768df457e7886" },
+]
+
[[package]]
name = "threadpoolctl"
version = "3.6.0"
diff --git a/web/src/locales/config.ts b/web/src/locales/config.ts
index cd8d928f5..65d85d718 100644
--- a/web/src/locales/config.ts
+++ b/web/src/locales/config.ts
@@ -10,9 +10,9 @@ import translation_fr from './fr';
import translation_id from './id';
import translation_ja from './ja';
import translation_pt_br from './pt-br';
+import translation_ru from './ru';
import { createTranslationTable, flattenObject } from './until';
import translation_vi from './vi';
-import translation_ru from './ru';
import translation_zh from './zh';
import translation_zh_traditional from './zh-traditional';

diff --git a/web/src/locales/ru.ts b/web/src/locales/ru.ts
index 21d798dea..4cf23cd62 100644
--- a/web/src/locales/ru.ts
+++ b/web/src/locales/ru.ts
@@ -246,8 +246,7 @@ export default {
methodExamples: 'Примеры',
methodExamplesDescription: 'Скриншоты для пояснения:',
dialogueExamplesTitle: 'просмотр',
- methodEmpty:
- 'Здесь будет визуальное объяснение категорий баз знаний',
+ methodEmpty: 'Здесь будет визуальное объяснение категорий баз знаний',
book: `
Поддерживаемые форматы: DOCX, PDF, TXT.
Для PDF укажите диапазон страниц.
`,
laws: `Поддерживаемые форматы: DOCX, PDF, TXT.
@@ -316,21 +315,19 @@ export default {
В столбце тегов используйте запятую для разделения тегов.
`,
useRaptor: 'Использовать RAPTOR',
- useRaptorTip:
- 'Включите RAPTOR для многошаговых вопросно-ответных задач.',
+ useRaptorTip: 'Включите RAPTOR для многошаговых вопросно-ответных задач.',
prompt: 'Промпт',
- promptTip:
- 'Опишите задачу для LLM, укажите формат ответа и требования.',
+ promptTip: 'Опишите задачу для LLM, укажите формат ответа и требования.',
promptMessage: 'Требуется промпт',
promptText: `Пожалуйста, обобщите следующие абзацы. Будьте внимательны с числами, не выдумывайте. Абзацы:
{cluster_content}
Выше представлен контент для обобщения.`,
maxToken: 'Макс. токенов',
- maxTokenTip: 'Максимальное количество токенов на суммаризирующий фрагмент.',
+ maxTokenTip:
+ 'Максимальное количество токенов на суммаризирующий фрагмент.',
maxTokenMessage: 'Требуется макс. токенов',
threshold: 'Порог',
- thresholdTip:
- 'Минимальное сходство для группировки фрагментов в RAPTOR.',
+ thresholdTip: 'Минимальное сходство для группировки фрагментов в RAPTOR.',
thresholdMessage: 'Требуется порог',
maxCluster: 'Макс. кластеров',
maxClusterTip: 'Максимальное количество кластеров.',
@@ -463,8 +460,7 @@ export default {
'Устанавливает порог для выбора наиболее вероятных слов (ядерная выборка).',
presencePenalty: 'Штраф за присутствие',
presencePenaltyMessage: 'Требуется штраф за присутствие',
- presencePenaltyTip:
- 'Штрафует слова, уже появившиеся в диалоге.',
+ presencePenaltyTip: 'Штрафует слова, уже появившиеся в диалоге.',
frequencyPenalty: 'Штраф за частоту',
frequencyPenaltyMessage: 'Требуется штраф за частоту',
frequencyPenaltyTip:
@@ -553,8 +549,7 @@ export default {
maxTokensInvalidMessage: 'Введите корректное число для Макс. токенов.',
maxTokensMinMessage: 'Макс. токенов не может быть меньше 0.',
password: 'Пароль',
- passwordDescription:
- 'Введите текущий пароль для изменения пароля.',
+ passwordDescription: 'Введите текущий пароль для изменения пароля.',
model: 'Провайдеры моделей',
modelDescription: 'Настройте параметры моделей и API KEY.',
team: 'Команда',
@@ -584,17 +579,14 @@ export default {
'Ваш новый пароль должен быть длиннее 8 символов.',
confirmPassword: 'Подтвердите новый пароль',
confirmPasswordMessage: 'Подтвердите пароль!',
- confirmPasswordNonMatchMessage:
- 'Новые пароли не совпадают!',
+ confirmPasswordNonMatchMessage: 'Новые пароли не совпадают!',
cancel: 'Отмена',
addedModels: 'Добавленные модели',
modelsToBeAdded: 'Модели для добавления',
addTheModel: 'Добавить модель',
apiKey: 'API-Ключ',
- apiKeyMessage:
- 'Введите API ключ (для локальных моделей игнорируйте).',
- apiKeyTip:
- 'API ключ можно получить у поставщика LLM.',
+ apiKeyMessage: 'Введите API ключ (для локальных моделей игнорируйте).',
+ apiKeyTip: 'API ключ можно получить у поставщика LLM.',
showMoreModels: 'Показать модели',
hideModels: 'Скрыть модели',
baseUrl: 'Базовый URL',
@@ -603,22 +595,18 @@ export default {
modify: 'Изменить',
systemModelSettings: 'Установить модели по умолчанию',
chatModel: 'Модель чата',
- chatModelTip:
- 'Модель чата по умолчанию для новых баз знаний.',
+ chatModelTip: 'Модель чата по умолчанию для новых баз знаний.',
embeddingModel: 'Модель эмбеддинга',
- embeddingModelTip:
- 'Модель эмбеддинга по умолчанию для новых баз знаний.',
+ embeddingModelTip: 'Модель эмбеддинга по умолчанию для новых баз знаний.',
img2txtModel: 'Модель Img2txt',
- img2txtModelTip:
- 'Модель описания изображений/видео по умолчанию.',
+ img2txtModelTip: 'Модель описания изображений/видео по умолчанию.',
sequence2txtModel: 'Модель Speech2txt',
sequence2txtModelTip:
'Модель ASR по умолчанию для преобразования речи в текст.',
rerankModel: 'Модель реранкинга',
rerankModelTip: `Модель реранкинга фрагментов по умолчанию.`,
ttsModel: 'Модель TTS',
- ttsModelTip:
- 'Модель преобразования текста в речь по умолчанию.',
+ ttsModelTip: 'Модель преобразования текста в речь по умолчанию.',
workspace: 'Рабочее пространство',
upgrade: 'Обновить',
addLlmTitle: 'Добавить LLM',
@@ -677,8 +665,7 @@ export default {
yiyanAKMessage: 'Введите ваш API KEY',
addyiyanSK: 'yiyan Secret KEY',
yiyanSKMessage: 'Введите ваш Secret KEY',
- FishAudioModelNameMessage:
- 'Дайте имя вашей модели синтеза речи',
+ FishAudioModelNameMessage: 'Дайте имя вашей модели синтеза речи',
addFishAudioAK: 'Fish Audio API KEY',
addFishAudioAKMessage: 'Введите ваш API KEY',
addFishAudioRefID: 'FishAudio Reference ID',
@@ -715,7 +702,7 @@ export default {
configuration: 'Конфигурация',
langfuseDescription:
'Трассировка, оценка, управление промптами и метрики для отладки и улучшения вашего LLM-приложения.',
- viewLangfuseSDocumentation: "Документация Langfuse",
+ viewLangfuseSDocumentation: 'Документация Langfuse',
view: 'Просмотр',
modelsToBeAddedTooltip:
'Если ваш провайдер не указан, но заявляет о "совместимости с OpenAI API", выберите соответствующую карточку.',
@@ -776,8 +763,7 @@ export default {
s3: 'S3 загрузки',
preview: 'Просмотр',
fileError: 'Ошибка файла',
- uploadLimit:
- 'Каждый файл ≤10MB, всего файлов ≤128.',
+ uploadLimit: 'Каждый файл ≤10MB, всего файлов ≤128.',
destinationFolder: 'Целевая папка',
},
flow: {
@@ -844,8 +830,7 @@ export default {
baidu: 'Baidu',
baiduDescription: `Ищет на baidu.com.`,
duckDuckGo: 'DuckDuckGo',
- duckDuckGoDescription:
- 'Ищет на duckduckgo.com.',
+ duckDuckGoDescription: 'Ищет на duckduckgo.com.',
channel: 'Канал',
channelTip: `Текстовый или новостной поиск`,
text: 'Текст',
@@ -855,14 +840,11 @@ export default {
'Количество сообщений истории, видимых LLM. Учитывайте ограничение токенов модели.',
wikipedia: 'Wikipedia',
pubMed: 'PubMed',
- pubMedDescription:
- 'Ищет на https://pubmed.ncbi.nlm.nih.gov/.',
+ pubMedDescription: 'Ищет на https://pubmed.ncbi.nlm.nih.gov/.',
email: 'Email',
- emailTip:
- 'Email обязателен.',
+ emailTip: 'Email обязателен.',
arXiv: 'ArXiv',
- arXivDescription:
- 'Ищет на https://arxiv.org/.',
+ arXivDescription: 'Ищет на https://arxiv.org/.',
sortBy: 'Сортировать по',
submittedDate: 'Дата отправки',
lastUpdatedDate: 'Дата обновления',
@@ -877,24 +859,20 @@ export default {
country: 'Страна и регион',
language: 'Язык',
googleScholar: 'Google Scholar',
- googleScholarDescription:
- 'Ищет на https://scholar.google.com/.',
+ googleScholarDescription: 'Ищет на https://scholar.google.com/.',
yearLow: 'Год от',
yearHigh: 'Год до',
patents: 'Патенты',
data: 'Данные',
deepL: 'DeepL',
- deepLDescription:
- 'Перевод с помощью https://www.deepl.com/.',
+ deepLDescription: 'Перевод с помощью https://www.deepl.com/.',
authKey: 'Ключ авторизации',
sourceLang: 'Исходный язык',
targetLang: 'Целевой язык',
gitHub: 'GitHub',
- gitHubDescription:
- 'Ищет репозитории на https://github.com/.',
+ gitHubDescription: 'Ищет репозитории на https://github.com/.',
baiduFanyi: 'BaiduFanyi',
- baiduFanyiDescription:
- 'Перевод с помощью https://fanyi.baidu.com/.',
+ baiduFanyiDescription: 'Перевод с помощью https://fanyi.baidu.com/.',
appid: 'App ID',
secretKey: 'Секретный ключ',
domain: 'Домен',
@@ -1062,8 +1040,7 @@ export default {
yahooFinanceDescription:
'Запрашивает информацию о публичной компании по тикеру.',
crawler: 'Веб-краулер',
- crawlerDescription:
- 'Скачивает HTML-код с указанного URL.',
+ crawlerDescription: 'Скачивает HTML-код с указанного URL.',
proxy: 'Прокси',
crawlerResultOptions: {
html: 'Html',
@@ -1077,8 +1054,7 @@ export default {
balanceSheet: 'Баланс',
cashFlowStatement: 'Отчет о движении денежных средств',
jin10: 'Jin10',
- jin10Description:
- 'Получает финансовую информацию с Jin10 Open Platform.',
+ jin10Description: 'Получает финансовую информацию с Jin10 Open Platform.',
flashType: 'Тип новости',
filter: 'Фильтр',
contain: 'Содержит',
@@ -1265,13 +1241,13 @@ export default {
'Выберите базы знаний для ассистента или переменные с ID баз знаний.',
knowledgeBaseVars: 'Переменные базы знаний',
code: 'Код',
- codeDescription: 'Позволяет разработчикам писать пользовательскую логику на Python.',
+ codeDescription:
+ 'Позволяет разработчикам писать пользовательскую логику на Python.',
inputVariables: 'Входные переменные',
runningHintText: 'выполняется...🕞',
openingSwitch: 'Приветствие',
openingCopy: 'Приветственное сообщение',
- openingSwitchTip:
- 'Пользователи увидят это приветствие в начале.',
+ openingSwitchTip: 'Пользователи увидят это приветствие в начале.',
modeTip: 'Режим определяет, как запускается рабочий процесс.',
beginInputTip:
'Определите входные параметры для доступа в последующих процессах.',