diff --git a/rag/llm/__init__.py b/rag/llm/__init__.py
index eccf3f7df..14e9a8a19 100644
--- a/rag/llm/__init__.py
+++ b/rag/llm/__init__.py
@@ -43,6 +43,7 @@ FACTORY_DEFAULT_BASE_URL = {
     SupportedLiteLLMProvider.Tongyi_Qianwen: "https://dashscope.aliyuncs.com/compatible-mode/v1",
     SupportedLiteLLMProvider.Dashscope: "https://dashscope.aliyuncs.com/compatible-mode/v1",
     SupportedLiteLLMProvider.Moonshot: "https://api.moonshot.cn/v1",
+    SupportedLiteLLMProvider.Ollama: "",
 }
diff --git a/rag/llm/chat_model.py b/rag/llm/chat_model.py
index 1948ee848..5575aa390 100644
--- a/rag/llm/chat_model.py
+++ b/rag/llm/chat_model.py
@@ -1362,7 +1362,7 @@ class LiteLLMBase(ABC):
         self.prefix = LITELLM_PROVIDER_PREFIX.get(self.provider, "")
         self.model_name = f"{self.prefix}{model_name}"
         self.api_key = key
-        self.base_url = base_url or FACTORY_DEFAULT_BASE_URL.get(self.provider, "")
+        self.base_url = (base_url or FACTORY_DEFAULT_BASE_URL.get(self.provider, "")).rstrip('/')
         # Configure retry parameters
         self.max_retries = kwargs.get("max_retries", int(os.environ.get("LLM_MAX_RETRIES", 5)))
         self.base_delay = kwargs.get("retry_interval", float(os.environ.get("LLM_BASE_DELAY", 2.0)))