From 6b64641042342fafe9da6026c6391bc8280fd4f7 Mon Sep 17 00:00:00 2001
From: Scott Davidson <49713135+sd109@users.noreply.github.com>
Date: Mon, 17 Nov 2025 06:21:27 +0000
Subject: [PATCH] Fix: default model base url extraction logic (#11263)

### What problem does this PR solve?

Fixes an issue where default models that used the same factory but different base URLs would all be initialised with the default chat model's base URL, ignoring the per-model config (e.g. the embedding model's own `base_url`).

For example, with the following service config, the embedding and reranker models would end up using the base URL for the default chat model (i.e. `llm1.example.com`):

```yaml
ragflow:
  service_conf:
    user_default_llm:
      factory: OpenAI-API-Compatible
      api_key: not-used
      default_models:
        chat_model:
          name: llm1
          base_url: https://llm1.example.com/v1
        embedding_model:
          name: llm2
          base_url: https://llm2.example.com/v1
        rerank_model:
          name: llm3
          base_url: https://llm3.example.com/v1/rerank
    llm_factories:
      factory_llm_infos:
        - name: OpenAI-API-Compatible
          logo: ""
          tags: "LLM,TEXT EMBEDDING,SPEECH2TEXT,MODERATION"
          status: "1"
          llm:
            - llm_name: llm1
              base_url: 'https://llm1.example.com/v1'
              api_key: not-used
              tags: "LLM,CHAT,IMAGE2TEXT"
              max_tokens: 100000
              model_type: chat
              is_tools: false
            - llm_name: llm2
              base_url: https://llm2.example.com/v1
              api_key: not-used
              tags: "TEXT EMBEDDING"
              max_tokens: 10000
              model_type: embedding
            - llm_name: llm3
              base_url: https://llm3.example.com/v1/rerank
              api_key: not-used
              tags: "RERANK,1k"
              max_tokens: 10000
              model_type: rerank
```

### Type of change

- [X] Bug Fix (non-breaking change which fixes an issue)
---
 api/db/services/llm_service.py | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/api/db/services/llm_service.py b/api/db/services/llm_service.py
index 6ccbf5a94..4d4ccaa57 100644
--- a/api/db/services/llm_service.py
+++ b/api/db/services/llm_service.py
@@ -19,6 +19,7 @@ import re
 from common.token_utils import num_tokens_from_string
 from functools import partial
 from typing import Generator
+from common.constants import LLMType
 from api.db.db_models import LLM
 from api.db.services.common_service import CommonService
 from api.db.services.tenant_llm_service import LLM4Tenant, TenantLLMService
@@ -32,6 +33,14 @@ def get_init_tenant_llm(user_id):
     from common import settings
 
     tenant_llm = []
+    model_configs = {
+        LLMType.CHAT: settings.CHAT_CFG,
+        LLMType.EMBEDDING: settings.EMBEDDING_CFG,
+        LLMType.SPEECH2TEXT: settings.ASR_CFG,
+        LLMType.IMAGE2TEXT: settings.IMAGE2TEXT_CFG,
+        LLMType.RERANK: settings.RERANK_CFG,
+    }
+
     seen = set()
     factory_configs = []
     for factory_config in [
@@ -54,8 +63,8 @@ def get_init_tenant_llm(user_id):
                     "llm_factory": factory_config["factory"],
                     "llm_name": llm.llm_name,
                     "model_type": llm.model_type,
-                    "api_key": factory_config["api_key"],
-                    "api_base": factory_config["base_url"],
+                    "api_key": model_configs.get(llm.model_type, {}).get("api_key", factory_config["api_key"]),
+                    "api_base": model_configs.get(llm.model_type, {}).get("base_url", factory_config["base_url"]),
                     "max_tokens": llm.max_tokens if llm.max_tokens else 8192,
                 }
             )
@@ -80,8 +89,8 @@ class LLMBundle(LLM4Tenant):
 
     def encode(self, texts: list):
         if self.langfuse:
-            generation = self.langfuse.start_generation(trace_context=self.trace_context, name="encode", model=self.llm_name, input={"texts": texts})
-
+            generation = self.langfuse.start_generation(trace_context=self.trace_context, name="encode", model=self.llm_name, input={"texts": texts})
+
         safe_texts = []
         for text in texts:
             token_size = num_tokens_from_string(text)
@@ -90,7 +99,7 @@ class LLMBundle(LLM4Tenant):
                 safe_texts.append(text[:target_len])
             else:
                 safe_texts.append(text)
-
+
         embeddings, used_tokens = self.mdl.encode(safe_texts)
 
         llm_name = getattr(self, "llm_name", None)
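
For reference, here is a minimal, self-contained sketch of the lookup order the patch introduces: the per-model-type config (mirroring `settings.EMBEDDING_CFG`, `settings.RERANK_CFG`, etc.) takes precedence, and the factory-level `api_key`/`base_url` is used only when no type-specific entry exists. The `LLMType` enum below is a stand-in for `common.constants.LLMType`, and the config dicts are hypothetical examples rather than the real settings objects.

```python
from enum import Enum


class LLMType(str, Enum):
    # Stand-in for common.constants.LLMType; values are illustrative.
    CHAT = "chat"
    EMBEDDING = "embedding"
    RERANK = "rerank"


# Factory-level defaults, which the old code applied to every model type.
factory_config = {"api_key": "not-used", "base_url": "https://llm1.example.com/v1"}

# Per-model-type overrides; CHAT is deliberately omitted here to exercise the fallback.
model_configs = {
    LLMType.EMBEDDING: {"api_key": "not-used", "base_url": "https://llm2.example.com/v1"},
    LLMType.RERANK: {"api_key": "not-used", "base_url": "https://llm3.example.com/v1/rerank"},
}


def resolve(model_type: LLMType, key: str) -> str:
    # Same fallback chain as the patched lines: type-specific config first,
    # then the factory-level value.
    return model_configs.get(model_type, {}).get(key, factory_config[key])


# The embedding model now keeps its own base URL instead of inheriting llm1's.
assert resolve(LLMType.EMBEDDING, "base_url") == "https://llm2.example.com/v1"
# A type with no override (chat, in this sketch) falls back to the factory default.
assert resolve(LLMType.CHAT, "base_url") == "https://llm1.example.com/v1"
```

The chained `.get(model_type, {}).get(key, default)` keeps the factory-level values as a uniform fallback without per-type branching, which is why the fix touches only the two dictionary lookups in `get_init_tenant_llm`.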