From 0fa1a1469e1cade873fc3a2e8740db242aa5020b Mon Sep 17 00:00:00 2001 From: Yongteng Lei Date: Mon, 16 Jun 2025 13:40:12 +0800 Subject: [PATCH] Fix: avoid mixing different embedding models in document parsing (#8260) ### What problem does this PR solve? Fix mixing different embedding models in document parsing. ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --------- Co-authored-by: Kevin Hu --- api/db/services/llm_service.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/api/db/services/llm_service.py b/api/db/services/llm_service.py index 02e66944e..e124b5b16 100644 --- a/api/db/services/llm_service.py +++ b/api/db/services/llm_service.py @@ -169,7 +169,7 @@ class TenantLLMService(CommonService): return 0 llm_map = { - LLMType.EMBEDDING.value: tenant.embd_id, + LLMType.EMBEDDING.value: tenant.embd_id if not llm_name else llm_name, LLMType.SPEECH2TEXT.value: tenant.asr_id, LLMType.IMAGE2TEXT.value: tenant.img2txt_id, LLMType.CHAT.value: tenant.llm_id if not llm_name else llm_name, @@ -235,7 +235,8 @@ class LLMBundle: generation = self.trace.generation(name="encode", model=self.llm_name, input={"texts": texts}) embeddings, used_tokens = self.mdl.encode(texts) - if not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, used_tokens): + llm_name = getattr(self, "llm_name", None) + if not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, used_tokens, llm_name): logging.error("LLMBundle.encode can't update token usage for {}/EMBEDDING used_tokens: {}".format(self.tenant_id, used_tokens)) if self.langfuse: @@ -248,7 +249,8 @@ class LLMBundle: generation = self.trace.generation(name="encode_queries", model=self.llm_name, input={"query": query}) emd, used_tokens = self.mdl.encode_queries(query) - if not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, used_tokens): + llm_name = getattr(self, "llm_name", None) + if not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, used_tokens, llm_name): logging.error("LLMBundle.encode_queries can't update token usage for {}/EMBEDDING used_tokens: {}".format(self.tenant_id, used_tokens)) if self.langfuse: