diff --git a/api/db/services/llm_service.py b/api/db/services/llm_service.py
index 02e66944e..e124b5b16 100644
--- a/api/db/services/llm_service.py
+++ b/api/db/services/llm_service.py
@@ -169,7 +169,7 @@ class TenantLLMService(CommonService):
             return 0
 
         llm_map = {
-            LLMType.EMBEDDING.value: tenant.embd_id,
+            LLMType.EMBEDDING.value: tenant.embd_id if not llm_name else llm_name,
             LLMType.SPEECH2TEXT.value: tenant.asr_id,
             LLMType.IMAGE2TEXT.value: tenant.img2txt_id,
             LLMType.CHAT.value: tenant.llm_id if not llm_name else llm_name,
@@ -235,7 +235,8 @@ class LLMBundle:
             generation = self.trace.generation(name="encode", model=self.llm_name, input={"texts": texts})
 
         embeddings, used_tokens = self.mdl.encode(texts)
-        if not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, used_tokens):
+        llm_name = getattr(self, "llm_name", None)
+        if not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, used_tokens, llm_name):
             logging.error("LLMBundle.encode can't update token usage for {}/EMBEDDING used_tokens: {}".format(self.tenant_id, used_tokens))
 
         if self.langfuse:
@@ -248,7 +249,8 @@ class LLMBundle:
             generation = self.trace.generation(name="encode_queries", model=self.llm_name, input={"query": query})
 
         emd, used_tokens = self.mdl.encode_queries(query)
-        if not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, used_tokens):
+        llm_name = getattr(self, "llm_name", None)
+        if not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, used_tokens, llm_name):
             logging.error("LLMBundle.encode_queries can't update token usage for {}/EMBEDDING used_tokens: {}".format(self.tenant_id, used_tokens))
 
         if self.langfuse: