Feat: add basic Langfuse support for LLM module (#6443)

### What problem does this PR solve?

#6155

Add basic Langfuse support for LLM module.

A trace example:

<img width="755" alt="image" src="https://github.com/user-attachments/assets/25c1f852-5116-486c-a47f-6097187142ca" />

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
2026-02-01 16:15:07 +08:00 · 2025-03-24 13:18:47 +08:00
parent 0b63346a1a
commit 85eb367ede
7 changed files with 714 additions and 892 deletions
--- a/api/db/services/llm_service.py
+++ b/api/db/services/llm_service.py
@@ -15,10 +15,13 @@
 #
 import logging

+from langfuse import Langfuse
+
 from api import settings
 from api.db import LLMType
 from api.db.db_models import DB, LLM, LLMFactories, TenantLLM
 from api.db.services.common_service import CommonService
+from api.db.services.langfuse_service import TenantLangfuseService
 from api.db.services.user_service import TenantService
 from rag.llm import ChatModel, CvModel, EmbeddingModel, RerankModel, Seq2txtModel, TTSModel

@@ -49,16 +52,8 @@ class TenantLLMService(CommonService):
    @classmethod
    @DB.connection_context()
    def get_my_llms(cls, tenant_id):
-        fields = [
-            cls.model.llm_factory,
-            LLMFactories.logo,
-            LLMFactories.tags,
-            cls.model.model_type,
-            cls.model.llm_name,
-            cls.model.used_tokens
-        ]
-        objs = cls.model.select(*fields).join(LLMFactories, on=(cls.model.llm_factory == LLMFactories.name)).where(
-            cls.model.tenant_id == tenant_id, ~cls.model.api_key.is_null()).dicts()
+        fields = [cls.model.llm_factory, LLMFactories.logo, LLMFactories.tags, cls.model.model_type, cls.model.llm_name, cls.model.used_tokens]
+        objs = cls.model.select(*fields).join(LLMFactories, on=(cls.model.llm_factory == LLMFactories.name)).where(cls.model.tenant_id == tenant_id, ~cls.model.api_key.is_null()).dicts()

        return list(objs)

@@ -114,8 +109,7 @@ class TenantLLMService(CommonService):
                    model_config = {"llm_factory": llm[0].fid, "api_key": "", "llm_name": mdlnm, "api_base": ""}
            if not model_config:
                if mdlnm == "flag-embedding":
-                    model_config = {"llm_factory": "Tongyi-Qianwen", "api_key": "",
-                                    "llm_name": llm_name, "api_base": ""}
+                    model_config = {"llm_factory": "Tongyi-Qianwen", "api_key": "", "llm_name": llm_name, "api_base": ""}
                else:
                    if not mdlnm:
                        raise LookupError(f"Type of {llm_type} model is not set.")
@@ -124,43 +118,32 @@ class TenantLLMService(CommonService):

    @classmethod
    @DB.connection_context()
-    def model_instance(cls, tenant_id, llm_type,
-                       llm_name=None, lang="Chinese"):
+    def model_instance(cls, tenant_id, llm_type, llm_name=None, lang="Chinese"):
        model_config = TenantLLMService.get_model_config(tenant_id, llm_type, llm_name)
        if llm_type == LLMType.EMBEDDING.value:
            if model_config["llm_factory"] not in EmbeddingModel:
                return
-            return EmbeddingModel[model_config["llm_factory"]](
-                model_config["api_key"], model_config["llm_name"], base_url=model_config["api_base"])
+            return EmbeddingModel[model_config["llm_factory"]](model_config["api_key"], model_config["llm_name"], base_url=model_config["api_base"])

        if llm_type == LLMType.RERANK:
            if model_config["llm_factory"] not in RerankModel:
                return
-            return RerankModel[model_config["llm_factory"]](
-                model_config["api_key"], model_config["llm_name"], base_url=model_config["api_base"])
+            return RerankModel[model_config["llm_factory"]](model_config["api_key"], model_config["llm_name"], base_url=model_config["api_base"])

        if llm_type == LLMType.IMAGE2TEXT.value:
            if model_config["llm_factory"] not in CvModel:
                return
-            return CvModel[model_config["llm_factory"]](
-                model_config["api_key"], model_config["llm_name"], lang,
-                base_url=model_config["api_base"]
-            )
+            return CvModel[model_config["llm_factory"]](model_config["api_key"], model_config["llm_name"], lang, base_url=model_config["api_base"])

        if llm_type == LLMType.CHAT.value:
            if model_config["llm_factory"] not in ChatModel:
                return
-            return ChatModel[model_config["llm_factory"]](
-                model_config["api_key"], model_config["llm_name"], base_url=model_config["api_base"])
+            return ChatModel[model_config["llm_factory"]](model_config["api_key"], model_config["llm_name"], base_url=model_config["api_base"])

        if llm_type == LLMType.SPEECH2TEXT:
            if model_config["llm_factory"] not in Seq2txtModel:
                return
-            return Seq2txtModel[model_config["llm_factory"]](
-                key=model_config["api_key"], model_name=model_config["llm_name"],
-                lang=lang,
-                base_url=model_config["api_base"]
-            )
+            return Seq2txtModel[model_config["llm_factory"]](key=model_config["api_key"], model_name=model_config["llm_name"], lang=lang, base_url=model_config["api_base"])
        if llm_type == LLMType.TTS:
            if model_config["llm_factory"] not in TTSModel:
                return
@@ -184,7 +167,7 @@ class TenantLLMService(CommonService):
            LLMType.IMAGE2TEXT.value: tenant.img2txt_id,
            LLMType.CHAT.value: tenant.llm_id if not llm_name else llm_name,
            LLMType.RERANK.value: tenant.rerank_id if not llm_name else llm_name,
-            LLMType.TTS.value: tenant.tts_id if not llm_name else llm_name
+            LLMType.TTS.value: tenant.tts_id if not llm_name else llm_name,
        }

        mdlnm = llm_map.get(llm_type)
@@ -195,17 +178,13 @@ class TenantLLMService(CommonService):
        llm_name, llm_factory = TenantLLMService.split_model_name_and_factory(mdlnm)

        try:
-            num = cls.model.update(
-                used_tokens=cls.model.used_tokens + used_tokens
-            ).where(
-                cls.model.tenant_id == tenant_id,
-                cls.model.llm_name == llm_name,
-                cls.model.llm_factory == llm_factory if llm_factory else True
-            ).execute()
+            num = (
+                cls.model.update(used_tokens=cls.model.used_tokens + used_tokens)
+                .where(cls.model.tenant_id == tenant_id, cls.model.llm_name == llm_name, cls.model.llm_factory == llm_factory if llm_factory else True)
+                .execute()
+            )
        except Exception:
-            logging.exception(
-                "TenantLLMService.increase_usage got exception,Failed to update used_tokens for tenant_id=%s, llm_name=%s",
-                tenant_id, llm_name)
+            logging.exception("TenantLLMService.increase_usage got exception,Failed to update used_tokens for tenant_id=%s, llm_name=%s", tenant_id, llm_name)
            return 0

        return num
@@ -213,11 +192,7 @@ class TenantLLMService(CommonService):
    @classmethod
    @DB.connection_context()
    def get_openai_models(cls):
-        objs = cls.model.select().where(
-            (cls.model.llm_factory == "OpenAI"),
-            ~(cls.model.llm_name == "text-embedding-3-small"),
-            ~(cls.model.llm_name == "text-embedding-3-large")
-        ).dicts()
+        objs = cls.model.select().where((cls.model.llm_factory == "OpenAI"), ~(cls.model.llm_name == "text-embedding-3-small"), ~(cls.model.llm_name == "text-embedding-3-large")).dicts()
        return list(objs)


@@ -226,87 +201,138 @@ class LLMBundle:
        self.tenant_id = tenant_id
        self.llm_type = llm_type
        self.llm_name = llm_name
-        self.mdl = TenantLLMService.model_instance(
-            tenant_id, llm_type, llm_name, lang=lang)
-        assert self.mdl, "Can't find model for {}/{}/{}".format(
-            tenant_id, llm_type, llm_name)
+        self.mdl = TenantLLMService.model_instance(tenant_id, llm_type, llm_name, lang=lang)
+        assert self.mdl, "Can't find model for {}/{}/{}".format(tenant_id, llm_type, llm_name)
        model_config = TenantLLMService.get_model_config(tenant_id, llm_type, llm_name)
        self.max_length = model_config.get("max_tokens", 8192)

+        langfuse_keys = TenantLangfuseService.filter_by_tenant(tenant_id=tenant_id)
+        if langfuse_keys:
+            langfuse = Langfuse(public_key=langfuse_keys.public_key, secret_key=langfuse_keys.secret_key, host=langfuse_keys.host)
+            if langfuse.auth_check():
+                self.langfuse = langfuse
+                self.trace = self.langfuse.trace(name=f"{self.llm_type}-{self.llm_name}")
+        else:
+            self.langfuse = None
+
    def encode(self, texts: list):
+        if self.langfuse:
+            generation = self.trace.generation(name="encode", model=self.llm_name, input={"texts": texts})
+
        embeddings, used_tokens = self.mdl.encode(texts)
-        if not TenantLLMService.increase_usage(
-                self.tenant_id, self.llm_type, used_tokens):
-            logging.error(
-                "LLMBundle.encode can't update token usage for {}/EMBEDDING used_tokens: {}".format(self.tenant_id, used_tokens))
+        if not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, used_tokens):
+            logging.error("LLMBundle.encode can't update token usage for {}/EMBEDDING used_tokens: {}".format(self.tenant_id, used_tokens))
+
+        if self.langfuse:
+            generation.end(usage_details={"total_tokens": used_tokens})
+
        return embeddings, used_tokens

    def encode_queries(self, query: str):
+        if self.langfuse:
+            generation = self.trace.generation(name="encode_queries", model=self.llm_name, input={"query": query})
+
        emd, used_tokens = self.mdl.encode_queries(query)
-        if not TenantLLMService.increase_usage(
-                self.tenant_id, self.llm_type, used_tokens):
-            logging.error(
-                "LLMBundle.encode_queries can't update token usage for {}/EMBEDDING used_tokens: {}".format(self.tenant_id, used_tokens))
+        if not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, used_tokens):
+            logging.error("LLMBundle.encode_queries can't update token usage for {}/EMBEDDING used_tokens: {}".format(self.tenant_id, used_tokens))
+
+        if self.langfuse:
+            generation.end(usage_details={"total_tokens": used_tokens})
+
        return emd, used_tokens

    def similarity(self, query: str, texts: list):
+        if self.langfuse:
+            generation = self.trace.generation(name="similarity", model=self.llm_name, input={"query": query, "texts": texts})
+
        sim, used_tokens = self.mdl.similarity(query, texts)
-        if not TenantLLMService.increase_usage(
-                self.tenant_id, self.llm_type, used_tokens):
-            logging.error(
-                "LLMBundle.similarity can't update token usage for {}/RERANK used_tokens: {}".format(self.tenant_id, used_tokens))
+        if not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, used_tokens):
+            logging.error("LLMBundle.similarity can't update token usage for {}/RERANK used_tokens: {}".format(self.tenant_id, used_tokens))
+
+        if self.langfuse:
+            generation.end(usage_details={"total_tokens": used_tokens})
+
        return sim, used_tokens

    def describe(self, image, max_tokens=300):
+        if self.langfuse:
+            generation = self.trace.generation(name="describe", metadata={"model": self.llm_name})
+
        txt, used_tokens = self.mdl.describe(image, max_tokens)
-        if not TenantLLMService.increase_usage(
-                self.tenant_id, self.llm_type, used_tokens):
-            logging.error(
-                "LLMBundle.describe can't update token usage for {}/IMAGE2TEXT used_tokens: {}".format(self.tenant_id, used_tokens))
+        if not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, used_tokens):
+            logging.error("LLMBundle.describe can't update token usage for {}/IMAGE2TEXT used_tokens: {}".format(self.tenant_id, used_tokens))
+
+        if self.langfuse:
+            generation.end(output={"output": txt}, usage_details={"total_tokens": used_tokens})
+
        return txt

    def describe_with_prompt(self, image, prompt):
+        if self.langfuse:
+            generation = self.trace.generation(name="describe_with_prompt", metadata={"model": self.llm_name, "prompt": prompt})
+
        txt, used_tokens = self.mdl.describe_with_prompt(image, prompt)
-        if not TenantLLMService.increase_usage(
-                self.tenant_id, self.llm_type, used_tokens):
-            logging.error(
-                "LLMBundle.describe can't update token usage for {}/IMAGE2TEXT used_tokens: {}".format(self.tenant_id, used_tokens))
+        if not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, used_tokens):
+            logging.error("LLMBundle.describe can't update token usage for {}/IMAGE2TEXT used_tokens: {}".format(self.tenant_id, used_tokens))
+
+        if self.langfuse:
+            generation.end(output={"output": txt}, usage_details={"total_tokens": used_tokens})
+
        return txt

    def transcription(self, audio):
+        if self.langfuse:
+            generation = self.trace.generation(name="transcription", metadata={"model": self.llm_name})
+
        txt, used_tokens = self.mdl.transcription(audio)
-        if not TenantLLMService.increase_usage(
-                self.tenant_id, self.llm_type, used_tokens):
-            logging.error(
-                "LLMBundle.transcription can't update token usage for {}/SEQUENCE2TXT used_tokens: {}".format(self.tenant_id, used_tokens))
+        if not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, used_tokens):
+            logging.error("LLMBundle.transcription can't update token usage for {}/SEQUENCE2TXT used_tokens: {}".format(self.tenant_id, used_tokens))
+
+        if self.langfuse:
+            generation.end(output={"output": txt}, usage_details={"total_tokens": used_tokens})
+
        return txt

    def tts(self, text):
+        if self.langfuse:
+            span = self.trace.span(name="tts", input={"text": text})
+
        for chunk in self.mdl.tts(text):
            if isinstance(chunk, int):
-                if not TenantLLMService.increase_usage(
-                        self.tenant_id, self.llm_type, chunk, self.llm_name):
-                    logging.error(
-                        "LLMBundle.tts can't update token usage for {}/TTS".format(self.tenant_id))
+                if not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, chunk, self.llm_name):
+                    logging.error("LLMBundle.tts can't update token usage for {}/TTS".format(self.tenant_id))
                return
            yield chunk

+        if self.langfuse:
+            span.end()
+
    def chat(self, system, history, gen_conf):
+        if self.langfuse:
+            generation = self.trace.generation(name="chat", model=self.llm_name, input={"system": system, "history": history})
+
        txt, used_tokens = self.mdl.chat(system, history, gen_conf)
-        if isinstance(txt, int) and not TenantLLMService.increase_usage(
-                self.tenant_id, self.llm_type, used_tokens, self.llm_name):
-            logging.error(
-                "LLMBundle.chat can't update token usage for {}/CHAT llm_name: {}, used_tokens: {}".format(self.tenant_id, self.llm_name,
-                                                                                                           used_tokens))
+        if isinstance(txt, int) and not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, used_tokens, self.llm_name):
+            logging.error("LLMBundle.chat can't update token usage for {}/CHAT llm_name: {}, used_tokens: {}".format(self.tenant_id, self.llm_name, used_tokens))
+
+        if self.langfuse:
+            generation.end(output={"output": txt}, usage_details={"total_tokens": used_tokens})
+
        return txt

    def chat_streamly(self, system, history, gen_conf):
+        if self.langfuse:
+            generation = self.trace.generation(name="chat_streamly", model=self.llm_name, input={"system": system, "history": history})
+
+        output = ""
        for txt in self.mdl.chat_streamly(system, history, gen_conf):
            if isinstance(txt, int):
-                if not TenantLLMService.increase_usage(
-                        self.tenant_id, self.llm_type, txt, self.llm_name):
-                    logging.error(
-                        "LLMBundle.chat_streamly can't update token usage for {}/CHAT llm_name: {}, content: {}".format(self.tenant_id, self.llm_name,
-                                                                                                                        txt))
+                if self.langfuse:
+                    generation.end(output={"output": output})
+
+                if not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, txt, self.llm_name):
+                    logging.error("LLMBundle.chat_streamly can't update token usage for {}/CHAT llm_name: {}, content: {}".format(self.tenant_id, self.llm_name, txt))
                return
+
+            output = txt
            yield txt