Add ParserType Audio (#1637)

### What problem does this PR solve?

#1514

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
2026-02-02 16:45:08 +08:00 · 2024-07-22 19:17:30 +08:00
parent 9f109adf28
commit ac7a0d4fbf
10 changed files with 80 additions and 8 deletions
--- a/api/db/services/llm_service.py
+++ b/api/db/services/llm_service.py
@ -15,7 +15,7 @@
 #
 from api.db.services.user_service import TenantService
 from api.settings import database_logger
-from rag.llm import EmbeddingModel, CvModel, ChatModel, RerankModel
+from rag.llm import EmbeddingModel, CvModel, ChatModel, RerankModel, Seq2txtModel
 from api.db import LLMType
 from api.db.db_models import DB, UserTenant
 from api.db.db_models import LLMFactories, LLM, TenantLLM
@ -120,6 +120,14 @@ class TenantLLMService(CommonService):
            return ChatModel[model_config["llm_factory"]](
                model_config["api_key"], model_config["llm_name"], base_url=model_config["api_base"])

+        if llm_type == LLMType.SPEECH2TEXT:
+            if model_config["llm_factory"] not in Seq2txtModel:
+                return
+            return Seq2txtModel[model_config["llm_factory"]](
+                model_config["api_key"], model_config["llm_name"], lang,
+                base_url=model_config["api_base"]
+            )
+
    @classmethod
    @DB.connection_context()
    def increase_usage(cls, tenant_id, llm_type, used_tokens, llm_name=None):
@ -207,6 +215,14 @@ class LLMBundle(object):
                "Can't update token usage for {}/IMAGE2TEXT".format(self.tenant_id))
        return txt

+    def transcription(self, audio):
+        """Transcribe ``audio`` with the bound model and record token usage.
+
+        Delegates to ``self.mdl.transcription(audio)``, whose result is
+        unpacked as a ``(text, used_tokens)`` pair. The token count is then
+        reported through ``TenantLLMService.increase_usage`` for this
+        bundle's tenant and LLM type. If that call returns a falsy value the
+        failure is only logged; the transcribed text is still returned.
+
+        NOTE(review): the accepted form of ``audio`` (path, bytes, file
+        object, ...) is determined by the underlying model and is not
+        visible here -- confirm against the model implementations.
+        """
+        txt, used_tokens = self.mdl.transcription(audio)
+        if not TenantLLMService.increase_usage(
+                self.tenant_id, self.llm_type, used_tokens):
+            # NOTE(review): the label below reads SEQUENCE2TXT while the model
+            # type introduced alongside this method is LLMType.SPEECH2TEXT --
+            # confirm whether the differing label is intentional.
+            database_logger.error(
+                "Can't update token usage for {}/SEQUENCE2TXT".format(self.tenant_id))
+        return txt
+
    def chat(self, system, history, gen_conf):
        txt, used_tokens = self.mdl.chat(system, history, gen_conf)
        if not TenantLLMService.increase_usage(