mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Add ParsertType Audio (#1637)
### What problem does this PR solve? #1514 ### Type of change - [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
@ -84,6 +84,7 @@ class ParserType(StrEnum):
|
||||
NAIVE = "naive"
|
||||
PICTURE = "picture"
|
||||
ONE = "one"
|
||||
AUDIO = "audio"
|
||||
|
||||
|
||||
class FileSource(StrEnum):
|
||||
@ -96,4 +97,4 @@ class CanvasType(StrEnum):
|
||||
ChatBot = "chatbot"
|
||||
DocBot = "docbot"
|
||||
|
||||
KNOWLEDGEBASE_FOLDER_NAME=".knowledgebase"
|
||||
KNOWLEDGEBASE_FOLDER_NAME=".knowledgebase"
|
||||
|
||||
@ -121,6 +121,8 @@ def init_llm_factory():
|
||||
LLMFactoriesService.filter_delete([LLMFactoriesService.model.name == "QAnything"])
|
||||
LLMService.filter_delete([LLMService.model.fid == "QAnything"])
|
||||
TenantLLMService.filter_update([TenantLLMService.model.llm_factory == "QAnything"], {"llm_factory": "Youdao"})
|
||||
TenantService.filter_update([1 == 1], {
|
||||
"parser_ids": "naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One,audio:Audio"})
|
||||
## insert openai two embedding models to the current openai user.
|
||||
print("Start to insert 2 OpenAI embedding models...")
|
||||
tenant_ids = set([row["tenant_id"] for row in TenantLLMService.get_openai_models()])
|
||||
@ -143,7 +145,7 @@ def init_llm_factory():
|
||||
"""
|
||||
drop table llm;
|
||||
drop table llm_factories;
|
||||
update tenant set parser_ids='naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One';
|
||||
update tenant set parser_ids='naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One,audio:Audio';
|
||||
alter table knowledgebase modify avatar longtext;
|
||||
alter table user modify avatar longtext;
|
||||
alter table dialog modify icon longtext;
|
||||
|
||||
@ -15,7 +15,7 @@
|
||||
#
|
||||
from api.db.services.user_service import TenantService
|
||||
from api.settings import database_logger
|
||||
from rag.llm import EmbeddingModel, CvModel, ChatModel, RerankModel
|
||||
from rag.llm import EmbeddingModel, CvModel, ChatModel, RerankModel, Seq2txtModel
|
||||
from api.db import LLMType
|
||||
from api.db.db_models import DB, UserTenant
|
||||
from api.db.db_models import LLMFactories, LLM, TenantLLM
|
||||
@ -120,6 +120,14 @@ class TenantLLMService(CommonService):
|
||||
return ChatModel[model_config["llm_factory"]](
|
||||
model_config["api_key"], model_config["llm_name"], base_url=model_config["api_base"])
|
||||
|
||||
if llm_type == LLMType.SPEECH2TEXT:
|
||||
if model_config["llm_factory"] not in Seq2txtModel:
|
||||
return
|
||||
return Seq2txtModel[model_config["llm_factory"]](
|
||||
model_config["api_key"], model_config["llm_name"], lang,
|
||||
base_url=model_config["api_base"]
|
||||
)
|
||||
|
||||
@classmethod
|
||||
@DB.connection_context()
|
||||
def increase_usage(cls, tenant_id, llm_type, used_tokens, llm_name=None):
|
||||
@ -207,6 +215,14 @@ class LLMBundle(object):
|
||||
"Can't update token usage for {}/IMAGE2TEXT".format(self.tenant_id))
|
||||
return txt
|
||||
|
||||
def transcription(self, audio):
|
||||
txt, used_tokens = self.mdl.transcription(audio)
|
||||
if not TenantLLMService.increase_usage(
|
||||
self.tenant_id, self.llm_type, used_tokens):
|
||||
database_logger.error(
|
||||
"Can't update token usage for {}/SEQUENCE2TXT".format(self.tenant_id))
|
||||
return txt
|
||||
|
||||
def chat(self, system, history, gen_conf):
|
||||
txt, used_tokens = self.mdl.chat(system, history, gen_conf)
|
||||
if not TenantLLMService.increase_usage(
|
||||
|
||||
Reference in New Issue
Block a user