mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Feat/parse audio (#10133)
### What problem does this PR solve? Dataflow support audio. And fix giteeAI's sequence2text model. ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) - [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
@ -54,7 +54,9 @@ class ParserParam(ProcessParamBase):
|
||||
"text",
|
||||
"json"
|
||||
],
|
||||
"audio": [],
|
||||
"audio": [
|
||||
"json"
|
||||
],
|
||||
"video": [],
|
||||
}
|
||||
|
||||
@ -102,7 +104,26 @@ class ParserParam(ProcessParamBase):
|
||||
],
|
||||
"output_format": "json",
|
||||
},
|
||||
"audio": {},
|
||||
"audio": {
|
||||
"suffix":[
|
||||
"da",
|
||||
"wave",
|
||||
"wav",
|
||||
"mp3",
|
||||
"aac",
|
||||
"flac",
|
||||
"ogg",
|
||||
"aiff",
|
||||
"au",
|
||||
"midi",
|
||||
"wma",
|
||||
"realaudio",
|
||||
"vqf",
|
||||
"oggvorbis",
|
||||
"ape"
|
||||
],
|
||||
"output_format": "json",
|
||||
},
|
||||
"video": {},
|
||||
}
|
||||
|
||||
@ -146,6 +167,12 @@ class ParserParam(ProcessParamBase):
|
||||
text_output_format = text_config.get("output_format", "")
|
||||
self.check_valid_value(text_output_format, "Text output format abnormal.", self.allowed_output_format["text"])
|
||||
|
||||
audio_config = self.setups.get("audio", "")
|
||||
if audio_config:
|
||||
self.check_empty(audio_config.get("llm_id"), "VLM")
|
||||
audio_language = audio_config.get("lang", "")
|
||||
self.check_empty(audio_language, "Language")
|
||||
|
||||
def get_input_form(self) -> dict[str, dict]:
|
||||
return {}
|
||||
|
||||
@ -313,6 +340,30 @@ class Parser(ProcessBase):
|
||||
|
||||
self.set_output("text", txt)
|
||||
|
||||
def _audio(self, from_upstream: ParserFromUpstream):
|
||||
import os
|
||||
import tempfile
|
||||
|
||||
self.callback(random.randint(1, 5) / 100.0, "Start to work on an audio.")
|
||||
|
||||
blob = from_upstream.blob
|
||||
name = from_upstream.name
|
||||
conf = self._param.setups["audio"]
|
||||
self.set_output("output_format", conf["output_format"])
|
||||
|
||||
lang = conf["lang"]
|
||||
_, ext = os.path.splitext(name)
|
||||
tmp_path = ""
|
||||
with tempfile.NamedTemporaryFile(suffix=ext) as tmpf:
|
||||
tmpf.write(blob)
|
||||
tmpf.flush()
|
||||
tmp_path = os.path.abspath(tmpf.name)
|
||||
|
||||
seq2txt_mdl = LLMBundle(self._canvas.get_tenant_id(), LLMType.SPEECH2TEXT, lang=lang)
|
||||
txt = seq2txt_mdl.transcription(tmp_path)
|
||||
|
||||
self.set_output("text", txt)
|
||||
|
||||
async def _invoke(self, **kwargs):
|
||||
function_map = {
|
||||
"pdf": self._pdf,
|
||||
@ -321,6 +372,7 @@ class Parser(ProcessBase):
|
||||
"word": self._word,
|
||||
"text": self._text,
|
||||
"image": self._image,
|
||||
"audio": self._audio,
|
||||
}
|
||||
try:
|
||||
from_upstream = ParserFromUpstream.model_validate(kwargs)
|
||||
|
||||
Reference in New Issue
Block a user