Feat:new api /sequence2txt and update QWenSeq2txt (#11643)

### What problem does this PR solve?
change:
new api /sequence2txt,
update QWenSeq2txt and ZhipuSeq2txt

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
buua436
2025-12-02 11:17:31 +08:00
committed by GitHub
parent d1e172171f
commit b8c0fb4572
7 changed files with 3630 additions and 3408 deletions

View File

@ -185,6 +185,66 @@ class LLMBundle(LLM4Tenant):
return txt
def stream_transcription(self, audio):
mdl = self.mdl
supports_stream = hasattr(mdl, "stream_transcription") and callable(getattr(mdl, "stream_transcription"))
if supports_stream:
if self.langfuse:
generation = self.langfuse.start_generation(
trace_context=self.trace_context,
name="stream_transcription",
metadata={"model": self.llm_name}
)
final_text = ""
used_tokens = 0
try:
for evt in mdl.stream_transcription(audio):
if evt.get("event") == "final":
final_text = evt.get("text", "")
yield evt
except Exception as e:
err = {"event": "error", "text": str(e)}
yield err
final_text = final_text or ""
finally:
if final_text:
used_tokens = num_tokens_from_string(final_text)
TenantLLMService.increase_usage(self.tenant_id, self.llm_type, used_tokens)
if self.langfuse:
generation.update(
output={"output": final_text},
usage_details={"total_tokens": used_tokens}
)
generation.end()
return
if self.langfuse:
generation = self.langfuse.start_generation(trace_context=self.trace_context, name="stream_transcription", metadata={"model": self.llm_name})
full_text, used_tokens = mdl.transcription(audio)
if not TenantLLMService.increase_usage(
self.tenant_id, self.llm_type, used_tokens
):
logging.error(
f"LLMBundle.stream_transcription can't update token usage for {self.tenant_id}/SEQUENCE2TXT used_tokens: {used_tokens}"
)
if self.langfuse:
generation.update(
output={"output": full_text},
usage_details={"total_tokens": used_tokens}
)
generation.end()
yield {
"event": "final",
"text": full_text,
"streaming": False
}
def tts(self, text: str) -> Generator[bytes, None, None]:
if self.langfuse:
generation = self.langfuse.start_generation(trace_context=self.trace_context, name="tts", input={"text": text})