Feat: dataflow supports spreadsheet and word processor documents (#9996)

### What problem does this PR solve?

Dataflow now supports spreadsheet and word processor documents.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
Yongteng Lei
2025-09-10 13:02:53 +08:00
committed by GitHub
parent e650f0d368
commit 0d9c1f1c3c
9 changed files with 126 additions and 43 deletions

View File

@ -751,6 +751,8 @@ class SILICONFLOWEmbed(Base):
token_count = 0
for i in range(0, len(texts), batch_size):
texts_batch = texts[i : i + batch_size]
texts_batch = [" " if not text.strip() else text for text in texts_batch]
payload = {
"model": self.model_name,
"input": texts_batch,
@ -935,7 +937,7 @@ class GiteeEmbed(SILICONFLOWEmbed):
if not base_url:
base_url = "https://ai.gitee.com/v1/embeddings"
super().__init__(key, model_name, base_url)
class DeepInfraEmbed(OpenAIEmbed):
_FACTORY_NAME = "DeepInfra"
@ -951,4 +953,4 @@ class Ai302Embed(Base):
def __init__(self, key, model_name, base_url="https://api.302.ai/v1/embeddings"):
if not base_url:
base_url = "https://api.302.ai/v1/embeddings"
super().__init__(key, model_name, base_url)
super().__init__(key, model_name, base_url)