Refa: Add error handling for JSON decode in embedding models (#8162)

### What problem does this PR solve?

Improve robustness of Jina, Nvidia, and SILICONFLOW embedding models by:
1. Adding try-except blocks for JSON decode errors
2. Logging error details including response content
3. Re-raising the original JSONDecodeError after logging, so callers still see the failure

### Type of change

- [x] Refactoring
This commit is contained in:
Liu An
2025-06-10 19:04:17 +08:00
committed by GitHub
parent c5e4684b44
commit a43adafc6b

View File

@ -19,6 +19,7 @@ import threading
from urllib.parse import urljoin from urllib.parse import urljoin
import requests import requests
from requests.exceptions import JSONDecodeError
from huggingface_hub import snapshot_download from huggingface_hub import snapshot_download
from zhipuai import ZhipuAI from zhipuai import ZhipuAI
import os import os
@ -397,7 +398,12 @@ class JinaEmbed(Base):
"input": texts[i:i + batch_size], "input": texts[i:i + batch_size],
'encoding_type': 'float' 'encoding_type': 'float'
} }
res = requests.post(self.base_url, headers=self.headers, json=data).json() response = requests.post(self.base_url, headers=self.headers, json=data)
try:
res = response.json()
except JSONDecodeError as e:
logging.error(f"JSON decode error: {e}\nResponse content: {response.text[:2000]}")
raise
ress.extend([d["embedding"] for d in res["data"]]) ress.extend([d["embedding"] for d in res["data"]])
token_count += self.total_token_count(res) token_count += self.total_token_count(res)
return np.array(ress), token_count return np.array(ress), token_count
@ -584,7 +590,12 @@ class NvidiaEmbed(Base):
"encoding_format": "float", "encoding_format": "float",
"truncate": "END", "truncate": "END",
} }
res = requests.post(self.base_url, headers=self.headers, json=payload).json() response = requests.post(self.base_url, headers=self.headers, json=payload)
try:
res = response.json()
except JSONDecodeError as e:
logging.error(f"JSON decode error: {e}\nResponse content: {response.text[:2000]}")
raise
ress.extend([d["embedding"] for d in res["data"]]) ress.extend([d["embedding"] for d in res["data"]])
token_count += self.total_token_count(res) token_count += self.total_token_count(res)
return np.array(ress), token_count return np.array(ress), token_count
@ -692,7 +703,12 @@ class SILICONFLOWEmbed(Base):
"input": texts_batch, "input": texts_batch,
"encoding_format": "float", "encoding_format": "float",
} }
res = requests.post(self.base_url, json=payload, headers=self.headers).json() response = requests.post(self.base_url, json=payload, headers=self.headers)
try:
res = response.json()
except JSONDecodeError as e:
logging.error(f"JSON decode error: {e}\nResponse content: {response.text[:2000]}")
raise
if "data" not in res or not isinstance(res["data"], list) or len(res["data"]) != len(texts_batch): if "data" not in res or not isinstance(res["data"], list) or len(res["data"]) != len(texts_batch):
raise ValueError(f"SILICONFLOWEmbed.encode got invalid response from {self.base_url}") raise ValueError(f"SILICONFLOWEmbed.encode got invalid response from {self.base_url}")
ress.extend([d["embedding"] for d in res["data"]]) ress.extend([d["embedding"] for d in res["data"]])
@ -705,7 +721,12 @@ class SILICONFLOWEmbed(Base):
"input": text, "input": text,
"encoding_format": "float", "encoding_format": "float",
} }
res = requests.post(self.base_url, json=payload, headers=self.headers).json() response = requests.post(self.base_url, json=payload, headers=self.headers)
try:
res = response.json()
except JSONDecodeError as e:
logging.error(f"JSON decode error: {e}\nResponse content: {response.text[:2000]}")
raise
if "data" not in res or not isinstance(res["data"], list) or len(res["data"])!= 1: if "data" not in res or not isinstance(res["data"], list) or len(res["data"])!= 1:
raise ValueError(f"SILICONFLOWEmbed.encode_queries got invalid response from {self.base_url}") raise ValueError(f"SILICONFLOWEmbed.encode_queries got invalid response from {self.base_url}")
return np.array(res["data"][0]["embedding"]), self.total_token_count(res) return np.array(res["data"][0]["embedding"]), self.total_token_count(res)