From a43adafc6b317785155eff2cecbf9e720dd8b816 Mon Sep 17 00:00:00 2001 From: Liu An Date: Tue, 10 Jun 2025 19:04:17 +0800 Subject: [PATCH] Refa: Add error handling for JSON decode in embedding models (#8162) ### What problem does this PR solve? Improve robustness of Jina, Nvidia, and SILICONFLOW embedding models by: 1. Adding try-catch blocks for JSON decode errors 2. Logging error details including response content 3. Raising exceptions with meaningful error messages ### Type of change - [x] Refactoring --- rag/llm/embedding_model.py | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/rag/llm/embedding_model.py b/rag/llm/embedding_model.py index d14752ce7..e02c81c27 100644 --- a/rag/llm/embedding_model.py +++ b/rag/llm/embedding_model.py @@ -19,6 +19,7 @@ import threading from urllib.parse import urljoin import requests +from requests.exceptions import JSONDecodeError from huggingface_hub import snapshot_download from zhipuai import ZhipuAI import os @@ -397,7 +398,12 @@ class JinaEmbed(Base): "input": texts[i:i + batch_size], 'encoding_type': 'float' } - res = requests.post(self.base_url, headers=self.headers, json=data).json() + response = requests.post(self.base_url, headers=self.headers, json=data) + try: + res = response.json() + except JSONDecodeError as e: + logging.error(f"JSON decode error: {e}\nResponse content: {response.text[:2000]}") + raise ress.extend([d["embedding"] for d in res["data"]]) token_count += self.total_token_count(res) return np.array(ress), token_count @@ -584,7 +590,12 @@ class NvidiaEmbed(Base): "encoding_format": "float", "truncate": "END", } - res = requests.post(self.base_url, headers=self.headers, json=payload).json() + response = requests.post(self.base_url, headers=self.headers, json=payload) + try: + res = response.json() + except JSONDecodeError as e: + logging.error(f"JSON decode error: {e}\nResponse content: {response.text[:2000]}") + raise ress.extend([d["embedding"] for d in 
res["data"]]) token_count += self.total_token_count(res) return np.array(ress), token_count @@ -692,7 +703,12 @@ class SILICONFLOWEmbed(Base): "input": texts_batch, "encoding_format": "float", } - res = requests.post(self.base_url, json=payload, headers=self.headers).json() + response = requests.post(self.base_url, json=payload, headers=self.headers) + try: + res = response.json() + except JSONDecodeError as e: + logging.error(f"JSON decode error: {e}\nResponse content: {response.text[:2000]}") + raise if "data" not in res or not isinstance(res["data"], list) or len(res["data"]) != len(texts_batch): raise ValueError(f"SILICONFLOWEmbed.encode got invalid response from {self.base_url}") ress.extend([d["embedding"] for d in res["data"]]) @@ -705,7 +721,12 @@ class SILICONFLOWEmbed(Base): "input": text, "encoding_format": "float", } - res = requests.post(self.base_url, json=payload, headers=self.headers).json() + response = requests.post(self.base_url, json=payload, headers=self.headers) + try: + res = response.json() + except JSONDecodeError as e: + logging.error(f"JSON decode error: {e}\nResponse content: {response.text[:2000]}") + raise if "data" not in res or not isinstance(res["data"], list) or len(res["data"])!= 1: raise ValueError(f"SILICONFLOWEmbed.encode_queries got invalid response from {self.base_url}") return np.array(res["data"][0]["embedding"]), self.total_token_count(res)