Fix: presentation parsing & Embedding encode exception handling (#11933)

### What problem does this PR solve?

Fix: presentation parsing #11920
Fix: Embedding encode exception handling
### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
Magicbook1108
2025-12-13 11:37:42 +08:00
committed by GitHub
parent 6be0338aa0
commit 7d23c3aed0
2 changed files with 31 additions and 5 deletions

View File

@ -227,7 +227,8 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
for pn, (txt, img) in enumerate(sections):
d = copy.deepcopy(doc)
pn += from_page
if img:
if not isinstance(img, Image.Image):
img = None
d["image"] = img
d["page_num_int"] = [pn + 1]
d["top_int"] = [0]

View File

@ -121,11 +121,16 @@ class OpenAIEmbed(Base):
total_tokens += self.total_token_count(res)
except Exception as _e:
log_exception(_e, res)
raise Exception(f"Error: {res}")
return np.array(ress), total_tokens
def encode_queries(self, text):
res = self.client.embeddings.create(input=[truncate(text, 8191)], model=self.model_name, encoding_format="float",extra_body={"drop_params": True})
try:
return np.array(res.data[0].embedding), self.total_token_count(res)
except Exception as _e:
log_exception(_e, res)
raise Exception(f"Error: {res}")
class LocalAIEmbed(Base):
@ -147,6 +152,7 @@ class LocalAIEmbed(Base):
ress.extend([d.embedding for d in res.data])
except Exception as _e:
log_exception(_e, res)
raise Exception(f"Error: {res}")
# local embedding for LmStudio donot count tokens
return np.array(ress), 1024
@ -222,6 +228,7 @@ class QWenEmbed(Base):
return np.array(resp["output"]["embeddings"][0]["embedding"]), self.total_token_count(resp)
except Exception as _e:
log_exception(_e, resp)
raise Exception(f"Error: {resp}")
class ZhipuEmbed(Base):
@ -249,6 +256,7 @@ class ZhipuEmbed(Base):
tks_num += self.total_token_count(res)
except Exception as _e:
log_exception(_e, res)
raise Exception(f"Error: {res}")
return np.array(arr), tks_num
def encode_queries(self, text):
@ -257,6 +265,7 @@ class ZhipuEmbed(Base):
return np.array(res.data[0].embedding), self.total_token_count(res)
except Exception as _e:
log_exception(_e, res)
raise Exception(f"Error: {res}")
class OllamaEmbed(Base):
@ -281,6 +290,7 @@ class OllamaEmbed(Base):
arr.append(res["embedding"])
except Exception as _e:
log_exception(_e, res)
raise Exception(f"Error: {res}")
tks_num += 128
return np.array(arr), tks_num
@ -293,6 +303,7 @@ class OllamaEmbed(Base):
return np.array(res["embedding"]), 128
except Exception as _e:
log_exception(_e, res)
raise Exception(f"Error: {res}")
class XinferenceEmbed(Base):
@ -315,6 +326,7 @@ class XinferenceEmbed(Base):
total_tokens += self.total_token_count(res)
except Exception as _e:
log_exception(_e, res)
raise Exception(f"Error: {res}")
return np.array(ress), total_tokens
def encode_queries(self, text):
@ -324,6 +336,7 @@ class XinferenceEmbed(Base):
return np.array(res.data[0].embedding), self.total_token_count(res)
except Exception as _e:
log_exception(_e, res)
raise Exception(f"Error: {res}")
class YoudaoEmbed(Base):
@ -399,6 +412,7 @@ class JinaMultiVecEmbed(Base):
token_count += self.total_token_count(res)
except Exception as _e:
log_exception(_e, response)
raise Exception(f"Error: {response}")
return np.array(ress), token_count
def encode_queries(self, text):
@ -531,6 +545,7 @@ class GeminiEmbed(Base):
ress.extend(result["embedding"])
except Exception as _e:
log_exception(_e, result)
raise Exception(f"Error: {result}")
return np.array(ress), token_count
def encode_queries(self, text):
@ -541,6 +556,7 @@ class GeminiEmbed(Base):
return np.array(result["embedding"]), token_count
except Exception as _e:
log_exception(_e, result)
raise Exception(f"Error: {result}")
class NvidiaEmbed(Base):
@ -578,10 +594,11 @@ class NvidiaEmbed(Base):
response = requests.post(self.base_url, headers=self.headers, json=payload)
try:
res = response.json()
except Exception as _e:
log_exception(_e, response)
ress.extend([d["embedding"] for d in res["data"]])
token_count += self.total_token_count(res)
except Exception as _e:
log_exception(_e, response)
raise Exception(f"Error: {response}")
return np.array(ress), token_count
def encode_queries(self, text):
@ -636,6 +653,7 @@ class CoHereEmbed(Base):
token_count += res.meta.billed_units.input_tokens
except Exception as _e:
log_exception(_e, res)
raise Exception(f"Error: {res}")
return np.array(ress), token_count
def encode_queries(self, text):
@ -649,6 +667,7 @@ class CoHereEmbed(Base):
return np.array(res.embeddings.float[0]), int(res.meta.billed_units.input_tokens)
except Exception as _e:
log_exception(_e, res)
raise Exception(f"Error: {res}")
class TogetherAIEmbed(OpenAIEmbed):
@ -716,6 +735,7 @@ class SILICONFLOWEmbed(Base):
token_count += self.total_token_count(res)
except Exception as _e:
log_exception(_e, response)
raise Exception(f"Error: {response}")
return np.array(ress), token_count
@ -731,6 +751,7 @@ class SILICONFLOWEmbed(Base):
return np.array(res["data"][0]["embedding"]), self.total_token_count(res)
except Exception as _e:
log_exception(_e, response)
raise Exception(f"Error: {response}")
class ReplicateEmbed(Base):
@ -777,6 +798,7 @@ class BaiduYiyanEmbed(Base):
)
except Exception as _e:
log_exception(_e, res)
raise Exception(f"Error: {res}")
def encode_queries(self, text):
res = self.client.do(model=self.model_name, texts=[text]).body
@ -787,6 +809,7 @@ class BaiduYiyanEmbed(Base):
)
except Exception as _e:
log_exception(_e, res)
raise Exception(f"Error: {res}")
class VoyageEmbed(Base):
@ -809,6 +832,7 @@ class VoyageEmbed(Base):
token_count += res.total_tokens
except Exception as _e:
log_exception(_e, res)
raise Exception(f"Error: {res}")
return np.array(ress), token_count
def encode_queries(self, text):
@ -817,6 +841,7 @@ class VoyageEmbed(Base):
return np.array(res.embeddings)[0], res.total_tokens
except Exception as _e:
log_exception(_e, res)
raise Exception(f"Error: {res}")
class HuggingFaceEmbed(Base):