mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-20 04:39:00 +08:00
Fix: presentation parsing & Embedding encode exception handling (#11933)
### What problem does this PR solve? Fix: presentation parsing #11920 Fix: Embedding encode exception handling ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
@ -227,8 +227,9 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
|||||||
for pn, (txt, img) in enumerate(sections):
|
for pn, (txt, img) in enumerate(sections):
|
||||||
d = copy.deepcopy(doc)
|
d = copy.deepcopy(doc)
|
||||||
pn += from_page
|
pn += from_page
|
||||||
if img:
|
if not isinstance(img, Image.Image):
|
||||||
d["image"] = img
|
img = None
|
||||||
|
d["image"] = img
|
||||||
d["page_num_int"] = [pn + 1]
|
d["page_num_int"] = [pn + 1]
|
||||||
d["top_int"] = [0]
|
d["top_int"] = [0]
|
||||||
d["position_int"] = [(pn + 1, 0, img.size[0] if img else 0, 0,
|
d["position_int"] = [(pn + 1, 0, img.size[0] if img else 0, 0,
|
||||||
|
|||||||
@ -121,11 +121,16 @@ class OpenAIEmbed(Base):
|
|||||||
total_tokens += self.total_token_count(res)
|
total_tokens += self.total_token_count(res)
|
||||||
except Exception as _e:
|
except Exception as _e:
|
||||||
log_exception(_e, res)
|
log_exception(_e, res)
|
||||||
|
raise Exception(f"Error: {res}")
|
||||||
return np.array(ress), total_tokens
|
return np.array(ress), total_tokens
|
||||||
|
|
||||||
def encode_queries(self, text):
|
def encode_queries(self, text):
|
||||||
res = self.client.embeddings.create(input=[truncate(text, 8191)], model=self.model_name, encoding_format="float",extra_body={"drop_params": True})
|
res = self.client.embeddings.create(input=[truncate(text, 8191)], model=self.model_name, encoding_format="float",extra_body={"drop_params": True})
|
||||||
return np.array(res.data[0].embedding), self.total_token_count(res)
|
try:
|
||||||
|
return np.array(res.data[0].embedding), self.total_token_count(res)
|
||||||
|
except Exception as _e:
|
||||||
|
log_exception(_e, res)
|
||||||
|
raise Exception(f"Error: {res}")
|
||||||
|
|
||||||
|
|
||||||
class LocalAIEmbed(Base):
|
class LocalAIEmbed(Base):
|
||||||
@ -147,6 +152,7 @@ class LocalAIEmbed(Base):
|
|||||||
ress.extend([d.embedding for d in res.data])
|
ress.extend([d.embedding for d in res.data])
|
||||||
except Exception as _e:
|
except Exception as _e:
|
||||||
log_exception(_e, res)
|
log_exception(_e, res)
|
||||||
|
raise Exception(f"Error: {res}")
|
||||||
# local embedding for LmStudio donot count tokens
|
# local embedding for LmStudio donot count tokens
|
||||||
return np.array(ress), 1024
|
return np.array(ress), 1024
|
||||||
|
|
||||||
@ -222,6 +228,7 @@ class QWenEmbed(Base):
|
|||||||
return np.array(resp["output"]["embeddings"][0]["embedding"]), self.total_token_count(resp)
|
return np.array(resp["output"]["embeddings"][0]["embedding"]), self.total_token_count(resp)
|
||||||
except Exception as _e:
|
except Exception as _e:
|
||||||
log_exception(_e, resp)
|
log_exception(_e, resp)
|
||||||
|
raise Exception(f"Error: {resp}")
|
||||||
|
|
||||||
|
|
||||||
class ZhipuEmbed(Base):
|
class ZhipuEmbed(Base):
|
||||||
@ -249,6 +256,7 @@ class ZhipuEmbed(Base):
|
|||||||
tks_num += self.total_token_count(res)
|
tks_num += self.total_token_count(res)
|
||||||
except Exception as _e:
|
except Exception as _e:
|
||||||
log_exception(_e, res)
|
log_exception(_e, res)
|
||||||
|
raise Exception(f"Error: {res}")
|
||||||
return np.array(arr), tks_num
|
return np.array(arr), tks_num
|
||||||
|
|
||||||
def encode_queries(self, text):
|
def encode_queries(self, text):
|
||||||
@ -257,6 +265,7 @@ class ZhipuEmbed(Base):
|
|||||||
return np.array(res.data[0].embedding), self.total_token_count(res)
|
return np.array(res.data[0].embedding), self.total_token_count(res)
|
||||||
except Exception as _e:
|
except Exception as _e:
|
||||||
log_exception(_e, res)
|
log_exception(_e, res)
|
||||||
|
raise Exception(f"Error: {res}")
|
||||||
|
|
||||||
|
|
||||||
class OllamaEmbed(Base):
|
class OllamaEmbed(Base):
|
||||||
@ -281,6 +290,7 @@ class OllamaEmbed(Base):
|
|||||||
arr.append(res["embedding"])
|
arr.append(res["embedding"])
|
||||||
except Exception as _e:
|
except Exception as _e:
|
||||||
log_exception(_e, res)
|
log_exception(_e, res)
|
||||||
|
raise Exception(f"Error: {res}")
|
||||||
tks_num += 128
|
tks_num += 128
|
||||||
return np.array(arr), tks_num
|
return np.array(arr), tks_num
|
||||||
|
|
||||||
@ -293,6 +303,7 @@ class OllamaEmbed(Base):
|
|||||||
return np.array(res["embedding"]), 128
|
return np.array(res["embedding"]), 128
|
||||||
except Exception as _e:
|
except Exception as _e:
|
||||||
log_exception(_e, res)
|
log_exception(_e, res)
|
||||||
|
raise Exception(f"Error: {res}")
|
||||||
|
|
||||||
|
|
||||||
class XinferenceEmbed(Base):
|
class XinferenceEmbed(Base):
|
||||||
@ -315,6 +326,7 @@ class XinferenceEmbed(Base):
|
|||||||
total_tokens += self.total_token_count(res)
|
total_tokens += self.total_token_count(res)
|
||||||
except Exception as _e:
|
except Exception as _e:
|
||||||
log_exception(_e, res)
|
log_exception(_e, res)
|
||||||
|
raise Exception(f"Error: {res}")
|
||||||
return np.array(ress), total_tokens
|
return np.array(ress), total_tokens
|
||||||
|
|
||||||
def encode_queries(self, text):
|
def encode_queries(self, text):
|
||||||
@ -324,6 +336,7 @@ class XinferenceEmbed(Base):
|
|||||||
return np.array(res.data[0].embedding), self.total_token_count(res)
|
return np.array(res.data[0].embedding), self.total_token_count(res)
|
||||||
except Exception as _e:
|
except Exception as _e:
|
||||||
log_exception(_e, res)
|
log_exception(_e, res)
|
||||||
|
raise Exception(f"Error: {res}")
|
||||||
|
|
||||||
|
|
||||||
class YoudaoEmbed(Base):
|
class YoudaoEmbed(Base):
|
||||||
@ -399,6 +412,7 @@ class JinaMultiVecEmbed(Base):
|
|||||||
token_count += self.total_token_count(res)
|
token_count += self.total_token_count(res)
|
||||||
except Exception as _e:
|
except Exception as _e:
|
||||||
log_exception(_e, response)
|
log_exception(_e, response)
|
||||||
|
raise Exception(f"Error: {response}")
|
||||||
return np.array(ress), token_count
|
return np.array(ress), token_count
|
||||||
|
|
||||||
def encode_queries(self, text):
|
def encode_queries(self, text):
|
||||||
@ -531,6 +545,7 @@ class GeminiEmbed(Base):
|
|||||||
ress.extend(result["embedding"])
|
ress.extend(result["embedding"])
|
||||||
except Exception as _e:
|
except Exception as _e:
|
||||||
log_exception(_e, result)
|
log_exception(_e, result)
|
||||||
|
raise Exception(f"Error: {result}")
|
||||||
return np.array(ress), token_count
|
return np.array(ress), token_count
|
||||||
|
|
||||||
def encode_queries(self, text):
|
def encode_queries(self, text):
|
||||||
@ -541,6 +556,7 @@ class GeminiEmbed(Base):
|
|||||||
return np.array(result["embedding"]), token_count
|
return np.array(result["embedding"]), token_count
|
||||||
except Exception as _e:
|
except Exception as _e:
|
||||||
log_exception(_e, result)
|
log_exception(_e, result)
|
||||||
|
raise Exception(f"Error: {result}")
|
||||||
|
|
||||||
|
|
||||||
class NvidiaEmbed(Base):
|
class NvidiaEmbed(Base):
|
||||||
@ -578,10 +594,11 @@ class NvidiaEmbed(Base):
|
|||||||
response = requests.post(self.base_url, headers=self.headers, json=payload)
|
response = requests.post(self.base_url, headers=self.headers, json=payload)
|
||||||
try:
|
try:
|
||||||
res = response.json()
|
res = response.json()
|
||||||
|
ress.extend([d["embedding"] for d in res["data"]])
|
||||||
|
token_count += self.total_token_count(res)
|
||||||
except Exception as _e:
|
except Exception as _e:
|
||||||
log_exception(_e, response)
|
log_exception(_e, response)
|
||||||
ress.extend([d["embedding"] for d in res["data"]])
|
raise Exception(f"Error: {response}")
|
||||||
token_count += self.total_token_count(res)
|
|
||||||
return np.array(ress), token_count
|
return np.array(ress), token_count
|
||||||
|
|
||||||
def encode_queries(self, text):
|
def encode_queries(self, text):
|
||||||
@ -636,6 +653,7 @@ class CoHereEmbed(Base):
|
|||||||
token_count += res.meta.billed_units.input_tokens
|
token_count += res.meta.billed_units.input_tokens
|
||||||
except Exception as _e:
|
except Exception as _e:
|
||||||
log_exception(_e, res)
|
log_exception(_e, res)
|
||||||
|
raise Exception(f"Error: {res}")
|
||||||
return np.array(ress), token_count
|
return np.array(ress), token_count
|
||||||
|
|
||||||
def encode_queries(self, text):
|
def encode_queries(self, text):
|
||||||
@ -649,6 +667,7 @@ class CoHereEmbed(Base):
|
|||||||
return np.array(res.embeddings.float[0]), int(res.meta.billed_units.input_tokens)
|
return np.array(res.embeddings.float[0]), int(res.meta.billed_units.input_tokens)
|
||||||
except Exception as _e:
|
except Exception as _e:
|
||||||
log_exception(_e, res)
|
log_exception(_e, res)
|
||||||
|
raise Exception(f"Error: {res}")
|
||||||
|
|
||||||
|
|
||||||
class TogetherAIEmbed(OpenAIEmbed):
|
class TogetherAIEmbed(OpenAIEmbed):
|
||||||
@ -716,6 +735,7 @@ class SILICONFLOWEmbed(Base):
|
|||||||
token_count += self.total_token_count(res)
|
token_count += self.total_token_count(res)
|
||||||
except Exception as _e:
|
except Exception as _e:
|
||||||
log_exception(_e, response)
|
log_exception(_e, response)
|
||||||
|
raise Exception(f"Error: {response}")
|
||||||
|
|
||||||
return np.array(ress), token_count
|
return np.array(ress), token_count
|
||||||
|
|
||||||
@ -731,6 +751,7 @@ class SILICONFLOWEmbed(Base):
|
|||||||
return np.array(res["data"][0]["embedding"]), self.total_token_count(res)
|
return np.array(res["data"][0]["embedding"]), self.total_token_count(res)
|
||||||
except Exception as _e:
|
except Exception as _e:
|
||||||
log_exception(_e, response)
|
log_exception(_e, response)
|
||||||
|
raise Exception(f"Error: {response}")
|
||||||
|
|
||||||
|
|
||||||
class ReplicateEmbed(Base):
|
class ReplicateEmbed(Base):
|
||||||
@ -777,6 +798,7 @@ class BaiduYiyanEmbed(Base):
|
|||||||
)
|
)
|
||||||
except Exception as _e:
|
except Exception as _e:
|
||||||
log_exception(_e, res)
|
log_exception(_e, res)
|
||||||
|
raise Exception(f"Error: {res}")
|
||||||
|
|
||||||
def encode_queries(self, text):
|
def encode_queries(self, text):
|
||||||
res = self.client.do(model=self.model_name, texts=[text]).body
|
res = self.client.do(model=self.model_name, texts=[text]).body
|
||||||
@ -787,6 +809,7 @@ class BaiduYiyanEmbed(Base):
|
|||||||
)
|
)
|
||||||
except Exception as _e:
|
except Exception as _e:
|
||||||
log_exception(_e, res)
|
log_exception(_e, res)
|
||||||
|
raise Exception(f"Error: {res}")
|
||||||
|
|
||||||
|
|
||||||
class VoyageEmbed(Base):
|
class VoyageEmbed(Base):
|
||||||
@ -809,6 +832,7 @@ class VoyageEmbed(Base):
|
|||||||
token_count += res.total_tokens
|
token_count += res.total_tokens
|
||||||
except Exception as _e:
|
except Exception as _e:
|
||||||
log_exception(_e, res)
|
log_exception(_e, res)
|
||||||
|
raise Exception(f"Error: {res}")
|
||||||
return np.array(ress), token_count
|
return np.array(ress), token_count
|
||||||
|
|
||||||
def encode_queries(self, text):
|
def encode_queries(self, text):
|
||||||
@ -817,6 +841,7 @@ class VoyageEmbed(Base):
|
|||||||
return np.array(res.embeddings)[0], res.total_tokens
|
return np.array(res.embeddings)[0], res.total_tokens
|
||||||
except Exception as _e:
|
except Exception as _e:
|
||||||
log_exception(_e, res)
|
log_exception(_e, res)
|
||||||
|
raise Exception(f"Error: {res}")
|
||||||
|
|
||||||
|
|
||||||
class HuggingFaceEmbed(Base):
|
class HuggingFaceEmbed(Base):
|
||||||
|
|||||||
Reference in New Issue
Block a user