mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Fix: add video parser (#10735)
### Type of change - [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
@ -70,6 +70,7 @@ def create():
|
|||||||
e, t = TenantService.get_by_id(current_user.id)
|
e, t = TenantService.get_by_id(current_user.id)
|
||||||
if not e:
|
if not e:
|
||||||
return get_data_error_result(message="Tenant not found.")
|
return get_data_error_result(message="Tenant not found.")
|
||||||
|
req["embd_id"] = t.embd_id
|
||||||
req["parser_config"] = {
|
req["parser_config"] = {
|
||||||
"layout_recognize": "DeepDOC",
|
"layout_recognize": "DeepDOC",
|
||||||
"chunk_token_num": 512,
|
"chunk_token_num": 512,
|
||||||
|
|||||||
@ -140,7 +140,13 @@ class ParserParam(ProcessParamBase):
|
|||||||
],
|
],
|
||||||
"output_format": "json",
|
"output_format": "json",
|
||||||
},
|
},
|
||||||
"video": {},
|
"video": {
|
||||||
|
"suffix":[
|
||||||
|
"mp4",
|
||||||
|
"av"
|
||||||
|
],
|
||||||
|
"output_format": "json",
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def check(self):
|
def check(self):
|
||||||
@ -185,6 +191,10 @@ class ParserParam(ProcessParamBase):
|
|||||||
if audio_config:
|
if audio_config:
|
||||||
self.check_empty(audio_config.get("llm_id"), "Audio VLM")
|
self.check_empty(audio_config.get("llm_id"), "Audio VLM")
|
||||||
|
|
||||||
|
video_config = self.setups.get("video", "")
|
||||||
|
if video_config:
|
||||||
|
self.check_empty(video_config.get("llm_id"), "Video VLM")
|
||||||
|
|
||||||
email_config = self.setups.get("email", "")
|
email_config = self.setups.get("email", "")
|
||||||
if email_config:
|
if email_config:
|
||||||
email_output_format = email_config.get("output_format", "")
|
email_output_format = email_config.get("output_format", "")
|
||||||
@ -212,8 +222,8 @@ class Parser(ProcessBase):
|
|||||||
lines, _ = VisionParser(vision_model=vision_model)(blob, callback=self.callback)
|
lines, _ = VisionParser(vision_model=vision_model)(blob, callback=self.callback)
|
||||||
bboxes = []
|
bboxes = []
|
||||||
for t, poss in lines:
|
for t, poss in lines:
|
||||||
pn, x0, x1, top, bott = poss.split(" ")
|
for pn, x0, x1, top, bott in RAGFlowPdfParser.extract_positions(poss):
|
||||||
bboxes.append({"page_number": int(pn), "x0": float(x0), "x1": float(x1), "top": float(top), "bottom": float(bott), "text": t})
|
bboxes.append({"page_number": int(pn[0]), "x0": float(x0), "x1": float(x1), "top": float(top), "bottom": float(bott), "text": t})
|
||||||
|
|
||||||
if conf.get("output_format") == "json":
|
if conf.get("output_format") == "json":
|
||||||
self.set_output("json", bboxes)
|
self.set_output("json", bboxes)
|
||||||
@ -357,6 +367,17 @@ class Parser(ProcessBase):
|
|||||||
|
|
||||||
self.set_output("text", txt)
|
self.set_output("text", txt)
|
||||||
|
|
||||||
|
def _video(self, name, blob):
|
||||||
|
self.callback(random.randint(1, 5) / 100.0, "Start to work on an video.")
|
||||||
|
|
||||||
|
conf = self._param.setups["video"]
|
||||||
|
self.set_output("output_format", conf["output_format"])
|
||||||
|
|
||||||
|
cv_mdl = LLMBundle(self._canvas.get_tenant_id(), LLMType.IMAGE2TEXT)
|
||||||
|
txt = cv_mdl.chat(system="", history=[], gen_conf={}, video_bytes=blob, filename=name)
|
||||||
|
|
||||||
|
self.set_output("text", txt)
|
||||||
|
|
||||||
def _email(self, name, blob):
|
def _email(self, name, blob):
|
||||||
self.callback(random.randint(1, 5) / 100.0, "Start to work on an email.")
|
self.callback(random.randint(1, 5) / 100.0, "Start to work on an email.")
|
||||||
|
|
||||||
@ -483,6 +504,7 @@ class Parser(ProcessBase):
|
|||||||
"word": self._word,
|
"word": self._word,
|
||||||
"image": self._image,
|
"image": self._image,
|
||||||
"audio": self._audio,
|
"audio": self._audio,
|
||||||
|
"video": self._video,
|
||||||
"email": self._email,
|
"email": self._email,
|
||||||
}
|
}
|
||||||
try:
|
try:
|
||||||
|
|||||||
Reference in New Issue
Block a user