Fix: add video parser (#10735)

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
Kevin Hu
2025-10-23 09:24:16 +08:00
committed by GitHub
parent 15fff5724e
commit de4f75dcd8
2 changed files with 26 additions and 3 deletions

View File

@ -70,6 +70,7 @@ def create():
e, t = TenantService.get_by_id(current_user.id) e, t = TenantService.get_by_id(current_user.id)
if not e: if not e:
return get_data_error_result(message="Tenant not found.") return get_data_error_result(message="Tenant not found.")
req["embd_id"] = t.embd_id
req["parser_config"] = { req["parser_config"] = {
"layout_recognize": "DeepDOC", "layout_recognize": "DeepDOC",
"chunk_token_num": 512, "chunk_token_num": 512,

View File

@ -140,7 +140,13 @@ class ParserParam(ProcessParamBase):
], ],
"output_format": "json", "output_format": "json",
}, },
"video": {}, "video": {
"suffix":[
"mp4",
"av"
],
"output_format": "json",
},
} }
def check(self): def check(self):
@ -185,6 +191,10 @@ class ParserParam(ProcessParamBase):
if audio_config: if audio_config:
self.check_empty(audio_config.get("llm_id"), "Audio VLM") self.check_empty(audio_config.get("llm_id"), "Audio VLM")
video_config = self.setups.get("video", "")
if video_config:
self.check_empty(video_config.get("llm_id"), "Video VLM")
email_config = self.setups.get("email", "") email_config = self.setups.get("email", "")
if email_config: if email_config:
email_output_format = email_config.get("output_format", "") email_output_format = email_config.get("output_format", "")
@ -212,8 +222,8 @@ class Parser(ProcessBase):
lines, _ = VisionParser(vision_model=vision_model)(blob, callback=self.callback) lines, _ = VisionParser(vision_model=vision_model)(blob, callback=self.callback)
bboxes = [] bboxes = []
for t, poss in lines: for t, poss in lines:
pn, x0, x1, top, bott = poss.split(" ") for pn, x0, x1, top, bott in RAGFlowPdfParser.extract_positions(poss):
bboxes.append({"page_number": int(pn), "x0": float(x0), "x1": float(x1), "top": float(top), "bottom": float(bott), "text": t}) bboxes.append({"page_number": int(pn[0]), "x0": float(x0), "x1": float(x1), "top": float(top), "bottom": float(bott), "text": t})
if conf.get("output_format") == "json": if conf.get("output_format") == "json":
self.set_output("json", bboxes) self.set_output("json", bboxes)
@ -357,6 +367,17 @@ class Parser(ProcessBase):
self.set_output("text", txt) self.set_output("text", txt)
def _video(self, name, blob):
    """Send a video to a vision model and publish its reply as text.

    Args:
        name: File name of the video; forwarded to the model as ``filename``.
        blob: Raw video bytes; forwarded to the model as ``video_bytes``.

    Outputs set on this component:
        output_format: Copied verbatim from the ``"video"`` setup.
        text: The value returned by the model's ``chat`` call.
    """
    # Report a small random initial progress value (1%-5%).
    self.callback(random.randint(1, 5) / 100.0, "Start to work on an video.")

    conf = self._param.setups["video"]
    self.set_output("output_format", conf["output_format"])

    # The parameter check requires a "Video VLM" (``llm_id``) whenever a video
    # setup is present, so honour that choice here instead of silently using
    # the tenant's default IMAGE2TEXT model. ``.get`` keeps the previous
    # behaviour (default model) if the key is missing.
    cv_mdl = LLMBundle(
        self._canvas.get_tenant_id(),
        LLMType.IMAGE2TEXT,
        llm_name=conf.get("llm_id"),
    )
    txt = cv_mdl.chat(system="", history=[], gen_conf={}, video_bytes=blob, filename=name)

    # NOTE(review): only a "text" output is produced, even when the configured
    # output_format is "json" -- confirm downstream consumers expect this.
    self.set_output("text", txt)
def _email(self, name, blob): def _email(self, name, blob):
self.callback(random.randint(1, 5) / 100.0, "Start to work on an email.") self.callback(random.randint(1, 5) / 100.0, "Start to work on an email.")
@ -483,6 +504,7 @@ class Parser(ProcessBase):
"word": self._word, "word": self._word,
"image": self._image, "image": self._image,
"audio": self._audio, "audio": self._audio,
"video": self._video,
"email": self._email, "email": self._email,
} }
try: try: