Fix: add video parser (#10735)

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2026-01-31 23:55:06 +08:00 · 2025-10-23 09:24:16 +08:00
parent 15fff5724e
commit de4f75dcd8
2 changed files with 26 additions and 3 deletions
--- a/api/apps/kb_app.py
+++ b/api/apps/kb_app.py
@ -70,6 +70,7 @@ def create():
        e, t = TenantService.get_by_id(current_user.id)
        if not e:
            return get_data_error_result(message="Tenant not found.")
        req["embd_id"] = t.embd_id
        req["parser_config"] = {
            "layout_recognize": "DeepDOC",
            "chunk_token_num": 512,
--- a/rag/flow/parser/parser.py
+++ b/rag/flow/parser/parser.py
@ -140,7 +140,13 @@ class ParserParam(ProcessParamBase):
                ],
                "output_format": "json",
            },
-            "video": {},
+            "video": {
                "suffix":[
                    "mp4",
                    "av"
                ],
                "output_format": "json",
            },
        }
    def check(self):
@ -185,6 +191,10 @@ class ParserParam(ProcessParamBase):
        if audio_config:
            self.check_empty(audio_config.get("llm_id"), "Audio VLM")
        video_config = self.setups.get("video", "")
        if video_config:
            self.check_empty(video_config.get("llm_id"), "Video VLM")
        email_config = self.setups.get("email", "")
        if email_config:
            email_output_format = email_config.get("output_format", "")
@ -212,8 +222,8 @@ class Parser(ProcessBase):
            lines, _ = VisionParser(vision_model=vision_model)(blob, callback=self.callback)
            bboxes = []
            for t, poss in lines:
-                pn, x0, x1, top, bott = poss.split(" ")
+                for pn, x0, x1, top, bott in RAGFlowPdfParser.extract_positions(poss):
-                bboxes.append({"page_number": int(pn), "x0": float(x0), "x1": float(x1), "top": float(top), "bottom": float(bott), "text": t})
+                    bboxes.append({"page_number": int(pn[0]), "x0": float(x0), "x1": float(x1), "top": float(top), "bottom": float(bott), "text": t})
        if conf.get("output_format") == "json":
            self.set_output("json", bboxes)
@ -357,6 +367,17 @@ class Parser(ProcessBase):
            self.set_output("text", txt)
    def _video(self, name, blob):
        """Send a video to an image-to-text model and publish its reply.

        Args:
            name: File name of the video; forwarded to the model as ``filename``.
            blob: Raw video content; forwarded to the model as ``video_bytes``.

        Outputs set on this component:
            output_format: Copied verbatim from the "video" setup.
            text: The value returned by the model's ``chat`` call.

        Raises:
            KeyError: If the "video" setup or its "output_format" key is missing.
        """
        # Report a small random initial progress value (0.01-0.05) to the callback.
        self.callback(random.randint(1, 5) / 100.0, "Start to work on an video.")
        conf = self._param.setups["video"]
        self.set_output("output_format", conf["output_format"])
        # ParserParam.check() insists on a non-empty "llm_id" for the video
        # setup, so honour that choice here instead of silently falling back to
        # the tenant's default model.
        # NOTE(review): assumes LLMBundle accepts an optional ``llm_name``
        # keyword and treats None as "use the tenant default" - confirm.
        cv_mdl = LLMBundle(
            self._canvas.get_tenant_id(),
            LLMType.IMAGE2TEXT,
            llm_name=conf.get("llm_id"),
        )
        txt = cv_mdl.chat(system="", history=[], gen_conf={}, video_bytes=blob, filename=name)
        # Only the "text" output is populated here, whatever output_format says.
        self.set_output("text", txt)
    def _email(self, name, blob):
        self.callback(random.randint(1, 5) / 100.0, "Start to work on an email.")
@ -483,6 +504,7 @@ class Parser(ProcessBase):
            "word": self._word,
            "image": self._image,
            "audio": self._audio,
            "video": self._video,
            "email": self._email,
        }
        try: