From 27a36344d4ca03ab117cd8c6022ebd80ee74b3c0 Mon Sep 17 00:00:00 2001
From: Lin Manhui <bob1998425@hotmail.com>
Date: Tue, 27 Jan 2026 09:49:46 +0800
Subject: [PATCH] Feat: Support PaddleOCR-VL-1.5 interface (#12819)

### What problem does this PR solve?

This PR adds support for the PaddleOCR-VL-1.5 interface to the PaddleOCR PDF
parser.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
---
 deepdoc/parser/paddleocr_parser.py            | 13 ++++++---
 docs/faq.mdx                                  | 28 +++++++++----------
 .../paddleocr-options-form-field.tsx          |  2 +-
 3 files changed, 24 insertions(+), 19 deletions(-)

diff --git a/deepdoc/parser/paddleocr_parser.py b/deepdoc/parser/paddleocr_parser.py
index f6611e0c4..85db63b86 100644
--- a/deepdoc/parser/paddleocr_parser.py
+++ b/deepdoc/parser/paddleocr_parser.py
@@ -63,10 +63,9 @@ def _remove_images_from_markdown(markdown: str) -> str:
 class PaddleOCRVLConfig:
     """Configuration for PaddleOCR-VL algorithm."""
 
     use_doc_orientation_classify: Optional[bool] = False
     use_doc_unwarping: Optional[bool] = False
     use_layout_detection: Optional[bool] = None
-    use_polygon_points: Optional[bool] = None
     use_chart_recognition: Optional[bool] = None
     use_seal_recognition: Optional[bool] = None
     use_ocr_for_image_block: Optional[bool] = None
@@ -74,6 +73,7 @@ class PaddleOCRVLConfig:
     layout_nms: Optional[bool] = None
     layout_unclip_ratio: Optional[Union[float, Tuple[float, float], dict]] = None
     layout_merge_bboxes_mode: Optional[Union[str, dict]] = None
+    layout_shape_mode: Optional[str] = None
     prompt_label: Optional[str] = None
     format_block_content: Optional[bool] = True
     repetition_penalty: Optional[float] = None
@@ -85,6 +85,9 @@ class PaddleOCRVLConfig:
     merge_layout_blocks: Optional[bool] = False
     markdown_ignore_labels: Optional[List[str]] = None
     vlm_extra_args: Optional[dict] = None
+    restructure_pages: Optional[bool] = False
+    merge_tables: Optional[bool] = None
+    relevel_titles: Optional[bool] = None
 
 
 @dataclass
@@ -111,13 +114,12 @@ class PaddleOCRConfig:
         algorithm = cfg.get("algorithm", "PaddleOCR-VL")
 
         # Validate algorithm
         if algorithm not in ("PaddleOCR-VL",):
             raise ValueError(f"Unsupported algorithm: {algorithm}")
 
         # Extract algorithm-specific configuration
         algorithm_config: dict[str, Any] = {}
         if algorithm == "PaddleOCR-VL":
-            # Create default PaddleOCRVLConfig object and convert to dict
             algorithm_config = asdict(PaddleOCRVLConfig())
         algorithm_config_user = cfg.get("algorithm_config")
         if isinstance(algorithm_config_user, dict):
@@ -160,7 +162,6 @@ class PaddleOCRParser(RAGFlowPdfParser):
             "use_doc_orientation_classify": "useDocOrientationClassify",
             "use_doc_unwarping": "useDocUnwarping",
             "use_layout_detection": "useLayoutDetection",
-            "use_polygon_points": "usePolygonPoints",
             "use_chart_recognition": "useChartRecognition",
             "use_seal_recognition": "useSealRecognition",
             "use_ocr_for_image_block": "useOcrForImageBlock",
@@ -168,6 +169,7 @@ class PaddleOCRParser(RAGFlowPdfParser):
             "layout_nms": "layoutNms",
             "layout_unclip_ratio": "layoutUnclipRatio",
             "layout_merge_bboxes_mode": "layoutMergeBboxesMode",
+            "layout_shape_mode": "layoutShapeMode",
             "prompt_label": "promptLabel",
             "format_block_content": "formatBlockContent",
             "repetition_penalty": "repetitionPenalty",
@@ -179,6 +181,9 @@ class PaddleOCRParser(RAGFlowPdfParser):
             "merge_layout_blocks": "mergeLayoutBlocks",
             "markdown_ignore_labels": "markdownIgnoreLabels",
             "vlm_extra_args": "vlmExtraArgs",
+            "restructure_pages": "restructurePages",
+            "merge_tables": "mergeTables",
+            "relevel_titles": "relevelTitles",
         },
     }
 
@@ -370,7 +375,7 @@ class PaddleOCRParser(RAGFlowPdfParser):
         """Convert API response to section tuples."""
         sections: list[SectionTuple] = []
 
-        if algorithm == "PaddleOCR-VL":
+        if algorithm in ("PaddleOCR-VL",):
             layout_parsing_results = result.get("layoutParsingResults", [])
 
             for page_idx, layout_result in enumerate(layout_parsing_results):
diff --git a/docs/faq.mdx b/docs/faq.mdx
index d08bb9361..cc7ab374b 100644
--- a/docs/faq.mdx
+++ b/docs/faq.mdx
@@ -43,11 +43,11 @@ You can find the RAGFlow version number on the **System** page of the UI:
 If you build RAGFlow from source, the version number is also in the system log:
 
 ```
-        ____   ___    ______ ______ __               
+        ____   ___    ______ ______ __
        / __ \ /   |  / ____// ____// /____  _      __
       / /_/ // /| | / / __ / /_   / // __ \| | /| / /
-     / _, _// ___ |/ /_/ // __/  / // /_/ /| |/ |/ / 
-    /_/ |_|/_/  |_|\____//_/    /_/ \____/ |__/|__/                             
+     / _, _// ___ |/ /_/ // __/  / // /_/ /| |/ |/ /
+    /_/ |_|/_/  |_|\____//_/    /_/ \____/ |__/|__/
 
 2025-02-18 10:10:43,835 INFO     1445658 RAGFlow version: v0.15.0-50-g6daae7f2
 ```
@@ -177,7 +177,7 @@ To fix this issue, use https://hf-mirror.com instead:
 3. Start up the server:
 
    ```bash
-   docker compose up -d 
+   docker compose up -d
    ```
 
 ---
@@ -210,11 +210,11 @@ You will not log in to RAGFlow unless the server is fully initialized. Run `dock
 *The server is successfully initialized, if your system displays the following:*
 
 ```
-     ____   ___    ______ ______ __               
+     ____   ___    ______ ______ __
     / __ \ /   |  / ____// ____// /____  _      __
    / /_/ // /| | / / __ / /_   / // __ \| | /| / /
-  / _, _// ___ |/ /_/ // __/  / // /_/ /| |/ |/ / 
- /_/ |_|/_/  |_|\____//_/    /_/ \____/ |__/|__/  
+  / _, _// ___ |/ /_/ // __/  / // /_/ /| |/ |/ /
+ /_/ |_|/_/  |_|\____//_/    /_/ \____/ |__/|__/
 
  * Running on all addresses (0.0.0.0)
  * Running on http://127.0.0.1:9380
@@ -317,7 +317,7 @@ The status of a Docker container status does not necessarily reflect the status
    $ docker ps
    ```
 
-   *The status of a healthy Elasticsearch component should look as follows:*  
+   *The status of a healthy Elasticsearch component should look as follows:*
 
    ```
    91220e3285dd   docker.elastic.co/elasticsearch/elasticsearch:8.11.3   "/bin/tini -- /usr/l…"   11 hours ago   Up 11 hours (healthy)     9300/tcp, 0.0.0.0:9200->9200/tcp, :::9200->9200/tcp           ragflow-es-01
@@ -370,7 +370,7 @@ Yes, we do. See the Python files under the **rag/app** folder.
    $ docker ps
    ```
 
-   *The status of a healthy Elasticsearch component should look as follows:*  
+   *The status of a healthy Elasticsearch component should look as follows:*
 
    ```bash
    cd29bcb254bc   quay.io/minio/minio:RELEASE.2023-12-20T01-00-02Z       "/usr/bin/docker-ent…"   2 weeks ago    Up 11 hours      0.0.0.0:9001->9001/tcp, :::9001->9001/tcp, 0.0.0.0:9000->9000/tcp, :::9000->9000/tcp     ragflow-minio
@@ -453,7 +453,7 @@ See [Upgrade RAGFlow](./guides/upgrade_ragflow.mdx) for more information.
 
 To switch your document engine from Elasticsearch to [Infinity](https://github.com/infiniflow/infinity):
 
-1. Stop all running containers:  
+1. Stop all running containers:
 
    ```bash
    $ docker compose -f docker/docker-compose.yml down -v
@@ -463,7 +463,7 @@ To switch your document engine from Elasticsearch to [Infinity](https://github.c
    :::
 
 2. In **docker/.env**, set `DOC_ENGINE=${DOC_ENGINE:-infinity}`
-3. Restart your Docker image: 
+3. Restart your Docker image:
 
    ```bash
    $ docker compose -f docker-compose.yml up -d
@@ -508,12 +508,12 @@ From v0.22.0 onwards, RAGFlow includes MinerU (&ge; 2.6.3) as an optional PDF pa
       - `"vlm-mlx-engine"`
       - `"vlm-vllm-async-engine"`
       - `"vlm-lmdeploy-engine"`.
-   - `MINERU_SERVER_URL`: (optional) The downstream vLLM HTTP server (e.g., `http://vllm-host:30000`). Applicable when `MINERU_BACKEND` is set to `"vlm-http-client"`. 
+   - `MINERU_SERVER_URL`: (optional) The downstream vLLM HTTP server (e.g., `http://vllm-host:30000`). Applicable when `MINERU_BACKEND` is set to `"vlm-http-client"`.
    - `MINERU_OUTPUT_DIR`: (optional) The local directory for holding the outputs of the MinerU API service (zip/JSON) before ingestion.
    - `MINERU_DELETE_OUTPUT`: Whether to delete temporary output when a temporary directory is used:
      - `1`: Delete.
      - `0`: Retain.
-3. In the web UI, navigate to your dataset's **Configuration** page and find the **Ingestion pipeline** section:  
+3. In the web UI, navigate to your dataset's **Configuration** page and find the **Ingestion pipeline** section:
    - If you decide to use a chunking method from the **Built-in** dropdown, ensure it supports PDF parsing, then select **MinerU** from the **PDF parser** dropdown.
    - If you use a custom ingestion pipeline instead, select **MinerU** in the **PDF parser** section of the **Parser** component.
 
@@ -600,7 +600,7 @@ This method uses PaddleOCR's official API service with an access token.
 - If using custom ingestion pipeline, select **PaddleOCR** in the **Parser** component
 
 **Notes:**
-- To obtain the API URL, visit the [PaddleOCR official website](https://aistudio.baidu.com/paddleocr/task), click the **API** button in the upper-left corner, choose the example code for the specific algorithm you want to use (e.g., PaddleOCR-VL), and copy the `API_URL`.
+- To obtain the API URL, visit the [PaddleOCR official website](https://aistudio.baidu.com/paddleocr), click the **API** button, choose the example code for the specific algorithm you want to use (e.g., PaddleOCR-VL), and copy the `API_URL`.
 - Access tokens can be obtained from the [AI Studio platform](https://aistudio.baidu.com/account/accessToken).
 - This method requires internet connectivity to reach the official PaddleOCR API.
 
diff --git a/web/src/components/paddleocr-options-form-field.tsx b/web/src/components/paddleocr-options-form-field.tsx
index 0d70519eb..03adf5ee4 100644
--- a/web/src/components/paddleocr-options-form-field.tsx
+++ b/web/src/components/paddleocr-options-form-field.tsx
@@ -83,7 +83,7 @@ export function PaddleOCROptionsFormField({
       >
         {(field) => (
           <RAGFlowSelect
-            value={field.value || 'PaddleOCR-VL'}
+            value={field.value || undefined}
             onChange={field.onChange}
             options={algorithmOptions}
             placeholder={t('common.selectPlaceholder', 'Select value')}