mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-02-02 08:35:08 +08:00
Feat: Support PaddleOCR-VL-1.5 interface (#12819)
### What problem does this PR solve? This PR adds support for the PaddleOCR-VL-1.5 interface to the PaddleOCR PDF parser. ### Type of change - [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
@ -63,10 +63,10 @@ def _remove_images_from_markdown(markdown: str) -> str:
|
|||||||
class PaddleOCRVLConfig:
|
class PaddleOCRVLConfig:
|
||||||
"""Configuration for PaddleOCR-VL algorithm."""
|
"""Configuration for PaddleOCR-VL algorithm."""
|
||||||
|
|
||||||
|
use_doc_orientation_classify: Optional[bool] = False
|
||||||
use_doc_orientation_classify: Optional[bool] = False
|
use_doc_orientation_classify: Optional[bool] = False
|
||||||
use_doc_unwarping: Optional[bool] = False
|
use_doc_unwarping: Optional[bool] = False
|
||||||
use_layout_detection: Optional[bool] = None
|
use_layout_detection: Optional[bool] = None
|
||||||
use_polygon_points: Optional[bool] = None
|
|
||||||
use_chart_recognition: Optional[bool] = None
|
use_chart_recognition: Optional[bool] = None
|
||||||
use_seal_recognition: Optional[bool] = None
|
use_seal_recognition: Optional[bool] = None
|
||||||
use_ocr_for_image_block: Optional[bool] = None
|
use_ocr_for_image_block: Optional[bool] = None
|
||||||
@ -74,6 +74,7 @@ class PaddleOCRVLConfig:
|
|||||||
layout_nms: Optional[bool] = None
|
layout_nms: Optional[bool] = None
|
||||||
layout_unclip_ratio: Optional[Union[float, Tuple[float, float], dict]] = None
|
layout_unclip_ratio: Optional[Union[float, Tuple[float, float], dict]] = None
|
||||||
layout_merge_bboxes_mode: Optional[Union[str, dict]] = None
|
layout_merge_bboxes_mode: Optional[Union[str, dict]] = None
|
||||||
|
layout_shape_mode: Optional[str] = None
|
||||||
prompt_label: Optional[str] = None
|
prompt_label: Optional[str] = None
|
||||||
format_block_content: Optional[bool] = True
|
format_block_content: Optional[bool] = True
|
||||||
repetition_penalty: Optional[float] = None
|
repetition_penalty: Optional[float] = None
|
||||||
@ -85,6 +86,9 @@ class PaddleOCRVLConfig:
|
|||||||
merge_layout_blocks: Optional[bool] = False
|
merge_layout_blocks: Optional[bool] = False
|
||||||
markdown_ignore_labels: Optional[List[str]] = None
|
markdown_ignore_labels: Optional[List[str]] = None
|
||||||
vlm_extra_args: Optional[dict] = None
|
vlm_extra_args: Optional[dict] = None
|
||||||
|
restructure_pages: Optional[bool] = False
|
||||||
|
merge_tables: Optional[bool] = None
|
||||||
|
relevel_titles: Optional[bool] = None
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@ -111,13 +115,12 @@ class PaddleOCRConfig:
|
|||||||
algorithm = cfg.get("algorithm", "PaddleOCR-VL")
|
algorithm = cfg.get("algorithm", "PaddleOCR-VL")
|
||||||
|
|
||||||
# Validate algorithm
|
# Validate algorithm
|
||||||
if algorithm not in ("PaddleOCR-VL",):
|
if algorithm not in ("PaddleOCR-VL"):
|
||||||
raise ValueError(f"Unsupported algorithm: {algorithm}")
|
raise ValueError(f"Unsupported algorithm: {algorithm}")
|
||||||
|
|
||||||
# Extract algorithm-specific configuration
|
# Extract algorithm-specific configuration
|
||||||
algorithm_config: dict[str, Any] = {}
|
algorithm_config: dict[str, Any] = {}
|
||||||
if algorithm == "PaddleOCR-VL":
|
if algorithm == "PaddleOCR-VL":
|
||||||
# Create default PaddleOCRVLConfig object and convert to dict
|
|
||||||
algorithm_config = asdict(PaddleOCRVLConfig())
|
algorithm_config = asdict(PaddleOCRVLConfig())
|
||||||
algorithm_config_user = cfg.get("algorithm_config")
|
algorithm_config_user = cfg.get("algorithm_config")
|
||||||
if isinstance(algorithm_config_user, dict):
|
if isinstance(algorithm_config_user, dict):
|
||||||
@ -160,7 +163,6 @@ class PaddleOCRParser(RAGFlowPdfParser):
|
|||||||
"use_doc_orientation_classify": "useDocOrientationClassify",
|
"use_doc_orientation_classify": "useDocOrientationClassify",
|
||||||
"use_doc_unwarping": "useDocUnwarping",
|
"use_doc_unwarping": "useDocUnwarping",
|
||||||
"use_layout_detection": "useLayoutDetection",
|
"use_layout_detection": "useLayoutDetection",
|
||||||
"use_polygon_points": "usePolygonPoints",
|
|
||||||
"use_chart_recognition": "useChartRecognition",
|
"use_chart_recognition": "useChartRecognition",
|
||||||
"use_seal_recognition": "useSealRecognition",
|
"use_seal_recognition": "useSealRecognition",
|
||||||
"use_ocr_for_image_block": "useOcrForImageBlock",
|
"use_ocr_for_image_block": "useOcrForImageBlock",
|
||||||
@ -168,6 +170,7 @@ class PaddleOCRParser(RAGFlowPdfParser):
|
|||||||
"layout_nms": "layoutNms",
|
"layout_nms": "layoutNms",
|
||||||
"layout_unclip_ratio": "layoutUnclipRatio",
|
"layout_unclip_ratio": "layoutUnclipRatio",
|
||||||
"layout_merge_bboxes_mode": "layoutMergeBboxesMode",
|
"layout_merge_bboxes_mode": "layoutMergeBboxesMode",
|
||||||
|
"layout_shape_mode": "layoutShapeMode",
|
||||||
"prompt_label": "promptLabel",
|
"prompt_label": "promptLabel",
|
||||||
"format_block_content": "formatBlockContent",
|
"format_block_content": "formatBlockContent",
|
||||||
"repetition_penalty": "repetitionPenalty",
|
"repetition_penalty": "repetitionPenalty",
|
||||||
@ -179,6 +182,9 @@ class PaddleOCRParser(RAGFlowPdfParser):
|
|||||||
"merge_layout_blocks": "mergeLayoutBlocks",
|
"merge_layout_blocks": "mergeLayoutBlocks",
|
||||||
"markdown_ignore_labels": "markdownIgnoreLabels",
|
"markdown_ignore_labels": "markdownIgnoreLabels",
|
||||||
"vlm_extra_args": "vlmExtraArgs",
|
"vlm_extra_args": "vlmExtraArgs",
|
||||||
|
"restructure_pages": "restructurePages",
|
||||||
|
"merge_tables": "mergeTables",
|
||||||
|
"relevel_titles": "relevelTitles",
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -370,7 +376,7 @@ class PaddleOCRParser(RAGFlowPdfParser):
|
|||||||
"""Convert API response to section tuples."""
|
"""Convert API response to section tuples."""
|
||||||
sections: list[SectionTuple] = []
|
sections: list[SectionTuple] = []
|
||||||
|
|
||||||
if algorithm == "PaddleOCR-VL":
|
if algorithm in ("PaddleOCR-VL",):
|
||||||
layout_parsing_results = result.get("layoutParsingResults", [])
|
layout_parsing_results = result.get("layoutParsingResults", [])
|
||||||
|
|
||||||
for page_idx, layout_result in enumerate(layout_parsing_results):
|
for page_idx, layout_result in enumerate(layout_parsing_results):
|
||||||
|
|||||||
28
docs/faq.mdx
28
docs/faq.mdx
@ -43,11 +43,11 @@ You can find the RAGFlow version number on the **System** page of the UI:
|
|||||||
If you build RAGFlow from source, the version number is also in the system log:
|
If you build RAGFlow from source, the version number is also in the system log:
|
||||||
|
|
||||||
```
|
```
|
||||||
____ ___ ______ ______ __
|
____ ___ ______ ______ __
|
||||||
/ __ \ / | / ____// ____// /____ _ __
|
/ __ \ / | / ____// ____// /____ _ __
|
||||||
/ /_/ // /| | / / __ / /_ / // __ \| | /| / /
|
/ /_/ // /| | / / __ / /_ / // __ \| | /| / /
|
||||||
/ _, _// ___ |/ /_/ // __/ / // /_/ /| |/ |/ /
|
/ _, _// ___ |/ /_/ // __/ / // /_/ /| |/ |/ /
|
||||||
/_/ |_|/_/ |_|\____//_/ /_/ \____/ |__/|__/
|
/_/ |_|/_/ |_|\____//_/ /_/ \____/ |__/|__/
|
||||||
|
|
||||||
2025-02-18 10:10:43,835 INFO 1445658 RAGFlow version: v0.15.0-50-g6daae7f2
|
2025-02-18 10:10:43,835 INFO 1445658 RAGFlow version: v0.15.0-50-g6daae7f2
|
||||||
```
|
```
|
||||||
@ -177,7 +177,7 @@ To fix this issue, use https://hf-mirror.com instead:
|
|||||||
3. Start up the server:
|
3. Start up the server:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
docker compose up -d
|
docker compose up -d
|
||||||
```
|
```
|
||||||
|
|
||||||
---
|
---
|
||||||
@ -210,11 +210,11 @@ You will not log in to RAGFlow unless the server is fully initialized. Run `dock
|
|||||||
*The server is successfully initialized, if your system displays the following:*
|
*The server is successfully initialized, if your system displays the following:*
|
||||||
|
|
||||||
```
|
```
|
||||||
____ ___ ______ ______ __
|
____ ___ ______ ______ __
|
||||||
/ __ \ / | / ____// ____// /____ _ __
|
/ __ \ / | / ____// ____// /____ _ __
|
||||||
/ /_/ // /| | / / __ / /_ / // __ \| | /| / /
|
/ /_/ // /| | / / __ / /_ / // __ \| | /| / /
|
||||||
/ _, _// ___ |/ /_/ // __/ / // /_/ /| |/ |/ /
|
/ _, _// ___ |/ /_/ // __/ / // /_/ /| |/ |/ /
|
||||||
/_/ |_|/_/ |_|\____//_/ /_/ \____/ |__/|__/
|
/_/ |_|/_/ |_|\____//_/ /_/ \____/ |__/|__/
|
||||||
|
|
||||||
* Running on all addresses (0.0.0.0)
|
* Running on all addresses (0.0.0.0)
|
||||||
* Running on http://127.0.0.1:9380
|
* Running on http://127.0.0.1:9380
|
||||||
@ -317,7 +317,7 @@ The status of a Docker container does not necessarily reflect the status
|
|||||||
$ docker ps
|
$ docker ps
|
||||||
```
|
```
|
||||||
|
|
||||||
*The status of a healthy Elasticsearch component should look as follows:*
|
*The status of a healthy Elasticsearch component should look as follows:*
|
||||||
|
|
||||||
```
|
```
|
||||||
91220e3285dd docker.elastic.co/elasticsearch/elasticsearch:8.11.3 "/bin/tini -- /usr/l…" 11 hours ago Up 11 hours (healthy) 9300/tcp, 0.0.0.0:9200->9200/tcp, :::9200->9200/tcp ragflow-es-01
|
91220e3285dd docker.elastic.co/elasticsearch/elasticsearch:8.11.3 "/bin/tini -- /usr/l…" 11 hours ago Up 11 hours (healthy) 9300/tcp, 0.0.0.0:9200->9200/tcp, :::9200->9200/tcp ragflow-es-01
|
||||||
@ -370,7 +370,7 @@ Yes, we do. See the Python files under the **rag/app** folder.
|
|||||||
$ docker ps
|
$ docker ps
|
||||||
```
|
```
|
||||||
|
|
||||||
*The status of a healthy MinIO component should look as follows:*
|
*The status of a healthy MinIO component should look as follows:*
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
cd29bcb254bc quay.io/minio/minio:RELEASE.2023-12-20T01-00-02Z "/usr/bin/docker-ent…" 2 weeks ago Up 11 hours 0.0.0.0:9001->9001/tcp, :::9001->9001/tcp, 0.0.0.0:9000->9000/tcp, :::9000->9000/tcp ragflow-minio
|
cd29bcb254bc quay.io/minio/minio:RELEASE.2023-12-20T01-00-02Z "/usr/bin/docker-ent…" 2 weeks ago Up 11 hours 0.0.0.0:9001->9001/tcp, :::9001->9001/tcp, 0.0.0.0:9000->9000/tcp, :::9000->9000/tcp ragflow-minio
|
||||||
@ -453,7 +453,7 @@ See [Upgrade RAGFlow](./guides/upgrade_ragflow.mdx) for more information.
|
|||||||
|
|
||||||
To switch your document engine from Elasticsearch to [Infinity](https://github.com/infiniflow/infinity):
|
To switch your document engine from Elasticsearch to [Infinity](https://github.com/infiniflow/infinity):
|
||||||
|
|
||||||
1. Stop all running containers:
|
1. Stop all running containers:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
$ docker compose -f docker/docker-compose.yml down -v
|
$ docker compose -f docker/docker-compose.yml down -v
|
||||||
@ -463,7 +463,7 @@ To switch your document engine from Elasticsearch to [Infinity](https://github.c
|
|||||||
:::
|
:::
|
||||||
|
|
||||||
2. In **docker/.env**, set `DOC_ENGINE=${DOC_ENGINE:-infinity}`
|
2. In **docker/.env**, set `DOC_ENGINE=${DOC_ENGINE:-infinity}`
|
||||||
3. Restart your Docker image:
|
3. Restart your Docker image:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
$ docker compose -f docker-compose.yml up -d
|
$ docker compose -f docker-compose.yml up -d
|
||||||
@ -508,12 +508,12 @@ From v0.22.0 onwards, RAGFlow includes MinerU (≥ 2.6.3) as an optional PDF pa
|
|||||||
- `"vlm-mlx-engine"`
|
- `"vlm-mlx-engine"`
|
||||||
- `"vlm-vllm-async-engine"`
|
- `"vlm-vllm-async-engine"`
|
||||||
- `"vlm-lmdeploy-engine"`.
|
- `"vlm-lmdeploy-engine"`.
|
||||||
- `MINERU_SERVER_URL`: (optional) The downstream vLLM HTTP server (e.g., `http://vllm-host:30000`). Applicable when `MINERU_BACKEND` is set to `"vlm-http-client"`.
|
- `MINERU_SERVER_URL`: (optional) The downstream vLLM HTTP server (e.g., `http://vllm-host:30000`). Applicable when `MINERU_BACKEND` is set to `"vlm-http-client"`.
|
||||||
- `MINERU_OUTPUT_DIR`: (optional) The local directory for holding the outputs of the MinerU API service (zip/JSON) before ingestion.
|
- `MINERU_OUTPUT_DIR`: (optional) The local directory for holding the outputs of the MinerU API service (zip/JSON) before ingestion.
|
||||||
- `MINERU_DELETE_OUTPUT`: Whether to delete temporary output when a temporary directory is used:
|
- `MINERU_DELETE_OUTPUT`: Whether to delete temporary output when a temporary directory is used:
|
||||||
- `1`: Delete.
|
- `1`: Delete.
|
||||||
- `0`: Retain.
|
- `0`: Retain.
|
||||||
3. In the web UI, navigate to your dataset's **Configuration** page and find the **Ingestion pipeline** section:
|
3. In the web UI, navigate to your dataset's **Configuration** page and find the **Ingestion pipeline** section:
|
||||||
- If you decide to use a chunking method from the **Built-in** dropdown, ensure it supports PDF parsing, then select **MinerU** from the **PDF parser** dropdown.
|
- If you decide to use a chunking method from the **Built-in** dropdown, ensure it supports PDF parsing, then select **MinerU** from the **PDF parser** dropdown.
|
||||||
- If you use a custom ingestion pipeline instead, select **MinerU** in the **PDF parser** section of the **Parser** component.
|
- If you use a custom ingestion pipeline instead, select **MinerU** in the **PDF parser** section of the **Parser** component.
|
||||||
|
|
||||||
@ -600,7 +600,7 @@ This method uses PaddleOCR's official API service with an access token.
|
|||||||
- If using a custom ingestion pipeline, select **PaddleOCR** in the **Parser** component
|
- If using a custom ingestion pipeline, select **PaddleOCR** in the **Parser** component
|
||||||
|
|
||||||
**Notes:**
|
**Notes:**
|
||||||
- To obtain the API URL, visit the [PaddleOCR official website](https://aistudio.baidu.com/paddleocr/task), click the **API** button in the upper-left corner, choose the example code for the specific algorithm you want to use (e.g., PaddleOCR-VL), and copy the `API_URL`.
|
- To obtain the API URL, visit the [PaddleOCR official website](https://aistudio.baidu.com/paddleocr), click the **API** button, choose the example code for the specific algorithm you want to use (e.g., PaddleOCR-VL), and copy the `API_URL`.
|
||||||
- Access tokens can be obtained from the [AI Studio platform](https://aistudio.baidu.com/account/accessToken).
|
- Access tokens can be obtained from the [AI Studio platform](https://aistudio.baidu.com/account/accessToken).
|
||||||
- This method requires internet connectivity to reach the official PaddleOCR API.
|
- This method requires internet connectivity to reach the official PaddleOCR API.
|
||||||
|
|
||||||
|
|||||||
@ -83,7 +83,7 @@ export function PaddleOCROptionsFormField({
|
|||||||
>
|
>
|
||||||
{(field) => (
|
{(field) => (
|
||||||
<RAGFlowSelect
|
<RAGFlowSelect
|
||||||
value={field.value || 'PaddleOCR-VL'}
|
value={field.value || undefined}
|
||||||
onChange={field.onChange}
|
onChange={field.onChange}
|
||||||
options={algorithmOptions}
|
options={algorithmOptions}
|
||||||
placeholder={t('common.selectPlaceholder', 'Select value')}
|
placeholder={t('common.selectPlaceholder', 'Select value')}
|
||||||
|
|||||||
Reference in New Issue
Block a user