feat: add paddleocr parser (#12513)

### What problem does this PR solve?

Add PaddleOCR as a new PDF parser.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
Lin Manhui
2026-01-09 17:48:45 +08:00
committed by GitHub
parent 6abf55c048
commit 2e09db02f3
34 changed files with 1510 additions and 453 deletions

View File

@ -390,6 +390,17 @@ export default {
'启用公式识别。注意:对于西里尔文档可能无法正常工作。',
mineruTableEnable: '表格识别',
mineruTableEnableTip: '启用表格识别和提取。',
paddleocrOptions: 'PaddleOCR 选项',
paddleocrApiUrl: 'PaddleOCR API URL',
paddleocrApiUrlTip: 'PaddleOCR 服务的 API 端点 URL',
paddleocrApiUrlPlaceholder: '例如https://paddleocr-server.com/layout-parsing',
paddleocrAccessToken: 'AI Studio 访问令牌',
paddleocrAccessTokenTip: 'PaddleOCR API 的访问令牌(可选)',
paddleocrAccessTokenPlaceholder: '您的 AI Studio 令牌(可选)',
paddleocrAlgorithm: 'PaddleOCR 算法',
paddleocrAlgorithmTip: '用于 PaddleOCR 解析的算法',
paddleocrSelectAlgorithm: '选择算法',
paddleocrModelNamePlaceholder: '例如paddleocr-环境-1',
generationScopeTip: '选择 RAPTOR 的生成范围:整个知识库或单个文件。',
generationScope: '生成范围',
scopeSingleFile: '单文件',
@ -1113,6 +1124,17 @@ General实体和关系提取提示来自 GitHub - microsoft/graphrag基于
vlmLmdeployEngine: '基于LMDeploy引擎的视觉语言模型实验性',
},
},
// Simplified-Chinese labels, placeholders and required-field messages for the
// PaddleOCR entries: API URL, AI Studio access token, algorithm and model name.
// NOTE(review): presumably read by the PaddleOCR model-provider form — confirm
// against the component that looks up these keys.
paddleocr: {
apiUrl: 'PaddleOCR API URL',
// Example endpoint shown while the API URL field is empty.
apiUrlPlaceholder: '例如https://paddleocr-server.com/layout-parsing',
accessToken: 'AI Studio访问令牌',
// The placeholder text itself marks the token as optional ("可选").
accessTokenPlaceholder: '您的 AI Studio 令牌(可选)',
algorithm: 'PaddleOCR算法',
selectAlgorithm: '选择算法',
// Example model name; the identifier is intentionally left untranslated.
modelNamePlaceholder: '例如paddleocr-from-env-1',
// "Required" messages for the model name and API URL fields.
modelNameRequired: '模型名称为必填项',
apiUrlRequired: 'PaddleOCR API URL 为必填项'
},
},
message: {
registered: '注册成功',