mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-02-07 11:05:05 +08:00
feat: add paddleocr parser (#12513)
### What problem does this PR solve?

Add PaddleOCR as a new PDF parser.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
@ -6,6 +6,7 @@ import { camelCase } from 'lodash';
|
||||
import { ReactNode, useMemo } from 'react';
|
||||
import { useFormContext } from 'react-hook-form';
|
||||
import { MinerUOptionsFormField } from './mineru-options-form-field';
|
||||
import { PaddleOCROptionsFormField } from './paddleocr-options-form-field';
|
||||
import { SelectWithSearch } from './originui/select-with-search';
|
||||
import {
|
||||
FormControl,
|
||||
@ -28,12 +29,14 @@ export function LayoutRecognizeFormField({
|
||||
optionsWithoutLLM,
|
||||
label,
|
||||
showMineruOptions = true,
|
||||
showPaddleocrOptions = true,
|
||||
}: {
|
||||
name?: string;
|
||||
horizontal?: boolean;
|
||||
optionsWithoutLLM?: { value: string; label: string }[];
|
||||
label?: ReactNode;
|
||||
showMineruOptions?: boolean;
|
||||
showPaddleocrOptions?: boolean;
|
||||
}) {
|
||||
const form = useFormContext();
|
||||
|
||||
@ -113,6 +116,7 @@ export function LayoutRecognizeFormField({
|
||||
</div>
|
||||
</FormItem>
|
||||
{showMineruOptions && <MinerUOptionsFormField />}
|
||||
{showPaddleocrOptions && <PaddleOCROptionsFormField />}
|
||||
</>
|
||||
);
|
||||
}}
|
||||
|
||||
95
web/src/components/paddleocr-options-form-field.tsx
Normal file
95
web/src/components/paddleocr-options-form-field.tsx
Normal file
@ -0,0 +1,95 @@
|
||||
import { RAGFlowFormItem } from '@/components/ragflow-form';
|
||||
import { Input } from '@/components/ui/input';
|
||||
import { RAGFlowSelect } from '@/components/ui/select';
|
||||
import { LLMFactory } from '@/constants/llm';
|
||||
import { buildOptions } from '@/utils/form';
|
||||
import { useFormContext, useWatch } from 'react-hook-form';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
|
||||
// Choices passed to the PaddleOCR algorithm selector; currently a single entry.
// NOTE(review): buildOptions is assumed to turn each string into a select
// option object — confirm against '@/utils/form'.
const algorithmOptions = buildOptions(['PaddleOCR-VL']);
|
||||
|
||||
/**
 * Extra form fields shown when PaddleOCR is the selected layout recognizer.
 *
 * Watches `parser_config.layout_recognize` on the surrounding react-hook-form
 * context and renders nothing unless that value is a string mentioning
 * PaddleOCR. When it does, three fields are rendered: the API URL, the access
 * token and the algorithm selector.
 *
 * @param namePrefix - Path prepended (dot-separated) to each rendered field
 *   name. Defaults to `parser_config`; an empty string registers the fields at
 *   the form root. The watched `layout_recognize` path is fixed and is not
 *   affected by this prefix.
 * @returns The option fields, or `null` when PaddleOCR is not selected.
 */
export function PaddleOCROptionsFormField({
  namePrefix = 'parser_config',
}: {
  namePrefix?: string;
}) {
  const form = useFormContext();
  const { t } = useTranslation();
  const buildName = (field: string) =>
    namePrefix ? `${namePrefix}.${field}` : field;

  const layoutRecognize = useWatch({
    control: form.control,
    name: 'parser_config.layout_recognize',
  });

  // The watched value is untyped, so narrow to string before calling string
  // methods: a non-string value (e.g. a boolean) would otherwise throw during
  // render. PaddleOCR counts as selected when the value contains the factory
  // name, or contains 'paddleocr' in any letter case.
  const isPaddleOCRSelected =
    typeof layoutRecognize === 'string' &&
    (layoutRecognize.includes(LLMFactory.PaddleOCR) ||
      layoutRecognize.toLowerCase().includes('paddleocr'));

  if (!isPaddleOCRSelected) {
    return null;
  }

  return (
    <div className="space-y-4 border-l-2 border-primary/30 pl-4 ml-2">
      <div className="text-sm font-medium text-text-secondary">
        {t('knowledgeConfiguration.paddleocrOptions', 'PaddleOCR Options')}
      </div>

      <RAGFlowFormItem
        name={buildName('paddleocr_api_url')}
        label={t('knowledgeConfiguration.paddleocrApiUrl', 'PaddleOCR API URL')}
        tooltip={t(
          'knowledgeConfiguration.paddleocrApiUrlTip',
          'The API endpoint URL for PaddleOCR service',
        )}
        horizontal={true}
      >
        {(field) => (
          <Input
            {...field}
            placeholder={t('knowledgeConfiguration.paddleocrApiUrlPlaceholder')}
          />
        )}
      </RAGFlowFormItem>

      <RAGFlowFormItem
        name={buildName('paddleocr_access_token')}
        label={t(
          'knowledgeConfiguration.paddleocrAccessToken',
          'AI Studio Access Token',
        )}
        tooltip={t(
          'knowledgeConfiguration.paddleocrAccessTokenTip',
          'Access token for PaddleOCR API (optional)',
        )}
        horizontal={true}
      >
        {(field) => (
          <Input
            {...field}
            placeholder={t(
              'knowledgeConfiguration.paddleocrAccessTokenPlaceholder',
            )}
          />
        )}
      </RAGFlowFormItem>

      <RAGFlowFormItem
        name={buildName('paddleocr_algorithm')}
        label={t(
          'knowledgeConfiguration.paddleocrAlgorithm',
          'PaddleOCR Algorithm',
        )}
        tooltip={t(
          'knowledgeConfiguration.paddleocrAlgorithmTip',
          'Algorithm to use for PaddleOCR parsing',
        )}
        horizontal={true}
      >
        {(field) => (
          <RAGFlowSelect
            // Fall back to the only supported algorithm while the field is unset.
            value={field.value || 'PaddleOCR-VL'}
            onChange={field.onChange}
            options={algorithmOptions}
            placeholder={t('common.selectPlaceholder', 'Select value')}
          />
        )}
      </RAGFlowFormItem>
    </div>
  );
}
|
||||
@ -105,6 +105,7 @@ export const LlmIcon = ({
|
||||
LLMFactory.Gemini,
|
||||
LLMFactory.StepFun,
|
||||
LLMFactory.MinerU,
|
||||
LLMFactory.PaddleOCR,
|
||||
// LLMFactory.DeerAPI,
|
||||
];
|
||||
if (svgIcons.includes(name as LLMFactory)) {
|
||||
|
||||
Reference in New Issue
Block a user