mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-02-07 19:15:05 +08:00
feat: add paddleocr parser (#12513)
### What problem does this PR solve?

Add PaddleOCR as a new PDF parser.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
95
web/src/components/paddleocr-options-form-field.tsx
Normal file
95
web/src/components/paddleocr-options-form-field.tsx
Normal file
@ -0,0 +1,95 @@
|
||||
import { RAGFlowFormItem } from '@/components/ragflow-form';
|
||||
import { Input } from '@/components/ui/input';
|
||||
import { RAGFlowSelect } from '@/components/ui/select';
|
||||
import { LLMFactory } from '@/constants/llm';
|
||||
import { buildOptions } from '@/utils/form';
|
||||
import { useFormContext, useWatch } from 'react-hook-form';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
|
||||
// Option list passed to the algorithm select below; built once at module load
// from the single supported algorithm name, 'PaddleOCR-VL'.
const algorithmOptions = buildOptions(['PaddleOCR-VL']);
|
||||
|
||||
/**
 * Renders the PaddleOCR-specific parser settings (API URL, access token and
 * algorithm) as fields of the surrounding react-hook-form context.
 *
 * The section is only rendered while the watched
 * `parser_config.layout_recognize` value is a string that names PaddleOCR;
 * in every other case the component returns `null`.
 *
 * @param namePrefix - Path prefix prepended (with a dot) to each field name.
 *   Defaults to `parser_config`; an empty prefix leaves field names unprefixed.
 * @returns The options section, or `null` when PaddleOCR is not selected.
 */
export function PaddleOCROptionsFormField({
  namePrefix = 'parser_config',
}: {
  namePrefix?: string;
}) {
  const form = useFormContext();
  const { t } = useTranslation();
  const buildName = (field: string) =>
    namePrefix ? `${namePrefix}.${field}` : field;

  // NOTE(review): the watched path is fixed and does not follow `namePrefix`
  // — confirm this is intended for callers that pass a custom prefix.
  const layoutRecognize = useWatch({
    control: form.control,
    name: 'parser_config.layout_recognize',
  });

  // Check if PaddleOCR is selected (the value contains 'PaddleOCR' or matches the factory name).
  // The watched value is untyped, so guard on `string` first: optional chaining
  // alone would still throw if the stored value were e.g. a boolean.
  const isPaddleOCRSelected =
    typeof layoutRecognize === 'string' &&
    (layoutRecognize.includes(LLMFactory.PaddleOCR) ||
      layoutRecognize.toLowerCase().includes('paddleocr'));

  if (!isPaddleOCRSelected) {
    return null;
  }

  return (
    <div className="space-y-4 border-l-2 border-primary/30 pl-4 ml-2">
      <div className="text-sm font-medium text-text-secondary">
        {t('knowledgeConfiguration.paddleocrOptions', 'PaddleOCR Options')}
      </div>

      <RAGFlowFormItem
        name={buildName('paddleocr_api_url')}
        label={t('knowledgeConfiguration.paddleocrApiUrl', 'PaddleOCR API URL')}
        tooltip={t(
          'knowledgeConfiguration.paddleocrApiUrlTip',
          'The API endpoint URL for PaddleOCR service',
        )}
        horizontal={true}
      >
        {(field) => (
          <Input
            {...field}
            placeholder={t('knowledgeConfiguration.paddleocrApiUrlPlaceholder')}
          />
        )}
      </RAGFlowFormItem>

      <RAGFlowFormItem
        name={buildName('paddleocr_access_token')}
        label={t('knowledgeConfiguration.paddleocrAccessToken', 'AI Studio Access Token')}
        tooltip={t(
          'knowledgeConfiguration.paddleocrAccessTokenTip',
          'Access token for PaddleOCR API (optional)',
        )}
        horizontal={true}
      >
        {(field) => (
          <Input
            {...field}
            placeholder={t('knowledgeConfiguration.paddleocrAccessTokenPlaceholder')}
          />
        )}
      </RAGFlowFormItem>

      <RAGFlowFormItem
        name={buildName('paddleocr_algorithm')}
        label={t('knowledgeConfiguration.paddleocrAlgorithm', 'PaddleOCR Algorithm')}
        tooltip={t(
          'knowledgeConfiguration.paddleocrAlgorithmTip',
          'Algorithm to use for PaddleOCR parsing',
        )}
        horizontal={true}
      >
        {(field) => (
          // Falls back to 'PaddleOCR-VL' for display when the field has no value yet.
          <RAGFlowSelect
            value={field.value || 'PaddleOCR-VL'}
            onChange={field.onChange}
            options={algorithmOptions}
            placeholder={t('common.selectPlaceholder', 'Select value')}
          />
        )}
      </RAGFlowFormItem>
    </div>
  );
}
|
||||
Reference in New Issue
Block a user