Refa: only support MinerU-API now (#11977)

### What problem does this PR solve?

MinerU is now supported only through the MinerU API. The pipeline frontend
still needs to be completed to allow configuration of MinerU options.

### Type of change

- [x] Refactoring
This commit is contained in:
Yongteng Lei
2025-12-17 12:58:48 +08:00
committed by GitHub
parent 5e05f43c3d
commit 03f9be7cbb
19 changed files with 273 additions and 624 deletions

View File

@ -118,6 +118,10 @@ export function ChunkMethodDialog({
auto_questions: z.coerce.number().optional(),
html4excel: z.boolean().optional(),
toc_extraction: z.boolean().optional(),
mineru_parse_method: z.enum(['auto', 'txt', 'ocr']).optional(),
mineru_formula_enable: z.boolean().optional(),
mineru_table_enable: z.boolean().optional(),
mineru_lang: z.string().optional(),
// raptor: z
// .object({
// use_raptor: z.boolean().optional(),
@ -166,6 +170,9 @@ export function ChunkMethodDialog({
name: 'parser_id',
control: form.control,
});
// Truthy when either the selected parser tag or the layout-recognize value
// contains "mineru" (case-insensitive). Optional chaining guards against
// either value being undefined, and `toLowerCase?.()` also guards against
// layoutRecognize lacking a toLowerCase method.
const isMineruSelected =
selectedTag?.toLowerCase().includes('mineru') ||
layoutRecognize?.toLowerCase?.()?.includes('mineru');
const isPdf = documentExtension === 'pdf';
@ -328,7 +335,7 @@ export function ChunkMethodDialog({
className="space-y-3"
>
{showOne && (
<LayoutRecognizeFormField></LayoutRecognizeFormField>
<LayoutRecognizeFormField showMineruOptions={false} />
)}
{showMaxTokenNumber && (
<>
@ -345,9 +352,16 @@ export function ChunkMethodDialog({
)}
</FormContainer>
<FormContainer
show={showAutoKeywords(selectedTag) || showExcelToHtml}
show={
isMineruSelected ||
showAutoKeywords(selectedTag) ||
showExcelToHtml
}
className="space-y-3"
>
{isMineruSelected && (
<LayoutRecognizeFormField showMineruOptions />
)}
{selectedTag === DocumentParserType.Naive && (
<EnableTocToggle />
)}

View File

@ -18,6 +18,10 @@ export function useDefaultParserValues() {
auto_questions: 0,
html4excel: false,
toc_extraction: false,
mineru_parse_method: 'auto',
mineru_formula_enable: true,
mineru_table_enable: true,
mineru_lang: 'English',
// raptor: {
// use_raptor: false,
// prompt: t('knowledgeConfiguration.promptText'),

View File

@ -5,6 +5,7 @@ import { cn } from '@/lib/utils';
import { camelCase } from 'lodash';
import { ReactNode, useMemo } from 'react';
import { useFormContext } from 'react-hook-form';
import { MinerUOptionsFormField } from './mineru-options-form-field';
import { SelectWithSearch } from './originui/select-with-search';
import {
FormControl,
@ -26,11 +27,13 @@ export function LayoutRecognizeFormField({
horizontal = true,
optionsWithoutLLM,
label,
showMineruOptions = true,
}: {
name?: string;
horizontal?: boolean;
optionsWithoutLLM?: { value: string; label: string }[];
label?: ReactNode;
showMineruOptions?: boolean;
}) {
const form = useFormContext();
@ -79,35 +82,38 @@ export function LayoutRecognizeFormField({
name={name}
render={({ field }) => {
return (
<FormItem className={'items-center space-y-0 '}>
<div
className={cn('flex', {
'flex-col ': !horizontal,
'items-center': horizontal,
})}
>
<FormLabel
tooltip={t('layoutRecognizeTip')}
className={cn('text-sm text-text-secondary whitespace-wrap', {
['w-1/4']: horizontal,
<>
<FormItem className={'items-center space-y-0 '}>
<div
className={cn('flex', {
'flex-col ': !horizontal,
'items-center': horizontal,
})}
>
{label || t('layoutRecognize')}
</FormLabel>
<div className={horizontal ? 'w-3/4' : 'w-full'}>
<FormControl>
<SelectWithSearch
{...field}
options={options}
></SelectWithSearch>
</FormControl>
<FormLabel
tooltip={t('layoutRecognizeTip')}
className={cn('text-sm text-text-secondary whitespace-wrap', {
['w-1/4']: horizontal,
})}
>
{label || t('layoutRecognize')}
</FormLabel>
<div className={horizontal ? 'w-3/4' : 'w-full'}>
<FormControl>
<SelectWithSearch
{...field}
options={options}
></SelectWithSearch>
</FormControl>
</div>
</div>
</div>
<div className="flex pt-1">
<div className={horizontal ? 'w-1/4' : 'w-full'}></div>
<FormMessage />
</div>
</FormItem>
<div className="flex pt-1">
<div className={horizontal ? 'w-1/4' : 'w-full'}></div>
<FormMessage />
</div>
</FormItem>
{showMineruOptions && <MinerUOptionsFormField />}
</>
);
}}
/>

View File

@ -7,10 +7,38 @@ import { useFormContext, useWatch } from 'react-hook-form';
import { useTranslation } from 'react-i18next';
// Parse-method choices for MinerU; the same three values are accepted by the
// `mineru_parse_method` enum in the form schemas.
// NOTE(review): buildOptions is defined elsewhere — presumably it maps each
// string to a select option object; confirm against its definition.
const parseMethodOptions = buildOptions(['auto', 'txt', 'ocr']);
// Language choices passed as `options` to the `mineru_lang` select below.
// 'English' is also the value that select falls back to when the field is empty.
const languageOptions = buildOptions([
'English',
'Chinese',
'Traditional Chinese',
'Russian',
'Ukrainian',
'Indonesian',
'Spanish',
'Vietnamese',
'Japanese',
'Korean',
'Portuguese BR',
'German',
'French',
'Italian',
'Tamil',
'Telugu',
'Kannada',
'Thai',
'Greek',
'Hindi',
]);
export function MinerUOptionsFormField() {
export function MinerUOptionsFormField({
namePrefix = 'parser_config',
}: {
namePrefix?: string;
}) {
const form = useFormContext();
const { t } = useTranslation();
// Resolves the form path for a MinerU option: `<namePrefix>.<field>` when a
// prefix is set, otherwise the bare field name.
const buildName = (field: string) => {
  if (!namePrefix) {
    return field;
  }
  return `${namePrefix}.${field}`;
};
const layoutRecognize = useWatch({
control: form.control,
@ -33,7 +61,7 @@ export function MinerUOptionsFormField() {
</div>
<RAGFlowFormItem
name="parser_config.mineru_parse_method"
name={buildName('mineru_parse_method')}
label={t('knowledgeConfiguration.mineruParseMethod', 'Parse Method')}
tooltip={t(
'knowledgeConfiguration.mineruParseMethodTip',
@ -52,7 +80,26 @@ export function MinerUOptionsFormField() {
</RAGFlowFormItem>
<RAGFlowFormItem
name="parser_config.mineru_formula_enable"
name={buildName('mineru_lang')}
label={t('knowledgeConfiguration.mineruLanguage', 'Language')}
tooltip={t(
'knowledgeConfiguration.mineruLanguageTip',
'Preferred OCR language for MinerU.',
)}
horizontal={true}
>
{(field) => (
<RAGFlowSelect
value={field.value || 'English'}
onChange={field.onChange}
options={languageOptions}
placeholder={t('common.selectPlaceholder', 'Select value')}
/>
)}
</RAGFlowFormItem>
<RAGFlowFormItem
name={buildName('mineru_formula_enable')}
label={t(
'knowledgeConfiguration.mineruFormulaEnable',
'Formula Recognition',
@ -73,7 +120,7 @@ export function MinerUOptionsFormField() {
</RAGFlowFormItem>
<RAGFlowFormItem
name="parser_config.mineru_table_enable"
name={buildName('mineru_table_enable')}
label={t(
'knowledgeConfiguration.mineruTableEnable',
'Table Recognition',

View File

@ -34,8 +34,13 @@ export interface IDocumentInfo {
export interface IParserConfig {
delimiter?: string;
html4excel?: boolean;
layout_recognize?: boolean;
pages: any[];
layout_recognize?: string;
pages?: any[];
chunk_token_num?: number;
auto_keywords?: number;
auto_questions?: number;
toc_extraction?: boolean;
task_page_size?: number;
raptor?: Raptor;
graphrag?: GraphRag;
}

View File

@ -1,8 +1,13 @@
export interface IChangeParserConfigRequestBody {
pages: number[][];
chunk_token_num: number;
layout_recognize: boolean;
task_page_size: number;
pages?: number[][];
chunk_token_num?: number;
layout_recognize?: string;
task_page_size?: number;
delimiter?: string;
auto_keywords?: number;
auto_questions?: number;
html4excel?: boolean;
toc_extraction?: boolean;
}
export interface IChangeParserRequestBody {

View File

@ -7,7 +7,6 @@ import { DelimiterFormField } from '@/components/delimiter-form-field';
import { ExcelToHtmlFormField } from '@/components/excel-to-html-form-field';
import { LayoutRecognizeFormField } from '@/components/layout-recognize-form-field';
import { MaxTokenNumberFormField } from '@/components/max-token-number-from-field';
import { MinerUOptionsFormField } from '@/components/mineru-options-form-field';
import {
ConfigurationFormContainer,
MainContainer,
@ -19,7 +18,6 @@ export function NaiveConfiguration() {
<MainContainer>
<ConfigurationFormContainer>
<LayoutRecognizeFormField></LayoutRecognizeFormField>
<MinerUOptionsFormField></MinerUOptionsFormField>
<MaxTokenNumberFormField initialValue={512}></MaxTokenNumberFormField>
<DelimiterFormField></DelimiterFormField>
<ChildrenDelimiterForm />

View File

@ -37,6 +37,7 @@ export const formSchema = z
mineru_parse_method: z.enum(['auto', 'txt', 'ocr']).optional(),
mineru_formula_enable: z.boolean().optional(),
mineru_table_enable: z.boolean().optional(),
mineru_lang: z.string().optional(),
raptor: z
.object({
use_raptor: z.boolean().optional(),

View File

@ -75,6 +75,7 @@ export default function DatasetSettings() {
mineru_parse_method: 'auto',
mineru_formula_enable: true,
mineru_table_enable: true,
mineru_lang: 'English',
raptor: {
use_raptor: true,
max_token: 256,