mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-24 07:26:47 +08:00
Refa: only support MinerU-API now (#11977)
### What problem does this PR solve?

Only the MinerU API is supported now. The pipeline frontend still needs to be completed to allow the configuration of MinerU options.

### Type of change

- [x] Refactoring
This commit is contained in:
@ -118,6 +118,10 @@ export function ChunkMethodDialog({
|
||||
auto_questions: z.coerce.number().optional(),
|
||||
html4excel: z.boolean().optional(),
|
||||
toc_extraction: z.boolean().optional(),
|
||||
mineru_parse_method: z.enum(['auto', 'txt', 'ocr']).optional(),
|
||||
mineru_formula_enable: z.boolean().optional(),
|
||||
mineru_table_enable: z.boolean().optional(),
|
||||
mineru_lang: z.string().optional(),
|
||||
// raptor: z
|
||||
// .object({
|
||||
// use_raptor: z.boolean().optional(),
|
||||
@ -166,6 +170,9 @@ export function ChunkMethodDialog({
|
||||
name: 'parser_id',
|
||||
control: form.control,
|
||||
});
|
||||
const isMineruSelected =
|
||||
selectedTag?.toLowerCase().includes('mineru') ||
|
||||
layoutRecognize?.toLowerCase?.()?.includes('mineru');
|
||||
|
||||
const isPdf = documentExtension === 'pdf';
|
||||
|
||||
@ -328,7 +335,7 @@ export function ChunkMethodDialog({
|
||||
className="space-y-3"
|
||||
>
|
||||
{showOne && (
|
||||
<LayoutRecognizeFormField></LayoutRecognizeFormField>
|
||||
<LayoutRecognizeFormField showMineruOptions={false} />
|
||||
)}
|
||||
{showMaxTokenNumber && (
|
||||
<>
|
||||
@ -345,9 +352,16 @@ export function ChunkMethodDialog({
|
||||
)}
|
||||
</FormContainer>
|
||||
<FormContainer
|
||||
show={showAutoKeywords(selectedTag) || showExcelToHtml}
|
||||
show={
|
||||
isMineruSelected ||
|
||||
showAutoKeywords(selectedTag) ||
|
||||
showExcelToHtml
|
||||
}
|
||||
className="space-y-3"
|
||||
>
|
||||
{isMineruSelected && (
|
||||
<LayoutRecognizeFormField showMineruOptions />
|
||||
)}
|
||||
{selectedTag === DocumentParserType.Naive && (
|
||||
<EnableTocToggle />
|
||||
)}
|
||||
|
||||
@ -18,6 +18,10 @@ export function useDefaultParserValues() {
|
||||
auto_questions: 0,
|
||||
html4excel: false,
|
||||
toc_extraction: false,
|
||||
mineru_parse_method: 'auto',
|
||||
mineru_formula_enable: true,
|
||||
mineru_table_enable: true,
|
||||
mineru_lang: 'English',
|
||||
// raptor: {
|
||||
// use_raptor: false,
|
||||
// prompt: t('knowledgeConfiguration.promptText'),
|
||||
|
||||
@ -5,6 +5,7 @@ import { cn } from '@/lib/utils';
|
||||
import { camelCase } from 'lodash';
|
||||
import { ReactNode, useMemo } from 'react';
|
||||
import { useFormContext } from 'react-hook-form';
|
||||
import { MinerUOptionsFormField } from './mineru-options-form-field';
|
||||
import { SelectWithSearch } from './originui/select-with-search';
|
||||
import {
|
||||
FormControl,
|
||||
@ -26,11 +27,13 @@ export function LayoutRecognizeFormField({
|
||||
horizontal = true,
|
||||
optionsWithoutLLM,
|
||||
label,
|
||||
showMineruOptions = true,
|
||||
}: {
|
||||
name?: string;
|
||||
horizontal?: boolean;
|
||||
optionsWithoutLLM?: { value: string; label: string }[];
|
||||
label?: ReactNode;
|
||||
showMineruOptions?: boolean;
|
||||
}) {
|
||||
const form = useFormContext();
|
||||
|
||||
@ -79,35 +82,38 @@ export function LayoutRecognizeFormField({
|
||||
name={name}
|
||||
render={({ field }) => {
|
||||
return (
|
||||
<FormItem className={'items-center space-y-0 '}>
|
||||
<div
|
||||
className={cn('flex', {
|
||||
'flex-col ': !horizontal,
|
||||
'items-center': horizontal,
|
||||
})}
|
||||
>
|
||||
<FormLabel
|
||||
tooltip={t('layoutRecognizeTip')}
|
||||
className={cn('text-sm text-text-secondary whitespace-wrap', {
|
||||
['w-1/4']: horizontal,
|
||||
<>
|
||||
<FormItem className={'items-center space-y-0 '}>
|
||||
<div
|
||||
className={cn('flex', {
|
||||
'flex-col ': !horizontal,
|
||||
'items-center': horizontal,
|
||||
})}
|
||||
>
|
||||
{label || t('layoutRecognize')}
|
||||
</FormLabel>
|
||||
<div className={horizontal ? 'w-3/4' : 'w-full'}>
|
||||
<FormControl>
|
||||
<SelectWithSearch
|
||||
{...field}
|
||||
options={options}
|
||||
></SelectWithSearch>
|
||||
</FormControl>
|
||||
<FormLabel
|
||||
tooltip={t('layoutRecognizeTip')}
|
||||
className={cn('text-sm text-text-secondary whitespace-wrap', {
|
||||
['w-1/4']: horizontal,
|
||||
})}
|
||||
>
|
||||
{label || t('layoutRecognize')}
|
||||
</FormLabel>
|
||||
<div className={horizontal ? 'w-3/4' : 'w-full'}>
|
||||
<FormControl>
|
||||
<SelectWithSearch
|
||||
{...field}
|
||||
options={options}
|
||||
></SelectWithSearch>
|
||||
</FormControl>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div className="flex pt-1">
|
||||
<div className={horizontal ? 'w-1/4' : 'w-full'}></div>
|
||||
<FormMessage />
|
||||
</div>
|
||||
</FormItem>
|
||||
<div className="flex pt-1">
|
||||
<div className={horizontal ? 'w-1/4' : 'w-full'}></div>
|
||||
<FormMessage />
|
||||
</div>
|
||||
</FormItem>
|
||||
{showMineruOptions && <MinerUOptionsFormField />}
|
||||
</>
|
||||
);
|
||||
}}
|
||||
/>
|
||||
|
||||
@ -7,10 +7,38 @@ import { useFormContext, useWatch } from 'react-hook-form';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
|
||||
const parseMethodOptions = buildOptions(['auto', 'txt', 'ocr']);
|
||||
const languageOptions = buildOptions([
|
||||
'English',
|
||||
'Chinese',
|
||||
'Traditional Chinese',
|
||||
'Russian',
|
||||
'Ukrainian',
|
||||
'Indonesian',
|
||||
'Spanish',
|
||||
'Vietnamese',
|
||||
'Japanese',
|
||||
'Korean',
|
||||
'Portuguese BR',
|
||||
'German',
|
||||
'French',
|
||||
'Italian',
|
||||
'Tamil',
|
||||
'Telugu',
|
||||
'Kannada',
|
||||
'Thai',
|
||||
'Greek',
|
||||
'Hindi',
|
||||
]);
|
||||
|
||||
export function MinerUOptionsFormField() {
|
||||
export function MinerUOptionsFormField({
|
||||
namePrefix = 'parser_config',
|
||||
}: {
|
||||
namePrefix?: string;
|
||||
}) {
|
||||
const form = useFormContext();
|
||||
const { t } = useTranslation();
|
||||
const buildName = (field: string) =>
|
||||
namePrefix ? `${namePrefix}.${field}` : field;
|
||||
|
||||
const layoutRecognize = useWatch({
|
||||
control: form.control,
|
||||
@ -33,7 +61,7 @@ export function MinerUOptionsFormField() {
|
||||
</div>
|
||||
|
||||
<RAGFlowFormItem
|
||||
name="parser_config.mineru_parse_method"
|
||||
name={buildName('mineru_parse_method')}
|
||||
label={t('knowledgeConfiguration.mineruParseMethod', 'Parse Method')}
|
||||
tooltip={t(
|
||||
'knowledgeConfiguration.mineruParseMethodTip',
|
||||
@ -52,7 +80,26 @@ export function MinerUOptionsFormField() {
|
||||
</RAGFlowFormItem>
|
||||
|
||||
<RAGFlowFormItem
|
||||
name="parser_config.mineru_formula_enable"
|
||||
name={buildName('mineru_lang')}
|
||||
label={t('knowledgeConfiguration.mineruLanguage', 'Language')}
|
||||
tooltip={t(
|
||||
'knowledgeConfiguration.mineruLanguageTip',
|
||||
'Preferred OCR language for MinerU.',
|
||||
)}
|
||||
horizontal={true}
|
||||
>
|
||||
{(field) => (
|
||||
<RAGFlowSelect
|
||||
value={field.value || 'English'}
|
||||
onChange={field.onChange}
|
||||
options={languageOptions}
|
||||
placeholder={t('common.selectPlaceholder', 'Select value')}
|
||||
/>
|
||||
)}
|
||||
</RAGFlowFormItem>
|
||||
|
||||
<RAGFlowFormItem
|
||||
name={buildName('mineru_formula_enable')}
|
||||
label={t(
|
||||
'knowledgeConfiguration.mineruFormulaEnable',
|
||||
'Formula Recognition',
|
||||
@ -73,7 +120,7 @@ export function MinerUOptionsFormField() {
|
||||
</RAGFlowFormItem>
|
||||
|
||||
<RAGFlowFormItem
|
||||
name="parser_config.mineru_table_enable"
|
||||
name={buildName('mineru_table_enable')}
|
||||
label={t(
|
||||
'knowledgeConfiguration.mineruTableEnable',
|
||||
'Table Recognition',
|
||||
|
||||
@ -34,8 +34,13 @@ export interface IDocumentInfo {
|
||||
export interface IParserConfig {
|
||||
delimiter?: string;
|
||||
html4excel?: boolean;
|
||||
layout_recognize?: boolean;
|
||||
pages: any[];
|
||||
layout_recognize?: string;
|
||||
pages?: any[];
|
||||
chunk_token_num?: number;
|
||||
auto_keywords?: number;
|
||||
auto_questions?: number;
|
||||
toc_extraction?: boolean;
|
||||
task_page_size?: number;
|
||||
raptor?: Raptor;
|
||||
graphrag?: GraphRag;
|
||||
}
|
||||
|
||||
@ -1,8 +1,13 @@
|
||||
export interface IChangeParserConfigRequestBody {
|
||||
pages: number[][];
|
||||
chunk_token_num: number;
|
||||
layout_recognize: boolean;
|
||||
task_page_size: number;
|
||||
pages?: number[][];
|
||||
chunk_token_num?: number;
|
||||
layout_recognize?: string;
|
||||
task_page_size?: number;
|
||||
delimiter?: string;
|
||||
auto_keywords?: number;
|
||||
auto_questions?: number;
|
||||
html4excel?: boolean;
|
||||
toc_extraction?: boolean;
|
||||
}
|
||||
|
||||
export interface IChangeParserRequestBody {
|
||||
|
||||
@ -7,7 +7,6 @@ import { DelimiterFormField } from '@/components/delimiter-form-field';
|
||||
import { ExcelToHtmlFormField } from '@/components/excel-to-html-form-field';
|
||||
import { LayoutRecognizeFormField } from '@/components/layout-recognize-form-field';
|
||||
import { MaxTokenNumberFormField } from '@/components/max-token-number-from-field';
|
||||
import { MinerUOptionsFormField } from '@/components/mineru-options-form-field';
|
||||
import {
|
||||
ConfigurationFormContainer,
|
||||
MainContainer,
|
||||
@ -19,7 +18,6 @@ export function NaiveConfiguration() {
|
||||
<MainContainer>
|
||||
<ConfigurationFormContainer>
|
||||
<LayoutRecognizeFormField></LayoutRecognizeFormField>
|
||||
<MinerUOptionsFormField></MinerUOptionsFormField>
|
||||
<MaxTokenNumberFormField initialValue={512}></MaxTokenNumberFormField>
|
||||
<DelimiterFormField></DelimiterFormField>
|
||||
<ChildrenDelimiterForm />
|
||||
|
||||
@ -37,6 +37,7 @@ export const formSchema = z
|
||||
mineru_parse_method: z.enum(['auto', 'txt', 'ocr']).optional(),
|
||||
mineru_formula_enable: z.boolean().optional(),
|
||||
mineru_table_enable: z.boolean().optional(),
|
||||
mineru_lang: z.string().optional(),
|
||||
raptor: z
|
||||
.object({
|
||||
use_raptor: z.boolean().optional(),
|
||||
|
||||
@ -75,6 +75,7 @@ export default function DatasetSettings() {
|
||||
mineru_parse_method: 'auto',
|
||||
mineru_formula_enable: true,
|
||||
mineru_table_enable: true,
|
||||
mineru_lang: 'English',
|
||||
raptor: {
|
||||
use_raptor: true,
|
||||
max_token: 256,
|
||||
|
||||
Reference in New Issue
Block a user