Feat: Use data pipeline to visualize the parsing configuration of the knowledge base (#10423)

### What problem does this PR solve?

#9869

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

---------

Signed-off-by: dependabot[bot] <support@github.com>
Signed-off-by: jinhai <haijin.chn@gmail.com>
Signed-off-by: Jin Hai <haijin.chn@gmail.com>
Co-authored-by: chanx <1243304602@qq.com>
Co-authored-by: balibabu <cike8899@users.noreply.github.com>
Co-authored-by: Lynn <lynn_inf@hotmail.com>
Co-authored-by: 纷繁下的无奈 <zhileihuang@126.com>
Co-authored-by: huangzl <huangzl@shinemo.com>
Co-authored-by: writinwaters <93570324+writinwaters@users.noreply.github.com>
Co-authored-by: Wilmer <33392318@qq.com>
Co-authored-by: Adrian Weidig <adrianweidig@gmx.net>
Co-authored-by: Zhichang Yu <yuzhichang@gmail.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: Yongteng Lei <yongtengrey@outlook.com>
Co-authored-by: Liu An <asiro@qq.com>
Co-authored-by: buua436 <66937541+buua436@users.noreply.github.com>
Co-authored-by: BadwomanCraZY <511528396@qq.com>
Co-authored-by: cucusenok <31804608+cucusenok@users.noreply.github.com>
Co-authored-by: Russell Valentine <russ@coldstonelabs.org>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Billy Bao <newyorkupperbay@gmail.com>
Co-authored-by: Zhedong Cen <cenzhedong2@126.com>
Co-authored-by: TensorNull <129579691+TensorNull@users.noreply.github.com>
Co-authored-by: TensorNull <tensor.null@gmail.com>
Co-authored-by: TeslaZY <TeslaZY@outlook.com>
Co-authored-by: Ajay <160579663+aybanda@users.noreply.github.com>
Co-authored-by: AB <aj@Ajays-MacBook-Air.local>
Co-authored-by: 天海蒼灆 <huangaoqin@tecpie.com>
Co-authored-by: He Wang <wanghechn@qq.com>
Co-authored-by: Atsushi Hatakeyama <atu729@icloud.com>
Co-authored-by: Jin Hai <haijin.chn@gmail.com>
Co-authored-by: Mohamed Mathari <155896313+melmathari@users.noreply.github.com>
Co-authored-by: Mohamed Mathari <nocodeventure@Mac-mini-van-Mohamed.fritz.box>
Co-authored-by: Stephen Hu <stephenhu@seismic.com>
Co-authored-by: Shaun Zhang <zhangwfjh@users.noreply.github.com>
Co-authored-by: zhimeng123 <60221886+zhimeng123@users.noreply.github.com>
Co-authored-by: mxc <mxc@example.com>
Co-authored-by: Dominik Novotný <50611433+SgtMarmite@users.noreply.github.com>
Co-authored-by: EVGENY M <168018528+rjohny55@users.noreply.github.com>
Co-authored-by: mcoder6425 <mcoder64@gmail.com>
Co-authored-by: lemsn <lemsn@msn.com>
Co-authored-by: lemsn <lemsn@126.com>
Co-authored-by: Adrian Gora <47756404+adagora@users.noreply.github.com>
Co-authored-by: Womsxd <45663319+Womsxd@users.noreply.github.com>
Co-authored-by: FatMii <39074672+FatMii@users.noreply.github.com>
This commit is contained in:
Kevin Hu
2025-10-09 12:36:19 +08:00
committed by GitHub
parent ef0aecea3b
commit cbf04ee470
490 changed files with 10630 additions and 30688 deletions

View File

@ -0,0 +1,85 @@
import { crossLanguageOptions } from '@/components/cross-language-form-field';
import { LayoutRecognizeFormField } from '@/components/layout-recognize-form-field';
import { LLMFormField } from '@/components/llm-setting-items/llm-form-field';
import {
SelectWithSearch,
SelectWithSearchFlagOptionType,
} from '@/components/originui/select-with-search';
import { RAGFlowFormItem } from '@/components/ragflow-form';
import { buildOptions } from '@/utils/form';
import { useTranslation } from 'react-i18next';
import { FileType, OutputFormatMap } from '../../constant';
import { CommonProps } from './interface';
import { buildFieldNameWithPrefix } from './utils';
function buildOutputOptionsFormatMap() {
return Object.entries(OutputFormatMap).reduce<
Record<string, SelectWithSearchFlagOptionType[]>
>((pre, [key, value]) => {
pre[key] = buildOptions(value);
return pre;
}, {});
}
export type OutputFormatFormFieldProps = CommonProps & {
fileType: FileType;
};
export function OutputFormatFormField({
prefix,
fileType,
}: OutputFormatFormFieldProps) {
const { t } = useTranslation();
return (
<RAGFlowFormItem
name={buildFieldNameWithPrefix(`output_format`, prefix)}
label={t('dataflow.outputFormat')}
>
<SelectWithSearch
options={buildOutputOptionsFormatMap()[fileType]}
></SelectWithSearch>
</RAGFlowFormItem>
);
}
export function ParserMethodFormField({
prefix,
optionsWithoutLLM,
}: CommonProps & { optionsWithoutLLM?: { value: string; label: string }[] }) {
const { t } = useTranslation();
return (
<LayoutRecognizeFormField
name={buildFieldNameWithPrefix(`parse_method`, prefix)}
horizontal={false}
optionsWithoutLLM={optionsWithoutLLM}
label={t('dataflow.parserMethod')}
></LayoutRecognizeFormField>
);
}
export function LargeModelFormField({ prefix }: CommonProps) {
return (
<LLMFormField
name={buildFieldNameWithPrefix('llm_id', prefix)}
></LLMFormField>
);
}
export function LanguageFormField({ prefix }: CommonProps) {
const { t } = useTranslation();
return (
<RAGFlowFormItem
name={buildFieldNameWithPrefix(`lang`, prefix)}
label={t('dataflow.lang')}
>
{(field) => (
<SelectWithSearch
options={crossLanguageOptions}
value={field.value}
onChange={field.onChange}
></SelectWithSearch>
)}
</RAGFlowFormItem>
);
}

View File

@ -0,0 +1,30 @@
import { RAGFlowFormItem } from '@/components/ragflow-form';
import { MultiSelect } from '@/components/ui/multi-select';
import { buildOptions } from '@/utils/form';
import { useTranslation } from 'react-i18next';
import { ParserFields } from '../../constant';
import { CommonProps } from './interface';
import { buildFieldNameWithPrefix } from './utils';
const options = buildOptions(ParserFields);
export function EmailFormFields({ prefix }: CommonProps) {
const { t } = useTranslation();
return (
<>
<RAGFlowFormItem
name={buildFieldNameWithPrefix(`fields`, prefix)}
label={t('dataflow.fields')}
>
{(field) => (
<MultiSelect
options={options}
onValueChange={field.onChange}
defaultValue={field.value}
variant="inverted"
></MultiSelect>
)}
</RAGFlowFormItem>
</>
);
}

View File

@ -0,0 +1,57 @@
import { RAGFlowFormItem } from '@/components/ragflow-form';
import { Textarea } from '@/components/ui/textarea';
import { buildOptions } from '@/utils/form';
import { isEmpty } from 'lodash';
import { useEffect, useMemo } from 'react';
import { useFormContext, useWatch } from 'react-hook-form';
import { useTranslation } from 'react-i18next';
import { ImageParseMethod } from '../../constant';
import { LanguageFormField, ParserMethodFormField } from './common-form-fields';
import { CommonProps } from './interface';
import { useSetInitialLanguage } from './use-set-initial-language';
import { buildFieldNameWithPrefix } from './utils';
const options = buildOptions(ImageParseMethod);
export function ImageFormFields({ prefix }: CommonProps) {
const { t } = useTranslation();
const form = useFormContext();
const parseMethodName = buildFieldNameWithPrefix('parse_method', prefix);
const parseMethod = useWatch({
name: parseMethodName,
});
const languageShown = useMemo(() => {
return !isEmpty(parseMethod) && parseMethod !== ImageParseMethod.OCR;
}, [parseMethod]);
useEffect(() => {
if (isEmpty(form.getValues(parseMethodName))) {
form.setValue(parseMethodName, ImageParseMethod.OCR, {
shouldValidate: true,
shouldDirty: true,
});
}
}, [form, parseMethodName]);
useSetInitialLanguage({ prefix, languageShown });
return (
<>
<ParserMethodFormField
prefix={prefix}
optionsWithoutLLM={options}
></ParserMethodFormField>
{languageShown && <LanguageFormField prefix={prefix}></LanguageFormField>}
{languageShown && (
<RAGFlowFormItem
name={buildFieldNameWithPrefix('system_prompt', prefix)}
label={t('dataflow.systemPrompt')}
>
<Textarea placeholder={t('dataflow.systemPromptPlaceholder')} />
</RAGFlowFormItem>
)}
</>
);
}

View File

@ -1,135 +1,202 @@
import { FormContainer } from '@/components/form-container';
import NumberInput from '@/components/originui/number-input';
import { SelectWithSearch } from '@/components/originui/select-with-search';
import {
Form,
FormControl,
FormField,
FormItem,
FormLabel,
FormMessage,
} from '@/components/ui/form';
import { useTranslate } from '@/hooks/common-hooks';
import { RAGFlowFormItem } from '@/components/ragflow-form';
import { BlockButton, Button } from '@/components/ui/button';
import { Form } from '@/components/ui/form';
import { Separator } from '@/components/ui/separator';
import { cn } from '@/lib/utils';
import { buildOptions } from '@/utils/form';
import { zodResolver } from '@hookform/resolvers/zod';
import { memo } from 'react';
import { useForm, useFormContext } from 'react-hook-form';
import { useHover } from 'ahooks';
import { Trash2 } from 'lucide-react';
import { memo, useCallback, useMemo, useRef } from 'react';
import {
UseFieldArrayRemove,
useFieldArray,
useForm,
useFormContext,
} from 'react-hook-form';
import { useTranslation } from 'react-i18next';
import { z } from 'zod';
import { initialParserValues } from '../../constant';
import {
FileType,
InitialOutputFormatMap,
initialParserValues,
} from '../../constant';
import { useFormValues } from '../../hooks/use-form-values';
import { useWatchFormChange } from '../../hooks/use-watch-form-change';
import { INextOperatorForm } from '../../interface';
import { GoogleCountryOptions, GoogleLanguageOptions } from '../../options';
import { buildOutputList } from '../../utils/build-output-list';
import { ApiKeyField } from '../components/api-key-field';
import { FormWrapper } from '../components/form-wrapper';
import { Output } from '../components/output';
import { QueryVariable } from '../components/query-variable';
import { OutputFormatFormField } from './common-form-fields';
import { EmailFormFields } from './email-form-fields';
import { ImageFormFields } from './image-form-fields';
import { PdfFormFields } from './pdf-form-fields';
import { buildFieldNameWithPrefix } from './utils';
import { VideoFormFields } from './video-form-fields';
const outputList = buildOutputList(initialParserValues.outputs);
export const GoogleFormPartialSchema = {
api_key: z.string(),
country: z.string(),
language: z.string(),
const FileFormatOptions = buildOptions(FileType).filter(
(x) => x.value !== FileType.Video, // Temporarily hide the video option
);
const FileFormatWidgetMap = {
[FileType.PDF]: PdfFormFields,
[FileType.Video]: VideoFormFields,
[FileType.Audio]: VideoFormFields,
[FileType.Email]: EmailFormFields,
[FileType.Image]: ImageFormFields,
};
type ParserItemProps = {
name: string;
index: number;
fieldLength: number;
remove: UseFieldArrayRemove;
};
export const FormSchema = z.object({
...GoogleFormPartialSchema,
q: z.string(),
start: z.number(),
num: z.number(),
setups: z.array(
z.object({
fileFormat: z.string().nullish(),
output_format: z.string().optional(),
parse_method: z.string().optional(),
lang: z.string().optional(),
fields: z.array(z.string()).optional(),
llm_id: z.string().optional(),
system_prompt: z.string().optional(),
}),
),
});
export function GoogleFormWidgets() {
const form = useFormContext();
const { t } = useTranslate('flow');
export type ParserFormSchemaType = z.infer<typeof FormSchema>;
function ParserItem({ name, index, fieldLength, remove }: ParserItemProps) {
const { t } = useTranslation();
const form = useFormContext<ParserFormSchemaType>();
const ref = useRef(null);
const isHovering = useHover(ref);
const prefix = `${name}.${index}`;
const fileFormat = form.getValues(`setups.${index}.fileFormat`);
const values = form.getValues();
const parserList = values.setups.slice(); // Adding, deleting, or modifying the parser array will not change the reference.
const filteredFileFormatOptions = useMemo(() => {
const otherFileFormatList = parserList
.filter((_, idx) => idx !== index)
.map((x) => x.fileFormat);
return FileFormatOptions.filter((x) => {
return !otherFileFormatList.includes(x.value);
});
}, [index, parserList]);
const Widget =
typeof fileFormat === 'string' && fileFormat in FileFormatWidgetMap
? FileFormatWidgetMap[fileFormat as keyof typeof FileFormatWidgetMap]
: () => <></>;
const handleFileTypeChange = useCallback(
(value: FileType) => {
form.setValue(
`setups.${index}.output_format`,
InitialOutputFormatMap[value],
{ shouldDirty: true, shouldValidate: true, shouldTouch: true },
);
},
[form, index],
);
return (
<>
<FormField
control={form.control}
name={`country`}
render={({ field }) => (
<FormItem className="flex-1">
<FormLabel>{t('country')}</FormLabel>
<FormControl>
<SelectWithSearch
{...field}
options={GoogleCountryOptions}
></SelectWithSearch>
</FormControl>
<FormMessage />
</FormItem>
<section
className={cn('space-y-5 py-2.5 rounded-md', {
'bg-state-error-5': isHovering,
})}
>
<div className="flex justify-between items-center">
<span className="text-text-primary text-sm font-medium">
Parser {index + 1}
</span>
{index > 0 && (
<Button variant={'ghost'} onClick={() => remove(index)} ref={ref}>
<Trash2 />
</Button>
)}
/>
<FormField
control={form.control}
name={`language`}
render={({ field }) => (
<FormItem className="flex-1">
<FormLabel>{t('language')}</FormLabel>
<FormControl>
<SelectWithSearch
{...field}
options={GoogleLanguageOptions}
></SelectWithSearch>
</FormControl>
<FormMessage />
</FormItem>
</div>
<RAGFlowFormItem
name={buildFieldNameWithPrefix(`fileFormat`, prefix)}
label={t('dataflow.fileFormats')}
>
{(field) => (
<SelectWithSearch
value={field.value}
onChange={(val) => {
field.onChange(val);
handleFileTypeChange(val as FileType);
}}
options={filteredFileFormatOptions}
></SelectWithSearch>
)}
</RAGFlowFormItem>
<Widget prefix={prefix} fileType={fileFormat as FileType}></Widget>
<OutputFormatFormField
prefix={prefix}
fileType={fileFormat as FileType}
/>
</>
{index < fieldLength - 1 && <Separator />}
</section>
);
}
const ParserForm = ({ node }: INextOperatorForm) => {
const { t } = useTranslate('flow');
const { t } = useTranslation();
const defaultValues = useFormValues(initialParserValues, node);
const form = useForm<z.infer<typeof FormSchema>>({
defaultValues,
resolver: zodResolver(FormSchema),
shouldUnregister: true,
});
const name = 'setups';
const { fields, remove, append } = useFieldArray({
name,
control: form.control,
});
const add = useCallback(() => {
append({
fileFormat: null,
output_format: '',
parse_method: '',
lang: '',
fields: [],
llm_id: '',
});
}, [append]);
useWatchFormChange(node?.id, form);
return (
<Form {...form}>
<FormWrapper>
<FormContainer>
<QueryVariable name="q"></QueryVariable>
</FormContainer>
<FormContainer>
<ApiKeyField placeholder={t('apiKeyPlaceholder')}></ApiKeyField>
<FormField
control={form.control}
name={`start`}
render={({ field }) => (
<FormItem>
<FormLabel>{t('flowStart')}</FormLabel>
<FormControl>
<NumberInput {...field} className="w-full"></NumberInput>
</FormControl>
<FormMessage />
</FormItem>
)}
/>
<FormField
control={form.control}
name={`num`}
render={({ field }) => (
<FormItem>
<FormLabel>{t('flowNum')}</FormLabel>
<FormControl>
<NumberInput {...field} className="w-full"></NumberInput>
</FormControl>
<FormMessage />
</FormItem>
)}
/>
<GoogleFormWidgets></GoogleFormWidgets>
</FormContainer>
</FormWrapper>
<form className="px-5">
{fields.map((field, index) => {
return (
<ParserItem
key={field.id}
name={name}
index={index}
fieldLength={fields.length}
remove={remove}
></ParserItem>
);
})}
<BlockButton onClick={add} type="button" className="mt-2.5">
{t('dataflow.addParser')}
</BlockButton>
</form>
<div className="p-5">
<Output list={outputList}></Output>
</div>

View File

@ -0,0 +1,3 @@
export type CommonProps = {
prefix: string;
};

View File

@ -0,0 +1,44 @@
import { ParseDocumentType } from '@/components/layout-recognize-form-field';
import { isEmpty } from 'lodash';
import { useEffect, useMemo } from 'react';
import { useFormContext, useWatch } from 'react-hook-form';
import { LanguageFormField, ParserMethodFormField } from './common-form-fields';
import { CommonProps } from './interface';
import { useSetInitialLanguage } from './use-set-initial-language';
import { buildFieldNameWithPrefix } from './utils';
export function PdfFormFields({ prefix }: CommonProps) {
const form = useFormContext();
const parseMethodName = buildFieldNameWithPrefix('parse_method', prefix);
const parseMethod = useWatch({
name: parseMethodName,
});
const languageShown = useMemo(() => {
return (
!isEmpty(parseMethod) &&
parseMethod !== ParseDocumentType.DeepDOC &&
parseMethod !== ParseDocumentType.PlainText
);
}, [parseMethod]);
useSetInitialLanguage({ prefix, languageShown });
useEffect(() => {
if (isEmpty(form.getValues(parseMethodName))) {
form.setValue(parseMethodName, ParseDocumentType.DeepDOC, {
shouldValidate: true,
shouldDirty: true,
});
}
}, [form, parseMethodName]);
return (
<>
<ParserMethodFormField prefix={prefix}></ParserMethodFormField>
{languageShown && <LanguageFormField prefix={prefix}></LanguageFormField>}
</>
);
}

View File

@ -0,0 +1,29 @@
import { crossLanguageOptions } from '@/components/cross-language-form-field';
import { isEmpty } from 'lodash';
import { useEffect } from 'react';
import { useFormContext } from 'react-hook-form';
import { buildFieldNameWithPrefix } from './utils';
export function useSetInitialLanguage({
prefix,
languageShown,
}: {
prefix: string;
languageShown: boolean;
}) {
const form = useFormContext();
const lang = form.getValues(buildFieldNameWithPrefix('lang', prefix));
useEffect(() => {
if (languageShown && isEmpty(lang)) {
form.setValue(
buildFieldNameWithPrefix('lang', prefix),
crossLanguageOptions[0].value,
{
shouldValidate: true,
shouldDirty: true,
},
);
}
}, [form, lang, languageShown, prefix]);
}

View File

@ -0,0 +1,3 @@
export function buildFieldNameWithPrefix(name: string, prefix: string) {
return `${prefix}.${name}`;
}

View File

@ -0,0 +1,13 @@
import {
LargeModelFormField,
OutputFormatFormFieldProps,
} from './common-form-fields';
export function VideoFormFields({ prefix }: OutputFormatFormFieldProps) {
return (
<>
{/* Multimodal Model */}
<LargeModelFormField prefix={prefix}></LargeModelFormField>
</>
);
}