mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-19 20:16:49 +08:00
Feat: Use data pipeline to visualize the parsing configuration of the knowledge base (#10423)
### What problem does this PR solve? #9869 ### Type of change - [x] New Feature (non-breaking change which adds functionality) --------- Signed-off-by: dependabot[bot] <support@github.com> Signed-off-by: jinhai <haijin.chn@gmail.com> Signed-off-by: Jin Hai <haijin.chn@gmail.com> Co-authored-by: chanx <1243304602@qq.com> Co-authored-by: balibabu <cike8899@users.noreply.github.com> Co-authored-by: Lynn <lynn_inf@hotmail.com> Co-authored-by: 纷繁下的无奈 <zhileihuang@126.com> Co-authored-by: huangzl <huangzl@shinemo.com> Co-authored-by: writinwaters <93570324+writinwaters@users.noreply.github.com> Co-authored-by: Wilmer <33392318@qq.com> Co-authored-by: Adrian Weidig <adrianweidig@gmx.net> Co-authored-by: Zhichang Yu <yuzhichang@gmail.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: Yongteng Lei <yongtengrey@outlook.com> Co-authored-by: Liu An <asiro@qq.com> Co-authored-by: buua436 <66937541+buua436@users.noreply.github.com> Co-authored-by: BadwomanCraZY <511528396@qq.com> Co-authored-by: cucusenok <31804608+cucusenok@users.noreply.github.com> Co-authored-by: Russell Valentine <russ@coldstonelabs.org> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Billy Bao <newyorkupperbay@gmail.com> Co-authored-by: Zhedong Cen <cenzhedong2@126.com> Co-authored-by: TensorNull <129579691+TensorNull@users.noreply.github.com> Co-authored-by: TensorNull <tensor.null@gmail.com> Co-authored-by: TeslaZY <TeslaZY@outlook.com> Co-authored-by: Ajay <160579663+aybanda@users.noreply.github.com> Co-authored-by: AB <aj@Ajays-MacBook-Air.local> Co-authored-by: 天海蒼灆 <huangaoqin@tecpie.com> Co-authored-by: He Wang <wanghechn@qq.com> Co-authored-by: Atsushi Hatakeyama <atu729@icloud.com> Co-authored-by: Jin Hai <haijin.chn@gmail.com> Co-authored-by: Mohamed Mathari <155896313+melmathari@users.noreply.github.com> Co-authored-by: Mohamed Mathari <nocodeventure@Mac-mini-van-Mohamed.fritz.box> Co-authored-by: Stephen Hu <stephenhu@seismic.com> Co-authored-by: Shaun Zhang <zhangwfjh@users.noreply.github.com> Co-authored-by: zhimeng123 <60221886+zhimeng123@users.noreply.github.com> Co-authored-by: mxc <mxc@example.com> Co-authored-by: Dominik Novotný <50611433+SgtMarmite@users.noreply.github.com> Co-authored-by: EVGENY M <168018528+rjohny55@users.noreply.github.com> Co-authored-by: mcoder6425 <mcoder64@gmail.com> Co-authored-by: lemsn <lemsn@msn.com> Co-authored-by: lemsn <lemsn@126.com> Co-authored-by: Adrian Gora <47756404+adagora@users.noreply.github.com> Co-authored-by: Womsxd <45663319+Womsxd@users.noreply.github.com> Co-authored-by: FatMii <39074672+FatMii@users.noreply.github.com>
This commit is contained in:
@ -18,8 +18,11 @@ import { useFetchKnowledgeBaseConfiguration } from '@/hooks/use-knowledge-reques
|
||||
import { IModalProps } from '@/interfaces/common';
|
||||
import { IParserConfig } from '@/interfaces/database/document';
|
||||
import { IChangeParserConfigRequestBody } from '@/interfaces/request/document';
|
||||
import {
|
||||
ChunkMethodItem,
|
||||
ParseTypeItem,
|
||||
} from '@/pages/dataset/dataset-setting/configuration/common-item';
|
||||
import { zodResolver } from '@hookform/resolvers/zod';
|
||||
import get from 'lodash/get';
|
||||
import omit from 'lodash/omit';
|
||||
import {} from 'module';
|
||||
import { useEffect, useMemo } from 'react';
|
||||
@ -30,24 +33,17 @@ import {
|
||||
AutoKeywordsFormField,
|
||||
AutoQuestionsFormField,
|
||||
} from '../auto-keywords-form-field';
|
||||
import { DataFlowSelect } from '../data-pipeline-select';
|
||||
import { DelimiterFormField } from '../delimiter-form-field';
|
||||
import { EntityTypesFormField } from '../entity-types-form-field';
|
||||
import { ExcelToHtmlFormField } from '../excel-to-html-form-field';
|
||||
import { FormContainer } from '../form-container';
|
||||
import { LayoutRecognizeFormField } from '../layout-recognize-form-field';
|
||||
import { MaxTokenNumberFormField } from '../max-token-number-from-field';
|
||||
import {
|
||||
UseGraphRagFormField,
|
||||
showGraphRagItems,
|
||||
} from '../parse-configuration/graph-rag-form-fields';
|
||||
import RaptorFormFields, {
|
||||
showRaptorParseConfiguration,
|
||||
} from '../parse-configuration/raptor-form-fields';
|
||||
import { ButtonLoading } from '../ui/button';
|
||||
import { Input } from '../ui/input';
|
||||
import { RAGFlowSelect } from '../ui/select';
|
||||
import { DynamicPageRange } from './dynamic-page-range';
|
||||
import { useFetchParserListOnMount, useShowAutoKeywords } from './hooks';
|
||||
import { useShowAutoKeywords } from './hooks';
|
||||
import {
|
||||
useDefaultParserValues,
|
||||
useFillDefaultValueOnMount,
|
||||
@ -62,6 +58,7 @@ interface IProps
|
||||
}> {
|
||||
loading: boolean;
|
||||
parserId: string;
|
||||
pipelineId?: string;
|
||||
parserConfig: IParserConfig;
|
||||
documentExtension: string;
|
||||
documentId: string;
|
||||
@ -80,6 +77,7 @@ export function ChunkMethodDialog({
|
||||
hideModal,
|
||||
onOk,
|
||||
parserId,
|
||||
pipelineId,
|
||||
documentExtension,
|
||||
visible,
|
||||
parserConfig,
|
||||
@ -87,8 +85,6 @@ export function ChunkMethodDialog({
|
||||
}: IProps) {
|
||||
const { t } = useTranslation();
|
||||
|
||||
const { parserList } = useFetchParserListOnMount(documentExtension);
|
||||
|
||||
const { data: knowledgeDetails } = useFetchKnowledgeBaseConfiguration();
|
||||
|
||||
const useGraphRag = useMemo(() => {
|
||||
@ -99,46 +95,59 @@ export function ChunkMethodDialog({
|
||||
|
||||
const fillDefaultParserValue = useFillDefaultValueOnMount();
|
||||
|
||||
const FormSchema = z.object({
|
||||
parser_id: z
|
||||
.string()
|
||||
.min(1, {
|
||||
message: t('common.pleaseSelect'),
|
||||
})
|
||||
.trim(),
|
||||
parser_config: z.object({
|
||||
task_page_size: z.coerce.number().optional(),
|
||||
layout_recognize: z.string().optional(),
|
||||
chunk_token_num: z.coerce.number().optional(),
|
||||
delimiter: z.string().optional(),
|
||||
auto_keywords: z.coerce.number().optional(),
|
||||
auto_questions: z.coerce.number().optional(),
|
||||
html4excel: z.boolean().optional(),
|
||||
raptor: z
|
||||
.object({
|
||||
use_raptor: z.boolean().optional(),
|
||||
prompt: z.string().optional().optional(),
|
||||
max_token: z.coerce.number().optional(),
|
||||
threshold: z.coerce.number().optional(),
|
||||
max_cluster: z.coerce.number().optional(),
|
||||
random_seed: z.coerce.number().optional(),
|
||||
const FormSchema = z
|
||||
.object({
|
||||
parseType: z.number(),
|
||||
parser_id: z
|
||||
.string()
|
||||
.min(1, {
|
||||
message: t('common.pleaseSelect'),
|
||||
})
|
||||
.optional(),
|
||||
graphrag: z.object({
|
||||
use_graphrag: z.boolean().optional(),
|
||||
.trim(),
|
||||
pipeline_id: z.string().optional(),
|
||||
parser_config: z.object({
|
||||
task_page_size: z.coerce.number().optional(),
|
||||
layout_recognize: z.string().optional(),
|
||||
chunk_token_num: z.coerce.number().optional(),
|
||||
delimiter: z.string().optional(),
|
||||
auto_keywords: z.coerce.number().optional(),
|
||||
auto_questions: z.coerce.number().optional(),
|
||||
html4excel: z.boolean().optional(),
|
||||
// raptor: z
|
||||
// .object({
|
||||
// use_raptor: z.boolean().optional(),
|
||||
// prompt: z.string().optional().optional(),
|
||||
// max_token: z.coerce.number().optional(),
|
||||
// threshold: z.coerce.number().optional(),
|
||||
// max_cluster: z.coerce.number().optional(),
|
||||
// random_seed: z.coerce.number().optional(),
|
||||
// })
|
||||
// .optional(),
|
||||
// graphrag: z.object({
|
||||
// use_graphrag: z.boolean().optional(),
|
||||
// }),
|
||||
entity_types: z.array(z.string()).optional(),
|
||||
pages: z
|
||||
.array(z.object({ from: z.coerce.number(), to: z.coerce.number() }))
|
||||
.optional(),
|
||||
}),
|
||||
entity_types: z.array(z.string()).optional(),
|
||||
pages: z
|
||||
.array(z.object({ from: z.coerce.number(), to: z.coerce.number() }))
|
||||
.optional(),
|
||||
}),
|
||||
});
|
||||
})
|
||||
.superRefine((data, ctx) => {
|
||||
if (data.parseType === 2 && !data.pipeline_id) {
|
||||
ctx.addIssue({
|
||||
path: ['pipeline_id'],
|
||||
message: t('common.pleaseSelect'),
|
||||
code: 'custom',
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
const form = useForm<z.infer<typeof FormSchema>>({
|
||||
resolver: zodResolver(FormSchema),
|
||||
defaultValues: {
|
||||
parser_id: parserId,
|
||||
|
||||
parser_id: parserId || '',
|
||||
pipeline_id: pipelineId || '',
|
||||
parseType: pipelineId ? 2 : 1,
|
||||
parser_config: defaultParserValues,
|
||||
},
|
||||
});
|
||||
@ -200,17 +209,19 @@ export function ChunkMethodDialog({
|
||||
const pages =
|
||||
parserConfig?.pages?.map((x) => ({ from: x[0], to: x[1] })) ?? [];
|
||||
form.reset({
|
||||
parser_id: parserId,
|
||||
parser_id: parserId || '',
|
||||
pipeline_id: pipelineId || '',
|
||||
parseType: pipelineId ? 2 : 1,
|
||||
parser_config: fillDefaultParserValue({
|
||||
pages: pages.length > 0 ? pages : [{ from: 1, to: 1024 }],
|
||||
...omit(parserConfig, 'pages'),
|
||||
graphrag: {
|
||||
use_graphrag: get(
|
||||
parserConfig,
|
||||
'graphrag.use_graphrag',
|
||||
useGraphRag,
|
||||
),
|
||||
},
|
||||
// graphrag: {
|
||||
// use_graphrag: get(
|
||||
// parserConfig,
|
||||
// 'graphrag.use_graphrag',
|
||||
// useGraphRag,
|
||||
// ),
|
||||
// },
|
||||
}),
|
||||
});
|
||||
}
|
||||
@ -220,10 +231,20 @@ export function ChunkMethodDialog({
|
||||
knowledgeDetails.parser_config,
|
||||
parserConfig,
|
||||
parserId,
|
||||
pipelineId,
|
||||
useGraphRag,
|
||||
visible,
|
||||
]);
|
||||
|
||||
const parseType = useWatch({
|
||||
control: form.control,
|
||||
name: 'parseType',
|
||||
defaultValue: pipelineId ? 2 : 1,
|
||||
});
|
||||
useEffect(() => {
|
||||
if (parseType === 1) {
|
||||
form.setValue('pipeline_id', '');
|
||||
}
|
||||
}, [parseType, form]);
|
||||
return (
|
||||
<Dialog open onOpenChange={hideModal}>
|
||||
<DialogContent className="max-w-[50vw]">
|
||||
@ -237,7 +258,17 @@ export function ChunkMethodDialog({
|
||||
id={FormId}
|
||||
>
|
||||
<FormContainer>
|
||||
<FormField
|
||||
<ParseTypeItem />
|
||||
{parseType === 1 && <ChunkMethodItem></ChunkMethodItem>}
|
||||
{parseType === 2 && (
|
||||
<DataFlowSelect
|
||||
isMult={false}
|
||||
// toDataPipeline={navigateToAgents}
|
||||
formFieldName="pipeline_id"
|
||||
/>
|
||||
)}
|
||||
|
||||
{/* <FormField
|
||||
control={form.control}
|
||||
name="parser_id"
|
||||
render={({ field }) => (
|
||||
@ -252,9 +283,11 @@ export function ChunkMethodDialog({
|
||||
<FormMessage />
|
||||
</FormItem>
|
||||
)}
|
||||
/>
|
||||
{showPages && <DynamicPageRange></DynamicPageRange>}
|
||||
{showPages && layoutRecognize && (
|
||||
/> */}
|
||||
{showPages && parseType === 1 && (
|
||||
<DynamicPageRange></DynamicPageRange>
|
||||
)}
|
||||
{showPages && parseType === 1 && layoutRecognize && (
|
||||
<FormField
|
||||
control={form.control}
|
||||
name="parser_config.task_page_size"
|
||||
@ -279,50 +312,60 @@ export function ChunkMethodDialog({
|
||||
/>
|
||||
)}
|
||||
</FormContainer>
|
||||
<FormContainer
|
||||
show={showOne || showMaxTokenNumber}
|
||||
className="space-y-3"
|
||||
>
|
||||
{showOne && <LayoutRecognizeFormField></LayoutRecognizeFormField>}
|
||||
{showMaxTokenNumber && (
|
||||
<>
|
||||
<MaxTokenNumberFormField
|
||||
max={
|
||||
selectedTag === DocumentParserType.KnowledgeGraph
|
||||
? 8192 * 2
|
||||
: 2048
|
||||
}
|
||||
></MaxTokenNumberFormField>
|
||||
<DelimiterFormField></DelimiterFormField>
|
||||
</>
|
||||
)}
|
||||
</FormContainer>
|
||||
<FormContainer
|
||||
show={showAutoKeywords(selectedTag) || showExcelToHtml}
|
||||
className="space-y-3"
|
||||
>
|
||||
{showAutoKeywords(selectedTag) && (
|
||||
<>
|
||||
<AutoKeywordsFormField></AutoKeywordsFormField>
|
||||
<AutoQuestionsFormField></AutoQuestionsFormField>
|
||||
</>
|
||||
)}
|
||||
{showExcelToHtml && <ExcelToHtmlFormField></ExcelToHtmlFormField>}
|
||||
</FormContainer>
|
||||
{showRaptorParseConfiguration(
|
||||
selectedTag as DocumentParserType,
|
||||
) && (
|
||||
<FormContainer>
|
||||
<RaptorFormFields></RaptorFormFields>
|
||||
</FormContainer>
|
||||
)}
|
||||
{showGraphRagItems(selectedTag as DocumentParserType) &&
|
||||
useGraphRag && (
|
||||
<FormContainer>
|
||||
<UseGraphRagFormField></UseGraphRagFormField>
|
||||
{parseType === 1 && (
|
||||
<>
|
||||
<FormContainer
|
||||
show={showOne || showMaxTokenNumber}
|
||||
className="space-y-3"
|
||||
>
|
||||
{showOne && (
|
||||
<LayoutRecognizeFormField></LayoutRecognizeFormField>
|
||||
)}
|
||||
{showMaxTokenNumber && (
|
||||
<>
|
||||
<MaxTokenNumberFormField
|
||||
max={
|
||||
selectedTag === DocumentParserType.KnowledgeGraph
|
||||
? 8192 * 2
|
||||
: 2048
|
||||
}
|
||||
></MaxTokenNumberFormField>
|
||||
<DelimiterFormField></DelimiterFormField>
|
||||
</>
|
||||
)}
|
||||
</FormContainer>
|
||||
)}
|
||||
{showEntityTypes && <EntityTypesFormField></EntityTypesFormField>}
|
||||
<FormContainer
|
||||
show={showAutoKeywords(selectedTag) || showExcelToHtml}
|
||||
className="space-y-3"
|
||||
>
|
||||
{showAutoKeywords(selectedTag) && (
|
||||
<>
|
||||
<AutoKeywordsFormField></AutoKeywordsFormField>
|
||||
<AutoQuestionsFormField></AutoQuestionsFormField>
|
||||
</>
|
||||
)}
|
||||
{showExcelToHtml && (
|
||||
<ExcelToHtmlFormField></ExcelToHtmlFormField>
|
||||
)}
|
||||
</FormContainer>
|
||||
{/* {showRaptorParseConfiguration(
|
||||
selectedTag as DocumentParserType,
|
||||
) && (
|
||||
<FormContainer>
|
||||
<RaptorFormFields></RaptorFormFields>
|
||||
</FormContainer>
|
||||
)} */}
|
||||
{/* {showGraphRagItems(selectedTag as DocumentParserType) &&
|
||||
useGraphRag && (
|
||||
<FormContainer>
|
||||
<UseGraphRagFormField></UseGraphRagFormField>
|
||||
</FormContainer>
|
||||
)} */}
|
||||
{showEntityTypes && (
|
||||
<EntityTypesFormField></EntityTypesFormField>
|
||||
)}
|
||||
</>
|
||||
)}
|
||||
</form>
|
||||
</Form>
|
||||
<DialogFooter>
|
||||
|
||||
Reference in New Issue
Block a user