import {
  Dialog,
  DialogContent,
  DialogFooter,
  DialogHeader,
  DialogTitle,
} from '@/components/ui/dialog';
import {
  Form,
  FormControl,
  FormField,
  FormItem,
  FormLabel,
  FormMessage,
} from '@/components/ui/form';
import { DocumentParserType } from '@/constants/knowledge';
import { useFetchKnowledgeBaseConfiguration } from '@/hooks/use-knowledge-request';
import { IModalProps } from '@/interfaces/common';
import { IParserConfig } from '@/interfaces/database/document';
import { IChangeParserConfigRequestBody } from '@/interfaces/request/document';
import {
  ChunkMethodItem,
  ParseTypeItem,
} from '@/pages/dataset/dataset-setting/configuration/common-item';
import { zodResolver } from '@hookform/resolvers/zod';
import get from 'lodash/get';
import omit from 'lodash/omit';
import {} from 'module';
import { useEffect, useMemo } from 'react';
import { useForm, useWatch } from 'react-hook-form';
import { useTranslation } from 'react-i18next';
import { z } from 'zod';
import {
  AutoKeywordsFormField,
  AutoQuestionsFormField,
} from '../auto-keywords-form-field';
import { DataFlowSelect } from '../data-pipeline-select';
import { DelimiterFormField } from '../delimiter-form-field';
import { EntityTypesFormField } from '../entity-types-form-field';
import { ExcelToHtmlFormField } from '../excel-to-html-form-field';
import { FormContainer } from '../form-container';
import { LayoutRecognizeFormField } from '../layout-recognize-form-field';
import { MaxTokenNumberFormField } from '../max-token-number-from-field';
import {
  UseGraphRagFormField,
  showGraphRagItems,
} from '../parse-configuration/graph-rag-form-fields';
import RaptorFormFields, {
  showRaptorParseConfiguration,
} from '../parse-configuration/raptor-form-fields';
import { ButtonLoading } from '../ui/button';
import { Input } from '../ui/input';
import { DynamicPageRange } from './dynamic-page-range';
import { useShowAutoKeywords } from './hooks';
import {
  useDefaultParserValues,
  useFillDefaultValueOnMount,
} from './use-default-parser-values';

// Identifier for the dialog's form element.
// NOTE(review): not referenced by any surviving expression in this view;
// presumably used as the form `id` / submit button `form` attribute — confirm.
const FormId = 'ChunkMethodDialogForm';

/**
 * Props accepted by {@link ChunkMethodDialog}.
 *
 * Modal plumbing (`visible`, `hideModal`, `onOk`) comes from `IModalProps`.
 */
interface IProps
  extends IModalProps<{
    parserId: string;
    parserConfig: IChangeParserConfigRequestBody;
  }> {
  // Destructured by the component; its consumer is not visible in this view.
  loading: boolean;
  // Currently stored parser id; seeds the `parser_id` form field.
  parserId: string;
  // Optional pipeline id; when truthy the form starts with `parseType` 2.
  pipelineId?: string;
  // Stored parser configuration used to populate the form when it opens.
  parserConfig: IParserConfig;
  // Document extension; compared against 'pdf' and 'xlsx' below.
  documentExtension: string;
  // Declared on the props but not read by the component in this file.
  documentId: string;
}

// Parser ids for which the page-range controls are not shown.
const hidePagesChunkMethods = [
  DocumentParserType.Qa,
  DocumentParserType.Table,
  DocumentParserType.Picture,
  DocumentParserType.Resume,
  DocumentParserType.One,
  DocumentParserType.KnowledgeGraph,
];

/**
 * Dialog that edits a document's parser id / pipeline id and parser
 * configuration.
 *
 * The form is validated with a zod schema. On submit the page ranges are
 * converted from `{ from, to }` objects to `[from, to]` tuples, the result is
 * handed to `onOk`, and the dialog is closed when `onOk` resolves to a truthy
 * value.
 */
export function ChunkMethodDialog({
  hideModal,
  onOk,
  parserId,
  pipelineId,
  documentExtension,
  visible,
  parserConfig,
  loading,
}: IProps) {
  const { t } = useTranslation();

  const { data: knowledgeDetails } = useFetchKnowledgeBaseConfiguration();

  // Knowledge-base level GraphRAG flag; used as the fallback when the
  // document's own parser config does not carry one.
  const useGraphRag = useMemo(() => {
    return knowledgeDetails.parser_config?.graphrag?.use_graphrag;
  }, [knowledgeDetails.parser_config?.graphrag?.use_graphrag]);

  const defaultParserValues = useDefaultParserValues();

  const fillDefaultParserValue = useFillDefaultValueOnMount();

  // Built inside the component because the validation message is translated.
  const FormSchema = z
    .object({
      parseType: z.number(),
      parser_id: z
        .string()
        .min(1, {
          message: t('common.pleaseSelect'),
        })
        .trim(),
      pipeline_id: z.string().optional(),
      parser_config: z.object({
        task_page_size: z.coerce.number().optional(),
        layout_recognize: z.string().optional(),
        chunk_token_num: z.coerce.number().optional(),
        delimiter: z.string().optional(),
        auto_keywords: z.coerce.number().optional(),
        auto_questions: z.coerce.number().optional(),
        html4excel: z.boolean().optional(),
        raptor: z
          .object({
            use_raptor: z.boolean().optional(),
            prompt: z.string().optional(),
            max_token: z.coerce.number().optional(),
            threshold: z.coerce.number().optional(),
            max_cluster: z.coerce.number().optional(),
            random_seed: z.coerce.number().optional(),
          })
          .optional(),
        graphrag: z.object({
          use_graphrag: z.boolean().optional(),
        }),
        entity_types: z.array(z.string()).optional(),
        pages: z
          .array(z.object({ from: z.coerce.number(), to: z.coerce.number() }))
          .optional(),
      }),
    })
    .superRefine((data, ctx) => {
      // parseType 2 is the mode that requires a pipeline to be selected.
      if (data.parseType === 2 && !data.pipeline_id) {
        ctx.addIssue({
          path: ['pipeline_id'],
          message: t('common.pleaseSelect'),
          code: 'custom',
        });
      }
    });

  const form = useForm<z.infer<typeof FormSchema>>({
    resolver: zodResolver(FormSchema),
    defaultValues: {
      parser_id: parserId || '',
      pipeline_id: pipelineId || '',
      parseType: pipelineId ? 2 : 1,
      parser_config: defaultParserValues,
    },
  });

  const layoutRecognize = useWatch({
    name: 'parser_config.layout_recognize',
    control: form.control,
  });

  const selectedTag = useWatch({
    name: 'parser_id',
    control: form.control,
  });

  const isPdf = documentExtension === 'pdf';

  // Page-range controls: PDF only, and only for parsers not in the hide list.
  const showPages = useMemo(() => {
    return isPdf && hidePagesChunkMethods.every((x) => x !== selectedTag);
  }, [selectedTag, isPdf]);

  // Same rule as `showPages`, except that the `One` parser is allowed.
  const showOne = useMemo(() => {
    return (
      isPdf &&
      hidePagesChunkMethods
        .filter((x) => x !== DocumentParserType.One)
        .every((x) => x !== selectedTag)
    );
  }, [selectedTag, isPdf]);

  const showMaxTokenNumber =
    selectedTag === DocumentParserType.Naive ||
    selectedTag === DocumentParserType.KnowledgeGraph;

  const showEntityTypes = selectedTag === DocumentParserType.KnowledgeGraph;

  const showExcelToHtml =
    selectedTag === DocumentParserType.Naive && documentExtension === 'xlsx';

  const showAutoKeywords = useShowAutoKeywords();

  /**
   * Converts the validated form values into the payload passed to `onOk`
   * (page ranges become `[from, to]` tuples, defaulting to an empty list) and
   * hides the dialog when `onOk` resolves to a truthy value.
   */
  async function onSubmit(data: z.infer<typeof FormSchema>) {
    const nextData = {
      ...data,
      parser_config: {
        ...data.parser_config,
        pages: data.parser_config?.pages?.map((x) => [x.from, x.to]) ?? [],
      },
    };
    const ret = await onOk?.(nextData);
    if (ret) {
      hideModal?.();
    }
  }

  // While the dialog is visible, (re)populate the form from the incoming
  // props: stored `[from, to]` tuples become `{ from, to }` rows, an empty
  // list falls back to a single 1-1024 range, and the GraphRAG flag falls
  // back to the knowledge-base level value when the document has none.
  useEffect(() => {
    if (visible) {
      const pages =
        parserConfig?.pages?.map((x) => ({ from: x[0], to: x[1] })) ?? [];
      form.reset({
        parser_id: parserId || '',
        pipeline_id: pipelineId || '',
        parseType: pipelineId ? 2 : 1,
        parser_config: fillDefaultParserValue({
          pages: pages.length > 0 ? pages : [{ from: 1, to: 1024 }],
          ...omit(parserConfig, 'pages'),
          graphrag: {
            use_graphrag: get(
              parserConfig,
              'graphrag.use_graphrag',
              useGraphRag,
            ),
          },
        }),
      });
    }
  }, [
    fillDefaultParserValue,
    form,
    knowledgeDetails.parser_config,
    parserConfig,
    parserId,
    pipelineId,
    useGraphRag,
    visible,
  ]);

  const parseType = useWatch({
    control: form.control,
    name: 'parseType',
    defaultValue: pipelineId ? 2 : 1,
  });

  // NOTE(review): the element tags of the render tree below are absent from
  // this copy of the file (only the embedded expressions remain), so the
  // markup is left exactly as found — restore it from version control.
  return ( {t('knowledgeDetails.chunkMethod')}
{parseType === 1 && } {parseType === 2 && ( )} {/* ( {t('knowledgeDetails.chunkMethod')} )} /> */} {showPages && parseType === 1 && ( )} {showPages && parseType === 1 && layoutRecognize && ( ( {t('knowledgeDetails.taskPageSize')} )} /> )} {parseType === 1 && ( <> {showOne && ( )} {showMaxTokenNumber && ( <> )} {showAutoKeywords(selectedTag) && ( <> )} {showExcelToHtml && ( )} {showRaptorParseConfiguration( selectedTag as DocumentParserType, ) && ( )} {showGraphRagItems(selectedTag as DocumentParserType) && useGraphRag && ( )} {showEntityTypes && ( )} )}
{t('common.save')}
); }