From c21cea20387bfcdeef22928b0a5279ccabf7cce5 Mon Sep 17 00:00:00 2001 From: chanx <1243304602@qq.com> Date: Sat, 11 Oct 2025 18:45:55 +0800 Subject: [PATCH] Fix: Added table of contents extraction functionality and optimized form item layout #9869 (#10492) ### What problem does this PR solve? Fix: Added table of contents extraction functionality and optimized form item layout #9869 - Added `EnableTocToggle` component to toggle table of contents extraction on and off - Added multiple parser configuration components (such as naive, book, laws, etc.), displaying different parser components based on built-in slicing methods ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- .../components/chunk-method-dialog/index.tsx | 5 +- .../use-default-parser-values.ts | 1 + .../components/data-pipeline-select/index.tsx | 138 ++++++++++----- web/src/components/delimiter-form-field.tsx | 2 +- .../components/excel-to-html-form-field.tsx | 2 +- .../layout-recognize-form-field.tsx | 2 +- .../max-token-number-from-field.tsx | 2 +- .../components/slider-input-form-field.tsx | 2 +- web/src/components/ui/radio.tsx | 3 +- web/src/hooks/logic-hooks/navigate-hooks.ts | 3 +- web/src/locales/en.ts | 7 +- web/src/locales/zh.ts | 7 +- .../dataset-setting/chunk-method-form.tsx | 64 +++++++ .../dataset-setting/components/tag-item.tsx | 25 +-- .../dataset-setting/configuration/audio.tsx | 20 +++ .../dataset-setting/configuration/book.tsx | 28 +++ .../configuration/common-item.tsx | 61 ++++++- .../dataset-setting/configuration/email.tsx | 18 ++ .../configuration/knowledge-graph.tsx | 15 ++ .../dataset-setting/configuration/laws.tsx | 29 ++++ .../dataset-setting/configuration/manual.tsx | 27 +++ .../dataset-setting/configuration/naive.tsx | 33 ++++ .../dataset-setting/configuration/one.tsx | 21 +++ .../dataset-setting/configuration/paper.tsx | 28 +++ .../dataset-setting/configuration/picture.tsx | 18 ++ .../configuration/presentation.tsx | 29 ++++ .../dataset-setting/configuration/qa.tsx | 10 ++ .../dataset-setting/configuration/resume.tsx | 10 ++ .../dataset-setting/configuration/table.tsx | 12 ++ .../dataset-setting/configuration/tag.tsx | 5 + .../dataset/dataset-setting/form-schema.ts | 161 ++++++++++-------- .../pages/dataset/dataset-setting/index.tsx | 64 +++++-- .../dataset/dataset-setting/saving-button.tsx | 1 + web/src/pages/dataset/sidebar/index.tsx | 10 +- .../datasets/dataset-creating-dialog.tsx | 4 +- 35 files changed, 694 insertions(+), 173 deletions(-) create mode 100644 web/src/pages/dataset/dataset-setting/chunk-method-form.tsx create mode 100644 web/src/pages/dataset/dataset-setting/configuration/audio.tsx create mode 100644 web/src/pages/dataset/dataset-setting/configuration/book.tsx create mode 100644 web/src/pages/dataset/dataset-setting/configuration/email.tsx create mode 100644 web/src/pages/dataset/dataset-setting/configuration/knowledge-graph.tsx create mode 100644 web/src/pages/dataset/dataset-setting/configuration/laws.tsx create mode 100644 web/src/pages/dataset/dataset-setting/configuration/manual.tsx create mode 100644 web/src/pages/dataset/dataset-setting/configuration/naive.tsx create mode 100644 web/src/pages/dataset/dataset-setting/configuration/one.tsx create mode 100644 web/src/pages/dataset/dataset-setting/configuration/paper.tsx create mode 100644 web/src/pages/dataset/dataset-setting/configuration/picture.tsx create mode 100644 web/src/pages/dataset/dataset-setting/configuration/presentation.tsx create mode 100644 web/src/pages/dataset/dataset-setting/configuration/qa.tsx create mode 100644 web/src/pages/dataset/dataset-setting/configuration/resume.tsx create mode 100644 web/src/pages/dataset/dataset-setting/configuration/table.tsx create mode 100644 web/src/pages/dataset/dataset-setting/configuration/tag.tsx diff --git a/web/src/components/chunk-method-dialog/index.tsx b/web/src/components/chunk-method-dialog/index.tsx index 8c1bb855e..f73680ad8 100644 --- a/web/src/components/chunk-method-dialog/index.tsx +++ b/web/src/components/chunk-method-dialog/index.tsx @@ -20,6 +20,7 @@ import { IParserConfig } from '@/interfaces/database/document'; import { IChangeParserConfigRequestBody } from '@/interfaces/request/document'; import { ChunkMethodItem, + EnableTocToggle, ParseTypeItem, } from '@/pages/dataset/dataset-setting/configuration/common-item'; import { zodResolver } from '@hookform/resolvers/zod'; @@ -113,6 +114,7 @@ export function ChunkMethodDialog({ auto_keywords: z.coerce.number().optional(), auto_questions: z.coerce.number().optional(), html4excel: z.boolean().optional(), + toc_extraction: z.boolean().optional(), // raptor: z // .object({ // use_raptor: z.boolean().optional(), @@ -247,7 +249,7 @@ export function ChunkMethodDialog({ }, [parseType, form]); return ( - + {t('knowledgeDetails.chunkMethod')} @@ -338,6 +340,7 @@ export function ChunkMethodDialog({ show={showAutoKeywords(selectedTag) || showExcelToHtml} className="space-y-3" > + {showAutoKeywords(selectedTag) && ( <> diff --git a/web/src/components/chunk-method-dialog/use-default-parser-values.ts b/web/src/components/chunk-method-dialog/use-default-parser-values.ts index 829b98605..238047db6 100644 --- a/web/src/components/chunk-method-dialog/use-default-parser-values.ts +++ b/web/src/components/chunk-method-dialog/use-default-parser-values.ts @@ -15,6 +15,7 @@ export function useDefaultParserValues() { auto_keywords: 0, auto_questions: 0, html4excel: false, + toc_extraction: false, // raptor: { // use_raptor: false, // prompt: t('knowledgeConfiguration.promptText'), diff --git a/web/src/components/data-pipeline-select/index.tsx b/web/src/components/data-pipeline-select/index.tsx index 76dd15fa1..18e246529 100644 --- a/web/src/components/data-pipeline-select/index.tsx +++ b/web/src/components/data-pipeline-select/index.tsx @@ -1,5 +1,7 @@ import { AgentCategory } from '@/constants/agent'; +import { FormLayout } from '@/constants/form'; import { useTranslate } from '@/hooks/common-hooks'; +import { useNavigatePage } from '@/hooks/logic-hooks/navigate-hooks'; import { useFetchAgentList } from '@/hooks/use-agent-request'; import { buildSelectOptions } from '@/utils/component-util'; import { ArrowUpRight } from 'lucide-react'; @@ -21,18 +23,27 @@ export interface IDataPipelineSelectNode { } interface IProps { - toDataPipeline?: () => void; + showToDataPipeline?: boolean; formFieldName: string; isMult?: boolean; setDataList?: (data: IDataPipelineSelectNode[]) => void; + layout?: FormLayout; } export function DataFlowSelect(props: IProps) { - const { toDataPipeline, formFieldName, isMult = false, setDataList } = props; + const { + showToDataPipeline, + formFieldName, + isMult = false, + setDataList, + layout = FormLayout.Vertical, + } = props; + const { t } = useTranslate('knowledgeConfiguration'); const form = useFormContext(); + const { navigateToAgents } = useNavigatePage(); const toDataPipLine = () => { - toDataPipeline?.(); + navigateToAgents(); }; const { data: dataPipelineOptions } = useFetchAgentList({ canvas_category: AgentCategory.DataflowCanvas, @@ -69,47 +80,92 @@ export function DataFlowSelect(props: IProps) { name={formFieldName} render={({ field }) => ( -
-
- - {t('dataPipeline')} - - {toDataPipeline && ( -
+
+ - {t('buildItFromScratch')} - -
- )} -
+ {t('manualSetup')} + + {showToDataPipeline && ( +
+ {t('buildItFromScratch')} + +
+ )} +
-
- - <> - {!isMult && ( - - )} - {isMult && ( - - )} - - +
+ + <> + {!isMult && ( + + )} + {isMult && ( + + )} + + +
-
+ )} + {layout === FormLayout.Horizontal && ( +
+
+ + {t('manualSetup')} + +
+ +
+ {showToDataPipeline && ( +
+ {t('buildItFromScratch')} + +
+ )} + + <> + {!isMult && ( + + )} + {isMult && ( + + )} + + +
+
+ )}
diff --git a/web/src/components/delimiter-form-field.tsx b/web/src/components/delimiter-form-field.tsx index ef23fec3b..271721021 100644 --- a/web/src/components/delimiter-form-field.tsx +++ b/web/src/components/delimiter-form-field.tsx @@ -61,7 +61,7 @@ export function DelimiterFormField() { {t('knowledgeDetails.delimiter')} diff --git a/web/src/components/excel-to-html-form-field.tsx b/web/src/components/excel-to-html-form-field.tsx index a51587405..13ff8b821 100644 --- a/web/src/components/excel-to-html-form-field.tsx +++ b/web/src/components/excel-to-html-form-field.tsx @@ -28,7 +28,7 @@ export function ExcelToHtmlFormField() {
{t('html4excel')} diff --git a/web/src/components/layout-recognize-form-field.tsx b/web/src/components/layout-recognize-form-field.tsx index 0e5b660bb..d0991c0db 100644 --- a/web/src/components/layout-recognize-form-field.tsx +++ b/web/src/components/layout-recognize-form-field.tsx @@ -79,7 +79,7 @@ export function LayoutRecognizeFormField({ > diff --git a/web/src/components/max-token-number-from-field.tsx b/web/src/components/max-token-number-from-field.tsx index c4e07da4f..b01598d93 100644 --- a/web/src/components/max-token-number-from-field.tsx +++ b/web/src/components/max-token-number-from-field.tsx @@ -17,7 +17,7 @@ export function MaxTokenNumberFormField({ max = 2048, initialValue }: IProps) { tooltip={t('chunkTokenNumberTip')} max={max} defaultValue={initialValue ?? 0} - layout={FormLayout.Vertical} + layout={FormLayout.Horizontal} > ); } diff --git a/web/src/components/slider-input-form-field.tsx b/web/src/components/slider-input-form-field.tsx index 7550e9653..8972222a8 100644 --- a/web/src/components/slider-input-form-field.tsx +++ b/web/src/components/slider-input-form-field.tsx @@ -36,7 +36,7 @@ export function SliderInputFormField({ tooltip, defaultValue, className, - layout = FormLayout.Vertical, + layout = FormLayout.Horizontal, }: SliderInputFormFieldProps) { const form = useFormContext(); diff --git a/web/src/components/ui/radio.tsx b/web/src/components/ui/radio.tsx index 5db0fca43..eeea31201 100644 --- a/web/src/components/ui/radio.tsx +++ b/web/src/components/ui/radio.tsx @@ -1,5 +1,4 @@ import { cn } from '@/lib/utils'; -import { Radio as LucideRadio } from 'lucide-react'; import React, { useContext, useState } from 'react'; const RadioGroupContext = React.createContext<{ @@ -57,7 +56,7 @@ function Radio({ value, checked, disabled, onChange, children }: RadioProps) { onClick={handleClick} > {isChecked && ( - +
)} {children && {children}} diff --git a/web/src/hooks/logic-hooks/navigate-hooks.ts b/web/src/hooks/logic-hooks/navigate-hooks.ts index 642b8c3c0..6f852ccfa 100644 --- a/web/src/hooks/logic-hooks/navigate-hooks.ts +++ b/web/src/hooks/logic-hooks/navigate-hooks.ts @@ -19,7 +19,8 @@ export const useNavigatePage = () => { const navigateToDataset = useCallback( (id: string) => () => { - navigate(`${Routes.DatasetBase}${Routes.DataSetOverview}/${id}`); + // navigate(`${Routes.DatasetBase}${Routes.DataSetOverview}/${id}`); + navigate(`${Routes.Dataset}/${id}`); }, [navigate], ); diff --git a/web/src/locales/en.ts b/web/src/locales/en.ts index 7b29800a3..d112d0891 100644 --- a/web/src/locales/en.ts +++ b/web/src/locales/en.ts @@ -126,8 +126,8 @@ export default { startDate: 'Start Date', source: 'Source', fileName: 'File Name', - datasetLogs: 'Dataset Logs', - fileLogs: 'File Logs', + datasetLogs: 'Dataset', + fileLogs: 'File', overview: 'Overview', success: 'Success', failed: 'Failed', @@ -270,6 +270,9 @@ export default { reRankModelWaring: 'Re-rank model is very time consuming.', }, knowledgeConfiguration: { + tocExtraction: 'toc toggle', + tocExtractionTip: + " For existing chunks, generate a hierarchical table of contents (one directory per file). During queries, when Directory Enhancement is activated, the system will use a large model to determine which directory items are relevant to the user's question, thereby identifying the relevant chunks.", deleteGenerateModalContent: `

Deleting the generated {{type}} results will remove all derived entities and relationships from this dataset. diff --git a/web/src/locales/zh.ts b/web/src/locales/zh.ts index e9e0db3ce..f58209995 100644 --- a/web/src/locales/zh.ts +++ b/web/src/locales/zh.ts @@ -114,8 +114,8 @@ export default { startDate: '开始时间', source: '来源', fileName: '文件名', - datasetLogs: '数据集日志', - fileLogs: '文件日志', + datasetLogs: '数据集', + fileLogs: '文件', overview: '概览', success: '成功', failed: '失败', @@ -255,6 +255,9 @@ export default { theDocumentBeingParsedCannotBeDeleted: '正在解析的文档不能被删除', }, knowledgeConfiguration: { + tocExtraction: '目录提取', + tocExtractionTip: + '对于已有的chunk生成层级结构的目录信息(每个文件一个目录)。在查询时,激活`目录增强`后,系统会用大模型去判断用户问题和哪些目录项相关,从而找到相关的chunk。', deleteGenerateModalContent: `

删除生成的 {{type}} 结果 将从此数据集中移除所有派生实体和关系。 diff --git a/web/src/pages/dataset/dataset-setting/chunk-method-form.tsx b/web/src/pages/dataset/dataset-setting/chunk-method-form.tsx new file mode 100644 index 000000000..8d6debc16 --- /dev/null +++ b/web/src/pages/dataset/dataset-setting/chunk-method-form.tsx @@ -0,0 +1,64 @@ +import { useFormContext, useWatch } from 'react-hook-form'; + +import { DocumentParserType } from '@/constants/knowledge'; +import { useMemo } from 'react'; +import { AudioConfiguration } from './configuration/audio'; +import { BookConfiguration } from './configuration/book'; +import { EmailConfiguration } from './configuration/email'; +import { KnowledgeGraphConfiguration } from './configuration/knowledge-graph'; +import { LawsConfiguration } from './configuration/laws'; +import { ManualConfiguration } from './configuration/manual'; +import { NaiveConfiguration } from './configuration/naive'; +import { OneConfiguration } from './configuration/one'; +import { PaperConfiguration } from './configuration/paper'; +import { PictureConfiguration } from './configuration/picture'; +import { PresentationConfiguration } from './configuration/presentation'; +import { QAConfiguration } from './configuration/qa'; +import { ResumeConfiguration } from './configuration/resume'; +import { TableConfiguration } from './configuration/table'; +import { TagConfiguration } from './configuration/tag'; + +const ConfigurationComponentMap = { + [DocumentParserType.Naive]: NaiveConfiguration, + [DocumentParserType.Qa]: QAConfiguration, + [DocumentParserType.Resume]: ResumeConfiguration, + [DocumentParserType.Manual]: ManualConfiguration, + [DocumentParserType.Table]: TableConfiguration, + [DocumentParserType.Paper]: PaperConfiguration, + [DocumentParserType.Book]: BookConfiguration, + [DocumentParserType.Laws]: LawsConfiguration, + [DocumentParserType.Presentation]: PresentationConfiguration, + [DocumentParserType.Picture]: PictureConfiguration, + [DocumentParserType.One]: OneConfiguration, + [DocumentParserType.Audio]: AudioConfiguration, + [DocumentParserType.Email]: EmailConfiguration, + [DocumentParserType.Tag]: TagConfiguration, + [DocumentParserType.KnowledgeGraph]: KnowledgeGraphConfiguration, +}; + +function EmptyComponent() { + return

; +} + +export function ChunkMethodForm() { + const form = useFormContext(); + + const finalParserId: DocumentParserType = useWatch({ + control: form.control, + name: 'parser_id', + }); + + const ConfigurationComponent = useMemo(() => { + return finalParserId + ? ConfigurationComponentMap[finalParserId] + : EmptyComponent; + }, [finalParserId]); + + return ( +
+
+ +
+
+ ); +} diff --git a/web/src/pages/dataset/dataset-setting/components/tag-item.tsx b/web/src/pages/dataset/dataset-setting/components/tag-item.tsx index c5ccecca4..241fc240a 100644 --- a/web/src/pages/dataset/dataset-setting/components/tag-item.tsx +++ b/web/src/pages/dataset/dataset-setting/components/tag-item.tsx @@ -8,8 +8,9 @@ import { FormMessage, } from '@/components/ui/form'; import { MultiSelect } from '@/components/ui/multi-select'; +import { FormLayout } from '@/constants/form'; import { useFetchKnowledgeList } from '@/hooks/knowledge-hooks'; -import { Flex, Form, InputNumber, Select, Slider, Space } from 'antd'; +import { Form, Select, Space } from 'antd'; import DOMPurify from 'dompurify'; import { useFormContext, useWatch } from 'react-hook-form'; import { useTranslation } from 'react-i18next'; @@ -44,7 +45,7 @@ export const TagSetItem = () => {
{ max={10} min={1} defaultValue={3} + layout={FormLayout.Horizontal} > ); - - return ( - - - - - - - - - - - - - ); }; export function TagItems() { diff --git a/web/src/pages/dataset/dataset-setting/configuration/audio.tsx b/web/src/pages/dataset/dataset-setting/configuration/audio.tsx new file mode 100644 index 000000000..8cc6ceff8 --- /dev/null +++ b/web/src/pages/dataset/dataset-setting/configuration/audio.tsx @@ -0,0 +1,20 @@ +import { + AutoKeywordsFormField, + AutoQuestionsFormField, +} from '@/components/auto-keywords-form-field'; +import { ConfigurationFormContainer } from '../configuration-form-container'; + +import { TagItems } from '../components/tag-item'; + +export function AudioConfiguration() { + return ( + + <> + + + + + + + ); +} diff --git a/web/src/pages/dataset/dataset-setting/configuration/book.tsx b/web/src/pages/dataset/dataset-setting/configuration/book.tsx new file mode 100644 index 000000000..919c35efd --- /dev/null +++ b/web/src/pages/dataset/dataset-setting/configuration/book.tsx @@ -0,0 +1,28 @@ +import { + AutoKeywordsFormField, + AutoQuestionsFormField, +} from '@/components/auto-keywords-form-field'; +import { LayoutRecognizeFormField } from '@/components/layout-recognize-form-field'; +import { TagItems } from '../components/tag-item'; +import { + ConfigurationFormContainer, + MainContainer, +} from '../configuration-form-container'; + +export function BookConfiguration() { + return ( + + + + + + + + + + + + + + ); +} diff --git a/web/src/pages/dataset/dataset-setting/configuration/common-item.tsx b/web/src/pages/dataset/dataset-setting/configuration/common-item.tsx index a92937e3e..031bce547 100644 --- a/web/src/pages/dataset/dataset-setting/configuration/common-item.tsx +++ b/web/src/pages/dataset/dataset-setting/configuration/common-item.tsx @@ -42,7 +42,7 @@ export function ChunkMethodItem(props: IProps) { 'w-1/4 whitespace-pre-wrap': line === 1, })} > - {t('dataPipeline')} + {t('builtIn')}
@@ -115,7 +115,7 @@ export function EmbeddingModelItem({ line = 1, isEdit = true }: IProps) { ); } -export function ParseTypeItem() { +export function ParseTypeItem({ line = 2 }: { line?: number }) { const { t } = useTranslate('knowledgeConfiguration'); const form = useFormContext(); @@ -125,17 +125,26 @@ export function ParseTypeItem() { name={'parseType'} render={({ field }) => ( -
+
{t('parseType')} -
+
-
+
{t('builtIn')} {t('manualSetup')}
@@ -144,7 +153,7 @@ export function ParseTypeItem() {
-
+
@@ -188,3 +197,39 @@ export function EnableAutoGenerateItem() { /> ); } + +export function EnableTocToggle() { + const { t } = useTranslate('knowledgeConfiguration'); + const form = useFormContext(); + + return ( + ( + +
+ + {t('tocExtraction')} + +
+ + + +
+
+
+
+ +
+
+ )} + /> + ); +} diff --git a/web/src/pages/dataset/dataset-setting/configuration/email.tsx b/web/src/pages/dataset/dataset-setting/configuration/email.tsx new file mode 100644 index 000000000..7af07fcbe --- /dev/null +++ b/web/src/pages/dataset/dataset-setting/configuration/email.tsx @@ -0,0 +1,18 @@ +import { + AutoKeywordsFormField, + AutoQuestionsFormField, +} from '@/components/auto-keywords-form-field'; +import { TagItems } from '../components/tag-item'; +import { ConfigurationFormContainer } from '../configuration-form-container'; + +export function EmailConfiguration() { + return ( + + <> + + + + + + ); +} diff --git a/web/src/pages/dataset/dataset-setting/configuration/knowledge-graph.tsx b/web/src/pages/dataset/dataset-setting/configuration/knowledge-graph.tsx new file mode 100644 index 000000000..8912467fa --- /dev/null +++ b/web/src/pages/dataset/dataset-setting/configuration/knowledge-graph.tsx @@ -0,0 +1,15 @@ +import { DelimiterFormField } from '@/components/delimiter-form-field'; +import { EntityTypesFormField } from '@/components/entity-types-form-field'; +import { MaxTokenNumberFormField } from '@/components/max-token-number-from-field'; + +export function KnowledgeGraphConfiguration() { + return ( + <> + <> + + + + + + ); +} diff --git a/web/src/pages/dataset/dataset-setting/configuration/laws.tsx b/web/src/pages/dataset/dataset-setting/configuration/laws.tsx new file mode 100644 index 000000000..2d9658637 --- /dev/null +++ b/web/src/pages/dataset/dataset-setting/configuration/laws.tsx @@ -0,0 +1,29 @@ +import { + AutoKeywordsFormField, + AutoQuestionsFormField, +} from '@/components/auto-keywords-form-field'; +import { LayoutRecognizeFormField } from '@/components/layout-recognize-form-field'; +import { TagItems } from '../components/tag-item'; +import { + ConfigurationFormContainer, + MainContainer, +} from '../configuration-form-container'; + +export function LawsConfiguration() { + return ( + + + + + + + + + + + + + + + ); +} diff --git a/web/src/pages/dataset/dataset-setting/configuration/manual.tsx b/web/src/pages/dataset/dataset-setting/configuration/manual.tsx new file mode 100644 index 000000000..ccb1e99a5 --- /dev/null +++ b/web/src/pages/dataset/dataset-setting/configuration/manual.tsx @@ -0,0 +1,27 @@ +import { + AutoKeywordsFormField, + AutoQuestionsFormField, +} from '@/components/auto-keywords-form-field'; +import { LayoutRecognizeFormField } from '@/components/layout-recognize-form-field'; +import { TagItems } from '../components/tag-item'; +import { + ConfigurationFormContainer, + MainContainer, +} from '../configuration-form-container'; + +export function ManualConfiguration() { + return ( + + + + + + + + + + + + + ); +} diff --git a/web/src/pages/dataset/dataset-setting/configuration/naive.tsx b/web/src/pages/dataset/dataset-setting/configuration/naive.tsx new file mode 100644 index 000000000..fd1b522df --- /dev/null +++ b/web/src/pages/dataset/dataset-setting/configuration/naive.tsx @@ -0,0 +1,33 @@ +import { + AutoKeywordsFormField, + AutoQuestionsFormField, +} from '@/components/auto-keywords-form-field'; +import { DelimiterFormField } from '@/components/delimiter-form-field'; +import { ExcelToHtmlFormField } from '@/components/excel-to-html-form-field'; +import { LayoutRecognizeFormField } from '@/components/layout-recognize-form-field'; +import { MaxTokenNumberFormField } from '@/components/max-token-number-from-field'; +import { TagItems } from '../components/tag-item'; +import { + ConfigurationFormContainer, + MainContainer, +} from '../configuration-form-container'; +import { EnableTocToggle } from './common-item'; + +export function NaiveConfiguration() { + return ( + + + + + + + + + + + + + + + ); +} diff --git a/web/src/pages/dataset/dataset-setting/configuration/one.tsx b/web/src/pages/dataset/dataset-setting/configuration/one.tsx new file mode 100644 index 000000000..d84b043ec --- /dev/null +++ b/web/src/pages/dataset/dataset-setting/configuration/one.tsx @@ -0,0 +1,21 @@ +import { + AutoKeywordsFormField, + AutoQuestionsFormField, +} from '@/components/auto-keywords-form-field'; +import { LayoutRecognizeFormField } from '@/components/layout-recognize-form-field'; +import { TagItems } from '../components/tag-item'; +import { ConfigurationFormContainer } from '../configuration-form-container'; + +export function OneConfiguration() { + return ( + + + <> + + + + + + + ); +} diff --git a/web/src/pages/dataset/dataset-setting/configuration/paper.tsx b/web/src/pages/dataset/dataset-setting/configuration/paper.tsx new file mode 100644 index 000000000..c24089bd3 --- /dev/null +++ b/web/src/pages/dataset/dataset-setting/configuration/paper.tsx @@ -0,0 +1,28 @@ +import { + AutoKeywordsFormField, + AutoQuestionsFormField, +} from '@/components/auto-keywords-form-field'; +import { LayoutRecognizeFormField } from '@/components/layout-recognize-form-field'; +import { TagItems } from '../components/tag-item'; +import { + ConfigurationFormContainer, + MainContainer, +} from '../configuration-form-container'; + +export function PaperConfiguration() { + return ( + + + + + + + + + + + + + + ); +} diff --git a/web/src/pages/dataset/dataset-setting/configuration/picture.tsx b/web/src/pages/dataset/dataset-setting/configuration/picture.tsx new file mode 100644 index 000000000..6d4e6b557 --- /dev/null +++ b/web/src/pages/dataset/dataset-setting/configuration/picture.tsx @@ -0,0 +1,18 @@ +import { + AutoKeywordsFormField, + AutoQuestionsFormField, +} from '@/components/auto-keywords-form-field'; +import { TagItems } from '../components/tag-item'; +import { ConfigurationFormContainer } from '../configuration-form-container'; + +export function PictureConfiguration() { + return ( + + <> + + + + + + ); +} diff --git a/web/src/pages/dataset/dataset-setting/configuration/presentation.tsx b/web/src/pages/dataset/dataset-setting/configuration/presentation.tsx new file mode 100644 index 000000000..ab4bc0796 --- /dev/null +++ b/web/src/pages/dataset/dataset-setting/configuration/presentation.tsx @@ -0,0 +1,29 @@ +import { + AutoKeywordsFormField, + AutoQuestionsFormField, +} from '@/components/auto-keywords-form-field'; +import { LayoutRecognizeFormField } from '@/components/layout-recognize-form-field'; +import { TagItems } from '../components/tag-item'; +import { + ConfigurationFormContainer, + MainContainer, +} from '../configuration-form-container'; + +export function PresentationConfiguration() { + return ( + + + + + + + + + + + + + + + ); +} diff --git a/web/src/pages/dataset/dataset-setting/configuration/qa.tsx b/web/src/pages/dataset/dataset-setting/configuration/qa.tsx new file mode 100644 index 000000000..4d64b116a --- /dev/null +++ b/web/src/pages/dataset/dataset-setting/configuration/qa.tsx @@ -0,0 +1,10 @@ +import { TagItems } from '../components/tag-item'; +import { ConfigurationFormContainer } from '../configuration-form-container'; + +export function QAConfiguration() { + return ( + + + + ); +} diff --git a/web/src/pages/dataset/dataset-setting/configuration/resume.tsx b/web/src/pages/dataset/dataset-setting/configuration/resume.tsx new file mode 100644 index 000000000..bb0515064 --- /dev/null +++ b/web/src/pages/dataset/dataset-setting/configuration/resume.tsx @@ -0,0 +1,10 @@ +import { TagItems } from '../components/tag-item'; +import { ConfigurationFormContainer } from '../configuration-form-container'; + +export function ResumeConfiguration() { + return ( + + + + ); +} diff --git a/web/src/pages/dataset/dataset-setting/configuration/table.tsx b/web/src/pages/dataset/dataset-setting/configuration/table.tsx new file mode 100644 index 000000000..ecf9fc7cc --- /dev/null +++ b/web/src/pages/dataset/dataset-setting/configuration/table.tsx @@ -0,0 +1,12 @@ +import { ConfigurationFormContainer } from '../configuration-form-container'; + +export function TableConfiguration() { + return ( + + {/* + + + */} + + ); +} diff --git a/web/src/pages/dataset/dataset-setting/configuration/tag.tsx b/web/src/pages/dataset/dataset-setting/configuration/tag.tsx new file mode 100644 index 000000000..b068ec26f --- /dev/null +++ b/web/src/pages/dataset/dataset-setting/configuration/tag.tsx @@ -0,0 +1,5 @@ +import { ConfigurationFormContainer } from '../configuration-form-container'; + +export function TagConfiguration() { + return ; +} diff --git a/web/src/pages/dataset/dataset-setting/form-schema.ts b/web/src/pages/dataset/dataset-setting/form-schema.ts index 3f1011121..490eb5d56 100644 --- a/web/src/pages/dataset/dataset-setting/form-schema.ts +++ b/web/src/pages/dataset/dataset-setting/form-schema.ts @@ -1,79 +1,92 @@ +import { t } from 'i18next'; import { z } from 'zod'; -export const formSchema = z.object({ - name: z.string().min(1, { - message: 'Username must be at least 2 characters.', - }), - description: z.string().min(2, { - message: 'Username must be at least 2 characters.', - }), - // avatar: z.instanceof(File), - avatar: z.any().nullish(), - permission: z.string().optional(), - parser_id: z.string(), - pipeline_id: z.string().optional(), - pipeline_name: z.string().optional(), - pipeline_avatar: z.string().optional(), - embd_id: z.string(), - parser_config: z - .object({ - layout_recognize: z.string(), - chunk_token_num: z.number(), - delimiter: z.string(), - auto_keywords: z.number().optional(), - auto_questions: z.number().optional(), - html4excel: z.boolean(), - tag_kb_ids: z.array(z.string()).nullish(), - topn_tags: z.number().optional(), - raptor: z - .object({ - use_raptor: z.boolean().optional(), - prompt: z.string().optional(), - max_token: z.number().optional(), - threshold: z.number().optional(), - max_cluster: z.number().optional(), - random_seed: z.number().optional(), - }) - .refine( - (data) => { - if (data.use_raptor && !data.prompt) { - return false; - } - return true; - }, - { - message: 'Prompt is required', - path: ['prompt'], - }, - ), - graphrag: z - .object({ - use_graphrag: z.boolean().optional(), - entity_types: z.array(z.string()).optional(), - method: z.string().optional(), - resolution: z.boolean().optional(), - community: z.boolean().optional(), - }) - .refine( - (data) => { - if ( - data.use_graphrag && - (!data.entity_types || data.entity_types.length === 0) - ) { - return false; - } - return true; - }, - { - message: 'Please enter Entity types', - path: ['entity_types'], - }, - ), - }) - .optional(), - pagerank: z.number(), - // icon: z.array(z.instanceof(File)), -}); +export const formSchema = z + .object({ + parseType: z.number(), + name: z.string().min(1, { + message: 'Username must be at least 2 characters.', + }), + description: z.string().min(2, { + message: 'Username must be at least 2 characters.', + }), + // avatar: z.instanceof(File), + avatar: z.any().nullish(), + permission: z.string().optional(), + parser_id: z.string(), + pipeline_id: z.string().optional(), + pipeline_name: z.string().optional(), + pipeline_avatar: z.string().optional(), + embd_id: z.string(), + parser_config: z + .object({ + layout_recognize: z.string(), + chunk_token_num: z.number(), + delimiter: z.string(), + auto_keywords: z.number().optional(), + auto_questions: z.number().optional(), + html4excel: z.boolean(), + tag_kb_ids: z.array(z.string()).nullish(), + topn_tags: z.number().optional(), + toc_extraction: z.boolean().optional(), + raptor: z + .object({ + use_raptor: z.boolean().optional(), + prompt: z.string().optional(), + max_token: z.number().optional(), + threshold: z.number().optional(), + max_cluster: z.number().optional(), + random_seed: z.number().optional(), + }) + .refine( + (data) => { + if (data.use_raptor && !data.prompt) { + return false; + } + return true; + }, + { + message: 'Prompt is required', + path: ['prompt'], + }, + ), + graphrag: z + .object({ + use_graphrag: z.boolean().optional(), + entity_types: z.array(z.string()).optional(), + method: z.string().optional(), + resolution: z.boolean().optional(), + community: z.boolean().optional(), + }) + .refine( + (data) => { + if ( + data.use_graphrag && + (!data.entity_types || data.entity_types.length === 0) + ) { + return false; + } + return true; + }, + { + message: 'Please enter Entity types', + path: ['entity_types'], + }, + ), + }) + .optional(), + pagerank: z.number(), + // icon: z.array(z.instanceof(File)), + }) + .superRefine((data, ctx) => { + if (data.parseType === 2 && !data.pipeline_id) { + ctx.addIssue({ + path: ['pipeline_id'], + message: t('common.pleaseSelect'), + code: 'custom', + }); + } + }); export const pipelineFormSchema = z.object({ pipeline_id: z.string().optional(), diff --git a/web/src/pages/dataset/dataset-setting/index.tsx b/web/src/pages/dataset/dataset-setting/index.tsx index 25d8fd7ea..079b04571 100644 --- a/web/src/pages/dataset/dataset-setting/index.tsx +++ b/web/src/pages/dataset/dataset-setting/index.tsx @@ -1,14 +1,18 @@ -import { IDataPipelineSelectNode } from '@/components/data-pipeline-select'; +import { + DataFlowSelect, + IDataPipelineSelectNode, +} from '@/components/data-pipeline-select'; import GraphRagItems from '@/components/parse-configuration/graph-rag-form-fields'; import RaptorFormFields from '@/components/parse-configuration/raptor-form-fields'; import { Button } from '@/components/ui/button'; import Divider from '@/components/ui/divider'; import { Form } from '@/components/ui/form'; +import { FormLayout } from '@/constants/form'; import { DocumentParserType } from '@/constants/knowledge'; import { PermissionRole } from '@/constants/permission'; import { zodResolver } from '@hookform/resolvers/zod'; import { useEffect, useState } from 'react'; -import { useForm } from 'react-hook-form'; +import { useForm, useWatch } from 'react-hook-form'; import { useTranslation } from 'react-i18next'; import { z } from 'zod'; import { TopTitle } from '../dataset-title'; @@ -16,10 +20,10 @@ import { GenerateType, IGenerateLogButtonProps, } from '../dataset/generate-button/generate'; -import LinkDataPipeline, { - IDataPipelineNodeProps, -} from './components/link-data-pipeline'; +import { ChunkMethodForm } from './chunk-method-form'; +import { IDataPipelineNodeProps } from './components/link-data-pipeline'; import { MainContainer } from './configuration-form-container'; +import { ChunkMethodItem, ParseTypeItem } from './configuration/common-item'; import { formSchema } from './form-schema'; import { GeneralForm } from './general-form'; import { useFetchKnowledgeConfigurationOnMount } from './hooks'; @@ -44,6 +48,7 @@ const enum MethodValue { export default function DatasetSettings() { const { t } = useTranslation(); + const form = useForm>({ resolver: zodResolver(formSchema), defaultValues: { @@ -58,6 +63,7 @@ export default function DatasetSettings() { auto_questions: 0, html4excel: false, topn_tags: 3, + toc_extraction: false, raptor: { use_raptor: true, max_token: 256, @@ -73,17 +79,17 @@ export default function DatasetSettings() { }, }, pipeline_id: '', + parseType: 1, pagerank: 0, }, }); - const knowledgeDetails = useFetchKnowledgeConfigurationOnMount(form); - const [pipelineData, setPipelineData] = useState(); const [graphRagGenerateData, setGraphRagGenerateData] = useState(); const [raptorGenerateData, setRaptorGenerateData] = useState(); + useEffect(() => { console.log('🚀 ~ DatasetSettings ~ knowledgeDetails:', knowledgeDetails); if (knowledgeDetails) { @@ -102,8 +108,10 @@ export default function DatasetSettings() { finish_at: knowledgeDetails.raptor_task_finish_at, task_id: knowledgeDetails.raptor_task_id, } as IGenerateLogButtonProps); + form.setValue('parseType', knowledgeDetails.pipeline_id ? 2 : 1); + form.setValue('pipeline_id', knowledgeDetails.pipeline_id || ''); } - }, [knowledgeDetails]); + }, [knowledgeDetails, form]); async function onSubmit(data: z.infer) { try { @@ -137,6 +145,22 @@ export default function DatasetSettings() { } as IGenerateLogButtonProps); } }; + + const parseType = useWatch({ + control: form.control, + name: 'parseType', + defaultValue: knowledgeDetails.pipeline_id ? 2 : 1, + }); + const selectedTag = useWatch({ + name: 'parser_id', + control: form.control, + }); + useEffect(() => { + if (parseType === 1) { + form.setValue('pipeline_id', ''); + } + console.log('parseType', parseType); + }, [parseType, form]); return (
handleDeletePipelineTask(GenerateType.Raptor)} > - + {parseType === 1 && ( + + )} + {parseType === 2 && ( + + )} + + + {parseType === 1 && ( + + )} + + {/* + /> */}
diff --git a/web/src/pages/dataset/dataset-setting/saving-button.tsx b/web/src/pages/dataset/dataset-setting/saving-button.tsx index 7a063081a..558150b4f 100644 --- a/web/src/pages/dataset/dataset-setting/saving-button.tsx +++ b/web/src/pages/dataset/dataset-setting/saving-button.tsx @@ -62,6 +62,7 @@ export function SavingButton() { if (beValid) { form.handleSubmit(async (values) => { console.log('saveKnowledgeConfiguration: ', values); + delete values['parseType']; // delete values['avatar']; await saveKnowledgeConfiguration({ kb_id, diff --git a/web/src/pages/dataset/sidebar/index.tsx b/web/src/pages/dataset/sidebar/index.tsx index c612f2059..722f59967 100644 --- a/web/src/pages/dataset/sidebar/index.tsx +++ b/web/src/pages/dataset/sidebar/index.tsx @@ -29,11 +29,6 @@ export function SideBar({ refreshCount }: PropType) { const items = useMemo(() => { const list = [ - { - icon: , - label: t(`knowledgeDetails.overview`), - key: Routes.DataSetOverview, - }, { icon: , label: t(`knowledgeDetails.subbarFiles`), @@ -44,6 +39,11 @@ export function SideBar({ refreshCount }: PropType) { label: t(`knowledgeDetails.testing`), key: Routes.DatasetTesting, }, + { + icon: , + label: t(`knowledgeDetails.overview`), + key: Routes.DataSetOverview, + }, { icon: , label: t(`knowledgeDetails.configuration`), diff --git a/web/src/pages/datasets/dataset-creating-dialog.tsx b/web/src/pages/datasets/dataset-creating-dialog.tsx index c2bbf3705..6b54de904 100644 --- a/web/src/pages/datasets/dataset-creating-dialog.tsx +++ b/web/src/pages/datasets/dataset-creating-dialog.tsx @@ -16,6 +16,7 @@ import { FormMessage, } from '@/components/ui/form'; import { Input } from '@/components/ui/input'; +import { FormLayout } from '@/constants/form'; import { useNavigatePage } from '@/hooks/logic-hooks/navigate-hooks'; import { IModalProps } from '@/interfaces/common'; import { zodResolver } from '@hookform/resolvers/zod'; @@ -137,8 +138,9 @@ export function InputForm({ onOk }: IModalProps) { {parseType === 2 && ( )}