diff --git a/web/src/components/children-delimiter-form.tsx b/web/src/components/children-delimiter-form.tsx new file mode 100644 index 000000000..7fa417a56 --- /dev/null +++ b/web/src/components/children-delimiter-form.tsx @@ -0,0 +1,117 @@ +import { cn } from '@/lib/utils'; +import { forwardRef } from 'react'; +import { useFormContext } from 'react-hook-form'; +import { useTranslation } from 'react-i18next'; +import { + FormControl, + FormField, + FormItem, + FormLabel, + FormMessage, +} from './ui/form'; +import { Input, InputProps } from './ui/input'; +import { Switch } from './ui/switch'; + +interface IProps { + value?: string | undefined; + onChange?: (val: string | undefined) => void; +} + +export const DelimiterInput = forwardRef( + ({ value, onChange, maxLength, defaultValue, ...props }, ref) => { + const nextValue = value + ?.replaceAll('\n', '\\n') + .replaceAll('\t', '\\t') + .replaceAll('\r', '\\r'); + const handleInputChange = (e: React.ChangeEvent) => { + const val = e.target.value; + const nextValue = val + .replaceAll('\\n', '\n') + .replaceAll('\\t', '\t') + .replaceAll('\\r', '\r'); + onChange?.(nextValue); + }; + return ( + + ); + }, +); + +export function ChildrenDelimiterForm() { + const { t } = useTranslation(); + const form = useFormContext(); + + const delimiterValue = form.watch('parser_config.children_delimiter'); + + return ( +
+ ( +
+ + {t('knowledgeDetails.enableChildrenDelimiter')} + +
+ + { + if (checked && !delimiterValue) { + form.setValue('parser_config.children_delimiter', '\n'); + } + + onChange(checked); + }} + {...restProps} + /> +
+
+
+ )} + /> + + {/* watch() subscribes to the switch; getValues() would not re-render */} + {form.watch('parser_config.enable_children') && ( + ( +
+ + {t('knowledgeDetails.childrenDelimiter')} +
+ + +
+
+
+
+
+
+ )} + /> + )} +
+ ); +} diff --git a/web/src/components/chunk-method-dialog/index.tsx b/web/src/components/chunk-method-dialog/index.tsx index d8a4d1115..2fd3d2617 100644 --- a/web/src/components/chunk-method-dialog/index.tsx +++ b/web/src/components/chunk-method-dialog/index.tsx @@ -34,6 +34,7 @@ import { AutoKeywordsFormField, AutoQuestionsFormField, } from '../auto-keywords-form-field'; +import { ChildrenDelimiterForm } from '../children-delimiter-form'; import { DataFlowSelect } from '../data-pipeline-select'; import { DelimiterFormField } from '../delimiter-form-field'; import { EntityTypesFormField } from '../entity-types-form-field'; @@ -111,6 +112,8 @@ export function ChunkMethodDialog({ layout_recognize: z.string().optional(), chunk_token_num: z.coerce.number().optional(), delimiter: z.string().optional(), + enable_children: z.boolean().optional(), + children_delimiter: z.string().optional(), auto_keywords: z.coerce.number().optional(), auto_questions: z.coerce.number().optional(), html4excel: z.boolean().optional(), @@ -196,6 +199,10 @@ export function ChunkMethodDialog({ ...data, parser_config: { ...data.parser_config, + // Unset children delimiter if this option is not enabled + children_delimiter: data.parser_config?.enable_children + ? data.parser_config.children_delimiter + : null, pages: data.parser_config?.pages?.map((x: any) => [x.from, x.to]) ??
[], }, }; @@ -333,6 +340,7 @@ export function ChunkMethodDialog({ } > + )} diff --git a/web/src/components/chunk-method-dialog/use-default-parser-values.ts b/web/src/components/chunk-method-dialog/use-default-parser-values.ts index 238047db6..14495c8b2 100644 --- a/web/src/components/chunk-method-dialog/use-default-parser-values.ts +++ b/web/src/components/chunk-method-dialog/use-default-parser-values.ts @@ -12,6 +12,8 @@ export function useDefaultParserValues() { layout_recognize: ParseDocumentType.DeepDOC, chunk_token_num: 512, delimiter: '\n', + enable_children: false, + children_delimiter: '\n', auto_keywords: 0, auto_questions: 0, html4excel: false, diff --git a/web/src/locales/en.ts b/web/src/locales/en.ts index 88bfc3da5..467ca8ade 100644 --- a/web/src/locales/en.ts +++ b/web/src/locales/en.ts @@ -295,6 +295,11 @@ export default { delimiter: `Delimiter for text`, delimiterTip: 'A delimiter or separator can consist of one or multiple special characters. If it is multiple characters, ensure they are enclosed in backticks( ``). For example, if you configure your delimiters like this: \\n`##`;, then your texts will be separated at line breaks, double hash symbols (##), and semicolons.', + enableChildrenDelimiter: 'Child chunks are used for retrieval', + childrenDelimiter: 'Delimiter for text', + childrenDelimiterTip: + 'A delimiter or separator can consist of one or multiple special characters. If it is multiple characters, ensure they are enclosed in backticks( ``). For example, if you configure your delimiters like this: \\n`##`;, then your texts will be separated at line breaks, double hash symbols (##), and semicolons.', + html4excel: 'Excel to HTML', html4excelTip: `Use with the General chunking method. When disabled, spreadsheets (XLSX or XLS(Excel 97-2003)) in the knowledge base will be parsed into key-value pairs. When enabled, they will be parsed into HTML tables, splitting every 12 rows if the original table has more than 12 rows.
See https://ragflow.io/docs/dev/enable_excel2html for details.`, autoKeywords: 'Auto-keyword', @@ -779,7 +784,7 @@ This auto-tagging feature enhances retrieval by adding another layer of domain-s 'The base URL of your Confluence instance (e.g., https://your-domain.atlassian.net/wiki)', confluenceSpaceKeyTip: 'Optional: Specify a space key to limit syncing to a specific space. Leave empty to sync all accessible spaces. For multiple spaces, separate with commas (e.g., DEV,DOCS,HR)', - s3PrefixTip: `Specify the folder path within your S3 bucket to fetch files from. + s3PrefixTip: `Specify the folder path within your S3 bucket to fetch files from. Example: general/v2/`, S3CompatibleEndpointUrlTip: `Required for S3 compatible Storage Box. Specify the S3-compatible endpoint URL. Example: https://fsn1.your-objectstorage.com`, @@ -1199,6 +1204,7 @@ Example: Virtual Hosted Style`, tab: 'Tab', space: 'Space', delimiters: 'Delimiters', + enableChildrenDelimiters: 'Child chunks are used for retrieval', merge: 'Merge', split: 'Split', script: 'Script', diff --git a/web/src/pages/agent/form/splitter-form/index.tsx b/web/src/pages/agent/form/splitter-form/index.tsx index 0438dcf8d..c6cb8962a 100644 --- a/web/src/pages/agent/form/splitter-form/index.tsx +++ b/web/src/pages/agent/form/splitter-form/index.tsx @@ -2,7 +2,8 @@ import { DelimiterInput } from '@/components/delimiter-form-field'; import { RAGFlowFormItem } from '@/components/ragflow-form'; import { SliderInputFormField } from '@/components/slider-input-form-field'; import { BlockButton, Button } from '@/components/ui/button'; -import { Form } from '@/components/ui/form'; +import { Form, FormControl, FormField, FormItem } from '@/components/ui/form'; +import { Switch } from '@/components/ui/switch'; import { zodResolver } from '@hookform/resolvers/zod'; import { Trash2 } from 'lucide-react'; import { memo } from 'react'; @@ -26,6 +27,12 @@ export const FormSchema = z.object({ value: z.string().optional(), }), ), +
enable_children: z.boolean(), + children_delimiters: z.array( + z.object({ + value: z.string().optional(), + }), + ), overlapped_percent: z.number(), // 0.0 - 0.3 , 0% - 30% }); @@ -46,6 +53,11 @@ const SplitterForm = ({ node }: INextOperatorForm) => { control: form.control, }); + const childrenDelimiters = useFieldArray({ + name: 'children_delimiters', + control: form.control, + }); + useWatchFormChange(node?.id, form); return ( @@ -90,6 +102,59 @@ const SplitterForm = ({ node }: INextOperatorForm) => { append({ value: '\n' })}> {t('common.add')} + +
+
+ {t('flow.enableChildrenDelimiters')} + + ( + + + + + + )} + /> +
+ + {form.getValues('enable_children') && ( +
+ {childrenDelimiters.fields.map((field, index) => ( +
+ + + + + +
+ ))} + + childrenDelimiters.append({ value: '\n' })} + > + {t('common.add')} + +
+ )} +
diff --git a/web/src/pages/agent/utils.ts b/web/src/pages/agent/utils.ts index 4f3df85c3..3221d8ca0 100644 --- a/web/src/pages/agent/utils.ts +++ b/web/src/pages/agent/utils.ts @@ -288,6 +288,11 @@ function transformSplitterParams(params: SplitterFormSchemaType) { ...params, overlapped_percent: Number(params.overlapped_percent) / 100, delimiters: transformObjectArrayToPureArray(params.delimiters, 'value'), + + // Unset children delimiters if this option is not enabled + children_delimiters: params.enable_children + ? transformObjectArrayToPureArray(params.children_delimiters, 'value') + : [], }; } @@ -713,7 +718,7 @@ export function convertToObjectArray( /** * convert the following object into a list - * + * * { "product_related": { "description": "The question is about product usage, appearance and how it works.", diff --git a/web/src/pages/dataset/dataset-setting/configuration/naive.tsx b/web/src/pages/dataset/dataset-setting/configuration/naive.tsx index d14eca96f..22c260278 100644 --- a/web/src/pages/dataset/dataset-setting/configuration/naive.tsx +++ b/web/src/pages/dataset/dataset-setting/configuration/naive.tsx @@ -2,6 +2,7 @@ import { AutoKeywordsFormField, AutoQuestionsFormField, } from '@/components/auto-keywords-form-field'; +import { ChildrenDelimiterForm } from '@/components/children-delimiter-form'; import { DelimiterFormField } from '@/components/delimiter-form-field'; import { ExcelToHtmlFormField } from '@/components/excel-to-html-form-field'; import { LayoutRecognizeFormField } from '@/components/layout-recognize-form-field'; @@ -21,6 +22,7 @@ export function NaiveConfiguration() { + diff --git a/web/src/pages/dataset/dataset-setting/form-schema.ts b/web/src/pages/dataset/dataset-setting/form-schema.ts index 00c00a725..575dbb17b 100644 --- a/web/src/pages/dataset/dataset-setting/form-schema.ts +++ b/web/src/pages/dataset/dataset-setting/form-schema.ts @@ -24,6 +24,8 @@ export const formSchema = z layout_recognize: z.string(), chunk_token_num: 
z.number(), delimiter: z.string(), + enable_children: z.boolean(), + children_delimiter: z.string(), auto_keywords: z.number().optional(), auto_questions: z.number().optional(), html4excel: z.boolean(), diff --git a/web/src/pages/dataset/dataset-setting/index.tsx b/web/src/pages/dataset/dataset-setting/index.tsx index 02667ae49..f8e3ee0cc 100644 --- a/web/src/pages/dataset/dataset-setting/index.tsx +++ b/web/src/pages/dataset/dataset-setting/index.tsx @@ -63,6 +63,8 @@ export default function DatasetSettings() { layout_recognize: DocumentType.DeepDOC, chunk_token_num: 512, delimiter: `\n`, + enable_children: false, + children_delimiter: `\n`, auto_keywords: 0, auto_questions: 0, html4excel: false, diff --git a/web/src/pages/dataset/dataset-setting/saving-button.tsx b/web/src/pages/dataset/dataset-setting/saving-button.tsx index 558150b4f..155dad13c 100644 --- a/web/src/pages/dataset/dataset-setting/saving-button.tsx +++ b/web/src/pages/dataset/dataset-setting/saving-button.tsx @@ -67,6 +67,13 @@ export function SavingButton() { await saveKnowledgeConfiguration({ kb_id, ...values, + parser_config: { + ...values.parser_config, + // Unset children delimiter if this option is not enabled + children_delimiter: values.parser_config.enable_children + ? values.parser_config.children_delimiter + : null, + }, }); })(); }