feat: add ingestion pipeline children delimiters configs (#11979)

### What problem does this PR solve?

Adds children delimiter settings to the ingestion pipeline config.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
Jimmy Ben Klieve
2025-12-17 11:18:54 +08:00
committed by GitHub
parent 30019dab9f
commit 2595644dfd
10 changed files with 218 additions and 3 deletions

View File

@ -0,0 +1,116 @@
import { cn } from '@/lib/utils';
import { forwardRef } from 'react';
import { useFormContext } from 'react-hook-form';
import { useTranslation } from 'react-i18next';
import {
FormControl,
FormField,
FormItem,
FormLabel,
FormMessage,
} from './ui/form';
import { Input, InputProps } from './ui/input';
import { Switch } from './ui/switch';
/** Value/change contract that `DelimiterInput` combines with `InputProps`. */
interface IProps {
// Current string value; may be omitted or undefined.
value?: string | undefined;
// Callback that receives the new string after the input changes.
onChange?: (val: string | undefined) => void;
}
/**
 * Text input for delimiter strings.
 *
 * Newline, tab and carriage-return characters in `value` are displayed as the
 * two-character escapes `\n`, `\t` and `\r`. When the user edits the field,
 * those escapes are converted back to the real characters before `onChange`
 * is called.
 *
 * @param value - Delimiter string containing real control characters.
 * @param onChange - Called with the unescaped string on every edit.
 * @param className - Extra classes, merged with the default `bg-bg-base`.
 */
export const DelimiterInput = forwardRef<HTMLInputElement, InputProps & IProps>(
  ({ value, onChange, maxLength, defaultValue, className, ...props }, ref) => {
    // Escaped form shown in the text box.
    const displayValue = value
      ?.replaceAll('\n', '\\n')
      .replaceAll('\t', '\\t')
      .replaceAll('\r', '\\r');

    const handleInputChange = (e: React.ChangeEvent<HTMLInputElement>) => {
      // Any typed backslash escape is turned back into its control character,
      // so a literal backslash followed by n/t/r cannot be stored as-is.
      const rawValue = e.target.value
        .replaceAll('\\n', '\n')
        .replaceAll('\\t', '\t')
        .replaceAll('\\r', '\r');
      onChange?.(rawValue);
    };

    return (
      <Input
        {...props}
        value={displayValue}
        onChange={handleInputChange}
        maxLength={maxLength}
        defaultValue={defaultValue}
        ref={ref}
        // `className` is destructured out of `props`, so the spread above can
        // no longer overwrite the merged class list and drop `bg-bg-base`.
        className={cn('bg-bg-base', className)}
      />
    );
  },
);
/**
 * Form section for the children-delimiter option.
 *
 * Renders a switch bound to `parser_config.enable_children` and, while that
 * switch is on, a `DelimiterInput` bound to `parser_config.children_delimiter`.
 * Must be rendered inside a react-hook-form provider, because it reads the
 * form through `useFormContext`.
 */
export function ChildrenDelimiterForm() {
  const { t } = useTranslation();
  const form = useFormContext();
  // `watch` subscribes to the field so toggling the switch re-renders this
  // component; `getValues` only takes a snapshot and does not subscribe.
  const childrenEnabled = form.watch('parser_config.enable_children');
  const delimiterValue = form.watch('parser_config.children_delimiter');

  return (
    <fieldset className="space-y-2">
      <FormField
        control={form.control}
        name="parser_config.enable_children"
        render={({ field: { value, onChange, ...restProps } }) => (
          <FormItem className="items-center space-y-0 ">
            <div className="flex items-center justify-between gap-1">
              <FormLabel>
                {t('knowledgeDetails.enableChildrenDelimiter')}
              </FormLabel>
              <div className="flex-none">
                <FormControl>
                  <Switch
                    checked={value}
                    onCheckedChange={(checked) => {
                      // Seed a newline delimiter when enabling with an empty
                      // delimiter field.
                      if (checked && !delimiterValue) {
                        form.setValue('parser_config.children_delimiter', '\n');
                      }
                      onChange(checked);
                    }}
                    {...restProps}
                  />
                </FormControl>
              </div>
            </div>
          </FormItem>
        )}
      />
      {childrenEnabled && (
        <FormField
          control={form.control}
          name="parser_config.children_delimiter"
          render={({ field }) => (
            <FormItem className="items-center space-y-0 ">
              <div className="flex items-center gap-1">
                <FormLabel
                  required
                  tooltip={t('knowledgeDetails.childrenDelimiterTip')}
                  className="text-sm text-text-secondary whitespace-break-spaces w-1/4"
                >
                  {t('knowledgeDetails.childrenDelimiter')}
                </FormLabel>
                <div className="w-3/4">
                  <FormControl>
                    <DelimiterInput {...field} />
                  </FormControl>
                </div>
              </div>
              <div className="flex pt-1">
                {/* Empty spacer so the message lines up under the input column. */}
                <div className="w-1/4"></div>
                <FormMessage />
              </div>
            </FormItem>
          )}
        />
      )}
    </fieldset>
  );
}

View File

@ -34,6 +34,7 @@ import {
AutoKeywordsFormField,
AutoQuestionsFormField,
} from '../auto-keywords-form-field';
import { ChildrenDelimiterForm } from '../children-delimiter-form';
import { DataFlowSelect } from '../data-pipeline-select';
import { DelimiterFormField } from '../delimiter-form-field';
import { EntityTypesFormField } from '../entity-types-form-field';
@ -111,6 +112,8 @@ export function ChunkMethodDialog({
layout_recognize: z.string().optional(),
chunk_token_num: z.coerce.number().optional(),
delimiter: z.string().optional(),
enable_children: z.boolean().optional(),
children_delimiter: z.string().optional(),
auto_keywords: z.coerce.number().optional(),
auto_questions: z.coerce.number().optional(),
html4excel: z.boolean().optional(),
@ -196,6 +199,10 @@ export function ChunkMethodDialog({
...data,
parser_config: {
...data.parser_config,
// Unset children delimiter if this option is not enabled
children_delimiter: data.parser_config.enable_children
? data.parser_config.children_delimiter
: null,
pages: data.parser_config?.pages?.map((x: any) => [x.from, x.to]) ?? [],
},
};
@ -333,6 +340,7 @@ export function ChunkMethodDialog({
}
></MaxTokenNumberFormField>
<DelimiterFormField></DelimiterFormField>
<ChildrenDelimiterForm />
</>
)}
</FormContainer>

View File

@ -12,6 +12,8 @@ export function useDefaultParserValues() {
layout_recognize: ParseDocumentType.DeepDOC,
chunk_token_num: 512,
delimiter: '\n',
enable_children: false,
children_delimiter: '\n',
auto_keywords: 0,
auto_questions: 0,
html4excel: false,