Fix: Optimized knowledge base file parsing and display #9869 (#10292)

### What problem does this PR solve?

Fix: Optimized knowledge base file parsing and display #9869

- Optimized the ChunkMethodDialog component logic and adjusted
FormSchema validation rules
- Updated the document information interface definition, adding
pipeline_id, pipeline_name, and suffix fields
- Refactored the ChunkResultBar component, removing filter-related logic
and simplifying the input box and chunk creation functionality
- Improved FormatPreserveEditor to support text mode switching
(full/omitted) display control
- Updated timeline node titles to more accurate semantic descriptions
(e.g., character splitters)
- Optimized the data flow result page structure and style, dynamically
adjusting height and content display
- Fixed the table sorting function on the dataset overview page and
enhanced the display of task type icons and status mapping.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
chanx
2025-09-25 19:53:49 +08:00
committed by GitHub
parent abe7132630
commit 14273b4595
27 changed files with 674 additions and 386 deletions

View File

@ -51,7 +51,7 @@ import RaptorFormFields, {
import { ButtonLoading } from '../ui/button';
import { Input } from '../ui/input';
import { DynamicPageRange } from './dynamic-page-range';
import { useFetchParserListOnMount, useShowAutoKeywords } from './hooks';
import { useShowAutoKeywords } from './hooks';
import {
useDefaultParserValues,
useFillDefaultValueOnMount,
@ -93,8 +93,6 @@ export function ChunkMethodDialog({
}: IProps) {
const { t } = useTranslation();
const { parserList } = useFetchParserListOnMount(documentExtension);
const { data: knowledgeDetails } = useFetchKnowledgeBaseConfiguration();
const useGraphRag = useMemo(() => {
@ -105,48 +103,58 @@ export function ChunkMethodDialog({
const fillDefaultParserValue = useFillDefaultValueOnMount();
const FormSchema = z.object({
parseType: z.number(),
parser_id: z
.string()
.min(1, {
message: t('common.pleaseSelect'),
})
.trim(),
pipeline_id: z.string().optional(),
parser_config: z.object({
task_page_size: z.coerce.number().optional(),
layout_recognize: z.string().optional(),
chunk_token_num: z.coerce.number().optional(),
delimiter: z.string().optional(),
auto_keywords: z.coerce.number().optional(),
auto_questions: z.coerce.number().optional(),
html4excel: z.boolean().optional(),
raptor: z
.object({
use_raptor: z.boolean().optional(),
prompt: z.string().optional().optional(),
max_token: z.coerce.number().optional(),
threshold: z.coerce.number().optional(),
max_cluster: z.coerce.number().optional(),
random_seed: z.coerce.number().optional(),
const FormSchema = z
.object({
parseType: z.number(),
parser_id: z
.string()
.min(1, {
message: t('common.pleaseSelect'),
})
.optional(),
graphrag: z.object({
use_graphrag: z.boolean().optional(),
.trim(),
pipeline_id: z.string().optional(),
parser_config: z.object({
task_page_size: z.coerce.number().optional(),
layout_recognize: z.string().optional(),
chunk_token_num: z.coerce.number().optional(),
delimiter: z.string().optional(),
auto_keywords: z.coerce.number().optional(),
auto_questions: z.coerce.number().optional(),
html4excel: z.boolean().optional(),
raptor: z
.object({
use_raptor: z.boolean().optional(),
prompt: z.string().optional().optional(),
max_token: z.coerce.number().optional(),
threshold: z.coerce.number().optional(),
max_cluster: z.coerce.number().optional(),
random_seed: z.coerce.number().optional(),
})
.optional(),
graphrag: z.object({
use_graphrag: z.boolean().optional(),
}),
entity_types: z.array(z.string()).optional(),
pages: z
.array(z.object({ from: z.coerce.number(), to: z.coerce.number() }))
.optional(),
}),
entity_types: z.array(z.string()).optional(),
pages: z
.array(z.object({ from: z.coerce.number(), to: z.coerce.number() }))
.optional(),
}),
});
})
.superRefine((data, ctx) => {
if (data.parseType === 2 && !data.pipeline_id) {
ctx.addIssue({
path: ['pipeline_id'],
message: t('common.pleaseSelect'),
code: 'custom',
});
}
});
const form = useForm<z.infer<typeof FormSchema>>({
resolver: zodResolver(FormSchema),
defaultValues: {
parser_id: parserId,
pipeline_id: pipelineId,
parser_id: parserId || '',
pipeline_id: pipelineId || '',
parseType: pipelineId ? 2 : 1,
parser_config: defaultParserValues,
},
@ -209,8 +217,8 @@ export function ChunkMethodDialog({
const pages =
parserConfig?.pages?.map((x) => ({ from: x[0], to: x[1] })) ?? [];
form.reset({
parser_id: parserId,
pipeline_id: pipelineId,
parser_id: parserId || '',
pipeline_id: pipelineId || '',
parseType: pipelineId ? 2 : 1,
parser_config: fillDefaultParserValue({
pages: pages.length > 0 ? pages : [{ from: 1, to: 1024 }],
@ -231,13 +239,14 @@ export function ChunkMethodDialog({
knowledgeDetails.parser_config,
parserConfig,
parserId,
pipelineId,
useGraphRag,
visible,
]);
const parseType = useWatch({
control: form.control,
name: 'parseType',
defaultValue: 1,
defaultValue: pipelineId ? 2 : 1,
});
return (
<Dialog open onOpenChange={hideModal}>