Fix: Added table of contents extraction functionality and optimized form item layout #9869 (#10492)

### What problem does this PR solve?

Fix: Added table of contents extraction functionality and optimized form
item layout #9869

- Added `EnableTocToggle` component to toggle table of contents
extraction on and off
- Added multiple parser configuration components (such as naive, book,
laws, etc.), displaying different parser components based on built-in
slicing methods

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
chanx
2025-10-11 18:45:55 +08:00
committed by GitHub
parent 6a0f448419
commit c21cea2038
35 changed files with 694 additions and 173 deletions

View File

@ -1,79 +1,92 @@
import { t } from 'i18next';
import { z } from 'zod';
export const formSchema = z.object({
name: z.string().min(1, {
message: 'Username must be at least 2 characters.',
}),
description: z.string().min(2, {
message: 'Username must be at least 2 characters.',
}),
// avatar: z.instanceof(File),
avatar: z.any().nullish(),
permission: z.string().optional(),
parser_id: z.string(),
pipeline_id: z.string().optional(),
pipeline_name: z.string().optional(),
pipeline_avatar: z.string().optional(),
embd_id: z.string(),
parser_config: z
.object({
layout_recognize: z.string(),
chunk_token_num: z.number(),
delimiter: z.string(),
auto_keywords: z.number().optional(),
auto_questions: z.number().optional(),
html4excel: z.boolean(),
tag_kb_ids: z.array(z.string()).nullish(),
topn_tags: z.number().optional(),
raptor: z
.object({
use_raptor: z.boolean().optional(),
prompt: z.string().optional(),
max_token: z.number().optional(),
threshold: z.number().optional(),
max_cluster: z.number().optional(),
random_seed: z.number().optional(),
})
.refine(
(data) => {
if (data.use_raptor && !data.prompt) {
return false;
}
return true;
},
{
message: 'Prompt is required',
path: ['prompt'],
},
),
graphrag: z
.object({
use_graphrag: z.boolean().optional(),
entity_types: z.array(z.string()).optional(),
method: z.string().optional(),
resolution: z.boolean().optional(),
community: z.boolean().optional(),
})
.refine(
(data) => {
if (
data.use_graphrag &&
(!data.entity_types || data.entity_types.length === 0)
) {
return false;
}
return true;
},
{
message: 'Please enter Entity types',
path: ['entity_types'],
},
),
})
.optional(),
pagerank: z.number(),
// icon: z.array(z.instanceof(File)),
});
export const formSchema = z
.object({
parseType: z.number(),
name: z.string().min(1, {
message: 'Username must be at least 2 characters.',
}),
description: z.string().min(2, {
message: 'Username must be at least 2 characters.',
}),
// avatar: z.instanceof(File),
avatar: z.any().nullish(),
permission: z.string().optional(),
parser_id: z.string(),
pipeline_id: z.string().optional(),
pipeline_name: z.string().optional(),
pipeline_avatar: z.string().optional(),
embd_id: z.string(),
parser_config: z
.object({
layout_recognize: z.string(),
chunk_token_num: z.number(),
delimiter: z.string(),
auto_keywords: z.number().optional(),
auto_questions: z.number().optional(),
html4excel: z.boolean(),
tag_kb_ids: z.array(z.string()).nullish(),
topn_tags: z.number().optional(),
toc_extraction: z.boolean().optional(),
raptor: z
.object({
use_raptor: z.boolean().optional(),
prompt: z.string().optional(),
max_token: z.number().optional(),
threshold: z.number().optional(),
max_cluster: z.number().optional(),
random_seed: z.number().optional(),
})
.refine(
(data) => {
if (data.use_raptor && !data.prompt) {
return false;
}
return true;
},
{
message: 'Prompt is required',
path: ['prompt'],
},
),
graphrag: z
.object({
use_graphrag: z.boolean().optional(),
entity_types: z.array(z.string()).optional(),
method: z.string().optional(),
resolution: z.boolean().optional(),
community: z.boolean().optional(),
})
.refine(
(data) => {
if (
data.use_graphrag &&
(!data.entity_types || data.entity_types.length === 0)
) {
return false;
}
return true;
},
{
message: 'Please enter Entity types',
path: ['entity_types'],
},
),
})
.optional(),
pagerank: z.number(),
// icon: z.array(z.instanceof(File)),
})
.superRefine((data, ctx) => {
if (data.parseType === 2 && !data.pipeline_id) {
ctx.addIssue({
path: ['pipeline_id'],
message: t('common.pleaseSelect'),
code: 'custom',
});
}
});
export const pipelineFormSchema = z.object({
pipeline_id: z.string().optional(),