feat: add image context window in dataset config (#12094)

### What problem does this PR solve?

Add image context window configuration in **Dataset** >
**Configuration** and **Dataset** > **Files** > **Parse** > **Ingestion
Pipeline** (**Chunk Method** modal).

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
Jimmy Ben Klieve
2025-12-22 19:51:23 +08:00
committed by GitHub
parent e5f3d5ae26
commit 38ac6a7c27
8 changed files with 163 additions and 436 deletions

554
web/package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@ -21,6 +21,7 @@ import { IChangeParserConfigRequestBody } from '@/interfaces/request/document';
import {
ChunkMethodItem,
EnableTocToggle,
ImageContextWindow,
ParseTypeItem,
} from '@/pages/dataset/dataset-setting/configuration/common-item';
import { zodResolver } from '@hookform/resolvers/zod';
@ -119,6 +120,7 @@ export function ChunkMethodDialog({
auto_questions: z.coerce.number().optional(),
html4excel: z.boolean().optional(),
toc_extraction: z.boolean().optional(),
image_context_window: z.coerce.number().optional(),
mineru_parse_method: z.enum(['auto', 'txt', 'ocr']).optional(),
mineru_formula_enable: z.boolean().optional(),
mineru_table_enable: z.boolean().optional(),
@ -364,7 +366,10 @@ export function ChunkMethodDialog({
className="space-y-3"
>
{selectedTag === DocumentParserType.Naive && (
<EnableTocToggle />
<>
<EnableTocToggle />
<ImageContextWindow />
</>
)}
{showAutoKeywords(selectedTag) && (
<>

View File

@ -18,6 +18,7 @@ export function useDefaultParserValues() {
auto_questions: 0,
html4excel: false,
toc_extraction: false,
image_context_window: 0,
mineru_parse_method: 'auto',
mineru_formula_enable: true,
mineru_table_enable: true,

View File

@ -364,6 +364,9 @@ Procedural Memory: Learned skills, habits, and automated procedures.`,
},
knowledgeConfiguration: {
settings: 'Settings',
imageContextWindow: 'Image context window',
imageContextWindowTip:
'Captures N tokens of text above and below the image to provide richer background context for the image chunk.',
autoMetadata: 'Auto metadata',
mineruOptions: 'MinerU Options',
mineruParseMethod: 'Parse Method',

View File

@ -310,6 +310,36 @@ export function EnableTocToggle() {
);
}
/**
 * Form item for the `parser_config.image_context_window` setting.
 *
 * Renders a `SliderInputFormField` constrained to the range 0–256 with a
 * default of 0. The label and tooltip come from the `knowledgeConfiguration`
 * translation namespace, and the field's validation message is shown on a
 * separate row beneath the control.
 *
 * The form control is read from `useFormContext()`, so this component is
 * expected to be rendered inside an enclosing form provider.
 */
export function ImageContextWindow() {
  const { t } = useTranslate('knowledgeConfiguration');
  const { control } = useFormContext();

  // Resolve the translated strings once per render of this component.
  const label = t('imageContextWindow');
  const tooltip = t('imageContextWindowTip');

  return (
    <FormField
      control={control}
      name="parser_config.image_context_window"
      render={({ field }) => (
        <FormItem>
          <FormControl>
            <SliderInputFormField
              {...field}
              label={label}
              tooltip={tooltip}
              defaultValue={0}
              min={0}
              max={256}
            />
          </FormControl>
          <div className="flex pt-1">
            {/* Empty quarter-width spacer; presumably offsets the message past the label column — confirm against the form layout. */}
            <div className="w-1/4" />
            <FormMessage />
          </div>
        </FormItem>
      )}
    />
  );
}
export function OverlappedPercent() {
return (
<SliderInputFormField

View File

@ -14,6 +14,7 @@ import {
import {
AutoMetadata,
EnableTocToggle,
ImageContextWindow,
OverlappedPercent,
} from './common-item';
@ -26,6 +27,7 @@ export function NaiveConfiguration() {
<DelimiterFormField></DelimiterFormField>
<ChildrenDelimiterForm />
<EnableTocToggle />
<ImageContextWindow />
<AutoMetadata />
<OverlappedPercent />
</ConfigurationFormContainer>

View File

@ -32,6 +32,7 @@ export const formSchema = z
tag_kb_ids: z.array(z.string()).nullish(),
topn_tags: z.number().optional(),
toc_extraction: z.boolean().optional(),
image_context_window: z.number().optional(),
overlapped_percent: z.number().optional(),
// MinerU-specific options
mineru_parse_method: z.enum(['auto', 'txt', 'ocr']).optional(),

View File

@ -70,6 +70,7 @@ export default function DatasetSettings() {
html4excel: false,
topn_tags: 3,
toc_extraction: false,
image_context_window: 0,
overlapped_percent: 0,
// MinerU-specific defaults
mineru_parse_method: 'auto',