mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-02-03 09:05:07 +08:00
feat: add image context window in dataset config (#12094)
### What problem does this PR solve? Add image context window configuration in **Dataset** > **Configuration** and **Dataset** > **Files** > **Parse** > **Ingestion Pipeline** (**Chunk Method** modal) ### Type of change - [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
554
web/package-lock.json
generated
554
web/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@ -21,6 +21,7 @@ import { IChangeParserConfigRequestBody } from '@/interfaces/request/document';
|
|||||||
import {
|
import {
|
||||||
ChunkMethodItem,
|
ChunkMethodItem,
|
||||||
EnableTocToggle,
|
EnableTocToggle,
|
||||||
|
ImageContextWindow,
|
||||||
ParseTypeItem,
|
ParseTypeItem,
|
||||||
} from '@/pages/dataset/dataset-setting/configuration/common-item';
|
} from '@/pages/dataset/dataset-setting/configuration/common-item';
|
||||||
import { zodResolver } from '@hookform/resolvers/zod';
|
import { zodResolver } from '@hookform/resolvers/zod';
|
||||||
@ -119,6 +120,7 @@ export function ChunkMethodDialog({
|
|||||||
auto_questions: z.coerce.number().optional(),
|
auto_questions: z.coerce.number().optional(),
|
||||||
html4excel: z.boolean().optional(),
|
html4excel: z.boolean().optional(),
|
||||||
toc_extraction: z.boolean().optional(),
|
toc_extraction: z.boolean().optional(),
|
||||||
|
image_context_window: z.coerce.number().optional(),
|
||||||
mineru_parse_method: z.enum(['auto', 'txt', 'ocr']).optional(),
|
mineru_parse_method: z.enum(['auto', 'txt', 'ocr']).optional(),
|
||||||
mineru_formula_enable: z.boolean().optional(),
|
mineru_formula_enable: z.boolean().optional(),
|
||||||
mineru_table_enable: z.boolean().optional(),
|
mineru_table_enable: z.boolean().optional(),
|
||||||
@ -364,7 +366,10 @@ export function ChunkMethodDialog({
|
|||||||
className="space-y-3"
|
className="space-y-3"
|
||||||
>
|
>
|
||||||
{selectedTag === DocumentParserType.Naive && (
|
{selectedTag === DocumentParserType.Naive && (
|
||||||
<EnableTocToggle />
|
<>
|
||||||
|
<EnableTocToggle />
|
||||||
|
<ImageContextWindow />
|
||||||
|
</>
|
||||||
)}
|
)}
|
||||||
{showAutoKeywords(selectedTag) && (
|
{showAutoKeywords(selectedTag) && (
|
||||||
<>
|
<>
|
||||||
|
|||||||
@ -18,6 +18,7 @@ export function useDefaultParserValues() {
|
|||||||
auto_questions: 0,
|
auto_questions: 0,
|
||||||
html4excel: false,
|
html4excel: false,
|
||||||
toc_extraction: false,
|
toc_extraction: false,
|
||||||
|
image_context_window: 0,
|
||||||
mineru_parse_method: 'auto',
|
mineru_parse_method: 'auto',
|
||||||
mineru_formula_enable: true,
|
mineru_formula_enable: true,
|
||||||
mineru_table_enable: true,
|
mineru_table_enable: true,
|
||||||
|
|||||||
@ -364,6 +364,9 @@ Procedural Memory: Learned skills, habits, and automated procedures.`,
|
|||||||
},
|
},
|
||||||
knowledgeConfiguration: {
|
knowledgeConfiguration: {
|
||||||
settings: 'Settings',
|
settings: 'Settings',
|
||||||
|
imageContextWindow: 'Image context window',
|
||||||
|
imageContextWindowTip:
|
||||||
|
'Captures N tokens of text above and below the image to provide richer background context for the image chunk.',
|
||||||
autoMetadata: 'Auto metadata',
|
autoMetadata: 'Auto metadata',
|
||||||
mineruOptions: 'MinerU Options',
|
mineruOptions: 'MinerU Options',
|
||||||
mineruParseMethod: 'Parse Method',
|
mineruParseMethod: 'Parse Method',
|
||||||
|
|||||||
@ -310,6 +310,36 @@ export function EnableTocToggle() {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export function ImageContextWindow() {
|
||||||
|
const { t } = useTranslate('knowledgeConfiguration');
|
||||||
|
const form = useFormContext();
|
||||||
|
|
||||||
|
return (
|
||||||
|
<FormField
|
||||||
|
control={form.control}
|
||||||
|
name="parser_config.image_context_window"
|
||||||
|
render={({ field }) => (
|
||||||
|
<FormItem>
|
||||||
|
<FormControl>
|
||||||
|
<SliderInputFormField
|
||||||
|
{...field}
|
||||||
|
label={t('imageContextWindow')}
|
||||||
|
tooltip={t('imageContextWindowTip')}
|
||||||
|
defaultValue={0}
|
||||||
|
min={0}
|
||||||
|
max={256}
|
||||||
|
/>
|
||||||
|
</FormControl>
|
||||||
|
<div className="flex pt-1">
|
||||||
|
<div className="w-1/4"></div>
|
||||||
|
<FormMessage />
|
||||||
|
</div>
|
||||||
|
</FormItem>
|
||||||
|
)}
|
||||||
|
/>
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
export function OverlappedPercent() {
|
export function OverlappedPercent() {
|
||||||
return (
|
return (
|
||||||
<SliderInputFormField
|
<SliderInputFormField
|
||||||
|
|||||||
@ -14,6 +14,7 @@ import {
|
|||||||
import {
|
import {
|
||||||
AutoMetadata,
|
AutoMetadata,
|
||||||
EnableTocToggle,
|
EnableTocToggle,
|
||||||
|
ImageContextWindow,
|
||||||
OverlappedPercent,
|
OverlappedPercent,
|
||||||
} from './common-item';
|
} from './common-item';
|
||||||
|
|
||||||
@ -26,6 +27,7 @@ export function NaiveConfiguration() {
|
|||||||
<DelimiterFormField></DelimiterFormField>
|
<DelimiterFormField></DelimiterFormField>
|
||||||
<ChildrenDelimiterForm />
|
<ChildrenDelimiterForm />
|
||||||
<EnableTocToggle />
|
<EnableTocToggle />
|
||||||
|
<ImageContextWindow />
|
||||||
<AutoMetadata />
|
<AutoMetadata />
|
||||||
<OverlappedPercent />
|
<OverlappedPercent />
|
||||||
</ConfigurationFormContainer>
|
</ConfigurationFormContainer>
|
||||||
|
|||||||
@ -32,6 +32,7 @@ export const formSchema = z
|
|||||||
tag_kb_ids: z.array(z.string()).nullish(),
|
tag_kb_ids: z.array(z.string()).nullish(),
|
||||||
topn_tags: z.number().optional(),
|
topn_tags: z.number().optional(),
|
||||||
toc_extraction: z.boolean().optional(),
|
toc_extraction: z.boolean().optional(),
|
||||||
|
image_context_window: z.number().optional(),
|
||||||
overlapped_percent: z.number().optional(),
|
overlapped_percent: z.number().optional(),
|
||||||
// MinerU-specific options
|
// MinerU-specific options
|
||||||
mineru_parse_method: z.enum(['auto', 'txt', 'ocr']).optional(),
|
mineru_parse_method: z.enum(['auto', 'txt', 'ocr']).optional(),
|
||||||
|
|||||||
@ -70,6 +70,7 @@ export default function DatasetSettings() {
|
|||||||
html4excel: false,
|
html4excel: false,
|
||||||
topn_tags: 3,
|
topn_tags: 3,
|
||||||
toc_extraction: false,
|
toc_extraction: false,
|
||||||
|
image_context_window: 0,
|
||||||
overlapped_percent: 0,
|
overlapped_percent: 0,
|
||||||
// MinerU-specific defaults
|
// MinerU-specific defaults
|
||||||
mineru_parse_method: 'auto',
|
mineru_parse_method: 'auto',
|
||||||
|
|||||||
Reference in New Issue
Block a user