Fix image parser KeyError and support a customizable image-description prompt (#10319)

### What problem does this PR solve?
Bug fix: the image parser failed with `KeyError: 'llm_id'`.
Feature: the prompt that cv_model uses to describe a picture can now be customized (#10320).


### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
TeslaZY
2025-09-28 12:47:36 +08:00
committed by GitHub
parent 3521eb61fe
commit 4996dcb0eb
7 changed files with 30 additions and 3 deletions

View File

@ -108,8 +108,9 @@ class ParserParam(ProcessParamBase):
"parse_method": "ocr",
"llm_id": "",
"lang": "Chinese",
"system_prompt": "",
"suffix": ["jpg", "jpeg", "png", "gif"],
"output_format": "json",
"output_format": "text",
},
"email": {
"suffix": [
@ -329,11 +330,16 @@ class Parser(ProcessBase):
else:
lang = conf["lang"]
# use VLM to describe the picture
cv_model = LLMBundle(self._canvas.get_tenant_id(), LLMType.IMAGE2TEXT, llm_name=conf["llm_id"], lang=lang)
cv_model = LLMBundle(self._canvas.get_tenant_id(), LLMType.IMAGE2TEXT, llm_name=conf["parse_method"], lang=lang)
img_binary = io.BytesIO()
img.save(img_binary, format="JPEG")
img_binary.seek(0)
txt = cv_model.describe(img_binary.read())
system_prompt = conf.get("system_prompt")
if system_prompt:
txt = cv_model.describe_with_prompt(img_binary.read(), system_prompt)
else:
txt = cv_model.describe(img_binary.read())
self.set_output("text", txt)

View File

@ -1708,6 +1708,9 @@ This delimiter is used to split the input text into several text pieces echo of
filenameEmbdWeight: 'Filename embd weight',
begin: 'File',
parserMethod: 'Parser method',
systemPrompt: 'System Prompt',
systemPromptPlaceholder:
'Enter system prompt for image analysis, if empty the system default value will be used',
exportJson: 'Export JSON',
viewResult: 'View Result',
running: 'Running',

View File

@ -1626,6 +1626,9 @@ General实体和关系提取提示来自 GitHub - microsoft/graphrag基于
filenameEmbdWeight: '文件名嵌入权重',
begin: '文件',
parserMethod: '解析方法',
systemPrompt: '系统提示词',
systemPromptPlaceholder:
'请输入用于图像分析的系统提示词,若为空则使用系统缺省值',
exportJson: '导出 JSON',
viewResult: '查看结果',
running: '运行中',

View File

@ -250,6 +250,7 @@ export const initialParserValues = {
fileFormat: FileType.Image,
output_format: ImageOutputFormat.Text,
parse_method: ImageParseMethod.OCR,
system_prompt: '',
},
{
fileFormat: FileType.Email,

View File

@ -1,7 +1,10 @@
import { RAGFlowFormItem } from '@/components/ragflow-form';
import { Textarea } from '@/components/ui/textarea';
import { buildOptions } from '@/utils/form';
import { isEmpty } from 'lodash';
import { useEffect, useMemo } from 'react';
import { useFormContext, useWatch } from 'react-hook-form';
import { useTranslation } from 'react-i18next';
import { ImageParseMethod } from '../../constant';
import { LanguageFormField, ParserMethodFormField } from './common-form-fields';
import { CommonProps } from './interface';
@ -11,6 +14,7 @@ import { buildFieldNameWithPrefix } from './utils';
const options = buildOptions(ImageParseMethod);
export function ImageFormFields({ prefix }: CommonProps) {
const { t } = useTranslation();
const form = useFormContext();
const parseMethodName = buildFieldNameWithPrefix('parse_method', prefix);
@ -40,6 +44,14 @@ export function ImageFormFields({ prefix }: CommonProps) {
optionsWithoutLLM={options}
></ParserMethodFormField>
{languageShown && <LanguageFormField prefix={prefix}></LanguageFormField>}
{languageShown && (
<RAGFlowFormItem
name={buildFieldNameWithPrefix('system_prompt', prefix)}
label={t('dataflow.systemPrompt')}
>
<Textarea placeholder={t('dataflow.systemPromptPlaceholder')} />
</RAGFlowFormItem>
)}
</>
);
}

View File

@ -64,6 +64,7 @@ export const FormSchema = z.object({
lang: z.string().optional(),
fields: z.array(z.string()).optional(),
llm_id: z.string().optional(),
system_prompt: z.string().optional(),
}),
),
});

View File

@ -100,6 +100,7 @@ function transformParserParams(params: ParserFormSchemaType) {
...filteredSetup,
parse_method: cur.parse_method,
lang: cur.lang,
system_prompt: cur.system_prompt,
};
break;
case FileType.Email: