From 2844700dc454980ae78b54013a1d2cc13c12f722 Mon Sep 17 00:00:00 2001 From: Yongteng Lei Date: Fri, 19 Dec 2025 11:34:21 +0800 Subject: [PATCH] Refa: better UX for adding OCR model (#12034) ### What problem does this PR solve? Better UX for adding OCR model. ### Type of change - [x] Refactoring --- web/src/locales/en.ts | 8 ++++ web/src/locales/zh.ts | 5 +++ .../user-setting/setting-model/hooks.tsx | 5 ++- .../modal/mineru-modal/index.tsx | 39 ++++++++++++++----- 4 files changed, 46 insertions(+), 11 deletions(-) diff --git a/web/src/locales/en.ts b/web/src/locales/en.ts index e3d5affc9..8bee75987 100644 --- a/web/src/locales/en.ts +++ b/web/src/locales/en.ts @@ -1110,6 +1110,9 @@ Example: Virtual Hosted Style`, mcp: 'MCP', mineru: { modelNameRequired: 'Model name is required', + apiServerRequired: 'MinerU API Server Configuration is required', + serverUrlBackendLimit: + 'MinerU Server URL Address is only available for the HTTP client backend', apiserver: 'MinerU API Server Configuration', outputDir: 'MinerU Output Directory Path', backend: 'MinerU Processing Backend Type', @@ -1121,6 +1124,11 @@ Example: Virtual Hosted Style`, vlmTransformers: 'Vision Language Model with Transformers', vlmVllmEngine: 'Vision Language Model with vLLM Engine', vlmHttpClient: 'Vision Language Model via HTTP Client', + vlmMlxEngine: 'Vision Language Model with MLX Engine', + vlmVllmAsyncEngine: + 'Vision Language Model with vLLM Async Engine (Experimental)', + vlmLmdeployEngine: + 'Vision Language Model with LMDeploy Engine (Experimental)', }, }, }, diff --git a/web/src/locales/zh.ts b/web/src/locales/zh.ts index 828dc5882..cdce6076a 100644 --- a/web/src/locales/zh.ts +++ b/web/src/locales/zh.ts @@ -959,6 +959,8 @@ General:实体和关系提取提示来自 GitHub - microsoft/graphrag:基于 mcp: 'MCP', mineru: { modelNameRequired: '模型名称为必填项', + apiServerRequired: 'MinerU API服务器配置为必填项', + serverUrlBackendLimit: '仅在backend 为vlm-http-client 时可填写', apiserver: 'MinerU API服务器配置', outputDir: 'MinerU输出目录路径', backend: 'MinerU处理后端类型', @@ -970,6 +972,9 @@ General:实体和关系提取提示来自 GitHub - microsoft/graphrag:基于 vlmTransformers: '基于Transformers的视觉语言模型', vlmVllmEngine: '基于vLLM引擎的视觉语言模型', vlmHttpClient: '通过HTTP客户端连接的视觉语言模型', + vlmMlxEngine: '基于MLX引擎的视觉语言模型', + vlmVllmAsyncEngine: '基于vLLM异步引擎的视觉语言模型(实验性)', + vlmLmdeployEngine: '基于LMDeploy引擎的视觉语言模型(实验性)', }, }, }, diff --git a/web/src/pages/user-setting/setting-model/hooks.tsx b/web/src/pages/user-setting/setting-model/hooks.tsx index 9fc620d3d..43d675783 100644 --- a/web/src/pages/user-setting/setting-model/hooks.tsx +++ b/web/src/pages/user-setting/setting-model/hooks.tsx @@ -472,10 +472,13 @@ export const useSubmitMinerU = () => { const onMineruOk = useCallback( async (payload: MinerUFormValues) => { - const cfg = { + const cfg: any = { ...payload, mineru_delete_output: payload.mineru_delete_output ?? true ? '1' : '0', }; + if (payload.mineru_backend !== 'vlm-http-client') { + delete cfg.mineru_server_url; + } const req: IAddLlmRequestBody = { llm_factory: LLMFactory.MinerU, llm_name: payload.llm_name, diff --git a/web/src/pages/user-setting/setting-model/modal/mineru-modal/index.tsx b/web/src/pages/user-setting/setting-model/modal/mineru-modal/index.tsx index 7833467db..0a9c1924b 100644 --- a/web/src/pages/user-setting/setting-model/modal/mineru-modal/index.tsx +++ b/web/src/pages/user-setting/setting-model/modal/mineru-modal/index.tsx @@ -16,7 +16,7 @@ import { IModalProps } from '@/interfaces/common'; import { buildOptions } from '@/utils/form'; import { zodResolver } from '@hookform/resolvers/zod'; import { t } from 'i18next'; -import { useForm } from 'react-hook-form'; +import { useForm, useWatch } from 'react-hook-form'; import { useTranslation } from 'react-i18next'; import { z } from 'zod'; import { LLMHeader } from '../../components/llm-header'; @@ -25,15 +25,18 @@ const FormSchema = z.object({ llm_name: z.string().min(1, { message: t('setting.mineru.modelNameRequired'), }), - mineru_apiserver: z.string().optional(), + mineru_apiserver: z.string().url(), mineru_output_dir: z.string().optional(), mineru_backend: z.enum([ 'pipeline', 'vlm-transformers', 'vlm-vllm-engine', 'vlm-http-client', + 'vlm-mlx-engine', + 'vlm-vllm-async-engine', + 'vlm-lmdeploy-engine', ]), - mineru_server_url: z.string().optional(), + mineru_server_url: z.string().url().optional(), mineru_delete_output: z.boolean(), }); @@ -52,6 +55,9 @@ const MinerUModal = ({ 'vlm-transformers', 'vlm-vllm-engine', 'vlm-http-client', + 'vlm-mlx-engine', + 'vlm-vllm-async-engine', + 'vlm-lmdeploy-engine', ]); const form = useForm({ @@ -62,6 +68,11 @@ const MinerUModal = ({ }, }); + const backend = useWatch({ + control: form.control, + name: 'mineru_backend', + }); + const handleOk = async (values: MinerUFormValues) => { const ret = await onOk?.(values as any); if (ret) { @@ -93,6 +104,7 @@ const MinerUModal = ({ @@ -109,18 +121,25 @@ const MinerUModal = ({ {(field) => ( { + field.onChange(value); + if (value !== 'vlm-http-client') { + form.setValue('mineru_server_url', undefined); + } + }} options={backendOptions} placeholder={t('setting.mineru.selectBackend')} /> )} - - - + {backend === 'vlm-http-client' && ( + + + + )}