From e9053b6ed461307fd4b4f37cf1c71ee84ad304cf Mon Sep 17 00:00:00 2001 From: QuintinTao <72123724+QuintinTao@users.noreply.github.com> Date: Thu, 8 May 2025 11:24:39 +0800 Subject: [PATCH] fix bug #7309 deepseek-ai/deepseek-vl2 model can not be select as a VL model to parse pdf image (#7312) ### What problem does this PR solve? Fix the issue where the deepseek-ai/deepseek-vl2 model cannot be selected as a VL model to parse PDF images. Also add configs for other VL models from SiliconFlow. _Briefly describe what this PR aims to solve. Include background context that will help reviewers understand the purpose of the PR._ ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) - [ ] New Feature (non-breaking change which adds functionality) - [ ] Documentation Update - [ ] Refactoring - [ ] Performance Improvement - [ ] Other (please describe): --------- Co-authored-by: unknown --- conf/llm_factories.json | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/conf/llm_factories.json b/conf/llm_factories.json index c7f5907e4..76e99d6f9 100644 --- a/conf/llm_factories.json +++ b/conf/llm_factories.json @@ -2555,9 +2555,9 @@ }, { "llm_name": "deepseek-ai/deepseek-vl2", - "tags": "LLM,CHAT,4k", + "tags": "LLM,IMAGE2TEXT,4k", "max_tokens": 4096, - "model_type": "chat" + "model_type": "image2text" }, { "llm_name": "deepseek-ai/Janus-Pro-7B", @@ -2571,6 +2571,36 @@ "max_tokens": 32768, "model_type": "image2text" }, + { + "llm_name": "Qwen/Qwen2.5-VL-72B-Instruct", + "tags": "LLM,IMAGE2TEXT,128k", + "max_tokens": 131072, + "model_type": "image2text" + }, + { + "llm_name": "Qwen/Qwen2.5-VL-32B-Instruct", + "tags": "LLM,IMAGE2TEXT,128k", + "max_tokens": 131072, + "model_type": "image2text" + }, + { + "llm_name": "Pro/Qwen/Qwen2.5-VL-7B-Instruct", + "tags": "LLM,IMAGE2TEXT,32k", + "max_tokens": 32768, + "model_type": "image2text" + }, + { + "llm_name": "Qwen/Qwen2-VL-72B-Instruct", + "tags": "LLM,IMAGE2TEXT,32k", + "max_tokens": 32768, +
"model_type": "image2text" + }, + { + "llm_name": "Pro/Qwen/Qwen2-VL-7B-Instruct", + "tags": "LLM,IMAGE2TEXT,32k", + "max_tokens": 32768, + "model_type": "image2text" + }, { "llm_name": "FunAudioLLM/CosyVoice2-0.5B", "tags": "LLM,TTS,32k",