From e9053b6ed461307fd4b4f37cf1c71ee84ad304cf Mon Sep 17 00:00:00 2001
From: QuintinTao <72123724+QuintinTao@users.noreply.github.com>
Date: Thu, 8 May 2025 11:24:39 +0800
Subject: [PATCH] fix bug #7309: deepseek-ai/deepseek-vl2 model cannot be
 selected as a VL model to parse PDF images (#7312)

### What problem does this PR solve?
Fix the deepseek-ai/deepseek-vl2 model not being selectable as a VL model to
parse PDF images, and add configs for other VL models from SiliconFlow.
_Briefly describe what this PR aims to solve. Include background context
that will help reviewers understand the purpose of the PR._

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
- [ ] New Feature (non-breaking change which adds functionality)
- [ ] Documentation Update
- [ ] Refactoring
- [ ] Performance Improvement
- [ ] Other (please describe):

---------

Co-authored-by: unknown <taoshi.ln@chinatelecom.cn>
---
 conf/llm_factories.json | 34 ++++++++++++++++++++++++++++++++--
 1 file changed, 32 insertions(+), 2 deletions(-)

diff --git a/conf/llm_factories.json b/conf/llm_factories.json
index c7f5907e4..76e99d6f9 100644
--- a/conf/llm_factories.json
+++ b/conf/llm_factories.json
@@ -2555,9 +2555,9 @@
                 },
                 {
                     "llm_name": "deepseek-ai/deepseek-vl2",
-                    "tags": "LLM,CHAT,4k",
+                    "tags": "LLM,IMAGE2TEXT,4k",
                     "max_tokens": 4096,
-                    "model_type": "chat"
+                    "model_type": "image2text"
                 },
                 {
                     "llm_name": "deepseek-ai/Janus-Pro-7B",
@@ -2571,6 +2571,36 @@
                     "max_tokens": 32768,
                     "model_type": "image2text"
                 },
+                {
+                    "llm_name": "Qwen/Qwen2.5-VL-72B-Instruct",
+                    "tags": "LLM,IMAGE2TEXT,128k",
+                    "max_tokens": 131072,
+                    "model_type": "image2text"
+                },
+                {
+                    "llm_name": "Qwen/Qwen2.5-VL-32B-Instruct",
+                    "tags": "LLM,IMAGE2TEXT,128k",
+                    "max_tokens": 131072,
+                    "model_type": "image2text"
+                },
+                {
+                    "llm_name": "Pro/Qwen/Qwen2.5-VL-7B-Instruct",
+                    "tags": "LLM,IMAGE2TEXT,32k",
+                    "max_tokens": 32768,
+                    "model_type": "image2text"
+                },
+                {
+                    "llm_name": "Qwen/Qwen2-VL-72B-Instruct",
+                    "tags": "LLM,IMAGE2TEXT,32k",
+                    "max_tokens": 32768,
+                    "model_type": "image2text"
+                },
+                {
+                    "llm_name": "Pro/Qwen/Qwen2-VL-7B-Instruct",
+                    "tags": "LLM,IMAGE2TEXT,32k",
+                    "max_tokens": 32768,
+                    "model_type": "image2text"
+                },
                 {
                     "llm_name": "FunAudioLLM/CosyVoice2-0.5B",
                     "tags": "LLM,TTS,32k",