Feat: Support vLLM #4316 (#5395)

### What problem does this PR solve?
Feat: Support vLLM #4316

### Type of change


- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
balibabu
2025-02-26 18:33:43 +08:00
committed by GitHub
parent fefea3a2a5
commit d9dd1171a3
9 changed files with 235 additions and 115 deletions

View File

@ -1,3 +1,4 @@
import { LLMFactory } from '@/constants/llm';
import { useTranslate } from '@/hooks/common-hooks';
import { IModalProps } from '@/interfaces/common';
import { IAddLlmRequestBody } from '@/interfaces/request/llm';
@ -18,19 +19,23 @@ type FieldType = IAddLlmRequestBody & { vision: boolean };
const { Option } = Select;
const llmFactoryToUrlMap = {
Ollama:
[LLMFactory.Ollama]:
'https://github.com/infiniflow/ragflow/blob/main/docs/guides/deploy_local_llm.mdx',
Xinference: 'https://inference.readthedocs.io/en/latest/user_guide',
ModelScope: 'https://www.modelscope.cn/docs/model-service/API-Inference/intro',
LocalAI: 'https://localai.io/docs/getting-started/models/',
'LM-Studio': 'https://lmstudio.ai/docs/basics',
'OpenAI-API-Compatible': 'https://platform.openai.com/docs/models/gpt-4',
TogetherAI: 'https://docs.together.ai/docs/deployment-options',
Replicate: 'https://replicate.com/docs/topics/deployments',
OpenRouter: 'https://openrouter.ai/docs',
HuggingFace:
[LLMFactory.Xinference]:
'https://inference.readthedocs.io/en/latest/user_guide',
[LLMFactory.ModelScope]:
'https://www.modelscope.cn/docs/model-service/API-Inference/intro',
[LLMFactory.LocalAI]: 'https://localai.io/docs/getting-started/models/',
[LLMFactory.LMStudio]: 'https://lmstudio.ai/docs/basics',
[LLMFactory.OpenAiAPICompatible]:
'https://platform.openai.com/docs/models/gpt-4',
[LLMFactory.TogetherAI]: 'https://docs.together.ai/docs/deployment-options',
[LLMFactory.Replicate]: 'https://replicate.com/docs/topics/deployments',
[LLMFactory.OpenRouter]: 'https://openrouter.ai/docs',
[LLMFactory.HuggingFace]:
'https://huggingface.co/docs/text-embeddings-inference/quick_tour',
GPUStack: 'https://docs.gpustack.ai/latest/quickstart',
[LLMFactory.GPUStack]: 'https://docs.gpustack.ai/latest/quickstart',
[LLMFactory.VLLM]: 'https://docs.vllm.ai/en/latest/',
};
// Union of the factory names that have a documentation URL above; used to
// safely index llmFactoryToUrlMap from a runtime-provided factory string.
type LlmFactory = keyof typeof llmFactoryToUrlMap;
@ -66,11 +71,11 @@ const OllamaModal = ({
llmFactoryToUrlMap[llmFactory as LlmFactory] ||
'https://github.com/infiniflow/ragflow/blob/main/docs/guides/deploy_local_llm.mdx';
const optionsMap = {
HuggingFace: [
[LLMFactory.HuggingFace]: [
{ value: 'embedding', label: 'embedding' },
{ value: 'chat', label: 'chat' },
],
Xinference: [
[LLMFactory.Xinference]: [
{ value: 'chat', label: 'chat' },
{ value: 'embedding', label: 'embedding' },
{ value: 'rerank', label: 'rerank' },
@ -78,10 +83,8 @@ const OllamaModal = ({
{ value: 'speech2text', label: 'sequence2text' },
{ value: 'tts', label: 'tts' },
],
ModelScope: [
{ value: 'chat', label: 'chat' },
],
GPUStack: [
[LLMFactory.ModelScope]: [{ value: 'chat', label: 'chat' }],
[LLMFactory.GPUStack]: [
{ value: 'chat', label: 'chat' },
{ value: 'embedding', label: 'embedding' },
{ value: 'rerank', label: 'rerank' },