From 3ad147d34977e269fc8c774fbde2d9b172c083a1 Mon Sep 17 00:00:00 2001
From: yiminghub2024 <482890@qq.com>
Date: Fri, 26 Dec 2025 15:14:25 +0800
Subject: [PATCH] Update deploy_local_llm.mdx with vllm guide support (#12222)

### What problem does this PR solve?

Adds a vLLM deployment guide to the local model deployment docs.

### Type of change

- [x] Documentation Update

---
 docs/guides/models/deploy_local_llm.mdx | 40 ++++++++++++++++++++++++-
 1 file changed, 39 insertions(+), 1 deletion(-)

diff --git a/docs/guides/models/deploy_local_llm.mdx b/docs/guides/models/deploy_local_llm.mdx
index 997e526f3..077cd10be 100644
--- a/docs/guides/models/deploy_local_llm.mdx
+++ b/docs/guides/models/deploy_local_llm.mdx
@@ -7,7 +7,7 @@ slug: /deploy_local_llm
 import Tabs from '@theme/Tabs';
 import TabItem from '@theme/TabItem';

-Deploy and run local models using Ollama, Xinference, or other frameworks.
+Deploy and run local models using Ollama, Xinference, vLLM, SGLang, or other frameworks.

 ---

@@ -314,3 +314,41 @@ To enable IPEX-LLM accelerated Ollama in RAGFlow, you must also complete the con
 3. [Update System Model Settings](#6-update-system-model-settings)
 4. [Update Chat Configuration](#7-update-chat-configuration)

### 5. Deploy vLLM

On Ubuntu 22.04 or 24.04, install vLLM with pip:

```bash
pip install vllm
```

### 5.1 Run vLLM with Recommended Settings

The command below serves a local copy of Qwen3-8B under the name `Qwen3-8B-FP8` on port 1025, lets vLLM use up to 90% of GPU memory, and enables automatic tool choice with the Hermes tool-call parser. It runs in the background and writes its log to `/var/log/vllm_startup1.log`:

```bash
nohup vllm serve /data/Qwen3-8B --served-model-name Qwen3-8B-FP8 --dtype auto --port 1025 --gpu-memory-utilization 0.90 --tool-call-parser hermes --enable-auto-tool-choice > /var/log/vllm_startup1.log 2>&1 &
```

You can follow the startup log with:

```bash
tail -f -n 100 /var/log/vllm_startup1.log
```

When you see output like the following, the vLLM engine is ready to accept requests:

```
Starting vLLM API server 0 on http://0.0.0.0:1025
Started server process [19177]
Application startup complete.
```
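Before wiring the server into RAGFlow, you can optionally confirm that vLLM's OpenAI-compatible endpoint is reachable. A minimal check, assuming the server started above is listening on port 1025 of the local host (adjust the host and port to your deployment):

```bash
# List the models the server exposes; the response should include "Qwen3-8B-FP8"
curl http://localhost:1025/v1/models
```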
### 5.2 Integrate RAGFlow with vLLM Chat, Embedding, and Rerank Models via the Web UI

In the RAGFlow web UI, go to **Settings** > **Model providers**, search for **VLLM**, click **Add**, and configure the model as shown below:

![add vllm](https://github.com/user-attachments/assets/6f1d9f1a-3507-465b-87a3-4427254fff86)

The model name you enter should match the `--served-model-name` value (`Qwen3-8B-FP8` in this example), and the base URL should point at the vLLM server's OpenAI-compatible endpoint, typically `http://<host>:1025/v1`.
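If RAGFlow reports that it cannot reach the model after you add it, it can help to call the same OpenAI-compatible endpoint directly to rule out server-side problems. A sketch, assuming the server from section 5.1 (adjust host, port, and model name to your setup):

```bash
# Send a one-shot chat completion to the vLLM server
curl http://localhost:1025/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
        "model": "Qwen3-8B-FP8",
        "messages": [{"role": "user", "content": "Say hello"}]
      }'
```

A JSON response with a `choices` array means the server side is healthy, so any remaining issue is likely in the RAGFlow-side configuration (base URL or model name).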
Next, set the vLLM chat model as the default chat model in the system model settings:

![chat](https://github.com/user-attachments/assets/05efbd4b-2c18-4c6b-8d1c-52bae712372d)

### 5.3 Chat with the vLLM Chat Model

Create a chat assistant, start a conversation, and chat with the model as shown below:
![chat](https://github.com/user-attachments/assets/dc1885f6-23a9-48f1-8850-d5f59b5e8f67)
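As a final note, the launch command in section 5.1 passes `--tool-call-parser hermes --enable-auto-tool-choice`, which lets the served Qwen3 model return OpenAI-style tool calls, the kind of function calling that agent-style features typically rely on. A hedged sketch for exercising it directly, using a made-up `get_weather` tool (adjust host and port to your deployment):

```bash
# Ask for a chat completion while offering a (made-up) tool the model may call
curl http://localhost:1025/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
        "model": "Qwen3-8B-FP8",
        "messages": [{"role": "user", "content": "What is the weather in Paris?"}],
        "tools": [{
          "type": "function",
          "function": {
            "name": "get_weather",
            "description": "Get the current weather for a city",
            "parameters": {
              "type": "object",
              "properties": {"city": {"type": "string"}},
              "required": ["city"]
            }
          }
        }]
      }'
```

If tool calling is working, `choices[0].message` in the response should contain a `tool_calls` entry rather than plain text.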