diff --git a/docs/guides/models/deploy_local_llm.mdx b/docs/guides/models/deploy_local_llm.mdx
index 997e526f3..077cd10be 100644
--- a/docs/guides/models/deploy_local_llm.mdx
+++ b/docs/guides/models/deploy_local_llm.mdx
@@ -7,7 +7,7 @@ slug: /deploy_local_llm
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
-Deploy and run local models using Ollama, Xinference, or other frameworks.
+Deploy and run local models using Ollama, Xinference, vLLM, SGLang, or other frameworks.
---
@@ -314,3 +314,41 @@ To enable IPEX-LLM accelerated Ollama in RAGFlow, you must also complete the con
3. [Update System Model Settings](#6-update-system-model-settings)
4. [Update Chat Configuration](#7-update-chat-configuration)
+## Deploy a local model using vLLM
+
+[vLLM](https://github.com/vllm-project/vllm) is a high-throughput inference and serving engine that exposes an OpenAI-compatible API.
+
+### 1. Install vLLM
+
+On Ubuntu 22.04/24.04, install vLLM with pip:
+
+```bash
+pip install vllm
+```
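+
+Optionally, confirm the installation with a quick sanity check:
+
+```bash
+# Print the installed vLLM version to confirm the installation succeeded.
+python -c "import vllm; print(vllm.__version__)"
+```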
+
+### 2. Run vLLM with recommended settings
+
+The following command starts vLLM in the background, serving a local Qwen3-8B model on port 1025 with automatic tool calling enabled:
+
+```bash
+nohup vllm serve /data/Qwen3-8B --served-model-name Qwen3-8B-FP8 --dtype auto --port 1025 --gpu-memory-utilization 0.90 --tool-call-parser hermes --enable-auto-tool-choice > /var/log/vllm_startup1.log 2>&1 &
+```
+
+You can monitor the startup log:
+
+```bash
+tail -f -n 100 /var/log/vllm_startup1.log
+```
+
+When you see output similar to the following, the vLLM engine is ready to accept requests:
+
+```bash
+Starting vLLM API server 0 on http://0.0.0.0:1025
+Started server process [19177]
+Application startup complete.
+```
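+
+Optionally, verify the OpenAI-compatible endpoint before adding it to RAGFlow. This is a minimal sketch that assumes the server is running on port 1025 with `--served-model-name Qwen3-8B-FP8`, as above:
+
+```bash
+# List the models exposed by the server; the response should include Qwen3-8B-FP8.
+curl http://localhost:1025/v1/models
+
+# Send a test chat completion request.
+curl http://localhost:1025/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{"model": "Qwen3-8B-FP8", "messages": [{"role": "user", "content": "Hello"}]}'
+```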
+
+### 3. Add vLLM chat, embedding, and rerank models in RAGFlow
+
+In the RAGFlow web UI, go to **Settings** > **Model providers**, search for **VLLM**, and add the model. The model name must match the `--served-model-name` value used when starting vLLM (in this example, `Qwen3-8B-FP8`), and the base URL should point to the vLLM server's OpenAI-compatible endpoint (for example, `http://<host>:1025/v1`).
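+
+The command in step 2 serves a chat model only. If you also want vLLM-backed embedding and rerank models, start additional vLLM instances and add them the same way. The commands below are only a sketch: the model names, ports, and log paths are illustrative, and they assume a vLLM version whose `--task` option supports pooling models (`embed`, `score`).
+
+```bash
+# Illustrative: serve an embedding model on a separate port.
+nohup vllm serve BAAI/bge-m3 --task embed --port 1026 > /var/log/vllm_embed.log 2>&1 &
+
+# Illustrative: serve a rerank (scoring) model on another port.
+nohup vllm serve BAAI/bge-reranker-v2-m3 --task score --port 1027 > /var/log/vllm_rerank.log 2>&1 &
+```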
+
+Then open **System Model Settings** and select the vLLM chat model as the default chat model.
+
+### 4. Chat with the vLLM model
+
+Create a chat assistant, start a conversation, and chat with the vLLM model.