From b47dcc9108db4beb3f704281e7ab4d1c55fc60f3 Mon Sep 17 00:00:00 2001
From: Viktor Dmitriyev <2291074+vdmitriyev@users.noreply.github.com>
Date: Thu, 24 Jul 2025 05:20:14 +0200
Subject: [PATCH] Fix issue with `keep_alive=-1` for ollama chat model by
 allowing a user to set an additional configuration option (#9017)

### What problem does this PR solve?

Fixes an issue with `keep_alive=-1` for the Ollama chat model by allowing a user to set an additional configuration option. This is a non-breaking change because the previous default value, `keep_alive=-1`, is still used when the option is not set.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
- [x] Performance Improvement
- [x] Other (please describe):
  - An additional configuration option has been added to control the behavior of RAGFlow when working with an Ollama LLM
---
 rag/llm/chat_model.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/rag/llm/chat_model.py b/rag/llm/chat_model.py
index ad8af6f3f..55c7aac63 100644
--- a/rag/llm/chat_model.py
+++ b/rag/llm/chat_model.py
@@ -663,6 +663,7 @@ class OllamaChat(Base):
 
         self.client = Client(host=base_url) if not key or key == "x" else Client(host=base_url, headers={"Authorization": f"Bearer {key}"})
         self.model_name = model_name
+        self.keep_alive = kwargs.get("ollama_keep_alive", int(os.environ.get("OLLAMA_KEEP_ALIVE", -1)))
 
     def _clean_conf(self, gen_conf):
         options = {}
@@ -679,7 +680,7 @@
         ctx_size = self._calculate_dynamic_ctx(history)
         gen_conf["num_ctx"] = ctx_size
 
-        response = self.client.chat(model=self.model_name, messages=history, options=gen_conf, keep_alive=-1)
+        response = self.client.chat(model=self.model_name, messages=history, options=gen_conf, keep_alive=self.keep_alive)
         ans = response["message"]["content"].strip()
         token_count = response.get("eval_count", 0) + response.get("prompt_eval_count", 0)
         return ans, token_count
@@ -706,7 +707,7 @@
 
         ans = ""
         try:
-            response = self.client.chat(model=self.model_name, messages=history, stream=True, options=options, keep_alive=-1)
+            response = self.client.chat(model=self.model_name, messages=history, stream=True, options=options, keep_alive=self.keep_alive)
             for resp in response:
                 if resp["done"]:
                     token_count = resp.get("prompt_eval_count", 0) + resp.get("eval_count", 0)
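
Reviewer note: a minimal sketch of the lookup order the added line establishes, assuming nothing beyond the patched code itself. `resolve_keep_alive` is a hypothetical stand-in for the constructor logic, and the value `300` is an arbitrary example; in Ollama, a `keep_alive` of `-1` keeps the model loaded indefinitely, while `0` unloads it right after the request.

```python
import os

def resolve_keep_alive(**kwargs):
    # Same expression as the added line: an explicit `ollama_keep_alive` kwarg
    # wins, then the OLLAMA_KEEP_ALIVE environment variable, then the previous
    # hard-coded default of -1.
    return kwargs.get("ollama_keep_alive", int(os.environ.get("OLLAMA_KEEP_ALIVE", -1)))

os.environ.pop("OLLAMA_KEEP_ALIVE", None)            # start from a clean environment
assert resolve_keep_alive() == -1                    # old behavior is preserved by default
os.environ["OLLAMA_KEEP_ALIVE"] = "300"              # hypothetical value, in seconds
assert resolve_keep_alive() == 300                   # env var overrides the default
assert resolve_keep_alive(ollama_keep_alive=0) == 0  # explicit kwarg wins over the env var
```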