From b47dcc9108db4beb3f704281e7ab4d1c55fc60f3 Mon Sep 17 00:00:00 2001
From: Viktor Dmitriyev <2291074+vdmitriyev@users.noreply.github.com>
Date: Thu, 24 Jul 2025 05:20:14 +0200
Subject: [PATCH] Fix issue with `keep_alive=-1` for ollama chat model by
 allowing a user to set an additional configuration option (#9017)

### What problem does this PR solve?

Fixes an issue with `keep_alive=-1` for the Ollama chat model by allowing a user to set an additional configuration option. This is a non-breaking change because the previous default value, `keep_alive=-1`, is still used when the option is not set.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
- [x] Performance Improvement
- [x] Other (please describe):
  - An additional configuration option has been added to control the behavior of RAGFlow when working with an Ollama LLM
---
 rag/llm/chat_model.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/rag/llm/chat_model.py b/rag/llm/chat_model.py
index ad8af6f3f..55c7aac63 100644
--- a/rag/llm/chat_model.py
+++ b/rag/llm/chat_model.py
@@ -663,6 +663,7 @@ class OllamaChat(Base):
 
         self.client = Client(host=base_url) if not key or key == "x" else Client(host=base_url, headers={"Authorization": f"Bearer {key}"})
         self.model_name = model_name
+        self.keep_alive = kwargs.get("ollama_keep_alive", int(os.environ.get("OLLAMA_KEEP_ALIVE", -1)))
 
     def _clean_conf(self, gen_conf):
         options = {}
@@ -679,7 +680,7 @@
         ctx_size = self._calculate_dynamic_ctx(history)
         gen_conf["num_ctx"] = ctx_size
 
-        response = self.client.chat(model=self.model_name, messages=history, options=gen_conf, keep_alive=-1)
+        response = self.client.chat(model=self.model_name, messages=history, options=gen_conf, keep_alive=self.keep_alive)
         ans = response["message"]["content"].strip()
         token_count = response.get("eval_count", 0) + response.get("prompt_eval_count", 0)
         return ans, token_count
@@ -706,7 +707,7 @@
 
         ans = ""
         try:
-            response = self.client.chat(model=self.model_name, messages=history, stream=True, options=options, keep_alive=-1)
+            response = self.client.chat(model=self.model_name, messages=history, stream=True, options=options, keep_alive=self.keep_alive)
             for resp in response:
                 if resp["done"]:
                     token_count = resp.get("prompt_eval_count", 0) + resp.get("eval_count", 0)
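
Reviewer note: a minimal sketch of the lookup order the added line establishes, assuming nothing beyond the patched code itself. `resolve_keep_alive` is a hypothetical stand-in for the constructor logic, and the value `300` is an arbitrary example; in Ollama, a `keep_alive` of `-1` keeps the model loaded indefinitely, while `0` unloads it right after the request.

```python
import os

def resolve_keep_alive(**kwargs):
    # Same expression as the added line: an explicit `ollama_keep_alive` kwarg
    # wins, then the OLLAMA_KEEP_ALIVE environment variable, then the previous
    # hard-coded default of -1.
    return kwargs.get("ollama_keep_alive", int(os.environ.get("OLLAMA_KEEP_ALIVE", -1)))

os.environ.pop("OLLAMA_KEEP_ALIVE", None)            # start from a clean environment
assert resolve_keep_alive() == -1                    # old behavior is preserved by default
os.environ["OLLAMA_KEEP_ALIVE"] = "300"              # hypothetical value, in seconds
assert resolve_keep_alive() == 300                   # env var overrides the default
assert resolve_keep_alive(ollama_keep_alive=0) == 0  # explicit kwarg wins over the env var
```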