From 53b0b0e5838373a103154638d1ffca336a3a46d4 Mon Sep 17 00:00:00 2001
From: Stephen Hu
Date: Fri, 25 Jul 2025 12:16:33 +0800
Subject: [PATCH] get keep alive from env (#9039)

### What problem does this PR solve?

Read the Ollama `keep_alive` value from the `OLLAMA_KEEP_ALIVE` environment variable (falling back to `-1`) instead of hard-coding `keep_alive=-1` in the Ollama CV and embedding models; an explicit `ollama_keep_alive` kwarg still takes precedence.

### Type of change

- [x] Refactoring
---
 rag/llm/cv_model.py        | 5 +++--
 rag/llm/embedding_model.py | 5 +++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/rag/llm/cv_model.py b/rag/llm/cv_model.py
index 5303dfca0..b465661de 100644
--- a/rag/llm/cv_model.py
+++ b/rag/llm/cv_model.py
@@ -469,6 +469,7 @@ class OllamaCV(Base):
         self.client = Client(host=kwargs["base_url"])
         self.model_name = model_name
         self.lang = lang
+        self.keep_alive = kwargs.get("ollama_keep_alive", int(os.environ.get("OLLAMA_KEEP_ALIVE", -1)))
 
     def describe(self, image):
         prompt = self.prompt("")
@@ -517,7 +518,7 @@
             model=self.model_name,
             messages=history,
             options=options,
-            keep_alive=-1,
+            keep_alive=self.keep_alive,
         )
 
         ans = response["message"]["content"].strip()
@@ -548,7 +549,7 @@
             messages=history,
             stream=True,
             options=options,
-            keep_alive=-1,
+            keep_alive=self.keep_alive,
         )
         for resp in response:
             if resp["done"]:
diff --git a/rag/llm/embedding_model.py b/rag/llm/embedding_model.py
index 415e9b712..e460de6e2 100644
--- a/rag/llm/embedding_model.py
+++ b/rag/llm/embedding_model.py
@@ -285,6 +285,7 @@ class OllamaEmbed(Base):
     def __init__(self, key, model_name, **kwargs):
         self.client = Client(host=kwargs["base_url"]) if not key or key == "x" else Client(host=kwargs["base_url"], headers={"Authorization": f"Bearer {key}"})
         self.model_name = model_name
+        self.keep_alive = kwargs.get("ollama_keep_alive", int(os.environ.get("OLLAMA_KEEP_ALIVE", -1)))
 
     def encode(self, texts: list):
         arr = []
@@ -293,7 +294,7 @@
             # remove special tokens if they exist
             for token in OllamaEmbed._special_tokens:
                 txt = txt.replace(token, "")
-            res = self.client.embeddings(prompt=txt, model=self.model_name, options={"use_mmap": True}, keep_alive=-1)
+            res = self.client.embeddings(prompt=txt, model=self.model_name, options={"use_mmap": True}, keep_alive=self.keep_alive)
             try:
                 arr.append(res["embedding"])
             except Exception as _e:
@@ -305,7 +306,7 @@
             # remove special tokens if they exist
             for token in OllamaEmbed._special_tokens:
                 text = text.replace(token, "")
-            res = self.client.embeddings(prompt=text, model=self.model_name, options={"use_mmap": True}, keep_alive=-1)
+            res = self.client.embeddings(prompt=text, model=self.model_name, options={"use_mmap": True}, keep_alive=self.keep_alive)
             try:
                 return np.array(res["embedding"]), 128
             except Exception as _e:
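
Below is a minimal, hypothetical sketch (not part of the patch) of the precedence the added attribute implements: an explicit `ollama_keep_alive` kwarg wins, otherwise the `OLLAMA_KEEP_ALIVE` environment variable is used, otherwise `-1`. The helper name `resolve_keep_alive` is invented purely for illustration.

```python
import os

# Hypothetical helper mirroring the expression added to OllamaCV.__init__ and
# OllamaEmbed.__init__ in this patch; it is not part of the patch itself.
def resolve_keep_alive(**kwargs):
    # explicit kwarg beats the environment variable; -1 (keep the model loaded) is the final fallback
    return kwargs.get("ollama_keep_alive", int(os.environ.get("OLLAMA_KEEP_ALIVE", -1)))

os.environ["OLLAMA_KEEP_ALIVE"] = "300"           # read as an integer number of seconds
print(resolve_keep_alive())                       # 300 -> taken from the environment
print(resolve_keep_alive(ollama_keep_alive=0))    # 0   -> explicit kwarg takes precedence
del os.environ["OLLAMA_KEEP_ALIVE"]
print(resolve_keep_alive())                       # -1  -> default when nothing is configured
```

Because the environment value goes through `int(...)`, it is expected to be a plain integer number of seconds rather than an Ollama-style duration string such as `"5m"`.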