Mirror of https://github.com/infiniflow/ragflow.git
refine error response, add set api-key MD (#178)
@@ -7,6 +7,23 @@ from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer


def torch_gc():
    try:
        import torch
        if torch.cuda.is_available():
            # with torch.cuda.device(DEVICE):
            torch.cuda.empty_cache()
            torch.cuda.ipc_collect()
        elif torch.backends.mps.is_available():
            try:
                from torch.mps import empty_cache
                empty_cache()
            except Exception as e:
                pass
    except Exception:
        pass


class RPCHandler:
    def __init__(self):
        self._functions = {}
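The torch_gc() helper added here releases cached accelerator memory on CUDA (and, on Apple silicon, MPS), swallowing exceptions so it is safe to call even when torch or a GPU is unavailable. A minimal sketch of how it might be wired around a generation call follows; the model id, prompt handling, and generate arguments are illustrative assumptions, not part of this commit:

# Illustrative only: model id and generation parameters are assumptions.
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "your/model-id"  # hypothetical checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")

def generate_once(prompt: str) -> str:
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    output_ids = model.generate(**inputs, max_new_tokens=256)
    text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    torch_gc()  # release cached CUDA/MPS memory after the request
    return text

Calling torch_gc() after each request returns unused cached blocks to the device, which helps avoid holding peak-sized allocations between RPC calls.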
@@ -49,6 +66,7 @@ def chat(messages, gen_conf):
    global tokenizer
    model = Model()
    try:
        torch_gc()
        conf = {
            "max_new_tokens": int(
                gen_conf.get(
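This hunk inserts a torch_gc() call at the top of chat() and builds a generation config from the caller-supplied gen_conf; the hunk ends at its context boundary mid-expression, so the exact keys and defaults are not visible here. A sketch of what such a mapping could look like, with assumed keys and defaults:

# Sketch only: keys and defaults are assumptions, not the values from this commit.
def build_generation_kwargs(gen_conf: dict) -> dict:
    return {
        "max_new_tokens": int(gen_conf.get("max_tokens", 256)),
        "temperature": float(gen_conf.get("temperature", 0.7)),
        "top_p": float(gen_conf.get("top_p", 0.9)),
        "do_sample": True,
    }

# Hypothetical usage inside chat():
# output_ids = model.generate(**inputs, **build_generation_kwargs(gen_conf))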