add local llm implementation (#119)

KevinHuSh
2024-03-12 11:57:08 +08:00
committed by GitHub
parent 0452a6db73
commit f1f09df901
17 changed files with 196 additions and 25 deletions


@@ -20,6 +20,7 @@ from openai import OpenAI
import openai
from rag.nlp import is_english
from rag.utils import num_tokens_from_string


class Base(ABC):
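
The one addition in this hunk is the num_tokens_from_string import, which the new LocalLLM.chat below uses to estimate completion-token usage from the returned text. As a rough, hypothetical sketch of what such a helper typically does (the real one lives in rag/utils and may differ), assuming a tiktoken-based counter:

import tiktoken

def num_tokens_from_string(string: str) -> int:
    # Assumption: cl100k_base encoding; the repo's helper may pin a different one.
    encoder = tiktoken.get_encoding("cl100k_base")
    return len(encoder.encode(string))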
@@ -86,7 +87,6 @@ class ZhipuChat(Base):
        self.model_name = model_name

    def chat(self, system, history, gen_conf):
        from http import HTTPStatus
        if system: history.insert(0, {"role": "system", "content": system})
        try:
            response = self.client.chat.completions.create(
@@ -100,4 +100,42 @@ class ZhipuChat(Base):
                    [ans]) else "······\n由于长度的原因,回答被截断了,要继续吗?"
            return ans, response.usage.completion_tokens
        except Exception as e:
            return "**ERROR**: " + str(e), 0
return "**ERROR**: " + str(e), 0
class LocalLLM(Base):
    class RPCProxy:
        def __init__(self, host, port):
            self.host = host
            self.port = int(port)
            self.__conn()

        def __conn(self):
            from multiprocessing.connection import Client
            self._connection = Client((self.host, self.port), authkey=b'infiniflow-token4kevinhu')

        def __getattr__(self, name):
            import pickle

            def do_rpc(*args, **kwargs):
                for _ in range(3):
                    try:
                        self._connection.send(pickle.dumps((name, args, kwargs)))
                        return pickle.loads(self._connection.recv())
                    except Exception as e:
                        self.__conn()
                raise Exception("RPC connection lost!")

            return do_rpc

    def __init__(self, key, model_name="glm-3-turbo"):
        self.client = LocalLLM.RPCProxy("127.0.0.1", 7860)

    def chat(self, system, history, gen_conf):
        if system: history.insert(0, {"role": "system", "content": system})
        try:
            ans = self.client.chat(
                history,
                gen_conf
            )
            return ans, num_tokens_from_string(ans)
        except Exception as e:
            return "**ERROR**: " + str(e), 0