conversation API backend update (#360)

### What problem does this PR solve?

Issue link: #345

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
2026-01-31 15:45:08 +08:00 · 2024-04-15 14:43:44 +08:00
parent 5c62b993f2
commit c39b751600
11 changed files with 853 additions and 326 deletions
--- a/rag/llm/chat_model.py
+++ b/rag/llm/chat_model.py
@@ -49,7 +49,7 @@ class GptTurbo(Base):
            if response.choices[0].finish_reason == "length":
                ans += "...\nFor the content length reason, it stopped, continue?" if is_english(
                    [ans]) else "······\n由于长度的原因，回答被截断了，要继续吗？"
-            return ans, response.usage.completion_tokens
+            return ans, response.usage.total_tokens
        except openai.APIError as e:
            return "**ERROR**: " + str(e), 0

@@ -73,7 +73,7 @@ class MoonshotChat(GptTurbo):
            if response.choices[0].finish_reason == "length":
                ans += "...\nFor the content length reason, it stopped, continue?" if is_english(
                    [ans]) else "······\n由于长度的原因，回答被截断了，要继续吗？"
-            return ans, response.usage.completion_tokens
+            return ans, response.usage.total_tokens
        except openai.APIError as e:
            return "**ERROR**: " + str(e), 0

@@ -127,7 +127,7 @@ class ZhipuChat(Base):
            if response.choices[0].finish_reason == "length":
                ans += "...\nFor the content length reason, it stopped, continue?" if is_english(
                    [ans]) else "······\n由于长度的原因，回答被截断了，要继续吗？"
-            return ans, response.usage.completion_tokens
+            return ans, response.usage.total_tokens
        except Exception as e:
            return "**ERROR**: " + str(e), 0

@@ -153,7 +153,7 @@ class OllamaChat(Base):
                options=options
            )
            ans = response["message"]["content"].strip()
-            return ans, response["eval_count"]
+            return ans, response["eval_count"] + response["prompt_eval_count"]
        except Exception as e:
            return "**ERROR**: " + str(e), 0

@@ -175,50 +175,7 @@ class XinferenceChat(Base):
            if response.choices[0].finish_reason == "length":
                ans += "...\nFor the content length reason, it stopped, continue?" if is_english(
                    [ans]) else "······\n由于长度的原因，回答被截断了，要继续吗？"
-            return ans, response.usage.completion_tokens
+            return ans, response.usage.total_tokens
        except openai.APIError as e:
            return "**ERROR**: " + str(e), 0

-
-class LocalLLM(Base):
-    class RPCProxy:
-        def __init__(self, host, port):
-            self.host = host
-            self.port = int(port)
-            self.__conn()
-
-        def __conn(self):
-            from multiprocessing.connection import Client
-            self._connection = Client(
-                (self.host, self.port), authkey=b'infiniflow-token4kevinhu')
-
-        def __getattr__(self, name):
-            import pickle
-
-            def do_rpc(*args, **kwargs):
-                for _ in range(3):
-                    try:
-                        self._connection.send(
-                            pickle.dumps((name, args, kwargs)))
-                        return pickle.loads(self._connection.recv())
-                    except Exception as e:
-                        self.__conn()
-                raise Exception("RPC connection lost!")
-
-            return do_rpc
-
-    def __init__(self, *args, **kwargs):
-        self.client = LocalLLM.RPCProxy("127.0.0.1", 7860)
-
-    def chat(self, system, history, gen_conf):
-        if system:
-            history.insert(0, {"role": "system", "content": system})
-        try:
-            ans = self.client.chat(
-                history,
-                gen_conf
-            )
-            return ans, num_tokens_from_string(ans)
-        except Exception as e:
-            return "**ERROR**: " + str(e), 0
-