Add support for VolcEngine - the current version supports SDK2 (#885)

- The main idea is to assemble **ak**, **sk**, and **ep_id** into a dictionary and store it in the database **api_key** field. - I am not very familiar with the front-end, so I modeled the front-end changes on the existing Ollama integration; some of them may be redundant. ### Configuration method - Model name - Format requirements: {"VolcEngine model name":"endpoint_id"} - For example: {"Skylark-pro-32K":"ep-xxxxxxxxx"} - Volcano ACCESS_KEY - Format requirements: the VolcEngine VOLC_ACCESSKEY corresponding to the model - Volcano SECRET_KEY - Format requirements: the VolcEngine VOLC_SECRETKEY corresponding to the model ### What problem does this PR solve? _Briefly describe what this PR aims to solve. Include background context that will help reviewers understand the purpose of the PR._ ### Type of change - [x] New Feature (non-breaking change which adds functionality)
2026-02-01 16:15:07 +08:00 · 2024-05-23 11:15:29 +08:00
parent fbd0d74053
commit eb51ad73d6
10 changed files with 315 additions and 8 deletions
--- a/rag/llm/chat_model.py
+++ b/rag/llm/chat_model.py
@ -19,6 +19,7 @@ from abc import ABC
 from openai import OpenAI
 import openai
 from ollama import Client
+from volcengine.maas.v2 import MaasService
 from rag.nlp import is_english
 from rag.utils import num_tokens_from_string

@ -315,3 +316,71 @@ class LocalLLM(Base):
            yield answer + "\n**ERROR**: " + str(e)

        yield token_count
+
+
+class VolcEngineChat(Base):
+    """Chat client for VolcEngine MaaS endpoints (SDK v2 ``MaasService``).
+
+    The access key, secret key and endpoint id all travel inside the single
+    ``key`` string (a serialized dictionary), so no extra database fields are
+    required.
+    """
+
+    def __init__(self, key, model_name, base_url):
+        """
+        Since we do not want to modify the original database fields, and the
+        VolcEngine authentication method is quite special, ak, sk and ep_id
+        are assembled into api_key as a serialized dictionary and parsed here.
+
+        :param key: serialized dictionary (a dict is accepted too) holding the
+            optional entries ``volc_ak``, ``volc_sk`` and ``ep_id``; a missing
+            entry falls back to an empty string.
+        :param model_name: for display only; requests are addressed with the
+            ``ep_id`` taken from ``key``.
+        :param base_url: not used by this class.
+        """
+        self.client = MaasService('maas-api.ml-platform-cn-beijing.volces.com', 'cn-beijing')
+        # Parse once instead of evaluating the stored string three times.
+        credentials = self._parse_key(key)
+        self.volc_ak = credentials.get('volc_ak', '')
+        self.volc_sk = credentials.get('volc_sk', '')
+        self.client.set_ak(self.volc_ak)
+        self.client.set_sk(self.volc_sk)
+        self.model_name = credentials.get('ep_id', '')
+
+    @staticmethod
+    def _parse_key(key):
+        """Decode the serialized credential dictionary without executing it."""
+        import ast
+        import json
+
+        if isinstance(key, dict):
+            return key
+        # SECURITY: this used to be eval(key). The value comes from stored,
+        # user-supplied configuration, so it must be parsed as data only.
+        try:
+            return json.loads(key)
+        except (TypeError, ValueError):
+            # Fall back to a Python-literal dict (e.g. single-quoted keys),
+            # which eval() accepted but JSON does not.
+            return ast.literal_eval(key)
+
+    @staticmethod
+    def _build_request(system, history, gen_conf):
+        """Assemble the request payload shared by chat() and chat_streamly().
+
+        The caller's ``history`` list is copied rather than modified, so
+        calling again with the same list does not stack system messages.
+        """
+        messages = list(history)
+        if system:
+            messages.insert(0, {"role": "system", "content": system})
+        return {
+            "parameters": {
+                "min_new_tokens": gen_conf.get("min_new_tokens", 1),
+                "top_k": gen_conf.get("top_k", 0),
+                "max_prompt_tokens": gen_conf.get("max_prompt_tokens", 30000),
+                "temperature": gen_conf.get("temperature", 0.1),
+                # gen_conf uses "max_tokens"; the request calls it "max_new_tokens".
+                "max_new_tokens": gen_conf.get("max_tokens", 1000),
+                "top_p": gen_conf.get("top_p", 0.3),
+            },
+            "messages": messages,
+        }
+
+    def chat(self, system, history, gen_conf):
+        """Run one blocking chat completion.
+
+        :param system: system prompt; prepended as a "system" message when truthy.
+        :param history: list of message dicts sent as the conversation.
+        :param gen_conf: dict of generation settings; absent keys use the
+            defaults listed in ``_build_request``.
+        :return: ``(answer, total_tokens)``. On any failure the answer is an
+            ``**ERROR**`` string and the token count is 0.
+        """
+        try:
+            req = self._build_request(system, history, gen_conf)
+            response = self.client.chat(self.model_name, req)
+            ans = response.choices[0].message.content.strip()
+            if response.choices[0].finish_reason == "length":
+                # Tell the user the answer was cut off, in the answer's language.
+                ans += "...\nFor the content length reason, it stopped, continue?" if is_english(
+                    [ans]) else "······\n由于长度的原因，回答被截断了，要继续吗？"
+            return ans, response.usage.total_tokens
+        except Exception as e:
+            return "**ERROR**: " + str(e), 0
+
+    def chat_streamly(self, system, history, gen_conf):
+        """Stream a chat completion.
+
+        Yields the accumulated answer text after every chunk that carries
+        content. The final item yielded is always an int: the total token
+        count reported with the "stop" chunk, or 0 if none was received.
+        If an error occurs, the text gathered so far plus an ``**ERROR**``
+        note is yielded before that final count.
+        """
+        ans = ""
+        total_tokens = 0
+        try:
+            req = self._build_request(system, history, gen_conf)
+            stream = self.client.stream_chat(self.model_name, req)
+            for resp in stream:
+                if not resp.choices[0].message.content:
+                    continue
+                ans += resp.choices[0].message.content
+                yield ans
+                if resp.choices[0].finish_reason == "stop":
+                    # A `return <value>` inside a generator is invisible to a
+                    # consumer that iterates it; remember the count and
+                    # yield it as the last item instead.
+                    total_tokens = resp.usage.total_tokens
+                    break
+
+        except Exception as e:
+            yield ans + "\n**ERROR**: " + str(e)
+        yield total_tokens