mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Add support for VolcEngine - the current version supports SDK2 (#885)
- The main idea is to assemble **ak**, **sk**, and **ep_id** into a
dictionary and store it in the database **api_key** field
- I am not very familiar with the front-end, so I modeled those changes on
the existing Ollama integration; some of that code may be redundant.
### Configuration method
- model name
- Format requirements: {"VolcEngine model name":"endpoint_id"}
- For example: {"Skylark-pro-32K":"ep-xxxxxxxxx"}
- Volcano ACCESS_KEY
- Format requirements: the VOLC_ACCESSKEY of the Volcano Engine
corresponding to the model
- Volcano SECRET_KEY
- Format requirements: the VOLC_SECRETKEY of the Volcano Engine
corresponding to the model
### What problem does this PR solve?
_Briefly describe what this PR aims to solve. Include background context
that will help reviewers understand the purpose of the PR._
### Type of change
- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
@ -19,6 +19,7 @@ from abc import ABC
|
||||
from openai import OpenAI
|
||||
import openai
|
||||
from ollama import Client
|
||||
from volcengine.maas.v2 import MaasService
|
||||
from rag.nlp import is_english
|
||||
from rag.utils import num_tokens_from_string
|
||||
|
||||
@ -315,3 +316,71 @@ class LocalLLM(Base):
|
||||
yield answer + "\n**ERROR**: " + str(e)
|
||||
|
||||
yield token_count
|
||||
|
||||
|
||||
class VolcEngineChat(Base):
    """Chat model that talks to VolcEngine through the MaaS v2 SDK client."""

    def __init__(self, key, model_name, base_url):
        """
        Build the MaaS client from credentials packed into ``key``.

        To avoid modifying the existing database fields, and because the
        VolcEngine authentication method is unusual, ak, sk and ep_id are
        assembled into api_key, stored as a dictionary literal, and parsed
        here for use.

        key: string containing a dict literal with the entries ``volc_ak``,
            ``volc_sk`` and ``ep_id``; a missing entry defaults to ''.
        model_name: for display only; requests are sent to ``ep_id``.
        base_url: not used by this class; host and region are fixed below.

        Raises ValueError or SyntaxError when ``key`` is not a valid literal.
        """
        import ast

        # SECURITY: this used to be eval(key), executed three times. eval()
        # runs arbitrary code found in the stored string; literal_eval only
        # accepts plain Python literals and reads the same dict-literal text.
        credentials = ast.literal_eval(key)

        self.client = MaasService('maas-api.ml-platform-cn-beijing.volces.com', 'cn-beijing')
        self.volc_ak = credentials.get('volc_ak', '')
        self.volc_sk = credentials.get('volc_sk', '')
        self.client.set_ak(self.volc_ak)
        self.client.set_sk(self.volc_sk)
        # The endpoint id, not the display name, is what the API addresses.
        self.model_name = credentials.get('ep_id', '')

    @staticmethod
    def _build_request(system, history, gen_conf):
        """
        Assemble the request payload shared by chat() and chat_streamly().

        The system prompt, when given, is placed first in a copy of
        ``history`` so the caller's list is left untouched and repeated
        calls with the same list do not stack system messages.
        """
        messages = list(history)
        if system:
            messages.insert(0, {"role": "system", "content": system})
        return {
            "parameters": {
                "min_new_tokens": gen_conf.get("min_new_tokens", 1),
                "top_k": gen_conf.get("top_k", 0),
                "max_prompt_tokens": gen_conf.get("max_prompt_tokens", 30000),
                "temperature": gen_conf.get("temperature", 0.1),
                # The generic "max_tokens" setting maps to "max_new_tokens".
                "max_new_tokens": gen_conf.get("max_tokens", 1000),
                "top_p": gen_conf.get("top_p", 0.3),
            },
            "messages": messages,
        }

    def chat(self, system, history, gen_conf):
        """
        Run one blocking chat completion.

        Returns a tuple ``(answer, total_tokens)``. Any failure is reported
        as ``("**ERROR**: <message>", 0)`` instead of being raised.
        """
        try:
            req = self._build_request(system, history, gen_conf)
            response = self.client.chat(self.model_name, req)
            ans = response.choices[0].message.content.strip()
            if response.choices[0].finish_reason == "length":
                # Tell the user the answer was cut off, in the answer's language.
                ans += "...\nFor the content length reason, it stopped, continue?" if is_english(
                    [ans]) else "······\n由于长度的原因,回答被截断了,要继续吗?"
            return ans, response.usage.total_tokens
        except Exception as e:
            return "**ERROR**: " + str(e), 0

    def chat_streamly(self, system, history, gen_conf):
        """
        Stream a chat completion.

        Yields the accumulated answer text after every non-empty chunk and
        finally yields one int: the total token count (0 when it could not
        be determined). On failure the accumulated text plus an
        ``**ERROR**`` line is yielded before that final int.
        """
        ans = ""
        token_count = 0
        try:
            req = self._build_request(system, history, gen_conf)
            stream = self.client.stream_chat(self.model_name, req)
            for resp in stream:
                choice = resp.choices[0]
                delta = choice.message.content
                if delta:
                    ans += delta
                    yield ans
                # Check for the end of the stream even when the chunk has no
                # text, so an empty terminal chunk still reports its usage.
                if getattr(choice, "finish_reason", None) == "stop":
                    usage = getattr(resp, "usage", None)
                    if usage is not None:
                        token_count = getattr(usage, "total_tokens", 0) or 0
                    break
        except Exception as e:
            yield ans + "\n**ERROR**: " + str(e)

        # A `return value` inside a generator is invisible to a plain for
        # loop, so the token count is delivered as the final yielded item.
        yield token_count
|
||||
|
||||
Reference in New Issue
Block a user