Feat: add primitive support for function calls (#6840)

### What problem does this PR solve?

This PR introduces **primitive support for function calls**,
enabling the system to handle basic function-call capabilities.
The feature is currently experimental and **not yet enabled
for general use**, as it is only supported by a subset of models,
namely Qwen and OpenAI models.
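For reference, both Qwen and OpenAI expose function calling through the same OpenAI-style tool schema, so a tool definition passed to this feature would look roughly like the following (the `get_weather` tool is illustrative, not part of this PR):

```python
# Illustrative OpenAI-style tool definition; the tool name and
# parameters here are examples, not taken from this PR.
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Return the current weather for a city.",
            "parameters": {
                "type": "object",
                "properties": {
                    "city": {"type": "string", "description": "City name"}
                },
                "required": ["city"],
            },
        },
    }
]
```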

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
Author: Yongteng Lei
Date: 2025-04-08 16:09:03 +08:00
Committed by: GitHub
Parent: a20439bf81
Commit: dc2c74b249
5 changed files with 574 additions and 130 deletions

File: api/db/services/dialog_service.py

@@ -145,6 +145,9 @@ def chat(dialog, messages, stream=True, **kwargs):
         chat_mdl = LLMBundle(dialog.tenant_id, LLMType.IMAGE2TEXT, dialog.llm_id)
     else:
         chat_mdl = LLMBundle(dialog.tenant_id, LLMType.CHAT, dialog.llm_id)
 
+    toolcall_session, tools = kwargs.get("toolcall_session"), kwargs.get("tools")
+    if toolcall_session and tools:
+        chat_mdl.bind_tools(toolcall_session, tools)
 
     bind_llm_ts = timer()
@@ -338,7 +341,7 @@ def chat(dialog, messages, stream=True, **kwargs):
     langfuse_output = {"time_elapsed:": re.sub(r"\n", " \n", langfuse_output), "created_at": time.time()}
     # Add a condition check to call the end method only if langfuse_tracer exists
-    if langfuse_tracer and 'langfuse_generation' in locals():
+    if langfuse_tracer and "langfuse_generation" in locals():
         langfuse_generation.end(output=langfuse_output)
     return {"answer": think + answer, "reference": refs, "prompt": re.sub(r"\n", " \n", prompt), "created_at": time.time()}

File: api/db/services/llm_service.py

@@ -102,6 +102,9 @@ class TenantLLMService(CommonService):
         mdlnm, fid = TenantLLMService.split_model_name_and_factory(mdlnm)
         if model_config:
             model_config = model_config.to_dict()
+            llm = LLMService.query(llm_name=mdlnm) if not fid else LLMService.query(llm_name=mdlnm, fid=fid)
+            if llm:
+                model_config["is_tools"] = llm[0].is_tools
         if not model_config:
             if llm_type in [LLMType.EMBEDDING, LLMType.RERANK]:
                 llm = LLMService.query(llm_name=mdlnm) if not fid else LLMService.query(llm_name=mdlnm, fid=fid)
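With this change, `get_model_config` reports whether the underlying model supports tools; the returned dict gains an `is_tools` flag alongside existing fields such as `max_tokens`. Schematically (the shape and values here are illustrative, not taken from this PR):

```python
# Illustrative shape only; real entries come from the tenant's LLM rows.
model_config = {
    "llm_name": "qwen-max",  # example model name
    "max_tokens": 8192,      # read by LLMBundle.__init__ below
    "is_tools": True,        # newly copied from LLM.is_tools
}
```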
@@ -206,6 +209,8 @@ class LLMBundle:
         model_config = TenantLLMService.get_model_config(tenant_id, llm_type, llm_name)
         self.max_length = model_config.get("max_tokens", 8192)
+        self.is_tools = model_config.get("is_tools", False)
+
         langfuse_keys = TenantLangfuseService.filter_by_tenant(tenant_id=tenant_id)
         if langfuse_keys:
             langfuse = Langfuse(public_key=langfuse_keys.public_key, secret_key=langfuse_keys.secret_key, host=langfuse_keys.host)
@@ -215,6 +220,11 @@ class LLMBundle:
         else:
             self.langfuse = None
 
+    def bind_tools(self, toolcall_session, tools):
+        if not self.is_tools:
+            return
+        self.mdl.bind_tools(toolcall_session, tools)
+
     def encode(self, texts: list):
         if self.langfuse:
             generation = self.trace.generation(name="encode", model=self.llm_name, input={"texts": texts})
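The diff does not spell out the interface a `toolcall_session` must provide; assuming the bound model dispatches each tool call by name with parsed arguments (an assumption, not confirmed by this hunk), a minimal session could look like:

```python
import json


class EchoToolSession:
    """Hypothetical session: the method name and signature below are
    assumptions for illustration, not taken from this PR."""

    def tool_call(self, name: str, arguments: dict) -> str:
        # A real session would route to an actual tool implementation;
        # this one just echoes the request back as the tool result.
        return json.dumps({"tool": name, "arguments": arguments})
```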
@@ -307,11 +317,31 @@ class LLMBundle:
         if self.langfuse:
             span.end()
 
+    def _remove_reasoning_content(self, txt: str) -> str:
+        first_think_start = txt.find("<think>")
+        if first_think_start == -1:
+            return txt
+
+        last_think_end = txt.rfind("</think>")
+        if last_think_end == -1:
+            return txt
+
+        if last_think_end < first_think_start:
+            return txt
+
+        return txt[last_think_end + len("</think>") :]
+
     def chat(self, system, history, gen_conf):
         if self.langfuse:
             generation = self.trace.generation(name="chat", model=self.llm_name, input={"system": system, "history": history})
 
-        txt, used_tokens = self.mdl.chat(system, history, gen_conf)
+        chat = self.mdl.chat
+        if self.is_tools and self.mdl.is_tools:
+            chat = self.mdl.chat_with_tools
+
+        txt, used_tokens = chat(system, history, gen_conf)
+        txt = self._remove_reasoning_content(txt)
+
         if isinstance(txt, int) and not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, used_tokens, self.llm_name):
             logging.error("LLMBundle.chat can't update token usage for {}/CHAT llm_name: {}, used_tokens: {}".format(self.tenant_id, self.llm_name, used_tokens))
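Note that `chat` now strips any `<think>...</think>` reasoning block before returning: the new helper keeps only the text after the last closing tag, provided the tags are sanely ordered. The same logic, checked standalone:

```python
def remove_reasoning_content(txt: str) -> str:
    # Mirrors LLMBundle._remove_reasoning_content above: if the text
    # contains a well-ordered <think>...</think> span, keep only what
    # follows the last </think>; otherwise return the text unchanged.
    first_think_start = txt.find("<think>")
    if first_think_start == -1:
        return txt
    last_think_end = txt.rfind("</think>")
    if last_think_end == -1 or last_think_end < first_think_start:
        return txt
    return txt[last_think_end + len("</think>"):]


assert remove_reasoning_content("<think>plan...</think>Final answer.") == "Final answer."
assert remove_reasoning_content("plain answer") == "plain answer"
```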
@@ -325,7 +355,12 @@ class LLMBundle:
             generation = self.trace.generation(name="chat_streamly", model=self.llm_name, input={"system": system, "history": history})
 
         ans = ""
-        for txt in self.mdl.chat_streamly(system, history, gen_conf):
+        chat_streamly = self.mdl.chat_streamly
+        if self.is_tools and self.mdl.is_tools:
+            chat_streamly = self.mdl.chat_streamly_with_tools
+
+        for txt in chat_streamly(system, history, gen_conf):
             if isinstance(txt, int):
                 if self.langfuse:
                     generation.end(output={"output": ans})
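As the `isinstance(txt, int)` check above suggests, the streaming generator yields text chunks and finishes with an integer token count. A consumer sketch under that assumption (`bundle` stands in for a constructed `LLMBundle`; whether chunks are deltas or cumulative text is wrapper-specific):

```python
# Sketch of draining the stream, assuming str chunks followed by a
# final int token count (as the isinstance check above implies).
latest, used_tokens = "", 0
for chunk in bundle.chat_streamly("You are a helpful assistant.", history, {"temperature": 0.7}):
    if isinstance(chunk, int):
        used_tokens = chunk  # final yield carries the token usage
        break
    latest = chunk           # most recent text from the model
print(latest, used_tokens)
```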