Mirror of https://github.com/infiniflow/ragflow.git (synced 2025-12-08 20:42:30 +08:00)
Feat: add primitive support for function calls (#6840)
### What problem does this PR solve?

This PR introduces **primitive support for function calls**, enabling the system to handle basic function-call capabilities. However, this feature is currently experimental and **not yet enabled for general use**, as it is only supported by a subset of models, namely Qwen and OpenAI models.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
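As a rough usage sketch, a caller opts in by passing the two new keyword arguments through to `chat`. Everything below other than the kwarg names `toolcall_session` and `tools` is an assumption: the tool schema is guessed to follow the OpenAI function-calling format (plausible given the supported models), and the session interface is hypothetical, since the diff only shows the object being handed to `chat_mdl.bind_tools()`.

```python
# Hedged sketch only -- the tool schema and session interface are assumptions,
# not part of this PR's diff.
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",  # hypothetical tool
            "description": "Look up the current weather for a city.",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        },
    }
]

class ToolCallSession:  # hypothetical; the real interface is defined elsewhere
    def tool_call(self, name: str, arguments: dict) -> str:
        if name == "get_weather":
            return "sunny, 22 degrees"
        raise ValueError(f"unknown tool: {name}")

# Then opt in (dialog/messages come from the caller's existing flow; calls
# without these kwargs behave exactly as before):
# for chunk in chat(dialog, messages, stream=True,
#                   toolcall_session=ToolCallSession(), tools=tools):
#     print(chunk)
```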
@@ -145,6 +145,9 @@ def chat(dialog, messages, stream=True, **kwargs):
         chat_mdl = LLMBundle(dialog.tenant_id, LLMType.IMAGE2TEXT, dialog.llm_id)
     else:
         chat_mdl = LLMBundle(dialog.tenant_id, LLMType.CHAT, dialog.llm_id)
+    toolcall_session, tools = kwargs.get("toolcall_session"), kwargs.get("tools")
+    if toolcall_session and tools:
+        chat_mdl.bind_tools(toolcall_session, tools)
 
     bind_llm_ts = timer()
@@ -338,7 +341,7 @@ def chat(dialog, messages, stream=True, **kwargs):
         langfuse_output = {"time_elapsed:": re.sub(r"\n", "  \n", langfuse_output), "created_at": time.time()}
 
         # Add a condition check to call the end method only if langfuse_tracer exists
-        if langfuse_tracer and 'langfuse_generation' in locals():
+        if langfuse_tracer and "langfuse_generation" in locals():
             langfuse_generation.end(output=langfuse_output)
 
         return {"answer": think + answer, "reference": refs, "prompt": re.sub(r"\n", "  \n", prompt), "created_at": time.time()}
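Beyond the quote-style cleanup, the guarded call is a pattern worth a note: `langfuse_generation` is only assigned inside an earlier `if langfuse_tracer:` branch, so the `in locals()` membership test is what keeps `.end()` from raising `NameError` when tracing is off. A self-contained reproduction of the same pattern:

```python
# Minimal reproduction of the guard used above: the variable only exists when
# tracing is enabled, so we test locals() before touching it.
def finish(tracing: bool) -> None:
    if tracing:
        generation = "stand-in for langfuse_generation"
    if tracing and "generation" in locals():
        print("ending:", generation)

finish(True)   # prints "ending: stand-in for langfuse_generation"
finish(False)  # no NameError; nothing happens
```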
@@ -102,6 +102,9 @@ class TenantLLMService(CommonService):
         mdlnm, fid = TenantLLMService.split_model_name_and_factory(mdlnm)
         if model_config:
             model_config = model_config.to_dict()
+            llm = LLMService.query(llm_name=mdlnm) if not fid else LLMService.query(llm_name=mdlnm, fid=fid)
+            if llm:
+                model_config["is_tools"] = llm[0].is_tools
         if not model_config:
             if llm_type in [LLMType.EMBEDDING, LLMType.RERANK]:
                 llm = LLMService.query(llm_name=mdlnm) if not fid else LLMService.query(llm_name=mdlnm, fid=fid)
@@ -206,6 +209,8 @@ class LLMBundle:
         model_config = TenantLLMService.get_model_config(tenant_id, llm_type, llm_name)
         self.max_length = model_config.get("max_tokens", 8192)
 
+        self.is_tools = model_config.get("is_tools", False)
+
         langfuse_keys = TenantLangfuseService.filter_by_tenant(tenant_id=tenant_id)
         if langfuse_keys:
             langfuse = Langfuse(public_key=langfuse_keys.public_key, secret_key=langfuse_keys.secret_key, host=langfuse_keys.host)
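Taken together, these two hunks thread a capability flag from the model registry to the bundle: `get_model_config` copies `is_tools` off the matching `LLM` row into the config dict, and `LLMBundle.__init__` reads it back with a safe default, so models with no registry match keep tools disabled. A small sketch of the resulting lookup behavior, with stand-in dicts in place of the real config:

```python
# Stand-ins for what get_model_config may return; the real dict comes from the
# tenant's model configuration plus the LLM registry row.
config_with_flag = {"max_tokens": 32768, "is_tools": True}
config_without_flag = {"max_tokens": 8192}  # e.g. no registry match

for cfg in (config_with_flag, config_without_flag):
    max_length = cfg.get("max_tokens", 8192)
    is_tools = cfg.get("is_tools", False)  # same default as the diff
    print(max_length, is_tools)
# -> 32768 True
# -> 8192 False
```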
@@ -215,6 +220,11 @@ class LLMBundle:
         else:
             self.langfuse = None
 
+    def bind_tools(self, toolcall_session, tools):
+        if not self.is_tools:
+            return
+        self.mdl.bind_tools(toolcall_session, tools)
+
     def encode(self, texts: list):
         if self.langfuse:
             generation = self.trace.generation(name="encode", model=self.llm_name, input={"texts": texts})
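The early return makes `bind_tools` safe to call unconditionally: if the configured model is not tool-capable, binding silently does nothing and the bundle keeps its plain chat path. A self-contained sketch of that contract, using a hypothetical stub in place of the real bundle:

```python
# Hypothetical stand-in illustrating the bind_tools contract from the diff:
# the guard turns binding into a no-op for non-tool models.
class BundleStub:
    def __init__(self, is_tools: bool):
        self.is_tools = is_tools
        self.bound = None

    def bind_tools(self, toolcall_session, tools):
        if not self.is_tools:
            return                      # same early-out as LLMBundle.bind_tools
        self.bound = (toolcall_session, tools)

b = BundleStub(is_tools=False)
b.bind_tools(object(), [{"name": "t"}])
assert b.bound is None                  # silently ignored

b = BundleStub(is_tools=True)
b.bind_tools(object(), [{"name": "t"}])
assert b.bound is not None              # actually bound
```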
@@ -307,11 +317,31 @@ class LLMBundle:
         if self.langfuse:
             span.end()
 
+    def _remove_reasoning_content(self, txt: str) -> str:
+        first_think_start = txt.find("<think>")
+        if first_think_start == -1:
+            return txt
+
+        last_think_end = txt.rfind("</think>")
+        if last_think_end == -1:
+            return txt
+
+        if last_think_end < first_think_start:
+            return txt
+
+        return txt[last_think_end + len("</think>") :]
+
     def chat(self, system, history, gen_conf):
         if self.langfuse:
             generation = self.trace.generation(name="chat", model=self.llm_name, input={"system": system, "history": history})
 
-        txt, used_tokens = self.mdl.chat(system, history, gen_conf)
+        chat = self.mdl.chat
+        if self.is_tools and self.mdl.is_tools:
+            chat = self.mdl.chat_with_tools
+
+        txt, used_tokens = chat(system, history, gen_conf)
+        txt = self._remove_reasoning_content(txt)
+
         if isinstance(txt, int) and not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, used_tokens, self.llm_name):
             logging.error("LLMBundle.chat can't update token usage for {}/CHAT llm_name: {}, used_tokens: {}".format(self.tenant_id, self.llm_name, used_tokens))
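Two things happen in this hunk: `chat` now dispatches to `chat_with_tools` when both the bundle and the underlying model advertise tool support, and reasoning traces wrapped in `<think>` tags are stripped from the final text. The stripping logic can be checked in isolation; this copy of the method body as a free function behaves identically:

```python
def remove_reasoning_content(txt: str) -> str:
    # Mirrors LLMBundle._remove_reasoning_content above: drop everything up to
    # and including the LAST </think>, but only when the markers are sane.
    first_think_start = txt.find("<think>")
    if first_think_start == -1:
        return txt
    last_think_end = txt.rfind("</think>")
    if last_think_end == -1 or last_think_end < first_think_start:
        return txt
    return txt[last_think_end + len("</think>"):]

assert remove_reasoning_content("<think>plan steps</think>Final answer.") == "Final answer."
assert remove_reasoning_content("no markers at all") == "no markers at all"
# Out-of-order markers are left untouched rather than mangled:
assert remove_reasoning_content("</think>odd<think>") == "</think>odd<think>"
```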
@@ -325,7 +355,12 @@ class LLMBundle:
             generation = self.trace.generation(name="chat_streamly", model=self.llm_name, input={"system": system, "history": history})
 
         ans = ""
-        for txt in self.mdl.chat_streamly(system, history, gen_conf):
+        chat_streamly = self.mdl.chat_streamly
+
+        if self.is_tools and self.mdl.is_tools:
+            chat_streamly = self.mdl.chat_streamly_with_tools
+
+        for txt in chat_streamly(system, history, gen_conf):
             if isinstance(txt, int):
                 if self.langfuse:
                     generation.end(output={"output": ans})
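The streaming path mirrors the non-streaming dispatch. Judging from the `isinstance(txt, int)` sentinel, the generator yields text while streaming and an integer token count as its final item; a hedged, self-contained consumption sketch with a fake generator in place of the real model wrapper:

```python
# Hypothetical consumer, inferred from the isinstance(txt, int) check above.
def fake_chat_streamly(system, history, gen_conf):
    # Stand-in for LLMBundle.chat_streamly with the same yield contract:
    # strings while streaming, then an int token count last.
    yield "Hel"
    yield "Hello!"
    yield 7  # final yield: token usage

ans, used_tokens = "", 0
for chunk in fake_chat_streamly("sys", [], {"temperature": 0.1}):
    if isinstance(chunk, int):
        used_tokens = chunk
        break
    ans = chunk  # whether chunks are deltas or accumulated text is up to the
                 # model wrapper; the diff does not show it

print(ans, used_tokens)  # -> Hello! 7
```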