Mirror of https://github.com/infiniflow/ragflow.git (synced 2026-01-04 03:25:30 +08:00)

Compare commits: 21 commits, v0.20.4...209b731541
Commits:
209b731541, c47a38773c, fcd18d7d87, fe9adbf0a5, c7f7adf029, c27172b3bc, a246949b77,
0a954d720a, f89e55ec42, 5fe8cf6018, 4720849ac0, d7721833e7, 7332f1d0f3, 2d101561f8,
59590e9aae, bb9b9b8357, a4b368e53f, c461261f0b, a1633e0a2f, 369add35b8, 5abd0bbac1
@@ -307,7 +307,7 @@ docker build --platform linux/amd64 -f Dockerfile -t infiniflow/ragflow:nightly
 
 ## 🔨 Launch service from source for development
 
-1. Install uv, or skip this step if it is already installed:
+1. Install `uv` and `pre-commit`, or skip this step if they are already installed:
 
    ```bash
    pipx install uv pre-commit
@@ -271,7 +271,7 @@ docker build --platform linux/amd64 -f Dockerfile -t infiniflow/ragflow:nightly
 
 ## 🔨 Menjalankan Aplikasi dari untuk Pengembangan
 
-1. Instal uv, atau lewati langkah ini jika sudah terinstal:
+1. Instal `uv` dan `pre-commit`, atau lewati langkah ini jika sudah terinstal:
 
    ```bash
    pipx install uv pre-commit
@@ -266,7 +266,7 @@ docker build --platform linux/amd64 -f Dockerfile -t infiniflow/ragflow:nightly
 
 ## 🔨 ソースコードからサービスを起動する方法
 
-1. uv をインストールする。すでにインストールされている場合は、このステップをスキップしてください:
+1. `uv` と `pre-commit` をインストールする。すでにインストールされている場合は、このステップをスキップしてください:
 
    ```bash
    pipx install uv pre-commit
@@ -265,7 +265,7 @@ docker build --platform linux/amd64 -f Dockerfile -t infiniflow/ragflow:nightly
 
 ## 🔨 소스 코드로 서비스를 시작합니다.
 
-1. uv를 설치하거나 이미 설치된 경우 이 단계를 건너뜁니다:
+1. `uv` 와 `pre-commit` 을 설치하거나, 이미 설치된 경우 이 단계를 건너뜁니다:
 
    ```bash
    pipx install uv pre-commit
@@ -289,7 +289,7 @@ docker build --platform linux/amd64 -f Dockerfile -t infiniflow/ragflow:nightly
 
 ## 🔨 Lançar o serviço a partir do código-fonte para desenvolvimento
 
-1. Instale o `uv`, ou pule esta etapa se ele já estiver instalado:
+1. Instale o `uv` e o `pre-commit`, ou pule esta etapa se eles já estiverem instalados:
 
    ```bash
    pipx install uv pre-commit
@@ -301,7 +301,7 @@ docker build --platform linux/amd64 --build-arg NEED_MIRROR=1 -f Dockerfile -t infiniflow/ragflow:nightly
 
 ## 🔨 以原始碼啟動服務
 
-1. 安裝 uv。如已安裝,可跳過此步驟:
+1. 安裝 `uv` 和 `pre-commit`。如已安裝,可跳過此步驟:
 
    ```bash
    pipx install uv pre-commit
@@ -301,7 +301,7 @@ docker build --platform linux/amd64 --build-arg NEED_MIRROR=1 -f Dockerfile -t infiniflow/ragflow:nightly
 
 ## 🔨 以源代码启动服务
 
-1. 安装 uv。如已经安装,可跳过本步骤:
+1. 安装 `uv` 和 `pre-commit`。如已经安装,可跳过本步骤:
 
    ```bash
    pipx install uv pre-commit
agent/canvas.py (241 lines changed)

@@ -29,83 +29,52 @@ from api.utils import get_uuid, hash_str2int
 from rag.prompts.prompts import chunks_format
 from rag.utils.redis_conn import REDIS_CONN
 
 
-class Canvas:
+class Graph:
     """
     dsl = {
         "components": {
             "begin": {
                 "obj":{
                     "component_name": "Begin",
                     "params": {},
                 },
                 "downstream": ["answer_0"],
                 "upstream": [],
             },
             "retrieval_0": {
                 "obj": {
                     "component_name": "Retrieval",
                     "params": {}
                 },
                 "downstream": ["generate_0"],
                 "upstream": ["answer_0"],
             },
             "generate_0": {
                 "obj": {
                     "component_name": "Generate",
                     "params": {}
                 },
                 "downstream": ["answer_0"],
                 "upstream": ["retrieval_0"],
             }
         },
         "history": [],
         "path": ["begin"],
         "retrieval": {"chunks": [], "doc_aggs": []},
         "globals": {
             "sys.query": "",
             "sys.user_id": tenant_id,
             "sys.conversation_turns": 0,
             "sys.files": []
         }
     }
     """
 
     def __init__(self, dsl: str, tenant_id=None, task_id=None):
         self.path = []
-        self.history = []
         self.components = {}
         self.error = ""
-        self.globals = {
-            "sys.query": "",
-            "sys.user_id": tenant_id,
-            "sys.conversation_turns": 0,
-            "sys.files": []
-        }
-        self.dsl = json.loads(dsl) if dsl else {
-            "components": {
-                "begin": {
-                    "obj": {
-                        "component_name": "Begin",
-                        "params": {
-                            "prologue": "Hi there!"
-                        }
-                    },
-                    "downstream": [],
-                    "upstream": [],
-                    "parent_id": ""
-                }
-            },
-            "history": [],
-            "path": [],
-            "retrieval": [],
-            "globals": {
-                "sys.query": "",
-                "sys.user_id": "",
-                "sys.conversation_turns": 0,
-                "sys.files": []
-            }
-        }
+        self.dsl = json.loads(dsl)
         self._tenant_id = tenant_id
         self.task_id = task_id if task_id else get_uuid()
         self.load()
@@ -116,8 +85,6 @@ class Canvas:
         for k, cpn in self.components.items():
             cpn_nms.add(cpn["obj"]["component_name"])
 
-        assert "Begin" in cpn_nms, "There have to be an 'Begin' component."
-
         for k, cpn in self.components.items():
             cpn_nms.add(cpn["obj"]["component_name"])
             param = component_class(cpn["obj"]["component_name"] + "Param")()
@@ -130,27 +97,10 @@ class Canvas:
             cpn["obj"] = component_class(cpn["obj"]["component_name"])(self, k, param)
 
         self.path = self.dsl["path"]
-        self.history = self.dsl["history"]
-        if "globals" in self.dsl:
-            self.globals = self.dsl["globals"]
-        else:
-            self.globals = {
-                "sys.query": "",
-                "sys.user_id": "",
-                "sys.conversation_turns": 0,
-                "sys.files": []
-            }
-
-        self.retrieval = self.dsl["retrieval"]
-        self.memory = self.dsl.get("memory", [])
 
     def __str__(self):
         self.dsl["path"] = self.path
-        self.dsl["history"] = self.history
-        self.dsl["globals"] = self.globals
         self.dsl["task_id"] = self.task_id
-        self.dsl["retrieval"] = self.retrieval
-        self.dsl["memory"] = self.memory
         dsl = {
             "components": {}
         }
@@ -169,14 +119,79 @@ class Canvas:
                 dsl["components"][k][c] = deepcopy(cpn[c])
         return json.dumps(dsl, ensure_ascii=False)
 
-    def reset(self, mem=False):
+    def reset(self):
         self.path = []
+        for k, cpn in self.components.items():
+            self.components[k]["obj"].reset()
+        try:
+            REDIS_CONN.delete(f"{self.task_id}-logs")
+        except Exception as e:
+            logging.exception(e)
+
+    def get_component_name(self, cid):
+        for n in self.dsl.get("graph", {}).get("nodes", []):
+            if cid == n["id"]:
+                return n["data"]["name"]
+        return ""
+
+    def run(self, **kwargs):
+        raise NotImplementedError()
+
+    def get_component(self, cpn_id) -> Union[None, dict[str, Any]]:
+        return self.components.get(cpn_id)
+
+    def get_component_obj(self, cpn_id) -> ComponentBase:
+        return self.components.get(cpn_id)["obj"]
+
+    def get_component_type(self, cpn_id) -> str:
+        return self.components.get(cpn_id)["obj"].component_name
+
+    def get_component_input_form(self, cpn_id) -> dict:
+        return self.components.get(cpn_id)["obj"].get_input_form()
+
+    def get_tenant_id(self):
+        return self._tenant_id
+
+
+class Canvas(Graph):
+
+    def __init__(self, dsl: str, tenant_id=None, task_id=None):
+        self.globals = {
+            "sys.query": "",
+            "sys.user_id": tenant_id,
+            "sys.conversation_turns": 0,
+            "sys.files": []
+        }
+        super().__init__(dsl, tenant_id, task_id)
+
+    def load(self):
+        super().load()
+        self.history = self.dsl["history"]
+        if "globals" in self.dsl:
+            self.globals = self.dsl["globals"]
+        else:
+            self.globals = {
+                "sys.query": "",
+                "sys.user_id": "",
+                "sys.conversation_turns": 0,
+                "sys.files": []
+            }
+
+        self.retrieval = self.dsl["retrieval"]
+        self.memory = self.dsl.get("memory", [])
+
+    def __str__(self):
+        self.dsl["history"] = self.history
+        self.dsl["retrieval"] = self.retrieval
+        self.dsl["memory"] = self.memory
+        return super().__str__()
+
+    def reset(self, mem=False):
+        super().reset()
         if not mem:
             self.history = []
             self.retrieval = []
         self.memory = []
-        for k, cpn in self.components.items():
-            self.components[k]["obj"].reset()
 
         for k in self.globals.keys():
             if isinstance(self.globals[k], str):
@@ -192,22 +207,13 @@ class Canvas:
             else:
                 self.globals[k] = None
 
-        try:
-            REDIS_CONN.delete(f"{self.task_id}-logs")
-        except Exception as e:
-            logging.exception(e)
-
-    def get_component_name(self, cid):
-        for n in self.dsl.get("graph", {}).get("nodes", []):
-            if cid == n["id"]:
-                return n["data"]["name"]
-        return ""
-
     def run(self, **kwargs):
         st = time.perf_counter()
         self.message_id = get_uuid()
         created_at = int(time.time())
         self.add_user_input(kwargs.get("query"))
+        for k, cpn in self.components.items():
+            self.components[k]["obj"].reset(True)
 
         for k in kwargs.keys():
             if k in ["query", "user_id", "files"] and kwargs[k]:
@@ -386,18 +392,6 @@ class Canvas:
             })
         self.history.append(("assistant", self.get_component_obj(self.path[-1]).output()))
 
-    def get_component(self, cpn_id) -> Union[None, dict[str, Any]]:
-        return self.components.get(cpn_id)
-
-    def get_component_obj(self, cpn_id) -> ComponentBase:
-        return self.components.get(cpn_id)["obj"]
-
-    def get_component_type(self, cpn_id) -> str:
-        return self.components.get(cpn_id)["obj"].component_name
-
-    def get_component_input_form(self, cpn_id) -> dict:
-        return self.components.get(cpn_id)["obj"].get_input_form()
-
     def is_reff(self, exp: str) -> bool:
         exp = exp.strip("{").strip("}")
         if exp.find("@") < 0:
@@ -419,9 +413,6 @@ class Canvas:
             raise Exception(f"Can't find variable: '{cpn_id}@{var_nm}'")
         return cpn["obj"].output(var_nm)
 
-    def get_tenant_id(self):
-        return self._tenant_id
-
     def get_history(self, window_size):
         convs = []
         if window_size <= 0:
@@ -436,36 +427,6 @@ class Canvas:
     def add_user_input(self, question):
         self.history.append(("user", question))
 
-    def _find_loop(self, max_loops=6):
-        path = self.path[-1][::-1]
-        if len(path) < 2:
-            return False
-
-        for i in range(len(path)):
-            if path[i].lower().find("answer") == 0 or path[i].lower().find("iterationitem") == 0:
-                path = path[:i]
-                break
-
-        if len(path) < 2:
-            return False
-
-        for loc in range(2, len(path) // 2):
-            pat = ",".join(path[0:loc])
-            path_str = ",".join(path)
-            if len(pat) >= len(path_str):
-                return False
-            loop = max_loops
-            while path_str.find(pat) == 0 and loop >= 0:
-                loop -= 1
-                if len(pat)+1 >= len(path_str):
-                    return False
-                path_str = path_str[len(pat)+1:]
-            if loop < 0:
-                pat = " => ".join([p.split(":")[0] for p in path[0:loc]])
-                return pat + " => " + pat
-
-        return False
-
     def get_prologue(self):
         return self.components["begin"]["obj"]._param.prologue
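Taken together, the hunks above split the old Canvas into a structural Graph base class (components, path, DSL (de)serialization, component lookups) and a Canvas subclass that layers conversational state (history, globals, retrieval, memory) on top. A minimal usage sketch under that reading — the one-component DSL and tenant id are hypothetical, and a running ragflow environment is assumed:

```python
import json

from agent.canvas import Canvas  # Canvas now extends Graph

# Hypothetical minimal DSL: a lone Begin component.
dsl = json.dumps({
    "components": {
        "begin": {
            "obj": {"component_name": "Begin", "params": {}},
            "downstream": [],
            "upstream": [],
        }
    },
    "history": [],
    "path": [],
    "retrieval": {"chunks": [], "doc_aggs": []},
    "globals": {},
})

canvas = Canvas(dsl, tenant_id="tenant-0")   # hypothetical tenant id
print(canvas.get_component_type("begin"))    # structural accessor, inherited from Graph
canvas.add_user_input("hello")               # conversational state, added by Canvas
canvas.reset(mem=True)                       # keep history/retrieval, clear the rest
```

Note that Graph.run raises NotImplementedError, so only Canvas (or another subclass) is executable; Graph alone is just the wiring.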
@@ -50,8 +50,9 @@ del _package_path, _import_submodules, _extract_classes_from_module
 
 
 def component_class(class_name):
-    m = importlib.import_module("agent.component")
-    try:
-        return getattr(m, class_name)
-    except Exception:
-        return getattr(importlib.import_module("agent.tools"), class_name)
+    for mdl in ["agent.component", "agent.tools", "rag.flow"]:
+        try:
+            return getattr(importlib.import_module(mdl), class_name)
+        except Exception:
+            pass
+    assert False, f"Can't import {class_name}"
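component_class is now the single lookup across components, tools, and rag.flow operators, and a failed lookup ends in an assertion rather than whatever exception the last module raised. A hedged sketch of how a class is resolved by name (class names assumed to exist in agent.component):

```python
from agent.component import component_class

param = component_class("RetrievalParam")()  # parameter object for a component
cls = component_class("Retrieval")           # the component class itself
```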
@@ -16,7 +16,7 @@
 
 import re
 import time
-from abc import ABC, abstractmethod
+from abc import ABC
 import builtins
 import json
 import os
@@ -410,8 +410,8 @@ class ComponentBase(ABC):
         )
 
     def __init__(self, canvas, id, param: ComponentParamBase):
-        from agent.canvas import Canvas  # Local import to avoid cyclic dependency
-        assert isinstance(canvas, Canvas), "canvas must be an instance of Canvas"
+        from agent.canvas import Graph  # Local import to avoid cyclic dependency
+        assert isinstance(canvas, Graph), "canvas must be an instance of Canvas"
         self._canvas = canvas
         self._id = id
         self._param = param
@@ -448,9 +448,11 @@ class ComponentBase(ABC):
     def error(self):
         return self._param.outputs.get("_ERROR", {}).get("value")
 
-    def reset(self):
+    def reset(self, only_output=False):
         for k in self._param.outputs.keys():
             self._param.outputs[k]["value"] = None
+        if only_output:
+            return
         for k in self._param.inputs.keys():
             self._param.inputs[k]["value"] = None
         self._param.debug_inputs = {}
@@ -526,6 +528,10 @@ class ComponentBase(ABC):
         cpn_nms = self._canvas.get_component(self._id)['upstream']
         return cpn_nms
 
+    def get_downstream(self) -> List[str]:
+        cpn_nms = self._canvas.get_component(self._id)['downstream']
+        return cpn_nms
+
     @staticmethod
     def string_format(content: str, kv: dict[str, str]) -> str:
         for n, v in kv.items():
@@ -554,6 +560,5 @@ class ComponentBase(ABC):
     def set_exception_default_value(self):
         self.set_output("result", self.get_exception_default_value())
 
-    @abstractmethod
     def thoughts(self) -> str:
-        ...
+        raise NotImplementedError()
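Two behavioral notes fall out of these base-class changes: reset(only_output=True) lets Canvas.run clear stale outputs at the start of a run without wiping inputs or debug state, and thoughts() raising NotImplementedError instead of being @abstractmethod keeps subclasses that never surface thoughts instantiable. A small sketch, assuming cpn is any ComponentBase instance:

```python
# Sketch of the two reset modes (cpn: any ComponentBase instance).
cpn.reset(only_output=True)  # per-run: clear outputs only, keep inputs/debug_inputs
cpn.reset()                  # full: clear outputs, inputs, and debug_inputs
```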
@@ -1,8 +1,12 @@
 {
     "id": 19,
-    "title": "Choose Your Knowledge Base Agent",
-    "description": "Select your desired knowledge base from the dropdown menu. The Agent will only retrieve from the selected knowledge base and use this content to generate responses.",
-    "canvas_type": "Agent",
+    "title": {
+        "en": "Choose Your Knowledge Base Agent",
+        "zh": "选择知识库智能体"},
+    "description": {
+        "en": "Select your desired knowledge base from the dropdown menu. The Agent will only retrieve from the selected knowledge base and use this content to generate responses.",
+        "zh": "从下拉菜单中选择知识库,智能体将仅根据所选知识库内容生成回答。"},
+    "canvas_type": "Agent",
     "dsl": {
         "components": {
             "Agent:BraveParksJoke": {
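The template headers are now bilingual objects rather than plain strings; the same pattern repeats for every template below. A consumer can pick a language with a fallback — a minimal sketch, assuming tpl is one parsed template dict and that older single-string templates may still be in circulation:

```python
def localized(field, lang="en"):
    # Handles both the new {"en": ..., "zh": ...} form and the legacy string form.
    if isinstance(field, dict):
        return field.get(lang) or field.get("en", "")
    return field

title = localized(tpl["title"], "zh")  # hypothetical usage
```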
@@ -1,8 +1,12 @@
 {
     "id": 18,
-    "title": "Choose Your Knowledge Base Workflow",
-    "description": "Select your desired knowledge base from the dropdown menu. The retrieval assistant will only use data from your selected knowledge base to generate responses.",
-    "canvas_type": "Other",
+    "title": {
+        "en": "Choose Your Knowledge Base Workflow",
+        "zh": "选择知识库工作流"},
+    "description": {
+        "en": "Select your desired knowledge base from the dropdown menu. The retrieval assistant will only use data from your selected knowledge base to generate responses.",
+        "zh": "从下拉菜单中选择知识库,工作流将仅根据所选知识库内容生成回答。"},
+    "canvas_type": "Other",
     "dsl": {
         "components": {
             "Agent:ProudDingosShout": {
@@ -1,9 +1,13 @@
 
 {
     "id": 11,
-    "title": "Customer Review Analysis",
-    "description": "Automatically classify customer reviews using LLM (Large Language Model) and route them via email to the relevant departments.",
-    "canvas_type": "Customer Support",
+    "title": {
+        "en": "Customer Review Analysis",
+        "zh": "客户评价分析"},
+    "description": {
+        "en": "Automatically classify customer reviews using LLM (Large Language Model) and route them via email to the relevant departments.",
+        "zh": "大模型将自动分类客户评价,并通过电子邮件将结果发送到相关部门。"},
+    "canvas_type": "Customer Support",
     "dsl": {
         "components": {
             "Categorize:FourTeamsFold": {
File diff suppressed because one or more lines are too long
@@ -1,8 +1,12 @@
 
 {
     "id": 10,
-    "title": "Customer Support",
-    "description": "This is an intelligent customer service processing system workflow based on user intent classification. It uses LLM to identify user demand types and transfers them to the corresponding professional agent for processing.",
+    "title": {
+        "en":"Customer Support",
+        "zh": "客户支持"},
+    "description": {
+        "en": "This is an intelligent customer service processing system workflow based on user intent classification. It uses LLM to identify user demand types and transfers them to the corresponding professional agent for processing.",
+        "zh": "工作流系统,用于智能客服场景。基于用户意图分类。使用大模型识别用户需求类型,并将需求转移给相应的智能体进行处理。"},
     "canvas_type": "Customer Support",
     "dsl": {
         "components": {
@@ -1,8 +1,12 @@
 
 {
     "id": 15,
-    "title": "CV Analysis and Candidate Evaluation",
-    "description": "This is a workflow that helps companies evaluate resumes, HR uploads a job description first, then submits multiple resumes via the chat window for evaluation.",
+    "title": {
+        "en": "CV Analysis and Candidate Evaluation",
+        "zh": "简历分析和候选人评估"},
+    "description": {
+        "en": "This is a workflow that helps companies evaluate resumes, HR uploads a job description first, then submits multiple resumes via the chat window for evaluation.",
+        "zh": "帮助公司评估简历的工作流。HR首先上传职位描述,通过聊天窗口提交多份简历进行评估。"},
     "canvas_type": "Other",
     "dsl": {
         "components": {
File diff suppressed because one or more lines are too long
@@ -1,8 +1,12 @@
 
 {
     "id": 1,
-    "title": "Deep Research",
-    "description": "For professionals in sales, marketing, policy, or consulting, the Multi-Agent Deep Research Agent conducts structured, multi-step investigations across diverse sources and delivers consulting-style reports with clear citations.",
+    "title": {
+        "en": "Deep Research",
+        "zh": "深度研究"},
+    "description": {
+        "en": "For professionals in sales, marketing, policy, or consulting, the Multi-Agent Deep Research Agent conducts structured, multi-step investigations across diverse sources and delivers consulting-style reports with clear citations.",
+        "zh": "专为销售、市场、政策或咨询领域的专业人士设计,多智能体的深度研究会结合多源信息进行结构化、多步骤地回答问题,并附带有清晰的引用。"},
     "canvas_type": "Recommended",
     "dsl": {
         "components": {
@@ -1,8 +1,12 @@
 
 {
     "id": 6,
-    "title": "Deep Research",
-    "description": "For professionals in sales, marketing, policy, or consulting, the Multi-Agent Deep Research Agent conducts structured, multi-step investigations across diverse sources and delivers consulting-style reports with clear citations.",
+    "title": {
+        "en": "Deep Research",
+        "zh": "深度研究"},
+    "description": {
+        "en": "For professionals in sales, marketing, policy, or consulting, the Multi-Agent Deep Research Agent conducts structured, multi-step investigations across diverse sources and delivers consulting-style reports with clear citations.",
+        "zh": "专为销售、市场、政策或咨询领域的专业人士设计,多智能体的深度研究会结合多源信息进行结构化、多步骤地回答问题,并附带有清晰的引用。"},
     "canvas_type": "Agent",
     "dsl": {
         "components": {
@@ -1,7 +1,13 @@
 {
     "id": 22,
-    "title": "Ecommerce Customer Service Workflow",
-    "description": "This template helps e-commerce platforms address complex customer needs, such as comparing product features, providing usage support, and coordinating home installation services.",
+    "title": {
+        "en": "Ecommerce Customer Service Workflow",
+        "zh": "电子商务客户服务工作流程"
+    },
+    "description": {
+        "en": "This template helps e-commerce platforms address complex customer needs, such as comparing product features, providing usage support, and coordinating home installation services.",
+        "zh": "该模板可帮助电子商务平台解决复杂的客户需求,例如比较产品功能、提供使用支持和协调家庭安装服务。"
+    },
     "canvas_type": "Customer Support",
     "dsl": {
         "components": {
@@ -1,7 +1,11 @@
 {
     "id": 8,
-    "title": "Generate SEO Blog",
-    "description": "This is a multi-agent version of the SEO blog generation workflow. It simulates a small team of AI “writers”, where each agent plays a specialized role — just like a real editorial team.",
+    "title": {
+        "en": "Generate SEO Blog",
+        "zh": "生成SEO博客"},
+    "description": {
+        "en": "This is a multi-agent version of the SEO blog generation workflow. It simulates a small team of AI “writers”, where each agent plays a specialized role — just like a real editorial team.",
+        "zh": "多智能体架构可根据简单的用户输入自动生成完整的SEO博客文章。模拟小型“作家”团队,其中每个智能体扮演一个专业角色——就像真正的编辑团队。"},
     "canvas_type": "Agent",
     "dsl": {
         "components": {
@@ -1,7 +1,11 @@
 {
     "id": 13,
-    "title": "ImageLingo",
-    "description": "ImageLingo lets you snap any photo containing text—menus, signs, or documents—and instantly recognize and translate it into your language of choice using advanced AI-powered translation technology.",
+    "title": {
+        "en": "ImageLingo",
+        "zh": "图片解析"},
+    "description": {
+        "en": "ImageLingo lets you snap any photo containing text—menus, signs, or documents—and instantly recognize and translate it into your language of choice using advanced AI-powered translation technology.",
+        "zh": "多模态大模型允许您拍摄任何包含文本的照片——菜单、标志或文档——立即识别并转换成您选择的语言。"},
     "canvas_type": "Consumer App",
     "dsl": {
         "components": {
@@ -1,7 +1,11 @@
 {
     "id": 20,
-    "title": "Report Agent Using Knowledge Base",
-    "description": "A report generation assistant using local knowledge base, with advanced capabilities in task planning, reasoning, and reflective analysis. Recommended for academic research paper Q&A",
+    "title": {
+        "en": "Report Agent Using Knowledge Base",
+        "zh": "知识库检索智能体"},
+    "description": {
+        "en": "A report generation assistant using local knowledge base, with advanced capabilities in task planning, reasoning, and reflective analysis. Recommended for academic research paper Q&A",
+        "zh": "一个使用本地知识库的报告生成助手,具备高级能力,包括任务规划、推理和反思性分析。推荐用于学术研究论文问答。"},
     "canvas_type": "Agent",
     "dsl": {
         "components": {
agent/templates/knowledge_base_report_r.json (new file, 331 lines)

@@ -0,0 +1,331 @@
{
  "id": 21,
  "title": {
    "en": "Report Agent Using Knowledge Base",
    "zh": "知识库检索智能体"},
  "description": {
    "en": "A report generation assistant using local knowledge base, with advanced capabilities in task planning, reasoning, and reflective analysis. Recommended for academic research paper Q&A",
    "zh": "一个使用本地知识库的报告生成助手,具备高级能力,包括任务规划、推理和反思性分析。推荐用于学术研究论文问答。"},
  "canvas_type": "Recommended",
  "dsl": {
    "components": {
      "Agent:NewPumasLick": {
        "downstream": [
          "Message:OrangeYearsShine"
        ],
        "obj": {
          "component_name": "Agent",
          "params": {
            "delay_after_error": 1,
            "description": "",
            "exception_comment": "",
            "exception_default_value": "",
            "exception_goto": [],
            "exception_method": null,
            "frequencyPenaltyEnabled": false,
            "frequency_penalty": 0.5,
            "llm_id": "qwen3-235b-a22b-instruct-2507@Tongyi-Qianwen",
            "maxTokensEnabled": true,
            "max_retries": 3,
            "max_rounds": 3,
            "max_tokens": 128000,
            "mcp": [],
            "message_history_window_size": 12,
            "outputs": {
              "content": {
                "type": "string",
                "value": ""
              }
            },
            "parameter": "Precise",
            "presencePenaltyEnabled": false,
            "presence_penalty": 0.5,
            "prompts": [
              {
                "content": "# User Query\n {sys.query}",
                "role": "user"
              }
            ],
            "sys_prompt": "## Role & Task\nYou are a **\u201cKnowledge Base Retrieval Q\\&A Agent\u201d** whose goal is to break down the user\u2019s question into retrievable subtasks, and then produce a multi-source-verified, structured, and actionable research report using the internal knowledge base.\n## Execution Framework (Detailed Steps & Key Points)\n1. **Assessment & Decomposition**\n * Actions:\n * Automatically extract: main topic, subtopics, entities (people/organizations/products/technologies), time window, geographic/business scope.\n * Output as a list: N facts/data points that must be collected (*N* ranges from 5\u201320 depending on question complexity).\n2. **Query Type Determination (Rule-Based)**\n * Example rules:\n * If the question involves a single issue but requests \u201cmethod comparison/multiple explanations\u201d \u2192 use **depth-first**.\n * If the question can naturally be split into \u22653 independent sub-questions \u2192 use **breadth-first**.\n * If the question can be answered by a single fact/specification/definition \u2192 use **simple query**.\n3. **Research Plan Formulation**\n * Depth-first: define 3\u20135 perspectives (methodology/stakeholders/time dimension/technical route, etc.), assign search keywords, target document types, and output format for each perspective.\n * Breadth-first: list subtasks, prioritize them, and assign search terms.\n * Simple query: directly provide the search sentence and required fields.\n4. **Retrieval Execution**\n * After retrieval: perform coverage check (does it contain the key facts?) and quality check (source diversity, authority, latest update time).\n * If standards are not met, automatically loop: rewrite queries (synonyms/cross-domain terms) and retry \u22643 times, or flag as requiring external search.\n5. **Integration & Reasoning**\n * Build the answer using a **fact\u2013evidence\u2013reasoning** chain. For each conclusion, attach 1\u20132 strongest pieces of evidence.\n---\n## Quality Gate Checklist (Verify at Each Stage)\n* **Stage 1 (Decomposition)**:\n * [ ] Key concepts and expected outputs identified\n * [ ] Required facts/data points listed\n* **Stage 2 (Retrieval)**:\n * [ ] Meets quality standards (see above)\n * [ ] If not met: execute query iteration\n* **Stage 3 (Generation)**:\n * [ ] Each conclusion has at least one direct evidence source\n * [ ] State assumptions/uncertainties\n * [ ] Provide next-step suggestions or experiment/retrieval plans\n * [ ] Final length and depth match user expectations (comply with word count/format if specified)\n---\n## Core Principles\n1. **Strict reliance on the knowledge base**: answers must be **fully bounded** by the content retrieved from the knowledge base.\n2. **No fabrication**: do not generate, infer, or create information that is not explicitly present in the knowledge base.\n3. **Accuracy first**: prefer incompleteness over inaccurate content.\n4. **Output format**:\n * Hierarchically clear modular structure\n * Logical grouping according to the MECE principle\n * Professionally presented formatting\n * Step-by-step cognitive guidance\n * Reasonable use of headings and dividers for clarity\n * *Italicize* key parameters\n * **Bold** critical information\n5. **LaTeX formula requirements**:\n * Inline formulas: start and end with `$`\n * Block formulas: start and end with `$$`, each `$$` on its own line\n * Block formula content must comply with LaTeX math syntax\n * Verify formula correctness\n---\n## Additional Notes (Interaction & Failure Strategy)\n* If the knowledge base does not cover critical facts: explicitly inform the user (with sample wording)\n* For time-sensitive issues: enforce time filtering in the search request, and indicate the latest retrieval date in the answer.\n* Language requirement: answer in the user\u2019s preferred language\n",
            "temperature": "0.1",
            "temperatureEnabled": true,
            "tools": [
              {
                "component_name": "Retrieval",
                "name": "Retrieval",
                "params": {
                  "cross_languages": [],
                  "description": "",
                  "empty_response": "",
                  "kb_ids": [],
                  "keywords_similarity_weight": 0.7,
                  "outputs": {
                    "formalized_content": {
                      "type": "string",
                      "value": ""
                    }
                  },
                  "rerank_id": "",
                  "similarity_threshold": 0.2,
                  "top_k": 1024,
                  "top_n": 8,
                  "use_kg": false
                }
              }
            ],
            "topPEnabled": false,
            "top_p": 0.75,
            "user_prompt": "",
            "visual_files_var": ""
          }
        },
        "upstream": [
          "begin"
        ]
      },
      "Message:OrangeYearsShine": {
        "downstream": [],
        "obj": {
          "component_name": "Message",
          "params": {
            "content": [
              "{Agent:NewPumasLick@content}"
            ]
          }
        },
        "upstream": [
          "Agent:NewPumasLick"
        ]
      },
      "begin": {
        "downstream": [
          "Agent:NewPumasLick"
        ],
        "obj": {
          "component_name": "Begin",
          "params": {
            "enablePrologue": true,
            "inputs": {},
            "mode": "conversational",
            "prologue": "\u4f60\u597d\uff01 \u6211\u662f\u4f60\u7684\u52a9\u7406\uff0c\u6709\u4ec0\u4e48\u53ef\u4ee5\u5e2e\u5230\u4f60\u7684\u5417\uff1f"
          }
        },
        "upstream": []
      }
    },
    "globals": {
      "sys.conversation_turns": 0,
      "sys.files": [],
      "sys.query": "",
      "sys.user_id": ""
    },
    "graph": {
      "edges": [
        {
          "data": {
            "isHovered": false
          },
          "id": "xy-edge__beginstart-Agent:NewPumasLickend",
          "source": "begin",
          "sourceHandle": "start",
          "target": "Agent:NewPumasLick",
          "targetHandle": "end"
        },
        {
          "data": {
            "isHovered": false
          },
          "id": "xy-edge__Agent:NewPumasLickstart-Message:OrangeYearsShineend",
          "markerEnd": "logo",
          "source": "Agent:NewPumasLick",
          "sourceHandle": "start",
          "style": {
            "stroke": "rgba(91, 93, 106, 1)",
            "strokeWidth": 1
          },
          "target": "Message:OrangeYearsShine",
          "targetHandle": "end",
          "type": "buttonEdge",
          "zIndex": 1001
        },
        {
          "data": {
            "isHovered": false
          },
          "id": "xy-edge__Agent:NewPumasLicktool-Tool:AllBirdsNailend",
          "selected": false,
          "source": "Agent:NewPumasLick",
          "sourceHandle": "tool",
          "target": "Tool:AllBirdsNail",
          "targetHandle": "end"
        }
      ],
      "nodes": [
        {
          "data": {
            "form": {
              "enablePrologue": true,
              "inputs": {},
              "mode": "conversational",
              "prologue": "\u4f60\u597d\uff01 \u6211\u662f\u4f60\u7684\u52a9\u7406\uff0c\u6709\u4ec0\u4e48\u53ef\u4ee5\u5e2e\u5230\u4f60\u7684\u5417\uff1f"
            },
            "label": "Begin",
            "name": "begin"
          },
          "dragging": false,
          "id": "begin",
          "measured": {
            "height": 48,
            "width": 200
          },
          "position": {
            "x": -9.569875358221438,
            "y": 205.84018385864917
          },
          "selected": false,
          "sourcePosition": "left",
          "targetPosition": "right",
          "type": "beginNode"
        },
        {
          "data": {
            "form": {
              "content": [
                "{Agent:NewPumasLick@content}"
              ]
            },
            "label": "Message",
            "name": "Response"
          },
          "dragging": false,
          "id": "Message:OrangeYearsShine",
          "measured": {
            "height": 56,
            "width": 200
          },
          "position": {
            "x": 734.4061285881053,
            "y": 199.9706031723009
          },
          "selected": false,
          "sourcePosition": "right",
          "targetPosition": "left",
          "type": "messageNode"
        },
        {
          "data": {
            "form": {
              "delay_after_error": 1,
              "description": "",
              "exception_comment": "",
              "exception_default_value": "",
              "exception_goto": [],
              "exception_method": null,
              "frequencyPenaltyEnabled": false,
              "frequency_penalty": 0.5,
              "llm_id": "qwen3-235b-a22b-instruct-2507@Tongyi-Qianwen",
              "maxTokensEnabled": true,
              "max_retries": 3,
              "max_rounds": 3,
              "max_tokens": 128000,
              "mcp": [],
              "message_history_window_size": 12,
              "outputs": {
                "content": {
                  "type": "string",
                  "value": ""
                }
              },
              "parameter": "Precise",
              "presencePenaltyEnabled": false,
              "presence_penalty": 0.5,
              "prompts": [
                {
                  "content": "# User Query\n {sys.query}",
                  "role": "user"
                }
              ],
              "sys_prompt": "## Role & Task\nYou are a **\u201cKnowledge Base Retrieval Q\\&A Agent\u201d** whose goal is to break down the user\u2019s question into retrievable subtasks, and then produce a multi-source-verified, structured, and actionable research report using the internal knowledge base.\n## Execution Framework (Detailed Steps & Key Points)\n1. **Assessment & Decomposition**\n * Actions:\n * Automatically extract: main topic, subtopics, entities (people/organizations/products/technologies), time window, geographic/business scope.\n * Output as a list: N facts/data points that must be collected (*N* ranges from 5\u201320 depending on question complexity).\n2. **Query Type Determination (Rule-Based)**\n * Example rules:\n * If the question involves a single issue but requests \u201cmethod comparison/multiple explanations\u201d \u2192 use **depth-first**.\n * If the question can naturally be split into \u22653 independent sub-questions \u2192 use **breadth-first**.\n * If the question can be answered by a single fact/specification/definition \u2192 use **simple query**.\n3. **Research Plan Formulation**\n * Depth-first: define 3\u20135 perspectives (methodology/stakeholders/time dimension/technical route, etc.), assign search keywords, target document types, and output format for each perspective.\n * Breadth-first: list subtasks, prioritize them, and assign search terms.\n * Simple query: directly provide the search sentence and required fields.\n4. **Retrieval Execution**\n * After retrieval: perform coverage check (does it contain the key facts?) and quality check (source diversity, authority, latest update time).\n * If standards are not met, automatically loop: rewrite queries (synonyms/cross-domain terms) and retry \u22643 times, or flag as requiring external search.\n5. **Integration & Reasoning**\n * Build the answer using a **fact\u2013evidence\u2013reasoning** chain. For each conclusion, attach 1\u20132 strongest pieces of evidence.\n---\n## Quality Gate Checklist (Verify at Each Stage)\n* **Stage 1 (Decomposition)**:\n * [ ] Key concepts and expected outputs identified\n * [ ] Required facts/data points listed\n* **Stage 2 (Retrieval)**:\n * [ ] Meets quality standards (see above)\n * [ ] If not met: execute query iteration\n* **Stage 3 (Generation)**:\n * [ ] Each conclusion has at least one direct evidence source\n * [ ] State assumptions/uncertainties\n * [ ] Provide next-step suggestions or experiment/retrieval plans\n * [ ] Final length and depth match user expectations (comply with word count/format if specified)\n---\n## Core Principles\n1. **Strict reliance on the knowledge base**: answers must be **fully bounded** by the content retrieved from the knowledge base.\n2. **No fabrication**: do not generate, infer, or create information that is not explicitly present in the knowledge base.\n3. **Accuracy first**: prefer incompleteness over inaccurate content.\n4. **Output format**:\n * Hierarchically clear modular structure\n * Logical grouping according to the MECE principle\n * Professionally presented formatting\n * Step-by-step cognitive guidance\n * Reasonable use of headings and dividers for clarity\n * *Italicize* key parameters\n * **Bold** critical information\n5. **LaTeX formula requirements**:\n * Inline formulas: start and end with `$`\n * Block formulas: start and end with `$$`, each `$$` on its own line\n * Block formula content must comply with LaTeX math syntax\n * Verify formula correctness\n---\n## Additional Notes (Interaction & Failure Strategy)\n* If the knowledge base does not cover critical facts: explicitly inform the user (with sample wording)\n* For time-sensitive issues: enforce time filtering in the search request, and indicate the latest retrieval date in the answer.\n* Language requirement: answer in the user\u2019s preferred language\n",
              "temperature": "0.1",
              "temperatureEnabled": true,
              "tools": [
                {
                  "component_name": "Retrieval",
                  "name": "Retrieval",
                  "params": {
                    "cross_languages": [],
                    "description": "",
                    "empty_response": "",
                    "kb_ids": [],
                    "keywords_similarity_weight": 0.7,
                    "outputs": {
                      "formalized_content": {
                        "type": "string",
                        "value": ""
                      }
                    },
                    "rerank_id": "",
                    "similarity_threshold": 0.2,
                    "top_k": 1024,
                    "top_n": 8,
                    "use_kg": false
                  }
                }
              ],
              "topPEnabled": false,
              "top_p": 0.75,
              "user_prompt": "",
              "visual_files_var": ""
            },
            "label": "Agent",
            "name": "Knowledge Base Agent"
          },
          "dragging": false,
          "id": "Agent:NewPumasLick",
          "measured": {
            "height": 84,
            "width": 200
          },
          "position": {
            "x": 347.00048227952215,
            "y": 186.49109364794631
          },
          "selected": false,
          "sourcePosition": "right",
          "targetPosition": "left",
          "type": "agentNode"
        },
        {
          "data": {
            "form": {
              "description": "This is an agent for a specific task.",
              "user_prompt": "This is the order you need to send to the agent."
            },
            "label": "Tool",
            "name": "flow.tool_10"
          },
          "dragging": false,
          "id": "Tool:AllBirdsNail",
          "measured": {
            "height": 48,
            "width": 200
          },
          "position": {
            "x": 220.24819746977118,
            "y": 403.31576836482583
          },
          "selected": false,
          "sourcePosition": "right",
          "targetPosition": "left",
          "type": "toolNode"
        }
      ]
    },
    "history": [],
    "memory": [],
    "messages": [],
    "path": [],
    "retrieval": []
  },
  "avatar": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAADAAAAAwCAYAAABXAvmHAAAH0klEQVR4nO2ZC1BU1wGG/3uRp/IygG+DGK0GOjE1U6cxI4tT03Y0E+kENbaJbKpj60wzgNMwnTjuEtu0miGasY+0krI202kMVEnVxtoOLG00oVa0LajVBDcSEI0REFBgkZv/3GWXfdzdvctuHs7kmzmec9//d+85914XCXc4Xwjk1+59VJGGF7C5QAFSWBvgyWmWLl7IKiny6QNL173B5YjB84bOyrpKA4B1DLySdQpLKAiZGtZ7a/KMVoQJz6UfEZyhTWwaEBmssiLvCueu6BJg8EwFqGTTAC+uvNWC9w82sRWcux/JwaSHstjywcogRt4RG0KExwWG4QsVYCebKSwe3L5lR9OOWjyzfg2WL/0a1/jncO3b2FHxGnKeWYqo+Giu8UEMrWJKWBACPMY/DG+63txhvnKshUu+DF2/hayMDFRsL+VScDb++AVc6OjAuInxXPJl2tfnIikrzUyJMi7qQmLRhOEr2fOFbX/7P6STF7BqoWevfdij4NWGQfx+57OYO2sG1wSnsek8Nm15EU8sikF6ouelXz9ph7JwDqYt+5IIZaGEkauDIrH4wPBmhjexCSEws+VdVG1M4NIoj+2xYzBuJtavWcEl/VS8dggx/ZdQvcGzQwp+cxOXsu5RBQQMVkYJM4LA/Txh+ELFMWFVPARS5kFiabZdx8Olh7l17BzdvhzZmROhdJ3j6D/nIyBgOCMlLAgA9xmF4TMV4BSbrgnrLiBl5rOsRCRRbDUsBzQFiJjY91PCBj9w+yiP1lXWsTLAjc9YQGB9I8+Yx1oTiUWFvW9QgDo2PdASaDp/EQ8/sRnhcPTVcuTMncXwQQVESL9DidscaPW+QEtAICRu9PSxFTpJiePV8AI9AsTvXZBY/Pa+wJ9ApNApIILm8S5Y4QXXQwhYFH6csemDP4G3G5v579i5d04mknknQhDYS4HCrCVr/mC3D305KnbCEpvVIia5Onw6WaWw+KAl0Np+FUXbdiMcyoqfUoeRHoFrJ1uRtnBG1/9Mf/3LtElp+VwF2wcd7woJib1vUPwMH4GWQCQJJtBa/V9cPmFD8uQUpMdNGDhY8bNYrobh8acHu270/l0ImJWRt64Wn6WACN9z5gq2lXwPW8pfweT0icP/fH23vO9QLYq3/QKyLBmFQI3CUcT9NdESEEPItKsSN3r7MBaSJoxHWZERM6ZmMLy2gDP8/pd/og418dTL37hFSUpMUC5f+UiWZcnY9s5+ixCwUiCXx2iiJdDNx6f4pgkH8Q3lbxK7h8+enoHha1cRNdMp8axiHxo6+/5bVdk8DSROYIW1X7QEIom3wHD3gEf4vu1bVYEJZeWQ0zJQvmcfyiv2QZak6raG/QWfK4Ez9mTc5v8xPMJfuojoxXmIX/9DOMe+FCWbcHu4BJJ0YEwCx0824bFNW9HesB+CqYu+jepfPYcHF+aoPXS8sQl/+vU2bgmOU2C+qRc9/YrrPPbGBtzavd0nvCxLxui4pJrBm911PFwak4CYA80cj+JCAiGUzYkmxrSY4N2c3GLi6UEIFL/wRxxqkhmHnTEpDQcrfq6ea+hcE8bNy3GFzyq4H22HW1Kd4WMSkg1jmsSRpKj0Rzhy4gNUv/y8Gjrv8SJK3OWScA+fMn/ysVPPvTmeh6nh1TcxBUJ+jEaKYr7N36x7h+Edj0pB6+WrLokn87+BrTt/p4ZPzZ6MM7/8R2//h33vOcNzdwgBMwVMbGvySQmo4a0NqOZccU7YmGXLEfPQUlUid/XT6B8YdIU/99vjsPcOdEhDsfOd4QVCwKB8yp8SWuG1njbTl83DpMWz1PCKAswuWPDI0e8WebyAJBbxNdrF7cls+hBpAb3h3XtehL/3+4u7D35rQwpP4YFTwMJ91rHpQyQFQgmf9sAMNL9Ur4afv/FBjIuPVj+n4YVTwMD96tj0IVICoYYXv/q1VJ1Sl8UveQyaRwErvOB6B5SwKhqP00gI6A0vhsycJ7/KIzxhyHqGN0ADbnNAAYOicRfCFdAb/p50Gbfuc/wy5w1D5lOghk0fuG0USlgVr7sQjoDe8C8WxKGKPy2KjzlvAQb02/sCbh+FApngX1QUtyeSuwDi0hxFByV7L+LIf3r5kvpp4PBr07Hqvn71Y85bgOG6WS2ggA1+4D6eUKKQApVsqngI6KSkqh9HzsoM/3zg8Oz5VQ9E8wjf30YFDGdkeAsCwH18oYRZGXk7C4HuYxcwe6rjQsFovzaEvoFxqNkTOPzMjGikJso8wsF77XYkLx6dAwxWxvBmBIH7aUMJi8J3w0DnTVz7dyvX6KPzVBt+kL8cmzesRq9ps2Z48bRJmOIapS7E4zM2lXNt5CcU6ID7+ocSZkqY2NRN6ysnsHbJEpR8ZwV6t5Yg+iuLELf2KVd48VwXQf3BQGUMb4ZOuH9gKFEIYJfiNrEDcXZHHV4q3YRv5i7ikgM94RlETNgihrcgBHhccCiRCf7VhBK5rAPyr9I/Y/WKPEyfksH/9NjQ2dODhsYzwcLXsypkeBtCRGLRDUUMAMyKHxEx4dtrzyP97nQMygripiQiKi4aSbPvQmKW7+OXF69ntYvBa1iPCYklZEZECsGm4ja0Ops7EJsaj4SprlU+8IJiqIjAFga3Ikx4vvAYkTGALxyWFArlsnbBC9Sz6mI5zWKNRGh3JJY7mjte4GOz+r4tkRbxQQAAAABJRU5ErkJggg=="
}
@@ -1,7 +1,11 @@
 {
     "id": 12,
-    "title": "Generate SEO Blog",
-    "description": "This workflow automatically generates a complete SEO-optimized blog article based on a simple user input. You don’t need any writing experience. Just provide a topic or short request — the system will handle the rest.",
+    "title": {
+        "en": "Generate SEO Blog",
+        "zh": "生成SEO博客"},
+    "description": {
+        "en": "This workflow automatically generates a complete SEO-optimized blog article based on a simple user input. You don’t need any writing experience. Just provide a topic or short request — the system will handle the rest.",
+        "zh": "此工作流根据简单的用户输入自动生成完整的SEO博客文章。你无需任何写作经验,只需提供一个主题或简短请求,系统将处理其余部分。"},
     "canvas_type": "Marketing",
     "dsl": {
         "components": {
@@ -1,7 +1,11 @@
 {
     "id": 4,
-    "title": "Generate SEO Blog",
-    "description": "This workflow automatically generates a complete SEO-optimized blog article based on a simple user input. You don’t need any writing experience. Just provide a topic or short request — the system will handle the rest.",
+    "title": {
+        "en": "Generate SEO Blog",
+        "zh": "生成SEO博客"},
+    "description": {
+        "en": "This workflow automatically generates a complete SEO-optimized blog article based on a simple user input. You don’t need any writing experience. Just provide a topic or short request — the system will handle the rest.",
+        "zh": "此工作流根据简单的用户输入自动生成完整的SEO博客文章。你无需任何写作经验,只需提供一个主题或简短请求,系统将处理其余部分。"},
     "canvas_type": "Recommended",
     "dsl": {
         "components": {
@@ -1,7 +1,11 @@
 {
     "id": 17,
-    "title": "SQL Assistant",
-    "description": "SQL Assistant is an AI-powered tool that lets business users turn plain-English questions into fully formed SQL queries. Simply type your question (e.g., “Show me last quarter’s top 10 products by revenue”) and SQL Assistant generates the exact SQL, runs it against your database, and returns the results in seconds. ",
+    "title": {
+        "en": "SQL Assistant",
+        "zh": "SQL助理"},
+    "description": {
+        "en": "SQL Assistant is an AI-powered tool that lets business users turn plain-English questions into fully formed SQL queries. Simply type your question (e.g., “Show me last quarter’s top 10 products by revenue”) and SQL Assistant generates the exact SQL, runs it against your database, and returns the results in seconds. ",
+        "zh": "用户能够将简单文本问题转化为完整的SQL查询并输出结果。只需输入您的问题(例如,“展示上个季度前十名按收入排序的产品”),SQL助理就会生成精确的SQL语句,对其运行您的数据库,并几秒钟内返回结果。"},
     "canvas_type": "Marketing",
     "dsl": {
         "components": {
File diff suppressed because one or more lines are too long
@@ -1,8 +1,12 @@
 
 {
     "id": 9,
-    "title": "Technical Docs QA",
-    "description": "This is a document question-and-answer system based on a knowledge base. When a user asks a question, it retrieves relevant document content to provide accurate answers.",
+    "title": {
+        "en": "Technical Docs QA",
+        "zh": "技术文档问答"},
+    "description": {
+        "en": "This is a document question-and-answer system based on a knowledge base. When a user asks a question, it retrieves relevant document content to provide accurate answers.",
+        "zh": "基于知识库的文档问答系统,当用户提出问题时,会检索相关本地文档并提供准确回答。"},
     "canvas_type": "Customer Support",
     "dsl": {
         "components": {
@@ -1,9 +1,13 @@
 
 {
     "id": 14,
-    "title": "Trip Planner",
-    "description": "This smart trip planner utilizes LLM technology to automatically generate customized travel itineraries, with optional tool integration for enhanced reliability.",
-    "canvas_type": "Consumer App",
+    "title": {
+        "en": "Trip Planner",
+        "zh": "旅行规划"},
+    "description": {
+        "en": "This smart trip planner utilizes LLM technology to automatically generate customized travel itineraries, with optional tool integration for enhanced reliability.",
+        "zh": "智能旅行规划将利用大模型自动生成定制化的旅行行程,附带可选工具集成,以增强可靠性。"},
+    "canvas_type": "Consumer App",
     "dsl": {
         "components": {
             "Agent:OddGuestsPump": {
@@ -1,9 +1,13 @@
 
 {
     "id": 16,
-    "title": "WebSearch Assistant",
-    "description": "A chat assistant template that integrates information extracted from a knowledge base and web searches to respond to queries. Let's start by setting up your knowledge base in 'Retrieval'!",
-    "canvas_type": "Other",
+    "title": {
+        "en": "WebSearch Assistant",
+        "zh": "网页搜索助手"},
+    "description": {
+        "en": "A chat assistant template that integrates information extracted from a knowledge base and web searches to respond to queries. Let's start by setting up your knowledge base in 'Retrieval'!",
+        "zh": "集成了从知识库和网络搜索中提取的信息回答用户问题。让我们从设置您的知识库开始检索!"},
+    "canvas_type": "Other",
     "dsl": {
         "components": {
             "Agent:SmartSchoolsCross": {
@@ -16,9 +16,8 @@
 from abc import ABC
 import asyncio
 from crawl4ai import AsyncWebCrawler
 
 from agent.tools.base import ToolParamBase, ToolBase
-from api.utils.web_utils import is_valid_url
 
 
 class CrawlerParam(ToolParamBase):
@@ -39,6 +38,7 @@ class Crawler(ToolBase, ABC):
     component_name = "Crawler"
 
     def _run(self, history, **kwargs):
+        from api.utils.web_utils import is_valid_url
         ans = self.get_input()
         ans = " - ".join(ans["content"]) if "content" in ans else ""
         if not is_valid_url(ans):
agent/tools/searxng.py (new file, 156 lines)

@@ -0,0 +1,156 @@
#
#  Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#
import logging
import os
import time
from abc import ABC

import requests

from agent.tools.base import ToolMeta, ToolParamBase, ToolBase
from api.utils.api_utils import timeout


class SearXNGParam(ToolParamBase):
    """
    Define the SearXNG component parameters.
    """

    def __init__(self):
        self.meta: ToolMeta = {
            "name": "searxng_search",
            "description": "SearXNG is a privacy-focused metasearch engine that aggregates results from multiple search engines without tracking users. It provides comprehensive web search capabilities.",
            "parameters": {
                "query": {
                    "type": "string",
                    "description": "The search keywords to execute with SearXNG. The keywords should be the most important words/terms(includes synonyms) from the original request.",
                    "default": "{sys.query}",
                    "required": True
                },
                "searxng_url": {
                    "type": "string",
                    "description": "The base URL of your SearXNG instance (e.g., http://localhost:4000). This is required to connect to your SearXNG server.",
                    "required": False,
                    "default": ""
                }
            }
        }
        super().__init__()
        self.top_n = 10
        self.searxng_url = ""

    def check(self):
        # Keep validation lenient so opening try-run panel won't fail without URL.
        # Coerce top_n to int if it comes as string from UI.
        try:
            if isinstance(self.top_n, str):
                self.top_n = int(self.top_n.strip())
        except Exception:
            pass
        self.check_positive_integer(self.top_n, "Top N")

    def get_input_form(self) -> dict[str, dict]:
        return {
            "query": {
                "name": "Query",
                "type": "line"
            },
            "searxng_url": {
                "name": "SearXNG URL",
                "type": "line",
                "placeholder": "http://localhost:4000"
            }
        }


class SearXNG(ToolBase, ABC):
    component_name = "SearXNG"

    @timeout(os.environ.get("COMPONENT_EXEC_TIMEOUT", 12))
    def _invoke(self, **kwargs):
        # Gracefully handle try-run without inputs
        query = kwargs.get("query")
        if not query or not isinstance(query, str) or not query.strip():
            self.set_output("formalized_content", "")
            return ""

        searxng_url = (kwargs.get("searxng_url") or getattr(self._param, "searxng_url", "") or "").strip()
        # In try-run, if no URL configured, just return empty instead of raising
        if not searxng_url:
            self.set_output("formalized_content", "")
            return ""

        last_e = ""
        for _ in range(self._param.max_retries+1):
            try:
                # Build the search parameters
                search_params = {
                    'q': query,
                    'format': 'json',
                    'categories': 'general',
                    'language': 'auto',
                    'safesearch': 1,
                    'pageno': 1
                }

                # Send the search request
                response = requests.get(
                    f"{searxng_url}/search",
                    params=search_params,
                    timeout=10
                )
                response.raise_for_status()

                data = response.json()

                # Validate the response payload
                if not data or not isinstance(data, dict):
                    raise ValueError("Invalid response from SearXNG")

                results = data.get("results", [])
                if not isinstance(results, list):
                    raise ValueError("Invalid results format from SearXNG")

                # Cap the number of results
                results = results[:self._param.top_n]

                # Convert the hits into retrieval chunks
                self._retrieve_chunks(results,
                                      get_title=lambda r: r.get("title", ""),
                                      get_url=lambda r: r.get("url", ""),
                                      get_content=lambda r: r.get("content", ""))

                self.set_output("json", results)
                return self.output("formalized_content")

            except requests.RequestException as e:
                last_e = f"Network error: {e}"
                logging.exception(f"SearXNG network error: {e}")
                time.sleep(self._param.delay_after_error)
            except Exception as e:
                last_e = str(e)
                logging.exception(f"SearXNG error: {e}")
                time.sleep(self._param.delay_after_error)

        if last_e:
            self.set_output("_ERROR", last_e)
            return f"SearXNG error: {last_e}"

        assert False, self.output()

    def thoughts(self) -> str:
        return """
Keywords: {}
Searching with SearXNG for relevant results...
""".format(self.get_input().get("query", "-_-!"))
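The tool wraps SearXNG's JSON search API, so the same request can be tried by hand against a local instance. A sketch, assuming a SearXNG server with the JSON format enabled at the URL the tool's placeholder suggests:

```python
import requests

resp = requests.get(
    "http://localhost:4000/search",  # assumed instance URL
    params={"q": "retrieval augmented generation", "format": "json",
            "categories": "general", "language": "auto",
            "safesearch": 1, "pageno": 1},
    timeout=10,
)
resp.raise_for_status()
for hit in resp.json().get("results", [])[:3]:
    print(hit.get("title", ""), "->", hit.get("url", ""))
```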
@@ -93,6 +93,7 @@ def list_chunk():
 def get():
     chunk_id = request.args["chunk_id"]
     try:
+        chunk = None
         tenants = UserTenantService.query(user_id=current_user.id)
         if not tenants:
             return get_data_error_result(message="Tenant not found!")
@@ -66,7 +66,7 @@ def set_dialog():
 
     if not is_create:
         if not req.get("kb_ids", []) and not prompt_config.get("tavily_api_key") and "{knowledge}" in prompt_config['system']:
-            return get_data_error_result(message="Please remove `{knowledge}` in system prompt since no knowledge base/Tavily used here.")
+            return get_data_error_result(message="Please remove `{knowledge}` in system prompt since no knowledge base / Tavily used here.")
 
     for p in prompt_config["parameters"]:
         if p["optional"]:
@@ -243,7 +243,7 @@ def add_llm():
             model_name=mdl_nm,
             base_url=llm["api_base"]
         )
-        arr, tc = mdl.similarity("Hello~ Ragflower!", ["Hi, there!", "Ohh, my friend!"])
+        arr, tc = mdl.similarity("Hello~ RAGFlower!", ["Hi, there!", "Ohh, my friend!"])
         if len(arr) == 0:
             raise Exception("Not known.")
     except KeyError:
@@ -271,7 +271,7 @@ def add_llm():
             key=llm["api_key"], model_name=mdl_nm, base_url=llm["api_base"]
         )
         try:
-            for resp in mdl.tts("Hello~ Ragflower!"):
+            for resp in mdl.tts("Hello~ RAGFlower!"):
                 pass
         except RuntimeError as e:
             msg += f"\nFail to access model({factory}/{mdl_nm})." + str(e)
@@ -82,7 +82,7 @@ def create() -> Response:
 
     server_name = req.get("name", "")
     if not server_name or len(server_name.encode("utf-8")) > 255:
-        return get_data_error_result(message=f"Invaild MCP name or length is {len(server_name)} which is large than 255.")
+        return get_data_error_result(message=f"Invalid MCP name or length is {len(server_name)} which is large than 255.")
 
     e, _ = MCPServerService.get_by_name_and_tenant(name=server_name, tenant_id=current_user.id)
     if e:
@@ -90,7 +90,7 @@ def create() -> Response:
 
     url = req.get("url", "")
     if not url:
-        return get_data_error_result(message="Invaild url.")
+        return get_data_error_result(message="Invalid url.")
 
     headers = safe_json_parse(req.get("headers", {}))
     req["headers"] = headers
@@ -141,10 +141,10 @@ def update() -> Response:
         return get_data_error_result(message="Unsupported MCP server type.")
     server_name = req.get("name", mcp_server.name)
     if server_name and len(server_name.encode("utf-8")) > 255:
-        return get_data_error_result(message=f"Invaild MCP name or length is {len(server_name)} which is large than 255.")
+        return get_data_error_result(message=f"Invalid MCP name or length is {len(server_name)} which is large than 255.")
     url = req.get("url", mcp_server.url)
     if not url:
-        return get_data_error_result(message="Invaild url.")
+        return get_data_error_result(message="Invalid url.")
 
     headers = safe_json_parse(req.get("headers", mcp_server.headers))
     req["headers"] = headers
@@ -218,7 +218,7 @@ def import_multiple() -> Response:
             continue
 
         if not server_name or len(server_name.encode("utf-8")) > 255:
-            results.append({"server": server_name, "success": False, "message": f"Invaild MCP name or length is {len(server_name)} which is large than 255."})
+            results.append({"server": server_name, "success": False, "message": f"Invalid MCP name or length is {len(server_name)} which is large than 255."})
             continue
 
         base_name = server_name
@@ -409,7 +409,7 @@ def test_mcp() -> Response:

    url = req.get("url", "")
    if not url:
        return get_data_error_result(message="Invaild MCP url.")
        return get_data_error_result(message="Invalid MCP url.")

    server_type = req.get("server_type", "")
    if server_type not in VALID_MCP_SERVER_TYPES:

@@ -74,7 +74,6 @@ def retrieval(tenant_id):
        [tenant_id],
        [kb_id],
        embd_mdl,
        doc_ids,
        LLMBundle(kb.tenant_id, LLMType.CHAT))
    if ck["content_with_weight"]:
        ranks["chunks"].insert(0, ck)

@@ -414,7 +414,7 @@ def agents_completion_openai_compatibility(tenant_id, agent_id):
            tenant_id,
            agent_id,
            question,
            session_id=req.get("id", req.get("metadata", {}).get("id", "")),
            session_id=req.get("session_id", req.get("id", "") or req.get("metadata", {}).get("id", "")),
            stream=True,
            **req,
        ),
@@ -432,7 +432,7 @@ def agents_completion_openai_compatibility(tenant_id, agent_id):
            tenant_id,
            agent_id,
            question,
            session_id=req.get("id", req.get("metadata", {}).get("id", "")),
            session_id=req.get("session_id", req.get("id", "") or req.get("metadata", {}).get("id", "")),
            stream=False,
            **req,
        )
@@ -445,7 +445,6 @@ def agents_completion_openai_compatibility(tenant_id, agent_id):
def agent_completions(tenant_id, agent_id):
    req = request.json

    ans = {}
    if req.get("stream", True):

        def generate():
@@ -456,14 +455,13 @@ def agent_completions(tenant_id, agent_id):
            except Exception:
                continue

            if ans.get("event") != "message" or not ans.get("data", {}).get("reference", None):
            if ans.get("event") not in ["message", "message_end"]:
                continue

            yield answer

        yield "data:[DONE]\n\n"

    if req.get("stream", True):
        resp = Response(generate(), mimetype="text/event-stream")
        resp.headers.add_header("Cache-control", "no-cache")
        resp.headers.add_header("Connection", "keep-alive")
@@ -472,6 +470,8 @@ def agent_completions(tenant_id, agent_id):
        return resp

    full_content = ""
    reference = {}
    final_ans = ""
    for answer in agent_completion(tenant_id=tenant_id, agent_id=agent_id, **req):
        try:
            ans = json.loads(answer[5:])
@@ -480,11 +480,14 @@ def agent_completions(tenant_id, agent_id):
                full_content += ans["data"]["content"]

            if ans.get("data", {}).get("reference", None):
                ans["data"]["content"] = full_content
                return get_result(data=ans)
                reference.update(ans["data"]["reference"])

            final_ans = ans
        except Exception as e:
            return get_result(data=f"**ERROR**: {str(e)}")
    return get_result(data=ans)
    final_ans["data"]["content"] = full_content
    final_ans["data"]["reference"] = reference
    return get_result(data=final_ans)

@manager.route("/chats/<chat_id>/sessions", methods=["GET"])  # noqa: F821

@@ -43,7 +43,7 @@ def create():
        return get_data_error_result(message=f"Search name length is {len(search_name)} which is large than 255.")
    e, _ = TenantService.get_by_id(current_user.id)
    if not e:
        return get_data_error_result(message="Authorizationd identity.")
        return get_data_error_result(message="Authorized identity.")

    search_name = search_name.strip()
    search_name = duplicate_name(SearchService.query, name=search_name, tenant_id=current_user.id, status=StatusEnum.VALID.value)
@@ -78,7 +78,7 @@ def update():
    tenant_id = req["tenant_id"]
    e, _ = TenantService.get_by_id(tenant_id)
    if not e:
        return get_data_error_result(message="Authorizationd identity.")
        return get_data_error_result(message="Authorized identity.")

    search_id = req["search_id"]
    if not SearchService.accessible4deletion(search_id, current_user.id):

@@ -824,9 +824,8 @@ class UserCanvas(DataBaseModel):
class CanvasTemplate(DataBaseModel):
    id = CharField(max_length=32, primary_key=True)
    avatar = TextField(null=True, help_text="avatar base64 string")
    title = CharField(max_length=255, null=True, help_text="Canvas title")

    description = TextField(null=True, help_text="Canvas description")
    title = JSONField(null=True, default=dict, help_text="Canvas title")
    description = JSONField(null=True, default=dict, help_text="Canvas description")
    canvas_type = CharField(max_length=32, null=True, help_text="Canvas type", index=True)
    dsl = JSONField(null=True, default={})

@@ -1021,4 +1020,13 @@ def migrate_db():
        migrate(migrator.add_column("dialog", "meta_data_filter", JSONField(null=True, default={})))
    except Exception:
        pass

    try:
        migrate(migrator.alter_column_type("canvas_template", "title", JSONField(null=True, default=dict, help_text="Canvas title")))
    except Exception:
        pass
    try:
        migrate(migrator.alter_column_type("canvas_template", "description", JSONField(null=True, default=dict, help_text="Canvas description")))
    except Exception:
        pass
    logging.disable(logging.NOTSET)

@@ -213,26 +213,33 @@ def completionOpenAI(tenant_id, agent_id, question, session_id=None, stream=True
            except Exception as e:
                logging.exception(f"Agent OpenAI-Compatible completionOpenAI parse answer failed: {e}")
                continue
            if ans.get("event") != "message" or not ans.get("data", {}).get("reference", None):
            if ans.get("event") not in ["message", "message_end"]:
                continue
            content_piece = ans["data"]["content"]

            content_piece = ""
            if ans["event"] == "message":
                content_piece = ans["data"]["content"]

            completion_tokens += len(tiktokenenc.encode(content_piece))

            yield "data: " + json.dumps(
                get_data_openai(
            openai_data = get_data_openai(
                id=session_id or str(uuid4()),
                model=agent_id,
                content=content_piece,
                prompt_tokens=prompt_tokens,
                completion_tokens=completion_tokens,
                stream=True
                ),
                ensure_ascii=False
            ) + "\n\n"
            )

            if ans.get("data", {}).get("reference", None):
                openai_data["choices"][0]["delta"]["reference"] = ans["data"]["reference"]

            yield "data: " + json.dumps(openai_data, ensure_ascii=False) + "\n\n"

        yield "data: [DONE]\n\n"

    except Exception as e:
        logging.exception(e)
        yield "data: " + json.dumps(
            get_data_openai(
                id=session_id or str(uuid4()),
@@ -250,6 +257,7 @@ def completionOpenAI(tenant_id, agent_id, question, session_id=None, stream=True
    else:
        try:
            all_content = ""
            reference = {}
            for ans in completion(
                tenant_id=tenant_id,
                agent_id=agent_id,
@@ -260,13 +268,18 @@ def completionOpenAI(tenant_id, agent_id, question, session_id=None, stream=True
            ):
                if isinstance(ans, str):
                    ans = json.loads(ans[5:])
                if ans.get("event") != "message" or not ans.get("data", {}).get("reference", None):
                if ans.get("event") not in ["message", "message_end"]:
                    continue
                all_content += ans["data"]["content"]

                if ans["event"] == "message":
                    all_content += ans["data"]["content"]

                if ans.get("data", {}).get("reference", None):
                    reference.update(ans["data"]["reference"])

            completion_tokens = len(tiktokenenc.encode(all_content))

            yield get_data_openai(
            openai_data = get_data_openai(
                id=session_id or str(uuid4()),
                model=agent_id,
                prompt_tokens=prompt_tokens,
@@ -276,7 +289,12 @@ def completionOpenAI(tenant_id, agent_id, question, session_id=None, stream=True
                param=None
            )

            if reference:
                openai_data["choices"][0]["message"]["reference"] = reference

            yield openai_data
        except Exception as e:
            logging.exception(e)
            yield get_data_openai(
                id=session_id or str(uuid4()),
                model=agent_id,

@@ -133,6 +133,13 @@ class UserService(CommonService):
        cls.model.update(user_dict).where(
            cls.model.id == user_id).execute()

    @classmethod
    @DB.connection_context()
    def is_admin(cls, user_id):
        return cls.model.select().where(
            cls.model.id == user_id,
            cls.model.is_superuser == 1).count() > 0


class TenantService(CommonService):
    """Service class for managing tenant-related database operations.

@@ -131,6 +131,12 @@ class RAGFlowExcelParser:

        return tb_chunks

    def markdown(self, fnm):
        import pandas as pd
        file_like_object = BytesIO(fnm) if not isinstance(fnm, str) else fnm
        df = pd.read_excel(file_like_object)
        return df.to_markdown(index=False)

    def __call__(self, fnm):
        file_like_object = BytesIO(fnm) if not isinstance(fnm, str) else fnm
        wb = RAGFlowExcelParser._load_excel_to_workbook(file_like_object)
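The new `markdown` method above accepts either a path or raw bytes; a minimal usage sketch follows (the file name is invented, the `ExcelParser` alias is the one imported elsewhere in this change, and note that pandas' `DataFrame.to_markdown` needs the optional `tabulate` package installed):

```python
from deepdoc.parser import ExcelParser

parser = ExcelParser()
print(parser.markdown("report.xlsx"))        # path form; "report.xlsx" is a made-up file
with open("report.xlsx", "rb") as f:
    print(parser.markdown(f.read()))         # bytes form, wrapped in BytesIO internally
```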
@@ -93,6 +93,7 @@ class RAGFlowPdfParser:
            model_dir, "updown_concat_xgb.model"))

        self.page_from = 0
        self.column_num = 1

    def __char_width(self, c):
        return (c["x1"] - c["x0"]) // max(len(c["text"]), 1)
@@ -427,10 +428,18 @@ class RAGFlowPdfParser:
            i += 1
        self.boxes = bxs

    def _naive_vertical_merge(self):
    def _naive_vertical_merge(self, zoomin=3):
        bxs = Recognizer.sort_Y_firstly(
            self.boxes, np.median(
                self.mean_height) / 3)

        column_width = np.median([b["x1"] - b["x0"] for b in self.boxes])
        self.column_num = int(self.page_images[0].size[0] / zoomin / column_width)
        if column_width < self.page_images[0].size[0] / zoomin / self.column_num:
            logging.info("Multi-column................... {} {}".format(column_width,
                         self.page_images[0].size[0] / zoomin / self.column_num))
            self.boxes = self.sort_X_by_page(self.boxes, column_width / self.column_num)

        i = 0
        while i + 1 < len(bxs):
            b = bxs[i]
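The column detection added above is easiest to sanity-check with concrete numbers; a quick sketch (all values invented for illustration):

```python
# An A4 page is ~595 pt wide, rendered at zoomin=3 -> ~1785 px.
# With a median text-box width of 280 pt (a two-column layout):
page_px, zoomin, column_width = 1785, 3, 280.0
column_num = int(page_px / zoomin / column_width)               # int(2.125) == 2
is_multi_column = column_width < page_px / zoomin / column_num  # 280 < 297.5 -> True
assert column_num == 2 and is_multi_column
```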
@@ -1139,20 +1148,94 @@ class RAGFlowPdfParser:
                              need_image, zoomin, return_html, False)
        return self.__filterout_scraps(deepcopy(self.boxes), zoomin), tbls

    def parse_into_bboxes(self, fnm, callback=None, zoomin=3):
        start = timer()
        self.__images__(fnm, zoomin)
        if callback:
            callback(0.40, "OCR finished ({:.2f}s)".format(timer() - start))

        start = timer()
        self._layouts_rec(zoomin)
        if callback:
            callback(0.63, "Layout analysis ({:.2f}s)".format(timer() - start))

        start = timer()
        self._table_transformer_job(zoomin)
        if callback:
            callback(0.83, "Table analysis ({:.2f}s)".format(timer() - start))

        start = timer()
        self._text_merge()
        self._concat_downward()
        self._naive_vertical_merge(zoomin)
        if callback:
            callback(0.92, "Text merged ({:.2f}s)".format(timer() - start))

        start = timer()
        tbls, figs = self._extract_table_figure(True, zoomin, True, True, True)

        def insert_table_figures(tbls_or_figs, layout_type):
            def min_rectangle_distance(rect1, rect2):
                import math
                pn1, left1, right1, top1, bottom1 = rect1
                pn2, left2, right2, top2, bottom2 = rect2
                if (right1 >= left2 and right2 >= left1 and
                        bottom1 >= top2 and bottom2 >= top1):
                    return 0 + (pn1-pn2)*10000
                if right1 < left2:
                    dx = left2 - right1
                elif right2 < left1:
                    dx = left1 - right2
                else:
                    dx = 0
                if bottom1 < top2:
                    dy = top2 - bottom1
                elif bottom2 < top1:
                    dy = top1 - bottom2
                else:
                    dy = 0
                return math.sqrt(dx*dx + dy*dy) + (pn1-pn2)*10000

            for (img, txt), poss in tbls_or_figs:
                bboxes = [(i, (b["page_number"], b["x0"], b["x1"], b["top"], b["bottom"])) for i, b in enumerate(self.boxes)]
                dists = [(min_rectangle_distance((pn, left, right, top, bott), rect), i) for i, rect in bboxes for pn, left, right, top, bott in poss]
                min_i = np.argmin(dists, axis=0)[0]
                min_i, rect = bboxes[dists[min_i][-1]]
                if isinstance(txt, list):
                    txt = "\n".join(txt)
                self.boxes.insert(min_i, {
                    "page_number": rect[0], "x0": rect[1], "x1": rect[2], "top": rect[3], "bottom": rect[4], "layout_type": layout_type, "text": txt, "image": img
                })

        for b in self.boxes:
            b["position_tag"] = self._line_tag(b, zoomin)
            b["image"] = self.crop(b["position_tag"], zoomin)

        insert_table_figures(tbls, "table")
        insert_table_figures(figs, "figure")
        if callback:
            callback(1, "Structured ({:.2f}s)".format(timer() - start))
        return deepcopy(self.boxes)

    @staticmethod
    def remove_tag(txt):
        return re.sub(r"@@[\t0-9.-]+?##", "", txt)

    def crop(self, text, ZM=3, need_position=False):
        imgs = []
    @staticmethod
    def extract_positions(txt):
        poss = []
        for tag in re.findall(r"@@[0-9-]+\t[0-9.\t]+##", text):
        for tag in re.findall(r"@@[0-9-]+\t[0-9.\t]+##", txt):
            pn, left, right, top, bottom = tag.strip(
                "#").strip("@").split("\t")
            left, right, top, bottom = float(left), float(
                right), float(top), float(bottom)
            poss.append(([int(p) - 1 for p in pn.split("-")],
                         left, right, top, bottom))
        return poss

    def crop(self, text, ZM=3, need_position=False):
        imgs = []
        poss = self.extract_positions(text)
        if not poss:
            if need_position:
                return None, None
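The `@@…##` position tags consumed by `remove_tag` and `extract_positions` encode `page<TAB>x0<TAB>x1<TAB>top<TAB>bottom`; a small round-trip sketch using the same regexes as above (the tag values are invented):

```python
import re

tagged = "Some line of text@@3\t72.0\t520.5\t100.2\t112.8##"   # sample tag, made-up coordinates

print(re.sub(r"@@[\t0-9.-]+?##", "", tagged))                   # -> "Some line of text"

tag = re.findall(r"@@[0-9-]+\t[0-9.\t]+##", tagged)[0]
pn, left, right, top, bottom = tag.strip("#").strip("@").split("\t")
# Page numbers are 1-based inside the tag; extract_positions shifts them to 0-based.
print([int(p) - 1 for p in pn.split("-")], float(left), float(right), float(top), float(bottom))
```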
@@ -1296,8 +1379,8 @@ class VisionParser(RAGFlowPdfParser):

    def __call__(self, filename, from_page=0, to_page=100000, **kwargs):
        callback = kwargs.get("callback", lambda prog, msg: None)

        self.__images__(fnm=filename, zoomin=3, page_from=from_page, page_to=to_page, **kwargs)
        zoomin = kwargs.get("zoomin", 3)
        self.__images__(fnm=filename, zoomin=zoomin, page_from=from_page, page_to=to_page, callback=callback)

        total_pdf_pages = self.total_page

@@ -1311,16 +1394,19 @@ class VisionParser(RAGFlowPdfParser):
            if pdf_page_num < start_page or pdf_page_num >= end_page:
                continue

            docs = picture_vision_llm_chunk(
            text = picture_vision_llm_chunk(
                binary=img_binary,
                vision_model=self.vision_model,
                prompt=vision_llm_describe_prompt(page=pdf_page_num+1),
                callback=callback,
            )
            if kwargs.get("callback"):
                kwargs["callback"](idx*1./len(self.page_images), f"Processed: {idx+1}/{len(self.page_images)}")

            if docs:
                all_docs.append(docs)
        return [(doc, "") for doc in all_docs], []
            if text:
                width, height = self.page_images[idx].size
                all_docs.append((text, f"{pdf_page_num+1} 0 {width/zoomin} 0 {height/zoomin}"))
        return all_docs, []

if __name__ == "__main__":

@@ -31,11 +31,11 @@ def save_results(image_list, results, labels, output_dir='output/', threshold=0.
    logging.debug("save result to: " + out_path)


def draw_box(im, result, lables, threshold=0.5):
def draw_box(im, result, labels, threshold=0.5):
    draw_thickness = min(im.size) // 320
    draw = ImageDraw.Draw(im)
    color_list = get_color_map_list(len(lables))
    clsid2color = {n.lower():color_list[i] for i,n in enumerate(lables)}
    color_list = get_color_map_list(len(labels))
    clsid2color = {n.lower():color_list[i] for i,n in enumerate(labels)}
    result = [r for r in result if r["score"] >= threshold]

    for dt in result:

@@ -143,7 +143,6 @@ Non-stream:
}
```

Failure:

```json
@@ -200,19 +199,24 @@ curl --request POST \
- `stream` (*Body parameter*) `boolean`

  Whether to receive the response as a stream. Set this to `false` explicitly if you prefer to receive the entire response in one go instead of as a stream.

- `session_id` (*Body parameter*) `string`

  Agent session id.
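A minimal request sketch for this endpoint (host, agent ID, key, and the URL path are all placeholders; the full path is abridged in the `curl` example above, so treat the one below as an assumption):

```python
import requests  # hypothetical client-side sketch, not part of this change

url = "http://<ragflow-host>/api/v1/agents_openai/<agent_id>/chat/completions"  # assumed path
headers = {"Authorization": "Bearer <YOUR_API_KEY>"}
payload = {
    "model": "<agent_id>",
    "messages": [{"role": "user", "content": "How to install Neovim?"}],
    "stream": False,                        # set True to receive the SSE chunks shown below
    "session_id": "<existing-session-id>",  # optional: continue a prior agent session
}
print(requests.post(url, json=payload, headers=headers).json())
```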

#### Response

Stream:

```json
...

data: {
    "id": "5fa65c94-e316-4954-800a-06dfd5827052",
    "id": "c39f6f9c83d911f0858253708ecb6573",
    "object": "chat.completion.chunk",
    "model": "99ee29d6783511f09c921a6272e682d8",
    "model": "d1f79142831f11f09cc51795b9eb07c0",
    "choices": [
        {
            "delta": {
                "content": "Hello"
                "content": " terminal"
            },
            "finish_reason": null,
            "index": 0
@@ -220,21 +224,83 @@ data: {
        }
    ]
}

data: {"id": "518022d9-545b-4100-89ed-ecd9e46fa753", "object": "chat.completion.chunk", "model": "99ee29d6783511f09c921a6272e682d8", "choices": [{"delta": {"content": "!"}, "finish_reason": null, "index": 0}]}
data: {
    "id": "c39f6f9c83d911f0858253708ecb6573",
    "object": "chat.completion.chunk",
    "model": "d1f79142831f11f09cc51795b9eb07c0",
    "choices": [
        {
            "delta": {
                "content": "."
            },
            "finish_reason": null,
            "index": 0
        }
    ]
}

data: {"id": "f37c4af0-8187-4c86-8186-048c3c6ffe4e", "object": "chat.completion.chunk", "model": "99ee29d6783511f09c921a6272e682d8", "choices": [{"delta": {"content": " How"}, "finish_reason": null, "index": 0}]}

data: {"id": "3ebc0fcb-0f85-4024-b4a5-3b03234a16df", "object": "chat.completion.chunk", "model": "99ee29d6783511f09c921a6272e682d8", "choices": [{"delta": {"content": " can"}, "finish_reason": null, "index": 0}]}

data: {"id": "efa1f3cf-7bc4-47a4-8e53-cd696f290587", "object": "chat.completion.chunk", "model": "99ee29d6783511f09c921a6272e682d8", "choices": [{"delta": {"content": " I"}, "finish_reason": null, "index": 0}]}

data: {"id": "2eb6f741-50a3-4d3d-8418-88be27895611", "object": "chat.completion.chunk", "model": "99ee29d6783511f09c921a6272e682d8", "choices": [{"delta": {"content": " assist"}, "finish_reason": null, "index": 0}]}

data: {"id": "f1227e4f-bf8b-462c-8632-8f5269492ce9", "object": "chat.completion.chunk", "model": "99ee29d6783511f09c921a6272e682d8", "choices": [{"delta": {"content": " you"}, "finish_reason": null, "index": 0}]}

data: {"id": "35b669d0-b2be-4c0c-88d8-17ff98592b21", "object": "chat.completion.chunk", "model": "99ee29d6783511f09c921a6272e682d8", "choices": [{"delta": {"content": " today"}, "finish_reason": null, "index": 0}]}

data: {"id": "f00d8a39-af60-4f32-924f-d64106a7fdf1", "object": "chat.completion.chunk", "model": "99ee29d6783511f09c921a6272e682d8", "choices": [{"delta": {"content": "?"}, "finish_reason": null, "index": 0}]}
data: {
    "id": "c39f6f9c83d911f0858253708ecb6573",
    "object": "chat.completion.chunk",
    "model": "d1f79142831f11f09cc51795b9eb07c0",
    "choices": [
        {
            "delta": {
                "content": "",
                "reference": {
                    "chunks": {
                        "20": {
                            "id": "4b8935ac0a22deb1",
                            "content": "```cd /usr/ports/editors/neovim/ && make install```## Android[Termux](https://github.com/termux/termux-app) offers a Neovim package.",
                            "document_id": "4bdd2ff65e1511f0907f09f583941b45",
                            "document_name": "INSTALL22.md",
                            "dataset_id": "456ce60c5e1511f0907f09f583941b45",
                            "image_id": "",
                            "positions": [
                                [
                                    12,
                                    11,
                                    11,
                                    11,
                                    11
                                ]
                            ],
                            "url": null,
                            "similarity": 0.5697155305154673,
                            "vector_similarity": 0.7323851005515574,
                            "term_similarity": 0.5000000005,
                            "doc_type": ""
                        }
                    },
                    "doc_aggs": {
                        "INSTALL22.md": {
                            "doc_name": "INSTALL22.md",
                            "doc_id": "4bdd2ff65e1511f0907f09f583941b45",
                            "count": 3
                        },
                        "INSTALL.md": {
                            "doc_name": "INSTALL.md",
                            "doc_id": "4bd7fdd85e1511f0907f09f583941b45",
                            "count": 2
                        },
                        "INSTALL(1).md": {
                            "doc_name": "INSTALL(1).md",
                            "doc_id": "4bdfb42e5e1511f0907f09f583941b45",
                            "count": 2
                        },
                        "INSTALL3.md": {
                            "doc_name": "INSTALL3.md",
                            "doc_id": "4bdab5825e1511f0907f09f583941b45",
                            "count": 1
                        }
                    }
                }
            },
            "finish_reason": null,
            "index": 0
        }
    ]
}

data: [DONE]
```
@@ -249,30 +315,77 @@ Non-stream:
            "index": 0,
            "logprobs": null,
            "message": {
                "content": "Hello! How can I assist you today?",
                "content": "\nTo install Neovim, the process varies depending on your operating system:\n\n### For Windows:\n1. **Download from GitHub**: \n - Visit the [Neovim releases page](https://github.com/neovim/neovim/releases)\n - Download the latest Windows installer (nvim-win64.msi)\n - Run the installer and follow the prompts\n\n2. **Using winget** (Windows Package Manager):\n...",
                "reference": {
                    "chunks": {
                        "20": {
                            "content": "```cd /usr/ports/editors/neovim/ && make install```## Android[Termux](https://github.com/termux/termux-app) offers a Neovim package.",
                            "dataset_id": "456ce60c5e1511f0907f09f583941b45",
                            "doc_type": "",
                            "document_id": "4bdd2ff65e1511f0907f09f583941b45",
                            "document_name": "INSTALL22.md",
                            "id": "4b8935ac0a22deb1",
                            "image_id": "",
                            "positions": [
                                [
                                    12,
                                    11,
                                    11,
                                    11,
                                    11
                                ]
                            ],
                            "similarity": 0.5697155305154673,
                            "term_similarity": 0.5000000005,
                            "url": null,
                            "vector_similarity": 0.7323851005515574
                        }
                    },
                    "doc_aggs": {
                        "INSTALL(1).md": {
                            "count": 2,
                            "doc_id": "4bdfb42e5e1511f0907f09f583941b45",
                            "doc_name": "INSTALL(1).md"
                        },
                        "INSTALL.md": {
                            "count": 2,
                            "doc_id": "4bd7fdd85e1511f0907f09f583941b45",
                            "doc_name": "INSTALL.md"
                        },
                        "INSTALL22.md": {
                            "count": 3,
                            "doc_id": "4bdd2ff65e1511f0907f09f583941b45",
                            "doc_name": "INSTALL22.md"
                        },
                        "INSTALL3.md": {
                            "count": 1,
                            "doc_id": "4bdab5825e1511f0907f09f583941b45",
                            "doc_name": "INSTALL3.md"
                        }
                    }
                },
                "role": "assistant"
            }
        }
    ],
    "created": null,
    "id": "17aa4ec5-6d36-40c6-9a96-1b069c216d59",
    "model": "99ee29d6783511f09c921a6272e682d8",
    "id": "c39f6f9c83d911f0858253708ecb6573",
    "model": "d1f79142831f11f09cc51795b9eb07c0",
    "object": "chat.completion",
    "param": null,
    "usage": {
        "completion_tokens": 9,
        "completion_tokens": 415,
        "completion_tokens_details": {
            "accepted_prediction_tokens": 0,
            "reasoning_tokens": 0,
            "rejected_prediction_tokens": 0
        },
        "prompt_tokens": 1,
        "total_tokens": 10
        "prompt_tokens": 6,
        "total_tokens": 421
    }
}
```

Failure:

```json
@@ -729,6 +842,7 @@ Failure:
    "message": "The dataset doesn't exist"
}
```

---

### Get knowledge graph
@@ -808,6 +922,7 @@ Failure:
    "message": "The dataset doesn't exist"
}
```

---

### Delete knowledge graph
@@ -855,6 +970,7 @@ Failure:
    "message": "The dataset doesn't exist"
}
```

---

## FILE MANAGEMENT WITHIN DATASET
@@ -3017,41 +3133,88 @@ success without `session_id` provided and with no variables specified in the **B
Stream:

```json
data:{
    "event": "message",
    "message_id": "eb0c0a5e783511f0b9b61a6272e682d8",
    "created_at": 1755083342,
    "task_id": "99ee29d6783511f09c921a6272e682d8",
    "data": {
        "content": "Hello"
    },
    "session_id": "eaf19a8e783511f0b9b61a6272e682d8"
}

data:{
    "event": "message",
    "message_id": "eb0c0a5e783511f0b9b61a6272e682d8",
    "created_at": 1755083342,
    "task_id": "99ee29d6783511f09c921a6272e682d8",
    "data": {
        "content": "!"
    },
    "session_id": "eaf19a8e783511f0b9b61a6272e682d8"
}

data:{
    "event": "message",
    "message_id": "eb0c0a5e783511f0b9b61a6272e682d8",
    "created_at": 1755083342,
    "task_id": "99ee29d6783511f09c921a6272e682d8",
    "data": {
        "content": " How"
    },
    "session_id": "eaf19a8e783511f0b9b61a6272e682d8"
}

...

data: {
    "event": "message",
    "message_id": "cecdcb0e83dc11f0858253708ecb6573",
    "created_at": 1756364483,
    "task_id": "d1f79142831f11f09cc51795b9eb07c0",
    "data": {
        "content": " themes"
    },
    "session_id": "cd097ca083dc11f0858253708ecb6573"
}

data: {
    "event": "message",
    "message_id": "cecdcb0e83dc11f0858253708ecb6573",
    "created_at": 1756364483,
    "task_id": "d1f79142831f11f09cc51795b9eb07c0",
    "data": {
        "content": "."
    },
    "session_id": "cd097ca083dc11f0858253708ecb6573"
}

data: {
    "event": "message_end",
    "message_id": "cecdcb0e83dc11f0858253708ecb6573",
    "created_at": 1756364483,
    "task_id": "d1f79142831f11f09cc51795b9eb07c0",
    "data": {
        "reference": {
            "chunks": {
                "20": {
                    "id": "4b8935ac0a22deb1",
                    "content": "```cd /usr/ports/editors/neovim/ && make install```## Android[Termux](https://github.com/termux/termux-app) offers a Neovim package.",
                    "document_id": "4bdd2ff65e1511f0907f09f583941b45",
                    "document_name": "INSTALL22.md",
                    "dataset_id": "456ce60c5e1511f0907f09f583941b45",
                    "image_id": "",
                    "positions": [
                        [
                            12,
                            11,
                            11,
                            11,
                            11
                        ]
                    ],
                    "url": null,
                    "similarity": 0.5705525104787287,
                    "vector_similarity": 0.7351750337624289,
                    "term_similarity": 0.5000000005,
                    "doc_type": ""
                }
            },
            "doc_aggs": {
                "INSTALL22.md": {
                    "doc_name": "INSTALL22.md",
                    "doc_id": "4bdd2ff65e1511f0907f09f583941b45",
                    "count": 3
                },
                "INSTALL.md": {
                    "doc_name": "INSTALL.md",
                    "doc_id": "4bd7fdd85e1511f0907f09f583941b45",
                    "count": 2
                },
                "INSTALL(1).md": {
                    "doc_name": "INSTALL(1).md",
                    "doc_id": "4bdfb42e5e1511f0907f09f583941b45",
                    "count": 2
                },
                "INSTALL3.md": {
                    "doc_name": "INSTALL3.md",
                    "doc_id": "4bdab5825e1511f0907f09f583941b45",
                    "count": 1
                }
            }
        }
    },
    "session_id": "cd097ca083dc11f0858253708ecb6573"
}

data:[DONE]
```
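A minimal consumer sketch for this stream (the endpoint URL and key are placeholder assumptions, not part of this change). It accumulates `message` chunks and picks the citations up from the final `message_end` event, matching the new event handling in this commit:

```python
import json
import requests  # hypothetical client, not part of this change

AGENT_URL = "http://<ragflow-host>/api/v1/agents/<agent_id>/completions"  # placeholder path
HEADERS = {"Authorization": "Bearer <YOUR_API_KEY>"}

content, reference = "", {}
with requests.post(AGENT_URL, json={"question": "How to install Neovim?", "stream": True},
                   headers=HEADERS, stream=True) as resp:
    for line in resp.iter_lines(decode_unicode=True):
        if not line or not line.startswith("data:"):
            continue
        payload = line[5:].strip()
        if payload == "[DONE]":
            break
        event = json.loads(payload)
        if event.get("event") == "message":
            content += event["data"]["content"]
        elif event.get("event") == "message_end":   # carries the "reference" block shown above
            reference = event["data"].get("reference", {})

print(content)
print(list(reference.get("doc_aggs", {})))
```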

Non-stream:

```json
{
    "code": 0,
    "data": {
        "created_at": 1755083440,
        "created_at": 1756363177,
        "data": {
            "created_at": 547061.147866385,
            "elapsed_time": 2.595433341921307,
            "inputs": {},
            "content": "\nTo install Neovim, the process varies depending on your operating system:\n\n### For macOS:\nUsing Homebrew:\n```bash\nbrew install neovim\n```\n\n### For Linux (Debian/Ubuntu):\n```bash\nsudo apt update\nsudo apt install neovim\n```\n\nFor other Linux distributions, you can use their respective package managers or build from source.\n\n### For Windows:\n1. Download the latest Windows installer from the official Neovim GitHub releases page\n2. Run the installer and follow the prompts\n3. Add Neovim to your PATH if not done automatically\n\n### From source (Unix-like systems):\n```bash\ngit clone https://github.com/neovim/neovim.git\ncd neovim\nmake CMAKE_BUILD_TYPE=Release\nsudo make install\n```\n\nAfter installation, you can verify it by running `nvim --version` in your terminal.",
            "created_at": 18129.044975627,
            "elapsed_time": 10.0157331670016,
            "inputs": {
                "var1": {
                    "value": "I am var1"
                },
                "var2": {
                    "value": "I am var2"
                }
            },
            "outputs": {
                "_created_time": 547061.149137775,
                "_elapsed_time": 8.720310870558023e-05,
                "content": "Hello! How can I assist you today?"
                "_created_time": 18129.502422278,
                "_elapsed_time": 0.00013378599760471843,
                "content": "\nTo install Neovim, the process varies depending on your operating system:\n\n### For macOS:\nUsing Homebrew:\n```bash\nbrew install neovim\n```\n\n### For Linux (Debian/Ubuntu):\n```bash\nsudo apt update\nsudo apt install neovim\n```\n\nFor other Linux distributions, you can use their respective package managers or build from source.\n\n### For Windows:\n1. Download the latest Windows installer from the official Neovim GitHub releases page\n2. Run the installer and follow the prompts\n3. Add Neovim to your PATH if not done automatically\n\n### From source (Unix-like systems):\n```bash\ngit clone https://github.com/neovim/neovim.git\ncd neovim\nmake CMAKE_BUILD_TYPE=Release\nsudo make install\n```\n\nAfter installation, you can verify it by running `nvim --version` in your terminal."
            },
            "reference": {
                "chunks": {
                    "20": {
                        "content": "```cd /usr/ports/editors/neovim/ && make install```## Android[Termux](https://github.com/termux/termux-app) offers a Neovim package.",
                        "dataset_id": "456ce60c5e1511f0907f09f583941b45",
                        "doc_type": "",
                        "document_id": "4bdd2ff65e1511f0907f09f583941b45",
                        "document_name": "INSTALL22.md",
                        "id": "4b8935ac0a22deb1",
                        "image_id": "",
                        "positions": [
                            [
                                12,
                                11,
                                11,
                                11,
                                11
                            ]
                        ],
                        "similarity": 0.5705525104787287,
                        "term_similarity": 0.5000000005,
                        "url": null,
                        "vector_similarity": 0.7351750337624289
                    }
                },
                "doc_aggs": {
                    "INSTALL(1).md": {
                        "count": 2,
                        "doc_id": "4bdfb42e5e1511f0907f09f583941b45",
                        "doc_name": "INSTALL(1).md"
                    },
                    "INSTALL.md": {
                        "count": 2,
                        "doc_id": "4bd7fdd85e1511f0907f09f583941b45",
                        "doc_name": "INSTALL.md"
                    },
                    "INSTALL22.md": {
                        "count": 3,
                        "doc_id": "4bdd2ff65e1511f0907f09f583941b45",
                        "doc_name": "INSTALL22.md"
                    },
                    "INSTALL3.md": {
                        "count": 1,
                        "doc_id": "4bdab5825e1511f0907f09f583941b45",
                        "doc_name": "INSTALL3.md"
                    }
                }
            }
        },
        "event": "workflow_finished",
        "message_id": "25807f94783611f095171a6272e682d8",
        "session_id": "25663198783611f095171a6272e682d8",
        "task_id": "99ee29d6783511f09c921a6272e682d8"
        "message_id": "c4692a2683d911f0858253708ecb6573",
        "session_id": "c39f6f9c83d911f0858253708ecb6573",
        "task_id": "d1f79142831f11f09cc51795b9eb07c0"
    }
}
```

rag/flow/__init__.py (Normal file, 49 lines added)
@@ -0,0 +1,49 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import os
import importlib
import inspect
from types import ModuleType
from typing import Dict, Type

_package_path = os.path.dirname(__file__)
__all_classes: Dict[str, Type] = {}

def _import_submodules() -> None:
    for filename in os.listdir(_package_path):  # noqa: F821
        if filename.startswith("__") or not filename.endswith(".py") or filename.startswith("base"):
            continue
        module_name = filename[:-3]

        try:
            module = importlib.import_module(f".{module_name}", package=__name__)
            _extract_classes_from_module(module)  # noqa: F821
        except ImportError as e:
            print(f"Warning: Failed to import module {module_name}: {str(e)}")

def _extract_classes_from_module(module: ModuleType) -> None:
    for name, obj in inspect.getmembers(module):
        if (inspect.isclass(obj) and
                obj.__module__ == module.__name__ and not name.startswith("_")):
            __all_classes[name] = obj
            globals()[name] = obj

_import_submodules()

__all__ = list(__all_classes.keys()) + ["__all_classes"]

del _package_path, _import_submodules, _extract_classes_from_module
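The effect of this loader is that every public component class in the package can be looked up by name; a tiny sketch of how that might be used (the `"Chunker"` key assumes the chunker module added below):

```python
from rag.flow import __all_classes  # populated by _import_submodules() above

chunker_cls = __all_classes.get("Chunker")  # e.g. resolve a component class from a DSL name
print(chunker_cls)
```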

rag/flow/base.py (Normal file, 59 lines added)
@@ -0,0 +1,59 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import time
import os
import logging
from functools import partial
from typing import Any
import trio
from agent.component.base import ComponentParamBase, ComponentBase
from api.utils.api_utils import timeout


class ProcessParamBase(ComponentParamBase):
    def __init__(self):
        super().__init__()
        self.timeout = 100000000
        self.persist_logs = True


class ProcessBase(ComponentBase):

    def __init__(self, pipeline, id, param: ProcessParamBase):
        super().__init__(pipeline, id, param)
        self.callback = partial(self._canvas.callback, self.component_name)

    async def invoke(self, **kwargs) -> dict[str, Any]:
        self.set_output("_created_time", time.perf_counter())
        for k, v in kwargs.items():
            self.set_output(k, v)
        try:
            with trio.fail_after(self._param.timeout):
                await self._invoke(**kwargs)
            self.callback(1, "Done")
        except Exception as e:
            if self.get_exception_default_value():
                self.set_exception_default_value()
            else:
                self.set_output("_ERROR", str(e))
            logging.exception(e)
            self.callback(-1, str(e))
        self.set_output("_elapsed_time", time.perf_counter() - self.output("_created_time"))
        return self.output()

    @timeout(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10*60))
    async def _invoke(self, **kwargs):
        raise NotImplementedError()
rag/flow/begin.py (Normal file, 47 lines added)
@@ -0,0 +1,47 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from api.db.services.document_service import DocumentService
from api.db.services.file2document_service import File2DocumentService
from api.db.services.file_service import FileService
from rag.flow.base import ProcessBase, ProcessParamBase
from rag.utils.storage_factory import STORAGE_IMPL


class FileParam(ProcessParamBase):
    def __init__(self):
        super().__init__()

    def check(self):
        pass


class File(ProcessBase):
    component_name = "File"

    async def _invoke(self, **kwargs):
        if self._canvas._doc_id:
            e, doc = DocumentService.get_by_id(self._canvas._doc_id)
            if not e:
                self.set_output("_ERROR", f"Document({self._canvas._doc_id}) not found!")
                return

            b, n = File2DocumentService.get_storage_address(doc_id=self._canvas._doc_id)
            self.set_output("blob", STORAGE_IMPL.get(b, n))
            self.set_output("name", doc.name)
        else:
            file = kwargs.get("file")
            self.set_output("name", file["name"])
            self.set_output("blob", FileService.get_blob(file["created_by"], file["id"]))
rag/flow/chunker.py (Normal file, 160 lines added)
@@ -0,0 +1,160 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import random

import trio

from api.db import LLMType
from api.db.services.llm_service import LLMBundle
from deepdoc.parser.pdf_parser import RAGFlowPdfParser
from graphrag.utils import get_llm_cache, chat_limiter, set_llm_cache
from rag.flow.base import ProcessBase, ProcessParamBase
from rag.nlp import naive_merge, naive_merge_with_images
from rag.prompts.prompts import keyword_extraction, question_proposal


class ChunkerParam(ProcessParamBase):
    def __init__(self):
        super().__init__()
        self.method_options = ["general", "q&a", "resume", "manual", "table", "paper", "book", "laws", "presentation", "one"]
        self.method = "general"
        self.chunk_token_size = 512
        self.delimiter = "\n"
        self.overlapped_percent = 0
        self.page_rank = 0
        self.auto_keywords = 0
        self.auto_questions = 0
        self.tag_sets = []
        self.llm_setting = {
            "llm_name": "",
            "lang": "Chinese"
        }

    def check(self):
        self.check_valid_value(self.method.lower(), "Chunk method abnormal.", self.method_options)
        self.check_positive_integer(self.chunk_token_size, "Chunk token size.")
        self.check_nonnegative_number(self.page_rank, "Page rank value: (0, 10]")
        self.check_nonnegative_number(self.auto_keywords, "Auto-keyword value: (0, 10]")
        self.check_nonnegative_number(self.auto_questions, "Auto-question value: (0, 10]")
        self.check_decimal_float(self.overlapped_percent, "Overlapped percentage: [0, 1)")


class Chunker(ProcessBase):
    component_name = "Chunker"

    def _general(self, **kwargs):
        self.callback(random.randint(1, 5)/100., "Start to chunk via `General`.")
        if kwargs.get("output_format") in ["markdown", "text"]:
            cks = naive_merge(kwargs.get(kwargs["output_format"]), self._param.chunk_token_size, self._param.delimiter, self._param.overlapped_percent)
            return [{"text": c} for c in cks]

        sections, section_images = [], []
        for o in kwargs["json"]:
            sections.append((o["text"], o.get("position_tag", "")))
            section_images.append(o.get("image"))

        chunks, images = naive_merge_with_images(sections, section_images, self._param.chunk_token_size, self._param.delimiter, self._param.overlapped_percent)
        return [{
            "text": RAGFlowPdfParser.remove_tag(c),
            "image": img,
            "positions": RAGFlowPdfParser.extract_positions(c)
        } for c, img in zip(chunks, images)]

    def _q_and_a(self, **kwargs):
        pass

    def _resume(self, **kwargs):
        pass

    def _manual(self, **kwargs):
        pass

    def _table(self, **kwargs):
        pass

    def _paper(self, **kwargs):
        pass

    def _book(self, **kwargs):
        pass

    def _laws(self, **kwargs):
        pass

    def _presentation(self, **kwargs):
        pass

    def _one(self, **kwargs):
        pass

    async def _invoke(self, **kwargs):
        function_map = {
            "general": self._general,
            "q&a": self._q_and_a,
            "resume": self._resume,
            "manual": self._manual,
            "table": self._table,
            "paper": self._paper,
            "book": self._book,
            "laws": self._laws,
            "presentation": self._presentation,
            "one": self._one,
        }
        chunks = function_map[self._param.method](**kwargs)
        llm_setting = self._param.llm_setting

        async def auto_keywords():
            nonlocal chunks, llm_setting
            chat_mdl = LLMBundle(self._canvas._tenant_id, LLMType.CHAT, llm_name=llm_setting["llm_name"], lang=llm_setting["lang"])

            async def doc_keyword_extraction(chat_mdl, ck, topn):
                cached = get_llm_cache(chat_mdl.llm_name, ck["text"], "keywords", {"topn": topn})
                if not cached:
                    async with chat_limiter:
                        cached = await trio.to_thread.run_sync(lambda: keyword_extraction(chat_mdl, ck["text"], topn))
                    set_llm_cache(chat_mdl.llm_name, ck["text"], cached, "keywords", {"topn": topn})
                if cached:
                    ck["keywords"] = cached.split(",")

            async with trio.open_nursery() as nursery:
                for ck in chunks:
                    nursery.start_soon(doc_keyword_extraction, chat_mdl, ck, self._param.auto_keywords)

        async def auto_questions():
            nonlocal chunks, llm_setting
            chat_mdl = LLMBundle(self._canvas._tenant_id, LLMType.CHAT, llm_name=llm_setting["llm_name"], lang=llm_setting["lang"])

            # The parameter was originally named `d` while the body read `ck` from the
            # enclosing loop; unified on `ck` so each task works on the chunk it was given.
            async def doc_question_proposal(chat_mdl, ck, topn):
                cached = get_llm_cache(chat_mdl.llm_name, ck["text"], "question", {"topn": topn})
                if not cached:
                    async with chat_limiter:
                        cached = await trio.to_thread.run_sync(lambda: question_proposal(chat_mdl, ck["text"], topn))
                    set_llm_cache(chat_mdl.llm_name, ck["text"], cached, "question", {"topn": topn})
                if cached:
                    ck["questions"] = cached.split("\n")

            async with trio.open_nursery() as nursery:
                for ck in chunks:
                    nursery.start_soon(doc_question_proposal, chat_mdl, ck, self._param.auto_questions)

        async with trio.open_nursery() as nursery:
            if self._param.auto_questions:
                nursery.start_soon(auto_questions)
            if self._param.auto_keywords:
                nursery.start_soon(auto_keywords)

        if self._param.page_rank:
            for ck in chunks:
                ck["page_rank"] = self._param.page_rank

        self.set_output("chunks", chunks)

rag/flow/parser.py (Normal file, 107 lines added)
@@ -0,0 +1,107 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import random

import trio

from api.db import LLMType
from api.db.services.llm_service import LLMBundle
from deepdoc.parser.pdf_parser import RAGFlowPdfParser, PlainParser, VisionParser
from rag.flow.base import ProcessBase, ProcessParamBase
from rag.llm.cv_model import Base as VLM
from deepdoc.parser import ExcelParser


class ParserParam(ProcessParamBase):
    def __init__(self):
        super().__init__()
        self.setups = {
            "pdf": {
                "parse_method": "deepdoc",  # deepdoc/plain_text/vlm
                "vlm_name": "",
                "lang": "Chinese",
                "suffix": ["pdf"],
                "output_format": "json"
            },
            "excel": {
                "output_format": "html"
            },
            "ppt": {},
            "image": {
                "parse_method": "ocr"
            },
            "email": {},
            "text": {},
            "audio": {},
            "video": {},
        }

    def check(self):
        if self.setups["pdf"].get("parse_method") not in ["deepdoc", "plain_text"]:
            assert self.setups["pdf"].get("vlm_name"), "No VLM specified."
            assert self.setups["pdf"].get("lang"), "No language specified."


class Parser(ProcessBase):
    component_name = "Parser"

    def _pdf(self, blob):
        self.callback(random.randint(1, 5)/100., "Start to work on a PDF.")
        conf = self._param.setups["pdf"]
        self.set_output("output_format", conf["output_format"])
        if conf.get("parse_method") == "deepdoc":
            bboxes = RAGFlowPdfParser().parse_into_bboxes(blob, callback=self.callback)
        elif conf.get("parse_method") == "plain_text":
            lines, _ = PlainParser()(blob)
            bboxes = [{"text": t} for t, _ in lines]
        else:
            assert conf.get("vlm_name")
            vision_model = LLMBundle(self._canvas._tenant_id, LLMType.IMAGE2TEXT, llm_name=conf.get("vlm_name"), lang=self._param.setups["pdf"].get("lang"))
            lines, _ = VisionParser(vision_model=vision_model)(blob, callback=self.callback)
            bboxes = []
            for t, poss in lines:
                pn, x0, x1, top, bott = poss.split(" ")
                bboxes.append({"page_number": int(pn), "x0": int(x0), "x1": int(x1), "top": int(top), "bottom": int(bott), "text": t})

        self.set_output("json", bboxes)
        mkdn = ""
        for b in bboxes:
            if b.get("layout_type", "") == "title":
                mkdn += "\n## "
            if b.get("layout_type", "") == "figure":
                mkdn += "![](data:image/png;base64,{})\n".format(VLM.image2base64(b["image"]))
                continue
            mkdn += b.get("text", "") + "\n"
        self.set_output("markdown", mkdn)

    def _excel(self, blob):
        self.callback(random.randint(1, 5)/100., "Start to work on an Excel file.")
        conf = self._param.setups["excel"]
        excel_parser = ExcelParser()
        if conf.get("output_format") == "html":
            html = excel_parser.html(blob, 1000000000)
            self.set_output("html", html)
        elif conf.get("output_format") == "json":
            self.set_output("json", [{"text": txt} for txt in excel_parser(blob) if txt])
        elif conf.get("output_format") == "markdown":
            self.set_output("markdown", excel_parser.markdown(blob))

    async def _invoke(self, **kwargs):
        function_map = {
            "pdf": self._pdf,
        }
        for p_type, conf in self._param.setups.items():
            if kwargs.get("name", "").split(".")[-1].lower() not in conf.get("suffix", []):
                continue
            await trio.to_thread.run_sync(function_map[p_type], kwargs["blob"])
            break
rag/flow/pipeline.py (Normal file, 121 lines added)
@@ -0,0 +1,121 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import datetime
import json
import logging
import random
import time
import trio
from agent.canvas import Graph
from api.db.services.document_service import DocumentService
from rag.utils.redis_conn import REDIS_CONN


class Pipeline(Graph):

    def __init__(self, dsl: str, tenant_id=None, doc_id=None, task_id=None, flow_id=None):
        super().__init__(dsl, tenant_id, task_id)
        self._doc_id = doc_id
        self._flow_id = flow_id
        self._kb_id = None
        if doc_id:
            self._kb_id = DocumentService.get_knowledgebase_id(doc_id)
            assert self._kb_id, f"Can't find KB of this document: {doc_id}"

    def callback(self, component_name: str, progress: float | int | None = None, message: str = "") -> None:
        log_key = f"{self._flow_id}-{self.task_id}-logs"
        try:
            bin = REDIS_CONN.get(log_key)
            obj = json.loads(bin.encode("utf-8"))
            if obj:
                if obj[-1]["component_name"] == component_name:
                    obj[-1]["trace"].append({"progress": progress, "message": message, "datetime": datetime.datetime.now().strftime("%H:%M:%S")})
                else:
                    obj.append({
                        "component_name": component_name,
                        "trace": [{"progress": progress, "message": message, "datetime": datetime.datetime.now().strftime("%H:%M:%S")}]
                    })
            else:
                obj = [{
                    "component_name": component_name,
                    "trace": [{"progress": progress, "message": message, "datetime": datetime.datetime.now().strftime("%H:%M:%S")}]
                }]
            REDIS_CONN.set_obj(log_key, obj, 60*10)
        except Exception as e:
            logging.exception(e)

    def fetch_logs(self):
        log_key = f"{self._flow_id}-{self.task_id}-logs"
        try:
            bin = REDIS_CONN.get(log_key)
            if bin:
                return json.loads(bin.encode("utf-8"))
        except Exception as e:
            logging.exception(e)
        return []

    def reset(self):
        super().reset()
        log_key = f"{self._flow_id}-{self.task_id}-logs"
        try:
            REDIS_CONN.set_obj(log_key, [], 60*10)
        except Exception as e:
            logging.exception(e)

    async def run(self, **kwargs):
        st = time.perf_counter()
        if not self.path:
            self.path.append("begin")

        if self._doc_id:
            DocumentService.update_by_id(self._doc_id, {
                "progress": random.randint(0, 5)/100.,
                "progress_msg": "Start the pipeline...",
                "process_begin_at": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            })

        self.error = ""
        idx = len(self.path) - 1
        if idx == 0:
            cpn_obj = self.get_component_obj(self.path[0])
            await cpn_obj.invoke(**kwargs)
            if cpn_obj.error():
                self.error = "[ERROR]" + cpn_obj.error()
            else:
                idx += 1
                self.path.extend(cpn_obj.get_downstream())

        while idx < len(self.path) and not self.error:
            last_cpn = self.get_component_obj(self.path[idx-1])
            cpn_obj = self.get_component_obj(self.path[idx])

            async def invoke():
                nonlocal last_cpn, cpn_obj
                await cpn_obj.invoke(**last_cpn.output())

            async with trio.open_nursery() as nursery:
                nursery.start_soon(invoke)
            if cpn_obj.error():
                self.error = "[ERROR]" + cpn_obj.error()
                break
            idx += 1
            self.path.extend(cpn_obj.get_downstream())

        if self._doc_id:
            DocumentService.update_by_id(self._doc_id, {
                "progress": 1 if not self.error else -1,
                "progress_msg": "Pipeline finished...\n" + self.error,
                "process_duration": time.perf_counter() - st
            })

rag/flow/tests/client.py (Normal file, 57 lines added)
@@ -0,0 +1,57 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import argparse
import json
import os
import time
from concurrent.futures import ThreadPoolExecutor
import trio
from api import settings
from rag.flow.pipeline import Pipeline


def print_logs(pipeline):
    last_logs = "[]"
    while True:
        time.sleep(5)
        logs = pipeline.fetch_logs()
        logs_str = json.dumps(logs)
        if logs_str != last_logs:
            print(logs_str)
            last_logs = logs_str


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    dsl_default_path = os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        "dsl_examples",
        "general_pdf_all.json",
    )
    parser.add_argument('-s', '--dsl', default=dsl_default_path, help="input dsl", action='store', required=True)
    parser.add_argument('-d', '--doc_id', default=False, help="Document ID", action='store', required=True)
    parser.add_argument('-t', '--tenant_id', default=False, help="Tenant ID", action='store', required=True)
    args = parser.parse_args()

    settings.init_settings()
    pipeline = Pipeline(open(args.dsl, "r").read(), tenant_id=args.tenant_id, doc_id=args.doc_id, task_id="xxxx", flow_id="xxx")
    pipeline.reset()

    exe = ThreadPoolExecutor(max_workers=5)
    thr = exe.submit(print_logs, pipeline)

    trio.run(pipeline.run)
    thr.result()
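A hypothetical invocation of this test client (the IDs are placeholders, and it assumes a configured RAGFlow environment run from the repository root): `python rag/flow/tests/client.py -s rag/flow/tests/dsl_examples/general_pdf_all.json -d <doc_id> -t <tenant_id>`.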
|
||||
54
rag/flow/tests/dsl_examples/general_pdf_all.json
Normal file
54
rag/flow/tests/dsl_examples/general_pdf_all.json
Normal file
@ -0,0 +1,54 @@
|
||||
{
  "components": {
    "begin": {
      "obj": {
        "component_name": "File",
        "params": {}
      },
      "downstream": ["parser:0"],
      "upstream": []
    },
    "parser:0": {
      "obj": {
        "component_name": "Parser",
        "params": {
          "setups": {
            "pdf": {
              "parse_method": "deepdoc",
              "vlm_name": "",
              "lang": "Chinese",
              "suffix": ["pdf"],
              "output_format": "json"
            }
          }
        }
      },
      "downstream": ["chunker:0"],
      "upstream": ["begin"]
    },
    "chunker:0": {
      "obj": {
        "component_name": "Chunker",
        "params": {
          "method": "general",
          "auto_keywords": 5
        }
      },
      "downstream": ["tokenizer:0"],
      "upstream": ["parser:0"]
    },
    "tokenizer:0": {
      "obj": {
        "component_name": "Tokenizer",
        "params": {}
      },
      "downstream": [],
      "upstream": ["chunker:0"]
    }
  },
  "path": []
}
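This DSL wires a linear ingestion graph (File → Parser → Chunker → Tokenizer) through `downstream`/`upstream` edges. A rough sketch of how such a graph can be walked, under the assumption that execution simply follows `downstream` links from `begin` (the real `Pipeline` class adds async execution, callbacks, and logging on top of this):

```python
import json

def run_order(dsl: dict) -> list[str]:
    """Breadth-first walk over the 'downstream' edges, starting at 'begin'."""
    components, order, queue = dsl["components"], [], ["begin"]
    while queue:
        name = queue.pop(0)
        if name in order:
            continue  # guard against cycles and repeated fan-in
        order.append(name)
        queue.extend(components[name]["downstream"])
    return order

with open("general_pdf_all.json") as f:
    print(run_order(json.load(f)))  # ['begin', 'parser:0', 'chunker:0', 'tokenizer:0']
```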
rag/flow/tokenizer.py (new file, 134 lines)
@ -0,0 +1,134 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import random
import re

import numpy as np
import trio

from api.db import LLMType
from api.db.services.knowledgebase_service import KnowledgebaseService
from api.db.services.llm_service import LLMBundle
from api.db.services.user_service import TenantService
from api.utils.api_utils import timeout
from rag.flow.base import ProcessBase, ProcessParamBase
from rag.nlp import rag_tokenizer
from rag.settings import EMBEDDING_BATCH_SIZE
from rag.svr.task_executor import embed_limiter
from rag.utils import truncate


class TokenizerParam(ProcessParamBase):
    def __init__(self):
        super().__init__()
        self.search_method = ["full_text", "embedding"]
        self.filename_embd_weight = 0.1

    def check(self):
        for v in self.search_method:
            self.check_valid_value(v.lower(), "Chunk method abnormal.", ["full_text", "embedding"])


class Tokenizer(ProcessBase):
    component_name = "Tokenizer"

    async def _embedding(self, name, chunks):
        parts = sum(["full_text" in self._param.search_method, "embedding" in self._param.search_method])
        token_count = 0
        if self._canvas._kb_id:
            e, kb = KnowledgebaseService.get_by_id(self._canvas._kb_id)
            embedding_id = kb.embd_id
        else:
            e, ten = TenantService.get_by_id(self._canvas._tenant_id)
            embedding_id = ten.embd_id
        embedding_model = LLMBundle(self._canvas._tenant_id, LLMType.EMBEDDING, llm_name=embedding_id)
        texts = []
        for c in chunks:
            if c.get("questions"):
                texts.append("\n".join(c["questions"]))
            else:
                texts.append(re.sub(r"</?(table|td|caption|tr|th)( [^<>]{0,12})?>", " ", c["text"]))
        vts, c = embedding_model.encode([name])
        token_count += c
        tts = np.concatenate([vts[0] for _ in range(len(texts))], axis=0)

        @timeout(60)
        def batch_encode(txts):
            nonlocal embedding_model
            return embedding_model.encode([truncate(c, embedding_model.max_length - 10) for c in txts])

        cnts_ = np.array([])
        for i in range(0, len(texts), EMBEDDING_BATCH_SIZE):
            async with embed_limiter:
                vts, c = await trio.to_thread.run_sync(lambda: batch_encode(texts[i: i + EMBEDDING_BATCH_SIZE]))
            if len(cnts_) == 0:
                cnts_ = vts
            else:
                cnts_ = np.concatenate((cnts_, vts), axis=0)
            token_count += c
            if i % 33 == 32:
                self.callback(i * 1. / len(texts) / parts / EMBEDDING_BATCH_SIZE + 0.5 * (parts - 1))

        cnts = cnts_
        title_w = float(self._param.filename_embd_weight)
        vects = (title_w * tts + (1 - title_w) * cnts) if len(tts) == len(cnts) else cnts

        assert len(vects) == len(chunks)
        for i, ck in enumerate(chunks):
            v = vects[i].tolist()
            ck["q_%d_vec" % len(v)] = v
        return chunks, token_count

    async def _invoke(self, **kwargs):
        parts = sum(["full_text" in self._param.search_method, "embedding" in self._param.search_method])
        if "full_text" in self._param.search_method:
            self.callback(random.randint(1, 5) / 100., "Start to tokenize.")
        if kwargs.get("chunks"):
            chunks = kwargs["chunks"]
            for i, ck in enumerate(chunks):
                if ck.get("questions"):
                    ck["question_tks"] = rag_tokenizer.tokenize("\n".join(ck["questions"]))
                if ck.get("keywords"):
                    ck["important_tks"] = rag_tokenizer.tokenize("\n".join(ck["keywords"]))
                ck["content_ltks"] = rag_tokenizer.tokenize(ck["text"])
                ck["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(ck["content_ltks"])
                if i % 100 == 99:
                    self.callback(i * 1. / len(chunks) / parts)
        elif kwargs.get("output_format") in ["markdown", "text"]:
            ck = {
                "text": kwargs.get(kwargs["output_format"], "")
            }
            if "full_text" in self._param.search_method:
                ck["content_ltks"] = rag_tokenizer.tokenize(kwargs.get(kwargs["output_format"], ""))
                ck["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(ck["content_ltks"])
            chunks = [ck]
        else:
            chunks = kwargs["json"]
            for i, ck in enumerate(chunks):
                ck["content_ltks"] = rag_tokenizer.tokenize(ck["text"])
                ck["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(ck["content_ltks"])
                if i % 100 == 99:
                    self.callback(i * 1. / len(chunks) / parts)

        self.callback(1. / parts, "Finish tokenizing.")

        if "embedding" in self._param.search_method:
            self.callback(random.randint(1, 5) / 100. + 0.5 * (parts - 1), "Start embedding inference.")
            chunks, token_count = await self._embedding(kwargs.get("name", ""), chunks)
            self.set_output("embedding_token_consumption", token_count)

            self.callback(1., "Finish embedding.")

        self.set_output("chunks", chunks)
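The line `vects = (title_w * tts + (1 - title_w) * cnts)` above blends one filename embedding into every chunk embedding. A minimal sketch of that weighting, with made-up 2-dimensional vectors standing in for real model output (shapes assumed: the title vector repeated once per chunk, matched against one content vector per chunk):

```python
import numpy as np

title_w = 0.1  # matches TokenizerParam.filename_embd_weight's default
tts = np.tile(np.array([[1.0, 0.0]]), (3, 1))           # filename vector, repeated per chunk
cnts = np.array([[0.0, 1.0], [0.5, 0.5], [1.0, 1.0]])   # per-chunk content vectors

vects = title_w * tts + (1 - title_w) * cnts            # convex blend, row by row
print(vects)  # each row leans 90% on the chunk text, 10% on the filename
```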
@ -43,6 +43,7 @@ FACTORY_DEFAULT_BASE_URL = {
    SupportedLiteLLMProvider.Tongyi_Qianwen: "https://dashscope.aliyuncs.com/compatible-mode/v1",
    SupportedLiteLLMProvider.Dashscope: "https://dashscope.aliyuncs.com/compatible-mode/v1",
    SupportedLiteLLMProvider.Moonshot: "https://api.moonshot.cn/v1",
    SupportedLiteLLMProvider.Ollama: "",
}
@ -1362,7 +1362,7 @@ class LiteLLMBase(ABC):
        self.prefix = LITELLM_PROVIDER_PREFIX.get(self.provider, "")
        self.model_name = f"{self.prefix}{model_name}"
        self.api_key = key
        self.base_url = base_url or FACTORY_DEFAULT_BASE_URL.get(self.provider, "")
        self.base_url = (base_url or FACTORY_DEFAULT_BASE_URL.get(self.provider, "")).rstrip('/')
        # Configure retry parameters
        self.max_retries = kwargs.get("max_retries", int(os.environ.get("LLM_MAX_RETRIES", 5)))
        self.base_delay = kwargs.get("retry_interval", float(os.environ.get("LLM_BASE_DELAY", 2.0)))
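The `.rstrip('/')` change above guards against malformed request URLs when a user-supplied base URL ends with a slash. A minimal sketch of the failure mode it prevents (`join_endpoint` is a hypothetical helper for illustration, not part of the codebase):

```python
def join_endpoint(base_url: str, path: str) -> str:
    # Normalize both sides so "base/" + "/path" never yields a double slash.
    return base_url.rstrip("/") + "/" + path.lstrip("/")

assert join_endpoint("https://api.moonshot.cn/v1/", "/chat/completions") == \
    "https://api.moonshot.cn/v1/chat/completions"
```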
@ -554,8 +554,8 @@ def naive_merge(sections, chunk_token_num=128, delimiter="\n。;!?", overl
        if num_tokens_from_string(sec) < chunk_token_num:
            add_chunk(sec, pos)
            continue
        splited_sec = re.split(r"(%s)" % dels, sec, flags=re.DOTALL)
        for sub_sec in splited_sec:
        split_sec = re.split(r"(%s)" % dels, sec, flags=re.DOTALL)
        for sub_sec in split_sec:
            if re.match(f"^{dels}$", sub_sec):
                continue
            add_chunk(sub_sec, pos)
@ -563,7 +563,8 @@ def naive_merge(sections, chunk_token_num=128, delimiter="\n。;!?", overl
    return cks


def naive_merge_with_images(texts, images, chunk_token_num=128, delimiter="\n。;!?"):
def naive_merge_with_images(texts, images, chunk_token_num=128, delimiter="\n。;!?", overlapped_percent=0):
    from deepdoc.parser.pdf_parser import RAGFlowPdfParser
    if not texts or len(texts) != len(images):
        return [], []
    cks = [""]
@ -578,7 +579,10 @@ def naive_merge_with_images(texts, images, chunk_token_num=128, delimiter="\n。
        if tnum < 8:
            pos = ""
        # Ensure that the length of the merged chunk does not exceed chunk_token_num
        if cks[-1] == "" or tk_nums[-1] > chunk_token_num:
        if cks[-1] == "" or tk_nums[-1] > chunk_token_num * (100 - overlapped_percent) / 100.:
            if cks:
                overlapped = RAGFlowPdfParser.remove_tag(cks[-1])
                t = overlapped[int(len(overlapped) * (100 - overlapped_percent) / 100.):] + t
            if t.find(pos) < 0:
                t += pos
            cks.append(t)
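The hunk above makes chunk merging overlap-aware: a chunk is treated as full once it reaches `(100 - overlapped_percent)%` of the token budget, and the tail of the previous chunk is prepended to the next one. A self-contained sketch of that overlap rule, using character counts as a stand-in for token counts (names are illustrative, not the repository's):

```python
def merge_with_overlap(pieces, budget=16, overlapped_percent=20):
    """Greedy merge of text pieces into chunks whose tails overlap."""
    chunks = [""]
    keep = (100 - overlapped_percent) / 100.
    for p in pieces:
        if chunks[-1] and len(chunks[-1]) > budget * keep:
            # Start a new chunk, seeded with the trailing part of the last one
            # so neighboring chunks share context at their boundary.
            tail = chunks[-1][int(len(chunks[-1]) * keep):]
            chunks.append(tail + p)
        else:
            chunks[-1] += p
    return chunks

print(merge_with_overlap(["aaaa"] * 10))
```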
@ -600,14 +604,14 @@ def naive_merge_with_images(texts, images, chunk_token_num=128, delimiter="\n。
        if isinstance(text, tuple):
            text_str = text[0]
            text_pos = text[1] if len(text) > 1 else ""
            splited_sec = re.split(r"(%s)" % dels, text_str)
            for sub_sec in splited_sec:
            split_sec = re.split(r"(%s)" % dels, text_str)
            for sub_sec in split_sec:
                if re.match(f"^{dels}$", sub_sec):
                    continue
                add_chunk(sub_sec, image, text_pos)
        else:
            splited_sec = re.split(r"(%s)" % dels, text)
            for sub_sec in splited_sec:
            split_sec = re.split(r"(%s)" % dels, text)
            for sub_sec in split_sec:
                if re.match(f"^{dels}$", sub_sec):
                    continue
                add_chunk(sub_sec, image)
@ -684,8 +688,8 @@ def naive_merge_docx(sections, chunk_token_num=128, delimiter="\n。;!?"):

    dels = get_delimiters(delimiter)
    for sec, image in sections:
        splited_sec = re.split(r"(%s)" % dels, sec)
        for sub_sec in splited_sec:
        split_sec = re.split(r"(%s)" % dels, sec)
        for sub_sec in split_sec:
            if re.match(f"^{dels}$", sub_sec):
                continue
            add_chunk(sub_sec, image, "")
@ -293,8 +293,7 @@ async def build_chunks(task, progress_callback):
                docs.append(d)
                return

            output_buffer = BytesIO()
            try:
                with BytesIO() as output_buffer:
                    if isinstance(d["image"], bytes):
                        output_buffer.write(d["image"])
                        output_buffer.seek(0)
@ -317,8 +316,6 @@ async def build_chunks(task, progress_callback):
                    d["image"].close()
                    del d["image"]  # Remove image reference
                    docs.append(d)
                finally:
                    output_buffer.close()  # Ensure BytesIO is always closed
            except Exception:
                logging.exception(
                    "Saving image of chunk {}/{}/{} got exception".format(task["location"], task["name"], d["id"]))
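The hunk above swaps an explicit `try/finally` close for a `with` block: `BytesIO` is a context manager, so the buffer is released even if an exception is raised mid-write. A minimal standalone sketch of the equivalence:

```python
from io import BytesIO

# The context manager closes the buffer on both the normal and the error
# path, replacing the explicit finally-close the old code needed.
with BytesIO() as buf:
    buf.write(b"...image bytes...")
    buf.seek(0)
    data = buf.read()
# buf is closed here; no finally clause required.
print(data)
```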
@ -93,7 +93,8 @@ class MCPToolCallSession(ToolCallSession):
            msg = f"Timeout initializing client_session for server {self._mcp_server.id}"
            logging.error(msg)
            await self._process_mcp_tasks(None, msg)
        except Exception:
        except Exception as e:
            logging.exception(e)
            msg = "Connection failed (possibly due to auth error). Please check authentication settings first"
            await self._process_mcp_tasks(None, msg)

@ -148,7 +149,7 @@ class MCPToolCallSession(ToolCallSession):
        if result.isError:
            return f"MCP server error: {result.content}"

        # For now we only support text content
        # For now, we only support text content
        if isinstance(result.content[0], TextContent):
            return result.content[0].text
        else:
@ -336,7 +336,7 @@ class RedisDB:

    def delete_if_equal(self, key: str, expected_value: str) -> bool:
        """
        Do follwing atomically:
        Do following atomically:
        Delete a key if its value is equals to the given one, do nothing otherwise.
        """
        return bool(self.lua_delete_if_equal(keys=[key], args=[expected_value], client=self.REDIS))
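The method above relies on a Lua script so the compare and the delete execute as one atomic step on the Redis server. A minimal sketch of the same idea, assuming a local Redis instance (the script body is an assumption about what `lua_delete_if_equal` registers; it is the standard compare-and-delete idiom, not copied from this repository):

```python
import redis

r = redis.Redis()
# GET + DEL run as one server-side script: no other client can slip in between.
DELETE_IF_EQUAL = r.register_script("""
if redis.call('GET', KEYS[1]) == ARGV[1] then
    return redis.call('DEL', KEYS[1])
end
return 0
""")

r.set("lock:doc-42", "owner-a")
assert DELETE_IF_EQUAL(keys=["lock:doc-42"], args=["owner-b"]) == 0  # wrong value, key kept
assert DELETE_IF_EQUAL(keys=["lock:doc-42"], args=["owner-a"]) == 1  # matched, key deleted
```

This is the usual safe-unlock pattern: without atomicity, a client could check the value, lose its lock to a timeout, and then delete a key that another owner has since claimed.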
web/package-lock.json (generated, 9 changed lines)
@ -12,7 +12,7 @@
"@antv/g2": "^5.2.10",
|
||||
"@antv/g6": "^5.0.10",
|
||||
"@hookform/resolvers": "^3.9.1",
|
||||
"@js-preview/excel": "^1.7.8",
|
||||
"@js-preview/excel": "^1.7.14",
|
||||
"@lexical/react": "^0.23.1",
|
||||
"@monaco-editor/react": "^4.6.0",
|
||||
"@radix-ui/react-accordion": "^1.2.3",
|
||||
@ -4114,9 +4114,10 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@js-preview/excel": {
|
||||
"version": "1.7.8",
|
||||
"resolved": "https://registry.npmmirror.com/@js-preview/excel/-/excel-1.7.8.tgz",
|
||||
"integrity": "sha512-pLJTDIhbzqaiH3kUPnbeWLsBFeCAHjnBwloMvoREdW4YUYTcsHDQ5h41QTyRJWSYRJBCcsy6Kt7KeDHOHDbVEw=="
|
||||
"version": "1.7.14",
|
||||
"resolved": "https://registry.npmmirror.com/@js-preview/excel/-/excel-1.7.14.tgz",
|
||||
"integrity": "sha512-7QHtuRalWQzWIKARc/IRN8+kj1S5eWV4+cAQipzZngE3mVxMPL1RHXKJt/ONmpcKZ410egYkaBuOOs9+LctBkA==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@lexical/clipboard": {
|
||||
"version": "0.23.1",
|
||||
|
||||
@ -23,7 +23,7 @@
    "@antv/g2": "^5.2.10",
    "@antv/g6": "^5.0.10",
    "@hookform/resolvers": "^3.9.1",
    "@js-preview/excel": "^1.7.8",
    "@js-preview/excel": "^1.7.14",
    "@lexical/react": "^0.23.1",
    "@monaco-editor/react": "^4.6.0",
    "@radix-ui/react-accordion": "^1.2.3",
@ -6,6 +6,7 @@ import { App, ConfigProvider, ConfigProviderProps, theme } from 'antd';
import pt_BR from 'antd/lib/locale/pt_BR';
import deDE from 'antd/locale/de_DE';
import enUS from 'antd/locale/en_US';
import ru_RU from 'antd/locale/ru_RU';
import vi_VN from 'antd/locale/vi_VN';
import zhCN from 'antd/locale/zh_CN';
import zh_HK from 'antd/locale/zh_HK';
@ -34,6 +35,7 @@ const AntLanguageMap = {
  en: enUS,
  zh: zhCN,
  'zh-TRADITIONAL': zh_HK,
  ru: ru_RU,
  vi: vi_VN,
  'pt-BR': pt_BR,
  de: deDE,
web/src/assets/svg/searxng.svg (new file, 5 lines)
@ -0,0 +1,5 @@
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor">
  <path d="M15.5 14h-.79l-.28-.27C15.41 12.59 16 11.11 16 9.5 16 5.91 13.09 3 9.5 3S3 5.91 3 9.5 5.91 16 9.5 16c1.61 0 3.09-.59 4.23-1.57l.27.28v.79l5 4.99L20.49 19l-4.99-5zm-6 0C7.01 14 5 11.99 5 9.5S7.01 5 9.5 5 14 7.01 14 9.5 11.99 14 9.5 14z"/>
  <circle cx="9.5" cy="9.5" r="2.5" fill="currentColor" opacity="0.6"/>
  <path d="M12 2l1.5 3h3L15 7l1.5 3L15 8.5 12 10 9 8.5 7.5 10 9 7 7.5 5h3L12 2z" opacity="0.4"/>
</svg>
web/src/components/avatar-upload.tsx (new file, 80 lines)
@ -0,0 +1,80 @@
import { transformFile2Base64 } from '@/utils/file-util';
import { Pencil, Upload } from 'lucide-react';
import {
  ChangeEventHandler,
  forwardRef,
  useCallback,
  useEffect,
  useState,
} from 'react';
import { useTranslation } from 'react-i18next';
import { Avatar, AvatarFallback, AvatarImage } from './ui/avatar';
import { Input } from './ui/input';

type AvatarUploadProps = { value?: string; onChange?: (value: string) => void };

export const AvatarUpload = forwardRef<HTMLInputElement, AvatarUploadProps>(
  function AvatarUpload({ value, onChange }, ref) {
    const { t } = useTranslation();
    const [avatarBase64Str, setAvatarBase64Str] = useState(''); // Avatar Image base64

    const handleChange: ChangeEventHandler<HTMLInputElement> = useCallback(
      async (ev) => {
        const file = ev.target?.files?.[0];
        if (/\.(jpg|jpeg|png|webp|bmp)$/i.test(file?.name ?? '')) {
          const str = await transformFile2Base64(file!);
          setAvatarBase64Str(str);
          onChange?.(str);
        }
        ev.target.value = '';
      },
      [onChange],
    );

    useEffect(() => {
      if (value) {
        setAvatarBase64Str(value);
      }
    }, [value]);

    return (
      <div className="flex justify-start items-end space-x-2">
        <div className="relative group">
          {!avatarBase64Str ? (
            <div className="w-[64px] h-[64px] grid place-content-center border border-dashed rounded-md">
              <div className="flex flex-col items-center">
                <Upload />
                <p>{t('common.upload')}</p>
              </div>
            </div>
          ) : (
            <div className="w-[64px] h-[64px] relative grid place-content-center">
              <Avatar className="w-[64px] h-[64px] rounded-md">
                <AvatarImage className=" block" src={avatarBase64Str} alt="" />
                <AvatarFallback></AvatarFallback>
              </Avatar>
              <div className="absolute inset-0 bg-[#000]/20 group-hover:bg-[#000]/60">
                <Pencil
                  size={20}
                  className="absolute right-2 bottom-0 opacity-50 hidden group-hover:block"
                />
              </div>
            </div>
          )}
          <Input
            placeholder=""
            type="file"
            title=""
            accept="image/*"
            className="absolute top-0 left-0 w-full h-full opacity-0 cursor-pointer"
            onChange={handleChange}
            ref={ref}
          />
        </div>
        <div className="margin-1 text-muted-foreground">
          {t('knowledgeConfiguration.photoTip')}
        </div>
      </div>
    );
  },
);
@ -15,123 +15,122 @@ interface EditTagsProps {
  onChange?: (tags: string[]) => void;
}

const EditTag = ({ value = [], onChange }: EditTagsProps) => {
  const [inputVisible, setInputVisible] = useState(false);
  const [inputValue, setInputValue] = useState('');
  const inputRef = useRef<HTMLInputElement>(null);
const EditTag = React.forwardRef<HTMLDivElement, EditTagsProps>(
  ({ value = [], onChange }: EditTagsProps, ref) => {
    const [inputVisible, setInputVisible] = useState(false);
    const [inputValue, setInputValue] = useState('');
    const inputRef = useRef<HTMLInputElement>(null);

  useEffect(() => {
    if (inputVisible) {
      inputRef.current?.focus();
    }
  }, [inputVisible]);
    useEffect(() => {
      if (inputVisible) {
        inputRef.current?.focus();
      }
    }, [inputVisible]);

  const handleClose = (removedTag: string) => {
    const newTags = value?.filter((tag) => tag !== removedTag);
    onChange?.(newTags ?? []);
  };
    const handleClose = (removedTag: string) => {
      const newTags = value?.filter((tag) => tag !== removedTag);
      onChange?.(newTags ?? []);
    };

  const showInput = () => {
    setInputVisible(true);
  };
    const showInput = () => {
      setInputVisible(true);
    };

  const handleInputChange = (e: React.ChangeEvent<HTMLInputElement>) => {
    setInputValue(e.target.value);
  };
    const handleInputChange = (e: React.ChangeEvent<HTMLInputElement>) => {
      setInputValue(e.target.value);
    };

  const handleInputConfirm = () => {
    if (inputValue && value) {
      const newTags = inputValue
        .split(';')
        .map((tag) => tag.trim())
        .filter((tag) => tag && !value.includes(tag));
      onChange?.([...value, ...newTags]);
    }
    setInputVisible(false);
    setInputValue('');
  };
    const handleInputConfirm = () => {
      if (inputValue && value) {
        const newTags = inputValue
          .split(';')
          .map((tag) => tag.trim())
          .filter((tag) => tag && !value.includes(tag));
        onChange?.([...value, ...newTags]);
      }
      setInputVisible(false);
      setInputValue('');
    };

  const forMap = (tag: string) => {
    return (
      <HoverCard>
        <HoverCardContent side="top">{tag}</HoverCardContent>
        <HoverCardTrigger>
          <div
            key={tag}
            className="w-fit flex items-center justify-center gap-2 border-dashed border px-1 rounded-sm bg-bg-card"
          >
            <div className="flex gap-2 items-center">
              <div className="max-w-80 overflow-hidden text-ellipsis">
                {tag}
    const forMap = (tag: string) => {
      return (
        <HoverCard key={tag}>
          <HoverCardContent side="top">{tag}</HoverCardContent>
          <HoverCardTrigger asChild>
            <div className="w-fit flex items-center justify-center gap-2 border-dashed border px-1 rounded-sm bg-bg-card">
              <div className="flex gap-2 items-center">
                <div className="max-w-80 overflow-hidden text-ellipsis">
                  {tag}
                </div>
                <X
                  className="w-4 h-4 text-muted-foreground hover:text-primary"
                  onClick={(e) => {
                    e.preventDefault();
                    handleClose(tag);
                  }}
                />
              </div>
              <X
                className="w-4 h-4 text-muted-foreground hover:text-primary"
                onClick={(e) => {
                  e.preventDefault();
                  handleClose(tag);
                }}
              />
            </div>
          </div>
        </HoverCardTrigger>
      </HoverCard>
          </HoverCardTrigger>
        </HoverCard>
      );
    };

  const tagChild = value?.map(forMap);

  const tagPlusStyle: React.CSSProperties = {
    borderStyle: 'dashed',
  };

  return (
    <div>
      {inputVisible ? (
        <Input
          ref={inputRef}
          type="text"
          className="h-8 bg-bg-card"
          value={inputValue}
          onChange={handleInputChange}
          onBlur={handleInputConfirm}
          onKeyDown={(e) => {
            if (e?.key === 'Enter') {
              handleInputConfirm();
            }
          }}
        />
      ) : (
        <Button
          variant="dashed"
          className="w-fit flex items-center justify-center gap-2 bg-bg-card"
          onClick={showInput}
          style={tagPlusStyle}
        >
          <PlusOutlined />
        </Button>
      )}
      {Array.isArray(tagChild) && tagChild.length > 0 && (
        <TweenOneGroup
          className="flex gap-2 flex-wrap mt-2"
          enter={{
            scale: 0.8,
            opacity: 0,
            type: 'from',
            duration: 100,
          }}
          onEnd={(e) => {
            if (e.type === 'appear' || e.type === 'enter') {
              (e.target as any).style = 'display: inline-block';
            }
          }}
          leave={{ opacity: 0, width: 0, scale: 0, duration: 200 }}
          appear={false}
        >
          {tagChild}
        </TweenOneGroup>
      )}
    </div>
  );
};

    const tagChild = value?.map(forMap);

    const tagPlusStyle: React.CSSProperties = {
      borderStyle: 'dashed',
    };

    return (
      <div>
        {inputVisible ? (
          <Input
            ref={inputRef}
            type="text"
            className="h-8 bg-bg-card"
            value={inputValue}
            onChange={handleInputChange}
            onBlur={handleInputConfirm}
            onKeyDown={(e) => {
              if (e?.key === 'Enter') {
                handleInputConfirm();
              }
            }}
          />
        ) : (
          <Button
            variant="dashed"
            className="w-fit flex items-center justify-center gap-2 bg-bg-card"
            onClick={showInput}
            style={tagPlusStyle}
          >
            <PlusOutlined />
          </Button>
        )}
        {Array.isArray(tagChild) && tagChild.length > 0 && (
          <TweenOneGroup
            className="flex gap-2 flex-wrap mt-2"
            enter={{
              scale: 0.8,
              opacity: 0,
              type: 'from',
              duration: 100,
            }}
            onEnd={(e) => {
              if (e.type === 'appear' || e.type === 'enter') {
                (e.target as any).style = 'display: inline-block';
              }
            }}
            leave={{ opacity: 0, width: 0, scale: 0, duration: 200 }}
            appear={false}
          >
            {tagChild}
          </TweenOneGroup>
        )}
      </div>
    );
  },
);

export default EditTag;
@ -102,8 +102,8 @@ export function LlmSettingFieldItems({
        control={form.control}
        name={'parameter'}
        render={({ field }) => (
          <FormItem>
            <FormLabel>{t('freedom')}</FormLabel>
          <FormItem className="flex justify-between items-center">
            <FormLabel className="flex-1">{t('freedom')}</FormLabel>
            <FormControl>
              <Select
                {...field}
@ -112,7 +112,7 @@ export function LlmSettingFieldItems({
                  field.onChange(val);
                }}
              >
                <SelectTrigger>
                <SelectTrigger className="flex-1 !m-0">
                  <SelectValue />
                </SelectTrigger>
                <SelectContent>
@ -30,7 +30,6 @@
.messageTextDark {
  .chunkText();
  .messageTextBase();
  background-color: #1668dc;
  word-break: break-word;
  :global(section.think) {
    color: rgb(166, 166, 166);
@ -235,7 +235,7 @@ function MarkdownContent({
          <HoverCardTrigger>
            <CircleAlert className="size-4 inline-block" />
          </HoverCardTrigger>
          <HoverCardContent>
          <HoverCardContent className="max-w-3xl">
            {renderPopoverContent(chunkIndex)}
          </HoverCardContent>
        </HoverCard>
@ -183,13 +183,13 @@ const RaptorFormFields = () => {
      render={({ field }) => (
        <FormItem className=" items-center space-y-0 ">
          <div className="flex items-center">
            <FormLabel className="text-sm text-muted-foreground whitespace-nowrap w-1/4">
            <FormLabel className="text-sm text-muted-foreground whitespace-wrap w-1/4">
              {t('randomSeed')}
            </FormLabel>
            <div className="w-3/4">
              <FormControl defaultValue={0}>
                <div className="flex gap-4">
                  <Input {...field} defaultValue={0} />
                <div className="flex gap-4 items-center">
                  <Input {...field} defaultValue={0} type="number" />
                  <Button
                    size={'sm'}
                    onClick={handleGenerate}
@ -9,7 +9,24 @@ export interface InputProps
}

const Input = React.forwardRef<HTMLInputElement, InputProps>(
  ({ className, type, value, ...props }, ref) => {
  ({ className, type, value, onChange, ...props }, ref) => {
    const isControlled = value !== undefined;
    const { defaultValue, ...restProps } = props;
    const inputValue = isControlled ? value : defaultValue;
    const handleChange: React.ChangeEventHandler<HTMLInputElement> = (e) => {
      if (type === 'number') {
        const numValue = e.target.value === '' ? '' : Number(e.target.value);
        onChange?.({
          ...e,
          target: {
            ...e.target,
            value: numValue,
          },
        } as React.ChangeEvent<HTMLInputElement>);
      } else {
        onChange?.(e);
      }
    };
    return (
      <input
        type={type}
@ -18,8 +35,9 @@ const Input = React.forwardRef<HTMLInputElement, InputProps>(
          className,
        )}
        ref={ref}
        value={value ?? ''}
        {...props}
        value={inputValue ?? ''}
        onChange={handleChange}
        {...restProps}
      />
    );
  },
@ -29,6 +29,7 @@ import {
} from '@/components/ui/popover';
import { Separator } from '@/components/ui/separator';
import { cn } from '@/lib/utils';
import { isEmpty } from 'lodash';

export type MultiSelectOptionType = {
  label: React.ReactNode;
@ -209,13 +210,17 @@ export const MultiSelect = React.forwardRef<
  const [isAnimating, setIsAnimating] = React.useState(false);

  React.useEffect(() => {
    if (!selectedValues?.length && props.value) {
    if (isEmpty(selectedValues) && !isEmpty(props.value)) {
      setSelectedValues(props.value as string[]);
    }
  }, [props.value, selectedValues]);

  React.useEffect(() => {
    if (!selectedValues?.length && !props.value && defaultValue) {
    if (
      isEmpty(selectedValues) &&
      isEmpty(props.value) &&
      !isEmpty(defaultValue)
    ) {
      setSelectedValues(defaultValue);
    }
  }, [defaultValue, props.value, selectedValues]);
@ -1,4 +1,5 @@
import { useHandleFilterSubmit } from '@/components/list-filter-bar/use-handle-filter-submit';
import message from '@/components/ui/message';
import {
  IKnowledge,
  IKnowledgeGraph,
@ -13,7 +14,6 @@ import kbService, {
} from '@/services/knowledge-service';
import { useMutation, useQuery, useQueryClient } from '@tanstack/react-query';
import { useDebounce } from 'ahooks';
import { message } from 'antd';
import { useCallback, useEffect, useMemo, useRef, useState } from 'react';
import { useParams, useSearchParams } from 'umi';
import {
@ -10,6 +10,8 @@ export interface PromptConfig {
  keyword: boolean;
  refine_multiturn: boolean;
  use_kg: boolean;
  reasoning?: boolean;
  cross_languages?: Array<string>;
}

export interface Parameter {
@ -48,10 +48,16 @@ export interface IFlowTemplate {
  canvas_type: string;
  create_date: string;
  create_time: number;
  description: string;
  description: {
    en: string;
    zh: string;
  };
  dsl: DSL;
  id: string;
  title: string;
  title: {
    en: string;
    zh: string;
  };
  update_date: string;
  update_time: number;
}
@ -868,6 +868,9 @@ export default {
    duckDuckGo: 'DuckDuckGo',
    duckDuckGoDescription:
      'Eine Komponente, die auf duckduckgo.com sucht und Ihnen ermöglicht, die Anzahl der Suchergebnisse mit TopN anzugeben. Sie ergänzt die vorhandenen Wissensdatenbanken.',
    searXNG: 'SearXNG',
    searXNGDescription:
      'Eine Komponente, die auf https://searxng.org/ sucht und Ihnen ermöglicht, die Anzahl der Suchergebnisse mit TopN anzugeben. Sie ergänzt die vorhandenen Wissensdatenbanken.',
    channel: 'Kanal',
    channelTip:
      'Führt eine Textsuche oder Nachrichtensuche für die Eingabe der Komponente durch',
@ -1005,6 +1005,9 @@ This auto-tagging feature enhances retrieval by adding another layer of domain-s
    duckDuckGo: 'DuckDuckGo',
    duckDuckGoDescription:
      'A component that searches from duckduckgo.com, allowing you to specify the number of search results using TopN. It supplements the existing knowledge bases.',
    searXNG: 'SearXNG',
    searXNGDescription:
      'A component that searches via your provided SearXNG instance URL. Specify TopN and the instance URL.',
    channel: 'Channel',
    channelTip: `Perform text search or news search on the component's input`,
    text: 'Text',

@ -571,6 +571,9 @@ export default {
    duckDuckGo: 'DuckDuckGo',
    duckDuckGoDescription:
      'Un componente que recupera resultados de búsqueda de duckduckgo.com, con TopN especificando el número de resultados de búsqueda. Complementa las bases de conocimiento existentes.',
    searXNG: 'SearXNG',
    searXNGDescription:
      'Un componente que realiza búsquedas mediante la URL de la instancia de SearXNG que usted proporcione. Especifique TopN y la URL de la instancia.',
    channel: 'Canal',
    channelTip:
      'Realizar búsqueda de texto o búsqueda de noticias en la entrada del componente.',

@ -781,6 +781,9 @@ export default {
    duckDuckGo: 'DuckDuckGo',
    duckDuckGoDescription:
      'Un composant qui recherche sur duckduckgo.com, vous permettant de spécifier le nombre de résultats avec TopN. Il complète les bases de connaissances existantes.',
    searXNG: 'SearXNG',
    searXNGDescription:
      'Un composant qui effectue des recherches via la URL de l\'instance de SearXNG que vous fournissez. Spécifiez TopN et l\'URL de l\'instance.',
    channel: 'Canal',
    channelTip:
      "Effectuer une recherche de texte ou d'actualités sur l'entrée du composant",

@ -759,6 +759,9 @@ export default {
    duckDuckGo: 'DuckDuckGo',
    duckDuckGoDescription:
      'Komponen yang mengambil hasil pencarian dari duckduckgo.com, dengan TopN menentukan jumlah hasil pencarian. Ini melengkapi basis pengetahuan yang ada.',
    searXNG: 'SearXNG',
    searXNGDescription:
      'Komponen yang melakukan pencarian menggunakan URL instance SearXNG yang Anda berikan. Spesifikasikan TopN dan URL instance.',
    channel: 'Saluran',
    channelTip: `Lakukan pencarian teks atau pencarian berita pada input komponen`,
    text: 'Teks',

@ -739,6 +739,9 @@ export default {
    duckDuckGo: 'DuckDuckGo',
    duckDuckGoDescription:
      'duckduckgo.comから検索を行うコンポーネントで、TopNを使用して検索結果の数を指定します。既存のナレッジベースを補完します。',
    searXNG: 'SearXNG',
    searXNGDescription:
      'SearXNGのインスタンスURLを提供して検索を行うコンポーネント。TopNとインスタンスURLを指定してください。',
    channel: 'チャンネル',
    channelTip: `コンポーネントの入力に対してテキスト検索またはニュース検索を実行します`,
    text: 'テキスト',

@ -726,6 +726,9 @@ export default {
    duckDuckGo: 'DuckDuckGo',
    duckDuckGoDescription:
      'Um componente que realiza buscas no duckduckgo.com, permitindo especificar o número de resultados de pesquisa usando TopN. Ele complementa as bases de conhecimento existentes.',
    searXNG: 'SearXNG',
    searXNGDescription:
      'Um componente que realiza buscas via URL da instância SearXNG que você fornece. Especifique TopN e URL da instância.',
    channel: 'Canal',
    channelTip: `Realize uma busca por texto ou por notícias na entrada do componente`,
    text: 'Texto',

@ -859,6 +859,9 @@ export default {
    baiduDescription: `Ищет на baidu.com.`,
    duckDuckGo: 'DuckDuckGo',
    duckDuckGoDescription: 'Ищет на duckduckgo.com.',
    searXNG: 'SearXNG',
    searXNGDescription:
      'Компонент, который выполняет поиск по указанному вами URL-адресу экземпляра SearXNG. Укажите TopN и URL-адрес экземпляра.',
    channel: 'Канал',
    channelTip: `Текстовый или новостной поиск`,
    text: 'Текст',

@ -818,6 +818,9 @@ export default {
    duckDuckGo: 'DuckDuckGo',
    duckDuckGoDescription:
      'Một thành phần truy xuất kết quả tìm kiếm từ duckduckgo.com, với TopN xác định số lượng kết quả tìm kiếm. Nó bổ sung cho các cơ sở kiến thức hiện có.',
    searXNG: 'SearXNG',
    searXNGDescription:
      'Một thành phần truy xuất kết quả tìm kiếm từ searxng.com, với TopN xác định số lượng kết quả tìm kiếm. Nó bổ sung cho các cơ sở kiến thức hiện có.',
    channel: 'Kênh',
    channelTip: `Thực hiện tìm kiếm văn bản hoặc tìm kiếm tin tức trên đầu vào của thành phần`,
    text: 'Văn bản',

@ -845,6 +845,9 @@ export default {
    duckDuckGo: 'DuckDuckGo',
    duckDuckGoDescription:
      '此元件用於從 www.duckduckgo.com 取得搜尋結果。通常,它作為知識庫的補充。 Top N 指定您需要採用的搜尋結果數。',
    searXNG: 'SearXNG',
    searXNGDescription:
      '該組件通過您提供的 SearXNG 實例地址進行搜索。請設置 Top N 和實例 URL。',
    channel: '頻道',
    channelTip: '針對該組件的輸入進行文字搜尋或新聞搜索',
    text: '文字',

@ -971,6 +971,9 @@ General:实体和关系提取提示来自 GitHub - microsoft/graphrag:基于
    duckDuckGo: 'DuckDuckGo',
    duckDuckGoDescription:
      '此元件用於從 www.duckduckgo.com 取得搜尋結果。通常,它作為知識庫的補充。 Top N 指定您需要調整的搜尋結果數。',
    searXNG: 'SearXNG',
    searXNGDescription:
      '该组件通过您提供的 SearXNG 实例地址进行搜索。请设置 Top N 和实例 URL。',
    channel: '频道',
    channelTip: '针对该组件的输入进行文本搜索或新闻搜索',
    text: '文本',
@ -201,6 +201,7 @@ function AccordionOperators({
          Operator.GitHub,
          Operator.Invoke,
          Operator.WenCai,
          Operator.SearXNG,
        ]}
        isCustomDropdown={isCustomDropdown}
        mousePosition={mousePosition}
@ -18,6 +18,7 @@ import { memo, useCallback } from 'react';
import { useParams } from 'umi';
import DebugContent from '../debug-content';
import { useAwaitCompentData } from '../hooks/use-chat-logic';
import { useIsTaskMode } from '../hooks/use-get-begin-query';

function AgentChatBox() {
  const {
@ -48,6 +49,8 @@ function AgentChatBox() {
    canvasId: canvasId as string,
  });

  const isTaskMode = useIsTaskMode();

  const handleUploadFile: NonNullable<FileUploadProps['onUpload']> =
    useCallback(
      async (files, options) => {
@ -109,18 +112,20 @@ function AgentChatBox() {
        </div>
        <div ref={scrollRef} />
      </div>
      <NextMessageInput
        value={value}
        sendLoading={sendLoading}
        disabled={isWaitting}
        sendDisabled={sendLoading || isWaitting}
        isUploading={loading || isWaitting}
        onPressEnter={handlePressEnter}
        onInputChange={handleInputChange}
        stopOutputMessage={stopOutputMessage}
        onUpload={handleUploadFile}
        conversationId=""
      />
      {isTaskMode || (
        <NextMessageInput
          value={value}
          sendLoading={sendLoading}
          disabled={isWaitting}
          sendDisabled={sendLoading || isWaitting}
          isUploading={loading || isWaitting}
          onPressEnter={handlePressEnter}
          onInputChange={handleInputChange}
          stopOutputMessage={stopOutputMessage}
          onUpload={handleUploadFile}
          conversationId=""
        />
      )}
    </section>
    <PdfDrawer
      visible={visible}
@ -18,13 +18,23 @@ import i18n from '@/locales/config';
import api from '@/utils/api';
import { get } from 'lodash';
import trim from 'lodash/trim';
import { useCallback, useContext, useEffect, useMemo, useState } from 'react';
import {
  useCallback,
  useContext,
  useEffect,
  useMemo,
  useRef,
  useState,
} from 'react';
import { useParams } from 'umi';
import { v4 as uuid } from 'uuid';
import { BeginId } from '../constant';
import { AgentChatLogContext } from '../context';
import { transferInputsArrayToObject } from '../form/begin-form/use-watch-change';
import { useSelectBeginNodeDataInputs } from '../hooks/use-get-begin-query';
import {
  useIsTaskMode,
  useSelectBeginNodeDataInputs,
} from '../hooks/use-get-begin-query';
import { BeginQuery } from '../interface';
import useGraphStore from '../store';
import { receiveMessageError } from '../utils';
@ -173,10 +183,22 @@ export function useSetUploadResponseData() {
  };
}

export const buildRequestBody = (value: string = '') => {
  const id = uuid();
  const msgBody = {
    id,
    content: value.trim(),
    role: MessageType.User,
  };

  return msgBody;
};

export const useSendAgentMessage = (
  url?: string,
  addEventList?: (data: IEventList, messageId: string) => void,
  beginParams?: any[],
  isShared?: boolean,
) => {
  const { id: agentId } = useParams();
  const { handleInputChange, value, setValue } = useHandleMessageInputChange();
@ -188,7 +210,9 @@ export const useSendAgentMessage = (
    return answerList[0]?.message_id;
  }, [answerList]);

  // const { refetch } = useFetchAgent();
  const isTaskMode = useIsTaskMode();

  // const { refetch } = useFetchAgent(); // This will cause the shared page to also send a request

  const { findReferenceByMessageId } = useFindMessageReference(answerList);
  const prologue = useGetBeginNodePrologue();
@ -212,7 +236,14 @@ export const useSendAgentMessage = (
  } = useSetUploadResponseData();

  const sendMessage = useCallback(
    async ({ message }: { message: Message; messages?: Message[] }) => {
    async ({
      message,
      beginInputs,
    }: {
      message: Message;
      messages?: Message[];
      beginInputs?: BeginQuery[];
    }) => {
      const params: Record<string, unknown> = {
        id: agentId,
      };
@ -220,13 +251,13 @@ export const useSendAgentMessage = (
      params.running_hint_text = i18n.t('flow.runningHintText', {
        defaultValue: 'is running...🕞',
      });
      if (message.content) {
      if (typeof message.content === 'string') {
        const query = inputs;

        params.query = message.content;
        // params.message_id = message.id;
        params.inputs = transferInputsArrayToObject(
          beginParams ? beginParams : query,
          beginInputs || beginParams || query,
        ); // begin operator inputs

        params.files = uploadResponseList;
@ -289,12 +320,7 @@ export const useSendAgentMessage = (

  const handlePressEnter = useCallback(() => {
    if (trim(value) === '') return;
    const id = uuid();
    const msgBody = {
      id,
      content: value.trim(),
      role: MessageType.User,
    };
    const msgBody = buildRequestBody(value);
    if (done) {
      setValue('');
      sendMessage({
@ -315,6 +341,24 @@ export const useSendAgentMessage = (
    scrollToBottom,
  ]);

  const sendedTaskMessage = useRef<boolean>(false);

  const sendMessageInTaskMode = useCallback(() => {
    if (isShared || !isTaskMode || sendedTaskMessage.current) {
      return;
    }
    const msgBody = buildRequestBody('');

    sendMessage({
      message: msgBody,
    });
    sendedTaskMessage.current = true;
  }, [isShared, isTaskMode, sendMessage]);

  useEffect(() => {
    sendMessageInTaskMode();
  }, [sendMessageInTaskMode]);

  useEffect(() => {
    const { content, id } = findMessageFromList(answerList);
    const inputAnswer = findInputFromList(answerList);
@ -328,12 +372,22 @@ export const useSendAgentMessage = (
  }, [answerList, addNewestOneAnswer]);

  useEffect(() => {
    if (isTaskMode) {
      return;
    }
    if (prologue) {
      addNewestOneAnswer({
        answer: prologue,
      });
    }
  }, [addNewestOneAnswer, agentId, prologue, send, sendFormMessage]);
  }, [
    addNewestOneAnswer,
    agentId,
    isTaskMode,
    prologue,
    send,
    sendFormMessage,
  ]);

  useEffect(() => {
    if (typeof addEventList === 'function') {
@ -365,5 +419,6 @@ export const useSendAgentMessage = (
    findReferenceByMessageId,
    appendUploadResponseList,
    addNewestOneAnswer,
    sendMessage,
  };
};
@ -88,6 +88,7 @@ export enum Operator {
  TavilyExtract = 'TavilyExtract',
  UserFillUp = 'UserFillUp',
  StringTransform = 'StringTransform',
  SearXNG = 'SearXNG',
}

export const SwitchLogicOperatorOptions = ['and', 'or'];
@ -211,6 +212,9 @@ export const componentMenuList = [
  {
    name: Operator.Email,
  },
  {
    name: Operator.SearXNG,
  },
];

export const SwitchOperatorOptions = [
@ -340,6 +344,22 @@ export const initialDuckValues = {
  },
};

export const initialSearXNGValues = {
  top_n: '10',
  searxng_url: '',
  query: AgentGlobals.SysQuery,
  outputs: {
    formalized_content: {
      value: '',
      type: 'string',
    },
    json: {
      value: [],
      type: 'Array<Object>',
    },
  },
};

export const initialBaiduValues = {
  top_n: 10,
  ...initialQueryBaseValues,
@ -807,6 +827,7 @@ export const RestrictedUpstreamMap = {
  [Operator.GitHub]: [Operator.Begin, Operator.Retrieval],
  [Operator.BaiduFanyi]: [Operator.Begin, Operator.Retrieval],
  [Operator.QWeather]: [Operator.Begin, Operator.Retrieval],
  [Operator.SearXNG]: [Operator.Begin, Operator.Retrieval],
  [Operator.ExeSQL]: [Operator.Begin],
  [Operator.Switch]: [Operator.Begin],
  [Operator.WenCai]: [Operator.Begin],
@ -851,6 +872,7 @@ export const NodeMap = {
  [Operator.GitHub]: 'ragNode',
  [Operator.BaiduFanyi]: 'ragNode',
  [Operator.QWeather]: 'ragNode',
  [Operator.SearXNG]: 'ragNode',
  [Operator.ExeSQL]: 'ragNode',
  [Operator.Switch]: 'switchNode',
  [Operator.Concentrator]: 'logicNode',
@ -27,6 +27,7 @@ import QWeatherForm from '../form/qweather-form';
import RelevantForm from '../form/relevant-form';
import RetrievalForm from '../form/retrieval-form/next';
import RewriteQuestionForm from '../form/rewrite-question-form';
import SearXNGForm from '../form/searxng-form';
import StringTransformForm from '../form/string-transform-form';
import SwitchForm from '../form/switch-form';
import TavilyExtractForm from '../form/tavily-extract-form';
@ -132,6 +133,9 @@ export const FormConfigMap = {
  [Operator.Invoke]: {
    component: InvokeForm,
  },
  [Operator.SearXNG]: {
    component: SearXNGForm,
  },
  [Operator.Concentrator]: {
    component: () => <></>,
  },
@ -27,6 +27,7 @@ const Menus = [
  // Operator.Bing,
  Operator.DuckDuckGo,
  Operator.Wikipedia,
  Operator.SearXNG,
  Operator.YahooFinance,
  Operator.PubMed,
  Operator.GoogleScholar,
@ -10,7 +10,6 @@ import { HeadingNode, QuoteNode } from '@lexical/rich-text';
import {
  $getRoot,
  $getSelection,
  $nodesOfType,
  EditorState,
  Klass,
  LexicalNode,
@ -135,9 +134,8 @@ export function PromptEditor({
  const onValueChange = useCallback(
    (editorState: EditorState) => {
      editorState?.read(() => {
        const listNodes = $nodesOfType(VariableNode); // to be removed
        // const listNodes = $nodesOfType(VariableNode); // to be removed
        // const allNodes = $dfs();
        console.log('🚀 ~ onChange ~ allNodes:', listNodes);

        const text = $getRoot().getTextContent();
@ -1,4 +1,3 @@
import i18n from '@/locales/config';
import { BeginId } from '@/pages/flow/constant';
import { DecoratorNode, LexicalNode, NodeKey } from 'lexical';
import { ReactNode } from 'react';
@ -7,19 +6,36 @@ const prefix = BeginId + '@';
export class VariableNode extends DecoratorNode<ReactNode> {
  __value: string;
  __label: string;
  key?: NodeKey;
  __parentLabel?: string | ReactNode;
  __icon?: ReactNode;

  static getType(): string {
    return 'variable';
  }

  static clone(node: VariableNode): VariableNode {
    return new VariableNode(node.__value, node.__label, node.__key);
    return new VariableNode(
      node.__value,
      node.__label,
      node.__key,
      node.__parentLabel,
      node.__icon,
    );
  }

  constructor(value: string, label: string, key?: NodeKey) {
  constructor(
    value: string,
    label: string,
    key?: NodeKey,
    parent?: string | ReactNode,
    icon?: ReactNode,
  ) {
    super(key);
    this.__value = value;
    this.__label = label;
    this.__parentLabel = parent;
    this.__icon = icon;
  }

  createDOM(): HTMLElement {
@ -35,17 +51,20 @@ export class VariableNode extends DecoratorNode<ReactNode> {

  decorate(): ReactNode {
    let content: ReactNode = (
      <span className="text-blue-600">{this.__label}</span>
      <div className="text-blue-600">{this.__label}</div>
    );
    if (this.__value?.startsWith(prefix)) {
    if (this.__parentLabel) {
      content = (
        <div>
          <span>{i18n.t(`flow.begin`)}</span> / {content}
        <div className="flex items-center gap-1 text-text-primary ">
          <div>{this.__icon}</div>
          <div>{this.__parentLabel}</div>
          <div className="text-text-disabled mr-1">/</div>
          {content}
        </div>
      );
    }
    return (
      <div className="bg-gray-200 dark:bg-gray-400 text-primary inline-flex items-center rounded-md px-2 py-0">
      <div className="bg-gray-200 dark:bg-gray-400 text-sm inline-flex items-center rounded-md px-2 py-1">
        {content}
      </div>
    );
@ -59,8 +78,10 @@ export class VariableNode extends DecoratorNode<ReactNode> {
export function $createVariableNode(
  value: string,
  label: string,
  parentLabel: string | ReactNode,
  icon?: ReactNode,
): VariableNode {
  return new VariableNode(value, label);
  return new VariableNode(value, label, undefined, parentLabel, icon);
}

export function $isVariableNode(
@ -20,7 +20,13 @@ import {
  $isRangeSelection,
  TextNode,
} from 'lexical';
import React, { ReactElement, useCallback, useEffect, useRef } from 'react';
import React, {
  ReactElement,
  ReactNode,
  useCallback,
  useEffect,
  useRef,
} from 'react';
import * as ReactDOM from 'react-dom';

import { $createVariableNode } from './variable-node';
@ -31,11 +37,20 @@ import './index.css';
class VariableInnerOption extends MenuOption {
  label: string;
  value: string;
  parentLabel: string | JSX.Element;
  icon?: ReactNode;

  constructor(label: string, value: string) {
  constructor(
    label: string,
    value: string,
    parentLabel: string | JSX.Element,
    icon?: ReactNode,
  ) {
    super(value);
    this.label = label;
    this.value = value;
    this.parentLabel = parentLabel;
    this.icon = icon;
  }
}

@ -111,7 +126,6 @@ export default function VariablePickerMenuPlugin({

  const buildNextOptions = useCallback(() => {
    let filteredOptions = options;

    if (queryString) {
      const lowerQuery = queryString.toLowerCase();
      filteredOptions = options
@ -131,23 +145,28 @@ export default function VariablePickerMenuPlugin({
        new VariableOption(
          x.label,
          x.title,
          x.options.map((y) => new VariableInnerOption(y.label, y.value)),
          x.options.map((y) => {
            return new VariableInnerOption(y.label, y.value, x.label, y.icon);
          }),
        ),
      );

    return nextOptions;
  }, [options, queryString]);

  const findLabelByValue = useCallback(
  const findItemByValue = useCallback(
    (value: string) => {
      const children = options.reduce<Array<{ label: string; value: string }>>(
        (pre, cur) => {
          return pre.concat(cur.options);
        },
        [],
      );
      const children = options.reduce<
        Array<{
          label: string;
          value: string;
          parentLabel?: string | ReactNode;
          icon?: ReactNode;
        }>
      >((pre, cur) => {
        return pre.concat(cur.options);
      }, []);

      return children.find((x) => x.value === value)?.label;
      return children.find((x) => x.value === value);
    },
    [options],
  );
@ -168,13 +187,13 @@ export default function VariablePickerMenuPlugin({
        if (nodeToRemove) {
          nodeToRemove.remove();
        }

        selection.insertNodes([
          $createVariableNode(
            (selectedOption as VariableInnerOption).value,
            selectedOption.label as string,
          ),
        ]);
        const variableNode = $createVariableNode(
          (selectedOption as VariableInnerOption).value,
          selectedOption.label as string,
          selectedOption.parentLabel as string | ReactNode,
          selectedOption.icon as ReactNode,
        );
        selection.insertNodes([variableNode]);

        closeMenu();
      });
@ -190,7 +209,6 @@ export default function VariablePickerMenuPlugin({
    const regex = /{([^}]*)}/g;
    let match;
    let lastIndex = 0;

    while ((match = regex.exec(text)) !== null) {
      const { 1: content, index, 0: template } = match;

@ -202,9 +220,17 @@ export default function VariablePickerMenuPlugin({
      }

      // Add variable node or text node
      const label = findLabelByValue(content);
      if (label) {
        paragraph.append($createVariableNode(content, label));
      const nodeItem = findItemByValue(content);

      if (nodeItem) {
        paragraph.append(
          $createVariableNode(
            content,
            nodeItem.label,
            nodeItem.parentLabel,
            nodeItem.icon,
          ),
        );
      } else {
        paragraph.append($createTextNode(template));
      }
@ -225,7 +251,7 @@ export default function VariablePickerMenuPlugin({
      $getRoot().selectEnd();
    }
  },
  [findLabelByValue],
  [findItemByValue],
);

useEffect(() => {
web/src/pages/agent/form/searxng-form/index.tsx (new file, 73 lines)
@ -0,0 +1,73 @@
import { FormContainer } from '@/components/form-container';
import { TopNFormField } from '@/components/top-n-item';
import {
  Form,
  FormControl,
  FormField,
  FormItem,
  FormLabel,
  FormMessage,
} from '@/components/ui/form';
import { Input } from '@/components/ui/input';
import { useTranslate } from '@/hooks/common-hooks';
import { zodResolver } from '@hookform/resolvers/zod';
import { memo } from 'react';
import { useForm } from 'react-hook-form';
import { z } from 'zod';
import { initialSearXNGValues } from '../../constant';
import { useFormValues } from '../../hooks/use-form-values';
import { useWatchFormChange } from '../../hooks/use-watch-form-change';
import { INextOperatorForm } from '../../interface';
import { buildOutputList } from '../../utils/build-output-list';
import { FormWrapper } from '../components/form-wrapper';
import { Output } from '../components/output';
import { QueryVariable } from '../components/query-variable';

const FormSchema = z.object({
  query: z.string(),
  searxng_url: z.string().min(1),
  top_n: z.string(),
});

const outputList = buildOutputList(initialSearXNGValues.outputs);

function SearXNGForm({ node }: INextOperatorForm) {
  const { t } = useTranslate('flow');
  const defaultValues = useFormValues(initialSearXNGValues, node);

  const form = useForm<z.infer<typeof FormSchema>>({
    defaultValues,
    resolver: zodResolver(FormSchema),
  });

  useWatchFormChange(node?.id, form);

  return (
    <Form {...form}>
      <FormWrapper>
        <FormContainer>
          <QueryVariable></QueryVariable>
          <TopNFormField></TopNFormField>
          <FormField
            control={form.control}
            name="searxng_url"
            render={({ field }) => (
              <FormItem>
                <FormLabel>SearXNG URL</FormLabel>
                <FormControl>
                  <Input {...field} placeholder="http://localhost:4000" />
                </FormControl>
                <FormMessage />
              </FormItem>
            )}
          />
        </FormContainer>
      </FormWrapper>
      <div className="p-5">
        <Output list={outputList}></Output>
      </div>
    </Form>
  );
}

export default memo(SearXNGForm);
@ -12,6 +12,7 @@ import GoogleForm from './google-form';
import GoogleScholarForm from './google-scholar-form';
import PubMedForm from './pubmed-form';
import RetrievalForm from './retrieval-form';
import SearXNGForm from './searxng-form';
import TavilyForm from './tavily-form';
import WenCaiForm from './wencai-form';
import WikipediaForm from './wikipedia-form';
@ -37,4 +38,5 @@ export const ToolFormConfigMap = {
  [Operator.TavilySearch]: TavilyForm,
  [Operator.TavilyExtract]: TavilyForm,
  [Operator.WenCai]: WenCaiForm,
  [Operator.SearXNG]: SearXNGForm,
};
web/src/pages/agent/form/tool-form/searxng-form/index.tsx (new file, 58 lines)
@ -0,0 +1,58 @@
import { FormContainer } from '@/components/form-container';
import { TopNFormField } from '@/components/top-n-item';
import {
  Form,
  FormControl,
  FormField,
  FormItem,
  FormLabel,
  FormMessage,
} from '@/components/ui/form';
import { Input } from '@/components/ui/input';
import { useTranslate } from '@/hooks/common-hooks';
import { zodResolver } from '@hookform/resolvers/zod';
import { memo } from 'react';
import { useForm } from 'react-hook-form';
import { z } from 'zod';
import { useValues } from '../use-values';
import { useWatchFormChange } from '../use-watch-change';

const FormSchema = z.object({
  searxng_url: z.string().min(1),
  top_n: z.string(),
});

function SearXNGForm() {
  const { t } = useTranslate('flow');
  const values = useValues();

  const form = useForm<z.infer<typeof FormSchema>>({
    defaultValues: values as any,
    resolver: zodResolver(FormSchema),
  });

  useWatchFormChange(form);

  return (
    <Form {...form}>
      <FormContainer>
        <TopNFormField></TopNFormField>
        <FormField
          control={form.control}
          name="searxng_url"
          render={({ field }) => (
            <FormItem>
              <FormLabel>SearXNG URL</FormLabel>
              <FormControl>
                <Input {...field} placeholder="http://localhost:4000" />
              </FormControl>
              <FormMessage />
            </FormItem>
          )}
        />
      </FormContainer>
    </Form>
  );
}

export default memo(SearXNGForm);
Some files were not shown because too many files have changed in this diff.