Mirror of https://github.com/infiniflow/ragflow.git, synced 2026-02-04 09:35:06 +08:00

Compare commits: 522c7b7ac6...2c727a4a9c (10 commits)
| SHA1 |
| --- |
| 2c727a4a9c |
| a15f522dc9 |
| de53498b39 |
| 72740eb5b9 |
| c30ffb5716 |
| 6dcff7db97 |
| 9213568692 |
| d81e4095de |
| 8ddeaca3d6 |
| f441f8ffc2 |
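The same range can be reviewed against a local clone of the mirror, e.g. `git log --oneline 522c7b7ac6..2c727a4a9c` to list these commits or `git diff 522c7b7ac6...2c727a4a9c` to reproduce the combined diff below.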
@@ -193,6 +193,9 @@ releases! 🌟
 ```bash
 $ cd ragflow/docker
+
+# Optional: use a stable tag (see releases: https://github.com/infiniflow/ragflow/releases), e.g.: git checkout v0.21.1
+
 # Use CPU for embedding and DeepDoc tasks:
 $ docker compose -f docker-compose.yml up -d
@@ -191,6 +191,9 @@ Coba demo kami di [https://demo.ragflow.io](https://demo.ragflow.io).
 ```bash
 $ cd ragflow/docker
+
+# Opsional: gunakan tag stabil (lihat releases: https://github.com/infiniflow/ragflow/releases), contoh: git checkout v0.21.1
+
 # Use CPU for embedding and DeepDoc tasks:
 $ docker compose -f docker-compose.yml up -d
@@ -170,6 +170,9 @@
 ```bash
 $ cd ragflow/docker
+
+# 任意: 安定版タグを利用 (一覧: https://github.com/infiniflow/ragflow/releases) 例: git checkout v0.21.1
+
 # Use CPU for embedding and DeepDoc tasks:
 $ docker compose -f docker-compose.yml up -d
@@ -177,6 +180,7 @@
 # sed -i '1i DEVICE=gpu' .env
 # docker compose -f docker-compose.yml up -d
 ```

 | RAGFlow image tag | Image size (GB) | Has embedding models? | Stable?                    |
 | ----------------- | --------------- | --------------------- | -------------------------- |
@@ -172,6 +172,9 @@
 ```bash
 $ cd ragflow/docker
+
+# Optional: use a stable tag (see releases: https://github.com/infiniflow/ragflow/releases), e.g.: git checkout v0.21.1
+
 # Use CPU for embedding and DeepDoc tasks:
 $ docker compose -f docker-compose.yml up -d
@@ -190,6 +190,9 @@ Experimente nossa demo em [https://demo.ragflow.io](https://demo.ragflow.io).
 ```bash
 $ cd ragflow/docker
+
+# Opcional: use uma tag estável (veja releases: https://github.com/infiniflow/ragflow/releases), ex.: git checkout v0.21.1
+
 # Use CPU for embedding and DeepDoc tasks:
 $ docker compose -f docker-compose.yml up -d
@@ -189,6 +189,9 @@
 ```bash
 $ cd ragflow/docker
+
+# 可選:使用穩定版標籤(查看發佈:https://github.com/infiniflow/ragflow/releases),例:git checkout v0.21.1
+
 # Use CPU for embedding and DeepDoc tasks:
 $ docker compose -f docker-compose.yml up -d
@@ -190,6 +190,9 @@
 ```bash
 $ cd ragflow/docker
+
+# 可选:使用稳定版本标签(查看发布:https://github.com/infiniflow/ragflow/releases),例如:git checkout v0.21.1
+
 # Use CPU for embedding and DeepDoc tasks:
 $ docker compose -f docker-compose.yml up -d
@@ -26,7 +26,9 @@ from typing import Any, Union, Tuple
 from agent.component import component_class
 from agent.component.base import ComponentBase
 from api.db.services.file_service import FileService
+from api.db.services.task_service import has_canceled
 from common.misc_utils import get_uuid, hash_str2int
+from common.exceptions import TaskCanceledException
 from rag.prompts.generator import chunks_format
 from rag.utils.redis_conn import REDIS_CONN

@@ -126,6 +128,7 @@ class Graph:
             self.components[k]["obj"].reset()
         try:
             REDIS_CONN.delete(f"{self.task_id}-logs")
+            REDIS_CONN.delete(f"{self.task_id}-cancel")
         except Exception as e:
             logging.exception(e)

@@ -196,7 +199,7 @@ class Graph:
         if not rest:
             return root_val
         return self.get_variable_param_value(root_val,rest)

     def get_variable_param_value(self, obj: Any, path: str) -> Any:
         cur = obj
         if not path:
@@ -215,6 +218,17 @@ class Graph:
             cur = getattr(cur, key, None)
         return cur

+    def is_canceled(self) -> bool:
+        return has_canceled(self.task_id)
+
+    def cancel_task(self) -> bool:
+        try:
+            REDIS_CONN.set(f"{self.task_id}-cancel", "x")
+        except Exception as e:
+            logging.exception(e)
+            return False
+        return True
+
 class Canvas(Graph):

@@ -239,7 +253,7 @@ class Canvas(Graph):
             "sys.conversation_turns": 0,
             "sys.files": []
         }

         self.retrieval = self.dsl["retrieval"]
         self.memory = self.dsl.get("memory", [])

@@ -311,10 +325,20 @@ class Canvas(Graph):
             self.path.append("begin")
             self.retrieval.append({"chunks": [], "doc_aggs": []})

+        if self.is_canceled():
+            msg = f"Task {self.task_id} has been canceled before starting."
+            logging.info(msg)
+            raise TaskCanceledException(msg)
+
         yield decorate("workflow_started", {"inputs": kwargs.get("inputs")})
         self.retrieval.append({"chunks": {}, "doc_aggs": {}})

         def _run_batch(f, t):
+            if self.is_canceled():
+                msg = f"Task {self.task_id} has been canceled during batch execution."
+                logging.info(msg)
+                raise TaskCanceledException(msg)
+
             with ThreadPoolExecutor(max_workers=5) as executor:
                 thr = []
                 i = f
@@ -456,6 +480,7 @@ class Canvas(Graph):
         for c in path:
             o = self.get_component_obj(c)
             if o.component_name.lower() == "userfillup":
+                o.invoke()
                 another_inputs.update(o.get_input_elements())
                 if o.get_param("enable_tips"):
                     tips = o.output("tips")
@@ -472,6 +497,14 @@ class Canvas(Graph):
                     "created_at": st,
                 })
             self.history.append(("assistant", self.get_component_obj(self.path[-1]).output()))
+        elif "Task has been canceled" in self.error:
+            yield decorate("workflow_finished",
+                           {
+                               "inputs": kwargs.get("inputs"),
+                               "outputs": "Task has been canceled",
+                               "elapsed_time": time.perf_counter() - st,
+                               "created_at": st,
+                           })

     def is_reff(self, exp: str) -> bool:
         exp = exp.strip("{").strip("}")
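The core of the canvas changes above is a Redis-backed cancel flag keyed by task ID: `cancel_task()` sets `{task_id}-cancel`, `has_canceled()` polls it, and `reset()` clears it together with the log stream. A minimal, self-contained sketch of that round trip using plain redis-py — RAGFlow's `REDIS_CONN` wrapper and the `has_canceled` helper are not shown in this diff, so their exact signatures here are assumptions:

```python
import redis

r = redis.Redis()  # stand-in for RAGFlow's REDIS_CONN wrapper

def cancel_task(task_id: str) -> bool:
    # Producer side: mark the task as canceled; workers notice cooperatively.
    try:
        r.set(f"{task_id}-cancel", "x")
    except Exception:
        return False
    return True

def has_canceled(task_id: str) -> bool:
    # Consumer side: assumed to be a simple existence check on the flag key.
    return bool(r.exists(f"{task_id}-cancel"))

def reset(task_id: str) -> None:
    # Mirrors Graph.reset(): drop the log stream and the cancel flag together.
    r.delete(f"{task_id}-logs", f"{task_id}-cancel")
```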
@@ -139,6 +139,9 @@ class Agent(LLM, ToolBase):
     @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 20*60)))
     def _invoke(self, **kwargs):
+        if self.check_if_canceled("Agent processing"):
+            return
+
         if kwargs.get("user_prompt"):
             usr_pmt = ""
             if kwargs.get("reasoning"):
@@ -152,6 +155,8 @@ class Agent(LLM, ToolBase):
             self._param.prompts = [{"role": "user", "content": usr_pmt}]

         if not self.tools:
+            if self.check_if_canceled("Agent processing"):
+                return
             return LLM._invoke(self, **kwargs)

         prompt, msg, user_defined_prompt = self._prepare_prompt_variables()
@@ -171,6 +176,8 @@ class Agent(LLM, ToolBase):
         use_tools = []
         ans = ""
         for delta_ans, tk in self._react_with_tools_streamly(prompt, msg, use_tools, user_defined_prompt):
+            if self.check_if_canceled("Agent processing"):
+                return
             ans += delta_ans

         if ans.find("**ERROR**") >= 0:
@@ -191,12 +198,16 @@ class Agent(LLM, ToolBase):
         answer_without_toolcall = ""
         use_tools = []
         for delta_ans,_ in self._react_with_tools_streamly(prompt, msg, use_tools, user_defined_prompt):
+            if self.check_if_canceled("Agent streaming"):
+                return
+
             if delta_ans.find("**ERROR**") >= 0:
                 if self.get_exception_default_value():
                     self.set_output("content", self.get_exception_default_value())
                     yield self.get_exception_default_value()
                 else:
                     self.set_output("_ERROR", delta_ans)
                 return
             answer_without_toolcall += delta_ans
             yield delta_ans

@@ -271,6 +282,8 @@ class Agent(LLM, ToolBase):
         st = timer()
         txt = ""
         for delta_ans in self._gen_citations(entire_txt):
+            if self.check_if_canceled("Agent streaming"):
+                return
             yield delta_ans, 0
             txt += delta_ans

@@ -286,6 +299,8 @@ class Agent(LLM, ToolBase):
         task_desc = analyze_task(self.chat_mdl, prompt, user_request, tool_metas, user_defined_prompt)
         self.callback("analyze_task", {}, task_desc, elapsed_time=timer()-st)
         for _ in range(self._param.max_rounds + 1):
+            if self.check_if_canceled("Agent streaming"):
+                return
             response, tk = next_step(self.chat_mdl, hist, tool_metas, task_desc, user_defined_prompt)
             # self.callback("next_step", {}, str(response)[:256]+"...")
             token_count += tk
@@ -333,6 +348,8 @@ Instructions:
 6. Focus on delivering VALUE with the information already gathered
 Respond immediately with your final comprehensive answer.
 """
+        if self.check_if_canceled("Agent final instruction"):
+            return
         append_user_content(hist, final_instruction)

         for txt, tkcnt in complete():
@@ -417,6 +417,20 @@ class ComponentBase(ABC):
         self._param = param
         self._param.check()

+    def is_canceled(self) -> bool:
+        return self._canvas.is_canceled()
+
+    def check_if_canceled(self, message: str = "") -> bool:
+        if self.is_canceled():
+            task_id = getattr(self._canvas, 'task_id', 'unknown')
+            log_message = f"Task {task_id} has been canceled"
+            if message:
+                log_message += f" during {message}"
+            logging.info(log_message)
+            self.set_output("_ERROR", "Task has been canceled")
+            return True
+        return False
+
     def invoke(self, **kwargs) -> dict[str, Any]:
         self.set_output("_created_time", time.perf_counter())
         try:
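Every component hunk below applies the same idiom on top of this `check_if_canceled` helper: re-check the flag at loop boundaries and on either side of blocking calls, so cancellation takes effect at the next safe point instead of interrupting work mid-flight. A generic sketch of that pattern — the names here are illustrative, not RAGFlow's API:

```python
import time

def run_with_retries(component, work, max_retries=3, delay=1.0):
    # `component` is assumed to expose check_if_canceled() as defined above;
    # `work` is the blocking call (HTTP request, LLM chat, SQL query, ...).
    last_err = None
    for _ in range(max_retries + 1):
        if component.check_if_canceled("retry loop"):
            return None          # bail out before starting another attempt
        try:
            result = work()
            if component.check_if_canceled("post work"):
                return None      # the result is no longer wanted
            return result
        except Exception as e:
            if component.check_if_canceled("error handling"):
                return None      # don't sleep/retry a canceled task
            last_err = e
            time.sleep(delay)
    raise RuntimeError(f"all retries failed: {last_err}")
```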
@@ -37,7 +37,13 @@ class Begin(UserFillUp):
     component_name = "Begin"

     def _invoke(self, **kwargs):
+        if self.check_if_canceled("Begin processing"):
+            return
+
         for k, v in kwargs.get("inputs", {}).items():
+            if self.check_if_canceled("Begin processing"):
+                return
+
             if isinstance(v, dict) and v.get("type", "").lower().find("file") >=0:
                 if v.get("optional") and v.get("value", None) is None:
                     v = None
@@ -98,6 +98,9 @@ class Categorize(LLM, ABC):
     @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10*60)))
     def _invoke(self, **kwargs):
+        if self.check_if_canceled("Categorize processing"):
+            return
+
         msg = self._canvas.get_history(self._param.message_history_window_size)
         if not msg:
             msg = [{"role": "user", "content": ""}]
@@ -114,10 +117,18 @@ class Categorize(LLM, ABC):
         ---- Real Data ----
         {} →
         """.format(" | ".join(["{}: \"{}\"".format(c["role"].upper(), re.sub(r"\n", "", c["content"], flags=re.DOTALL)) for c in msg]))
+
+        if self.check_if_canceled("Categorize processing"):
+            return
+
         ans = chat_mdl.chat(self._param.sys_prompt, [{"role": "user", "content": user_prompt}], self._param.gen_conf())
         logging.info(f"input: {user_prompt}, answer: {str(ans)}")
         if ERROR_PREFIX in ans:
             raise Exception(ans)
+
+        if self.check_if_canceled("Categorize processing"):
+            return
+
         # Count the number of times each category appears in the answer.
         category_counts = {}
         for c in self._param.category_description.keys():
@@ -47,6 +47,7 @@ class DataOperations(ComponentBase,ABC):
             inputs = [inputs]
         for input_ref in inputs:
             input_object=self._canvas.get_variable_value(input_ref)
+            self.set_input_value(input_ref, input_object)
             if input_object is None:
                 continue
             if isinstance(input_object,dict):
@@ -13,10 +13,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-from agent.component.message import MessageParam, Message
+import json
+import re
+from functools import partial
+
+from agent.component.base import ComponentParamBase, ComponentBase


-class UserFillUpParam(MessageParam):
+class UserFillUpParam(ComponentParamBase):

     def __init__(self):
         super().__init__()
@@ -27,17 +31,39 @@ class UserFillUpParam(MessageParam):
         return True


-class UserFillUp(Message):
+class UserFillUp(ComponentBase):
     component_name = "UserFillUp"

     def _invoke(self, **kwargs):
+        if self.check_if_canceled("UserFillUp processing"):
+            return
+
         if self._param.enable_tips:
-            tips, kwargs = self.get_kwargs(self._param.tips)
-            self.set_output("tips", tips)
+            content = self._param.tips
+            for k, v in self.get_input_elements_from_text(self._param.tips).items():
+                v = v["value"]
+                ans = ""
+                if isinstance(v, partial):
+                    for t in v():
+                        ans += t
+                elif isinstance(v, list):
+                    ans = ",".join([str(vv) for vv in v])
+                elif not isinstance(v, str):
+                    try:
+                        ans = json.dumps(v, ensure_ascii=False)
+                    except Exception:
+                        pass
+                else:
+                    ans = v
+                if not ans:
+                    ans = ""
+                content = re.sub(r"\{%s\}"%k, ans, content)
+
+            self.set_output("tips", content)
         for k, v in kwargs.get("inputs", {}).items():
+            if self.check_if_canceled("UserFillUp processing"):
+                return
             self.set_output(k, v)

     def thoughts(self) -> str:
         return "Waiting for your input..."
@@ -56,6 +56,9 @@ class Invoke(ComponentBase, ABC):
     @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 3)))
     def _invoke(self, **kwargs):
+        if self.check_if_canceled("Invoke processing"):
+            return
+
         args = {}
         for para in self._param.variables:
             if para.get("value"):
@@ -89,6 +92,9 @@ class Invoke(ComponentBase, ABC):
         last_e = ""
         for _ in range(self._param.max_retries + 1):
+            if self.check_if_canceled("Invoke processing"):
+                return
+
             try:
                 if method == "get":
                     response = requests.get(url=url, params=args, headers=headers, proxies=proxies, timeout=self._param.timeout)
@@ -121,6 +127,9 @@ class Invoke(ComponentBase, ABC):
                 return self.output("result")
             except Exception as e:
+                if self.check_if_canceled("Invoke processing"):
+                    return
+
                 last_e = e
                 logging.exception(f"Http request error: {e}")
                 time.sleep(self._param.delay_after_error)
@@ -56,6 +56,9 @@ class Iteration(ComponentBase, ABC):
         return cid

     def _invoke(self, **kwargs):
+        if self.check_if_canceled("Iteration processing"):
+            return
+
         arr = self._canvas.get_variable_value(self._param.items_ref)
         if not isinstance(arr, list):
             self.set_output("_ERROR", self._param.items_ref + " must be an array, but its type is "+str(type(arr)))
@@ -33,6 +33,9 @@ class IterationItem(ComponentBase, ABC):
         self._idx = 0

     def _invoke(self, **kwargs):
+        if self.check_if_canceled("IterationItem processing"):
+            return
+
         parent = self.get_parent()
         arr = self._canvas.get_variable_value(parent._param.items_ref)
         if not isinstance(arr, list):
@@ -40,12 +43,17 @@ class IterationItem(ComponentBase, ABC):
             raise Exception(parent._param.items_ref + " must be an array, but its type is "+str(type(arr)))

         if self._idx > 0:
+            if self.check_if_canceled("IterationItem processing"):
+                return
             self.output_collation()

         if self._idx >= len(arr):
             self._idx = -1
             return

+        if self.check_if_canceled("IterationItem processing"):
+            return
+
         self.set_output("item", arr[self._idx])
         self.set_output("index", self._idx)

@@ -80,4 +88,4 @@ class IterationItem(ComponentBase, ABC):
         return self._idx == -1

     def thoughts(self) -> str:
-        return "Next turn..."
+        return "Next turn..."
@@ -207,6 +207,9 @@ class LLM(ComponentBase):
     @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10*60)))
     def _invoke(self, **kwargs):
+        if self.check_if_canceled("LLM processing"):
+            return
+
         def clean_formated_answer(ans: str) -> str:
             ans = re.sub(r"^.*</think>", "", ans, flags=re.DOTALL)
             ans = re.sub(r"^.*```json", "", ans, flags=re.DOTALL)
@@ -223,6 +226,9 @@ class LLM(ComponentBase):
             schema=json.dumps(output_structure, ensure_ascii=False, indent=2)
             prompt += structured_output_prompt(schema)
             for _ in range(self._param.max_retries+1):
+                if self.check_if_canceled("LLM processing"):
+                    return
+
                 _, msg = message_fit_in([{"role": "system", "content": prompt}, *msg], int(self.chat_mdl.max_length * 0.97))
                 error = ""
                 ans = self._generate(msg)
@@ -248,6 +254,9 @@ class LLM(ComponentBase):
             return

         for _ in range(self._param.max_retries+1):
+            if self.check_if_canceled("LLM processing"):
+                return
+
             _, msg = message_fit_in([{"role": "system", "content": prompt}, *msg], int(self.chat_mdl.max_length * 0.97))
             error = ""
             ans = self._generate(msg)
@@ -269,6 +278,9 @@ class LLM(ComponentBase):
         _, msg = message_fit_in([{"role": "system", "content": prompt}, *msg], int(self.chat_mdl.max_length * 0.97))
         answer = ""
         for ans in self._generate_streamly(msg):
+            if self.check_if_canceled("LLM streaming"):
+                return
+
             if ans.find("**ERROR**") >= 0:
                 if self.get_exception_default_value():
                     self.set_output("content", self.get_exception_default_value())
@@ -287,4 +299,4 @@ class LLM(ComponentBase):

     def thoughts(self) -> str:
         _, msg,_ = self._prepare_prompt_variables()
-        return "⌛Give me a moment—starting from: \n\n" + re.sub(r"(User's query:|[\\]+)", '', msg[-1]['content'], flags=re.DOTALL) + "\n\nI’ll figure out our best next move."
+        return "⌛Give me a moment—starting from: \n\n" + re.sub(r"(User's query:|[\\]+)", '', msg[-1]['content'], flags=re.DOTALL) + "\n\nI’ll figure out our best next move."
@@ -89,6 +89,9 @@ class Message(ComponentBase):
         all_content = ""
         cache = {}
         for r in re.finditer(self.variable_ref_patt, rand_cnt, flags=re.DOTALL):
+            if self.check_if_canceled("Message streaming"):
+                return
+
             all_content += rand_cnt[s: r.start()]
             yield rand_cnt[s: r.start()]
             s = r.end()
@@ -104,6 +107,9 @@ class Message(ComponentBase):
             if isinstance(v, partial):
                 cnt = ""
                 for t in v():
+                    if self.check_if_canceled("Message streaming"):
+                        return
+
                     all_content += t
                     cnt += t
                     yield t
@@ -111,7 +117,7 @@ class Message(ComponentBase):
                 continue
             elif not isinstance(v, str):
                 try:
-                    v = json.dumps(v, ensure_ascii=False, indent=2)
+                    v = json.dumps(v, ensure_ascii=False)
                 except Exception:
                     v = str(v)
             yield v
@@ -120,6 +126,9 @@ class Message(ComponentBase):
             cache[exp] = v

         if s < len(rand_cnt):
+            if self.check_if_canceled("Message streaming"):
+                return
+
             all_content += rand_cnt[s: ]
             yield rand_cnt[s: ]

@@ -133,6 +142,9 @@ class Message(ComponentBase):
     @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10*60)))
     def _invoke(self, **kwargs):
+        if self.check_if_canceled("Message processing"):
+            return
+
         rand_cnt = random.choice(self._param.content)
         if self._param.stream and not self._is_jinjia2(rand_cnt):
             self.set_output("content", partial(self._stream, rand_cnt))
@@ -145,6 +157,9 @@ class Message(ComponentBase):
             except Exception:
                 pass

+        if self.check_if_canceled("Message processing"):
+            return
+
         for n, v in kwargs.items():
             content = re.sub(n, v, content)
@@ -63,17 +63,24 @@ class StringTransform(Message, ABC):
     @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10*60)))
     def _invoke(self, **kwargs):
+        if self.check_if_canceled("StringTransform processing"):
+            return
+
         if self._param.method == "split":
             self._split(kwargs.get("line"))
         else:
             self._merge(kwargs)

     def _split(self, line:str|None = None):
+        if self.check_if_canceled("StringTransform split processing"):
+            return
+
         var = self._canvas.get_variable_value(self._param.split_ref) if not line else line
         if not var:
             var = ""
         assert isinstance(var, str), "The input variable is not a string: {}".format(type(var))
         self.set_input_value(self._param.split_ref, var)

         res = []
         for i,s in enumerate(re.split(r"(%s)"%("|".join([re.escape(d) for d in self._param.delimiters])), var, flags=re.DOTALL)):
             if i % 2 == 1:
@@ -82,6 +89,9 @@ class StringTransform(Message, ABC):
         self.set_output("result", res)

     def _merge(self, kwargs:dict[str, str] = {}):
+        if self.check_if_canceled("StringTransform merge processing"):
+            return
+
         script = self._param.script
         script, kwargs = self.get_kwargs(script, kwargs, self._param.delimiters[0])
@@ -63,9 +63,18 @@ class Switch(ComponentBase, ABC):
     @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 3)))
     def _invoke(self, **kwargs):
+        if self.check_if_canceled("Switch processing"):
+            return
+
         for cond in self._param.conditions:
+            if self.check_if_canceled("Switch processing"):
+                return
+
             res = []
             for item in cond["items"]:
+                if self.check_if_canceled("Switch processing"):
+                    return
+
                 if not item["cpn_id"]:
                     continue
                 cpn_v = self._canvas.get_variable_value(item["cpn_id"])
@@ -128,4 +137,4 @@ class Switch(ComponentBase, ABC):
         raise ValueError('Not supported operator' + operator)

     def thoughts(self) -> str:
-        return "I’m weighing a few options and will pick the next step shortly."
+        return "I’m weighing a few options and will pick the next step shortly."
@@ -63,12 +63,18 @@ class ArXiv(ToolBase, ABC):
     @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 12)))
     def _invoke(self, **kwargs):
+        if self.check_if_canceled("ArXiv processing"):
+            return
+
         if not kwargs.get("query"):
             self.set_output("formalized_content", "")
             return ""

         last_e = ""
         for _ in range(self._param.max_retries+1):
+            if self.check_if_canceled("ArXiv processing"):
+                return
+
             try:
                 sort_choices = {"relevance": arxiv.SortCriterion.Relevance,
                                 "lastUpdatedDate": arxiv.SortCriterion.LastUpdatedDate,
@@ -79,12 +85,20 @@ class ArXiv(ToolBase, ABC):
                     max_results=self._param.top_n,
                     sort_by=sort_choices[self._param.sort_by]
                 )
-                self._retrieve_chunks(list(arxiv_client.results(search)),
+                results = list(arxiv_client.results(search))
+
+                if self.check_if_canceled("ArXiv processing"):
+                    return
+
+                self._retrieve_chunks(results,
                                       get_title=lambda r: r.title,
                                       get_url=lambda r: r.pdf_url,
                                       get_content=lambda r: r.summary)
                 return self.output("formalized_content")
             except Exception as e:
+                if self.check_if_canceled("ArXiv processing"):
+                    return
+
                 last_e = e
                 logging.exception(f"ArXiv error: {e}")
                 time.sleep(self._param.delay_after_error)
@@ -125,6 +125,9 @@ class ToolBase(ComponentBase):
         return self._param.get_meta()

     def invoke(self, **kwargs):
+        if self.check_if_canceled("Tool processing"):
+            return
+
         self.set_output("_created_time", time.perf_counter())
         try:
             res = self._invoke(**kwargs)
@@ -170,4 +173,4 @@ class ToolBase(ComponentBase):
         self.set_output("formalized_content", "\n".join(kb_prompt({"chunks": chunks, "doc_aggs": aggs}, 200000, True)))

     def thoughts(self) -> str:
-        return self._canvas.get_component_name(self._id) + " is running..."
+        return self._canvas.get_component_name(self._id) + " is running..."
@@ -131,10 +131,14 @@ class CodeExec(ToolBase, ABC):
     @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10*60)))
     def _invoke(self, **kwargs):
+        if self.check_if_canceled("CodeExec processing"):
+            return
+
         lang = kwargs.get("lang", self._param.lang)
         script = kwargs.get("script", self._param.script)
         arguments = {}
         for k, v in self._param.arguments.items():
             if kwargs.get(k):
                 arguments[k] = kwargs[k]
                 continue
@@ -149,15 +153,28 @@ class CodeExec(ToolBase, ABC):
     def _execute_code(self, language: str, code: str, arguments: dict):
         import requests

+        if self.check_if_canceled("CodeExec execution"):
+            return
+
         try:
             code_b64 = self._encode_code(code)
             code_req = CodeExecutionRequest(code_b64=code_b64, language=language, arguments=arguments).model_dump()
         except Exception as e:
+            if self.check_if_canceled("CodeExec execution"):
+                return
+
             self.set_output("_ERROR", "construct code request error: " + str(e))

         try:
+            if self.check_if_canceled("CodeExec execution"):
+                return "Task has been canceled"
+
             resp = requests.post(url=f"http://{settings.SANDBOX_HOST}:9385/run", json=code_req, timeout=int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10*60)))
             logging.info(f"http://{settings.SANDBOX_HOST}:9385/run, code_req: {code_req}, resp.status_code {resp.status_code}:")
+
+            if self.check_if_canceled("CodeExec execution"):
+                return "Task has been canceled"
+
             if resp.status_code != 200:
                 resp.raise_for_status()
             body = resp.json()
@@ -173,16 +190,25 @@ class CodeExec(ToolBase, ABC):
                 logging.info(f"http://{settings.SANDBOX_HOST}:9385/run -> {rt}")
                 if isinstance(rt, tuple):
                     for i, (k, o) in enumerate(self._param.outputs.items()):
+                        if self.check_if_canceled("CodeExec execution"):
+                            return
+
                         if k.find("_") == 0:
                             continue
                         o["value"] = rt[i]
                 elif isinstance(rt, dict):
                     for i, (k, o) in enumerate(self._param.outputs.items()):
+                        if self.check_if_canceled("CodeExec execution"):
+                            return
+
                         if k not in rt or k.find("_") == 0:
                             continue
                         o["value"] = rt[k]
                 else:
                     for i, (k, o) in enumerate(self._param.outputs.items()):
+                        if self.check_if_canceled("CodeExec execution"):
+                            return
+
                         if k.find("_") == 0:
                             continue
                         o["value"] = rt
@@ -190,6 +216,9 @@ class CodeExec(ToolBase, ABC):
                 self.set_output("_ERROR", "There is no response from sandbox")

         except Exception as e:
+            if self.check_if_canceled("CodeExec execution"):
+                return
+
             self.set_output("_ERROR", "Exception executing code: " + str(e))

         return self.output()
@@ -29,7 +29,7 @@ class CrawlerParam(ToolParamBase):
         super().__init__()
         self.proxy = None
         self.extract_type = "markdown"

     def check(self):
         self.check_valid_value(self.extract_type, "Type of content from the crawler", ['html', 'markdown', 'content'])

@@ -47,18 +47,24 @@ class Crawler(ToolBase, ABC):
             result = asyncio.run(self.get_web(ans))

             return Crawler.be_output(result)

         except Exception as e:
             return Crawler.be_output(f"An unexpected error occurred: {str(e)}")

     async def get_web(self, url):
+        if self.check_if_canceled("Crawler async operation"):
+            return
+
         proxy = self._param.proxy if self._param.proxy else None
         async with AsyncWebCrawler(verbose=True, proxy=proxy) as crawler:
             result = await crawler.arun(
                 url=url,
                 bypass_cache=True
             )

+        if self.check_if_canceled("Crawler async operation"):
+            return
+
         if self._param.extract_type == 'html':
             return result.cleaned_html
         elif self._param.extract_type == 'markdown':
@@ -46,11 +46,16 @@ class DeepL(ComponentBase, ABC):
     component_name = "DeepL"

     def _run(self, history, **kwargs):
+        if self.check_if_canceled("DeepL processing"):
+            return
         ans = self.get_input()
         ans = " - ".join(ans["content"]) if "content" in ans else ""
         if not ans:
             return DeepL.be_output("")

+        if self.check_if_canceled("DeepL processing"):
+            return
+
         try:
             translator = deepl.Translator(self._param.auth_key)
             result = translator.translate_text(ans, source_lang=self._param.source_lang,
@@ -58,4 +63,6 @@ class DeepL(ComponentBase, ABC):
             return DeepL.be_output(result.text)
         except Exception as e:
+            if self.check_if_canceled("DeepL processing"):
+                return
             DeepL.be_output("**Error**:" + str(e))
@@ -75,17 +75,30 @@ class DuckDuckGo(ToolBase, ABC):
     @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 12)))
     def _invoke(self, **kwargs):
+        if self.check_if_canceled("DuckDuckGo processing"):
+            return
+
         if not kwargs.get("query"):
             self.set_output("formalized_content", "")
             return ""

         last_e = ""
         for _ in range(self._param.max_retries+1):
+            if self.check_if_canceled("DuckDuckGo processing"):
+                return
+
             try:
                 if kwargs.get("topic", "general") == "general":
                     with DDGS() as ddgs:
+                        if self.check_if_canceled("DuckDuckGo processing"):
+                            return
+
                         # {'title': '', 'href': '', 'body': ''}
                         duck_res = ddgs.text(kwargs["query"], max_results=self._param.top_n)
+
+                        if self.check_if_canceled("DuckDuckGo processing"):
+                            return
+
                         self._retrieve_chunks(duck_res,
                                               get_title=lambda r: r["title"],
                                               get_url=lambda r: r.get("href", r.get("url")),
@@ -94,8 +107,15 @@ class DuckDuckGo(ToolBase, ABC):
                         return self.output("formalized_content")
                 else:
                     with DDGS() as ddgs:
+                        if self.check_if_canceled("DuckDuckGo processing"):
+                            return
+
                         # {'date': '', 'title': '', 'body': '', 'url': '', 'image': '', 'source': ''}
                         duck_res = ddgs.news(kwargs["query"], max_results=self._param.top_n)
+
+                        if self.check_if_canceled("DuckDuckGo processing"):
+                            return
+
                         self._retrieve_chunks(duck_res,
                                               get_title=lambda r: r["title"],
                                               get_url=lambda r: r.get("href", r.get("url")),
@@ -103,6 +123,9 @@ class DuckDuckGo(ToolBase, ABC):
                 self.set_output("json", duck_res)
                 return self.output("formalized_content")
             except Exception as e:
+                if self.check_if_canceled("DuckDuckGo processing"):
+                    return
+
                 last_e = e
                 logging.exception(f"DuckDuckGo error: {e}")
                 time.sleep(self._param.delay_after_error)
@@ -101,19 +101,27 @@ class Email(ToolBase, ABC):
     @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 60)))
     def _invoke(self, **kwargs):
+        if self.check_if_canceled("Email processing"):
+            return
+
         if not kwargs.get("to_email"):
             self.set_output("success", False)
             return ""

         last_e = ""
         for _ in range(self._param.max_retries+1):
+            if self.check_if_canceled("Email processing"):
+                return
+
             try:
                 # Parse JSON string passed from upstream
                 email_data = kwargs

                 # Validate required fields
                 if "to_email" not in email_data:
-                    return Email.be_output("Missing required field: to_email")
+                    self.set_output("_ERROR", "Missing required field: to_email")
+                    self.set_output("success", False)
+                    return False

                 # Create email object
                 msg = MIMEMultipart('alternative')
@@ -133,6 +141,9 @@ class Email(ToolBase, ABC):
                 # Connect to SMTP server and send
                 logging.info(f"Connecting to SMTP server {self._param.smtp_server}:{self._param.smtp_port}")

+                if self.check_if_canceled("Email processing"):
+                    return
+
                 context = smtplib.ssl.create_default_context()
                 with smtplib.SMTP(self._param.smtp_server, self._param.smtp_port) as server:
                     server.ehlo()
@@ -149,6 +160,10 @@ class Email(ToolBase, ABC):
                     # Send email
                     logging.info(f"Sending email to recipients: {recipients}")

+                    if self.check_if_canceled("Email processing"):
+                        return
+
                     try:
                         server.send_message(msg, self._param.email, recipients)
                         success = True
@@ -81,6 +81,8 @@ class ExeSQL(ToolBase, ABC):
     @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 60)))
     def _invoke(self, **kwargs):
+        if self.check_if_canceled("ExeSQL processing"):
+            return
         def convert_decimals(obj):
             from decimal import Decimal
@@ -96,6 +98,9 @@ class ExeSQL(ToolBase, ABC):
         if not sql:
             raise Exception("SQL for `ExeSQL` MUST not be empty.")

+        if self.check_if_canceled("ExeSQL processing"):
+            return
+
         vars = self.get_input_elements_from_text(sql)
         args = {}
         for k, o in vars.items():
@@ -108,6 +113,9 @@ class ExeSQL(ToolBase, ABC):
             self.set_input_value(k, args[k])
         sql = self.string_format(sql, args)

+        if self.check_if_canceled("ExeSQL processing"):
+            return
+
         sqls = sql.split(";")
         if self._param.db_type in ["mysql", "mariadb"]:
             db = pymysql.connect(db=self._param.database, user=self._param.username, host=self._param.host,
@@ -181,6 +189,10 @@ class ExeSQL(ToolBase, ABC):
             sql_res = []
             formalized_content = []
             for single_sql in sqls:
+                if self.check_if_canceled("ExeSQL processing"):
+                    ibm_db.close(conn)
+                    return
+
                 single_sql = single_sql.replace("```", "").strip()
                 if not single_sql:
                     continue
@@ -190,6 +202,9 @@ class ExeSQL(ToolBase, ABC):
                     rows = []
                     row = ibm_db.fetch_assoc(stmt)
                     while row and len(rows) < self._param.max_records:
+                        if self.check_if_canceled("ExeSQL processing"):
+                            ibm_db.close(conn)
+                            return
                         rows.append(row)
                         row = ibm_db.fetch_assoc(stmt)

@@ -220,6 +235,11 @@ class ExeSQL(ToolBase, ABC):
         sql_res = []
         formalized_content = []
         for single_sql in sqls:
+            if self.check_if_canceled("ExeSQL processing"):
+                cursor.close()
+                db.close()
+                return
+
             single_sql = single_sql.replace('```','')
             if not single_sql:
                 continue
@@ -244,6 +264,9 @@ class ExeSQL(ToolBase, ABC):
             sql_res.append(convert_decimals(single_res.to_dict(orient='records')))
             formalized_content.append(single_res.to_markdown(index=False, floatfmt=".6f"))

         cursor.close()
         db.close()

         self.set_output("json", sql_res)
         self.set_output("formalized_content", "\n\n".join(formalized_content))
         return self.output("formalized_content")
@@ -59,17 +59,27 @@ class GitHub(ToolBase, ABC):
     @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 12)))
     def _invoke(self, **kwargs):
+        if self.check_if_canceled("GitHub processing"):
+            return
+
         if not kwargs.get("query"):
             self.set_output("formalized_content", "")
             return ""

         last_e = ""
         for _ in range(self._param.max_retries+1):
+            if self.check_if_canceled("GitHub processing"):
+                return
+
             try:
                 url = 'https://api.github.com/search/repositories?q=' + kwargs["query"] + '&sort=stars&order=desc&per_page=' + str(
                     self._param.top_n)
                 headers = {"Content-Type": "application/vnd.github+json", "X-GitHub-Api-Version": '2022-11-28'}
                 response = requests.get(url=url, headers=headers).json()
+
+                if self.check_if_canceled("GitHub processing"):
+                    return
+
                 self._retrieve_chunks(response['items'],
                                       get_title=lambda r: r["name"],
                                       get_url=lambda r: r["html_url"],
@@ -77,6 +87,9 @@ class GitHub(ToolBase, ABC):
                 self.set_output("json", response['items'])
                 return self.output("formalized_content")
             except Exception as e:
+                if self.check_if_canceled("GitHub processing"):
+                    return
+
                 last_e = e
                 logging.exception(f"GitHub error: {e}")
                 time.sleep(self._param.delay_after_error)
@@ -118,6 +118,9 @@ class Google(ToolBase, ABC):
     @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 12)))
     def _invoke(self, **kwargs):
+        if self.check_if_canceled("Google processing"):
+            return
+
         if not kwargs.get("q"):
             self.set_output("formalized_content", "")
             return ""
@@ -132,8 +135,15 @@ class Google(ToolBase, ABC):
         }
         last_e = ""
         for _ in range(self._param.max_retries+1):
+            if self.check_if_canceled("Google processing"):
+                return
+
             try:
                 search = GoogleSearch(params).get_dict()
+
+                if self.check_if_canceled("Google processing"):
+                    return
+
                 self._retrieve_chunks(search["organic_results"],
                                       get_title=lambda r: r["title"],
                                       get_url=lambda r: r["link"],
@@ -142,6 +152,9 @@ class Google(ToolBase, ABC):
                 self.set_output("json", search["organic_results"])
                 return self.output("formalized_content")
             except Exception as e:
+                if self.check_if_canceled("Google processing"):
+                    return
+
                 last_e = e
                 logging.exception(f"Google error: {e}")
                 time.sleep(self._param.delay_after_error)
@@ -65,15 +65,25 @@ class GoogleScholar(ToolBase, ABC):
     @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 12)))
     def _invoke(self, **kwargs):
+        if self.check_if_canceled("GoogleScholar processing"):
+            return
+
         if not kwargs.get("query"):
             self.set_output("formalized_content", "")
             return ""

         last_e = ""
         for _ in range(self._param.max_retries+1):
+            if self.check_if_canceled("GoogleScholar processing"):
+                return
+
             try:
                 scholar_client = scholarly.search_pubs(kwargs["query"], patents=self._param.patents, year_low=self._param.year_low,
                                                        year_high=self._param.year_high, sort_by=self._param.sort_by)
+
+                if self.check_if_canceled("GoogleScholar processing"):
+                    return
+
                 self._retrieve_chunks(scholar_client,
                                       get_title=lambda r: r['bib']['title'],
                                       get_url=lambda r: r["pub_url"],
@@ -82,6 +92,9 @@ class GoogleScholar(ToolBase, ABC):
                 self.set_output("json", list(scholar_client))
                 return self.output("formalized_content")
             except Exception as e:
+                if self.check_if_canceled("GoogleScholar processing"):
+                    return
+
                 last_e = e
                 logging.exception(f"GoogleScholar error: {e}")
                 time.sleep(self._param.delay_after_error)
@@ -50,6 +50,9 @@ class Jin10(ComponentBase, ABC):
     component_name = "Jin10"

     def _run(self, history, **kwargs):
+        if self.check_if_canceled("Jin10 processing"):
+            return
+
         ans = self.get_input()
         ans = " - ".join(ans["content"]) if "content" in ans else ""
         if not ans:
@@ -58,6 +61,9 @@ class Jin10(ComponentBase, ABC):
         jin10_res = []
         headers = {'secret-key': self._param.secret_key}
         try:
+            if self.check_if_canceled("Jin10 processing"):
+                return
+
             if self._param.type == "flash":
                 params = {
                     'category': self._param.flash_type,
@@ -69,6 +75,8 @@ class Jin10(ComponentBase, ABC):
                                          headers=headers, data=json.dumps(params))
                 response = response.json()
                 for i in response['data']:
+                    if self.check_if_canceled("Jin10 processing"):
+                        return
                     jin10_res.append({"content": i['data']['content']})
             if self._param.type == "calendar":
                 params = {
@@ -79,6 +87,8 @@ class Jin10(ComponentBase, ABC):
                                          headers=headers, data=json.dumps(params))

                 response = response.json()
+                if self.check_if_canceled("Jin10 processing"):
+                    return
                 jin10_res.append({"content": pd.DataFrame(response['data']).to_markdown()})
             if self._param.type == "symbols":
                 params = {
@@ -90,8 +100,12 @@ class Jin10(ComponentBase, ABC):
                     url='https://open-data-api.jin10.com/data-api/' + self._param.symbols_datatype + '?type=' + self._param.symbols_type,
                     headers=headers, data=json.dumps(params))
                 response = response.json()
+                if self.check_if_canceled("Jin10 processing"):
+                    return
                 if self._param.symbols_datatype == "symbols":
                     for i in response['data']:
+                        if self.check_if_canceled("Jin10 processing"):
+                            return
                         i['Commodity Code'] = i['c']
                         i['Stock Exchange'] = i['e']
                         i['Commodity Name'] = i['n']
@@ -99,6 +113,8 @@ class Jin10(ComponentBase, ABC):
                         del i['c'], i['e'], i['n'], i['t']
                 if self._param.symbols_datatype == "quotes":
                     for i in response['data']:
+                        if self.check_if_canceled("Jin10 processing"):
+                            return
                         i['Selling Price'] = i['a']
                         i['Buying Price'] = i['b']
                         i['Commodity Code'] = i['c']
@@ -120,8 +136,12 @@ class Jin10(ComponentBase, ABC):
                     url='https://open-data-api.jin10.com/data-api/news',
                     headers=headers, data=json.dumps(params))
                 response = response.json()
+                if self.check_if_canceled("Jin10 processing"):
+                    return
                 jin10_res.append({"content": pd.DataFrame(response['data']).to_markdown()})
         except Exception as e:
+            if self.check_if_canceled("Jin10 processing"):
+                return
             return Jin10.be_output("**ERROR**: " + str(e))

         if not jin10_res:
@@ -71,23 +71,40 @@ class PubMed(ToolBase, ABC):
     @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 12)))
     def _invoke(self, **kwargs):
+        if self.check_if_canceled("PubMed processing"):
+            return
+
         if not kwargs.get("query"):
             self.set_output("formalized_content", "")
             return ""

         last_e = ""
         for _ in range(self._param.max_retries+1):
+            if self.check_if_canceled("PubMed processing"):
+                return
+
             try:
                 Entrez.email = self._param.email
                 pubmedids = Entrez.read(Entrez.esearch(db='pubmed', retmax=self._param.top_n, term=kwargs["query"]))['IdList']
+
+                if self.check_if_canceled("PubMed processing"):
+                    return
+
                 pubmedcnt = ET.fromstring(re.sub(r'<(/?)b>|<(/?)i>', '', Entrez.efetch(db='pubmed', id=",".join(pubmedids),
                                                                                        retmode="xml").read().decode("utf-8")))
+
+                if self.check_if_canceled("PubMed processing"):
+                    return
+
                 self._retrieve_chunks(pubmedcnt.findall("PubmedArticle"),
                                       get_title=lambda child: child.find("MedlineCitation").find("Article").find("ArticleTitle").text,
                                       get_url=lambda child: "https://pubmed.ncbi.nlm.nih.gov/" + child.find("MedlineCitation").find("PMID").text,
                                       get_content=lambda child: self._format_pubmed_content(child),)
                 return self.output("formalized_content")
             except Exception as e:
+                if self.check_if_canceled("PubMed processing"):
+                    return
+
                 last_e = e
                 logging.exception(f"PubMed error: {e}")
                 time.sleep(self._param.delay_after_error)
@@ -58,12 +58,18 @@ class QWeather(ComponentBase, ABC):
     component_name = "QWeather"

     def _run(self, history, **kwargs):
+        if self.check_if_canceled("Qweather processing"):
+            return
+
         ans = self.get_input()
         ans = "".join(ans["content"]) if "content" in ans else ""
         if not ans:
             return QWeather.be_output("")

         try:
+            if self.check_if_canceled("Qweather processing"):
+                return
+
             response = requests.get(
                 url="https://geoapi.qweather.com/v2/city/lookup?location=" + ans + "&key=" + self._param.web_apikey).json()
             if response["code"] == "200":
@@ -71,16 +77,23 @@ class QWeather(ComponentBase, ABC):
             else:
                 return QWeather.be_output("**Error**" + self._param.error_code[response["code"]])

+            if self.check_if_canceled("Qweather processing"):
+                return
+
             base_url = "https://api.qweather.com/v7/" if self._param.user_type == 'paid' else "https://devapi.qweather.com/v7/"

             if self._param.type == "weather":
                 url = base_url + "weather/" + self._param.time_period + "?location=" + location_id + "&key=" + self._param.web_apikey + "&lang=" + self._param.lang
                 response = requests.get(url=url).json()
+                if self.check_if_canceled("Qweather processing"):
+                    return
                 if response["code"] == "200":
                     if self._param.time_period == "now":
                         return QWeather.be_output(str(response["now"]))
                     else:
                         qweather_res = [{"content": str(i) + "\n"} for i in response["daily"]]
+                        if self.check_if_canceled("Qweather processing"):
+                            return
                         if not qweather_res:
                             return QWeather.be_output("")
@@ -92,6 +105,8 @@ class QWeather(ComponentBase, ABC):
             elif self._param.type == "indices":
                 url = base_url + "indices/1d?type=0&location=" + location_id + "&key=" + self._param.web_apikey + "&lang=" + self._param.lang
                 response = requests.get(url=url).json()
+                if self.check_if_canceled("Qweather processing"):
+                    return
                 if response["code"] == "200":
                     indices_res = response["daily"][0]["date"] + "\n" + "\n".join(
                         [i["name"] + ": " + i["category"] + ", " + i["text"] for i in response["daily"]])
@@ -103,9 +118,13 @@ class QWeather(ComponentBase, ABC):
             elif self._param.type == "airquality":
                 url = base_url + "air/now?location=" + location_id + "&key=" + self._param.web_apikey + "&lang=" + self._param.lang
                 response = requests.get(url=url).json()
+                if self.check_if_canceled("Qweather processing"):
+                    return
                 if response["code"] == "200":
                     return QWeather.be_output(str(response["now"]))
                 else:
                     return QWeather.be_output("**Error**" + self._param.error_code[response["code"]])
         except Exception as e:
+            if self.check_if_canceled("Qweather processing"):
+                return
             return QWeather.be_output("**Error**" + str(e))
@@ -82,8 +82,12 @@ class Retrieval(ToolBase, ABC):
     @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 12)))
     def _invoke(self, **kwargs):
+        if self.check_if_canceled("Retrieval processing"):
+            return
+
         if not kwargs.get("query"):
             self.set_output("formalized_content", self._param.empty_response)
             return

         kb_ids: list[str] = []
         for id in self._param.kb_ids:
@@ -122,7 +126,7 @@ class Retrieval(ToolBase, ABC):
         vars = self.get_input_elements_from_text(kwargs["query"])
         vars = {k:o["value"] for k,o in vars.items()}
         query = self.string_format(kwargs["query"], vars)

         doc_ids=[]
         if self._param.meta_data_filter!={}:
             metas = DocumentService.get_meta_by_kbs(kb_ids)
@@ -184,9 +188,14 @@ class Retrieval(ToolBase, ABC):
                 rerank_mdl=rerank_mdl,
                 rank_feature=label_question(query, kbs),
             )
+            if self.check_if_canceled("Retrieval processing"):
+                return
+
             if self._param.toc_enhance:
                 chat_mdl = LLMBundle(self._canvas._tenant_id, LLMType.CHAT)
                 cks = settings.retriever.retrieval_by_toc(query, kbinfos["chunks"], [kb.tenant_id for kb in kbs], chat_mdl, self._param.top_n)
+                if self.check_if_canceled("Retrieval processing"):
+                    return
                 if cks:
                     kbinfos["chunks"] = cks
             if self._param.use_kg:
@@ -195,6 +204,8 @@ class Retrieval(ToolBase, ABC):
                                                   kb_ids,
                                                   embd_mdl,
                                                   LLMBundle(self._canvas.get_tenant_id(), LLMType.CHAT))
+                if self.check_if_canceled("Retrieval processing"):
+                    return
                 if ck["content_with_weight"]:
                     kbinfos["chunks"].insert(0, ck)
         else:
@@ -202,6 +213,8 @@ class Retrieval(ToolBase, ABC):

         if self._param.use_kg and kbs:
             ck = settings.kg_retriever.retrieval(query, [kb.tenant_id for kb in kbs], filtered_kb_ids, embd_mdl, LLMBundle(kbs[0].tenant_id, LLMType.CHAT))
+            if self.check_if_canceled("Retrieval processing"):
+                return
             if ck["content_with_weight"]:
                 ck["content"] = ck["content_with_weight"]
                 del ck["content_with_weight"]
@@ -79,6 +79,9 @@ class SearXNG(ToolBase, ABC):
     @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 12)))
     def _invoke(self, **kwargs):
+        if self.check_if_canceled("SearXNG processing"):
+            return
+
         # Gracefully handle try-run without inputs
         query = kwargs.get("query")
         if not query or not isinstance(query, str) or not query.strip():
@@ -93,6 +96,9 @@ class SearXNG(ToolBase, ABC):
         last_e = ""
         for _ in range(self._param.max_retries+1):
+            if self.check_if_canceled("SearXNG processing"):
+                return
+
             try:
                 search_params = {
                     'q': query,
@@ -110,6 +116,9 @@ class SearXNG(ToolBase, ABC):
                 )
                 response.raise_for_status()

+                if self.check_if_canceled("SearXNG processing"):
+                    return
+
                 data = response.json()

                 if not data or not isinstance(data, dict):
@@ -121,6 +130,9 @@ class SearXNG(ToolBase, ABC):
                 results = results[:self._param.top_n]

+                if self.check_if_canceled("SearXNG processing"):
+                    return
+
                 self._retrieve_chunks(results,
                                       get_title=lambda r: r.get("title", ""),
                                       get_url=lambda r: r.get("url", ""),
@@ -130,10 +142,16 @@ class SearXNG(ToolBase, ABC):
                 return self.output("formalized_content")

             except requests.RequestException as e:
+                if self.check_if_canceled("SearXNG processing"):
+                    return
+
                 last_e = f"Network error: {e}"
                 logging.exception(f"SearXNG network error: {e}")
                 time.sleep(self._param.delay_after_error)
             except Exception as e:
+                if self.check_if_canceled("SearXNG processing"):
+                    return
+
                 last_e = str(e)
                 logging.exception(f"SearXNG error: {e}")
                 time.sleep(self._param.delay_after_error)
@@ -103,6 +103,9 @@ class TavilySearch(ToolBase, ABC):
     @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 12)))
     def _invoke(self, **kwargs):
+        if self.check_if_canceled("TavilySearch processing"):
+            return
+
         if not kwargs.get("query"):
             self.set_output("formalized_content", "")
             return ""
@@ -113,10 +116,16 @@ class TavilySearch(ToolBase, ABC):
             if fld not in kwargs:
                 kwargs[fld] = getattr(self._param, fld)
         for _ in range(self._param.max_retries+1):
+            if self.check_if_canceled("TavilySearch processing"):
+                return
+
             try:
                 kwargs["include_images"] = False
                 kwargs["include_raw_content"] = False
                 res = self.tavily_client.search(**kwargs)
+                if self.check_if_canceled("TavilySearch processing"):
+                    return
+
                 self._retrieve_chunks(res["results"],
                                       get_title=lambda r: r["title"],
                                       get_url=lambda r: r["url"],
@@ -125,6 +134,9 @@ class TavilySearch(ToolBase, ABC):
                 self.set_output("json", res["results"])
                 return self.output("formalized_content")
             except Exception as e:
+                if self.check_if_canceled("TavilySearch processing"):
+                    return
+
                 last_e = e
                 logging.exception(f"Tavily error: {e}")
                 time.sleep(self._param.delay_after_error)
@@ -201,6 +213,9 @@ class TavilyExtract(ToolBase, ABC):
     @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10*60)))
     def _invoke(self, **kwargs):
+        if self.check_if_canceled("TavilyExtract processing"):
+            return
+
         self.tavily_client = TavilyClient(api_key=self._param.api_key)
         last_e = None
         for fld in ["urls", "extract_depth", "format"]:
@@ -209,12 +224,21 @@ class TavilyExtract(ToolBase, ABC):
         if kwargs.get("urls") and isinstance(kwargs["urls"], str):
             kwargs["urls"] = kwargs["urls"].split(",")
         for _ in range(self._param.max_retries+1):
+            if self.check_if_canceled("TavilyExtract processing"):
+                return
+
             try:
                 kwargs["include_images"] = False
                 res = self.tavily_client.extract(**kwargs)
+                if self.check_if_canceled("TavilyExtract processing"):
+                    return
+
                 self.set_output("json", res["results"])
                 return self.output("json")
             except Exception as e:
+                if self.check_if_canceled("TavilyExtract processing"):
+                    return
+
                 last_e = e
                 logging.exception(f"Tavily error: {e}")
         if last_e:
@@ -43,12 +43,18 @@ class TuShare(ComponentBase, ABC):
     component_name = "TuShare"

     def _run(self, history, **kwargs):
+        if self.check_if_canceled("TuShare processing"):
+            return
+
         ans = self.get_input()
         ans = ",".join(ans["content"]) if "content" in ans else ""
         if not ans:
             return TuShare.be_output("")

         try:
+            if self.check_if_canceled("TuShare processing"):
+                return
+
             tus_res = []
             params = {
                 "api_name": "news",
@@ -58,12 +64,18 @@ class TuShare(ComponentBase, ABC):
             }
             response = requests.post(url="http://api.tushare.pro", data=json.dumps(params).encode('utf-8'))
             response = response.json()
+            if self.check_if_canceled("TuShare processing"):
+                return
             if response['code'] != 0:
                 return TuShare.be_output(response['msg'])
             df = pd.DataFrame(response['data']['items'])
             df.columns = response['data']['fields']
+            if self.check_if_canceled("TuShare processing"):
+                return
             tus_res.append({"content": (df[df['content'].str.contains(self._param.keyword, case=False)]).to_markdown()})
         except Exception as e:
+            if self.check_if_canceled("TuShare processing"):
+                return
             return TuShare.be_output("**ERROR**: " + str(e))

         if not tus_res:
@@ -70,19 +70,31 @@ class WenCai(ToolBase, ABC):
     @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 12)))
     def _invoke(self, **kwargs):
+        if self.check_if_canceled("WenCai processing"):
+            return
+
         if not kwargs.get("query"):
             self.set_output("report", "")
             return ""

         last_e = ""
         for _ in range(self._param.max_retries+1):
+            if self.check_if_canceled("WenCai processing"):
+                return
+
             try:
                 wencai_res = []
                 res = pywencai.get(query=kwargs["query"], query_type=self._param.query_type, perpage=self._param.top_n)
+                if self.check_if_canceled("WenCai processing"):
+                    return
+
                 if isinstance(res, pd.DataFrame):
                     wencai_res.append(res.to_markdown())
                 elif isinstance(res, dict):
                     for item in res.items():
+                        if self.check_if_canceled("WenCai processing"):
+                            return
+
                         if isinstance(item[1], list):
                             wencai_res.append(item[0] + "\n" + pd.DataFrame(item[1]).to_markdown())
                         elif isinstance(item[1], str):
@@ -100,6 +112,9 @@ class WenCai(ToolBase, ABC):
                 self.set_output("report", "\n\n".join(wencai_res))
                 return self.output("report")
             except Exception as e:
+                if self.check_if_canceled("WenCai processing"):
+                    return
+
                 last_e = e
                 logging.exception(f"WenCai error: {e}")
                 time.sleep(self._param.delay_after_error)
@ -66,17 +66,26 @@ class Wikipedia(ToolBase, ABC):

    @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 60)))
    def _invoke(self, **kwargs):
        if self.check_if_canceled("Wikipedia processing"):
            return

        if not kwargs.get("query"):
            self.set_output("formalized_content", "")
            return ""

        last_e = ""
        for _ in range(self._param.max_retries+1):
            if self.check_if_canceled("Wikipedia processing"):
                return

            try:
                wikipedia.set_lang(self._param.language)
                wiki_engine = wikipedia
                pages = []
                for p in wiki_engine.search(kwargs["query"], results=self._param.top_n):
                    if self.check_if_canceled("Wikipedia processing"):
                        return

                    try:
                        pages.append(wikipedia.page(p))
                    except Exception:
@ -87,6 +96,9 @@ class Wikipedia(ToolBase, ABC):
                                 get_content=lambda r: r.summary)
                return self.output("formalized_content")
            except Exception as e:
                if self.check_if_canceled("Wikipedia processing"):
                    return

                last_e = e
                logging.exception(f"Wikipedia error: {e}")
                time.sleep(self._param.delay_after_error)
@ -74,15 +74,24 @@ class YahooFinance(ToolBase, ABC):

    @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 60)))
    def _invoke(self, **kwargs):
        if self.check_if_canceled("YahooFinance processing"):
            return

        if not kwargs.get("stock_code"):
            self.set_output("report", "")
            return ""

        last_e = ""
        for _ in range(self._param.max_retries+1):
            if self.check_if_canceled("YahooFinance processing"):
                return

            yohoo_res = []
            try:
                msft = yf.Ticker(kwargs["stock_code"])
                if self.check_if_canceled("YahooFinance processing"):
                    return

                if self._param.info:
                    yohoo_res.append("# Information:\n" + pd.Series(msft.info).to_markdown() + "\n")
                if self._param.history:
@ -100,6 +109,9 @@ class YahooFinance(ToolBase, ABC):
                self.set_output("report", "\n\n".join(yohoo_res))
                return self.output("report")
            except Exception as e:
                if self.check_if_canceled("YahooFinance processing"):
                    return

                last_e = e
                logging.exception(f"YahooFinance error: {e}")
                time.sleep(self._param.delay_after_error)
@ -156,7 +156,7 @@ def run():
        return get_json_result(data={"message_id": task_id})

    try:
        canvas = Canvas(cvs.dsl, current_user.id, req["id"])
        canvas = Canvas(cvs.dsl, current_user.id)
    except Exception as e:
        return server_error_response(e)

@ -168,8 +168,10 @@ def run():

            cvs.dsl = json.loads(str(canvas))
            UserCanvasService.update_by_id(req["id"], cvs.to_dict())

        except Exception as e:
            logging.exception(e)
            canvas.cancel_task()
            yield "data:" + json.dumps({"code": 500, "message": str(e), "data": False}, ensure_ascii=False) + "\n\n"

    resp = Response(sse(), mimetype="text/event-stream")
@ -177,6 +179,7 @@ def run():
    resp.headers.add_header("Connection", "keep-alive")
    resp.headers.add_header("X-Accel-Buffering", "no")
    resp.headers.add_header("Content-Type", "text/event-stream; charset=utf-8")
    resp.call_on_close(lambda: canvas.cancel_task())
    return resp
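
The `call_on_close` hook means that a client dropping the SSE connection now cancels the running canvas task on the server side. A minimal sketch of a streaming consumer (the host, endpoint path, payload, and authentication are assumptions for illustration, not part of this change):

```python
import json

import requests

# Assumed deployment address and payload; a logged-in session cookie is
# also required but omitted here.
with requests.post(
    "http://localhost:9380/v1/canvas/run",
    json={"id": "<canvas_id>", "query": "hello"},
    stream=True,
    timeout=300,
) as resp:
    for line in resp.iter_lines(decode_unicode=True):
        if not line or not line.startswith("data:"):
            continue  # skip blank SSE separators
        print(json.loads(line[len("data:"):]))
# Leaving the with-block closes the connection, which fires
# resp.call_on_close on the server and cancels the task.
```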
@ -430,7 +433,7 @@ def test_db_connect():
        catalog, schema = _parse_catalog_schema(req["database"])
        if not catalog:
            return server_error_response("For Trino, 'database' must be 'catalog.schema' or at least 'catalog'.")

        http_scheme = "https" if os.environ.get("TRINO_USE_TLS", "0") == "1" else "http"

        auth = None
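
`_parse_catalog_schema` is defined outside this hunk; the error message above implies it splits the `database` field into a catalog and an optional schema. A purely illustrative sketch of that contract, not the actual implementation:

```python
def _parse_catalog_schema(database: str):
    # Hypothetical: accepts "catalog.schema" or a bare "catalog".
    if not database:
        return None, None
    catalog, _, schema = database.partition(".")
    return catalog or None, schema or None
```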
@ -603,4 +606,4 @@ def download():
    id = request.args.get("id")
    created_by = request.args.get("created_by")
    blob = FileService.get_blob(created_by, id)
    return flask.make_response(blob)
    return flask.make_response(blob)
@ -13,16 +13,26 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
import json
import logging
import time
import uuid
from html import escape
from typing import Any

from flask import request
from flask_login import login_required, current_user
from flask import make_response, request
from flask_login import current_user, login_required
from google_auth_oauthlib.flow import Flow

from api.db import InputType
from api.db.services.connector_service import ConnectorService, SyncLogsService
from api.utils.api_utils import get_json_result, validate_request, get_data_error_result
from common.misc_utils import get_uuid
from api.utils.api_utils import get_data_error_result, get_json_result, validate_request
from common.constants import RetCode, TaskStatus
from common.data_source.config import GOOGLE_DRIVE_WEB_OAUTH_REDIRECT_URI, DocumentSource
from common.data_source.google_util.constant import GOOGLE_DRIVE_WEB_OAUTH_POPUP_TEMPLATE, GOOGLE_SCOPES
from common.misc_utils import get_uuid
from rag.utils.redis_conn import REDIS_CONN


@manager.route("/set", methods=["POST"]) # noqa: F821
@login_required
@ -42,8 +52,8 @@ def set_connector():
        "config": req["config"],
        "refresh_freq": int(req.get("refresh_freq", 30)),
        "prune_freq": int(req.get("prune_freq", 720)),
        "timeout_secs": int(req.get("timeout_secs", 60*29)),
        "status": TaskStatus.SCHEDULE
        "timeout_secs": int(req.get("timeout_secs", 60 * 29)),
        "status": TaskStatus.SCHEDULE,
    }
    conn["status"] = TaskStatus.SCHEDULE
    ConnectorService.save(**conn)
@ -105,3 +115,181 @@ def rm_connector(connector_id):
    ConnectorService.resume(connector_id, TaskStatus.CANCEL)
    ConnectorService.delete_by_id(connector_id)
    return get_json_result(data=True)


GOOGLE_WEB_FLOW_STATE_PREFIX = "google_drive_web_flow_state"
GOOGLE_WEB_FLOW_RESULT_PREFIX = "google_drive_web_flow_result"
WEB_FLOW_TTL_SECS = 15 * 60


def _web_state_cache_key(flow_id: str) -> str:
    return f"{GOOGLE_WEB_FLOW_STATE_PREFIX}:{flow_id}"


def _web_result_cache_key(flow_id: str) -> str:
    return f"{GOOGLE_WEB_FLOW_RESULT_PREFIX}:{flow_id}"


def _load_credentials(payload: str | dict[str, Any]) -> dict[str, Any]:
    if isinstance(payload, dict):
        return payload
    try:
        return json.loads(payload)
    except json.JSONDecodeError as exc:  # pragma: no cover - defensive
        raise ValueError("Invalid Google credentials JSON.") from exc


def _get_web_client_config(credentials: dict[str, Any]) -> dict[str, Any]:
    web_section = credentials.get("web")
    if not isinstance(web_section, dict):
        raise ValueError("Google OAuth JSON must include a 'web' client configuration to use browser-based authorization.")
    return {"web": web_section}


def _render_web_oauth_popup(flow_id: str, success: bool, message: str):
    status = "success" if success else "error"
    auto_close = "window.close();" if success else ""
    escaped_message = escape(message)
    payload_json = json.dumps(
        {
            "type": "ragflow-google-drive-oauth",
            "status": status,
            "flowId": flow_id or "",
            "message": message,
        }
    )
    html = GOOGLE_DRIVE_WEB_OAUTH_POPUP_TEMPLATE.format(
        heading="Authorization complete" if success else "Authorization failed",
        message=escaped_message,
        payload_json=payload_json,
        auto_close=auto_close,
    )
    response = make_response(html, 200)
    response.headers["Content-Type"] = "text/html; charset=utf-8"
    return response


@manager.route("/google-drive/oauth/web/start", methods=["POST"]) # noqa: F821
@login_required
@validate_request("credentials")
def start_google_drive_web_oauth():
    if not GOOGLE_DRIVE_WEB_OAUTH_REDIRECT_URI:
        return get_json_result(
            code=RetCode.SERVER_ERROR,
            message="Google Drive OAuth redirect URI is not configured on the server.",
        )

    req = request.json or {}
    raw_credentials = req.get("credentials", "")
    try:
        credentials = _load_credentials(raw_credentials)
    except ValueError as exc:
        return get_json_result(code=RetCode.ARGUMENT_ERROR, message=str(exc))

    if credentials.get("refresh_token"):
        return get_json_result(
            code=RetCode.ARGUMENT_ERROR,
            message="Uploaded credentials already include a refresh token.",
        )

    try:
        client_config = _get_web_client_config(credentials)
    except ValueError as exc:
        return get_json_result(code=RetCode.ARGUMENT_ERROR, message=str(exc))

    flow_id = str(uuid.uuid4())
    try:
        flow = Flow.from_client_config(client_config, scopes=GOOGLE_SCOPES[DocumentSource.GOOGLE_DRIVE])
        flow.redirect_uri = GOOGLE_DRIVE_WEB_OAUTH_REDIRECT_URI
        authorization_url, _ = flow.authorization_url(
            access_type="offline",
            include_granted_scopes="true",
            prompt="consent",
            state=flow_id,
        )
    except Exception as exc:  # pragma: no cover - defensive
        logging.exception("Failed to create Google OAuth flow: %s", exc)
        return get_json_result(
            code=RetCode.SERVER_ERROR,
            message="Failed to initialize Google OAuth flow. Please verify the uploaded client configuration.",
        )

    cache_payload = {
        "user_id": current_user.id,
        "client_config": client_config,
        "created_at": int(time.time()),
    }
    REDIS_CONN.set_obj(_web_state_cache_key(flow_id), cache_payload, WEB_FLOW_TTL_SECS)

    return get_json_result(
        data={
            "flow_id": flow_id,
            "authorization_url": authorization_url,
            "expires_in": WEB_FLOW_TTL_SECS,
        }
    )


@manager.route("/google-drive/oauth/web/callback", methods=["GET"]) # noqa: F821
def google_drive_web_oauth_callback():
    state_id = request.args.get("state")
    error = request.args.get("error")
    error_description = request.args.get("error_description") or error

    if not state_id:
        return _render_web_oauth_popup("", False, "Missing OAuth state parameter.")

    state_cache = REDIS_CONN.get(_web_state_cache_key(state_id))
    if not state_cache:
        return _render_web_oauth_popup(state_id, False, "Authorization session expired. Please restart from the main window.")

    state_obj = json.loads(state_cache)
    client_config = state_obj.get("client_config")
    if not client_config:
        REDIS_CONN.delete(_web_state_cache_key(state_id))
        return _render_web_oauth_popup(state_id, False, "Authorization session was invalid. Please retry.")

    if error:
        REDIS_CONN.delete(_web_state_cache_key(state_id))
        return _render_web_oauth_popup(state_id, False, error_description or "Authorization was cancelled.")

    code = request.args.get("code")
    if not code:
        return _render_web_oauth_popup(state_id, False, "Missing authorization code from Google.")

    try:
        flow = Flow.from_client_config(client_config, scopes=GOOGLE_SCOPES[DocumentSource.GOOGLE_DRIVE])
        flow.redirect_uri = GOOGLE_DRIVE_WEB_OAUTH_REDIRECT_URI
        flow.fetch_token(code=code)
    except Exception as exc:  # pragma: no cover - defensive
        logging.exception("Failed to exchange Google OAuth code: %s", exc)
        REDIS_CONN.delete(_web_state_cache_key(state_id))
        return _render_web_oauth_popup(state_id, False, "Failed to exchange tokens with Google. Please retry.")

    creds_json = flow.credentials.to_json()
    result_payload = {
        "user_id": state_obj.get("user_id"),
        "credentials": creds_json,
    }
    REDIS_CONN.set_obj(_web_result_cache_key(state_id), result_payload, WEB_FLOW_TTL_SECS)
    REDIS_CONN.delete(_web_state_cache_key(state_id))

    return _render_web_oauth_popup(state_id, True, "Authorization completed successfully.")


@manager.route("/google-drive/oauth/web/result", methods=["POST"]) # noqa: F821
@login_required
@validate_request("flow_id")
def poll_google_drive_web_result():
    req = request.json or {}
    flow_id = req.get("flow_id")
    cache_raw = REDIS_CONN.get(_web_result_cache_key(flow_id))
    if not cache_raw:
        return get_json_result(code=RetCode.RUNNING, message="Authorization is still pending.")

    result = json.loads(cache_raw)
    if result.get("user_id") != current_user.id:
        return get_json_result(code=RetCode.PERMISSION_ERROR, message="You are not allowed to access this authorization result.")

    REDIS_CONN.delete(_web_result_cache_key(flow_id))
    return get_json_result(data={"credentials": result.get("credentials")})
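
Taken together, the three endpoints form a start, browser consent, poll sequence. A minimal client-side sketch (base URL, file name, polling cadence, and the logged-in session are assumptions for illustration):

```python
import time
import webbrowser

import requests

BASE = "http://localhost:9380/v1/connector"  # assumed deployment address
session = requests.Session()  # assumed to already carry a logged-in cookie

start = session.post(
    f"{BASE}/google-drive/oauth/web/start",
    json={"credentials": open("client_secret.json").read()},
).json()
flow_id = start["data"]["flow_id"]
webbrowser.open(start["data"]["authorization_url"])  # user consents in the popup

# Poll until the callback has stored the tokens in Redis (15-minute TTL).
while True:
    res = session.post(f"{BASE}/google-drive/oauth/web/result",
                       json={"flow_id": flow_id}).json()
    if res.get("data"):  # pending responses are assumed to carry no data
        print(res["data"]["credentials"])  # credentials JSON with a refresh token
        break
    time.sleep(2)
```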
@ -127,6 +127,7 @@ def update():
            logging.error("Link KB errors: %s", errors)
        kb = kb.to_dict()
        kb.update(req)
        kb["connectors"] = connectors

        return get_json_result(data=kb)
    except Exception as e:
@ -358,7 +358,7 @@ def list_app():
    for o in objs:
        if o.llm_name + "@" + o.llm_factory in llm_set:
            continue
        llms.append({"llm_name": o.llm_name, "model_type": o.model_type, "fid": o.llm_factory, "available": True})
        llms.append({"llm_name": o.llm_name, "model_type": o.model_type, "fid": o.llm_factory, "available": True, "status": StatusEnum.VALID.value})

    res = {}
    for m in llms:
@ -101,6 +101,7 @@ def init_llm_factory():
        info = deepcopy(factory_llm_info)
        llm_infos = info.pop("llm")
        try:
            LLMFactoriesService.filter_delete([LLMFactories.name == factory_llm_info["name"]])
            LLMFactoriesService.save(**info)
        except Exception:
            pass
@ -236,6 +236,7 @@ class Connector2KbService(CommonService):
            conn_id = conn["id"]
            connector_ids.append(conn_id)
            if conn_id in old_conn_ids:
                cls.update_by_id(conn_id, {"auto_parse": conn.get("auto_parse", "1")})
                continue
            cls.save(**{
                "id": get_uuid(),
@ -846,7 +846,7 @@ def queue_raptor_o_graphrag_tasks(sample_doc_id, ty, priority, fake_doc_id="", d
        "to_page": 100000000,
        "task_type": ty,
        "progress_msg": datetime.now().strftime("%H:%M:%S") + " created task " + ty,
        "begin_at": datetime.now(),
        "begin_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    }

    task = new_task()
@ -190,6 +190,7 @@ OAUTH_GOOGLE_DRIVE_CLIENT_ID = os.environ.get("OAUTH_GOOGLE_DRIVE_CLIENT_ID", ""
OAUTH_GOOGLE_DRIVE_CLIENT_SECRET = os.environ.get(
    "OAUTH_GOOGLE_DRIVE_CLIENT_SECRET", ""
)
GOOGLE_DRIVE_WEB_OAUTH_REDIRECT_URI = os.environ.get("GOOGLE_DRIVE_WEB_OAUTH_REDIRECT_URI", "http://localhost:9380/v1/connector/google-drive/oauth/web/callback")

CONFLUENCE_OAUTH_TOKEN_URL = "https://auth.atlassian.com/oauth/token"
RATE_LIMIT_MESSAGE_LOWERCASE = "Rate limit exceeded".lower()
@ -47,3 +47,57 @@ USER_FIELDS = "nextPageToken, users(primaryEmail)"
# Error message substrings
MISSING_SCOPES_ERROR_STR = "client not authorized for any of the scopes requested"
SCOPE_INSTRUCTIONS = ""


GOOGLE_DRIVE_WEB_OAUTH_POPUP_TEMPLATE = """<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="utf-8" />
    <title>Google Drive Authorization</title>
    <style>
        body {{
            font-family: Arial, sans-serif;
            background: #f8fafc;
            color: #0f172a;
            display: flex;
            flex-direction: column;
            align-items: center;
            justify-content: center;
            min-height: 100vh;
            margin: 0;
        }}
        .card {{
            background: white;
            padding: 32px;
            border-radius: 12px;
            box-shadow: 0 8px 30px rgba(15, 23, 42, 0.1);
            max-width: 420px;
            text-align: center;
        }}
        h1 {{
            font-size: 1.5rem;
            margin-bottom: 12px;
        }}
        p {{
            font-size: 0.95rem;
            line-height: 1.5;
        }}
    </style>
</head>
<body>
    <div class="card">
        <h1>{heading}</h1>
        <p>{message}</p>
        <p>You can close this window.</p>
    </div>
    <script>
        (function(){{
            if (window.opener) {{
                window.opener.postMessage({payload_json}, "*");
            }}
            {auto_close}
        }})();
    </script>
</body>
</html>
"""
@ -3,9 +3,15 @@ import os
import threading
from typing import Any, Callable

import requests

from common.data_source.config import DocumentSource
from common.data_source.google_util.constant import GOOGLE_SCOPES

GOOGLE_DEVICE_CODE_URL = "https://oauth2.googleapis.com/device/code"
GOOGLE_DEVICE_TOKEN_URL = "https://oauth2.googleapis.com/token"
DEFAULT_DEVICE_INTERVAL = 5


def _get_requested_scopes(source: DocumentSource) -> list[str]:
    """Return the scopes to request, honoring an optional override env var."""
@ -49,6 +55,62 @@ def _run_with_timeout(func: Callable[[], Any], timeout_secs: int, timeout_messag
    return result.get("value")


def _extract_client_info(credentials: dict[str, Any]) -> tuple[str, str | None]:
    if "client_id" in credentials:
        return credentials["client_id"], credentials.get("client_secret")
    for key in ("installed", "web"):
        if key in credentials and isinstance(credentials[key], dict):
            nested = credentials[key]
            if "client_id" not in nested:
                break
            return nested["client_id"], nested.get("client_secret")
    raise ValueError("Provided Google OAuth credentials are missing client_id.")


def start_device_authorization_flow(
    credentials: dict[str, Any],
    source: DocumentSource,
) -> tuple[dict[str, Any], dict[str, Any]]:
    client_id, client_secret = _extract_client_info(credentials)
    data = {
        "client_id": client_id,
        "scope": " ".join(_get_requested_scopes(source)),
    }
    if client_secret:
        data["client_secret"] = client_secret
    resp = requests.post(GOOGLE_DEVICE_CODE_URL, data=data, timeout=15)
    resp.raise_for_status()
    payload = resp.json()
    state = {
        "client_id": client_id,
        "client_secret": client_secret,
        "device_code": payload.get("device_code"),
        "interval": payload.get("interval", DEFAULT_DEVICE_INTERVAL),
    }
    response_data = {
        "user_code": payload.get("user_code"),
        "verification_url": payload.get("verification_url") or payload.get("verification_uri"),
        "verification_url_complete": payload.get("verification_url_complete")
        or payload.get("verification_uri_complete"),
        "expires_in": payload.get("expires_in"),
        "interval": state["interval"],
    }
    return state, response_data


def poll_device_authorization_flow(state: dict[str, Any]) -> dict[str, Any]:
    data = {
        "client_id": state["client_id"],
        "device_code": state["device_code"],
        "grant_type": "urn:ietf:params:oauth:grant-type:device_code",
    }
    if state.get("client_secret"):
        data["client_secret"] = state["client_secret"]
    resp = requests.post(GOOGLE_DEVICE_TOKEN_URL, data=data, timeout=20)
    resp.raise_for_status()
    return resp.json()
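
A hedged sketch of how these two helpers are meant to be driven. Note that Google signals a still-pending grant with an HTTP error status, which `raise_for_status()` turns into an exception; the handling below is an assumption about usage, not code from this change (`credentials` is assumed to be a loaded client-secret dict):

```python
import time

import requests

state, info = start_device_authorization_flow(credentials, DocumentSource.GOOGLE_DRIVE)
print(f"Visit {info['verification_url']} and enter code {info['user_code']}")

while True:
    time.sleep(state["interval"])
    try:
        tokens = poll_device_authorization_flow(state)
        break  # success: contains access_token / refresh_token
    except requests.HTTPError as exc:
        body = exc.response.json() if exc.response is not None else {}
        if body.get("error") in ("authorization_pending", "slow_down"):
            continue  # the user has not finished consenting yet
        raise
```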

def _run_local_server_flow(client_config: dict[str, Any], source: DocumentSource) -> dict[str, Any]:
    """Launch the standard Google OAuth local-server flow to mint user tokens."""
    from google_auth_oauthlib.flow import InstalledAppFlow  # type: ignore

@ -4549,7 +4549,7 @@
            ]
        },
        {
            "name": "Meituan",
            "name": "LongCat",
            "logo": "",
            "tags": "LLM",
            "status": "1",
@ -217,3 +217,6 @@ REGISTER_ENABLED=1
# Enable Docling and MinerU
USE_DOCLING=false
USE_MINERU=false

# pptx support
DOTNET_SYSTEM_GLOBALIZATION_INVARIANT=1
@ -24,14 +24,6 @@ A guide explaining how to build a RAGFlow Docker image from its source code. By

## Build a Docker image

<Tabs
  defaultValue="without"
  values={[
    {label: 'Build a Docker image without embedding models', value: 'without'},
    {label: 'Build a Docker image including embedding models', value: 'including'}
  ]}>
  <TabItem value="without">

This image is approximately 2 GB in size and relies on external LLM and embedding services.

:::danger IMPORTANT
@ -47,10 +39,6 @@ docker build -f Dockerfile.deps -t infiniflow/ragflow_deps .
docker build -f Dockerfile -t infiniflow/ragflow:nightly .
```

</TabItem>
</Tabs>

## Launch a RAGFlow Service from Docker for macOS

After building the infiniflow/ragflow:nightly image, you are ready to launch a fully-functional RAGFlow service with all the required components, such as Elasticsearch, MySQL, MinIO, Redis, and more.
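
A minimal sketch of such a launch, assuming the standard compose file and that `RAGFLOW_IMAGE` is the image variable used in `docker/.env` of your checkout:

```bash
cd ragflow/docker
# Point the stack at the image built above (macOS sed syntax shown):
sed -i '' 's|^RAGFLOW_IMAGE=.*|RAGFLOW_IMAGE=infiniflow/ragflow:nightly|' .env
docker compose -f docker-compose.yml up -d
```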
docs/faq.mdx
@ -514,11 +514,11 @@ See [here](./guides/agent/best_practices/accelerate_agent_question_answering.md)

### How to use MinerU to parse PDF documents?

MinerU PDF document parsing is available starting from v0.21.1. RAGFlow supports MinerU (>= 2.6.3) as an optional PDF parser with multiple backends. RAGFlow itself only acts as a client: it calls MinerU to parse documents, reads the output files, and ingests the parsed content into RAGFlow. To use this feature, follow these steps:
MinerU PDF document parsing is available starting from v0.22.0. RAGFlow supports MinerU (>= 2.6.3) as an optional PDF parser with multiple backends. RAGFlow acts only as a client for MinerU, calling it to parse documents, reading the output files, and ingesting the parsed content. To use this feature, follow these steps:

1. **Prepare MinerU**
1. Prepare MinerU

   - **If you run RAGFlow from source**, install MinerU into an isolated virtual environment (recommended path: `$HOME/uv_tools`):
   - **If you deploy RAGFlow from source**, install MinerU into an isolated virtual environment (recommended path: `$HOME/uv_tools`):

   ```bash
   mkdir -p "$HOME/uv_tools"
@ -530,7 +530,7 @@ MinerU PDF document parsing is available starting from v0.21.1. RAGFlow supports
   # uv pip install -U "mineru[all]" -i https://mirrors.aliyun.com/pypi/simple
   ```

   - **If you run RAGFlow with Docker**, you usually only need to turn on MinerU support in `docker/.env`:
   - **If you deploy RAGFlow with Docker**, you usually only need to turn on MinerU support in `docker/.env`:

   ```bash
   # docker/.env
@ -541,7 +541,7 @@ MinerU PDF document parsing is available starting from v0.21.1. RAGFlow supports

   Enabling `USE_MINERU=true` will internally perform the same setup as the manual configuration (including setting the MinerU executable path and related environment variables). You only need the manual installation above if you are running from source or want full control over the MinerU installation.

2. **Start RAGFlow with MinerU enabled**
2. Start RAGFlow with MinerU enabled:

   - **Source deployment** – in the RAGFlow repo, export the key MinerU-related variables and start the backend service:

@ -570,7 +570,7 @@ MinerU PDF document parsing is available starting from v0.21.1. RAGFlow supports

### How to configure MinerU-specific settings?

The table below summarizes the most commonly used MinerU-related environment variables:
The table below summarizes the most frequently used MinerU environment variables:

| Environment variable | Description | Default | Example |
| ---------------------- | ---------------------------------- | ----------------------------------- | ----------------------------------------------------------------------------------------------- |
@ -583,14 +583,14 @@ The table below summarizes the most commonly used MinerU-related environment var

1. Set `MINERU_EXECUTABLE` to the path to the MinerU executable if the default `mineru` is not on `PATH`.
2. Set `MINERU_DELETE_OUTPUT` to `0` to keep MinerU's output. (Default: `1`, which deletes temporary output.)
3. Set `MINERU_OUTPUT_DIR` to specify the output directory for MinerU (otherwise a system temp directory is used).
3. Set `MINERU_OUTPUT_DIR` to specify the output directory for MinerU; otherwise, a system temp directory is used.
4. Set `MINERU_BACKEND` to specify a parsing backend:
   - `"pipeline"` (default): The traditional multimodel pipeline.
   - `"vlm-transformers"`: A vision-language model using HuggingFace Transformers.
   - `"vlm-vllm-engine"`: A vision-language model using a local vLLM engine (requires a local GPU).
   - `"vlm-http-client"`: A vision-language model via HTTP client to a remote vLLM server (RAGFlow only requires CPU).
5. If using the `"vlm-http-client"` backend, you must also set `MINERU_SERVER_URL` to the URL of your vLLM server.
6. If you want RAGFlow to call a **remote MinerU service** (instead of a MinerU process running locally with RAGFlow), set `MINERU_APISERVER` to the URL of the remote MinerU server.
5. If using the `"vlm-http-client"` backend, you must also set `MINERU_SERVER_URL` to your vLLM server's URL.
6. If configuring RAGFlow to call a *remote* MinerU service, set `MINERU_APISERVER` to the MinerU server's URL.
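
For instance, a remote-vLLM setup might look like this in `docker/.env` (the server address is a placeholder):

```bash
# docker/.env
USE_MINERU=true
MINERU_BACKEND=vlm-http-client
MINERU_SERVER_URL=http://your-vllm-host:30000  # placeholder address
MINERU_DELETE_OUTPUT=0  # optional: keep MinerU's output for inspection
```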

:::tip NOTE
For information about other environment variables natively supported by MinerU, see [here](https://opendatalab.github.io/MinerU/usage/cli_tools/#environment-variables-description).

@ -1,6 +1,6 @@
---
sidebar_position: 6
slug: /using_admin_ui
sidebar_position: 7
slug: /accessing_admin_ui
---

# Admin UI
@ -10,47 +10,7 @@ The RAGFlow Admin UI is a web-based interface that provides comprehensive system

## Accessing the Admin UI

### Launching from source code

1. Start the RAGFlow front-end (if not already running):

   ```bash
   cd web
   npm run dev
   ```

   Typically, the front-end server is running on port `9222`. The following output confirms a successful launch of the RAGFlow UI:

   ```bash
   ╔════════════════════════════════════════════════════╗
   ║ App listening at: ║
   ║ > Local: http://localhost:9222 ║
   ready - ║ > Network: http://192.168.1.92:9222 ║
   ║ ║
   ║ Now you can open browser with the above addresses↑ ║
   ╚════════════════════════════════════════════════════╝
   ```

2. Login to RAGFlow Admin UI

   Open your browser and navigate to:

   ```
   http://localhost:9222/admin
   ```

   Or if accessing from a remote machine:

   ```
   http://[YOUR_MACHINE_IP]:9222/admin
   ```

   > Replace `[YOUR_MACHINE_IP]` with your actual machine IP address (e.g., `http://192.168.1.49:9222/admin`).

   Then, you will be presented with a login page where you need to enter your admin user email address and password.

3. After a successful login, you will be redirected to the **Service Status** page, which is the default landing page for the Admin UI.
To access the RAGFlow admin UI, append `/admin` to the web UI's address, e.g. `http://[RAGFLOW_WEB_UI_ADDR]/admin`, replacing `[RAGFLOW_WEB_UI_ADDR]` with the actual RAGFlow web UI address.


## Admin UI Overview
@ -59,7 +19,7 @@ The RAGFlow Admin UI is a web-based interface that provides comprehensive system

The service status page displays the status of all services within the RAGFlow system.

- **Service List**: View all services in a table format.
- **Service List**: View all services in a table.
- **Filtering**: Use the filter button to filter services by **Service Type**.
- **Search**: Use the search bar to quickly find services by **Name** or **Service Type**.
- **Actions** (hover over a row to see action buttons):
@ -9,9 +9,132 @@ A component that sets the parsing rules for your dataset.

---

A **Parser** component defines how various file types should be parsed, including parsing methods for PDFs, fields to parse for Emails, and OCR methods for images.
A **Parser** component is auto-populated on the ingestion pipeline canvas and required in all ingestion pipeline workflows. Just like the **Extract** stage in the traditional ETL process, a **Parser** component in an ingestion pipeline defines how various file types are parsed into structured data. Click the component to display its configuration panel. In this configuration panel, you set the parsing rules for various file types.

## Configurations

## Scenario
Within the configuration panel, you can add multiple parsers and set the corresponding parsing rules or remove unwanted parsers. Please ensure your set of parsers covers all required file types; otherwise, an error will occur when you select this ingestion pipeline on your dataset's **Files** page.

A **Parser** component is auto-populated on the ingestion pipeline canvas and required in all ingestion pipeline workflows.
The **Parser** component supports parsing the following file types:

| File type | File format |
| ------------- | ------------------------ |
| PDF | PDF |
| Spreadsheet | XLSX, XLS, CSV |
| Image | PNG, JPG, JPEG, GIF, TIF |
| Email | EML |
| Text & Markup | TXT, MD, MDX, HTML, JSON |
| Word | DOCX |
| PowerPoint | PPTX, PPT |
| Audio | MP3, WAV |
| Video | MP4, AVI, MKV |

### PDF parser

The output of a PDF parser is `json`. In the PDF parser, you select the parsing method that works best with your PDFs.

- DeepDoc: (Default) The default visual model performing OCR, TSR, and DLR tasks on complex PDFs, but can be time-consuming.
- Naive: Skip OCR, TSR, and DLR tasks if *all* your PDFs are plain text.
- [MinerU](https://github.com/opendatalab/MinerU): (Experimental) An open-source tool that converts PDF into machine-readable formats.
- [Docling](https://github.com/docling-project/docling): (Experimental) An open-source document processing tool for gen AI.
- A third-party visual model from a specific model provider.

:::danger IMPORTANT
MinerU PDF document parsing is available starting from v0.22.0. RAGFlow supports MinerU (>= 2.6.3) as an optional PDF parser with multiple backends. RAGFlow acts only as a client for MinerU, calling it to parse documents, reading the output files, and ingesting the parsed content. To use this feature, follow these steps:

1. Prepare MinerU:

   - **If you deploy RAGFlow from source**, install MinerU into an isolated virtual environment (recommended path: `$HOME/uv_tools`):

     ```bash
     mkdir -p "$HOME/uv_tools"
     cd "$HOME/uv_tools"
     uv venv .venv
     source .venv/bin/activate
     uv pip install -U "mineru[core]" -i https://mirrors.aliyun.com/pypi/simple
     # or
     # uv pip install -U "mineru[all]" -i https://mirrors.aliyun.com/pypi/simple
     ```

   - **If you deploy RAGFlow with Docker**, you usually only need to turn on MinerU support in `docker/.env`:

     ```bash
     # docker/.env
     ...
     USE_MINERU=true
     ...
     ```

   Enabling `USE_MINERU=true` will internally perform the same setup as the manual configuration (including setting the MinerU executable path and related environment variables). You only need the manual installation above if you are running from source or want full control over the MinerU installation.

2. Start RAGFlow with MinerU enabled:

   - **Source deployment** – in the RAGFlow repo, export the key MinerU-related variables and start the backend service:

     ```bash
     # in RAGFlow repo
     export MINERU_EXECUTABLE="$HOME/uv_tools/.venv/bin/mineru"
     export MINERU_DELETE_OUTPUT=0 # keep output directory
     export MINERU_BACKEND=pipeline # or another backend you prefer

     source .venv/bin/activate
     export PYTHONPATH=$(pwd)
     bash docker/launch_backend_service.sh
     ```

   - **Docker deployment** – after setting `USE_MINERU=true`, restart the containers so that the new settings take effect:

     ```bash
     # in RAGFlow repo
     docker compose -f docker/docker-compose.yml restart
     ```

3. Restart the ragflow-server.
:::

:::caution WARNING
Third-party visual models are marked **Experimental**, because we have not fully tested these models for the aforementioned data extraction tasks.
:::

### Spreadsheet parser

A spreadsheet parser outputs `html`, preserving the original layout and table structure. You may remove this parser if your dataset contains no spreadsheets.

### Image parser

An Image parser uses a native OCR model for text extraction by default. You may select an alternative VLM model, provided that you have properly configured it on the **Model provider** page.

### Email parser

With the Email parser, you select the fields to parse from Emails, such as **subject** and **body**. The parser will then extract text from these specified fields.

### Text&Markup parser

A Text&Markup parser automatically removes all formatting tags (e.g., those from HTML and Markdown files) to output clean, plain text only.

### Word parser

A Word parser outputs `json`, preserving the original document structure information, including titles, paragraphs, tables, headers, and footers.

### PowerPoint (PPT) parser

A PowerPoint parser extracts content from PowerPoint files into `json`, processing each slide individually and distinguishing between its title, body text, and notes.

### Audio parser

An Audio parser transcribes audio files to text. To use this parser, you must first configure an ASR model on the **Model provider** page.

### Video parser

A Video parser transcribes video files to text. To use this parser, you must first configure a VLM model on the **Model provider** page.

## Output

The following are the global variable names for the output of the **Parser** component, which can be referenced by subsequent components in the ingestion pipeline.

| Variable name | Type |
| ------------- | ------------------------ |
| `markdown` | `string` |
| `text` | `string` |
| `html` | `string` |
| `json` | `Array<Object>` |
@ -76,13 +76,8 @@ You can also change a file's chunking method on the **Files** page.

An embedding model converts chunks into embeddings. It cannot be changed once the dataset has chunks. To switch to a different embedding model, you must delete all existing chunks in the dataset. The obvious reason is that we *must* ensure that files in a specific dataset are converted to embeddings using the *same* embedding model (ensuring that they are compared in the same embedding space).

The following embedding models can be deployed locally:

- BAAI/bge-large-zh-v1.5
- maidalun1020/bce-embedding-base_v1

:::danger IMPORTANT
These two embedding models are optimized specifically for English and Chinese, so performance may be compromised if you use them to embed documents in other languages.
Some embedding models are optimized for specific languages, so performance may be compromised if you use them to embed documents in other languages.
:::

### Upload file
@ -33,27 +33,61 @@ RAGFlow isn't one-size-fits-all. It is built for flexibility and supports deeper

2. Select the option that works best with your scenario:

   - DeepDoc: (Default) The default visual model performing OCR, TSR, and DLR tasks on PDFs, which can be time-consuming.
   - DeepDoc: (Default) The default visual model performing OCR, TSR, and DLR tasks on PDFs, but can be time-consuming.
   - Naive: Skip OCR, TSR, and DLR tasks if *all* your PDFs are plain text.
   - MinerU: An experimental feature.
   - A third-party visual model provided by a specific model provider.
   - [MinerU](https://github.com/opendatalab/MinerU): (Experimental) An open-source tool that converts PDF into machine-readable formats.
   - [Docling](https://github.com/docling-project/docling): (Experimental) An open-source document processing tool for gen AI.
   - A third-party visual model from a specific model provider.

:::danger IMPORTANT
MinerU PDF document parsing is available starting from v0.21.1. To use this feature, follow these steps:
MinerU PDF document parsing is available starting from v0.22.0. RAGFlow supports MinerU (>= 2.6.3) as an optional PDF parser with multiple backends. RAGFlow acts only as a client for MinerU, calling it to parse documents, reading the output files, and ingesting the parsed content. To use this feature, follow these steps:

1. Before deploying ragflow-server, update your **docker/.env** file:
   - Enable `HF_ENDPOINT=https://hf-mirror.com`
   - Add a MinerU entry: `MINERU_EXECUTABLE=/ragflow/uv_tools/.venv/bin/mineru`
1. Prepare MinerU:

2. Start the ragflow-server and run the following commands inside the container:
   - **If you deploy RAGFlow from source**, install MinerU into an isolated virtual environment (recommended path: `$HOME/uv_tools`):

   ```bash
   mkdir uv_tools
   cd uv_tools
   uv venv .venv
   source .venv/bin/activate
   uv pip install -U "mineru[core]" -i https://mirrors.aliyun.com/pypi/simple
   ```
     ```bash
     mkdir -p "$HOME/uv_tools"
     cd "$HOME/uv_tools"
     uv venv .venv
     source .venv/bin/activate
     uv pip install -U "mineru[core]" -i https://mirrors.aliyun.com/pypi/simple
     # or
     # uv pip install -U "mineru[all]" -i https://mirrors.aliyun.com/pypi/simple
     ```

   - **If you deploy RAGFlow with Docker**, you usually only need to turn on MinerU support in `docker/.env`:

     ```bash
     # docker/.env
     ...
     USE_MINERU=true
     ...
     ```

   Enabling `USE_MINERU=true` will internally perform the same setup as the manual configuration (including setting the MinerU executable path and related environment variables). You only need the manual installation above if you are running from source or want full control over the MinerU installation.

2. Start RAGFlow with MinerU enabled:

   - **Source deployment** – in the RAGFlow repo, export the key MinerU-related variables and start the backend service:

     ```bash
     # in RAGFlow repo
     export MINERU_EXECUTABLE="$HOME/uv_tools/.venv/bin/mineru"
     export MINERU_DELETE_OUTPUT=0 # keep output directory
     export MINERU_BACKEND=pipeline # or another backend you prefer

     source .venv/bin/activate
     export PYTHONPATH=$(pwd)
     bash docker/launch_backend_service.sh
     ```

   - **Docker deployment** – after setting `USE_MINERU=true`, restart the containers so that the new settings take effect:

     ```bash
     # in RAGFlow repo
     docker compose -f docker/docker-compose.yml restart
     ```

3. Restart the ragflow-server.
4. In the web UI, navigate to the **Configuration** page of your dataset. Click **Built-in** in the **Ingestion pipeline** section, select a chunking method that supports PDF parsing from the **Built-in** dropdown, and select **MinerU** in **PDF parser**.
@ -37,7 +37,7 @@ class SupportedLiteLLMProvider(StrEnum):
    TogetherAI = "TogetherAI"
    Anthropic = "Anthropic"
    Ollama = "Ollama"
    Meituan = "Meituan"
    LongCat = "LongCat"
    CometAPI = "CometAPI"
    SILICONFLOW = "SILICONFLOW"
    OpenRouter = "OpenRouter"
@ -56,7 +56,7 @@ FACTORY_DEFAULT_BASE_URL = {
    SupportedLiteLLMProvider.Dashscope: "https://dashscope.aliyuncs.com/compatible-mode/v1",
    SupportedLiteLLMProvider.Moonshot: "https://api.moonshot.cn/v1",
    SupportedLiteLLMProvider.Ollama: "",
    SupportedLiteLLMProvider.Meituan: "https://api.longcat.chat/openai",
    SupportedLiteLLMProvider.LongCat: "https://api.longcat.chat/openai",
    SupportedLiteLLMProvider.CometAPI: "https://api.cometapi.com/v1",
    SupportedLiteLLMProvider.SILICONFLOW: "https://api.siliconflow.cn/v1",
    SupportedLiteLLMProvider.OpenRouter: "https://openrouter.ai/api/v1",
@ -87,7 +87,7 @@ LITELLM_PROVIDER_PREFIX = {
    SupportedLiteLLMProvider.TogetherAI: "together_ai/",
    SupportedLiteLLMProvider.Anthropic: "",  # don't need a prefix
    SupportedLiteLLMProvider.Ollama: "ollama_chat/",
    SupportedLiteLLMProvider.Meituan: "openai/",
    SupportedLiteLLMProvider.LongCat: "openai/",
    SupportedLiteLLMProvider.CometAPI: "openai/",
    SupportedLiteLLMProvider.SILICONFLOW: "openai/",
    SupportedLiteLLMProvider.OpenRouter: "openai/",
@ -1390,7 +1390,7 @@ class LiteLLMBase(ABC):
        "TogetherAI",
        "Anthropic",
        "Ollama",
        "Meituan",
        "LongCat",
        "CometAPI",
        "SILICONFLOW",
        "OpenRouter",
@ -97,7 +97,7 @@ class RecursiveAbstractiveProcessing4TreeOrganizedRetrieval:
    async def __call__(self, chunks, random_state, callback=None, task_id: str = ""):
        if len(chunks) <= 1:
            return []
        chunks = [(s, a) for s, a in chunks if s and len(a) > 0]
        chunks = [(s, a) for s, a in chunks if s and a is not None and len(a) > 0]
        layers = [(0, len(chunks))]
        start, end = 0, len(chunks)
@ -24,7 +24,6 @@ import time

import json_repair

from api.db.services.canvas_service import UserCanvasService
from api.db.services.knowledgebase_service import KnowledgebaseService
from api.db.services.pipeline_operation_log_service import PipelineOperationLogService
from common.connection_utils import timeout
@ -33,7 +32,6 @@ from common.log_utils import init_root_logger
from common.config_utils import show_configs
from graphrag.general.index import run_graphrag_for_kb
from graphrag.utils import get_llm_cache, set_llm_cache, get_tags_from_cache, set_tags_to_cache
from rag.flow.pipeline import Pipeline
from rag.prompts.generator import keyword_extraction, question_proposal, content_tagging, run_toc_from_text
import logging
import os
@ -478,6 +476,9 @@ async def embedding(docs, mdl, parser_config=None, callback=None):


async def run_dataflow(task: dict):
    from api.db.services.canvas_service import UserCanvasService
    from rag.flow.pipeline import Pipeline

    task_start_ts = timer()
    dataflow_id = task["dataflow_id"]
    doc_id = task["doc_id"]
@ -642,47 +643,63 @@ async def run_raptor_for_kb(row, kb_parser_config, chat_mdl, embd_mdl, vector_si
    fake_doc_id = GRAPH_RAPTOR_FAKE_DOC_ID

    raptor_config = kb_parser_config.get("raptor", {})

    chunks = []
    vctr_nm = "q_%d_vec"%vector_size
    for doc_id in doc_ids:
        for d in settings.retriever.chunk_list(doc_id, row["tenant_id"], [str(row["kb_id"])],
                                               fields=["content_with_weight", vctr_nm],
                                               sort_by_position=True):
            chunks.append((d["content_with_weight"], np.array(d[vctr_nm])))

    raptor = Raptor(
        raptor_config.get("max_cluster", 64),
        chat_mdl,
        embd_mdl,
        raptor_config["prompt"],
        raptor_config["max_token"],
        raptor_config["threshold"],
    )
    original_length = len(chunks)
    chunks = await raptor(chunks, kb_parser_config["raptor"]["random_seed"], callback, row["id"])
    doc = {
        "doc_id": fake_doc_id,
        "kb_id": [str(row["kb_id"])],
        "docnm_kwd": row["name"],
        "title_tks": rag_tokenizer.tokenize(row["name"]),
        "raptor_kwd": "raptor"
    }
    if row["pagerank"]:
        doc[PAGERANK_FLD] = int(row["pagerank"])
    res = []
    tk_count = 0
    for content, vctr in chunks[original_length:]:
        d = copy.deepcopy(doc)
        d["id"] = xxhash.xxh64((content + str(fake_doc_id)).encode("utf-8")).hexdigest()
        d["create_time"] = str(datetime.now()).replace("T", " ")[:19]
        d["create_timestamp_flt"] = datetime.now().timestamp()
        d[vctr_nm] = vctr.tolist()
        d["content_with_weight"] = content
        d["content_ltks"] = rag_tokenizer.tokenize(content)
        d["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(d["content_ltks"])
        res.append(d)
        tk_count += num_tokens_from_string(content)
    async def generate(chunks, did):
        nonlocal tk_count, res
        raptor = Raptor(
            raptor_config.get("max_cluster", 64),
            chat_mdl,
            embd_mdl,
            raptor_config["prompt"],
            raptor_config["max_token"],
            raptor_config["threshold"],
        )
        original_length = len(chunks)
        chunks = await raptor(chunks, kb_parser_config["raptor"]["random_seed"], callback, row["id"])
        doc = {
            "doc_id": did,
            "kb_id": [str(row["kb_id"])],
            "docnm_kwd": row["name"],
            "title_tks": rag_tokenizer.tokenize(row["name"]),
            "raptor_kwd": "raptor"
        }
        if row["pagerank"]:
            doc[PAGERANK_FLD] = int(row["pagerank"])

        for content, vctr in chunks[original_length:]:
            d = copy.deepcopy(doc)
            d["id"] = xxhash.xxh64((content + str(fake_doc_id)).encode("utf-8")).hexdigest()
            d["create_time"] = str(datetime.now()).replace("T", " ")[:19]
            d["create_timestamp_flt"] = datetime.now().timestamp()
            d[vctr_nm] = vctr.tolist()
            d["content_with_weight"] = content
            d["content_ltks"] = rag_tokenizer.tokenize(content)
            d["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(d["content_ltks"])
            res.append(d)
            tk_count += num_tokens_from_string(content)

    if raptor_config.get("scope", "file") == "file":
        for x, doc_id in enumerate(doc_ids):
            chunks = []
            for d in settings.retriever.chunk_list(doc_id, row["tenant_id"], [str(row["kb_id"])],
                                                   fields=["content_with_weight", vctr_nm],
                                                   sort_by_position=True):
                chunks.append((d["content_with_weight"], np.array(d[vctr_nm])))
            await generate(chunks, doc_id)
            callback(prog=(x+1.)/len(doc_ids))
    else:
        chunks = []
        for doc_id in doc_ids:
            for d in settings.retriever.chunk_list(doc_id, row["tenant_id"], [str(row["kb_id"])],
                                                   fields=["content_with_weight", vctr_nm],
                                                   sort_by_position=True):
                chunks.append((d["content_with_weight"], np.array(d[vctr_nm])))

        await generate(chunks, fake_doc_id)

    return res, tk_count
@ -795,6 +812,7 @@ async def do_handle_task(task):
                    "threshold": 0.1,
                    "max_cluster": 64,
                    "random_seed": 0,
                    "scope": "file"
                },
            }
        )
@ -926,6 +944,7 @@ async def do_handle_task(task):


async def handle_task():
    global DONE_TASKS, FAILED_TASKS
    redis_msg, task = await collect()
    if not task:
@ -67,8 +67,10 @@ class Session(Base):
                    or (self.__session_type == "chat" and json_data.get("data") is True)
                ):
                    return

                yield self._structure_answer(json_data)
                if self.__session_type == "agent":
                    yield self._structure_answer(json_data)
                else:
                    yield self._structure_answer(json_data["data"])
        else:
            try:
                json_data = res.json()
@ -25,15 +25,7 @@
_Replace `[YOUR_MACHINE_IP]` with your actual machine IP address (e.g., `http://192.168.1.49:9222`)._

## Shutdown front-end

Ctrl + C, or:

```bash
pkill -f "umi dev"
```

## Access admin UI
## Login to RAGFlow web admin UI

Open your browser and navigate to:

@ -44,3 +36,10 @@
_Replace `[YOUR_MACHINE_IP]` with your actual machine IP address (e.g., `http://192.168.1.49:9222/admin`)._

## Shutdown front-end

Ctrl + C, or:

```bash
pkill -f "umi dev"
```
@ -1229,7 +1229,6 @@ export default {
    },
    addVariable: 'Variable hinzufügen',
    variableSettings: 'Variableneinstellungen',
    globalVariables: 'Globale Variablen',
    systemPrompt: 'System-Prompt',
    addCategory: 'Kategorie hinzufügen',
    categoryName: 'Kategoriename',
@ -1064,10 +1064,10 @@ Example: general/v2/`,
    exceptionMethod: 'Exception method',
    maxRounds: 'Max reflection rounds',
    delayEfterError: 'Delay after error',
    maxRetries: 'Max reflection rounds',
    maxRetries: 'Max retry rounds',
    advancedSettings: 'Advanced Settings',
    addTools: 'Add Tools',
    sysPromptDefultValue: `
    sysPromptDefaultValue: `
<role>
You are a helpful assistant, an AI assistant specialized in problem-solving for the user.
If a specific domain is provided, adapt your expertise to that domain; otherwise, operate as a generalist.
@ -1524,7 +1524,6 @@ This delimiter is used to split the input text into several text pieces echo of
    },
    addVariable: 'Add variable',
    variableSettings: 'Variable settings',
    globalVariables: 'Global variables',
    systemPrompt: 'System prompt',
    userPrompt: 'User prompt',
    addCategory: 'Add category',
@ -1923,7 +1922,7 @@ Important structured information may include: names, dates, locations, events, k
    },
    admin: {
      loginTitle: 'Admin Console',
      title: 'RAGFlow admin',
      title: 'RAGFlow',
      confirm: 'Confirm',
      close: 'Close',
      yes: 'Yes',
@ -1154,7 +1154,6 @@ export default {
    },
    addVariable: 'Ajouter une variable',
    variableSettings: 'Paramètres des variables',
    globalVariables: 'Variables globales',
    systemPrompt: 'Invite système',
    addCategory: 'Ajouter une catégorie',
    categoryName: 'Nom de la catégorie',
@ -1131,7 +1131,6 @@ export default {
    },
    addVariable: 'Adicionar variável',
    variableSettings: 'Configurações da variável',
    globalVariables: 'Variáveis globais',
    systemPrompt: 'Prompt do sistema',
    addCategory: 'Adicionar categoria',
    categoryName: 'Nome da categoria',
@ -937,7 +937,7 @@ export default {
    maxRetries: 'Макс. попыток',
    advancedSettings: 'Расширенные настройки',
    addTools: 'Добавить инструменты',
    sysPromptDefultValue: `
    sysPromptDefaultValue: `
<role>
Вы полезный помощник, ИИ-ассистент, специализирующийся на решении проблем пользователя.
Если указана конкретная область, адаптируйте вашу экспертизу к этой области; в противном случае действуйте как универсальный специалист.
@ -1385,7 +1385,6 @@ export default {
    },
    addVariable: 'Добавить переменную',
    variableSettings: 'Настройки переменных',
    globalVariables: 'Глобальные переменные',
    systemPrompt: 'Системный промпт',
    userPrompt: 'Пользовательский промпт',
    addCategory: 'Добавить категорию',
@ -759,7 +759,7 @@ General:实体和关系提取提示来自 GitHub - microsoft/graphrag:基于
    confirmPasswordMessage: '请确认新密码',
    confirmPasswordNonMatchMessage: '您输入的新密码不匹配!',
    cancel: '取消',
    addedModels: '添加了的模型',
    addedModels: '已添加的模型',
    modelsToBeAdded: '待添加的模型',
    addTheModel: '添加',
    apiKey: 'API-Key',
@ -1011,10 +1011,10 @@ General:实体和关系提取提示来自 GitHub - microsoft/graphrag:基于
    exceptionMethod: '异常处理方法',
    maxRounds: '最大反思轮数',
    delayEfterError: '错误后延迟',
    maxRetries: '最大反思轮数',
    maxRetries: '最大重试轮数',
    advancedSettings: '高级设置',
    addTools: '添加工具',
    sysPromptDefultValue: `
    sysPromptDefaultValue: `
<role>
你是一名乐于助人的助手,一名专注于为用户解决问题的 AI 助手。
如果用户指定了特定领域,你需要在该领域展现专业性;如果没有,则以通用助手的方式工作。
@ -384,21 +384,21 @@ function AdminServiceStatus() {
|
||||
{/* Extra info modal*/}
|
||||
<Dialog open={extraInfoModalOpen} onOpenChange={setExtraInfoModalOpen}>
|
||||
<DialogContent
|
||||
className="flex flex-col max-h-[calc(100vh-4rem)] p-0 overflow-hidden"
|
||||
className="flex flex-col max-h-[calc(100vh-4rem)] overflow-hidden"
|
||||
onAnimationEnd={() => {
|
||||
if (!extraInfoModalOpen) {
|
||||
setItemToMakeAction(null);
|
||||
}
|
||||
}}
|
||||
>
|
||||
<DialogHeader className="p-6 border-b-0.5 border-border-button">
|
||||
<DialogHeader>
|
||||
<DialogTitle>{t('admin.extraInfo')}</DialogTitle>
|
||||
</DialogHeader>
|
||||
|
||||
<DialogDescription className="sr-only" />
|
||||
|
||||
<ScrollArea className="h-0 flex-1 grid">
|
||||
<div className="px-12">
|
||||
<div className="px-6">
|
||||
<JsonView
|
||||
src={itemToMakeAction?.extra ?? {}}
|
||||
className="rounded-lg p-4 bg-bg-card break-words text-text-secondary"
|
||||
@ -406,7 +406,7 @@ function AdminServiceStatus() {
|
||||
</div>
|
||||
</ScrollArea>
|
||||
|
||||
<DialogFooter className="flex justify-end gap-4 px-12 pt-4 pb-8">
|
||||
<DialogFooter className="flex justify-end gap-4 px-6 py-4">
|
||||
<Button
|
||||
className="px-4 h-10 dark:border-border-button"
|
||||
variant="outline"
|
||||
@ -421,7 +421,7 @@ function AdminServiceStatus() {
|
||||
{/* Service details modal */}
|
||||
<Dialog open={detailModalOpen} onOpenChange={setDetailModalOpen}>
|
||||
<DialogContent
|
||||
className="flex flex-col max-h-[calc(100vh-4rem)] max-w-6xl p-0 overflow-hidden"
|
||||
className="flex flex-col max-h-[calc(100vh-4rem)] max-w-6xl overflow-hidden"
|
||||
onAnimationEnd={() => {
|
||||
if (!detailModalOpen) {
|
||||
setItemToMakeAction(null);
|
||||
@ -443,7 +443,7 @@ function AdminServiceStatus() {
|
||||
<DialogDescription className="sr-only" />
|
||||
|
||||
<ScrollArea className="h-0 flex-1 text-text-secondary grid">
|
||||
<div className="px-12">
|
||||
<div className="px-6">
|
||||
{itemToMakeAction?.service_type === 'task_executor' ? (
|
||||
<TaskExecutorDetail
|
||||
content={
|
||||
@ -456,7 +456,7 @@ function AdminServiceStatus() {
|
||||
</div>
|
||||
</ScrollArea>
|
||||
|
||||
<DialogFooter className="flex justify-end gap-4 px-12 pt-4 pb-8">
|
||||
<DialogFooter className="flex justify-end gap-4 px-6 py-4">
|
||||
<Button
|
||||
className="px-4 h-10 dark:border-border-button"
|
||||
variant="outline"
|
||||
|
||||
@ -446,7 +446,7 @@ export const initialAgentValues = {
|
||||
...initialLlmBaseValues,
|
||||
description: '',
|
||||
user_prompt: '',
|
||||
sys_prompt: t('flow.sysPromptDefultValue'),
|
||||
sys_prompt: t('flow.sysPromptDefaultValue'),
|
||||
prompts: [{ role: PromptRole.User, content: `{${AgentGlobals.SysQuery}}` }],
|
||||
message_history_window_size: 12,
|
||||
max_retries: 3,
|
||||
|
||||
@@ -212,7 +212,7 @@ function AgentForm({ node }: INextOperatorForm) {
<FormItem className="flex-1">
<FormLabel>{t('flow.maxRetries')}</FormLabel>
<FormControl>
<NumberInput {...field} max={8}></NumberInput>
<NumberInput {...field} max={8} min={0}></NumberInput>
</FormControl>
</FormItem>
)}
@@ -237,7 +237,7 @@ function AgentForm({ node }: INextOperatorForm) {
<FormItem className="flex-1">
<FormLabel>{t('flow.maxRounds')}</FormLabel>
<FormControl>
<NumberInput {...field}></NumberInput>
<NumberInput {...field} min={0}></NumberInput>
</FormControl>
</FormItem>
)}

@@ -89,7 +89,7 @@ function DataOperationsForm({ node }: INextOperatorForm) {
<QueryVariableList
tooltip={t('flow.queryTip')}
label={t('flow.query')}
types={[JsonSchemaDataType.Array, JsonSchemaDataType.Object]}
types={[JsonSchemaDataType.Object]}
></QueryVariableList>
<Separator />
<RAGFlowFormItem name="operations" label={t('flow.operations')}>

@@ -5,7 +5,7 @@ import {
FormFieldConfig,
FormFieldType,
} from '@/components/dynamic-form';
import { Button } from '@/components/ui/button';
import { BlockButton, Button } from '@/components/ui/button';
import { Modal } from '@/components/ui/modal/modal';
import {
Sheet,
@@ -142,13 +142,12 @@ export const GobalParamSheet = (props: IGobalParamModalProps) => {
>
<SheetHeader className="p-5">
<SheetTitle className="flex items-center gap-2.5">
{t('flow.gobalVariable')}
{t('flow.conversationVariable')}
</SheetTitle>
</SheetHeader>

<div className="px-5 pb-5">
<Button
variant={'secondary'}
<BlockButton
onClick={() => {
setFields(GobalFormFields);
setDefaultValues(GobalVariableFormDefaultValues);
@@ -156,7 +155,7 @@ export const GobalParamSheet = (props: IGobalParamModalProps) => {
}}
>
{t('flow.add')}
</Button>
</BlockButton>
</div>

<div className="flex flex-col gap-2 px-5 ">
@@ -203,7 +202,7 @@ export const GobalParamSheet = (props: IGobalParamModalProps) => {
</div>
</SheetContent>
<Modal
title={t('flow.add') + t('flow.gobalVariable')}
title={t('flow.add') + t('flow.conversationVariable')}
open={visible}
onCancel={hideAddModal}
showfooter={false}

@@ -6,7 +6,7 @@ import { DefaultOptionType } from 'antd/es/select';
import { t } from 'i18next';
import { isEmpty, toLower } from 'lodash';
import get from 'lodash/get';
import { MessageCircleCode } from 'lucide-react';
import { MessageSquareCode } from 'lucide-react';
import { useCallback, useContext, useEffect, useMemo, useState } from 'react';
import {
AgentDialogueMode,
@@ -162,7 +162,7 @@ export function useBuildConversationVariableOptions() {
return {
label: keyWithPrefix,
parentLabel: <span>{t('flow.conversationVariable')}</span>,
icon: <MessageCircleCode className="size-3" />,
icon: <MessageSquareCode className="size-3" />,
value: keyWithPrefix,
type: value.type,
};

@@ -28,6 +28,7 @@ import {
History,
LaptopMinimalCheck,
Logs,
MessageSquareCode,
ScreenShare,
Settings,
Upload,
@@ -218,7 +219,7 @@ export default function Agent() {
onClick={() => showGobalParamSheet()}
loading={loading}
>
{t('flow.conversationVariable')}
<MessageSquareCode /> {t('flow.conversationVariable')}
</ButtonLoading>
<Button variant={'secondary'} onClick={handleButtonRunClick}>
<CirclePlay />

@@ -14,7 +14,7 @@ import { useForm } from 'react-hook-form';

const formSchema = z.object({
title: z.string().min(1, {}),
avatar: z.string().optional(),
avatar: z.string().optional().nullable(),
description: z.string().optional().nullable(),
permission: z.string(),
});

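A note on the `avatar` change above: in zod, `z.string().optional()` accepts `string | undefined` but still rejects `null`, which is how an unset avatar often arrives from a backend. Chaining `.nullable()` widens the field to accept `null` as well. A minimal standalone sketch (not part of this diff) illustrating the difference:

```typescript
import { z } from 'zod';

const before = z.object({ avatar: z.string().optional() });
const after = z.object({ avatar: z.string().optional().nullable() });

// An unset avatar frequently comes back as null rather than being omitted.
const payload = { avatar: null };

console.log(before.safeParse(payload).success); // false — null is rejected
console.log(after.safeParse(payload).success); // true — null now passes
```
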
@@ -1,64 +1,383 @@
import { useMemo, useState } from 'react';
import { useCallback, useEffect, useMemo, useRef, useState } from 'react';

import { FileUploader } from '@/components/file-uploader';
import { Button } from '@/components/ui/button';
import {
Dialog,
DialogContent,
DialogDescription,
DialogFooter,
DialogHeader,
DialogTitle,
} from '@/components/ui/dialog';
import message from '@/components/ui/message';
import { Textarea } from '@/components/ui/textarea';
import { FileMimeType } from '@/constants/common';
import {
pollGoogleDriveWebAuthResult,
startGoogleDriveWebAuth,
} from '@/services/data-source-service';
import { Loader2 } from 'lucide-react';

type GoogleDriveTokenFieldProps = {
value?: string;
onChange: (value: any) => void;
placeholder?: string;
};

const credentialHasRefreshToken = (content: string) => {
try {
const parsed = JSON.parse(content);
return Boolean(parsed?.refresh_token);
} catch {
return false;
}
};

const describeCredentials = (content?: string) => {
if (!content) return '';
try {
const parsed = JSON.parse(content);
if (parsed?.refresh_token) {
return 'Uploaded OAuth tokens with a refresh token.';
}
if (parsed?.installed || parsed?.web) {
return 'Client credentials detected. Complete verification to mint long-lived tokens.';
}
return 'Stored Google credential JSON.';
} catch {
return '';
}
};

const GoogleDriveTokenField = ({
value,
onChange,
placeholder,
}: GoogleDriveTokenFieldProps) => {
const [files, setFiles] = useState<File[]>([]);
const [pendingCredentials, setPendingCredentials] = useState<string>('');
const [dialogOpen, setDialogOpen] = useState(false);
const [webAuthLoading, setWebAuthLoading] = useState(false);
const [webFlowId, setWebFlowId] = useState<string | null>(null);
const [webStatus, setWebStatus] = useState<
'idle' | 'waiting' | 'success' | 'error'
>('idle');
const [webStatusMessage, setWebStatusMessage] = useState('');
const webFlowIdRef = useRef<string | null>(null);
const webPollTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);

const handleValueChange = useMemo(
() => (nextFiles: File[]) => {
const clearWebState = useCallback(() => {
if (webPollTimerRef.current) {
clearTimeout(webPollTimerRef.current);
webPollTimerRef.current = null;
}
webFlowIdRef.current = null;
setWebFlowId(null);
setWebStatus('idle');
setWebStatusMessage('');
}, []);

useEffect(() => {
return () => {
if (webPollTimerRef.current) {
clearTimeout(webPollTimerRef.current);
}
};
}, []);

useEffect(() => {
webFlowIdRef.current = webFlowId;
}, [webFlowId]);

const credentialSummary = useMemo(() => describeCredentials(value), [value]);
const hasVerifiedTokens = useMemo(
() => Boolean(value && credentialHasRefreshToken(value)),
[value],
);
const hasUploadedButUnverified = useMemo(
() => Boolean(value && !hasVerifiedTokens),
[hasVerifiedTokens, value],
);

const resetDialog = useCallback(
(shouldResetState: boolean) => {
setDialogOpen(false);
clearWebState();
if (shouldResetState) {
setPendingCredentials('');
setFiles([]);
}
},
[clearWebState],
);

const fetchWebResult = useCallback(
async (flowId: string) => {
try {
const { data } = await pollGoogleDriveWebAuthResult({
flow_id: flowId,
});
if (data.code === 0 && data.data?.credentials) {
onChange(data.data.credentials);
setPendingCredentials('');
message.success('Google Drive credentials verified.');
resetDialog(false);
return;
}
if (data.code === 106) {
setWebStatus('waiting');
setWebStatusMessage('Authorization confirmed. Finalizing tokens...');
if (webPollTimerRef.current) {
clearTimeout(webPollTimerRef.current);
}
webPollTimerRef.current = setTimeout(
() => fetchWebResult(flowId),
1500,
);
return;
}
message.error(data.message || 'Authorization failed.');
clearWebState();
} catch (err) {
message.error('Unable to retrieve authorization result.');
clearWebState();
}
},
[clearWebState, onChange, resetDialog],
);

useEffect(() => {
const handler = (event: MessageEvent) => {
const payload = event.data;
if (!payload || payload.type !== 'ragflow-google-drive-oauth') {
return;
}
if (!payload.flowId) {
return;
}
if (webFlowIdRef.current && webFlowIdRef.current !== payload.flowId) {
return;
}

if (payload.status === 'success') {
setWebStatus('waiting');
setWebStatusMessage('Authorization confirmed. Finalizing tokens...');
fetchWebResult(payload.flowId);
} else {
message.error(
payload.message || 'Authorization window reported an error.',
);
clearWebState();
}
};

window.addEventListener('message', handler);
return () => window.removeEventListener('message', handler);
}, [clearWebState, fetchWebResult]);

const handleValueChange = useCallback(
(nextFiles: File[]) => {
if (!nextFiles.length) {
setFiles([]);
onChange('');
setPendingCredentials('');
clearWebState();
return;
}
const file = nextFiles[nextFiles.length - 1];
file
.text()
.then((text) => {
JSON.parse(text);
onChange(text);
try {
JSON.parse(text);
} catch {
message.error('Invalid JSON file.');
setFiles([]);
clearWebState();
return;
}
setFiles([file]);
message.success('JSON uploaded');
clearWebState();
if (credentialHasRefreshToken(text)) {
onChange(text);
setPendingCredentials('');
message.success('OAuth credentials uploaded.');
return;
}
setPendingCredentials(text);
setDialogOpen(true);
message.info(
'Client configuration uploaded. Verification is required to finish setup.',
);
})
.catch(() => {
message.error('Invalid JSON file.');
message.error('Unable to read the uploaded file.');
setFiles([]);
});
},
[onChange],
[clearWebState, onChange],
);

return (
<div className="flex flex-col gap-2">
<Textarea
value={value || ''}
onChange={(event) => onChange(event.target.value)}
placeholder={
placeholder ||
'{ "token": "...", "refresh_token": "...", "client_id": "...", ... }'
const handleStartWebAuthorization = useCallback(async () => {
if (!pendingCredentials) {
message.error('No Google credential file detected.');
return;
}
setWebAuthLoading(true);
clearWebState();
try {
const { data } = await startGoogleDriveWebAuth({
credentials: pendingCredentials,
});
if (data.code === 0 && data.data?.authorization_url) {
const flowId = data.data.flow_id;
const popup = window.open(
data.data.authorization_url,
'ragflow-google-drive-oauth',
'width=600,height=720',
);
if (!popup) {
message.error(
'Popup was blocked. Please allow popups for this site.',
);
return;
}
className="min-h-[120px] max-h-60 overflow-y-auto"
/>
popup.focus();
webFlowIdRef.current = flowId;
setWebFlowId(flowId);
setWebStatus('waiting');
setWebStatusMessage('Complete the Google consent in the popup window.');
} else {
message.error(data.message || 'Failed to start browser authorization.');
}
} catch (err) {
message.error('Failed to start browser authorization.');
} finally {
setWebAuthLoading(false);
}
}, [clearWebState, pendingCredentials]);

const handleManualWebCheck = useCallback(() => {
if (!webFlowId) {
message.info('Start browser authorization first.');
return;
}
setWebStatus('waiting');
setWebStatusMessage('Checking authorization status...');
fetchWebResult(webFlowId);
}, [fetchWebResult, webFlowId]);

const handleCancel = useCallback(() => {
message.warning(
'Verification canceled. Upload the credential again to restart.',
);
resetDialog(true);
}, [resetDialog]);

return (
<div className="flex flex-col gap-3">
{(credentialSummary ||
hasVerifiedTokens ||
hasUploadedButUnverified ||
pendingCredentials) && (
<div className="flex flex-wrap items-center gap-3 rounded-md border border-dashed border-muted-foreground/40 bg-muted/20 px-3 py-2 text-xs text-muted-foreground">
<div className="flex flex-wrap items-center gap-2">
{hasVerifiedTokens ? (
<span className="rounded-full bg-emerald-100 px-2 py-0.5 text-[11px] font-semibold uppercase tracking-wide text-emerald-700">
Verified
</span>
) : null}
{hasUploadedButUnverified ? (
<span className="rounded-full bg-amber-100 px-2 py-0.5 text-[11px] font-semibold uppercase tracking-wide text-amber-700">
Needs authorization
</span>
) : null}
{pendingCredentials && !hasVerifiedTokens ? (
<span className="rounded-full bg-blue-100 px-2 py-0.5 text-[11px] font-semibold uppercase tracking-wide text-blue-700">
Uploaded (pending)
</span>
) : null}
</div>
{credentialSummary ? (
<p className="m-0">{credentialSummary}</p>
) : null}
</div>
)}
<FileUploader
className="py-4"
value={files}
onValueChange={handleValueChange}
accept={{ '*.json': [FileMimeType.Json] }}
maxFileCount={1}
description="Upload your Google OAuth JSON file."
/>

<Dialog
open={dialogOpen}
onOpenChange={(open) => {
if (!open) {
handleCancel();
}
}}
>
<DialogContent>
<DialogHeader>
<DialogTitle>Complete Google verification</DialogTitle>
<DialogDescription>
The uploaded client credentials do not contain a refresh token.
Run the verification flow once to mint reusable tokens.
</DialogDescription>
</DialogHeader>

<div className="space-y-4">
<div className="rounded-md border border-dashed border-muted-foreground/40 bg-muted/10 px-4 py-4 text-sm text-muted-foreground">
<div className="text-sm font-semibold text-foreground">
Authorize in browser
</div>
<p className="mt-2">
We will open Google's consent page in a new window. Sign in
with the admin account, grant access, and return here. Your
credentials will update automatically.
</p>
{webStatus !== 'idle' && (
<p
className={`mt-2 text-xs ${
webStatus === 'error'
? 'text-destructive'
: 'text-muted-foreground'
}`}
>
{webStatusMessage}
</p>
)}
<div className="mt-3 flex flex-wrap gap-2">
<Button
onClick={handleStartWebAuthorization}
disabled={webAuthLoading}
>
{webAuthLoading && (
<Loader2 className="mr-2 size-4 animate-spin" />
)}
Authorize with Google
</Button>
{webFlowId ? (
<Button
variant="outline"
onClick={handleManualWebCheck}
disabled={webStatus === 'success'}
>
Refresh status
</Button>
) : null}
</div>
</div>
</div>

<DialogFooter className="pt-2">
<Button variant="ghost" onClick={handleCancel}>
Cancel
</Button>
</DialogFooter>
</DialogContent>
</Dialog>
</div>
);
};

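For reference, the `message` listener in the component above only acts on events whose data carries `type: 'ragflow-google-drive-oauth'`, a matching `flowId`, and a `status`. The OAuth callback page that the popup lands on is server-side and not part of this diff, so the following is only a sketch of the contract the listener implies, not the actual callback implementation; the `flow-123` value is a hypothetical placeholder:

```typescript
// Hypothetical snippet for the popup's OAuth callback page.
// Field names mirror exactly what the listener in GoogleDriveTokenField checks.
type GoogleDriveOAuthMessage = {
  type: 'ragflow-google-drive-oauth';
  flowId: string;
  status: 'success' | 'error';
  message?: string; // surfaced via message.error() on failure
};

function notifyOpener(payload: GoogleDriveOAuthMessage) {
  // window.opener is the tab that called window.open(); post back and close.
  // Assumes the callback page is served from the same origin as the app.
  window.opener?.postMessage(payload, window.location.origin);
  window.close();
}

// e.g. after the server confirms the authorization-code exchange:
notifyOpener({
  type: 'ragflow-google-drive-oauth',
  flowId: 'flow-123',
  status: 'success',
});
```
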
@@ -270,6 +270,101 @@ export const DataSourceFormFields = {
defaultValue: 'uploaded',
},
],
[DataSourceKey.GOOGLE_DRIVE]: [
{
label: 'Primary Admin Email',
name: 'config.credentials.google_primary_admin',
type: FormFieldType.Text,
required: true,
placeholder: 'admin@example.com',
tooltip: t('setting.google_drivePrimaryAdminTip'),
},
{
label: 'OAuth Token JSON',
name: 'config.credentials.google_tokens',
type: FormFieldType.Textarea,
required: true,
render: (fieldProps) => (
<GoogleDriveTokenField
value={fieldProps.value}
onChange={fieldProps.onChange}
placeholder='{ "token": "...", "refresh_token": "...", ... }'
/>
),
tooltip: t('setting.google_driveTokenTip'),
},
{
label: 'My Drive Emails',
name: 'config.my_drive_emails',
type: FormFieldType.Text,
required: true,
placeholder: 'user1@example.com,user2@example.com',
tooltip: t('setting.google_driveMyDriveEmailsTip'),
},
{
label: 'Shared Folder URLs',
name: 'config.shared_folder_urls',
type: FormFieldType.Textarea,
required: true,
placeholder:
'https://drive.google.com/drive/folders/XXXXX,https://drive.google.com/drive/folders/YYYYY',
tooltip: t('setting.google_driveSharedFoldersTip'),
},
// The fields below are intentionally disabled for now. Uncomment them when we
// reintroduce shared drive controls or advanced impersonation options.
// {
//   label: 'Shared Drive URLs',
//   name: 'config.shared_drive_urls',
//   type: FormFieldType.Text,
//   required: false,
//   placeholder:
//     'Optional: comma-separated shared drive links if you want to include them.',
// },
// {
//   label: 'Specific User Emails',
//   name: 'config.specific_user_emails',
//   type: FormFieldType.Text,
//   required: false,
//   placeholder:
//     'Optional: comma-separated list of users to impersonate (overrides defaults).',
// },
// {
//   label: 'Include My Drive',
//   name: 'config.include_my_drives',
//   type: FormFieldType.Checkbox,
//   required: false,
//   defaultValue: true,
// },
// {
//   label: 'Include Shared Drives',
//   name: 'config.include_shared_drives',
//   type: FormFieldType.Checkbox,
//   required: false,
//   defaultValue: false,
// },
// {
//   label: 'Include "Shared with me"',
//   name: 'config.include_files_shared_with_me',
//   type: FormFieldType.Checkbox,
//   required: false,
//   defaultValue: false,
// },
// {
//   label: 'Allow Images',
//   name: 'config.allow_images',
//   type: FormFieldType.Checkbox,
//   required: false,
//   defaultValue: false,
// },
{
label: '',
name: 'config.credentials.authentication_method',
type: FormFieldType.Text,
required: false,
hidden: true,
defaultValue: 'uploaded',
},
],
};

export const DataSourceFormDefaultValues = {

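Given the dotted field names above, the Google Drive connector form presumably assembles a payload shaped like the following. This assumes the dynamic form unflattens dotted names into nested objects, which the submit logic in this diff does not show, so treat the shape and placeholder values as illustrative only:

```typescript
// Hypothetical assembled connector config, inferred from the field names above.
const googleDriveConfig = {
  config: {
    credentials: {
      google_primary_admin: 'admin@example.com',
      google_tokens: '{ "token": "...", "refresh_token": "..." }',
      authentication_method: 'uploaded', // hidden field's default
    },
    my_drive_emails: 'user1@example.com,user2@example.com',
    shared_folder_urls: 'https://drive.google.com/drive/folders/XXXXX',
  },
};
```
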
@@ -124,7 +124,7 @@ export function EditMcpDialog({
form={form}
setFieldChanged={setFieldChanged}
></EditMcpForm>
<Card>
<Card className="bg-transparent">
<CardContent className="p-3">
<Collapse
title={

@@ -59,7 +59,7 @@ export default function McpServer() {
<ProfileSettingWrapperCard
header={
<>
<div className="text-text-primary text-2xl font-medium">
<div className="text-text-primary text-2xl font-semibold">
{t('mcp.mcpServers')}
</div>
<section className="flex items-center justify-between">
@@ -80,13 +80,13 @@ export default function McpServer() {
)}
{t(`mcp.${isSelectionMode ? 'exitBulkManage' : 'bulkManage'}`)}
</Button>
<Button variant={'secondary'} onClick={showEditModal('')}>
<Plus className="size-3.5" /> {t('mcp.addMCP')}
</Button>
<Button onClick={showImportModal}>
<Button variant={'secondary'} onClick={showImportModal}>
<Download className="size-3.5" />
{t('mcp.import')}
</Button>
<Button onClick={showEditModal('')}>
<Plus className="size-3.5" /> {t('mcp.addMCP')}
</Button>
</div>
</section>
</>

@@ -33,4 +33,10 @@ export const getDataSourceLogs = (id: string, params?: any) =>
export const featchDataSourceDetail = (id: string) =>
request.get(api.dataSourceDetail(id));

export const startGoogleDriveWebAuth = (payload: { credentials: string }) =>
request.post(api.googleDriveWebAuthStart, { data: payload });

export const pollGoogleDriveWebAuthResult = (payload: { flow_id: string }) =>
request.post(api.googleDriveWebAuthResult, { data: payload });

export default dataSourceService;

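Taken together with the component earlier in this diff, these two helpers support a start-then-poll flow. A condensed sketch of the intended call sequence, with error handling and cancellation elided; the response fields and the code-106 "still pending" convention follow the checks made in `fetchWebResult` above, and in the real component polling is additionally triggered by the popup's postMessage rather than a bare loop:

```typescript
import {
  pollGoogleDriveWebAuthResult,
  startGoogleDriveWebAuth,
} from '@/services/data-source-service';

async function runWebAuth(credentials: string): Promise<string> {
  // 1. Ask the backend for a consent URL and a flow id.
  const { data: start } = await startGoogleDriveWebAuth({ credentials });
  if (start.code !== 0) throw new Error(start.message);

  // 2. Send the user through Google's consent screen in a popup.
  window.open(start.data.authorization_url, 'ragflow-google-drive-oauth');

  // 3. Poll until tokens are minted (code 106 means "still pending").
  for (;;) {
    const { data } = await pollGoogleDriveWebAuthResult({
      flow_id: start.data.flow_id,
    });
    if (data.code === 0) return data.data.credentials;
    if (data.code !== 106) throw new Error(data.message);
    await new Promise((resolve) => setTimeout(resolve, 1500));
  }
}
```
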
@@ -42,6 +42,8 @@ export default {
dataSourceRebuild: (id: string) => `${api_host}/connector/${id}/rebuild`,
dataSourceLogs: (id: string) => `${api_host}/connector/${id}/logs`,
dataSourceDetail: (id: string) => `${api_host}/connector/${id}`,
googleDriveWebAuthStart: `${api_host}/connector/google-drive/oauth/web/start`,
googleDriveWebAuthResult: `${api_host}/connector/google-drive/oauth/web/result`,

// plugin
llm_tools: `${api_host}/plugin/llm_tools`,