Mirror of https://github.com/infiniflow/ragflow.git (synced 2025-12-08 20:42:30 +08:00)

Compare commits: nightly...revert-116 — 1 commit

| Author | SHA1 | Date |
|---|---|---|
|  | cd0216cce3 |  |
.github/workflows/tests.yml (vendored) — 8 changed lines
@@ -127,14 +127,6 @@ jobs:
           fi
           fi

-      - name: Run unit test
-        run: |
-          uv sync --python 3.10 --group test --frozen
-          source .venv/bin/activate
-          which pytest || echo "pytest not in PATH"
-          echo "Start to run unit test"
-          python3 run_tests.py
-
       - name: Build ragflow:nightly
         run: |
           RUNNER_WORKSPACE_PREFIX=${RUNNER_WORKSPACE_PREFIX:-${HOME}}
@@ -10,10 +10,11 @@ WORKDIR /ragflow

 # Copy models downloaded via download_deps.py
 RUN mkdir -p /ragflow/rag/res/deepdoc /root/.ragflow
 RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/huggingface.co,target=/huggingface.co \
     cp /huggingface.co/InfiniFlow/huqie/huqie.txt.trie /ragflow/rag/res/ && \
     tar --exclude='.*' -cf - \
         /huggingface.co/InfiniFlow/text_concat_xgb_v1.0 \
         /huggingface.co/InfiniFlow/deepdoc \
-        | tar -xf - --strip-components=3 -C /ragflow/rag/res/deepdoc
+        | tar -xf - --strip-components=3 -C /ragflow/rag/res/deepdoc

 # https://github.com/chrismattmann/tika-python
 # This is the only way to run python-tika without internet access. Without this set, the default is to check the tika version and pull latest every time from Apache.
@@ -14,5 +14,5 @@
 # limitations under the License.
 #

-# from beartype.claw import beartype_this_package
-# beartype_this_package()
+from beartype.claw import beartype_this_package
+beartype_this_package()
agent/canvas.py — 136 changed lines
@@ -16,7 +16,6 @@
 import asyncio
 import base64
 import inspect
-import binascii
 import json
 import logging
 import re
@@ -29,9 +28,7 @@ from typing import Any, Union, Tuple
 from agent.component import component_class
 from agent.component.base import ComponentBase
 from api.db.services.file_service import FileService
 from api.db.services.llm_service import LLMBundle
 from api.db.services.task_service import has_canceled
 from common.constants import LLMType
 from common.misc_utils import get_uuid, hash_str2int
 from common.exceptions import TaskCanceledException
 from rag.prompts.generator import chunks_format
@@ -91,6 +88,9 @@ class Graph:
     def load(self):
         self.components = self.dsl["components"]
         cpn_nms = set([])
+        for k, cpn in self.components.items():
+            cpn_nms.add(cpn["obj"]["component_name"])
+
         for k, cpn in self.components.items():
             cpn_nms.add(cpn["obj"]["component_name"])
             param = component_class(cpn["obj"]["component_name"] + "Param")()
@@ -356,6 +356,8 @@ class Canvas(Graph):
                 self.globals[k] = ""
             else:
                 self.globals[k] = ""
+        print(self.globals)
+
     async def run(self, **kwargs):
         st = time.perf_counter()
@@ -413,19 +415,13 @@ class Canvas(Graph):

         loop = asyncio.get_running_loop()
         tasks = []

-        def _run_async_in_thread(coro_func, **call_kwargs):
-            return asyncio.run(coro_func(**call_kwargs))
-
         i = f
         while i < t:
             cpn = self.get_component_obj(self.path[i])
-            task_fn = None
-            call_kwargs = None
-
             if cpn.component_name.lower() in ["begin", "userfillup"]:
-                call_kwargs = {"inputs": kwargs.get("inputs", {})}
-                task_fn = cpn.invoke
+                task_fn = partial(cpn.invoke, inputs=kwargs.get("inputs", {}))
                 i += 1
             else:
                 for _, ele in cpn.get_input_elements().items():
@@ -434,18 +430,13 @@ class Canvas(Graph):
                         t -= 1
                         break
                 else:
-                    call_kwargs = cpn.get_input()
-                    task_fn = cpn.invoke
+                    task_fn = partial(cpn.invoke, **cpn.get_input())
                     i += 1

-            if task_fn is None:
-                continue
-
-            invoke_async = getattr(cpn, "invoke_async", None)
-            if invoke_async and asyncio.iscoroutinefunction(invoke_async):
-                tasks.append(loop.run_in_executor(self._thread_pool, partial(_run_async_in_thread, invoke_async, **(call_kwargs or {}))))
-            else:
-                tasks.append(loop.run_in_executor(self._thread_pool, partial(task_fn, **(call_kwargs or {}))))
+            tasks.append(loop.run_in_executor(self._thread_pool, task_fn))

         if tasks:
             await asyncio.gather(*tasks)
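Reviewer note: both sides of this hunk fan component invocations out to the canvas thread pool via `run_in_executor` and await the batch with `asyncio.gather`. A minimal, self-contained sketch of that dispatch pattern — `slow_component` is a hypothetical stand-in for `cpn.invoke`:

```python
import asyncio
import time
from concurrent.futures import ThreadPoolExecutor
from functools import partial

def slow_component(name: str, delay: float) -> str:
    time.sleep(delay)              # stands in for a blocking cpn.invoke
    return f"{name} done"

async def run_level(components):
    loop = asyncio.get_running_loop()
    with ThreadPoolExecutor(max_workers=4) as pool:
        tasks = [loop.run_in_executor(pool, partial(slow_component, name, delay))
                 for name, delay in components]
        # every component of the current path segment runs in parallel
        return await asyncio.gather(*tasks)

print(asyncio.run(run_level([("begin", 0.1), ("llm", 0.2)])))
```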
@@ -465,7 +456,6 @@ class Canvas(Graph):
         self.error = ""
         idx = len(self.path) - 1
         partials = []
-        tts_mdl = None
         while idx < len(self.path):
             to = len(self.path)
             for i in range(idx, to):
@@ -483,63 +473,41 @@ class Canvas(Graph):
             cpn = self.get_component(self.path[i])
             cpn_obj = self.get_component_obj(self.path[i])
             if cpn_obj.component_name.lower() == "message":
-                if cpn_obj.get_param("auto_play"):
-                    tts_mdl = LLMBundle(self._tenant_id, LLMType.TTS)
                 if isinstance(cpn_obj.output("content"), partial):
                     _m = ""
-                    buff_m = ""
                     stream = cpn_obj.output("content")()
-
-                    async def _process_stream(m):
-                        nonlocal buff_m, _m, tts_mdl
-                        if not m:
-                            return
-                        if m == "<think>":
-                            return decorate("message", {"content": "", "start_to_think": True})
-
-                        elif m == "</think>":
-                            return decorate("message", {"content": "", "end_to_think": True})
-
-                        buff_m += m
-                        _m += m
-
-                        if len(buff_m) > 16:
-                            ev = decorate(
-                                "message",
-                                {
-                                    "content": m,
-                                    "audio_binary": self.tts(tts_mdl, buff_m)
-                                }
-                            )
-                            buff_m = ""
-                            return ev
-
-                        return decorate("message", {"content": m})
-
                     if inspect.isasyncgen(stream):
                         async for m in stream:
-                            ev = await _process_stream(m)
-                            if ev:
-                                yield ev
+                            if not m:
+                                continue
+                            if m == "<think>":
+                                yield decorate("message", {"content": "", "start_to_think": True})
+                            elif m == "</think>":
+                                yield decorate("message", {"content": "", "end_to_think": True})
+                            else:
+                                yield decorate("message", {"content": m})
+                            _m += m
                     else:
                         for m in stream:
-                            ev = await _process_stream(m)
-                            if ev:
-                                yield ev
-                    if buff_m:
-                        yield decorate("message", {"content": "", "audio_binary": self.tts(tts_mdl, buff_m)})
-                        buff_m = ""
+                            if not m:
+                                continue
+                            if m == "<think>":
+                                yield decorate("message", {"content": "", "start_to_think": True})
+                            elif m == "</think>":
+                                yield decorate("message", {"content": "", "end_to_think": True})
+                            else:
+                                yield decorate("message", {"content": m})
+                            _m += m
                     cpn_obj.set_output("content", _m)
                     cite = re.search(r"\[ID:[ 0-9]+\]", _m)
                 else:
                     yield decorate("message", {"content": cpn_obj.output("content")})
                     cite = re.search(r"\[ID:[ 0-9]+\]", cpn_obj.output("content"))

-                message_end = {}
-                if isinstance(cpn_obj.output("attachment"), dict):
-                    message_end["attachment"] = cpn_obj.output("attachment")
-                if cite:
-                    message_end["reference"] = self.get_reference()
-                yield decorate("message_end", message_end)
+                if isinstance(cpn_obj.output("attachment"), tuple):
+                    yield decorate("message", {"attachment": cpn_obj.output("attachment")})
+
+                yield decorate("message_end", {"reference": self.get_reference() if cite else None})

             while partials:
                 _cpn_obj = self.get_component_obj(partials[0])
@@ -650,50 +618,6 @@ class Canvas(Graph):
             return False
         return True


-    def tts(self,tts_mdl, text):
-        def clean_tts_text(text: str) -> str:
-            if not text:
-                return ""
-
-            text = text.encode("utf-8", "ignore").decode("utf-8", "ignore")
-
-            text = re.sub(r"[\x00-\x08\x0B-\x0C\x0E-\x1F\x7F]", "", text)
-
-            emoji_pattern = re.compile(
-                "[\U0001F600-\U0001F64F"
-                "\U0001F300-\U0001F5FF"
-                "\U0001F680-\U0001F6FF"
-                "\U0001F1E0-\U0001F1FF"
-                "\U00002700-\U000027BF"
-                "\U0001F900-\U0001F9FF"
-                "\U0001FA70-\U0001FAFF"
-                "\U0001FAD0-\U0001FAFF]+",
-                flags=re.UNICODE
-            )
-            text = emoji_pattern.sub("", text)
-
-            text = re.sub(r"\s+", " ", text).strip()
-
-            MAX_LEN = 500
-            if len(text) > MAX_LEN:
-                text = text[:MAX_LEN]
-
-            return text
-        if not tts_mdl or not text:
-            return None
-        text = clean_tts_text(text)
-        if not text:
-            return None
-        bin = b""
-        try:
-            for chunk in tts_mdl.tts(text):
-                bin += chunk
-        except Exception as e:
-            logging.error(f"TTS failed: {e}, text={text!r}")
-            return None
-        return binascii.hexlify(bin).decode("utf-8")
-
     def get_history(self, window_size):
         convs = []
         if window_size <= 0:
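Reviewer note: the deleted `tts()` helper sanitizes the text (control characters, emoji, a 500-character cap) before synthesis and hex-encodes the audio bytes so they can ride inside a JSON event payload. A condensed sketch of that flow — `fake_tts` is a hypothetical stand-in for `tts_mdl.tts`:

```python
import binascii
import re

def fake_tts(text: str):
    yield text.encode("utf-8")     # pretend these are synthesized audio bytes

def tts_hex(text: str):
    text = re.sub(r"\s+", " ", text).strip()[:500]  # collapse whitespace, cap length
    if not text:
        return None
    audio = b"".join(fake_tts(text))
    # hexlify keeps binary audio safe to embed in a JSON "audio_binary" field
    return binascii.hexlify(audio).decode("utf-8")

print(tts_hex("hello   world"))  # 68656c6c6f20776f726c64
```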
@@ -13,11 +13,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-import asyncio
 import json
 import logging
 import os
 import re
+from concurrent.futures import ThreadPoolExecutor
 from copy import deepcopy
 from functools import partial
 from typing import Any
@@ -29,8 +29,8 @@ from api.db.services.llm_service import LLMBundle
 from api.db.services.tenant_llm_service import TenantLLMService
 from api.db.services.mcp_server_service import MCPServerService
 from common.connection_utils import timeout
-from rag.prompts.generator import next_step_async, COMPLETE_TASK, analyze_task_async, \
-    citation_prompt, reflect_async, kb_prompt, citation_plus, full_question, message_fit_in, structured_output_prompt
+from rag.prompts.generator import next_step, COMPLETE_TASK, analyze_task, \
+    citation_prompt, reflect, rank_memories, kb_prompt, citation_plus, full_question, message_fit_in, structured_output_prompt
 from common.mcp_tool_call_conn import MCPToolCallSession, mcp_tool_metadata_to_openai_tool
 from agent.component.llm import LLMParam, LLM
@@ -153,19 +153,16 @@ class Agent(LLM, ToolBase):

         return None

-    async def _force_format_to_schema_async(self, text: str, schema_prompt: str) -> str:
+    def _force_format_to_schema(self, text: str, schema_prompt: str) -> str:
         fmt_msgs = [
             {"role": "system", "content": schema_prompt + "\nIMPORTANT: Output ONLY valid JSON. No markdown, no extra text."},
             {"role": "user", "content": text},
         ]
         _, fmt_msgs = message_fit_in(fmt_msgs, int(self.chat_mdl.max_length * 0.97))
-        return await self._generate_async(fmt_msgs)
-
-    def _invoke(self, **kwargs):
-        return asyncio.run(self._invoke_async(**kwargs))
+        return self._generate(fmt_msgs)

     @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 20*60)))
-    async def _invoke_async(self, **kwargs):
+    def _invoke(self, **kwargs):
         if self.check_if_canceled("Agent processing"):
             return
@@ -184,7 +181,7 @@ class Agent(LLM, ToolBase):
         if not self.tools:
             if self.check_if_canceled("Agent processing"):
                 return
-            return await LLM._invoke_async(self, **kwargs)
+            return LLM._invoke(self, **kwargs)

         prompt, msg, user_defined_prompt = self._prepare_prompt_variables()
         output_schema = self._get_output_schema()
@@ -196,13 +193,13 @@ class Agent(LLM, ToolBase):
         downstreams = self._canvas.get_component(self._id)["downstream"] if self._canvas.get_component(self._id) else []
         ex = self.exception_handler()
         if any([self._canvas.get_component_obj(cid).component_name.lower()=="message" for cid in downstreams]) and not (ex and ex["goto"]) and not output_schema:
-            self.set_output("content", partial(self.stream_output_with_tools_async, prompt, deepcopy(msg), user_defined_prompt))
+            self.set_output("content", partial(self.stream_output_with_tools, prompt, msg, user_defined_prompt))
             return

         _, msg = message_fit_in([{"role": "system", "content": prompt}, *msg], int(self.chat_mdl.max_length * 0.97))
         use_tools = []
         ans = ""
-        async for delta_ans, _tk in self._react_with_tools_streamly_async(prompt, msg, use_tools, user_defined_prompt,schema_prompt=schema_prompt):
+        for delta_ans, tk in self._react_with_tools_streamly(prompt, msg, use_tools, user_defined_prompt,schema_prompt=schema_prompt):
             if self.check_if_canceled("Agent processing"):
                 return
             ans += delta_ans
@@ -230,7 +227,7 @@ class Agent(LLM, ToolBase):
                     return obj
                 except Exception:
                     error = "The answer cannot be parsed as JSON"
-                    ans = await self._force_format_to_schema_async(ans, schema_prompt)
+                    ans = self._force_format_to_schema(ans, schema_prompt)
                     if ans.find("**ERROR**") >= 0:
                         continue
@@ -242,11 +239,11 @@ class Agent(LLM, ToolBase):
         self.set_output("use_tools", use_tools)
         return ans

-    async def stream_output_with_tools_async(self, prompt, msg, user_defined_prompt={}):
+    def stream_output_with_tools(self, prompt, msg, user_defined_prompt={}):
         _, msg = message_fit_in([{"role": "system", "content": prompt}, *msg], int(self.chat_mdl.max_length * 0.97))
         answer_without_toolcall = ""
         use_tools = []
-        async for delta_ans, _ in self._react_with_tools_streamly_async(prompt, msg, use_tools, user_defined_prompt):
+        for delta_ans,_ in self._react_with_tools_streamly(prompt, msg, use_tools, user_defined_prompt):
             if self.check_if_canceled("Agent streaming"):
                 return
@@ -264,23 +261,39 @@ class Agent(LLM, ToolBase):
         if use_tools:
             self.set_output("use_tools", use_tools)

-    async def _react_with_tools_streamly_async(self, prompt, history: list[dict], use_tools, user_defined_prompt={}, schema_prompt: str = ""):
+    def _gen_citations(self, text):
+        retrievals = self._canvas.get_reference()
+        retrievals = {"chunks": list(retrievals["chunks"].values()), "doc_aggs": list(retrievals["doc_aggs"].values())}
+        formated_refer = kb_prompt(retrievals, self.chat_mdl.max_length, True)
+        for delta_ans in self._generate_streamly([{"role": "system", "content": citation_plus("\n\n".join(formated_refer))},
+                                                  {"role": "user", "content": text}
+                                                  ]):
+            yield delta_ans
+
+    def _react_with_tools_streamly(self, prompt, history: list[dict], use_tools, user_defined_prompt={}, schema_prompt: str = ""):
         token_count = 0
         tool_metas = self.tool_meta
         hist = deepcopy(history)
         last_calling = ""
         if len(hist) > 3:
             st = timer()
-            user_request = await asyncio.to_thread(full_question, messages=history, chat_mdl=self.chat_mdl)
+            user_request = full_question(messages=history, chat_mdl=self.chat_mdl)
             self.callback("Multi-turn conversation optimization", {}, user_request, elapsed_time=timer()-st)
         else:
             user_request = history[-1]["content"]

-        async def use_tool_async(name, args):
-            nonlocal hist, use_tools, last_calling
+        def use_tool(name, args):
+            nonlocal hist, use_tools, token_count,last_calling,user_request
             logging.info(f"{last_calling=} == {name=}")
             # Summarize of function calling
             #if all([
             #    isinstance(self.toolcall_session.get_tool_obj(name), Agent),
             #    last_calling,
             #    last_calling != name
             #]):
             #    self.toolcall_session.get_tool_obj(name).add2system_prompt(f"The chat history with other agents are as following: \n" + self.get_useful_memory(user_request, str(args["user_prompt"]),user_defined_prompt))
             last_calling = name
-            tool_response = await self.toolcall_session.tool_call_async(name, args)
+            tool_response = self.toolcall_session.tool_call(name, args)
             use_tools.append({
                 "name": name,
                 "arguments": args,
@@ -291,7 +304,7 @@ class Agent(LLM, ToolBase):

             return name, tool_response

-        async def complete():
+        def complete():
             nonlocal hist
             need2cite = self._param.cite and self._canvas.get_reference()["chunks"] and self._id.find("-->") < 0
             if schema_prompt:
@@ -309,7 +322,7 @@ class Agent(LLM, ToolBase):
             if len(hist) > 12:
                 _hist = [hist[0], hist[1], *hist[-10:]]
             entire_txt = ""
-            async for delta_ans in self._generate_streamly_async(_hist):
+            for delta_ans in self._generate_streamly(_hist):
                 if not need2cite or cited:
                     yield delta_ans, 0
                 entire_txt += delta_ans
@@ -318,7 +331,7 @@ class Agent(LLM, ToolBase):

             st = timer()
             txt = ""
-            async for delta_ans in self._gen_citations_async(entire_txt):
+            for delta_ans in self._gen_citations(entire_txt):
                 if self.check_if_canceled("Agent streaming"):
                     return
                 yield delta_ans, 0
@@ -333,14 +346,14 @@ class Agent(LLM, ToolBase):
             hist.append({"role": "user", "content": content})

         st = timer()
-        task_desc = await analyze_task_async(self.chat_mdl, prompt, user_request, tool_metas, user_defined_prompt)
+        task_desc = analyze_task(self.chat_mdl, prompt, user_request, tool_metas, user_defined_prompt)
         self.callback("analyze_task", {}, task_desc, elapsed_time=timer()-st)
         for _ in range(self._param.max_rounds + 1):
             if self.check_if_canceled("Agent streaming"):
                 return
-            response, tk = await next_step_async(self.chat_mdl, hist, tool_metas, task_desc, user_defined_prompt)
+            response, tk = next_step(self.chat_mdl, hist, tool_metas, task_desc, user_defined_prompt)
             # self.callback("next_step", {}, str(response)[:256]+"...")
-            token_count += tk or 0
+            token_count += tk
             hist.append({"role": "assistant", "content": response})
             try:
                 functions = json_repair.loads(re.sub(r"```.*", "", response))
@@ -349,24 +362,23 @@ class Agent(LLM, ToolBase):
                 for f in functions:
                     if not isinstance(f, dict):
                         raise TypeError(f"An object type should be returned, but `{f}`")
-                tool_tasks = []
-                for func in functions:
-                    name = func["name"]
-                    args = func["arguments"]
-                    if name == COMPLETE_TASK:
-                        append_user_content(hist, f"Respond with a formal answer. FORGET(DO NOT mention) about `{COMPLETE_TASK}`. The language for the response MUST be as the same as the first user request.\n")
-                        async for txt, tkcnt in complete():
-                            yield txt, tkcnt
-                        return
-
-                    tool_tasks.append(asyncio.create_task(use_tool_async(name, args)))
-
-                results = await asyncio.gather(*tool_tasks) if tool_tasks else []
-                st = timer()
-                reflection = await reflect_async(self.chat_mdl, hist, results, user_defined_prompt)
-                append_user_content(hist, reflection)
-                self.callback("reflection", {}, str(reflection), elapsed_time=timer()-st)
+                with ThreadPoolExecutor(max_workers=5) as executor:
+                    thr = []
+                    for func in functions:
+                        name = func["name"]
+                        args = func["arguments"]
+                        if name == COMPLETE_TASK:
+                            append_user_content(hist, f"Respond with a formal answer. FORGET(DO NOT mention) about `{COMPLETE_TASK}`. The language for the response MUST be as the same as the first user request.\n")
+                            for txt, tkcnt in complete():
+                                yield txt, tkcnt
+                            return
+                        thr.append(executor.submit(use_tool, name, args))
+
+                    st = timer()
+                    reflection = reflect(self.chat_mdl, hist, [th.result() for th in thr], user_defined_prompt)
+                    append_user_content(hist, reflection)
+                    self.callback("reflection", {}, str(reflection), elapsed_time=timer()-st)

             except Exception as e:
                 logging.exception(msg=f"Wrong JSON argument format in LLM ReAct response: {e}")
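Reviewer note: the revert trades per-call asyncio tasks for a small `ThreadPoolExecutor`. A minimal sketch of the submit-and-collect pattern used above — `call_tool` is a hypothetical stand-in for `use_tool(name, args)`:

```python
from concurrent.futures import ThreadPoolExecutor

def call_tool(name: str, args: dict):
    # stands in for a blocking tool invocation
    return name, {"echo": args}

requests = [("search", {"q": "ragflow"}), ("calc", {"x": 2})]
with ThreadPoolExecutor(max_workers=5) as executor:
    futures = [executor.submit(call_tool, n, a) for n, a in requests]
    results = [f.result() for f in futures]  # blocks until each call finishes
print(results)
```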
@@ -390,17 +402,21 @@ Respond immediately with your final comprehensive answer.
                 return
             append_user_content(hist, final_instruction)

-        async for txt, tkcnt in complete():
+        for txt, tkcnt in complete():
             yield txt, tkcnt

-    async def _gen_citations_async(self, text):
-        retrievals = self._canvas.get_reference()
-        retrievals = {"chunks": list(retrievals["chunks"].values()), "doc_aggs": list(retrievals["doc_aggs"].values())}
-        formated_refer = kb_prompt(retrievals, self.chat_mdl.max_length, True)
-        async for delta_ans in self._generate_streamly_async([{"role": "system", "content": citation_plus("\n\n".join(formated_refer))},
-                                                              {"role": "user", "content": text}
-                                                              ]):
-            yield delta_ans
+    def get_useful_memory(self, goal: str, sub_goal:str, topn=3, user_defined_prompt:dict={}) -> str:
+        # self.callback("get_useful_memory", {"topn": 3}, "...")
+        mems = self._canvas.get_memory()
+        rank = rank_memories(self.chat_mdl, goal, sub_goal, [summ for (user, assist, summ) in mems], user_defined_prompt)
+        try:
+            rank = json_repair.loads(re.sub(r"```.*", "", rank))[:topn]
+            mems = [mems[r] for r in rank]
+            return "\n\n".join([f"User: {u}\nAgent: {a}" for u, a,_ in mems])
+        except Exception as e:
+            logging.exception(e)
+
+        return "Error occurred."

     def reset(self, only_output=False):
         """
@@ -417,3 +433,4 @@ Respond immediately with your final comprehensive answer.
         for k in self._param.inputs.keys():
             self._param.inputs[k]["value"] = None
         self._param.debug_inputs = {}
@@ -14,7 +14,6 @@
 # limitations under the License.
 #

-import asyncio
 import re
 import time
 from abc import ABC
@@ -446,34 +445,6 @@ class ComponentBase(ABC):
         self.set_output("_elapsed_time", time.perf_counter() - self.output("_created_time"))
         return self.output()

-    async def invoke_async(self, **kwargs) -> dict[str, Any]:
-        """
-        Async wrapper for component invocation.
-        Prefers coroutine `_invoke_async` if present; otherwise falls back to `_invoke`.
-        Handles timing and error recording consistently with `invoke`.
-        """
-        self.set_output("_created_time", time.perf_counter())
-        try:
-            if self.check_if_canceled("Component processing"):
-                return
-
-            fn_async = getattr(self, "_invoke_async", None)
-            if fn_async and asyncio.iscoroutinefunction(fn_async):
-                await fn_async(**kwargs)
-            elif asyncio.iscoroutinefunction(self._invoke):
-                await self._invoke(**kwargs)
-            else:
-                await asyncio.to_thread(self._invoke, **kwargs)
-        except Exception as e:
-            if self.get_exception_default_value():
-                self.set_exception_default_value()
-            else:
-                self.set_output("_ERROR", str(e))
-            logging.exception(e)
-        self._param.debug_inputs = {}
-        self.set_output("_elapsed_time", time.perf_counter() - self.output("_created_time"))
-        return self.output()
-
     @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10*60)))
     def _invoke(self, **kwargs):
         raise NotImplementedError()
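Reviewer note: the removed `invoke_async` wrapper encodes one dispatch rule — await a coroutine `_invoke_async` when the subclass defines one, otherwise push the blocking `_invoke` onto a worker thread. A generic sketch of that rule, stripped of the timing and error bookkeeping:

```python
import asyncio

class Component:
    def _invoke(self, **kwargs):
        return {"sync": kwargs}   # blocking implementation

    async def invoke_async(self, **kwargs):
        # prefer a coroutine implementation when the subclass provides one
        fn_async = getattr(self, "_invoke_async", None)
        if fn_async and asyncio.iscoroutinefunction(fn_async):
            return await fn_async(**kwargs)
        # otherwise run the sync implementation off the event loop
        return await asyncio.to_thread(self._invoke, **kwargs)

print(asyncio.run(Component().invoke_async(x=1)))
```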
@@ -18,7 +18,6 @@ import re
 from functools import partial

 from agent.component.base import ComponentParamBase, ComponentBase
-from api.db.services.file_service import FileService


 class UserFillUpParam(ComponentParamBase):
@@ -64,13 +63,6 @@ class UserFillUp(ComponentBase):
         for k, v in kwargs.get("inputs", {}).items():
             if self.check_if_canceled("UserFillUp processing"):
                 return
-            if isinstance(v, dict) and v.get("type", "").lower().find("file") >=0:
-                if v.get("optional") and v.get("value", None) is None:
-                    v = None
-                else:
-                    v = FileService.get_files([v["value"]])
-            else:
-                v = v.get("value")
             self.set_output(k, v)

     def thoughts(self) -> str:
@@ -13,14 +13,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-import asyncio
 import json
 import logging
 import os
 import re
-import threading
 from copy import deepcopy
-from typing import Any, Generator, AsyncGenerator
+from typing import Any, Generator
 import json_repair
 from functools import partial
 from common.constants import LLMType
@@ -173,13 +171,6 @@ class LLM(ComponentBase):
             return self.chat_mdl.chat(msg[0]["content"], msg[1:], self._param.gen_conf(), **kwargs)
         return self.chat_mdl.chat(msg[0]["content"], msg[1:], self._param.gen_conf(), images=self.imgs, **kwargs)

-    async def _generate_async(self, msg: list[dict], **kwargs) -> str:
-        if not self.imgs and hasattr(self.chat_mdl, "async_chat"):
-            return await self.chat_mdl.async_chat(msg[0]["content"], msg[1:], self._param.gen_conf(), **kwargs)
-        if self.imgs and hasattr(self.chat_mdl, "async_chat"):
-            return await self.chat_mdl.async_chat(msg[0]["content"], msg[1:], self._param.gen_conf(), images=self.imgs, **kwargs)
-        return await asyncio.to_thread(self._generate, msg, **kwargs)
-
     def _generate_streamly(self, msg:list[dict], **kwargs) -> Generator[str, None, None]:
         ans = ""
         last_idx = 0
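Reviewer note: the deleted `_generate_async` probes the model wrapper for an `async_chat` method and falls back to threading off the sync `chat` call. A sketch of that capability probe — both model classes here are hypothetical stand-ins:

```python
import asyncio

class SyncModel:
    def chat(self, prompt: str) -> str:
        return f"sync: {prompt}"

class AsyncModel(SyncModel):
    async def async_chat(self, prompt: str) -> str:
        return f"async: {prompt}"

async def generate(mdl, prompt: str) -> str:
    # use the native async API when the wrapper exposes one
    if hasattr(mdl, "async_chat"):
        return await mdl.async_chat(prompt)
    # otherwise offload the blocking call to a worker thread
    return await asyncio.to_thread(mdl.chat, prompt)

async def main():
    print(await generate(AsyncModel(), "hi"))
    print(await generate(SyncModel(), "hi"))

asyncio.run(main())
```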
@@ -214,69 +205,6 @@ class LLM(ComponentBase):
             for txt in self.chat_mdl.chat_streamly(msg[0]["content"], msg[1:], self._param.gen_conf(), images=self.imgs, **kwargs):
                 yield delta(txt)

-    async def _generate_streamly_async(self, msg: list[dict], **kwargs) -> AsyncGenerator[str, None]:
-        async def delta_wrapper(txt_iter):
-            ans = ""
-            last_idx = 0
-            endswith_think = False
-
-            def delta(txt):
-                nonlocal ans, last_idx, endswith_think
-                delta_ans = txt[last_idx:]
-                ans = txt
-
-                if delta_ans.find("<think>") == 0:
-                    last_idx += len("<think>")
-                    return "<think>"
-                elif delta_ans.find("<think>") > 0:
-                    delta_ans = txt[last_idx:last_idx + delta_ans.find("<think>")]
-                    last_idx += delta_ans.find("<think>")
-                    return delta_ans
-                elif delta_ans.endswith("</think>"):
-                    endswith_think = True
-                elif endswith_think:
-                    endswith_think = False
-                    return "</think>"
-
-                last_idx = len(ans)
-                if ans.endswith("</think>"):
-                    last_idx -= len("</think>")
-                return re.sub(r"(<think>|</think>)", "", delta_ans)
-
-            async for t in txt_iter:
-                yield delta(t)
-
-        if not self.imgs and hasattr(self.chat_mdl, "async_chat_streamly"):
-            async for t in delta_wrapper(self.chat_mdl.async_chat_streamly(msg[0]["content"], msg[1:], self._param.gen_conf(), **kwargs)):
-                yield t
-            return
-        if self.imgs and hasattr(self.chat_mdl, "async_chat_streamly"):
-            async for t in delta_wrapper(self.chat_mdl.async_chat_streamly(msg[0]["content"], msg[1:], self._param.gen_conf(), images=self.imgs, **kwargs)):
-                yield t
-            return
-
-        # fallback
-        loop = asyncio.get_running_loop()
-        queue: asyncio.Queue = asyncio.Queue()
-
-        def worker():
-            try:
-                for item in self._generate_streamly(msg, **kwargs):
-                    loop.call_soon_threadsafe(queue.put_nowait, item)
-            except Exception as e:
-                loop.call_soon_threadsafe(queue.put_nowait, e)
-            finally:
-                loop.call_soon_threadsafe(queue.put_nowait, StopAsyncIteration)
-
-        threading.Thread(target=worker, daemon=True).start()
-        while True:
-            item = await queue.get()
-            if item is StopAsyncIteration:
-                break
-            if isinstance(item, Exception):
-                raise item
-            yield item
-
     async def _stream_output_async(self, prompt, msg):
         _, msg = message_fit_in([{"role": "system", "content": prompt}, *msg], int(self.chat_mdl.max_length * 0.97))
         answer = ""
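Reviewer note: the deleted fallback at the bottom of `_generate_streamly_async` is the standard sync-to-async generator bridge — a daemon thread drains the sync generator into an `asyncio.Queue` via `call_soon_threadsafe`, and a sentinel ends the async iteration. A condensed, runnable sketch of the same pattern:

```python
import asyncio
import threading

def sync_gen():
    yield from ("a", "b", "c")    # stands in for a blocking token stream

async def as_async(gen):
    loop = asyncio.get_running_loop()
    queue: asyncio.Queue = asyncio.Queue()

    def worker():
        try:
            for item in gen:
                loop.call_soon_threadsafe(queue.put_nowait, item)
        finally:
            # sentinel tells the async side the stream is exhausted
            loop.call_soon_threadsafe(queue.put_nowait, StopAsyncIteration)

    threading.Thread(target=worker, daemon=True).start()
    while (item := await queue.get()) is not StopAsyncIteration:
        yield item

async def main():
    async for x in as_async(sync_gen()):
        print(x)

asyncio.run(main())
```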
@@ -327,7 +255,7 @@ class LLM(ComponentBase):
         self.set_output("content", answer)

     @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10*60)))
-    async def _invoke_async(self, **kwargs):
+    def _invoke(self, **kwargs):
         if self.check_if_canceled("LLM processing"):
             return
@@ -338,25 +266,22 @@ class LLM(ComponentBase):

         prompt, msg, _ = self._prepare_prompt_variables()
         error: str = ""
-        output_structure = None
+        output_structure=None
         try:
-            output_structure = self._param.outputs["structured"]
+            output_structure = self._param.outputs['structured']
         except Exception:
             pass
         if output_structure and isinstance(output_structure, dict) and output_structure.get("properties") and len(output_structure["properties"]) > 0:
-            schema = json.dumps(output_structure, ensure_ascii=False, indent=2)
-            prompt_with_schema = prompt + structured_output_prompt(schema)
-            for _ in range(self._param.max_retries + 1):
+            schema=json.dumps(output_structure, ensure_ascii=False, indent=2)
+            prompt += structured_output_prompt(schema)
+            for _ in range(self._param.max_retries+1):
                 if self.check_if_canceled("LLM processing"):
                     return

-                _, msg_fit = message_fit_in(
-                    [{"role": "system", "content": prompt_with_schema}, *deepcopy(msg)],
-                    int(self.chat_mdl.max_length * 0.97),
-                )
+                _, msg = message_fit_in([{"role": "system", "content": prompt}, *msg], int(self.chat_mdl.max_length * 0.97))
                 error = ""
-                ans = await self._generate_async(msg_fit)
-                msg_fit.pop(0)
+                ans = self._generate(msg)
+                msg.pop(0)
                 if ans.find("**ERROR**") >= 0:
                     logging.error(f"LLM response error: {ans}")
                     error = ans
@@ -365,7 +290,7 @@ class LLM(ComponentBase):
                     self.set_output("structured", json_repair.loads(clean_formated_answer(ans)))
                     return
                 except Exception:
-                    msg_fit.append({"role": "user", "content": "The answer can't not be parsed as JSON"})
+                    msg.append({"role": "user", "content": "The answer can't not be parsed as JSON"})
                     error = "The answer can't not be parsed as JSON"
             if error:
                 self.set_output("_ERROR", error)
@@ -373,23 +298,18 @@ class LLM(ComponentBase):

         downstreams = self._canvas.get_component(self._id)["downstream"] if self._canvas.get_component(self._id) else []
         ex = self.exception_handler()
-        if any([self._canvas.get_component_obj(cid).component_name.lower() == "message" for cid in downstreams]) and not (
-            ex and ex["goto"]
-        ):
-            self.set_output("content", partial(self._stream_output_async, prompt, deepcopy(msg)))
+        if any([self._canvas.get_component_obj(cid).component_name.lower()=="message" for cid in downstreams]) and not (ex and ex["goto"]):
+            self.set_output("content", partial(self._stream_output_async, prompt, msg))
             return

         error = ""
-        for _ in range(self._param.max_retries + 1):
+        for _ in range(self._param.max_retries+1):
             if self.check_if_canceled("LLM processing"):
                 return

-            _, msg_fit = message_fit_in(
-                [{"role": "system", "content": prompt}, *deepcopy(msg)], int(self.chat_mdl.max_length * 0.97)
-            )
+            _, msg = message_fit_in([{"role": "system", "content": prompt}, *msg], int(self.chat_mdl.max_length * 0.97))
             error = ""
-            ans = await self._generate_async(msg_fit)
-            msg_fit.pop(0)
+            ans = self._generate(msg)
+            msg.pop(0)
             if ans.find("**ERROR**") >= 0:
                 logging.error(f"LLM response error: {ans}")
                 error = ans
@@ -403,9 +323,23 @@ class LLM(ComponentBase):
             else:
                 self.set_output("_ERROR", error)

-    @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10*60)))
-    def _invoke(self, **kwargs):
-        return asyncio.run(self._invoke_async(**kwargs))
+    def _stream_output(self, prompt, msg):
+        _, msg = message_fit_in([{"role": "system", "content": prompt}, *msg], int(self.chat_mdl.max_length * 0.97))
+        answer = ""
+        for ans in self._generate_streamly(msg):
+            if self.check_if_canceled("LLM streaming"):
+                return
+
+            if ans.find("**ERROR**") >= 0:
+                if self.get_exception_default_value():
+                    self.set_output("content", self.get_exception_default_value())
+                    yield self.get_exception_default_value()
+                else:
+                    self.set_output("_ERROR", ans)
+                return
+            yield ans
+            answer += ans
+        self.set_output("content", answer)

     def add_memory(self, user:str, assist:str, func_name: str, params: dict, results: str, user_defined_prompt:dict={}):
         summ = tool_call_summary(self.chat_mdl, func_name, params, results, user_defined_prompt)
@@ -17,7 +17,6 @@ import logging
 import re
 import time
 from copy import deepcopy
-import asyncio
 from functools import partial
 from typing import TypedDict, List, Any
 from agent.component.base import ComponentParamBase, ComponentBase
@@ -49,19 +48,12 @@ class LLMToolPluginCallSession(ToolCallSession):
         self.callback = callback

     def tool_call(self, name: str, arguments: dict[str, Any]) -> Any:
-        return asyncio.run(self.tool_call_async(name, arguments))
-
-    async def tool_call_async(self, name: str, arguments: dict[str, Any]) -> Any:
         assert name in self.tools_map, f"LLM tool {name} does not exist"
         st = timer()
-        tool_obj = self.tools_map[name]
-        if isinstance(tool_obj, MCPToolCallSession):
-            resp = await asyncio.to_thread(tool_obj.tool_call, name, arguments, 60)
+        if isinstance(self.tools_map[name], MCPToolCallSession):
+            resp = self.tools_map[name].tool_call(name, arguments, 60)
         else:
-            if hasattr(tool_obj, "invoke_async") and asyncio.iscoroutinefunction(tool_obj.invoke_async):
-                resp = await tool_obj.invoke_async(**arguments)
-            else:
-                resp = await asyncio.to_thread(tool_obj.invoke, **arguments)
+            resp = self.tools_map[name].invoke(**arguments)

         self.callback(name, arguments, resp, elapsed_time=timer()-st)
         return resp
@@ -147,33 +139,6 @@ class ToolBase(ComponentBase):
         self.set_output("_elapsed_time", time.perf_counter() - self.output("_created_time"))
         return res

-    async def invoke_async(self, **kwargs):
-        """
-        Async wrapper for tool invocation.
-        If `_invoke` is a coroutine, await it directly; otherwise run in a thread to avoid blocking.
-        Mirrors the exception handling of `invoke`.
-        """
-        if self.check_if_canceled("Tool processing"):
-            return
-
-        self.set_output("_created_time", time.perf_counter())
-        try:
-            fn_async = getattr(self, "_invoke_async", None)
-            if fn_async and asyncio.iscoroutinefunction(fn_async):
-                res = await fn_async(**kwargs)
-            elif asyncio.iscoroutinefunction(self._invoke):
-                res = await self._invoke(**kwargs)
-            else:
-                res = await asyncio.to_thread(self._invoke, **kwargs)
-        except Exception as e:
-            self._param.outputs["_ERROR"] = {"value": str(e)}
-            logging.exception(e)
-            res = str(e)
-        self._param.debug_inputs = []
-
-        self.set_output("_elapsed_time", time.perf_counter() - self.output("_created_time"))
-        return res
-
     def _retrieve_chunks(self, res_list: list, get_title, get_url, get_content, get_score=None):
         chunks = []
         aggs = []
@@ -198,7 +198,6 @@ class Retrieval(ToolBase, ABC):
             return
         if cks:
             kbinfos["chunks"] = cks
-        kbinfos["chunks"] = settings.retriever.retrieval_by_children(kbinfos["chunks"], [kb.tenant_id for kb in kbs])
         if self._param.use_kg:
             ck = settings.kg_retriever.retrieval(query,
                                                  [kb.tenant_id for kb in kbs],
@@ -14,5 +14,5 @@
 # limitations under the License.
 #

-# from beartype.claw import beartype_this_package
-# beartype_this_package()
+from beartype.claw import beartype_this_package
+beartype_this_package()
@@ -13,7 +13,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-import asyncio
 import datetime
 import json
 import re
@@ -148,35 +147,31 @@ async def set():
         d["available_int"] = req["available_int"]

     try:
-        def _set_sync():
-            tenant_id = DocumentService.get_tenant_id(req["doc_id"])
-            if not tenant_id:
-                return get_data_error_result(message="Tenant not found!")
+        tenant_id = DocumentService.get_tenant_id(req["doc_id"])
+        if not tenant_id:
+            return get_data_error_result(message="Tenant not found!")

-            embd_id = DocumentService.get_embd_id(req["doc_id"])
-            embd_mdl = LLMBundle(tenant_id, LLMType.EMBEDDING, embd_id)
+        embd_id = DocumentService.get_embd_id(req["doc_id"])
+        embd_mdl = LLMBundle(tenant_id, LLMType.EMBEDDING, embd_id)

-            e, doc = DocumentService.get_by_id(req["doc_id"])
-            if not e:
-                return get_data_error_result(message="Document not found!")
+        e, doc = DocumentService.get_by_id(req["doc_id"])
+        if not e:
+            return get_data_error_result(message="Document not found!")

-            _d = d
-            if doc.parser_id == ParserType.QA:
-                arr = [
-                    t for t in re.split(
-                        r"[\n\t]",
-                        req["content_with_weight"]) if len(t) > 1]
-                q, a = rmPrefix(arr[0]), rmPrefix("\n".join(arr[1:]))
-                _d = beAdoc(d, q, a, not any(
-                    [rag_tokenizer.is_chinese(t) for t in q + a]))
+        if doc.parser_id == ParserType.QA:
+            arr = [
+                t for t in re.split(
+                    r"[\n\t]",
+                    req["content_with_weight"]) if len(t) > 1]
+            q, a = rmPrefix(arr[0]), rmPrefix("\n".join(arr[1:]))
+            d = beAdoc(d, q, a, not any(
+                [rag_tokenizer.is_chinese(t) for t in q + a]))

-            v, c = embd_mdl.encode([doc.name, req["content_with_weight"] if not _d.get("question_kwd") else "\n".join(_d["question_kwd"])])
-            v = 0.1 * v[0] + 0.9 * v[1] if doc.parser_id != ParserType.QA else v[1]
-            _d["q_%d_vec" % len(v)] = v.tolist()
-            settings.docStoreConn.update({"id": req["chunk_id"]}, _d, search.index_name(tenant_id), doc.kb_id)
-            return get_json_result(data=True)
-
-        return await asyncio.to_thread(_set_sync)
+        v, c = embd_mdl.encode([doc.name, req["content_with_weight"] if not d.get("question_kwd") else "\n".join(d["question_kwd"])])
+        v = 0.1 * v[0] + 0.9 * v[1] if doc.parser_id != ParserType.QA else v[1]
+        d["q_%d_vec" % len(v)] = v.tolist()
+        settings.docStoreConn.update({"id": req["chunk_id"]}, d, search.index_name(tenant_id), doc.kb_id)
+        return get_json_result(data=True)
     except Exception as e:
         return server_error_response(e)
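Reviewer note: on the nightly side each handler keeps its blocking ORM and vector-store work in a local `_xxx_sync()` closure and awaits it through `asyncio.to_thread`, so the event loop keeps serving other requests; the revert calls the same code inline. A minimal sketch of the offload pattern — `blocking_update` is hypothetical:

```python
import asyncio
import time

def blocking_update(chunk_id: str) -> dict:
    time.sleep(0.1)                # stands in for DB + vector-store round trips
    return {"updated": chunk_id}

async def set_chunk(chunk_id: str) -> dict:
    def _set_sync():
        return blocking_update(chunk_id)
    # the blocking body runs on a worker thread; the event loop stays free
    return await asyncio.to_thread(_set_sync)

print(asyncio.run(set_chunk("c1")))
```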
@@ -187,19 +182,16 @@ async def set():
async def switch():
    req = await get_request_json()
    try:
-        def _switch_sync():
-            e, doc = DocumentService.get_by_id(req["doc_id"])
-            if not e:
-                return get_data_error_result(message="Document not found!")
-            for cid in req["chunk_ids"]:
-                if not settings.docStoreConn.update({"id": cid},
-                                                    {"available_int": int(req["available_int"])},
-                                                    search.index_name(DocumentService.get_tenant_id(req["doc_id"])),
-                                                    doc.kb_id):
-                    return get_data_error_result(message="Index updating failure")
-            return get_json_result(data=True)
-
-        return await asyncio.to_thread(_switch_sync)
+        e, doc = DocumentService.get_by_id(req["doc_id"])
+        if not e:
+            return get_data_error_result(message="Document not found!")
+        for cid in req["chunk_ids"]:
+            if not settings.docStoreConn.update({"id": cid},
+                                                {"available_int": int(req["available_int"])},
+                                                search.index_name(DocumentService.get_tenant_id(req["doc_id"])),
+                                                doc.kb_id):
+                return get_data_error_result(message="Index updating failure")
+        return get_json_result(data=True)
    except Exception as e:
        return server_error_response(e)
||||
@ -210,23 +202,20 @@ async def switch():
|
||||
async def rm():
|
||||
req = await get_request_json()
|
||||
try:
|
||||
def _rm_sync():
|
||||
e, doc = DocumentService.get_by_id(req["doc_id"])
|
||||
if not e:
|
||||
return get_data_error_result(message="Document not found!")
|
||||
if not settings.docStoreConn.delete({"id": req["chunk_ids"]},
|
||||
search.index_name(DocumentService.get_tenant_id(req["doc_id"])),
|
||||
doc.kb_id):
|
||||
return get_data_error_result(message="Chunk deleting failure")
|
||||
deleted_chunk_ids = req["chunk_ids"]
|
||||
chunk_number = len(deleted_chunk_ids)
|
||||
DocumentService.decrement_chunk_num(doc.id, doc.kb_id, 1, chunk_number, 0)
|
||||
for cid in deleted_chunk_ids:
|
||||
if settings.STORAGE_IMPL.obj_exist(doc.kb_id, cid):
|
||||
settings.STORAGE_IMPL.rm(doc.kb_id, cid)
|
||||
return get_json_result(data=True)
|
||||
|
||||
return await asyncio.to_thread(_rm_sync)
|
||||
e, doc = DocumentService.get_by_id(req["doc_id"])
|
||||
if not e:
|
||||
return get_data_error_result(message="Document not found!")
|
||||
if not settings.docStoreConn.delete({"id": req["chunk_ids"]},
|
||||
search.index_name(DocumentService.get_tenant_id(req["doc_id"])),
|
||||
doc.kb_id):
|
||||
return get_data_error_result(message="Chunk deleting failure")
|
||||
deleted_chunk_ids = req["chunk_ids"]
|
||||
chunk_number = len(deleted_chunk_ids)
|
||||
DocumentService.decrement_chunk_num(doc.id, doc.kb_id, 1, chunk_number, 0)
|
||||
for cid in deleted_chunk_ids:
|
||||
if settings.STORAGE_IMPL.obj_exist(doc.kb_id, cid):
|
||||
settings.STORAGE_IMPL.rm(doc.kb_id, cid)
|
||||
return get_json_result(data=True)
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
|
||||
@ -256,38 +245,35 @@ async def create():
|
||||
d["tag_feas"] = req["tag_feas"]
|
||||
|
||||
try:
|
||||
def _create_sync():
|
||||
e, doc = DocumentService.get_by_id(req["doc_id"])
|
||||
if not e:
|
||||
return get_data_error_result(message="Document not found!")
|
||||
d["kb_id"] = [doc.kb_id]
|
||||
d["docnm_kwd"] = doc.name
|
||||
d["title_tks"] = rag_tokenizer.tokenize(doc.name)
|
||||
d["doc_id"] = doc.id
|
||||
e, doc = DocumentService.get_by_id(req["doc_id"])
|
||||
if not e:
|
||||
return get_data_error_result(message="Document not found!")
|
||||
d["kb_id"] = [doc.kb_id]
|
||||
d["docnm_kwd"] = doc.name
|
||||
d["title_tks"] = rag_tokenizer.tokenize(doc.name)
|
||||
d["doc_id"] = doc.id
|
||||
|
||||
tenant_id = DocumentService.get_tenant_id(req["doc_id"])
|
||||
if not tenant_id:
|
||||
return get_data_error_result(message="Tenant not found!")
|
||||
tenant_id = DocumentService.get_tenant_id(req["doc_id"])
|
||||
if not tenant_id:
|
||||
return get_data_error_result(message="Tenant not found!")
|
||||
|
||||
e, kb = KnowledgebaseService.get_by_id(doc.kb_id)
|
||||
if not e:
|
||||
return get_data_error_result(message="Knowledgebase not found!")
|
||||
if kb.pagerank:
|
||||
d[PAGERANK_FLD] = kb.pagerank
|
||||
e, kb = KnowledgebaseService.get_by_id(doc.kb_id)
|
||||
if not e:
|
||||
return get_data_error_result(message="Knowledgebase not found!")
|
||||
if kb.pagerank:
|
||||
d[PAGERANK_FLD] = kb.pagerank
|
||||
|
||||
embd_id = DocumentService.get_embd_id(req["doc_id"])
|
||||
embd_mdl = LLMBundle(tenant_id, LLMType.EMBEDDING.value, embd_id)
|
||||
embd_id = DocumentService.get_embd_id(req["doc_id"])
|
||||
embd_mdl = LLMBundle(tenant_id, LLMType.EMBEDDING.value, embd_id)
|
||||
|
||||
v, c = embd_mdl.encode([doc.name, req["content_with_weight"] if not d["question_kwd"] else "\n".join(d["question_kwd"])])
|
||||
v = 0.1 * v[0] + 0.9 * v[1]
|
||||
d["q_%d_vec" % len(v)] = v.tolist()
|
||||
settings.docStoreConn.insert([d], search.index_name(tenant_id), doc.kb_id)
|
||||
v, c = embd_mdl.encode([doc.name, req["content_with_weight"] if not d["question_kwd"] else "\n".join(d["question_kwd"])])
|
||||
v = 0.1 * v[0] + 0.9 * v[1]
|
||||
d["q_%d_vec" % len(v)] = v.tolist()
|
||||
settings.docStoreConn.insert([d], search.index_name(tenant_id), doc.kb_id)
|
||||
|
||||
DocumentService.increment_chunk_num(
|
||||
doc.id, doc.kb_id, c, 1, 0)
|
||||
return get_json_result(data={"chunk_id": chunck_id})
|
||||
|
||||
return await asyncio.to_thread(_create_sync)
|
||||
DocumentService.increment_chunk_num(
|
||||
doc.id, doc.kb_id, c, 1, 0)
|
||||
return get_json_result(data={"chunk_id": chunck_id})
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
|
||||
@@ -311,28 +297,25 @@ async def retrieval_test():
    use_kg = req.get("use_kg", False)
    top = int(req.get("top_k", 1024))
    langs = req.get("cross_languages", [])
-    user_id = current_user.id
-
-    def _retrieval_sync():
-        local_doc_ids = list(doc_ids) if doc_ids else []
-        tenant_ids = []
-        if req.get("search_id", ""):
-            search_config = SearchService.get_detail(req.get("search_id", "")).get("search_config", {})
-            meta_data_filter = search_config.get("meta_data_filter", {})
-            metas = DocumentService.get_meta_by_kbs(kb_ids)
-            if meta_data_filter.get("method") == "auto":
-                chat_mdl = LLMBundle(user_id, LLMType.CHAT, llm_name=search_config.get("chat_id", ""))
-                filters: dict = gen_meta_filter(chat_mdl, metas, question)
-                local_doc_ids.extend(meta_filter(metas, filters["conditions"], filters.get("logic", "and")))
-                if not local_doc_ids:
-                    local_doc_ids = None
-            elif meta_data_filter.get("method") == "manual":
-                local_doc_ids.extend(meta_filter(metas, meta_data_filter["manual"], meta_data_filter.get("logic", "and")))
-                if meta_data_filter["manual"] and not local_doc_ids:
-                    local_doc_ids = ["-999"]
-
-        tenants = UserTenantService.query(user_id=user_id)
+    tenant_ids = []
+    if req.get("search_id", ""):
+        search_config = SearchService.get_detail(req.get("search_id", "")).get("search_config", {})
+        meta_data_filter = search_config.get("meta_data_filter", {})
+        metas = DocumentService.get_meta_by_kbs(kb_ids)
+        if meta_data_filter.get("method") == "auto":
+            chat_mdl = LLMBundle(current_user.id, LLMType.CHAT, llm_name=search_config.get("chat_id", ""))
+            filters: dict = gen_meta_filter(chat_mdl, metas, question)
+            doc_ids.extend(meta_filter(metas, filters["conditions"], filters.get("logic", "and")))
+            if not doc_ids:
+                doc_ids = None
+        elif meta_data_filter.get("method") == "manual":
+            doc_ids.extend(meta_filter(metas, meta_data_filter["manual"], meta_data_filter.get("logic", "and")))
+            if meta_data_filter["manual"] and not doc_ids:
+                doc_ids = ["-999"]
+
+    try:
+        tenants = UserTenantService.query(user_id=current_user.id)
        for kb_id in kb_ids:
            for tenant in tenants:
                if KnowledgebaseService.query(
@@ -348,9 +331,8 @@ async def retrieval_test():
        if not e:
            return get_data_error_result(message="Knowledgebase not found!")

-        _question = question
        if langs:
-            _question = cross_languages(kb.tenant_id, None, _question, langs)
+            question = cross_languages(kb.tenant_id, None, question, langs)

        embd_mdl = LLMBundle(kb.tenant_id, LLMType.EMBEDDING.value, llm_name=kb.embd_id)
||||
@ -360,19 +342,19 @@ async def retrieval_test():
|
||||
|
||||
if req.get("keyword", False):
|
||||
chat_mdl = LLMBundle(kb.tenant_id, LLMType.CHAT)
|
||||
_question += keyword_extraction(chat_mdl, _question)
|
||||
question += keyword_extraction(chat_mdl, question)
|
||||
|
||||
labels = label_question(_question, [kb])
|
||||
ranks = settings.retriever.retrieval(_question, embd_mdl, tenant_ids, kb_ids, page, size,
|
||||
labels = label_question(question, [kb])
|
||||
ranks = settings.retriever.retrieval(question, embd_mdl, tenant_ids, kb_ids, page, size,
|
||||
float(req.get("similarity_threshold", 0.0)),
|
||||
float(req.get("vector_similarity_weight", 0.3)),
|
||||
top,
|
||||
local_doc_ids, rerank_mdl=rerank_mdl,
|
||||
doc_ids, rerank_mdl=rerank_mdl,
|
||||
highlight=req.get("highlight", False),
|
||||
rank_feature=labels
|
||||
)
|
||||
if use_kg:
|
||||
ck = settings.kg_retriever.retrieval(_question,
|
||||
ck = settings.kg_retriever.retrieval(question,
|
||||
tenant_ids,
|
||||
kb_ids,
|
||||
embd_mdl,
|
||||
@@ -385,9 +367,6 @@ async def retrieval_test():
        ranks["labels"] = labels

        return get_json_result(data=ranks)
-
-    try:
-        return await asyncio.to_thread(_retrieval_sync)
    except Exception as e:
        if str(e).find("not_found") > 0:
            return get_json_result(data=False, message='No chunk found! Check the chunk status please!',
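Reviewer note: inside `_retrieval_sync` the nightly code copies `doc_ids` into `local_doc_ids` before extending or rebinding it. That is not just style: assigning to `doc_ids` anywhere in the nested function would make the name local throughout that function, so the earlier reads would raise `UnboundLocalError`. A small demonstration of the pattern:

```python
import asyncio

async def retrieval_test(doc_ids):
    def _retrieval_sync():
        # copy first: rebinding doc_ids directly here would shadow the closure
        # variable and break the read on this very line (UnboundLocalError)
        local_doc_ids = list(doc_ids) if doc_ids else []
        local_doc_ids.extend(["d2"])   # safe: mutates only the copy
        if not local_doc_ids:
            local_doc_ids = None       # safe: rebinds only the local name
        return local_doc_ids
    return await asyncio.to_thread(_retrieval_sync)

print(asyncio.run(retrieval_test(["d1"])))  # ['d1', 'd2']
```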
@@ -168,12 +168,10 @@ async def _render_web_oauth_popup(flow_id: str, success: bool, message: str, source: str):
    status = "success" if success else "error"
    auto_close = "window.close();" if success else ""
    escaped_message = escape(message)
-    # Drive: ragflow-google-drive-oauth
-    # Gmail: ragflow-gmail-oauth
-    payload_type = f"ragflow-{source}-oauth"
    payload_json = json.dumps(
        {
-            "type": payload_type,
+            # TODO(google-oauth): include connector type (drive/gmail) in payload type if needed
+            "type": f"ragflow-google-{source}-oauth",
            "status": status,
            "flowId": flow_id or "",
            "message": message,
||||
@ -462,7 +462,7 @@ async def related_questions():
|
||||
if "parameter" in gen_conf:
|
||||
del gen_conf["parameter"]
|
||||
prompt = load_prompt("related_question")
|
||||
ans = await chat_mdl.async_chat(
|
||||
ans = chat_mdl.chat(
|
||||
prompt,
|
||||
[
|
||||
{
|
||||
|
||||
@@ -13,7 +13,6 @@
# See the License for the specific language governing permissions and
# limitations under the License
#
-import asyncio
import json
import os.path
import pathlib
@@ -73,7 +72,7 @@ async def upload():
    if not check_kb_team_permission(kb, current_user.id):
        return get_json_result(data=False, message="No authorization.", code=RetCode.AUTHENTICATION_ERROR)

-    err, files = await asyncio.to_thread(FileService.upload_document, kb, file_objs, current_user.id)
+    err, files = FileService.upload_document(kb, file_objs, current_user.id)
    if err:
        return get_json_result(data=files, message="\n".join(err), code=RetCode.SERVER_ERROR)
@@ -391,7 +390,7 @@ async def rm():
    if not DocumentService.accessible4deletion(doc_id, current_user.id):
        return get_json_result(data=False, message="No authorization.", code=RetCode.AUTHENTICATION_ERROR)

-    errors = await asyncio.to_thread(FileService.delete_docs, doc_ids, current_user.id)
+    errors = FileService.delete_docs(doc_ids, current_user.id)

    if errors:
        return get_json_result(data=False, message=errors, code=RetCode.SERVER_ERROR)
@@ -404,48 +403,44 @@ async def rm():
@validate_request("doc_ids", "run")
async def run():
    req = await get_request_json()
+    for doc_id in req["doc_ids"]:
+        if not DocumentService.accessible(doc_id, current_user.id):
+            return get_json_result(data=False, message="No authorization.", code=RetCode.AUTHENTICATION_ERROR)
    try:
-        def _run_sync():
-            for doc_id in req["doc_ids"]:
-                if not DocumentService.accessible(doc_id, current_user.id):
-                    return get_json_result(data=False, message="No authorization.", code=RetCode.AUTHENTICATION_ERROR)
-            kb_table_num_map = {}
-            for id in req["doc_ids"]:
-                info = {"run": str(req["run"]), "progress": 0}
-                if str(req["run"]) == TaskStatus.RUNNING.value and req.get("delete", False):
-                    info["progress_msg"] = ""
-                    info["chunk_num"] = 0
-                    info["token_num"] = 0
-
-                tenant_id = DocumentService.get_tenant_id(id)
-                if not tenant_id:
-                    return get_data_error_result(message="Tenant not found!")
-                e, doc = DocumentService.get_by_id(id)
-                if not e:
-                    return get_data_error_result(message="Document not found!")
-
-                if str(req["run"]) == TaskStatus.CANCEL.value:
-                    if str(doc.run) == TaskStatus.RUNNING.value:
-                        cancel_all_task_of(id)
-                    else:
-                        return get_data_error_result(message="Cannot cancel a task that is not in RUNNING status")
-                if all([("delete" not in req or req["delete"]), str(req["run"]) == TaskStatus.RUNNING.value, str(doc.run) == TaskStatus.DONE.value]):
-                    DocumentService.clear_chunk_num_when_rerun(doc.id)
-
-                DocumentService.update_by_id(id, info)
-                if req.get("delete", False):
-                    TaskService.filter_delete([Task.doc_id == id])
-                    if settings.docStoreConn.indexExist(search.index_name(tenant_id), doc.kb_id):
-                        settings.docStoreConn.delete({"doc_id": id}, search.index_name(tenant_id), doc.kb_id)
-
-                if str(req["run"]) == TaskStatus.RUNNING.value:
-                    doc = doc.to_dict()
-                    DocumentService.run(tenant_id, doc, kb_table_num_map)
-
-            return get_json_result(data=True)
-
-        return await asyncio.to_thread(_run_sync)
+        kb_table_num_map = {}
+        for id in req["doc_ids"]:
+            info = {"run": str(req["run"]), "progress": 0}
+            if str(req["run"]) == TaskStatus.RUNNING.value and req.get("delete", False):
+                info["progress_msg"] = ""
+                info["chunk_num"] = 0
+                info["token_num"] = 0
+            tenant_id = DocumentService.get_tenant_id(id)
+            if not tenant_id:
+                return get_data_error_result(message="Tenant not found!")
+            e, doc = DocumentService.get_by_id(id)
+            if not e:
+                return get_data_error_result(message="Document not found!")
+            if str(req["run"]) == TaskStatus.CANCEL.value:
+                if str(doc.run) == TaskStatus.RUNNING.value:
+                    cancel_all_task_of(id)
+                else:
+                    return get_data_error_result(message="Cannot cancel a task that is not in RUNNING status")
+            if all([("delete" not in req or req["delete"]), str(req["run"]) == TaskStatus.RUNNING.value, str(doc.run) == TaskStatus.DONE.value]):
+                DocumentService.clear_chunk_num_when_rerun(doc.id)
+            DocumentService.update_by_id(id, info)
+            if req.get("delete", False):
+                TaskService.filter_delete([Task.doc_id == id])
+                if settings.docStoreConn.indexExist(search.index_name(tenant_id), doc.kb_id):
+                    settings.docStoreConn.delete({"doc_id": id}, search.index_name(tenant_id), doc.kb_id)
+            if str(req["run"]) == TaskStatus.RUNNING.value:
+                doc_dict = doc.to_dict()
+                DocumentService.run(tenant_id, doc_dict, kb_table_num_map)

+        return get_json_result(data=True)
    except Exception as e:
        return server_error_response(e)
@@ -455,49 +450,45 @@ async def run():
@validate_request("doc_id", "name")
async def rename():
    req = await get_request_json()
+    if not DocumentService.accessible(req["doc_id"], current_user.id):
+        return get_json_result(data=False, message="No authorization.", code=RetCode.AUTHENTICATION_ERROR)
    try:
-        def _rename_sync():
-            if not DocumentService.accessible(req["doc_id"], current_user.id):
-                return get_json_result(data=False, message="No authorization.", code=RetCode.AUTHENTICATION_ERROR)
-            e, doc = DocumentService.get_by_id(req["doc_id"])
-            if not e:
-                return get_data_error_result(message="Document not found!")
-            if pathlib.Path(req["name"].lower()).suffix != pathlib.Path(doc.name.lower()).suffix:
-                return get_json_result(data=False, message="The extension of file can't be changed", code=RetCode.ARGUMENT_ERROR)
-            if len(req["name"].encode("utf-8")) > FILE_NAME_LEN_LIMIT:
-                return get_json_result(data=False, message=f"File name must be {FILE_NAME_LEN_LIMIT} bytes or less.", code=RetCode.ARGUMENT_ERROR)
-
-            for d in DocumentService.query(name=req["name"], kb_id=doc.kb_id):
-                if d.name == req["name"]:
-                    return get_data_error_result(message="Duplicated document name in the same knowledgebase.")
-
-            if not DocumentService.update_by_id(req["doc_id"], {"name": req["name"]}):
-                return get_data_error_result(message="Database error (Document rename)!")
-
-            informs = File2DocumentService.get_by_document_id(req["doc_id"])
-            if informs:
-                e, file = FileService.get_by_id(informs[0].file_id)
-                FileService.update_by_id(file.id, {"name": req["name"]})
-
-            tenant_id = DocumentService.get_tenant_id(req["doc_id"])
-            title_tks = rag_tokenizer.tokenize(req["name"])
-            es_body = {
-                "docnm_kwd": req["name"],
-                "title_tks": title_tks,
-                "title_sm_tks": rag_tokenizer.fine_grained_tokenize(title_tks),
-            }
-            if settings.docStoreConn.indexExist(search.index_name(tenant_id), doc.kb_id):
-                settings.docStoreConn.update(
-                    {"doc_id": req["doc_id"]},
-                    es_body,
-                    search.index_name(tenant_id),
-                    doc.kb_id,
-                )
-            return get_json_result(data=True)
-
-        return await asyncio.to_thread(_rename_sync)
+        e, doc = DocumentService.get_by_id(req["doc_id"])
+        if not e:
+            return get_data_error_result(message="Document not found!")
+        if pathlib.Path(req["name"].lower()).suffix != pathlib.Path(doc.name.lower()).suffix:
+            return get_json_result(data=False, message="The extension of file can't be changed", code=RetCode.ARGUMENT_ERROR)
+        if len(req["name"].encode("utf-8")) > FILE_NAME_LEN_LIMIT:
+            return get_json_result(data=False, message=f"File name must be {FILE_NAME_LEN_LIMIT} bytes or less.", code=RetCode.ARGUMENT_ERROR)
+        for d in DocumentService.query(name=req["name"], kb_id=doc.kb_id):
+            if d.name == req["name"]:
+                return get_data_error_result(message="Duplicated document name in the same knowledgebase.")
+        if not DocumentService.update_by_id(req["doc_id"], {"name": req["name"]}):
+            return get_data_error_result(message="Database error (Document rename)!")
+        informs = File2DocumentService.get_by_document_id(req["doc_id"])
+        if informs:
+            e, file = FileService.get_by_id(informs[0].file_id)
+            FileService.update_by_id(file.id, {"name": req["name"]})
+        tenant_id = DocumentService.get_tenant_id(req["doc_id"])
+        title_tks = rag_tokenizer.tokenize(req["name"])
+        es_body = {
+            "docnm_kwd": req["name"],
+            "title_tks": title_tks,
+            "title_sm_tks": rag_tokenizer.fine_grained_tokenize(title_tks),
+        }
+        if settings.docStoreConn.indexExist(search.index_name(tenant_id), doc.kb_id):
+            settings.docStoreConn.update(
+                {"doc_id": req["doc_id"]},
+                es_body,
+                search.index_name(tenant_id),
+                doc.kb_id,
+            )

+        return get_json_result(data=True)
    except Exception as e:
        return server_error_response(e)
@ -511,8 +502,7 @@ async def get(doc_id):
|
||||
return get_data_error_result(message="Document not found!")
|
||||
|
||||
b, n = File2DocumentService.get_storage_address(doc_id=doc_id)
|
||||
data = await asyncio.to_thread(settings.STORAGE_IMPL.get, b, n)
|
||||
response = await make_response(data)
|
||||
response = await make_response(settings.STORAGE_IMPL.get(b, n))
|
||||
|
||||
ext = re.search(r"\.([^.]+)$", doc.name.lower())
|
||||
ext = ext.group(1) if ext else None
|
||||
@ -533,7 +523,8 @@ async def get(doc_id):
|
||||
async def download_attachment(attachment_id):
|
||||
try:
|
||||
ext = request.args.get("ext", "markdown")
|
||||
data = await asyncio.to_thread(settings.STORAGE_IMPL.get, current_user.id, attachment_id)
|
||||
data = settings.STORAGE_IMPL.get(current_user.id, attachment_id)
|
||||
# data = settings.STORAGE_IMPL.get("eb500d50bb0411f0907561d2782adda5", attachment_id)
|
||||
response = await make_response(data)
|
||||
response.headers.set("Content-Type", CONTENT_TYPE_MAP.get(ext, f"application/{ext}"))
|
||||
|
||||
@ -605,8 +596,7 @@ async def get_image(image_id):
|
||||
if len(arr) != 2:
|
||||
return get_data_error_result(message="Image not found.")
|
||||
bkt, nm = image_id.split("-")
|
||||
data = await asyncio.to_thread(settings.STORAGE_IMPL.get, bkt, nm)
|
||||
response = await make_response(data)
|
||||
response = await make_response(settings.STORAGE_IMPL.get(bkt, nm))
|
||||
response.headers.set("Content-Type", "image/JPEG")
|
||||
return response
|
||||
except Exception as e:
|
||||
|
||||
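The hunks above all revert the same change: blocking storage reads that had been offloaded to a worker thread with asyncio.to_thread are called inline on the event loop again. A minimal sketch of the pattern being reverted, using a hypothetical BlockingStorage stub in place of settings.STORAGE_IMPL:

```python
import asyncio

class BlockingStorage:
    """Hypothetical stand-in for settings.STORAGE_IMPL (e.g. a MinIO/S3 client)."""

    def get(self, bucket: str, name: str) -> bytes:
        # Pretend this performs slow, blocking network I/O.
        return f"{bucket}/{name}".encode()

storage = BlockingStorage()

async def fetch_blob_offloaded(bucket: str, name: str) -> bytes:
    # Pre-revert style: the blocking call runs in a worker thread, so the
    # Quart event loop keeps serving other requests meanwhile.
    return await asyncio.to_thread(storage.get, bucket, name)

async def fetch_blob_inline(bucket: str, name: str) -> bytes:
    # Post-revert style: the call blocks the event loop for its duration.
    return storage.get(bucket, name)

if __name__ == "__main__":
    print(asyncio.run(fetch_blob_offloaded("kb", "doc.pdf")))
```

Both variants return the same bytes; the difference is only where the blocking wait happens.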
@ -1,479 +0,0 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""
RAG Evaluation API Endpoints

Provides REST API for RAG evaluation functionality including:
- Dataset management
- Test case management
- Evaluation execution
- Results retrieval
- Configuration recommendations
"""

from quart import request
from api.apps import login_required, current_user
from api.db.services.evaluation_service import EvaluationService
from api.utils.api_utils import (
    get_data_error_result,
    get_json_result,
    get_request_json,
    server_error_response,
    validate_request
)
from common.constants import RetCode


# ==================== Dataset Management ====================

@manager.route('/dataset/create', methods=['POST'])  # noqa: F821
@login_required
@validate_request("name", "kb_ids")
async def create_dataset():
    """
    Create a new evaluation dataset.

    Request body:
    {
        "name": "Dataset name",
        "description": "Optional description",
        "kb_ids": ["kb_id1", "kb_id2"]
    }
    """
    try:
        req = await get_request_json()
        name = req.get("name", "").strip()
        description = req.get("description", "")
        kb_ids = req.get("kb_ids", [])

        if not name:
            return get_data_error_result(message="Dataset name cannot be empty")

        if not kb_ids or not isinstance(kb_ids, list):
            return get_data_error_result(message="kb_ids must be a non-empty list")

        success, result = EvaluationService.create_dataset(
            name=name,
            description=description,
            kb_ids=kb_ids,
            tenant_id=current_user.id,
            user_id=current_user.id
        )

        if not success:
            return get_data_error_result(message=result)

        return get_json_result(data={"dataset_id": result})
    except Exception as e:
        return server_error_response(e)


@manager.route('/dataset/list', methods=['GET'])  # noqa: F821
@login_required
async def list_datasets():
    """
    List evaluation datasets for current tenant.

    Query params:
    - page: Page number (default: 1)
    - page_size: Items per page (default: 20)
    """
    try:
        page = int(request.args.get("page", 1))
        page_size = int(request.args.get("page_size", 20))

        result = EvaluationService.list_datasets(
            tenant_id=current_user.id,
            user_id=current_user.id,
            page=page,
            page_size=page_size
        )

        return get_json_result(data=result)
    except Exception as e:
        return server_error_response(e)


@manager.route('/dataset/<dataset_id>', methods=['GET'])  # noqa: F821
@login_required
async def get_dataset(dataset_id):
    """Get dataset details by ID"""
    try:
        dataset = EvaluationService.get_dataset(dataset_id)
        if not dataset:
            return get_data_error_result(
                message="Dataset not found",
                code=RetCode.DATA_ERROR
            )

        return get_json_result(data=dataset)
    except Exception as e:
        return server_error_response(e)


@manager.route('/dataset/<dataset_id>', methods=['PUT'])  # noqa: F821
@login_required
async def update_dataset(dataset_id):
    """
    Update dataset.

    Request body:
    {
        "name": "New name",
        "description": "New description",
        "kb_ids": ["kb_id1", "kb_id2"]
    }
    """
    try:
        req = await get_request_json()

        # Remove fields that shouldn't be updated
        req.pop("id", None)
        req.pop("tenant_id", None)
        req.pop("created_by", None)
        req.pop("create_time", None)

        success = EvaluationService.update_dataset(dataset_id, **req)

        if not success:
            return get_data_error_result(message="Failed to update dataset")

        return get_json_result(data={"dataset_id": dataset_id})
    except Exception as e:
        return server_error_response(e)


@manager.route('/dataset/<dataset_id>', methods=['DELETE'])  # noqa: F821
@login_required
async def delete_dataset(dataset_id):
    """Delete dataset (soft delete)"""
    try:
        success = EvaluationService.delete_dataset(dataset_id)

        if not success:
            return get_data_error_result(message="Failed to delete dataset")

        return get_json_result(data={"dataset_id": dataset_id})
    except Exception as e:
        return server_error_response(e)


# ==================== Test Case Management ====================

@manager.route('/dataset/<dataset_id>/case/add', methods=['POST'])  # noqa: F821
@login_required
@validate_request("question")
async def add_test_case(dataset_id):
    """
    Add a test case to a dataset.

    Request body:
    {
        "question": "Test question",
        "reference_answer": "Optional ground truth answer",
        "relevant_doc_ids": ["doc_id1", "doc_id2"],
        "relevant_chunk_ids": ["chunk_id1", "chunk_id2"],
        "metadata": {"key": "value"}
    }
    """
    try:
        req = await get_request_json()
        question = req.get("question", "").strip()

        if not question:
            return get_data_error_result(message="Question cannot be empty")

        success, result = EvaluationService.add_test_case(
            dataset_id=dataset_id,
            question=question,
            reference_answer=req.get("reference_answer"),
            relevant_doc_ids=req.get("relevant_doc_ids"),
            relevant_chunk_ids=req.get("relevant_chunk_ids"),
            metadata=req.get("metadata")
        )

        if not success:
            return get_data_error_result(message=result)

        return get_json_result(data={"case_id": result})
    except Exception as e:
        return server_error_response(e)


@manager.route('/dataset/<dataset_id>/case/import', methods=['POST'])  # noqa: F821
@login_required
@validate_request("cases")
async def import_test_cases(dataset_id):
    """
    Bulk import test cases.

    Request body:
    {
        "cases": [
            {
                "question": "Question 1",
                "reference_answer": "Answer 1",
                ...
            },
            {
                "question": "Question 2",
                ...
            }
        ]
    }
    """
    try:
        req = await get_request_json()
        cases = req.get("cases", [])

        if not cases or not isinstance(cases, list):
            return get_data_error_result(message="cases must be a non-empty list")

        success_count, failure_count = EvaluationService.import_test_cases(
            dataset_id=dataset_id,
            cases=cases
        )

        return get_json_result(data={
            "success_count": success_count,
            "failure_count": failure_count,
            "total": len(cases)
        })
    except Exception as e:
        return server_error_response(e)


@manager.route('/dataset/<dataset_id>/cases', methods=['GET'])  # noqa: F821
@login_required
async def get_test_cases(dataset_id):
    """Get all test cases for a dataset"""
    try:
        cases = EvaluationService.get_test_cases(dataset_id)
        return get_json_result(data={"cases": cases, "total": len(cases)})
    except Exception as e:
        return server_error_response(e)


@manager.route('/case/<case_id>', methods=['DELETE'])  # noqa: F821
@login_required
async def delete_test_case(case_id):
    """Delete a test case"""
    try:
        success = EvaluationService.delete_test_case(case_id)

        if not success:
            return get_data_error_result(message="Failed to delete test case")

        return get_json_result(data={"case_id": case_id})
    except Exception as e:
        return server_error_response(e)


# ==================== Evaluation Execution ====================

@manager.route('/run/start', methods=['POST'])  # noqa: F821
@login_required
@validate_request("dataset_id", "dialog_id")
async def start_evaluation():
    """
    Start an evaluation run.

    Request body:
    {
        "dataset_id": "dataset_id",
        "dialog_id": "dialog_id",
        "name": "Optional run name"
    }
    """
    try:
        req = await get_request_json()
        dataset_id = req.get("dataset_id")
        dialog_id = req.get("dialog_id")
        name = req.get("name")

        success, result = EvaluationService.start_evaluation(
            dataset_id=dataset_id,
            dialog_id=dialog_id,
            user_id=current_user.id,
            name=name
        )

        if not success:
            return get_data_error_result(message=result)

        return get_json_result(data={"run_id": result})
    except Exception as e:
        return server_error_response(e)


@manager.route('/run/<run_id>', methods=['GET'])  # noqa: F821
@login_required
async def get_evaluation_run(run_id):
    """Get evaluation run details"""
    try:
        result = EvaluationService.get_run_results(run_id)

        if not result:
            return get_data_error_result(
                message="Evaluation run not found",
                code=RetCode.DATA_ERROR
            )

        return get_json_result(data=result)
    except Exception as e:
        return server_error_response(e)


@manager.route('/run/<run_id>/results', methods=['GET'])  # noqa: F821
@login_required
async def get_run_results(run_id):
    """Get detailed results for an evaluation run"""
    try:
        result = EvaluationService.get_run_results(run_id)

        if not result:
            return get_data_error_result(
                message="Evaluation run not found",
                code=RetCode.DATA_ERROR
            )

        return get_json_result(data=result)
    except Exception as e:
        return server_error_response(e)


@manager.route('/run/list', methods=['GET'])  # noqa: F821
@login_required
async def list_evaluation_runs():
    """
    List evaluation runs.

    Query params:
    - dataset_id: Filter by dataset (optional)
    - dialog_id: Filter by dialog (optional)
    - page: Page number (default: 1)
    - page_size: Items per page (default: 20)
    """
    try:
        # TODO: Implement list_runs in EvaluationService
        return get_json_result(data={"runs": [], "total": 0})
    except Exception as e:
        return server_error_response(e)


@manager.route('/run/<run_id>', methods=['DELETE'])  # noqa: F821
@login_required
async def delete_evaluation_run(run_id):
    """Delete an evaluation run"""
    try:
        # TODO: Implement delete_run in EvaluationService
        return get_json_result(data={"run_id": run_id})
    except Exception as e:
        return server_error_response(e)


# ==================== Analysis & Recommendations ====================

@manager.route('/run/<run_id>/recommendations', methods=['GET'])  # noqa: F821
@login_required
async def get_recommendations(run_id):
    """Get configuration recommendations based on evaluation results"""
    try:
        recommendations = EvaluationService.get_recommendations(run_id)
        return get_json_result(data={"recommendations": recommendations})
    except Exception as e:
        return server_error_response(e)


@manager.route('/compare', methods=['POST'])  # noqa: F821
@login_required
@validate_request("run_ids")
async def compare_runs():
    """
    Compare multiple evaluation runs.

    Request body:
    {
        "run_ids": ["run_id1", "run_id2", "run_id3"]
    }
    """
    try:
        req = await get_request_json()
        run_ids = req.get("run_ids", [])

        if not run_ids or not isinstance(run_ids, list) or len(run_ids) < 2:
            return get_data_error_result(
                message="run_ids must be a list with at least 2 run IDs"
            )

        # TODO: Implement compare_runs in EvaluationService
        return get_json_result(data={"comparison": {}})
    except Exception as e:
        return server_error_response(e)


@manager.route('/run/<run_id>/export', methods=['GET'])  # noqa: F821
@login_required
async def export_results(run_id):
    """Export evaluation results as JSON/CSV"""
    try:
        # format_type = request.args.get("format", "json")  # TODO: Use for CSV export

        result = EvaluationService.get_run_results(run_id)

        if not result:
            return get_data_error_result(
                message="Evaluation run not found",
                code=RetCode.DATA_ERROR
            )

        # TODO: Implement CSV export
        return get_json_result(data=result)
    except Exception as e:
        return server_error_response(e)


# ==================== Real-time Evaluation ====================

@manager.route('/evaluate_single', methods=['POST'])  # noqa: F821
@login_required
@validate_request("question", "dialog_id")
async def evaluate_single():
    """
    Evaluate a single question-answer pair in real-time.

    Request body:
    {
        "question": "Test question",
        "dialog_id": "dialog_id",
        "reference_answer": "Optional ground truth",
        "relevant_chunk_ids": ["chunk_id1", "chunk_id2"]
    }
    """
    try:
        # req = await get_request_json()  # TODO: Use for single evaluation implementation

        # TODO: Implement single evaluation
        # This would execute the RAG pipeline and return metrics immediately

        return get_json_result(data={
            "answer": "",
            "metrics": {},
            "retrieved_chunks": []
        })
    except Exception as e:
        return server_error_response(e)
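The file above (the RAG evaluation API) is removed in its entirety by this revert. For orientation, a sketch of how its endpoints could have been exercised over HTTP; the base URL, route prefix, and auth header are assumptions for illustration, not part of the diff:

```python
import requests

BASE = "http://127.0.0.1:9380/v1/evaluation"  # assumed prefix, not in the diff
session = requests.Session()
session.headers["Authorization"] = "Bearer <token>"  # assumed auth scheme

# Create a dataset bound to one or more knowledge bases.
resp = session.post(f"{BASE}/dataset/create", json={
    "name": "smoke-test",
    "description": "tiny ground-truth set",
    "kb_ids": ["kb_id1"],
})
dataset_id = resp.json()["data"]["dataset_id"]

# Add a test case, then start an evaluation run against a dialog config.
session.post(f"{BASE}/dataset/{dataset_id}/case/add",
             json={"question": "What is RAGFlow?"})
session.post(f"{BASE}/run/start",
             json={"dataset_id": dataset_id, "dialog_id": "dialog_id1"})
```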
@ -14,7 +14,6 @@
# limitations under the License
#
import logging
import asyncio
import os
import pathlib
import re
@ -62,10 +61,9 @@ async def upload():
    e, pf_folder = FileService.get_by_id(pf_id)
    if not e:
        return get_data_error_result( message="Can't find this folder!")

    async def _handle_single_file(file_obj):
    for file_obj in file_objs:
        MAX_FILE_NUM_PER_USER: int = int(os.environ.get('MAX_FILE_NUM_PER_USER', 0))
        if 0 < MAX_FILE_NUM_PER_USER <= await asyncio.to_thread(DocumentService.get_doc_count, current_user.id):
        if 0 < MAX_FILE_NUM_PER_USER <= DocumentService.get_doc_count(current_user.id):
            return get_data_error_result( message="Exceed the maximum file number of a free user!")

        # split file name path
@ -77,36 +75,35 @@ async def upload():
        file_len = len(file_obj_names)

        # get folder
        file_id_list = await asyncio.to_thread(FileService.get_id_list_by_id, pf_id, file_obj_names, 1, [pf_id])
        file_id_list = FileService.get_id_list_by_id(pf_id, file_obj_names, 1, [pf_id])
        len_id_list = len(file_id_list)

        # create folder
        if file_len != len_id_list:
            e, file = await asyncio.to_thread(FileService.get_by_id, file_id_list[len_id_list - 1])
            e, file = FileService.get_by_id(file_id_list[len_id_list - 1])
            if not e:
                return get_data_error_result(message="Folder not found!")
            last_folder = await asyncio.to_thread(FileService.create_folder, file, file_id_list[len_id_list - 1], file_obj_names,
            last_folder = FileService.create_folder(file, file_id_list[len_id_list - 1], file_obj_names,
                                                    len_id_list)
        else:
            e, file = await asyncio.to_thread(FileService.get_by_id, file_id_list[len_id_list - 2])
            e, file = FileService.get_by_id(file_id_list[len_id_list - 2])
            if not e:
                return get_data_error_result(message="Folder not found!")
            last_folder = await asyncio.to_thread(FileService.create_folder, file, file_id_list[len_id_list - 2], file_obj_names,
            last_folder = FileService.create_folder(file, file_id_list[len_id_list - 2], file_obj_names,
                                                    len_id_list)

        # file type
        filetype = filename_type(file_obj_names[file_len - 1])
        location = file_obj_names[file_len - 1]
        while await asyncio.to_thread(settings.STORAGE_IMPL.obj_exist, last_folder.id, location):
        while settings.STORAGE_IMPL.obj_exist(last_folder.id, location):
            location += "_"
        blob = await asyncio.to_thread(file_obj.read)
        filename = await asyncio.to_thread(
            duplicate_name,
        blob = file_obj.read()
        filename = duplicate_name(
            FileService.query,
            name=file_obj_names[file_len - 1],
            parent_id=last_folder.id)
        await asyncio.to_thread(settings.STORAGE_IMPL.put, last_folder.id, location, blob)
        file_data = {
        settings.STORAGE_IMPL.put(last_folder.id, location, blob)
        file = {
            "id": get_uuid(),
            "parent_id": last_folder.id,
            "tenant_id": current_user.id,
@ -116,13 +113,8 @@ async def upload():
            "location": location,
            "size": len(blob),
        }
        inserted = await asyncio.to_thread(FileService.insert, file_data)
        return inserted.to_json()

    for file_obj in file_objs:
        res = await _handle_single_file(file_obj)
        file_res.append(res)

        file = FileService.insert(file)
        file_res.append(file.to_json())
    return get_json_result(data=file_res)
except Exception as e:
    return server_error_response(e)
@ -250,58 +242,55 @@ async def rm():
    req = await get_request_json()
    file_ids = req["file_ids"]

    def _delete_single_file(file):
        try:
            if file.location:
                settings.STORAGE_IMPL.rm(file.parent_id, file.location)
        except Exception as e:
            logging.exception(f"Fail to remove object: {file.parent_id}/{file.location}, error: {e}")

        informs = File2DocumentService.get_by_file_id(file.id)
        for inform in informs:
            doc_id = inform.document_id
            e, doc = DocumentService.get_by_id(doc_id)
            if e and doc:
                tenant_id = DocumentService.get_tenant_id(doc_id)
                if tenant_id:
                    DocumentService.remove_document(doc, tenant_id)
        File2DocumentService.delete_by_file_id(file.id)

        FileService.delete(file)

    def _delete_folder_recursive(folder, tenant_id):
        sub_files = FileService.list_all_files_by_parent_id(folder.id)
        for sub_file in sub_files:
            if sub_file.type == FileType.FOLDER.value:
                _delete_folder_recursive(sub_file, tenant_id)
            else:
                _delete_single_file(sub_file)

        FileService.delete(folder)

    try:
        def _delete_single_file(file):
            try:
                if file.location:
                    settings.STORAGE_IMPL.rm(file.parent_id, file.location)
            except Exception as e:
                logging.exception(f"Fail to remove object: {file.parent_id}/{file.location}, error: {e}")
        for file_id in file_ids:
            e, file = FileService.get_by_id(file_id)
            if not e or not file:
                return get_data_error_result(message="File or Folder not found!")
            if not file.tenant_id:
                return get_data_error_result(message="Tenant not found!")
            if not check_file_team_permission(file, current_user.id):
                return get_json_result(data=False, message="No authorization.", code=RetCode.AUTHENTICATION_ERROR)

            informs = File2DocumentService.get_by_file_id(file.id)
            for inform in informs:
                doc_id = inform.document_id
                e, doc = DocumentService.get_by_id(doc_id)
                if e and doc:
                    tenant_id = DocumentService.get_tenant_id(doc_id)
                    if tenant_id:
                        DocumentService.remove_document(doc, tenant_id)
            File2DocumentService.delete_by_file_id(file.id)
            if file.source_type == FileSource.KNOWLEDGEBASE:
                continue

            FileService.delete(file)
            if file.type == FileType.FOLDER.value:
                _delete_folder_recursive(file, current_user.id)
                continue

        def _delete_folder_recursive(folder, tenant_id):
            sub_files = FileService.list_all_files_by_parent_id(folder.id)
            for sub_file in sub_files:
                if sub_file.type == FileType.FOLDER.value:
                    _delete_folder_recursive(sub_file, tenant_id)
                else:
                    _delete_single_file(sub_file)
            _delete_single_file(file)

            FileService.delete(folder)

        def _rm_sync():
            for file_id in file_ids:
                e, file = FileService.get_by_id(file_id)
                if not e or not file:
                    return get_data_error_result(message="File or Folder not found!")
                if not file.tenant_id:
                    return get_data_error_result(message="Tenant not found!")
                if not check_file_team_permission(file, current_user.id):
                    return get_json_result(data=False, message="No authorization.", code=RetCode.AUTHENTICATION_ERROR)

                if file.source_type == FileSource.KNOWLEDGEBASE:
                    continue

                if file.type == FileType.FOLDER.value:
                    _delete_folder_recursive(file, current_user.id)
                    continue

                _delete_single_file(file)

            return get_json_result(data=True)

        return await asyncio.to_thread(_rm_sync)
        return get_json_result(data=True)

    except Exception as e:
        return server_error_response(e)
@ -357,10 +346,10 @@ async def get(file_id):
    if not check_file_team_permission(file, current_user.id):
        return get_json_result(data=False, message='No authorization.', code=RetCode.AUTHENTICATION_ERROR)

    blob = await asyncio.to_thread(settings.STORAGE_IMPL.get, file.parent_id, file.location)
    blob = settings.STORAGE_IMPL.get(file.parent_id, file.location)
    if not blob:
        b, n = File2DocumentService.get_storage_address(file_id=file_id)
        blob = await asyncio.to_thread(settings.STORAGE_IMPL.get, b, n)
        blob = settings.STORAGE_IMPL.get(b, n)

    response = await make_response(blob)
    ext = re.search(r"\.([^.]+)$", file.name.lower())
@ -455,12 +444,10 @@ async def move():
        },
    )

    def _move_sync():
        for file in files:
            _move_entry_recursive(file, dest_folder)
        return get_json_result(data=True)
    for file in files:
        _move_entry_recursive(file, dest_folder)

    return await asyncio.to_thread(_move_sync)
    return get_json_result(data=True)

except Exception as e:
    return server_error_response(e)

@ -17,7 +17,6 @@ import json
import logging
import random
import re
import asyncio

from quart import request
import numpy as np
@ -117,22 +116,12 @@ async def update():

    if kb.pagerank != req.get("pagerank", 0):
        if req.get("pagerank", 0) > 0:
            await asyncio.to_thread(
                settings.docStoreConn.update,
                {"kb_id": kb.id},
                {PAGERANK_FLD: req["pagerank"]},
                search.index_name(kb.tenant_id),
                kb.id,
            )
            settings.docStoreConn.update({"kb_id": kb.id}, {PAGERANK_FLD: req["pagerank"]},
                                         search.index_name(kb.tenant_id), kb.id)
        else:
            # Elasticsearch requires PAGERANK_FLD be non-zero!
            await asyncio.to_thread(
                settings.docStoreConn.update,
                {"exists": PAGERANK_FLD},
                {"remove": PAGERANK_FLD},
                search.index_name(kb.tenant_id),
                kb.id,
            )
            settings.docStoreConn.update({"exists": PAGERANK_FLD}, {"remove": PAGERANK_FLD},
                                         search.index_name(kb.tenant_id), kb.id)

    e, kb = KnowledgebaseService.get_by_id(kb.id)
    if not e:
@ -235,28 +224,25 @@ async def rm():
            data=False, message='Only owner of knowledgebase authorized for this operation.',
            code=RetCode.OPERATING_ERROR)

    def _rm_sync():
        for doc in DocumentService.query(kb_id=req["kb_id"]):
            if not DocumentService.remove_document(doc, kbs[0].tenant_id):
                return get_data_error_result(
                    message="Database error (Document removal)!")
            f2d = File2DocumentService.get_by_document_id(doc.id)
            if f2d:
                FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == f2d[0].file_id])
            File2DocumentService.delete_by_document_id(doc.id)
        FileService.filter_delete(
            [File.source_type == FileSource.KNOWLEDGEBASE, File.type == "folder", File.name == kbs[0].name])
        if not KnowledgebaseService.delete_by_id(req["kb_id"]):
    for doc in DocumentService.query(kb_id=req["kb_id"]):
        if not DocumentService.remove_document(doc, kbs[0].tenant_id):
            return get_data_error_result(
                message="Database error (Knowledgebase removal)!")
        for kb in kbs:
            settings.docStoreConn.delete({"kb_id": kb.id}, search.index_name(kb.tenant_id), kb.id)
            settings.docStoreConn.deleteIdx(search.index_name(kb.tenant_id), kb.id)
            if hasattr(settings.STORAGE_IMPL, 'remove_bucket'):
                settings.STORAGE_IMPL.remove_bucket(kb.id)
        return get_json_result(data=True)

    return await asyncio.to_thread(_rm_sync)
                message="Database error (Document removal)!")
        f2d = File2DocumentService.get_by_document_id(doc.id)
        if f2d:
            FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == f2d[0].file_id])
        File2DocumentService.delete_by_document_id(doc.id)
    FileService.filter_delete(
        [File.source_type == FileSource.KNOWLEDGEBASE, File.type == "folder", File.name == kbs[0].name])
    if not KnowledgebaseService.delete_by_id(req["kb_id"]):
        return get_data_error_result(
            message="Database error (Knowledgebase removal)!")
    for kb in kbs:
        settings.docStoreConn.delete({"kb_id": kb.id}, search.index_name(kb.tenant_id), kb.id)
        settings.docStoreConn.deleteIdx(search.index_name(kb.tenant_id), kb.id)
        if hasattr(settings.STORAGE_IMPL, 'remove_bucket'):
            settings.STORAGE_IMPL.remove_bucket(kb.id)
    return get_json_result(data=True)
except Exception as e:
    return server_error_response(e)

@ -936,3 +922,5 @@ async def check_embedding():
    if summary["avg_cos_sim"] > 0.9:
        return get_json_result(data={"summary": summary, "results": results})
    return get_json_result(code=RetCode.NOT_EFFECTIVE, message="Embedding model switch failed: the average similarity between old and new vectors is below 0.9, indicating incompatible vector spaces.", data={"summary": summary, "results": results})
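The check_embedding hunk above gates an embedding-model switch on the average cosine similarity between old and new vectors staying above 0.9. A self-contained sketch of that check, with synthetic vectors standing in for real embeddings:

```python
import numpy as np

def avg_cos_sim(old_vecs: np.ndarray, new_vecs: np.ndarray) -> float:
    # Row-wise cosine similarity, then the mean: the number the endpoint
    # compares against the 0.9 threshold.
    old_n = old_vecs / np.linalg.norm(old_vecs, axis=1, keepdims=True)
    new_n = new_vecs / np.linalg.norm(new_vecs, axis=1, keepdims=True)
    return float(np.mean(np.sum(old_n * new_n, axis=1)))

# Synthetic example: the "new" model output is a small perturbation of the old.
old = np.random.default_rng(0).normal(size=(8, 1024))
new = old + np.random.default_rng(1).normal(scale=0.05, size=(8, 1024))
summary = {"avg_cos_sim": avg_cos_sim(old, new)}
print(summary, "compatible" if summary["avg_cos_sim"] > 0.9 else "incompatible")
```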
@ -33,7 +33,7 @@ from api.db.services.file_service import FileService
from api.db.services.knowledgebase_service import KnowledgebaseService
from api.db.services.llm_service import LLMBundle
from api.db.services.tenant_llm_service import TenantLLMService
from api.db.services.task_service import TaskService, queue_tasks, cancel_all_task_of
from api.db.services.task_service import TaskService, queue_tasks
from api.db.services.dialog_service import meta_filter, convert_conditions
from api.utils.api_utils import check_duplicate_ids, construct_json_result, get_error_data_result, get_parser_config, get_result, server_error_response, token_required, \
    get_request_json
@ -321,7 +321,9 @@ async def update_doc(tenant_id, dataset_id, document_id):
    try:
        if not DocumentService.update_by_id(doc.id, {"status": str(status)}):
            return get_error_data_result(message="Database error (Document update)!")

        settings.docStoreConn.update({"doc_id": doc.id}, {"available_int": status}, search.index_name(kb.tenant_id), doc.kb_id)
        return get_result(data=True)
    except Exception as e:
        return server_error_response(e)

@ -348,10 +350,12 @@ async def update_doc(tenant_id, dataset_id, document_id):
    }
    renamed_doc = {}
    for key, value in doc.to_dict().items():
        if key == "run":
            renamed_doc["run"] = run_mapping.get(str(value))
        new_key = key_mapping.get(key, key)
        renamed_doc[new_key] = value
        if key == "run":
            renamed_doc["run"] = run_mapping.get(str(value))
            renamed_doc["run"] = run_mapping.get(value)

    return get_result(data=renamed_doc)

@ -835,8 +839,6 @@ async def stop_parsing(tenant_id, dataset_id):
        return get_error_data_result(message=f"You don't own the document {id}.")
    if int(doc[0].progress) == 1 or doc[0].progress == 0:
        return get_error_data_result("Can't stop parsing document with progress at 0 or 1")
    # Send cancellation signal via Redis to stop background task
    cancel_all_task_of(id)
    info = {"run": "2", "progress": 0, "chunk_num": 0}
    DocumentService.update_by_id(id, info)
    settings.docStoreConn.delete({"doc_id": doc[0].id}, search.index_name(tenant_id), dataset_id)

@ -14,7 +14,7 @@
# limitations under the License.
#

import asyncio

import pathlib
import re
from quart import request, make_response
@ -29,7 +29,6 @@ from api.db import FileType
from api.db.services import duplicate_name
from api.db.services.file_service import FileService
from api.utils.file_utils import filename_type
from api.utils.web_utils import CONTENT_TYPE_MAP
from common import settings
from common.constants import RetCode

@ -40,7 +39,7 @@ async def upload(tenant_id):
    Upload a file to the system.
    ---
    tags:
      - File
      - File Management
    security:
      - ApiKeyAuth: []
    parameters:
@ -156,7 +155,7 @@ async def create(tenant_id):
    Create a new file or folder.
    ---
    tags:
      - File
      - File Management
    security:
      - ApiKeyAuth: []
    parameters:
@ -234,7 +233,7 @@ async def list_files(tenant_id):
    List files under a specific folder.
    ---
    tags:
      - File
      - File Management
    security:
      - ApiKeyAuth: []
    parameters:
@ -326,7 +325,7 @@ async def get_root_folder(tenant_id):
    Get user's root folder.
    ---
    tags:
      - File
      - File Management
    security:
      - ApiKeyAuth: []
    responses:
@ -362,7 +361,7 @@ async def get_parent_folder():
    Get parent folder info of a file.
    ---
    tags:
      - File
      - File Management
    security:
      - ApiKeyAuth: []
    parameters:
@ -407,7 +406,7 @@ async def get_all_parent_folders(tenant_id):
    Get all parent folders of a file.
    ---
    tags:
      - File
      - File Management
    security:
      - ApiKeyAuth: []
    parameters:
@ -455,7 +454,7 @@ async def rm(tenant_id):
    Delete one or multiple files/folders.
    ---
    tags:
      - File
      - File Management
    security:
      - ApiKeyAuth: []
    parameters:
@ -529,7 +528,7 @@ async def rename(tenant_id):
    Rename a file.
    ---
    tags:
      - File
      - File Management
    security:
      - ApiKeyAuth: []
    parameters:
@ -590,7 +589,7 @@ async def get(tenant_id, file_id):
    Download a file.
    ---
    tags:
      - File
      - File Management
    security:
      - ApiKeyAuth: []
    produces:
@ -630,19 +629,6 @@ async def get(tenant_id, file_id):
    except Exception as e:
        return server_error_response(e)

@manager.route("/file/download/<attachment_id>", methods=["GET"])  # noqa: F821
@token_required
async def download_attachment(tenant_id,attachment_id):
    try:
        ext = request.args.get("ext", "markdown")
        data = await asyncio.to_thread(settings.STORAGE_IMPL.get, tenant_id, attachment_id)
        response = await make_response(data)
        response.headers.set("Content-Type", CONTENT_TYPE_MAP.get(ext, f"application/{ext}"))

        return response

    except Exception as e:
        return server_error_response(e)

@manager.route('/file/mv', methods=['POST'])  # noqa: F821
@token_required
@ -651,7 +637,7 @@ async def move(tenant_id):
    Move one or multiple files to another folder.
    ---
    tags:
      - File
      - File Management
    security:
      - ApiKeyAuth: []
    parameters:

@ -13,7 +13,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
import asyncio
import json
import re
import time
@ -788,7 +787,7 @@ Reason:
  - At the same time, related terms can also help search engines better understand user needs and return more accurate search results.

"""
    ans = await chat_mdl.async_chat(
    ans = chat_mdl.chat(
        prompt,
        [
            {
@ -964,30 +963,28 @@ async def retrieval_test_embedded():
    use_kg = req.get("use_kg", False)
    top = int(req.get("top_k", 1024))
    langs = req.get("cross_languages", [])
    tenant_ids = []

    tenant_id = objs[0].tenant_id
    if not tenant_id:
        return get_error_data_result(message="permission denined.")

    def _retrieval_sync():
        local_doc_ids = list(doc_ids) if doc_ids else []
        tenant_ids = []
        _question = question

        if req.get("search_id", ""):
            search_config = SearchService.get_detail(req.get("search_id", "")).get("search_config", {})
            meta_data_filter = search_config.get("meta_data_filter", {})
            metas = DocumentService.get_meta_by_kbs(kb_ids)
            if meta_data_filter.get("method") == "auto":
                chat_mdl = LLMBundle(tenant_id, LLMType.CHAT, llm_name=search_config.get("chat_id", ""))
                filters: dict = gen_meta_filter(chat_mdl, metas, _question)
                local_doc_ids.extend(meta_filter(metas, filters["conditions"], filters.get("logic", "and")))
                if not local_doc_ids:
                    local_doc_ids = None
            elif meta_data_filter.get("method") == "manual":
                local_doc_ids.extend(meta_filter(metas, meta_data_filter["manual"], meta_data_filter.get("logic", "and")))
                if meta_data_filter["manual"] and not local_doc_ids:
                    local_doc_ids = ["-999"]
    if req.get("search_id", ""):
        search_config = SearchService.get_detail(req.get("search_id", "")).get("search_config", {})
        meta_data_filter = search_config.get("meta_data_filter", {})
        metas = DocumentService.get_meta_by_kbs(kb_ids)
        if meta_data_filter.get("method") == "auto":
            chat_mdl = LLMBundle(tenant_id, LLMType.CHAT, llm_name=search_config.get("chat_id", ""))
            filters: dict = gen_meta_filter(chat_mdl, metas, question)
            doc_ids.extend(meta_filter(metas, filters["conditions"], filters.get("logic", "and")))
            if not doc_ids:
                doc_ids = None
        elif meta_data_filter.get("method") == "manual":
            doc_ids.extend(meta_filter(metas, meta_data_filter["manual"], meta_data_filter.get("logic", "and")))
            if meta_data_filter["manual"] and not doc_ids:
                doc_ids = ["-999"]

    try:
        tenants = UserTenantService.query(user_id=tenant_id)
        for kb_id in kb_ids:
            for tenant in tenants:
@ -1003,7 +1000,7 @@ async def retrieval_test_embedded():
        return get_error_data_result(message="Knowledgebase not found!")

        if langs:
            _question = cross_languages(kb.tenant_id, None, _question, langs)
            question = cross_languages(kb.tenant_id, None, question, langs)

        embd_mdl = LLMBundle(kb.tenant_id, LLMType.EMBEDDING.value, llm_name=kb.embd_id)

@ -1013,15 +1010,15 @@ async def retrieval_test_embedded():

        if req.get("keyword", False):
            chat_mdl = LLMBundle(kb.tenant_id, LLMType.CHAT)
            _question += keyword_extraction(chat_mdl, _question)
            question += keyword_extraction(chat_mdl, question)

        labels = label_question(_question, [kb])
        labels = label_question(question, [kb])
        ranks = settings.retriever.retrieval(
            _question, embd_mdl, tenant_ids, kb_ids, page, size, similarity_threshold, vector_similarity_weight, top,
            local_doc_ids, rerank_mdl=rerank_mdl, highlight=req.get("highlight"), rank_feature=labels
            question, embd_mdl, tenant_ids, kb_ids, page, size, similarity_threshold, vector_similarity_weight, top,
            doc_ids, rerank_mdl=rerank_mdl, highlight=req.get("highlight"), rank_feature=labels
        )
        if use_kg:
            ck = settings.kg_retriever.retrieval(_question, tenant_ids, kb_ids, embd_mdl,
            ck = settings.kg_retriever.retrieval(question, tenant_ids, kb_ids, embd_mdl,
                                                 LLMBundle(kb.tenant_id, LLMType.CHAT))
            if ck["content_with_weight"]:
                ranks["chunks"].insert(0, ck)
@ -1031,9 +1028,6 @@ async def retrieval_test_embedded():
        ranks["labels"] = labels

        return get_json_result(data=ranks)

    try:
        return await asyncio.to_thread(_retrieval_sync)
    except Exception as e:
        if str(e).find("not_found") > 0:
            return get_json_result(data=False, message="No chunk found! Check the chunk status please!",
@ -1070,7 +1064,7 @@ async def related_questions_embedded():

    gen_conf = search_config.get("llm_setting", {"temperature": 0.9})
    prompt = load_prompt("related_question")
    ans = await chat_mdl.async_chat(
    ans = chat_mdl.chat(
        prompt,
        [
            {

@ -1113,70 +1113,6 @@ class SyncLogs(DataBaseModel):
        db_table = "sync_logs"


class EvaluationDataset(DataBaseModel):
    """Ground truth dataset for RAG evaluation"""
    id = CharField(max_length=32, primary_key=True)
    tenant_id = CharField(max_length=32, null=False, index=True, help_text="tenant ID")
    name = CharField(max_length=255, null=False, index=True, help_text="dataset name")
    description = TextField(null=True, help_text="dataset description")
    kb_ids = JSONField(null=False, help_text="knowledge base IDs to evaluate against")
    created_by = CharField(max_length=32, null=False, index=True, help_text="creator user ID")
    create_time = BigIntegerField(null=False, index=True, help_text="creation timestamp")
    update_time = BigIntegerField(null=False, help_text="last update timestamp")
    status = IntegerField(null=False, default=1, help_text="1=valid, 0=invalid")

    class Meta:
        db_table = "evaluation_datasets"


class EvaluationCase(DataBaseModel):
    """Individual test case in an evaluation dataset"""
    id = CharField(max_length=32, primary_key=True)
    dataset_id = CharField(max_length=32, null=False, index=True, help_text="FK to evaluation_datasets")
    question = TextField(null=False, help_text="test question")
    reference_answer = TextField(null=True, help_text="optional ground truth answer")
    relevant_doc_ids = JSONField(null=True, help_text="expected relevant document IDs")
    relevant_chunk_ids = JSONField(null=True, help_text="expected relevant chunk IDs")
    metadata = JSONField(null=True, help_text="additional context/tags")
    create_time = BigIntegerField(null=False, help_text="creation timestamp")

    class Meta:
        db_table = "evaluation_cases"


class EvaluationRun(DataBaseModel):
    """A single evaluation run"""
    id = CharField(max_length=32, primary_key=True)
    dataset_id = CharField(max_length=32, null=False, index=True, help_text="FK to evaluation_datasets")
    dialog_id = CharField(max_length=32, null=False, index=True, help_text="dialog configuration being evaluated")
    name = CharField(max_length=255, null=False, help_text="run name")
    config_snapshot = JSONField(null=False, help_text="dialog config at time of evaluation")
    metrics_summary = JSONField(null=True, help_text="aggregated metrics")
    status = CharField(max_length=32, null=False, default="PENDING", help_text="PENDING/RUNNING/COMPLETED/FAILED")
    created_by = CharField(max_length=32, null=False, index=True, help_text="user who started the run")
    create_time = BigIntegerField(null=False, index=True, help_text="creation timestamp")
    complete_time = BigIntegerField(null=True, help_text="completion timestamp")

    class Meta:
        db_table = "evaluation_runs"


class EvaluationResult(DataBaseModel):
    """Result for a single test case in an evaluation run"""
    id = CharField(max_length=32, primary_key=True)
    run_id = CharField(max_length=32, null=False, index=True, help_text="FK to evaluation_runs")
    case_id = CharField(max_length=32, null=False, index=True, help_text="FK to evaluation_cases")
    generated_answer = TextField(null=False, help_text="generated answer")
    retrieved_chunks = JSONField(null=False, help_text="chunks that were retrieved")
    metrics = JSONField(null=False, help_text="all computed metrics")
    execution_time = FloatField(null=False, help_text="response time in seconds")
    token_usage = JSONField(null=True, help_text="prompt/completion tokens")
    create_time = BigIntegerField(null=False, help_text="creation timestamp")

    class Meta:
        db_table = "evaluation_results"


def migrate_db():
    logging.disable(logging.ERROR)
    migrator = DatabaseMigrator[settings.DATABASE_TYPE.upper()].value(DB)
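The four deleted models above form two parent/child pairs: datasets own cases, and runs own results, linked by plain CharField IDs rather than database-level foreign keys. A sketch of how they would have been queried with peewee, in the same select()/where() style the surviving service code uses (illustrative only, since this revert removes the models):

```python
# Illustrative peewee queries against the removed schema.
cases = (EvaluationCase
         .select()
         .where(EvaluationCase.dataset_id == "some_dataset_id")
         .order_by(EvaluationCase.create_time))

results = (EvaluationResult
           .select(EvaluationResult, EvaluationRun)
           .join(EvaluationRun, on=(EvaluationResult.run_id == EvaluationRun.id))
           .where(EvaluationRun.dataset_id == "some_dataset_id"))
```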
@ -1357,43 +1293,4 @@ def migrate_db():
    try:
        migrate(migrator.add_column("llm_factories", "rank", IntegerField(default=0, index=False)))
    except Exception:
        pass

    # RAG Evaluation tables
    try:
        migrate(migrator.add_column("evaluation_datasets", "id", CharField(max_length=32, primary_key=True)))
    except Exception:
        pass
    try:
        migrate(migrator.add_column("evaluation_datasets", "tenant_id", CharField(max_length=32, null=False, index=True)))
    except Exception:
        pass
    try:
        migrate(migrator.add_column("evaluation_datasets", "name", CharField(max_length=255, null=False, index=True)))
    except Exception:
        pass
    try:
        migrate(migrator.add_column("evaluation_datasets", "description", TextField(null=True)))
    except Exception:
        pass
    try:
        migrate(migrator.add_column("evaluation_datasets", "kb_ids", JSONField(null=False)))
    except Exception:
        pass
    try:
        migrate(migrator.add_column("evaluation_datasets", "created_by", CharField(max_length=32, null=False, index=True)))
    except Exception:
        pass
    try:
        migrate(migrator.add_column("evaluation_datasets", "create_time", BigIntegerField(null=False, index=True)))
    except Exception:
        pass
    try:
        migrate(migrator.add_column("evaluation_datasets", "update_time", BigIntegerField(null=False)))
    except Exception:
        pass
    try:
        migrate(migrator.add_column("evaluation_datasets", "status", IntegerField(null=False, default=1)))
    except Exception:
        pass

    logging.disable(logging.NOTSET)

@ -761,48 +761,13 @@ Please write the SQL, only SQL, without any other explanations or text.
        "prompt": sys_prompt,
    }

def clean_tts_text(text: str) -> str:
    if not text:
        return ""

    text = text.encode("utf-8", "ignore").decode("utf-8", "ignore")

    text = re.sub(r"[\x00-\x08\x0B-\x0C\x0E-\x1F\x7F]", "", text)

    emoji_pattern = re.compile(
        "[\U0001F600-\U0001F64F"
        "\U0001F300-\U0001F5FF"
        "\U0001F680-\U0001F6FF"
        "\U0001F1E0-\U0001F1FF"
        "\U00002700-\U000027BF"
        "\U0001F900-\U0001F9FF"
        "\U0001FA70-\U0001FAFF"
        "\U0001FAD0-\U0001FAFF]+",
        flags=re.UNICODE
    )
    text = emoji_pattern.sub("", text)

    text = re.sub(r"\s+", " ", text).strip()

    MAX_LEN = 500
    if len(text) > MAX_LEN:
        text = text[:MAX_LEN]

    return text

def tts(tts_mdl, text):
    if not tts_mdl or not text:
        return None
    text = clean_tts_text(text)
    if not text:
        return None
    bin = b""
    try:
        for chunk in tts_mdl.tts(text):
            bin += chunk
    except Exception as e:
        logging.error(f"TTS failed: {e}, text={text!r}")
        return None
    for chunk in tts_mdl.tts(text):
        bin += chunk
    return binascii.hexlify(bin).decode("utf-8")
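clean_tts_text, deleted above, normalized text before synthesis: strip control characters and emoji, collapse whitespace, and truncate to 500 characters. A quick illustration of its behavior (inputs are made-up examples):

```python
# Assumes clean_tts_text as defined in the removed hunk above.
print(clean_tts_text("Hello\x00  world 👋\n\nnew   line"))  # -> "Hello world new line"
print(clean_tts_text(""))                                   # -> ""
print(len(clean_tts_text("x" * 1000)))                      # -> 500
```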
@ -719,14 +719,10 @@ class DocumentService(CommonService):
|
||||
# only for special task and parsed docs and unfinished
|
||||
freeze_progress = special_task_running and doc_progress >= 1 and not finished
|
||||
msg = "\n".join(sorted(msg))
|
||||
begin_at = d.get("process_begin_at")
|
||||
if not begin_at:
|
||||
begin_at = datetime.now()
|
||||
# fallback
|
||||
cls.update_by_id(d["id"], {"process_begin_at": begin_at})
|
||||
|
||||
info = {
|
||||
"process_duration": max(datetime.timestamp(datetime.now()) - begin_at.timestamp(), 0),
|
||||
"process_duration": datetime.timestamp(
|
||||
datetime.now()) -
|
||||
d["process_begin_at"].timestamp(),
|
||||
"run": status}
|
||||
if prg != 0 and not freeze_progress:
|
||||
info["progress"] = prg
|
||||
|
||||
@ -1,598 +0,0 @@
|
||||
#
|
||||
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
"""
|
||||
RAG Evaluation Service
|
||||
|
||||
Provides functionality for evaluating RAG system performance including:
|
||||
- Dataset management
|
||||
- Test case management
|
||||
- Evaluation execution
|
||||
- Metrics computation
|
||||
- Configuration recommendations
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import List, Dict, Any, Optional, Tuple
|
||||
from datetime import datetime
|
||||
from timeit import default_timer as timer
|
||||
|
||||
from api.db.db_models import EvaluationDataset, EvaluationCase, EvaluationRun, EvaluationResult
|
||||
from api.db.services.common_service import CommonService
|
||||
from api.db.services.dialog_service import DialogService, chat
|
||||
from common.misc_utils import get_uuid
|
||||
from common.time_utils import current_timestamp
|
||||
from common.constants import StatusEnum
|
||||
|
||||
|
||||
class EvaluationService(CommonService):
|
||||
"""Service for managing RAG evaluations"""
|
||||
|
||||
model = EvaluationDataset
|
||||
|
||||
# ==================== Dataset Management ====================
|
||||
|
||||
@classmethod
|
||||
def create_dataset(cls, name: str, description: str, kb_ids: List[str],
|
||||
tenant_id: str, user_id: str) -> Tuple[bool, str]:
|
||||
"""
|
||||
Create a new evaluation dataset.
|
||||
|
||||
Args:
|
||||
name: Dataset name
|
||||
description: Dataset description
|
||||
kb_ids: List of knowledge base IDs to evaluate against
|
||||
tenant_id: Tenant ID
|
||||
user_id: User ID who creates the dataset
|
||||
|
||||
Returns:
|
||||
        (success, dataset_id or error_message)
        """
        try:
            dataset_id = get_uuid()
            dataset = {
                "id": dataset_id,
                "tenant_id": tenant_id,
                "name": name,
                "description": description,
                "kb_ids": kb_ids,
                "created_by": user_id,
                "create_time": current_timestamp(),
                "update_time": current_timestamp(),
                "status": StatusEnum.VALID.value
            }

            if not EvaluationDataset.create(**dataset):
                return False, "Failed to create dataset"

            return True, dataset_id
        except Exception as e:
            logging.error(f"Error creating evaluation dataset: {e}")
            return False, str(e)

    @classmethod
    def get_dataset(cls, dataset_id: str) -> Optional[Dict[str, Any]]:
        """Get dataset by ID"""
        try:
            dataset = EvaluationDataset.get_by_id(dataset_id)
            if dataset:
                return dataset.to_dict()
            return None
        except Exception as e:
            logging.error(f"Error getting dataset {dataset_id}: {e}")
            return None

    @classmethod
    def list_datasets(cls, tenant_id: str, user_id: str,
                      page: int = 1, page_size: int = 20) -> Dict[str, Any]:
        """List datasets for a tenant"""
        try:
            query = EvaluationDataset.select().where(
                (EvaluationDataset.tenant_id == tenant_id) &
                (EvaluationDataset.status == StatusEnum.VALID.value)
            ).order_by(EvaluationDataset.create_time.desc())

            total = query.count()
            datasets = query.paginate(page, page_size)

            return {
                "total": total,
                "datasets": [d.to_dict() for d in datasets]
            }
        except Exception as e:
            logging.error(f"Error listing datasets: {e}")
            return {"total": 0, "datasets": []}

    @classmethod
    def update_dataset(cls, dataset_id: str, **kwargs) -> bool:
        """Update dataset"""
        try:
            kwargs["update_time"] = current_timestamp()
            return EvaluationDataset.update(**kwargs).where(
                EvaluationDataset.id == dataset_id
            ).execute() > 0
        except Exception as e:
            logging.error(f"Error updating dataset {dataset_id}: {e}")
            return False

    @classmethod
    def delete_dataset(cls, dataset_id: str) -> bool:
        """Soft delete dataset"""
        try:
            return EvaluationDataset.update(
                status=StatusEnum.INVALID.value,
                update_time=current_timestamp()
            ).where(EvaluationDataset.id == dataset_id).execute() > 0
        except Exception as e:
            logging.error(f"Error deleting dataset {dataset_id}: {e}")
            return False

    # ==================== Test Case Management ====================

    @classmethod
    def add_test_case(cls, dataset_id: str, question: str,
                      reference_answer: Optional[str] = None,
                      relevant_doc_ids: Optional[List[str]] = None,
                      relevant_chunk_ids: Optional[List[str]] = None,
                      metadata: Optional[Dict[str, Any]] = None) -> Tuple[bool, str]:
        """
        Add a test case to a dataset.

        Args:
            dataset_id: Dataset ID
            question: Test question
            reference_answer: Optional ground truth answer
            relevant_doc_ids: Optional list of relevant document IDs
            relevant_chunk_ids: Optional list of relevant chunk IDs
            metadata: Optional additional metadata

        Returns:
            (success, case_id or error_message)
        """
        try:
            case_id = get_uuid()
            case = {
                "id": case_id,
                "dataset_id": dataset_id,
                "question": question,
                "reference_answer": reference_answer,
                "relevant_doc_ids": relevant_doc_ids,
                "relevant_chunk_ids": relevant_chunk_ids,
                "metadata": metadata,
                "create_time": current_timestamp()
            }

            if not EvaluationCase.create(**case):
                return False, "Failed to create test case"

            return True, case_id
        except Exception as e:
            logging.error(f"Error adding test case: {e}")
            return False, str(e)

    @classmethod
    def get_test_cases(cls, dataset_id: str) -> List[Dict[str, Any]]:
        """Get all test cases for a dataset"""
        try:
            cases = EvaluationCase.select().where(
                EvaluationCase.dataset_id == dataset_id
            ).order_by(EvaluationCase.create_time)

            return [c.to_dict() for c in cases]
        except Exception as e:
            logging.error(f"Error getting test cases for dataset {dataset_id}: {e}")
            return []

    @classmethod
    def delete_test_case(cls, case_id: str) -> bool:
        """Delete a test case"""
        try:
            return EvaluationCase.delete().where(
                EvaluationCase.id == case_id
            ).execute() > 0
        except Exception as e:
            logging.error(f"Error deleting test case {case_id}: {e}")
            return False

    @classmethod
    def import_test_cases(cls, dataset_id: str, cases: List[Dict[str, Any]]) -> Tuple[int, int]:
        """
        Bulk import test cases from a list.

        Args:
            dataset_id: Dataset ID
            cases: List of test case dictionaries

        Returns:
            (success_count, failure_count)
        """
        success_count = 0
        failure_count = 0

        for case_data in cases:
            success, _ = cls.add_test_case(
                dataset_id=dataset_id,
                question=case_data.get("question", ""),
                reference_answer=case_data.get("reference_answer"),
                relevant_doc_ids=case_data.get("relevant_doc_ids"),
                relevant_chunk_ids=case_data.get("relevant_chunk_ids"),
                metadata=case_data.get("metadata")
            )

            if success:
                success_count += 1
            else:
                failure_count += 1

        return success_count, failure_count
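    # A minimal usage sketch for bulk import (illustrative only: the
    # `EvaluationService` prefix and `ds_id` are placeholders, since the
    # enclosing class name and real dataset IDs are not shown in this excerpt):
    #
    #     cases = [
    #         {"question": "What is RAGFlow?",
    #          "reference_answer": "An open-source RAG engine.",
    #          "relevant_chunk_ids": ["chunk_001", "chunk_002"]},
    #         {"question": "Which storage backends are supported?"},
    #     ]
    #     ok_count, fail_count = EvaluationService.import_test_cases(ds_id, cases)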

    # ==================== Evaluation Execution ====================

    @classmethod
    def start_evaluation(cls, dataset_id: str, dialog_id: str,
                         user_id: str, name: Optional[str] = None) -> Tuple[bool, str]:
        """
        Start an evaluation run.

        Args:
            dataset_id: Dataset ID
            dialog_id: Dialog configuration to evaluate
            user_id: User ID who starts the run
            name: Optional run name

        Returns:
            (success, run_id or error_message)
        """
        try:
            # Get dialog configuration
            success, dialog = DialogService.get_by_id(dialog_id)
            if not success:
                return False, "Dialog not found"

            # Create evaluation run
            run_id = get_uuid()
            if not name:
                name = f"Evaluation Run {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"

            run = {
                "id": run_id,
                "dataset_id": dataset_id,
                "dialog_id": dialog_id,
                "name": name,
                "config_snapshot": dialog.to_dict(),
                "metrics_summary": None,
                "status": "RUNNING",
                "created_by": user_id,
                "create_time": current_timestamp(),
                "complete_time": None
            }

            if not EvaluationRun.create(**run):
                return False, "Failed to create evaluation run"

            # Execute the evaluation synchronously for now.
            # In production, dispatch this to a task queue instead.
            cls._execute_evaluation(run_id, dataset_id, dialog)

            return True, run_id
        except Exception as e:
            logging.error(f"Error starting evaluation: {e}")
            return False, str(e)

    @classmethod
    def _execute_evaluation(cls, run_id: str, dataset_id: str, dialog: Any):
        """
        Execute evaluation for all test cases.

        This method runs the RAG pipeline for each test case and computes metrics.
        """
        try:
            # Get all test cases
            test_cases = cls.get_test_cases(dataset_id)

            if not test_cases:
                EvaluationRun.update(
                    status="FAILED",
                    complete_time=current_timestamp()
                ).where(EvaluationRun.id == run_id).execute()
                return

            # Execute each test case
            results = []
            for case in test_cases:
                result = cls._evaluate_single_case(run_id, case, dialog)
                if result:
                    results.append(result)

            # Compute summary metrics
            metrics_summary = cls._compute_summary_metrics(results)

            # Update run status
            EvaluationRun.update(
                status="COMPLETED",
                metrics_summary=metrics_summary,
                complete_time=current_timestamp()
            ).where(EvaluationRun.id == run_id).execute()

        except Exception as e:
            logging.error(f"Error executing evaluation {run_id}: {e}")
            EvaluationRun.update(
                status="FAILED",
                complete_time=current_timestamp()
            ).where(EvaluationRun.id == run_id).execute()

    @classmethod
    def _evaluate_single_case(cls, run_id: str, case: Dict[str, Any],
                              dialog: Any) -> Optional[Dict[str, Any]]:
        """
        Evaluate a single test case.

        Args:
            run_id: Evaluation run ID
            case: Test case dictionary
            dialog: Dialog configuration

        Returns:
            Result dictionary or None if failed
        """
        try:
            # Prepare messages
            messages = [{"role": "user", "content": case["question"]}]

            # Execute RAG pipeline
            start_time = timer()
            answer = ""
            retrieved_chunks = []

            for ans in chat(dialog, messages, stream=False):
                if isinstance(ans, dict):
                    answer = ans.get("answer", "")
                    retrieved_chunks = ans.get("reference", {}).get("chunks", [])
                break

            execution_time = timer() - start_time

            # Compute metrics
            metrics = cls._compute_metrics(
                question=case["question"],
                generated_answer=answer,
                reference_answer=case.get("reference_answer"),
                retrieved_chunks=retrieved_chunks,
                relevant_chunk_ids=case.get("relevant_chunk_ids"),
                dialog=dialog
            )

            # Save result
            result_id = get_uuid()
            result = {
                "id": result_id,
                "run_id": run_id,
                "case_id": case["id"],
                "generated_answer": answer,
                "retrieved_chunks": retrieved_chunks,
                "metrics": metrics,
                "execution_time": execution_time,
                "token_usage": None,  # TODO: Track token usage
                "create_time": current_timestamp()
            }

            EvaluationResult.create(**result)

            return result
        except Exception as e:
            logging.error(f"Error evaluating case {case.get('id')}: {e}")
            return None

    @classmethod
    def _compute_metrics(cls, question: str, generated_answer: str,
                         reference_answer: Optional[str],
                         retrieved_chunks: List[Dict[str, Any]],
                         relevant_chunk_ids: Optional[List[str]],
                         dialog: Any) -> Dict[str, float]:
        """
        Compute evaluation metrics for a single test case.

        Returns:
            Dictionary of metric names to values
        """
        metrics = {}

        # Retrieval metrics (if ground-truth chunks are provided)
        if relevant_chunk_ids:
            retrieved_ids = [c.get("chunk_id") for c in retrieved_chunks]
            metrics.update(cls._compute_retrieval_metrics(retrieved_ids, relevant_chunk_ids))

        # Generation metrics
        if generated_answer:
            # Basic metrics
            metrics["answer_length"] = len(generated_answer)
            metrics["has_answer"] = 1.0 if generated_answer.strip() else 0.0

            # TODO: Implement advanced metrics using LLM-as-judge
            # - Faithfulness (hallucination detection)
            # - Answer relevance
            # - Context relevance
            # - Semantic similarity (if reference answer provided)

        return metrics

    @classmethod
    def _compute_retrieval_metrics(cls, retrieved_ids: List[str],
                                   relevant_ids: List[str]) -> Dict[str, float]:
        """
        Compute retrieval metrics.

        Args:
            retrieved_ids: List of retrieved chunk IDs
            relevant_ids: List of relevant chunk IDs (ground truth)

        Returns:
            Dictionary of retrieval metrics
        """
        if not relevant_ids:
            return {}

        retrieved_set = set(retrieved_ids)
        relevant_set = set(relevant_ids)

        # Precision: proportion of retrieved chunks that are relevant
        precision = len(retrieved_set & relevant_set) / len(retrieved_set) if retrieved_set else 0.0

        # Recall: proportion of relevant chunks that were retrieved
        recall = len(retrieved_set & relevant_set) / len(relevant_set) if relevant_set else 0.0

        # F1 score: harmonic mean of precision and recall
        f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0.0

        # Hit rate: whether any relevant chunk was retrieved
        hit_rate = 1.0 if (retrieved_set & relevant_set) else 0.0

        # MRR (Mean Reciprocal Rank): position of the first relevant chunk
        mrr = 0.0
        for i, chunk_id in enumerate(retrieved_ids, 1):
            if chunk_id in relevant_set:
                mrr = 1.0 / i
                break

        return {
            "precision": precision,
            "recall": recall,
            "f1_score": f1,
            "hit_rate": hit_rate,
            "mrr": mrr
        }
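    # Worked example (a sketch, not data from this repo): with
    #     retrieved_ids = ["a", "b", "c", "d"] and relevant_ids = ["b", "e"],
    # the overlap is {"b"}, so precision = 1/4 = 0.25, recall = 1/2 = 0.5,
    # f1_score = 2 * 0.25 * 0.5 / 0.75 = 0.333..., hit_rate = 1.0, and
    # mrr = 1/2 = 0.5 because the first relevant chunk appears at rank 2.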

    @classmethod
    def _compute_summary_metrics(cls, results: List[Dict[str, Any]]) -> Dict[str, Any]:
        """
        Compute summary metrics across all test cases.

        Args:
            results: List of result dictionaries

        Returns:
            Summary metrics dictionary
        """
        if not results:
            return {}

        # Aggregate metrics
        metric_sums = {}
        metric_counts = {}

        for result in results:
            metrics = result.get("metrics", {})
            for key, value in metrics.items():
                if isinstance(value, (int, float)):
                    metric_sums[key] = metric_sums.get(key, 0) + value
                    metric_counts[key] = metric_counts.get(key, 0) + 1

        # Compute averages
        summary = {
            "total_cases": len(results),
            "avg_execution_time": sum(r.get("execution_time", 0) for r in results) / len(results)
        }

        for key in metric_sums:
            summary[f"avg_{key}"] = metric_sums[key] / metric_counts[key]

        return summary
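    # Sketch of the aggregation: for two results whose metrics are
    # {"precision": 0.5} and {"precision": 1.0}, metric_sums["precision"] == 1.5
    # and metric_counts["precision"] == 2, so the summary reports
    # "avg_precision": 0.75 alongside "total_cases": 2.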

    # ==================== Results & Analysis ====================

    @classmethod
    def get_run_results(cls, run_id: str) -> Dict[str, Any]:
        """Get results for an evaluation run"""
        try:
            run = EvaluationRun.get_by_id(run_id)
            if not run:
                return {}

            results = EvaluationResult.select().where(
                EvaluationResult.run_id == run_id
            ).order_by(EvaluationResult.create_time)

            return {
                "run": run.to_dict(),
                "results": [r.to_dict() for r in results]
            }
        except Exception as e:
            logging.error(f"Error getting run results {run_id}: {e}")
            return {}

    @classmethod
    def get_recommendations(cls, run_id: str) -> List[Dict[str, Any]]:
        """
        Analyze evaluation results and provide configuration recommendations.

        Args:
            run_id: Evaluation run ID

        Returns:
            List of recommendation dictionaries
        """
        try:
            run = EvaluationRun.get_by_id(run_id)
            if not run or not run.metrics_summary:
                return []

            metrics = run.metrics_summary
            recommendations = []

            # Low precision: retrieving irrelevant chunks
            if metrics.get("avg_precision", 1.0) < 0.7:
                recommendations.append({
                    "issue": "Low Precision",
                    "severity": "high",
                    "description": "System is retrieving many irrelevant chunks",
                    "suggestions": [
                        "Increase similarity_threshold to filter out less relevant chunks",
                        "Enable reranking to improve chunk ordering",
                        "Reduce top_k to return fewer chunks"
                    ]
                })

            # Low recall: missing relevant chunks
            if metrics.get("avg_recall", 1.0) < 0.7:
                recommendations.append({
                    "issue": "Low Recall",
                    "severity": "high",
                    "description": "System is missing relevant chunks",
                    "suggestions": [
                        "Increase top_k to retrieve more chunks",
                        "Lower similarity_threshold to be more inclusive",
                        "Enable hybrid search (keyword + semantic)",
                        "Check chunk size - may be too large or too small"
                    ]
                })

            # Slow response time
            if metrics.get("avg_execution_time", 0) > 5.0:
                recommendations.append({
                    "issue": "Slow Response Time",
                    "severity": "medium",
                    "description": f"Average response time is {metrics['avg_execution_time']:.2f}s",
                    "suggestions": [
                        "Reduce top_k to retrieve fewer chunks",
                        "Optimize embedding model selection",
                        "Consider caching frequently asked questions"
                    ]
                })

            return recommendations
        except Exception as e:
            logging.error(f"Error generating recommendations for run {run_id}: {e}")
            return []
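    # End-to-end usage sketch (illustrative; `EvaluationService` stands in for
    # the enclosing service class, whose name is not shown in this excerpt, and
    # all IDs and keyword arguments are placeholders):
    #
    #     ok, ds_id = EvaluationService.create_dataset(tenant_id=tid, user_id=uid,
    #                                                  name="regression-set",
    #                                                  description="", kb_ids=[...])
    #     EvaluationService.import_test_cases(ds_id, cases)
    #     ok, run_id = EvaluationService.start_evaluation(ds_id, dialog_id, uid)
    #     report = EvaluationService.get_run_results(run_id)
    #     advice = EvaluationService.get_recommendations(run_id)
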
@ -385,7 +385,6 @@ class LLMBundle(LLM4Tenant):

    async def async_chat_streamly(self, system: str, history: list, gen_conf: dict = {}, **kwargs):
        total_tokens = 0
        ans = ""
        if self.is_tools and self.mdl.is_tools:
            stream_fn = getattr(self.mdl, "async_chat_streamly_with_tools", None)
        else:
@ -398,15 +397,7 @@ class LLMBundle(LLM4Tenant):
            if isinstance(txt, int):
                total_tokens = txt
                break

            if txt.endswith("</think>"):
                ans = ans[: -len("</think>")]

            if not self.verbose_tool_use:
                txt = re.sub(r"<tool_call>.*?</tool_call>", "", txt, flags=re.DOTALL)

            ans += txt
            yield ans
            yield txt
        if total_tokens and not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, total_tokens, self.llm_name):
            logging.error("LLMBundle.async_chat_streamly can't update token usage for {}/CHAT llm_name: {}, used_tokens: {}".format(self.tenant_id, self.llm_name, total_tokens))
        return

@ -331,7 +331,6 @@ class RaptorConfig(Base):
    threshold: Annotated[float, Field(default=0.1, ge=0.0, le=1.0)]
    max_cluster: Annotated[int, Field(default=64, ge=1, le=1024)]
    random_seed: Annotated[int, Field(default=0, ge=0)]
    auto_disable_for_structured_data: Annotated[bool, Field(default=True)]


class GraphragConfig(Base):

@ -148,7 +148,6 @@ class Storage(Enum):
    AWS_S3 = 4
    OSS = 5
    OPENDAL = 6
    GCS = 7

# environment
# ENV_STRONG_TEST_COUNT = "STRONG_TEST_COUNT"

@ -190,11 +190,6 @@ class WebDAVConnector(LoadConnector, PollConnector):
        files = self._list_files_recursive(self.remote_path, start, end)
        logging.info(f"Found {len(files)} files matching time criteria")

        filename_counts: dict[str, int] = {}
        for file_path, _ in files:
            file_name = os.path.basename(file_path)
            filename_counts[file_name] = filename_counts.get(file_name, 0) + 1

        batch: list[Document] = []
        for file_path, file_info in files:
            file_name = os.path.basename(file_path)
@ -242,22 +237,12 @@ class WebDAVConnector(LoadConnector, PollConnector):
                else:
                    modified = datetime.now(timezone.utc)

                if filename_counts.get(file_name, 0) > 1:
                    relative_path = file_path
                    if file_path.startswith(self.remote_path):
                        relative_path = file_path[len(self.remote_path):]
                    if relative_path.startswith('/'):
                        relative_path = relative_path[1:]
                    semantic_id = relative_path.replace('/', ' / ') if relative_path else file_name
                else:
                    semantic_id = file_name

                batch.append(
                    Document(
                        id=f"webdav:{self.base_url}:{file_path}",
                        blob=blob,
                        source=DocumentSource.WEBDAV,
                        semantic_identifier=semantic_id,
                        semantic_identifier=file_name,
                        extension=get_file_ext(file_name),
                        doc_updated_at=modified,
                        size_bytes=size_bytes if size_bytes else 0

@ -153,7 +153,7 @@ def parse_mineru_paths() -> Dict[str, Path]:


@once
def check_and_install_mineru() -> None:
def install_mineru() -> None:
    """
    Ensure MinerU is installed.

@ -173,8 +173,8 @@ def check_and_install_mineru() -> None:
    Logging is used to indicate status.
    """
    # Check if MinerU is enabled
    use_mineru = os.getenv("USE_MINERU", "false").strip().lower()
    if use_mineru != "true":
    use_mineru = os.getenv("USE_MINERU", "").strip().lower()
    if use_mineru == "false":
        logging.info("USE_MINERU=%r. Skipping MinerU installation.", use_mineru)
        return

@ -31,7 +31,6 @@ import rag.utils.ob_conn
import rag.utils.opensearch_conn
from rag.utils.azure_sas_conn import RAGFlowAzureSasBlob
from rag.utils.azure_spn_conn import RAGFlowAzureSpnBlob
from rag.utils.gcs_conn import RAGFlowGCS
from rag.utils.minio_conn import RAGFlowMinio
from rag.utils.opendal_conn import OpenDALStorage
from rag.utils.s3_conn import RAGFlowS3
@ -110,7 +109,6 @@ MINIO = {}
OB = {}
OSS = {}
OS = {}
GCS = {}

DOC_MAXIMUM_SIZE: int = 128 * 1024 * 1024
DOC_BULK_SIZE: int = 4
@ -153,8 +151,7 @@ class StorageFactory:
        Storage.AZURE_SAS: RAGFlowAzureSasBlob,
        Storage.AWS_S3: RAGFlowS3,
        Storage.OSS: RAGFlowOSS,
        Storage.OPENDAL: OpenDALStorage,
        Storage.GCS: RAGFlowGCS,
        Storage.OPENDAL: OpenDALStorage
    }

    @classmethod
@ -253,7 +250,7 @@ def init_settings():
    else:
        raise Exception(f"Not supported doc engine: {DOC_ENGINE}")

    global AZURE, S3, MINIO, OSS, GCS
    global AZURE, S3, MINIO, OSS
    if STORAGE_IMPL_TYPE in ['AZURE_SPN', 'AZURE_SAS']:
        AZURE = get_base_config("azure", {})
    elif STORAGE_IMPL_TYPE == 'AWS_S3':
@ -262,8 +259,6 @@ def init_settings():
        MINIO = decrypt_database_config(name="minio")
    elif STORAGE_IMPL_TYPE == 'OSS':
        OSS = get_base_config("oss", {})
    elif STORAGE_IMPL_TYPE == 'GCS':
        GCS = get_base_config("gcs", {})

    global STORAGE_IMPL
    STORAGE_IMPL = StorageFactory.create(Storage[STORAGE_IMPL_TYPE])

@ -60,8 +60,6 @@ user_default_llm:
  # access_key: 'access_key'
  # secret_key: 'secret_key'
  # region: 'region'
  #gcs:
  #  bucket: 'bridgtl-edm-d-bucket-ragflow'
  # oss:
  #   access_key: 'access_key'
  #   secret_key: 'secret_key'

@ -25,8 +25,6 @@ from rag.prompts.generator import vision_llm_figure_describe_prompt


def vision_figure_parser_figure_data_wrapper(figures_data_without_positions):
    if not figures_data_without_positions:
        return []
    return [
        (
            (figure_data[1], [figure_data[0]]),
@ -37,9 +35,7 @@ def vision_figure_parser_figure_data_wrapper(figures_data_without_positions):
    ]


def vision_figure_parser_docx_wrapper(sections, tbls, callback=None, **kwargs):
    if not tbls:
        return []
def vision_figure_parser_docx_wrapper(sections,tbls,callback=None,**kwargs):
    try:
        vision_model = LLMBundle(kwargs["tenant_id"], LLMType.IMAGE2TEXT)
        callback(0.7, "Visual model detected. Attempting to enhance figure extraction...")
@ -57,8 +53,6 @@ def vision_figure_parser_docx_wrapper(sections, tbls, callback=None,**kwargs):


def vision_figure_parser_pdf_wrapper(tbls, callback=None, **kwargs):
    if not tbls:
        return []
    try:
        vision_model = LLMBundle(kwargs["tenant_id"], LLMType.IMAGE2TEXT)
        callback(0.7, "Visual model detected. Attempting to enhance figure extraction...")

@ -63,7 +63,6 @@ class MinerUParser(RAGFlowPdfParser):
        self.logger = logging.getLogger(self.__class__.__name__)

    def _extract_zip_no_root(self, zip_path, extract_to, root_dir):
        self.logger.info(f"[MinerU] Extract zip: zip_path={zip_path}, extract_to={extract_to}, root_hint={root_dir}")
        with zipfile.ZipFile(zip_path, "r") as zip_ref:
            if not root_dir:
                files = zip_ref.namelist()
@ -73,7 +72,7 @@ class MinerUParser(RAGFlowPdfParser):
                    root_dir = None

            if not root_dir or not root_dir.endswith("/"):
                self.logger.info(f"[MinerU] No root directory found, extracting all (root_hint={root_dir})")
                self.logger.info(f"[MinerU] No root directory found, extracting all...fff{root_dir}")
                zip_ref.extractall(extract_to)
                return

@ -109,7 +108,7 @@ class MinerUParser(RAGFlowPdfParser):
        valid_backends = ["pipeline", "vlm-http-client", "vlm-transformers", "vlm-vllm-engine"]
        if backend not in valid_backends:
            reason = f"[MinerU] Invalid backend '{backend}'. Valid backends are: {valid_backends}"
            self.logger.warning(reason)
            logging.warning(reason)
            return False, reason

        subprocess_kwargs = {
@ -129,40 +128,40 @@ class MinerUParser(RAGFlowPdfParser):
        if backend == "vlm-http-client" and server_url:
            try:
                server_accessible = self._is_http_endpoint_valid(server_url + "/openapi.json")
                self.logger.info(f"[MinerU] vlm-http-client server check: {server_accessible}")
                logging.info(f"[MinerU] vlm-http-client server check: {server_accessible}")
                if server_accessible:
                    self.using_api = False  # We are using http client, not API
                    return True, reason
                else:
                    reason = f"[MinerU] vlm-http-client server not accessible: {server_url}"
                    self.logger.warning(f"[MinerU] vlm-http-client server not accessible: {server_url}")
                    logging.warning(f"[MinerU] vlm-http-client server not accessible: {server_url}")
                    return False, reason
            except Exception as e:
                self.logger.warning(f"[MinerU] vlm-http-client server check failed: {e}")
                logging.warning(f"[MinerU] vlm-http-client server check failed: {e}")
                try:
                    response = requests.get(server_url, timeout=5)
                    self.logger.info(f"[MinerU] vlm-http-client server connection check: success with status {response.status_code}")
                    logging.info(f"[MinerU] vlm-http-client server connection check: success with status {response.status_code}")
                    self.using_api = False
                    return True, reason
                except Exception as e:
                    reason = f"[MinerU] vlm-http-client server connection check failed: {server_url}: {e}"
                    self.logger.warning(f"[MinerU] vlm-http-client server connection check failed: {server_url}: {e}")
                    logging.warning(f"[MinerU] vlm-http-client server connection check failed: {server_url}: {e}")
                    return False, reason

        try:
            result = subprocess.run([str(self.mineru_path), "--version"], **subprocess_kwargs)
            version_info = result.stdout.strip()
            if version_info:
                self.logger.info(f"[MinerU] Detected version: {version_info}")
                logging.info(f"[MinerU] Detected version: {version_info}")
            else:
                self.logger.info("[MinerU] Detected MinerU, but version info is empty.")
                logging.info("[MinerU] Detected MinerU, but version info is empty.")
            return True, reason
        except subprocess.CalledProcessError as e:
            self.logger.warning(f"[MinerU] Execution failed (exit code {e.returncode}).")
            logging.warning(f"[MinerU] Execution failed (exit code {e.returncode}).")
        except FileNotFoundError:
            self.logger.warning("[MinerU] MinerU not found. Please install it via: pip install -U 'mineru[core]'")
            logging.warning("[MinerU] MinerU not found. Please install it via: pip install -U 'mineru[core]'")
        except Exception as e:
            self.logger.error(f"[MinerU] Unexpected error during installation check: {e}")
            logging.error(f"[MinerU] Unexpected error during installation check: {e}")

        # If executable check fails, try API check
        try:
@ -172,14 +171,14 @@ class MinerUParser(RAGFlowPdfParser):
            if not openapi_exists:
                reason = "[MinerU] Failed to detect valid MinerU API server"
                return openapi_exists, reason
            self.logger.info(f"[MinerU] Detected {self.mineru_api}/openapi.json: {openapi_exists}")
            logging.info(f"[MinerU] Detected {self.mineru_api}/openapi.json: {openapi_exists}")
            self.using_api = openapi_exists
            return openapi_exists, reason
            else:
                self.logger.info("[MinerU] api not exists.")
                logging.info("[MinerU] api not exists.")
        except Exception as e:
            reason = f"[MinerU] Unexpected error during api check: {e}"
            self.logger.error(f"[MinerU] Unexpected error during api check: {e}")
            logging.error(f"[MinerU] Unexpected error during api check: {e}")
            return False, reason

    def _run_mineru(
@ -315,7 +314,7 @@ class MinerUParser(RAGFlowPdfParser):
        except Exception as e:
            self.page_images = None
            self.total_page = 0
            self.logger.exception(e)
            logging.exception(e)

    def _line_tag(self, bx):
        pn = [bx["page_idx"] + 1]
@ -481,49 +480,15 @@ class MinerUParser(RAGFlowPdfParser):

        json_file = None
        subdir = None
        attempted = []

        # mirror MinerU's sanitize_filename to align ZIP naming
        def _sanitize_filename(name: str) -> str:
            sanitized = re.sub(r"[/\\\.]{2,}|[/\\]", "", name)
            sanitized = re.sub(r"[^\w.-]", "_", sanitized, flags=re.UNICODE)
            if sanitized.startswith("."):
                sanitized = "_" + sanitized[1:]
            return sanitized or "unnamed"

        safe_stem = _sanitize_filename(file_stem)
        allowed_names = {f"{file_stem}_content_list.json", f"{safe_stem}_content_list.json"}
        self.logger.info(f"[MinerU] Expected output files: {', '.join(sorted(allowed_names))}")
        self.logger.info(f"[MinerU] Searching output candidates: {', '.join(str(c) for c in candidates)}")

        for sub in candidates:
            jf = sub / f"{file_stem}_content_list.json"
            self.logger.info(f"[MinerU] Trying original path: {jf}")
            attempted.append(jf)
            if jf.exists():
                subdir = sub
                json_file = jf
                break

            # MinerU API sanitizes non-ASCII filenames inside the ZIP root and file names.
            alt = sub / f"{safe_stem}_content_list.json"
            self.logger.info(f"[MinerU] Trying sanitized filename: {alt}")
            attempted.append(alt)
            if alt.exists():
                subdir = sub
                json_file = alt
                break

            nested_alt = sub / safe_stem / f"{safe_stem}_content_list.json"
            self.logger.info(f"[MinerU] Trying sanitized nested path: {nested_alt}")
            attempted.append(nested_alt)
            if nested_alt.exists():
                subdir = nested_alt.parent
                json_file = nested_alt
                break

        if not json_file:
            raise FileNotFoundError(f"[MinerU] Missing output file, tried: {', '.join(str(p) for p in attempted)}")
            raise FileNotFoundError(f"[MinerU] Missing output file, tried: {', '.join(str(c / (file_stem + '_content_list.json')) for c in candidates)}")

        with open(json_file, "r", encoding="utf-8") as f:
            data = json.load(f)

@ -170,7 +170,7 @@ TZ=Asia/Shanghai
# Uncomment the following line if your operating system is MacOS:
# MACOS=1

# The maximum file size limit (in bytes) for each upload to your dataset or RAGFlow's File system.
# The maximum file size limit (in bytes) for each upload to your knowledge base or File Management.
# To change the 1GB file size limit, uncomment the line below and update as needed.
# MAX_CONTENT_LENGTH=1073741824
# After updating, ensure `client_max_body_size` in nginx/nginx.conf is updated accordingly.

@ -23,7 +23,7 @@ services:
    env_file: .env
    networks:
      - ragflow
    restart: unless-stopped
    restart: on-failure
    # https://docs.docker.com/engine/daemon/prometheus/#create-a-prometheus-configuration
    # If you're using Docker Desktop, the --add-host flag is optional. This flag makes sure that the host's internal IP gets exposed to the Prometheus container.
    extra_hosts:
@ -48,7 +48,7 @@ services:
    env_file: .env
    networks:
      - ragflow
    restart: unless-stopped
    restart: on-failure
    # https://docs.docker.com/engine/daemon/prometheus/#create-a-prometheus-configuration
    # If you're using Docker Desktop, the --add-host flag is optional. This flag makes sure that the host's internal IP gets exposed to the Prometheus container.
    extra_hosts:

@ -31,7 +31,7 @@ services:
      retries: 120
    networks:
      - ragflow
    restart: unless-stopped
    restart: on-failure

  opensearch01:
    profiles:
@ -67,12 +67,12 @@ services:
      retries: 120
    networks:
      - ragflow
    restart: unless-stopped
    restart: on-failure

  infinity:
    profiles:
      - infinity
    image: infiniflow/infinity:v0.6.10
    image: infiniflow/infinity:v0.6.8
    volumes:
      - infinity_data:/var/infinity
      - ./infinity_conf.toml:/infinity_conf.toml
@ -94,7 +94,7 @@ services:
      interval: 10s
      timeout: 10s
      retries: 120
    restart: unless-stopped
    restart: on-failure

  oceanbase:
    profiles:
@ -119,7 +119,7 @@ services:
      timeout: 10s
    networks:
      - ragflow
    restart: unless-stopped
    restart: on-failure

  sandbox-executor-manager:
    profiles:
@ -147,7 +147,7 @@ services:
      interval: 10s
      timeout: 10s
      retries: 120
    restart: unless-stopped
    restart: on-failure

  mysql:
    # mysql:5.7 linux/arm64 image is unavailable.
@ -175,7 +175,7 @@ services:
      interval: 10s
      timeout: 10s
      retries: 120
    restart: unless-stopped
    restart: on-failure

  minio:
    image: quay.io/minio/minio:RELEASE.2025-06-13T11-33-47Z
@ -191,7 +191,7 @@ services:
      - minio_data:/data
    networks:
      - ragflow
    restart: unless-stopped
    restart: on-failure
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"]
      interval: 10s
@ -209,7 +209,7 @@ services:
      - redis_data:/data
    networks:
      - ragflow
    restart: unless-stopped
    restart: on-failure
    healthcheck:
      test: ["CMD", "redis-cli", "-a", "${REDIS_PASSWORD}", "ping"]
      interval: 10s
@ -228,7 +228,7 @@ services:
    networks:
      - ragflow
    command: ["--model-id", "/data/${TEI_MODEL}", "--auto-truncate"]
    restart: unless-stopped
    restart: on-failure


  tei-gpu:
@ -249,7 +249,7 @@ services:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    restart: unless-stopped
    restart: on-failure


  kibana:
@ -271,7 +271,7 @@ services:
      retries: 120
    networks:
      - ragflow
    restart: unless-stopped
    restart: on-failure


volumes:

@ -22,7 +22,7 @@ services:
    env_file: .env
    networks:
      - ragflow
    restart: unless-stopped
    restart: on-failure
    # https://docs.docker.com/engine/daemon/prometheus/#create-a-prometheus-configuration
    # If you're using Docker Desktop, the --add-host flag is optional. This flag makes sure that the host's internal IP gets exposed to the Prometheus container.
    extra_hosts:
@ -39,7 +39,7 @@ services:
  #   entrypoint: "/ragflow/entrypoint_task_executor.sh 1 3"
  #   networks:
  #     - ragflow
  #   restart: unless-stopped
  #   restart: on-failure
  #   # https://docs.docker.com/engine/daemon/prometheus/#create-a-prometheus-configuration
  #   # If you're using Docker Desktop, the --add-host flag is optional. This flag makes sure that the host's internal IP gets exposed to the Prometheus container.
  #   extra_hosts:

@ -45,7 +45,7 @@ services:
    env_file: .env
    networks:
      - ragflow
    restart: unless-stopped
    restart: on-failure
    # https://docs.docker.com/engine/daemon/prometheus/#create-a-prometheus-configuration
    # If you use Docker Desktop, the --add-host flag is optional. This flag ensures that the host's internal IP is exposed to the Prometheus container.
    extra_hosts:
@ -94,7 +94,7 @@ services:
    env_file: .env
    networks:
      - ragflow
    restart: unless-stopped
    restart: on-failure
    # https://docs.docker.com/engine/daemon/prometheus/#create-a-prometheus-configuration
    # If you use Docker Desktop, the --add-host flag is optional. This flag ensures that the host's internal IP is exposed to the Prometheus container.
    extra_hosts:
@ -120,7 +120,7 @@ services:
  #   entrypoint: "/ragflow/entrypoint_task_executor.sh 1 3"
  #   networks:
  #     - ragflow
  #   restart: unless-stopped
  #   restart: on-failure
  #   # https://docs.docker.com/engine/daemon/prometheus/#create-a-prometheus-configuration
  #   # If you're using Docker Desktop, the --add-host flag is optional. This flag makes sure that the host's internal IP gets exposed to the Prometheus container.
  #   extra_hosts:

@ -1,5 +1,5 @@
[general]
version = "0.6.10"
version = "0.6.8"
time_zone = "utc-8"

[network]

@ -76,5 +76,5 @@ No. Files uploaded to an agent as input are not stored in a dataset and hence wi
There is no _specific_ file size limit for a file uploaded to an agent. However, note that model providers typically have a default or explicit maximum token setting, which can range from 8192 to 128k. The plain text part of the uploaded file is passed in as the key's value, but if the file's token count exceeds this limit, the string will be truncated and incomplete.

:::tip NOTE
The variables `MAX_CONTENT_LENGTH` in `/docker/.env` and `client_max_body_size` in `/docker/nginx/nginx.conf` set the file size limit for each upload to a dataset or RAGFlow's File system. These settings DO NOT apply in this scenario.
The variables `MAX_CONTENT_LENGTH` in `/docker/.env` and `client_max_body_size` in `/docker/nginx/nginx.conf` set the file size limit for each upload to a dataset or **File Management**. These settings DO NOT apply in this scenario.
:::

@ -9,7 +9,7 @@ Initiate an AI-powered chat with a configured chat assistant.

---

Chats in RAGFlow are based on a particular dataset or multiple datasets. Once you have created your dataset, finished file parsing, and [run a retrieval test](../dataset/run_retrieval_test.md), you can go ahead and start an AI conversation.
Knowledge base, hallucination-free chat, and file management are the three pillars of RAGFlow. Chats in RAGFlow are based on a particular dataset or multiple datasets. Once you have created your dataset, finished file parsing, and [run a retrieval test](../dataset/run_retrieval_test.md), you can go ahead and start an AI conversation.

## Start an AI chat


@ -5,7 +5,7 @@ slug: /configure_knowledge_base

# Configure dataset

Most of RAGFlow's chat assistants and Agents are based on datasets. Each of RAGFlow's datasets serves as a knowledge source, *parsing* files uploaded from your local machine and file references generated in RAGFlow's File system into the real 'knowledge' for future AI chats. This guide demonstrates some basic usages of the dataset feature, covering the following topics:
Most of RAGFlow's chat assistants and Agents are based on datasets. Each of RAGFlow's datasets serves as a knowledge source, *parsing* files uploaded from your local machine and file references generated in **File Management** into the real 'knowledge' for future AI chats. This guide demonstrates some basic usages of the dataset feature, covering the following topics:

- Create a dataset
- Configure a dataset
@ -82,10 +82,10 @@ Some embedding models are optimized for specific languages, so performance may b

### Upload file

- RAGFlow's File system allows you to link a file to multiple datasets, in which case each target dataset holds a reference to the file.
- RAGFlow's **File Management** allows you to link a file to multiple datasets, in which case each target dataset holds a reference to the file.
- In **Knowledge Base**, you are also given the option of uploading a single file or a folder of files (bulk upload) from your local machine to a dataset, in which case the dataset holds file copies.

While uploading files directly to a dataset seems more convenient, we *highly* recommend uploading files to RAGFlow's File system and then linking them to the target datasets. This way, you can avoid permanently deleting files uploaded to the dataset.
While uploading files directly to a dataset seems more convenient, we *highly* recommend uploading files to **File Management** and then linking them to the target datasets. This way, you can avoid permanently deleting files uploaded to the dataset.

### Parse file

@ -142,6 +142,6 @@ As of RAGFlow v0.22.1, the search feature is still in a rudimentary form, suppor
You are allowed to delete a dataset. Hover your mouse over the three-dot icon of the intended dataset card and the **Delete** option appears. Once you delete a dataset, the associated folder under **root/.knowledge** directory is AUTOMATICALLY REMOVED. The consequence is:

- The files uploaded directly to the dataset are gone;
- The file references, which you created from within RAGFlow's File system, are gone, but the associated files still exist.
- The file references, which you created from within **File Management**, are gone, but the associated files still exist in **File Management**.



@ -419,11 +419,17 @@ Creates a dataset.
- `"embedding_model"`: `string`
- `"permission"`: `string`
- `"chunk_method"`: `string`
- `"parser_config"`: `object`
- `"parse_type"`: `int`
- `"pipeline_id"`: `string`
- "parser_config": `object`
- "parse_type": `int`
- "pipeline_id": `string`

##### A basic request example
Note: Choose exactly one ingestion mode when creating a dataset.
- Chunking method: provide `"chunk_method"` (optionally with `"parser_config"`).
- Ingestion pipeline: provide both `"parse_type"` and `"pipeline_id"` and do not provide `"chunk_method"`.

These options are mutually exclusive. If all three of `chunk_method`, `parse_type`, and `pipeline_id` are omitted, the system defaults to `chunk_method = "naive"`.

##### Request example

```bash
curl --request POST \
@ -435,11 +441,9 @@ curl --request POST \
  }'
```

##### A request example specifying ingestion pipeline
##### Request example (ingestion pipeline)

:::caution WARNING
You must *not* include `"chunk_method"` or `"parser_config"` when specifying an ingestion pipeline.
:::
Use this form when specifying an ingestion pipeline (do not include `chunk_method`).

```bash
curl --request POST \
@ -448,11 +452,15 @@ curl --request POST \
  --header 'Authorization: Bearer <YOUR_API_KEY>' \
  --data '{
    "name": "test-sdk",
    "parse_type": <NUMBER_OF_PARSERS_IN_YOUR_PARSER_COMPONENT>,
    "parse_type": <NUMBER_OF_FORMATS_IN_PARSE>,
    "pipeline_id": "<PIPELINE_ID_32_HEX>"
  }'
```

Notes:
- `parse_type` is an integer. Replace `<NUMBER_OF_FORMATS_IN_PARSE>` with your pipeline's parse-type value.
- `pipeline_id` must be a 32-character lowercase hexadecimal string.
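
For readers scripting against this endpoint, here is the same ingestion-pipeline request sketched in Python. This is an illustrative snippet, not an official client: the base URL and API key are placeholders, the `/api/v1/datasets` path is assumed from RAGFlow's HTTP API reference, and the `parse_type` value and pipeline ID simply reuse the examples above.

```python
import requests  # third-party HTTP client

BASE_URL = "http://localhost:9380"  # placeholder; use your server's address
API_KEY = "<YOUR_API_KEY>"          # placeholder

# Ingestion-pipeline mode: send parse_type + pipeline_id and omit chunk_method.
resp = requests.post(
    f"{BASE_URL}/api/v1/datasets",
    headers={"Authorization": f"Bearer {API_KEY}",
             "Content-Type": "application/json"},
    json={
        "name": "test-sdk",
        "parse_type": 2,  # illustrative integer; see the note above
        "pipeline_id": "d0bebe30ae2211f0970942010a8e0005",  # example 32-hex ID
    },
)
print(resp.json())  # a "code" of 0 in the response body indicates success
```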

##### Request parameters

- `"name"`: (*Body parameter*), `string`, *Required*
@ -480,8 +488,7 @@ curl --request POST \
  - `"team"`: All team members can manage the dataset.

- `"chunk_method"`: (*Body parameter*), `enum<string>`
  The default chunk method of the dataset to create. Mutually exclusive with `"parse_type"` and `"pipeline_id"`. If you set `"chunk_method"`, do not include `"parse_type"` or `"pipeline_id"`.
  Available options:
  The chunking method of the dataset to create. Available options:
  - `"naive"`: General (default)
  - `"book"`: Book
  - `"email"`: Email
@ -494,6 +501,7 @@ curl --request POST \
  - `"qa"`: Q&A
  - `"table"`: Table
  - `"tag"`: Tag
  - Mutually exclusive with `parse_type` and `pipeline_id`. If you set `chunk_method`, do not include `parse_type` or `pipeline_id`.

- `"parser_config"`: (*Body parameter*), `object`
  The configuration settings for the dataset parser. The attributes in this JSON object vary with the selected `"chunk_method"`:
@ -512,16 +520,13 @@ curl --request POST \
    - Maximum: `2048`
  - `"delimiter"`: `string`
    - Defaults to `"\n"`.
  - `"html4excel"`: `bool`
    - Whether to convert Excel documents into HTML format.
  - `"html4excel"`: `bool` Indicates whether to convert Excel documents into HTML format.
    - Defaults to `false`
  - `"layout_recognize"`: `string`
    - Defaults to `DeepDOC`
  - `"tag_kb_ids"`: `array<string>`
    - IDs of datasets to be parsed using the Tag chunk method.
    - Before setting this, ensure a tag set is created and properly configured. For details, see [Use tag set](https://ragflow.io/docs/dev/use_tag_sets).
  - `"task_page_size"`: `int`
    - For PDFs only.
  - `"tag_kb_ids"`: `array<string>` refer to [Use tag set](https://ragflow.io/docs/dev/use_tag_sets)
    - Must include a list of dataset IDs, where each dataset is parsed using the Tag Chunking Method
  - `"task_page_size"`: `int` For PDF only.
    - Defaults to `12`
    - Minimum: `1`
  - `"raptor"`: `object` RAPTOR-specific settings.
@ -533,25 +538,14 @@ curl --request POST \
    - Defaults to: `{"use_raptor": false}`.
  - If `"chunk_method"` is `"table"`, `"picture"`, `"one"`, or `"email"`, `"parser_config"` is an empty JSON object.

- `"parse_type"`: (*Body parameter*), `int`
  The ingestion pipeline parse type identifier, i.e., the number of parsers in your **Parser** component.
  - Required (along with `"pipeline_id"`) if specifying an ingestion pipeline.
  - Must not be included when `"chunk_method"` is specified.
- "parse_type": (*Body parameter*), `int`
  The ingestion pipeline parse type identifier. Required if and only if you are using an ingestion pipeline (together with `"pipeline_id"`). Must not be provided when `"chunk_method"` is set.

- `"pipeline_id"`: (*Body parameter*), `string`
  The ingestion pipeline ID. Can be found in the corresponding URL in the RAGFlow UI.
  - Required (along with `"parse_type"`) if specifying an ingestion pipeline.
  - Must be a 32-character lowercase hexadecimal string, e.g., `"d0bebe30ae2211f0970942010a8e0005"`.
  - Must not be included when `"chunk_method"` is specified.
- "pipeline_id": (*Body parameter*), `string`
  The ingestion pipeline ID. Required if and only if you are using an ingestion pipeline (together with `"parse_type"`).
  - Must not be provided when `"chunk_method"` is set.

:::caution WARNING
You can choose either of the following ingestion options when creating a dataset, but *not* both:

- Use a built-in chunk method -- specify `"chunk_method"` (optionally with `"parser_config"`).
- Use an ingestion pipeline -- specify both `"parse_type"` and `"pipeline_id"`.

If none of `"chunk_method"`, `"parse_type"`, or `"pipeline_id"` are provided, the system defaults to `chunk_method = "naive"`.
:::
Note: If none of `chunk_method`, `parse_type`, and `pipeline_id` are provided, the system will default to `chunk_method = "naive"`.

#### Response


@ -43,6 +43,7 @@ def get_urls(use_china_mirrors=False) -> list[Union[str, list[str]]]:
    repos = [
        "InfiniFlow/text_concat_xgb_v1.0",
        "InfiniFlow/deepdoc",
        "InfiniFlow/huqie",
    ]



@ -57,7 +57,7 @@ async def run_graphrag(
    start = trio.current_time()
    tenant_id, kb_id, doc_id = row["tenant_id"], str(row["kb_id"]), row["doc_id"]
    chunks = []
    for d in settings.retriever.chunk_list(doc_id, tenant_id, [kb_id], max_count=10000, fields=["content_with_weight", "doc_id"], sort_by_position=True):
    for d in settings.retriever.chunk_list(doc_id, tenant_id, [kb_id], fields=["content_with_weight", "doc_id"], sort_by_position=True):
        chunks.append(d["content_with_weight"])

    with trio.fail_after(max(120, len(chunks) * 60 * 10) if enable_timeout_assertion else 10000000000):
@ -174,19 +174,13 @@ async def run_graphrag_for_kb(
    chunks = []
    current_chunk = ""

    # DEBUG: fetch all chunks first
    raw_chunks = list(settings.retriever.chunk_list(
    for d in settings.retriever.chunk_list(
        doc_id,
        tenant_id,
        [kb_id],
        max_count=10000,  # FIX: raise the limit so all chunks are processed
        fields=fields_for_chunks,
        sort_by_position=True,
    ))

    callback(msg=f"[DEBUG] chunk_list() returned {len(raw_chunks)} raw chunks for doc {doc_id}")

    for d in raw_chunks:
    ):
        content = d["content_with_weight"]
        if num_tokens_from_string(current_chunk + content) < 1024:
            current_chunk += content

@ -96,7 +96,7 @@ ragflow:
infinity:
  image:
    repository: infiniflow/infinity
    tag: v0.6.10
    tag: v0.6.8
    pullPolicy: IfNotPresent
    pullSecrets: []
  storage:

@ -57,6 +57,7 @@ JSON_RESPONSE = True

class RAGFlowConnector:
    _MAX_DATASET_CACHE = 32
    _MAX_DOCUMENT_CACHE = 128
    _CACHE_TTL = 300

    _dataset_metadata_cache: OrderedDict[str, tuple[dict, float | int]] = OrderedDict()  # "dataset_id" -> (metadata, expiry_ts)
@ -115,6 +116,8 @@ class RAGFlowConnector:
    def _set_cached_document_metadata_by_dataset(self, dataset_id, doc_id_meta_list):
        self._document_metadata_cache[dataset_id] = (doc_id_meta_list, self._get_expiry_timestamp())
        self._document_metadata_cache.move_to_end(dataset_id)
        if len(self._document_metadata_cache) > self._MAX_DOCUMENT_CACHE:
            self._document_metadata_cache.popitem(last=False)

    def list_datasets(self, page: int = 1, page_size: int = 1000, orderby: str = "create_time", desc: bool = True, id: str | None = None, name: str | None = None):
        res = self._get("/datasets", {"page": page, "page_size": page_size, "orderby": orderby, "desc": desc, "id": id, "name": name})
@ -237,46 +240,46 @@ class RAGFlowConnector:

            docs = None if force_refresh else self._get_cached_document_metadata_by_dataset(dataset_id)
            if docs is None:
                page = 1
                page_size = 30
                doc_id_meta_list = []
                docs = {}
                while page:
                    docs_res = self._get(f"/datasets/{dataset_id}/documents?page={page}")
                    docs_data = docs_res.json()
                    if docs_data.get("code") == 0 and docs_data.get("data", {}).get("docs"):
                        for doc in docs_data["data"]["docs"]:
                            doc_id = doc.get("id")
                            if not doc_id:
                                continue
                            doc_meta = {
                                "document_id": doc_id,
                                "name": doc.get("name", ""),
                                "location": doc.get("location", ""),
                                "type": doc.get("type", ""),
                                "size": doc.get("size"),
                                "chunk_count": doc.get("chunk_count"),
                                "create_date": doc.get("create_date", ""),
                                "update_date": doc.get("update_date", ""),
                                "token_count": doc.get("token_count"),
                                "thumbnail": doc.get("thumbnail", ""),
                                "dataset_id": doc.get("dataset_id", dataset_id),
                                "meta_fields": doc.get("meta_fields", {}),
                            }
                            doc_id_meta_list.append((doc_id, doc_meta))
                            docs[doc_id] = doc_meta

                    page += 1
                    if docs_data.get("data", {}).get("total", 0) - page * page_size <= 0:
                        page = None

                docs_res = self._get(f"/datasets/{dataset_id}/documents")
                docs_data = docs_res.json()
                if docs_data.get("code") == 0 and docs_data.get("data", {}).get("docs"):
                    doc_id_meta_list = []
                    docs = {}
                    for doc in docs_data["data"]["docs"]:
                        doc_id = doc.get("id")
                        if not doc_id:
                            continue
                        doc_meta = {
                            "document_id": doc_id,
                            "name": doc.get("name", ""),
                            "location": doc.get("location", ""),
                            "type": doc.get("type", ""),
                            "size": doc.get("size"),
                            "chunk_count": doc.get("chunk_count"),
                            # "chunk_method": doc.get("chunk_method", ""),
                            "create_date": doc.get("create_date", ""),
                            "update_date": doc.get("update_date", ""),
                            # "process_begin_at": doc.get("process_begin_at", ""),
                            # "process_duration": doc.get("process_duration"),
                            # "progress": doc.get("progress"),
                            # "progress_msg": doc.get("progress_msg", ""),
                            # "status": doc.get("status", ""),
                            # "run": doc.get("run", ""),
                            "token_count": doc.get("token_count"),
                            # "source_type": doc.get("source_type", ""),
                            "thumbnail": doc.get("thumbnail", ""),
                            "dataset_id": doc.get("dataset_id", dataset_id),
                            "meta_fields": doc.get("meta_fields", {}),
                            # "parser_config": doc.get("parser_config", {})
                        }
                        doc_id_meta_list.append((doc_id, doc_meta))
                        docs[doc_id] = doc_meta
                self._set_cached_document_metadata_by_dataset(dataset_id, doc_id_meta_list)
                if docs:
                    document_cache.update(docs)

        except Exception as e:
        except Exception:
            # Gracefully handle metadata cache failures
            logging.error(f"Problem building the document metadata cache: {str(e)}")
            pass

        return document_cache, dataset_cache

@ -49,7 +49,7 @@ dependencies = [
    "html-text==0.6.2",
    "httpx[socks]>=0.28.1,<0.29.0",
    "huggingface-hub>=0.25.0,<0.26.0",
    "infinity-sdk==0.6.10",
    "infinity-sdk==0.6.8",
    "infinity-emb>=0.0.66,<0.0.67",
    "itsdangerous==2.1.2",
    "json-repair==0.35.0",
@ -131,6 +131,7 @@ dependencies = [
    "graspologic @ git+https://github.com/yuzhichang/graspologic.git@38e680cab72bc9fb68a7992c3bcc2d53b24e42fd",
    "mini-racer>=0.12.4,<0.13.0",
    "pyodbc>=5.2.0,<6.0.0",
    "pyicu>=2.15.3,<3.0.0",
    "flasgger>=0.9.7.1,<0.10.0",
    "xxhash>=3.5.0,<4.0.0",
    "trio>=0.17.0,<0.29.0",
@ -162,9 +163,6 @@ test = [
    "openpyxl>=3.1.5",
    "pillow>=10.4.0",
    "pytest>=8.3.5",
    "pytest-asyncio>=1.3.0",
    "pytest-xdist>=3.8.0",
    "pytest-cov>=7.0.0",
    "python-docx>=1.1.2",
    "python-pptx>=1.0.2",
    "reportlab>=4.4.1",
@ -197,83 +195,8 @@ extend-select = ["ASYNC", "ASYNC1"]
ignore = ["E402"]

[tool.pytest.ini_options]
pythonpath = [
    "."
]

testpaths = ["test"]
python_files = ["test_*.py"]
python_classes = ["Test*"]
python_functions = ["test_*"]

markers = [
    "p1: high priority test cases",
    "p2: medium priority test cases",
    "p3: low priority test cases",
]

# Test collection and runtime configuration
filterwarnings = [
    "error",                       # Treat warnings as errors
    "ignore::DeprecationWarning",  # Ignore specific warnings
]

# Command line options
addopts = [
    "-v",                  # Verbose output
    "--strict-markers",    # Enforce marker definitions
    "--tb=short",          # Simplified traceback
    "--disable-warnings",  # Disable warnings
    "--color=yes"          # Colored output
]


# Coverage configuration
[tool.coverage.run]
# Source paths - adjust according to your project structure
source = [
    # "../../api/db/services",
    # Add more directories if needed:
    "../../common",
    # "../../utils",
]

# Files/directories to exclude
omit = [
    "*/tests/*",
    "*/test_*",
    "*/__pycache__/*",
    "*/.pytest_cache/*",
    "*/venv/*",
    "*/.venv/*",
    "*/env/*",
    "*/site-packages/*",
    "*/dist/*",
    "*/build/*",
    "*/migrations/*",
    "setup.py"
]

[tool.coverage.report]
# Report configuration
precision = 2
show_missing = true
skip_covered = false
fail_under = 0  # Minimum coverage requirement (0-100)

# Lines to exclude (optional)
exclude_lines = [
    # "pragma: no cover",
    # "def __repr__",
    # "raise AssertionError",
    # "raise NotImplementedError",
    # "if __name__ == .__main__.:",
    # "if TYPE_CHECKING:",
    "pass"
]

[tool.coverage.html]
# HTML report configuration
directory = "htmlcov"
title = "Test Coverage Report"
# extra_css = "custom.css"  # Optional custom CSS
@ -14,5 +14,5 @@
# limitations under the License.
#

# from beartype.claw import beartype_this_package
# beartype_this_package()
from beartype.claw import beartype_this_package
beartype_this_package()

@ -219,27 +219,23 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
        )

    def _normalize_section(section):
        # Pad/normalize to (txt, layout, positions)
        if not isinstance(section, (list, tuple)):
            section = (section, "", [])
        elif len(section) == 1:
        # pad section to length 3: (txt, sec_id, poss)
        if len(section) == 1:
            section = (section[0], "", [])
        elif len(section) == 2:
            section = (section[0], "", section[1])
        else:
            section = (section[0], section[1], section[2])
        elif len(section) != 3:
            raise ValueError(f"Unexpected section length: {len(section)} (value={section!r})")

        txt, layoutno, poss = section
        if isinstance(poss, str):
            poss = pdf_parser.extract_positions(poss)
        if poss:
            first = poss[0]  # tuple: ([pn], x1, x2, y1, y2)
            pn = first[0]
            if isinstance(pn, list) and pn:
                pn = pn[0]  # [pn] -> pn
        first = poss[0]  # tuple: ([pn], x1, x2, y1, y2)
        pn = first[0]

        if isinstance(pn, list):
            pn = pn[0]  # [pn] -> pn
            poss[0] = (pn, *first[1:])
        if not poss:
            poss = []

        return (txt, layoutno, poss)


@ -86,11 +86,9 @@ class Pdf(PdfParser):

        # (A) Add text
        for b in self.boxes:
            # b["page_number"] is a page number relative to from_page, so add from_page back
            global_page_num = b["page_number"] + from_page
            if not (from_page < global_page_num <= to_page + from_page):
            if not (from_page < b["page_number"] <= to_page + from_page):
                continue
            page_items[global_page_num].append({
            page_items[b["page_number"]].append({
                "top": b["top"],
                "x0": b["x0"],
                "text": b["text"],
@ -102,6 +100,7 @@ class Pdf(PdfParser):
            if not positions:
                continue

            # Handle content type (list vs str)
            if isinstance(content, list):
                final_text = "\n".join(content)
            elif isinstance(content, str):
@ -110,11 +109,10 @@ class Pdf(PdfParser):
                final_text = str(content)

            try:
                # Parse positions
                pn_index = positions[0][0]
                if isinstance(pn_index, list):
                    pn_index = pn_index[0]

                # pn_index in tbls is an absolute page number
                current_page_num = int(pn_index) + 1
            except Exception as e:
                print(f"Error parsing position: {e}")

@ -12,17 +12,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import logging
import random
from copy import deepcopy, copy

import trio
import xxhash

from copy import deepcopy
from agent.component.llm import LLMParam, LLM
from rag.flow.base import ProcessBase, ProcessParamBase
from rag.prompts.generator import run_toc_from_text


class ExtractorParam(ProcessParamBase, LLMParam):
@ -38,38 +31,6 @@ class ExtractorParam(ProcessParamBase, LLMParam):
class Extractor(ProcessBase, LLM):
    component_name = "Extractor"

    def _build_TOC(self, docs):
        self.callback(message="Start to generate table of content ...")
        docs = sorted(docs, key=lambda d: (
            d.get("page_num_int", 0)[0] if isinstance(d.get("page_num_int", 0), list) else d.get("page_num_int", 0),
            d.get("top_int", 0)[0] if isinstance(d.get("top_int", 0), list) else d.get("top_int", 0)
        ))
        toc: list[dict] = trio.run(run_toc_from_text, [d["text"] for d in docs], self.chat_mdl)
        logging.info("------------ T O C -------------\n" + json.dumps(toc, ensure_ascii=False, indent=' '))
        ii = 0
        while ii < len(toc):
            try:
                idx = int(toc[ii]["chunk_id"])
                del toc[ii]["chunk_id"]
                toc[ii]["ids"] = [docs[idx]["id"]]
                if ii == len(toc) - 1:
                    break
                for jj in range(idx + 1, int(toc[ii + 1]["chunk_id"]) + 1):
                    toc[ii]["ids"].append(docs[jj]["id"])
            except Exception as e:
                logging.exception(e)
            ii += 1

        if toc:
            d = deepcopy(docs[-1])
            d["content_with_weight"] = json.dumps(toc, ensure_ascii=False)
            d["toc_kwd"] = "toc"
            d["available_int"] = 0
            d["page_num_int"] = [100000000]
            d["id"] = xxhash.xxh64((d["content_with_weight"] + str(d["doc_id"])).encode("utf-8", "surrogatepass")).hexdigest()
            return d
        return None

    async def _invoke(self, **kwargs):
        self.set_output("output_format", "chunks")
        self.callback(random.randint(1, 5) / 100.0, "Start to generate.")
@ -84,12 +45,6 @@ class Extractor(ProcessBase, LLM):
        chunks_key = k

        if chunks:
            if self._param.field_name == "toc":
                toc = self._build_TOC(chunks)
                chunks.append(toc)
                self.set_output("chunks", chunks)
                return

        prog = 0
        for i, ck in enumerate(chunks):
            args[chunks_key] = ck["text"]

@ -1685,17 +1685,12 @@ class LiteLLMBase(ABC):

            yield ans, tol

    async def async_chat(self, system, history, gen_conf, **kwargs):
        hist = list(history) if history else []
        if system:
            if not hist or hist[0].get("role") != "system":
                hist.insert(0, {"role": "system", "content": system})

        logging.info("[HISTORY]" + json.dumps(hist, ensure_ascii=False, indent=2))
    async def async_chat(self, history, gen_conf, **kwargs):
        logging.info("[HISTORY]" + json.dumps(history, ensure_ascii=False, indent=2))
        if self.model_name.lower().find("qwen3") >= 0:
            kwargs["extra_body"] = {"enable_thinking": False}

        completion_args = self._construct_completion_args(history=hist, stream=False, tools=False, **gen_conf)
        completion_args = self._construct_completion_args(history=history, stream=False, tools=False, **gen_conf)

        for attempt in range(self.max_retries + 1):
            try:

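The two signatures in this hunk differ in how the system prompt travels: one takes it as a separate argument and prepends it to the history only when no system message is already present, the other expects it embedded in the history. A minimal sketch of that prepend-if-absent pattern (hypothetical helper, not the LiteLLMBase API):

def with_system(system, history):
    # Prepend the system message only if the history does not already
    # start with one, so callers can safely pass either form.
    hist = list(history) if history else []
    if system and (not hist or hist[0].get("role") != "system"):
        hist.insert(0, {"role": "system", "content": system})
    return hist

assert with_system("be brief", [{"role": "user", "content": "hi"}])[0]["role"] == "system"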
@ -537,8 +537,7 @@ class Dealer:
        doc["id"] = id
    if dict_chunks:
        res.extend(dict_chunks.values())
    # FIX: Only stop when there are no chunks, not when there are fewer than bs
    if len(dict_chunks.values()) == 0:
    if len(dict_chunks.values()) < bs:
        break
    return res

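The two conditions differ in when pagination stops: one stops only when a fetch returns nothing at all, the other as soon as a page comes back shorter than bs, which can drop data on backends that legitimately return short pages mid-stream. A minimal sketch of the stop-on-empty loop shape (hypothetical fetch_page callable, not the Dealer API):

def fetch_all(fetch_page, bs=128):
    # Keep paging until a fetch returns no rows; a short page
    # (fewer than bs rows) is not treated as the end of the data.
    res, page = [], 0
    while True:
        rows = fetch_page(page, bs)
        if not rows:
            break
        res.extend(rows)
        page += 1
    return res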
@ -13,7 +13,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
import asyncio
import datetime
import json
import logging
@ -343,8 +342,7 @@ def form_history(history, limit=-6):
    return context


async def analyze_task_async(chat_mdl, prompt, task_name, tools_description: list[dict], user_defined_prompts: dict={}):
def analyze_task(chat_mdl, prompt, task_name, tools_description: list[dict], user_defined_prompts: dict={}):
    tools_desc = tool_schema(tools_description)
    context = ""

@ -353,7 +351,7 @@ async def analyze_task_async(chat_mdl, prompt, task_name, tools_description: lis
    else:
        template = PROMPT_JINJA_ENV.from_string(ANALYZE_TASK_SYSTEM + "\n\n" + ANALYZE_TASK_USER)
    context = template.render(task=task_name, context=context, agent_prompt=prompt, tools_desc=tools_desc)
    kwd = await _chat_async(chat_mdl, context, [{"role": "user", "content": "Please analyze it."}])
    kwd = chat_mdl.chat(context, [{"role": "user", "content": "Please analyze it."}])
    if isinstance(kwd, tuple):
        kwd = kwd[0]
    kwd = re.sub(r"^.*</think>", "", kwd, flags=re.DOTALL)
@ -362,17 +360,9 @@
    return kwd


async def _chat_async(chat_mdl, system: str, history: list, **kwargs):
    chat_async = getattr(chat_mdl, "async_chat", None)
    if chat_async and asyncio.iscoroutinefunction(chat_async):
        return await chat_async(system, history, **kwargs)
    return await asyncio.to_thread(chat_mdl.chat, system, history, **kwargs)

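The _chat_async helper above is what lets these rewritten coroutines accept both model flavors: a native async_chat coroutine when one exists, otherwise the blocking chat run in a worker thread. A self-contained sketch of the same fallback pattern (hypothetical model classes, for illustration only):

import asyncio

class SyncModel:
    def chat(self, system, history):
        return f"sync:{system}:{len(history)}"

class AsyncModel(SyncModel):
    async def async_chat(self, system, history):
        return f"async:{system}:{len(history)}"

async def chat_any(mdl, system, history):
    # Prefer the coroutine API; otherwise run the blocking chat() in a
    # worker thread so the event loop is not stalled.
    fn = getattr(mdl, "async_chat", None)
    if fn and asyncio.iscoroutinefunction(fn):
        return await fn(system, history)
    return await asyncio.to_thread(mdl.chat, system, history)

print(asyncio.run(chat_any(AsyncModel(), "s", [])))  # async:s:0
print(asyncio.run(chat_any(SyncModel(), "s", [])))   # sync:s:0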
async def next_step_async(chat_mdl, history:list, tools_description: list[dict], task_desc, user_defined_prompts: dict={}):
def next_step(chat_mdl, history:list, tools_description: list[dict], task_desc, user_defined_prompts: dict={}):
    if not tools_description:
        return "", 0
        return ""
    desc = tool_schema(tools_description)
    template = PROMPT_JINJA_ENV.from_string(user_defined_prompts.get("plan_generation", NEXT_STEP))
    user_prompt = "\nWhat's the next tool to call? If ready OR IMPOSSIBLE TO BE READY, then call `complete_task`."
@ -381,18 +371,14 @@ async def next_step_async(chat_mdl, history:list, tools_description: list[dict],
        hist[-1]["content"] += user_prompt
    else:
        hist.append({"role": "user", "content": user_prompt})
    json_str = await _chat_async(
        chat_mdl,
        template.render(task_analysis=task_desc, desc=desc, today=datetime.datetime.now().strftime("%Y-%m-%d")),
        hist[1:],
        stop=["<|stop|>"],
    )
    json_str = chat_mdl.chat(template.render(task_analysis=task_desc, desc=desc, today=datetime.datetime.now().strftime("%Y-%m-%d")),
                             hist[1:], stop=["<|stop|>"])
    tk_cnt = num_tokens_from_string(json_str)
    json_str = re.sub(r"^.*</think>", "", json_str, flags=re.DOTALL)
    return json_str, tk_cnt


async def reflect_async(chat_mdl, history: list[dict], tool_call_res: list[Tuple], user_defined_prompts: dict={}):
def reflect(chat_mdl, history: list[dict], tool_call_res: list[Tuple], user_defined_prompts: dict={}):
    tool_calls = [{"name": p[0], "result": p[1]} for p in tool_call_res]
    goal = history[1]["content"]
    template = PROMPT_JINJA_ENV.from_string(user_defined_prompts.get("reflection", REFLECT))
@ -403,7 +389,7 @@ async def reflect_async(chat_mdl, history: list[dict], tool_call_res: list[Tuple
    else:
        hist.append({"role": "user", "content": user_prompt})
    _, msg = message_fit_in(hist, chat_mdl.max_length)
    ans = await _chat_async(chat_mdl, msg[0]["content"], msg[1:])
    ans = chat_mdl.chat(msg[0]["content"], msg[1:])
    ans = re.sub(r"^.*</think>", "", ans, flags=re.DOTALL)
    return """
**Observation**
@ -434,12 +420,12 @@ def tool_call_summary(chat_mdl, name: str, params: dict, result: str, user_defin
    return re.sub(r"^.*</think>", "", ans, flags=re.DOTALL)


async def rank_memories_async(chat_mdl, goal:str, sub_goal:str, tool_call_summaries: list[str], user_defined_prompts: dict={}):
def rank_memories(chat_mdl, goal:str, sub_goal:str, tool_call_summaries: list[str], user_defined_prompts: dict={}):
    template = PROMPT_JINJA_ENV.from_string(RANK_MEMORY)
    system_prompt = template.render(goal=goal, sub_goal=sub_goal, results=[{"i": i, "content": s} for i,s in enumerate(tool_call_summaries)])
    user_prompt = " → rank: "
    _, msg = message_fit_in(form_message(system_prompt, user_prompt), chat_mdl.max_length)
    ans = await _chat_async(chat_mdl, msg[0]["content"], msg[1:], stop="<|stop|>")
    ans = chat_mdl.chat(msg[0]["content"], msg[1:], stop="<|stop|>")
    return re.sub(r"^.*</think>", "", ans, flags=re.DOTALL)


@ -511,7 +497,7 @@ def toc_index_extractor(toc:list[dict], content:str, chat_mdl):

The structure variable is the numeric system which represents the index of the hierarchy section in the table of contents. For example, the first section has structure index 1, the first subsection has structure index 1.1, the second subsection has structure index 1.2, etc.

The response should be in the following JSON format:
The response should be in the following JSON format:
[
    {
        "structure": <structure index, "x.x.x" or None> (string),
@ -638,8 +624,8 @@ def toc_transformer(toc_pages, chat_mdl):

The `structure` is the numeric system which represents the index of the hierarchy section in the table of contents. For example, the first section has structure index 1, the first subsection has structure index 1.1, the second subsection has structure index 1.2, etc.
The `title` is a short phrase or a several-words term.

The response should be in the following JSON format:

The response should be in the following JSON format:
[
    {
        "structure": <structure index, "x.x.x" or None> (string),
@ -664,7 +650,7 @@ def toc_transformer(toc_pages, chat_mdl):
    while not (if_complete == "yes"):
        prompt = f"""
Your task is to continue the table of contents json structure, directly output the remaining part of the json structure.
The response should be in the following JSON format:
The response should be in the following JSON format:

The raw table of contents json structure is:
{toc_content}
@ -753,7 +739,7 @@ async def run_toc_from_text(chunks, chat_mdl, callback=None):

    for chunk in chunks_res:
        titles.extend(chunk.get("toc", []))


    # Filter out entries with title == -1
    prune = len(titles) > 512
    max_len = 12 if prune else 22

555629
rag/res/huqie.txt
Normal file
File diff suppressed because it is too large
@ -157,30 +157,11 @@ class Confluence(SyncBase):
from common.data_source.config import DocumentSource
from common.data_source.interfaces import StaticCredentialsProvider

index_mode = (self.conf.get("index_mode") or "everything").lower()
if index_mode not in {"everything", "space", "page"}:
    index_mode = "everything"

space = ""
page_id = ""

index_recursively = False
if index_mode == "space":
    space = (self.conf.get("space") or "").strip()
    if not space:
        raise ValueError("Space Key is required when indexing a specific Confluence space.")
elif index_mode == "page":
    page_id = (self.conf.get("page_id") or "").strip()
    if not page_id:
        raise ValueError("Page ID is required when indexing a specific Confluence page.")
    index_recursively = bool(self.conf.get("index_recursively", False))

self.connector = ConfluenceConnector(
    wiki_base=self.conf["wiki_base"],
    space=self.conf.get("space", ""),
    is_cloud=self.conf.get("is_cloud", True),
    space=space,
    page_id=page_id,
    index_recursively=index_recursively,
    # page_id=self.conf.get("page_id", ""),
)

credentials_provider = StaticCredentialsProvider(tenant_id=task["tenant_id"], connector_name=DocumentSource.CONFLUENCE, credential_json=self.conf["credentials"])

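For orientation, a hedged sketch of the configuration shapes the index_mode validation above expects (field names taken from the diff; values are invented):

# Index everything the credentials can see (the default).
conf_everything = {"wiki_base": "https://example.atlassian.net/wiki", "index_mode": "everything"}

# Index one space; "space" is required in this mode.
conf_space = {"wiki_base": "https://example.atlassian.net/wiki", "index_mode": "space", "space": "ENG"}

# Index one page, optionally with its children.
conf_page = {"wiki_base": "https://example.atlassian.net/wiki", "index_mode": "page", "page_id": "123456", "index_recursively": True}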
@ -29,7 +29,6 @@ from api.db.services.knowledgebase_service import KnowledgebaseService
from api.db.services.pipeline_operation_log_service import PipelineOperationLogService
from common.connection_utils import timeout
from rag.utils.base64_image import image2id
from rag.utils.raptor_utils import should_skip_raptor, get_skip_reason
from common.log_utils import init_root_logger
from common.config_utils import show_configs
from graphrag.general.index import run_graphrag_for_kb
@ -69,7 +68,7 @@ from common.signal_utils import start_tracemalloc_and_snapshot, stop_tracemalloc
from common.exceptions import TaskCanceledException
from common import settings
from common.constants import PAGERANK_FLD, TAG_FLD, SVR_CONSUMER_GROUP_NAME
from common.misc_utils import check_and_install_mineru
from common.misc_utils import install_mineru

BATCH_SIZE = 64
@ -854,17 +853,6 @@ async def do_handle_task(task):
    progress_callback(prog=-1.0, msg="Internal error: Invalid RAPTOR configuration")
    return

    # Check if Raptor should be skipped for structured data
    file_type = task.get("type", "")
    parser_id = task.get("parser_id", "")
    raptor_config = kb_parser_config.get("raptor", {})

    if should_skip_raptor(file_type, parser_id, task_parser_config, raptor_config):
        skip_reason = get_skip_reason(file_type, parser_id, task_parser_config)
        logging.info(f"Skipping Raptor for document {task_document_name}: {skip_reason}")
        progress_callback(prog=1.0, msg=f"Raptor skipped: {skip_reason}")
        return

    # bind LLM for raptor
    chat_model = LLMBundle(task_tenant_id, LLMType.CHAT, llm_name=task_llm_id, lang=task_language)
    # run RAPTOR
@ -956,7 +944,7 @@ async def do_handle_task(task):
    logging.info(progress_message)
    progress_callback(msg=progress_message)
    if task["parser_id"].lower() == "naive" and task["parser_config"].get("toc_extraction", False):
        toc_thread = executor.submit(build_TOC, task, chunks, progress_callback)
        toc_thread = executor.submit(build_TOC,task, chunks, progress_callback)

    chunk_count = len(set([chunk["id"] for chunk in chunks]))
    start_ts = timer()
@ -1113,8 +1101,8 @@ async def main():
    show_configs()
    settings.init_settings()
    settings.check_and_install_torch()
    check_and_install_mineru()
    logging.info(f'default embedding config: {settings.EMBEDDING_CFG}')
    install_mineru()
    logging.info(f'settings.EMBEDDING_CFG: {settings.EMBEDDING_CFG}')
    settings.print_rag_settings()
    if sys.platform != "win32":
        signal.signal(signal.SIGUSR1, start_tracemalloc_and_snapshot)

@ -1,207 +0,0 @@
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import logging
import time
import datetime
from io import BytesIO
from google.cloud import storage
from google.api_core.exceptions import NotFound
from common.decorator import singleton
from common import settings


@singleton
class RAGFlowGCS:
    def __init__(self):
        self.client = None
        self.bucket_name = None
        self.__open__()

    def __open__(self):
        try:
            if self.client:
                self.client = None
        except Exception:
            pass

        try:
            self.client = storage.Client()
            self.bucket_name = settings.GCS["bucket"]
        except Exception:
            logging.exception("Fail to connect to GCS")

    def _get_blob_path(self, folder, filename):
        """Helper to construct the path: folder/filename"""
        if not folder:
            return filename
        return f"{folder}/{filename}"

    def health(self):
        folder, fnm, binary = "ragflow-health", "health_check", b"_t@@@1"
        try:
            bucket_obj = self.client.bucket(self.bucket_name)
            if not bucket_obj.exists():
                logging.error(f"Health check failed: Main bucket '{self.bucket_name}' does not exist.")
                return False

            blob_path = self._get_blob_path(folder, fnm)
            blob = bucket_obj.blob(blob_path)
            blob.upload_from_file(BytesIO(binary), content_type='application/octet-stream')
            return True
        except Exception as e:
            logging.exception(f"Health check failed: {e}")
            return False

    def put(self, bucket, fnm, binary, tenant_id=None):
        # RENAMED PARAMETER: bucket_name -> bucket (to match interface)
        for _ in range(3):
            try:
                bucket_obj = self.client.bucket(self.bucket_name)
                blob_path = self._get_blob_path(bucket, fnm)
                blob = bucket_obj.blob(blob_path)

                blob.upload_from_file(BytesIO(binary), content_type='application/octet-stream')
                return True
            except NotFound:
                logging.error(f"Fail to put: Main bucket {self.bucket_name} does not exist.")
                return False
            except Exception:
                logging.exception(f"Fail to put {bucket}/{fnm}:")
                self.__open__()
                time.sleep(1)
        return False

    def rm(self, bucket, fnm, tenant_id=None):
        # RENAMED PARAMETER: bucket_name -> bucket
        try:
            bucket_obj = self.client.bucket(self.bucket_name)
            blob_path = self._get_blob_path(bucket, fnm)
            blob = bucket_obj.blob(blob_path)
            blob.delete()
        except NotFound:
            pass
        except Exception:
            logging.exception(f"Fail to remove {bucket}/{fnm}:")

    def get(self, bucket, filename, tenant_id=None):
        # RENAMED PARAMETER: bucket_name -> bucket
        for _ in range(1):
            try:
                bucket_obj = self.client.bucket(self.bucket_name)
                blob_path = self._get_blob_path(bucket, filename)
                blob = bucket_obj.blob(blob_path)
                return blob.download_as_bytes()
            except NotFound:
                logging.warning(f"File not found {bucket}/{filename} in {self.bucket_name}")
                return None
            except Exception:
                logging.exception(f"Fail to get {bucket}/{filename}")
                self.__open__()
                time.sleep(1)
        return None

    def obj_exist(self, bucket, filename, tenant_id=None):
        # RENAMED PARAMETER: bucket_name -> bucket
        try:
            bucket_obj = self.client.bucket(self.bucket_name)
            blob_path = self._get_blob_path(bucket, filename)
            blob = bucket_obj.blob(blob_path)
            return blob.exists()
        except Exception:
            logging.exception(f"obj_exist {bucket}/{filename} got exception")
            return False

    def bucket_exists(self, bucket):
        # RENAMED PARAMETER: bucket_name -> bucket
        try:
            bucket_obj = self.client.bucket(self.bucket_name)
            return bucket_obj.exists()
        except Exception:
            logging.exception(f"bucket_exist check for {self.bucket_name} got exception")
            return False

    def get_presigned_url(self, bucket, fnm, expires, tenant_id=None):
        # RENAMED PARAMETER: bucket_name -> bucket
        for _ in range(10):
            try:
                bucket_obj = self.client.bucket(self.bucket_name)
                blob_path = self._get_blob_path(bucket, fnm)
                blob = bucket_obj.blob(blob_path)

                expiration = expires
                if isinstance(expires, int):
                    expiration = datetime.timedelta(seconds=expires)

                url = blob.generate_signed_url(
                    version="v4",
                    expiration=expiration,
                    method="GET"
                )
                return url
            except Exception:
                logging.exception(f"Fail to get_presigned {bucket}/{fnm}:")
                self.__open__()
                time.sleep(1)
        return None

    def remove_bucket(self, bucket):
        # RENAMED PARAMETER: bucket_name -> bucket
        try:
            bucket_obj = self.client.bucket(self.bucket_name)
            prefix = f"{bucket}/"

            blobs = list(self.client.list_blobs(self.bucket_name, prefix=prefix))

            if blobs:
                bucket_obj.delete_blobs(blobs)
        except Exception:
            logging.exception(f"Fail to remove virtual bucket (folder) {bucket}")

    def copy(self, src_bucket, src_path, dest_bucket, dest_path):
        # RENAMED PARAMETERS to match original interface
        try:
            bucket_obj = self.client.bucket(self.bucket_name)

            src_blob_path = self._get_blob_path(src_bucket, src_path)
            dest_blob_path = self._get_blob_path(dest_bucket, dest_path)

            src_blob = bucket_obj.blob(src_blob_path)

            if not src_blob.exists():
                logging.error(f"Source object not found: {src_blob_path}")
                return False

            bucket_obj.copy_blob(src_blob, bucket_obj, dest_blob_path)
            return True

        except NotFound:
            logging.error(f"Copy failed: Main bucket {self.bucket_name} does not exist.")
            return False
        except Exception:
            logging.exception(f"Fail to copy {src_bucket}/{src_path} -> {dest_bucket}/{dest_path}")
            return False

    def move(self, src_bucket, src_path, dest_bucket, dest_path):
        try:
            if self.copy(src_bucket, src_path, dest_bucket, dest_path):
                self.rm(src_bucket, src_path)
                return True
            else:
                logging.error(f"Copy failed, move aborted: {src_bucket}/{src_path}")
                return False
        except Exception:
            logging.exception(f"Fail to move {src_bucket}/{src_path} -> {dest_bucket}/{dest_path}")
            return False
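The class above emulates per-tenant buckets as top-level folders inside one real GCS bucket. A hedged usage sketch (assumes settings.GCS["bucket"] names an existing bucket and application-default credentials are configured):

gcs = RAGFlowGCS()

# "kb-123" is a virtual bucket: objects land under kb-123/ in the
# single configured GCS bucket.
gcs.put("kb-123", "doc.pdf", b"%PDF-1.7 ...")
assert gcs.obj_exist("kb-123", "doc.pdf")
data = gcs.get("kb-123", "doc.pdf")
url = gcs.get_presigned_url("kb-123", "doc.pdf", expires=3600)
gcs.rm("kb-123", "doc.pdf")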
@ -1,145 +0,0 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""
Utility functions for Raptor processing decisions.
"""

import logging
from typing import Optional


# File extensions for structured data types
EXCEL_EXTENSIONS = {".xls", ".xlsx", ".xlsm", ".xlsb"}
CSV_EXTENSIONS = {".csv", ".tsv"}
STRUCTURED_EXTENSIONS = EXCEL_EXTENSIONS | CSV_EXTENSIONS


def is_structured_file_type(file_type: Optional[str]) -> bool:
    """
    Check if a file type is structured data (Excel, CSV, etc.)

    Args:
        file_type: File extension (e.g., ".xlsx", ".csv")

    Returns:
        True if file is structured data type
    """
    if not file_type:
        return False

    # Normalize to lowercase and ensure leading dot
    file_type = file_type.lower()
    if not file_type.startswith("."):
        file_type = f".{file_type}"

    return file_type in STRUCTURED_EXTENSIONS


def is_tabular_pdf(parser_id: str = "", parser_config: Optional[dict] = None) -> bool:
    """
    Check if a PDF is being parsed as tabular data.

    Args:
        parser_id: Parser ID (e.g., "table", "naive")
        parser_config: Parser configuration dict

    Returns:
        True if PDF is being parsed as tabular data
    """
    parser_config = parser_config or {}

    # If using table parser, it's tabular
    if parser_id and parser_id.lower() == "table":
        return True

    # Check if html4excel is enabled (Excel-like table parsing)
    if parser_config.get("html4excel", False):
        return True

    return False


def should_skip_raptor(
    file_type: Optional[str] = None,
    parser_id: str = "",
    parser_config: Optional[dict] = None,
    raptor_config: Optional[dict] = None
) -> bool:
    """
    Determine if Raptor should be skipped for a given document.

    This function implements the logic to automatically disable Raptor for:
    1. Excel files (.xls, .xlsx, .csv, etc.)
    2. PDFs with tabular data (using table parser or html4excel)

    Args:
        file_type: File extension (e.g., ".xlsx", ".pdf")
        parser_id: Parser ID being used
        parser_config: Parser configuration dict
        raptor_config: Raptor configuration dict (can override with auto_disable_for_structured_data)

    Returns:
        True if Raptor should be skipped, False otherwise
    """
    parser_config = parser_config or {}
    raptor_config = raptor_config or {}

    # Check if auto-disable is explicitly disabled in config
    if raptor_config.get("auto_disable_for_structured_data", True) is False:
        logging.info("Raptor auto-disable is turned off via configuration")
        return False

    # Check for Excel/CSV files
    if is_structured_file_type(file_type):
        logging.info(f"Skipping Raptor for structured file type: {file_type}")
        return True

    # Check for tabular PDFs
    if file_type and file_type.lower() in [".pdf", "pdf"]:
        if is_tabular_pdf(parser_id, parser_config):
            logging.info(f"Skipping Raptor for tabular PDF (parser_id={parser_id})")
            return True

    return False


def get_skip_reason(
    file_type: Optional[str] = None,
    parser_id: str = "",
    parser_config: Optional[dict] = None
) -> str:
    """
    Get a human-readable reason why Raptor was skipped.

    Args:
        file_type: File extension
        parser_id: Parser ID being used
        parser_config: Parser configuration dict

    Returns:
        Reason string, or empty string if Raptor should not be skipped
    """
    parser_config = parser_config or {}

    if is_structured_file_type(file_type):
        return f"Structured data file ({file_type}) - Raptor auto-disabled"

    if file_type and file_type.lower() in [".pdf", "pdf"]:
        if is_tabular_pdf(parser_id, parser_config):
            return f"Tabular PDF (parser={parser_id}) - Raptor auto-disabled"

    return ""
275
run_tests.py
@ -1,275 +0,0 @@
#!/usr/bin/env python3
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
import os
import argparse
import subprocess
from pathlib import Path
from typing import List


class Colors:
    """ANSI color codes for terminal output"""
    RED = '\033[0;31m'
    GREEN = '\033[0;32m'
    YELLOW = '\033[1;33m'
    BLUE = '\033[0;34m'
    NC = '\033[0m'  # No Color


class TestRunner:
    """RAGFlow Unit Test Runner"""

    def __init__(self):
        self.project_root = Path(__file__).parent.resolve()
        self.ut_dir = Path(self.project_root / 'test' / 'unit_test')
        # Default options
        self.coverage = False
        self.parallel = False
        self.verbose = False
        self.markers = ""

        # Python interpreter path
        self.python = sys.executable

    @staticmethod
    def print_info(message: str) -> None:
        """Print informational message"""
        print(f"{Colors.BLUE}[INFO]{Colors.NC} {message}")

    @staticmethod
    def print_error(message: str) -> None:
        """Print error message"""
        print(f"{Colors.RED}[ERROR]{Colors.NC} {message}")

    @staticmethod
    def show_usage() -> None:
        """Display usage information"""
        usage = """
RAGFlow Unit Test Runner
Usage: python run_tests.py [OPTIONS]

OPTIONS:
    -h, --help              Show this help message
    -c, --coverage          Run tests with coverage report
    -p, --parallel          Run tests in parallel (requires pytest-xdist)
    -v, --verbose           Verbose output
    -t, --test FILE         Run specific test file or directory
    -m, --markers MARKERS   Run tests with specific markers (e.g., "unit", "integration")

EXAMPLES:
    # Run all tests
    python run_tests.py

    # Run with coverage
    python run_tests.py --coverage

    # Run in parallel
    python run_tests.py --parallel

    # Run specific test file
    python run_tests.py --test services/test_dialog_service.py

    # Run only unit tests
    python run_tests.py --markers "unit"

    # Run tests with coverage and parallel execution
    python run_tests.py --coverage --parallel
"""
        print(usage)

    def build_pytest_command(self) -> List[str]:
        """Build the pytest command arguments"""
        cmd = ["pytest", str(self.ut_dir)]

        # Add test path

        # Add markers
        if self.markers:
            cmd.extend(["-m", self.markers])

        # Add verbose flag
        if self.verbose:
            cmd.extend(["-vv"])
        else:
            cmd.append("-v")

        # Add coverage
        if self.coverage:
            # Relative path from test directory to source code
            source_path = str(self.project_root / "common")
            cmd.extend([
                "--cov", source_path,
                "--cov-report", "html",
                "--cov-report", "term"
            ])

        # Add parallel execution
        if self.parallel:
            # Try to get number of CPU cores
            try:
                import multiprocessing
                cpu_count = multiprocessing.cpu_count()
                cmd.extend(["-n", str(cpu_count)])
            except ImportError:
                # Fallback to auto if multiprocessing not available
                cmd.extend(["-n", "auto"])

        # Add default options from pyproject.toml if it exists
        pyproject_path = self.project_root / "pyproject.toml"
        if pyproject_path.exists():
            cmd.extend(["--config-file", str(pyproject_path)])

        return cmd

    def run_tests(self) -> bool:
        """Execute the pytest command"""
        # Change to test directory
        os.chdir(self.project_root)

        # Build command
        cmd = self.build_pytest_command()

        # Print test configuration
        self.print_info("Running RAGFlow Unit Tests")
        self.print_info("=" * 40)
        self.print_info(f"Test Directory: {self.ut_dir}")
        self.print_info(f"Coverage: {self.coverage}")
        self.print_info(f"Parallel: {self.parallel}")
        self.print_info(f"Verbose: {self.verbose}")

        if self.markers:
            self.print_info(f"Markers: {self.markers}")

        print(f"\n{Colors.BLUE}[EXECUTING]{Colors.NC} {' '.join(cmd)}\n")

        # Run pytest
        try:
            result = subprocess.run(cmd, check=False)

            if result.returncode == 0:
                print(f"\n{Colors.GREEN}[SUCCESS]{Colors.NC} All tests passed!")

                if self.coverage:
                    coverage_dir = self.ut_dir / "htmlcov"
                    if coverage_dir.exists():
                        index_file = coverage_dir / "index.html"
                        print(f"\n{Colors.BLUE}[INFO]{Colors.NC} Coverage report generated:")
                        print(f"  {index_file}")
                        print("\nOpen with:")
                        print(f"  - Windows: start {index_file}")
                        print(f"  - macOS: open {index_file}")
                        print(f"  - Linux: xdg-open {index_file}")

                return True
            else:
                print(f"\n{Colors.RED}[FAILURE]{Colors.NC} Some tests failed!")
                return False

        except KeyboardInterrupt:
            print(f"\n{Colors.YELLOW}[INTERRUPTED]{Colors.NC} Test execution interrupted by user")
            return False
        except Exception as e:
            self.print_error(f"Failed to execute tests: {e}")
            return False

    def parse_arguments(self) -> bool:
        """Parse command line arguments"""
        parser = argparse.ArgumentParser(
            description="RAGFlow Unit Test Runner",
            formatter_class=argparse.RawDescriptionHelpFormatter,
            epilog="""
Examples:
  python run_tests.py                                          # Run all tests
  python run_tests.py --coverage                               # Run with coverage
  python run_tests.py --parallel                               # Run in parallel
  python run_tests.py --test services/test_dialog_service.py  # Run specific test
  python run_tests.py --markers "unit"                         # Run only unit tests
"""
        )

        parser.add_argument(
            "-c", "--coverage",
            action="store_true",
            help="Run tests with coverage report"
        )

        parser.add_argument(
            "-p", "--parallel",
            action="store_true",
            help="Run tests in parallel (requires pytest-xdist)"
        )

        parser.add_argument(
            "-v", "--verbose",
            action="store_true",
            help="Verbose output"
        )

        parser.add_argument(
            "-t", "--test",
            type=str,
            default="",
            help="Run specific test file or directory"
        )

        parser.add_argument(
            "-m", "--markers",
            type=str,
            default="",
            help="Run tests with specific markers (e.g., 'unit', 'integration')"
        )

        try:
            args = parser.parse_args()

            # Set options
            self.coverage = args.coverage
            self.parallel = args.parallel
            self.verbose = args.verbose
            self.markers = args.markers

            return True

        except SystemExit:
            # argparse already printed help, just exit
            return False
        except Exception as e:
            self.print_error(f"Error parsing arguments: {e}")
            return False

    def run(self) -> int:
        """Main execution method"""
        # Parse command line arguments
        if not self.parse_arguments():
            return 1

        # Run tests
        success = self.run_tests()

        return 0 if success else 1


def main():
    """Entry point"""
    runner = TestRunner()
    return runner.run()


if __name__ == "__main__":
    sys.exit(main())
@ -37,4 +37,4 @@ __all__ = [
    "Document",
    "Chunk",
    "Agent"
]
]
@ -1,287 +0,0 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""
Unit tests for Raptor utility functions.
"""

import pytest
from rag.utils.raptor_utils import (
    is_structured_file_type,
    is_tabular_pdf,
    should_skip_raptor,
    get_skip_reason,
    EXCEL_EXTENSIONS,
    CSV_EXTENSIONS,
    STRUCTURED_EXTENSIONS
)


class TestIsStructuredFileType:
    """Test file type detection for structured data"""

    @pytest.mark.parametrize("file_type,expected", [
        (".xlsx", True),
        (".xls", True),
        (".xlsm", True),
        (".xlsb", True),
        (".csv", True),
        (".tsv", True),
        ("xlsx", True),   # Without leading dot
        ("XLSX", True),   # Uppercase
        (".pdf", False),
        (".docx", False),
        (".txt", False),
        ("", False),
        (None, False),
    ])
    def test_file_type_detection(self, file_type, expected):
        """Test detection of various file types"""
        assert is_structured_file_type(file_type) == expected

    def test_excel_extensions_defined(self):
        """Test that Excel extensions are properly defined"""
        assert ".xlsx" in EXCEL_EXTENSIONS
        assert ".xls" in EXCEL_EXTENSIONS
        assert len(EXCEL_EXTENSIONS) >= 4

    def test_csv_extensions_defined(self):
        """Test that CSV extensions are properly defined"""
        assert ".csv" in CSV_EXTENSIONS
        assert ".tsv" in CSV_EXTENSIONS

    def test_structured_extensions_combined(self):
        """Test that structured extensions include both Excel and CSV"""
        assert EXCEL_EXTENSIONS.issubset(STRUCTURED_EXTENSIONS)
        assert CSV_EXTENSIONS.issubset(STRUCTURED_EXTENSIONS)


class TestIsTabularPDF:
    """Test tabular PDF detection"""

    def test_table_parser_detected(self):
        """Test that table parser is detected as tabular"""
        assert is_tabular_pdf("table", {}) is True
        assert is_tabular_pdf("TABLE", {}) is True

    def test_html4excel_detected(self):
        """Test that html4excel config is detected as tabular"""
        assert is_tabular_pdf("naive", {"html4excel": True}) is True
        assert is_tabular_pdf("", {"html4excel": True}) is True

    def test_non_tabular_pdf(self):
        """Test that non-tabular PDFs are not detected"""
        assert is_tabular_pdf("naive", {}) is False
        assert is_tabular_pdf("naive", {"html4excel": False}) is False
        assert is_tabular_pdf("", {}) is False

    def test_combined_conditions(self):
        """Test combined table parser and html4excel"""
        assert is_tabular_pdf("table", {"html4excel": True}) is True
        assert is_tabular_pdf("table", {"html4excel": False}) is True


class TestShouldSkipRaptor:
    """Test Raptor skip logic"""

    def test_skip_excel_files(self):
        """Test that Excel files skip Raptor"""
        assert should_skip_raptor(".xlsx") is True
        assert should_skip_raptor(".xls") is True
        assert should_skip_raptor(".xlsm") is True

    def test_skip_csv_files(self):
        """Test that CSV files skip Raptor"""
        assert should_skip_raptor(".csv") is True
        assert should_skip_raptor(".tsv") is True

    def test_skip_tabular_pdf_with_table_parser(self):
        """Test that tabular PDFs skip Raptor"""
        assert should_skip_raptor(".pdf", parser_id="table") is True
        assert should_skip_raptor("pdf", parser_id="TABLE") is True

    def test_skip_tabular_pdf_with_html4excel(self):
        """Test that PDFs with html4excel skip Raptor"""
        assert should_skip_raptor(".pdf", parser_config={"html4excel": True}) is True

    def test_dont_skip_regular_pdf(self):
        """Test that regular PDFs don't skip Raptor"""
        assert should_skip_raptor(".pdf", parser_id="naive") is False
        assert should_skip_raptor(".pdf", parser_config={}) is False

    def test_dont_skip_text_files(self):
        """Test that text files don't skip Raptor"""
        assert should_skip_raptor(".txt") is False
        assert should_skip_raptor(".docx") is False
        assert should_skip_raptor(".md") is False

    def test_override_with_config(self):
        """Test that auto-disable can be overridden"""
        raptor_config = {"auto_disable_for_structured_data": False}

        # Should not skip even for Excel files
        assert should_skip_raptor(".xlsx", raptor_config=raptor_config) is False
        assert should_skip_raptor(".csv", raptor_config=raptor_config) is False
        assert should_skip_raptor(".pdf", parser_id="table", raptor_config=raptor_config) is False

    def test_default_auto_disable_enabled(self):
        """Test that auto-disable is enabled by default"""
        # Empty raptor_config should default to auto_disable=True
        assert should_skip_raptor(".xlsx", raptor_config={}) is True
        assert should_skip_raptor(".xlsx", raptor_config=None) is True

    def test_explicit_auto_disable_enabled(self):
        """Test explicit auto-disable enabled"""
        raptor_config = {"auto_disable_for_structured_data": True}
        assert should_skip_raptor(".xlsx", raptor_config=raptor_config) is True


class TestGetSkipReason:
    """Test skip reason generation"""

    def test_excel_skip_reason(self):
        """Test skip reason for Excel files"""
        reason = get_skip_reason(".xlsx")
        assert "Structured data file" in reason
        assert ".xlsx" in reason
        assert "auto-disabled" in reason.lower()

    def test_csv_skip_reason(self):
        """Test skip reason for CSV files"""
        reason = get_skip_reason(".csv")
        assert "Structured data file" in reason
        assert ".csv" in reason

    def test_tabular_pdf_skip_reason(self):
        """Test skip reason for tabular PDFs"""
        reason = get_skip_reason(".pdf", parser_id="table")
        assert "Tabular PDF" in reason
        assert "table" in reason.lower()
        assert "auto-disabled" in reason.lower()

    def test_html4excel_skip_reason(self):
        """Test skip reason for html4excel PDFs"""
        reason = get_skip_reason(".pdf", parser_config={"html4excel": True})
        assert "Tabular PDF" in reason

    def test_no_skip_reason_for_regular_files(self):
        """Test that regular files have no skip reason"""
        assert get_skip_reason(".txt") == ""
        assert get_skip_reason(".docx") == ""
        assert get_skip_reason(".pdf", parser_id="naive") == ""


class TestEdgeCases:
    """Test edge cases and error handling"""

    def test_none_values(self):
        """Test handling of None values"""
        assert should_skip_raptor(None) is False
        assert should_skip_raptor("") is False
        assert get_skip_reason(None) == ""

    def test_empty_strings(self):
        """Test handling of empty strings"""
        assert should_skip_raptor("") is False
        assert get_skip_reason("") == ""

    def test_case_insensitivity(self):
        """Test case insensitive handling"""
        assert is_structured_file_type("XLSX") is True
        assert is_structured_file_type("XlSx") is True
        assert is_tabular_pdf("TABLE", {}) is True
        assert is_tabular_pdf("TaBlE", {}) is True

    def test_with_and_without_dot(self):
        """Test file extensions with and without leading dot"""
        assert should_skip_raptor(".xlsx") is True
        assert should_skip_raptor("xlsx") is True
        assert should_skip_raptor(".CSV") is True
        assert should_skip_raptor("csv") is True


class TestIntegrationScenarios:
    """Test real-world integration scenarios"""

    def test_financial_excel_report(self):
        """Test scenario: Financial quarterly Excel report"""
        file_type = ".xlsx"
        parser_id = "naive"
        parser_config = {}
        raptor_config = {"use_raptor": True}

        # Should skip Raptor
        assert should_skip_raptor(file_type, parser_id, parser_config, raptor_config) is True
        reason = get_skip_reason(file_type, parser_id, parser_config)
        assert "Structured data file" in reason

    def test_scientific_csv_data(self):
        """Test scenario: Scientific experimental CSV results"""
        file_type = ".csv"

        # Should skip Raptor
        assert should_skip_raptor(file_type) is True
        reason = get_skip_reason(file_type)
        assert ".csv" in reason

    def test_legal_contract_with_tables(self):
        """Test scenario: Legal contract PDF with tables"""
        file_type = ".pdf"
        parser_id = "table"
        parser_config = {}

        # Should skip Raptor
        assert should_skip_raptor(file_type, parser_id, parser_config) is True
        reason = get_skip_reason(file_type, parser_id, parser_config)
        assert "Tabular PDF" in reason

    def test_text_heavy_pdf_document(self):
        """Test scenario: Text-heavy PDF document"""
        file_type = ".pdf"
        parser_id = "naive"
        parser_config = {}

        # Should NOT skip Raptor
        assert should_skip_raptor(file_type, parser_id, parser_config) is False
        reason = get_skip_reason(file_type, parser_id, parser_config)
        assert reason == ""

    def test_mixed_dataset_processing(self):
        """Test scenario: Mixed dataset with various file types"""
        files = [
            (".xlsx", "naive", {}, True),   # Excel - skip
            (".csv", "naive", {}, True),    # CSV - skip
            (".pdf", "table", {}, True),    # Tabular PDF - skip
            (".pdf", "naive", {}, False),   # Regular PDF - don't skip
            (".docx", "naive", {}, False),  # Word doc - don't skip
            (".txt", "naive", {}, False),   # Text file - don't skip
        ]

        for file_type, parser_id, parser_config, expected_skip in files:
            result = should_skip_raptor(file_type, parser_id, parser_config)
            assert result == expected_skip, f"Failed for {file_type}"

    def test_override_for_special_excel(self):
        """Test scenario: Override auto-disable for special Excel processing"""
        file_type = ".xlsx"
        raptor_config = {"auto_disable_for_structured_data": False}

        # Should NOT skip when explicitly disabled
        assert should_skip_raptor(file_type, raptor_config=raptor_config) is False


if __name__ == "__main__":
    pytest.main([__file__, "-v"])
300
uv.lock
generated
@ -443,36 +443,27 @@ wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/df/73/b6e24bd22e6720ca8ee9a85a0c4a2971af8497d8f3193fa05390cbd46e09/backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8", size = 15148, upload-time = "2022-10-05T19:19:30.546Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "backports-asyncio-runner"
|
||||
version = "1.2.0"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/8e/ff/70dca7d7cb1cbc0edb2c6cc0c38b65cba36cccc491eca64cabd5fe7f8670/backports_asyncio_runner-1.2.0.tar.gz", hash = "sha256:a5aa7b2b7d8f8bfcaa2b57313f70792df84e32a2a746f585213373f900b42162", size = 69893, upload-time = "2025-07-02T02:27:15.685Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/a0/59/76ab57e3fe74484f48a53f8e337171b4a2349e506eabe136d7e01d059086/backports_asyncio_runner-1.2.0-py3-none-any.whl", hash = "sha256:0da0a936a8aeb554eccb426dc55af3ba63bcdc69fa1a600b5bb305413a4477b5", size = 12313, upload-time = "2025-07-02T02:27:14.263Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bce-python-sdk"
|
||||
version = "0.9.55"
|
||||
version = "0.9.54"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
dependencies = [
|
||||
{ name = "future" },
|
||||
{ name = "pycryptodome" },
|
||||
{ name = "six" },
|
||||
]
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ff/ae/f31ee3ccae94e1a07d8886a413f08c1581349e6cb45bf8b3c608fbf173e4/bce_python_sdk-0.9.55.tar.gz", hash = "sha256:bed63f8a0975f2e9daecf53417c3d5b803232ad87f35a0b16e25850710ce209c", size = 275733, upload-time = "2025-12-02T12:02:38.041Z" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b3/c8/1c3bc30aa745ad4c3d073f150bddaf1d43ee6ee33f0b8ec60068494f511e/bce_python_sdk-0.9.54.tar.gz", hash = "sha256:f68026f40f11ea38ef445f50a7756009d5b703c7253438b138b30fb3b83be275", size = 275698, upload-time = "2025-11-27T02:28:50.24Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/8f/01/1b13a627e5f0239f24b168138d9a948e876d4b387c03f59d31699578c960/bce_python_sdk-0.9.55-py3-none-any.whl", hash = "sha256:6045d19d783b548644cce50a2f41ef5242da6654fb91b2c21629f309ca6dbf4c", size = 390463, upload-time = "2025-12-02T12:02:36.417Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/69/a7/b8806c8505bb830cc863837ef8b42695170dd9561605c61262250df066d3/bce_python_sdk-0.9.54-py3-none-any.whl", hash = "sha256:a084eee577931f15a55280a7401bea2474115989ee79ebbca131610bdce81c99", size = 390447, upload-time = "2025-11-27T02:28:48.603Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "beartype"
|
||||
version = "0.22.8"
|
||||
version = "0.22.7"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/8c/1d/794ae2acaa67c8b216d91d5919da2606c2bb14086849ffde7f5555f3a3a5/beartype-0.22.8.tar.gz", hash = "sha256:b19b21c9359722ee3f7cc433f063b3e13997b27ae8226551ea5062e621f61165", size = 1602262, upload-time = "2025-12-03T05:11:10.766Z" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2c/49/e28a77f8a3868b1c9ff6a030678e84de24c4783bae4c12cec9443cf8fb54/beartype-0.22.7.tar.gz", hash = "sha256:c7269855b71e32b7c9f0fc662baade752eb525107266e053338c2f6e8873826b", size = 1599627, upload-time = "2025-11-29T06:49:56.751Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/14/2a/fbcbf5a025d3e71ddafad7efd43e34ec4362f4d523c3c471b457148fb211/beartype-0.22.8-py3-none-any.whl", hash = "sha256:b832882d04e41a4097bab9f63e6992bc6de58c414ee84cba9b45b67314f5ab2e", size = 1331895, upload-time = "2025-12-03T05:11:08.373Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/f6/0c/a764253610513295b7f57904b91fae1d99c7afd1b16b6eaae06fdfb71fb5/beartype-0.22.7-py3-none-any.whl", hash = "sha256:e13430ac07c61fa4bc54d375970438aeb9aa47a482c529a6f438ce52e18e6f50", size = 1330771, upload-time = "2025-11-29T06:49:54.545Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -1034,58 +1025,6 @@ wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/0c/58/bd257695f39d05594ca4ad60df5bcb7e32247f9951fd09a9b8edb82d1daa/contourpy-1.3.3-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:3d1a3799d62d45c18bafd41c5fa05120b96a28079f2393af559b843d1a966a77", size = 225315, upload-time = "2025-07-26T12:02:58.801Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "coverage"
|
||||
version = "7.12.0"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/89/26/4a96807b193b011588099c3b5c89fbb05294e5b90e71018e065465f34eb6/coverage-7.12.0.tar.gz", hash = "sha256:fc11e0a4e372cb5f282f16ef90d4a585034050ccda536451901abfb19a57f40c", size = 819341, upload-time = "2025-11-18T13:34:20.766Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/26/4a/0dc3de1c172d35abe512332cfdcc43211b6ebce629e4cc42e6cd25ed8f4d/coverage-7.12.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:32b75c2ba3f324ee37af3ccee5b30458038c50b349ad9b88cee85096132a575b", size = 217409, upload-time = "2025-11-18T13:31:53.122Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/01/c3/086198b98db0109ad4f84241e8e9ea7e5fb2db8c8ffb787162d40c26cc76/coverage-7.12.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cb2a1b6ab9fe833714a483a915de350abc624a37149649297624c8d57add089c", size = 217927, upload-time = "2025-11-18T13:31:54.458Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/5d/5f/34614dbf5ce0420828fc6c6f915126a0fcb01e25d16cf141bf5361e6aea6/coverage-7.12.0-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:5734b5d913c3755e72f70bf6cc37a0518d4f4745cde760c5d8e12005e62f9832", size = 244678, upload-time = "2025-11-18T13:31:55.805Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/55/7b/6b26fb32e8e4a6989ac1d40c4e132b14556131493b1d06bc0f2be169c357/coverage-7.12.0-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:b527a08cdf15753279b7afb2339a12073620b761d79b81cbe2cdebdb43d90daa", size = 246507, upload-time = "2025-11-18T13:31:57.05Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/06/42/7d70e6603d3260199b90fb48b537ca29ac183d524a65cc31366b2e905fad/coverage-7.12.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9bb44c889fb68004e94cab71f6a021ec83eac9aeabdbb5a5a88821ec46e1da73", size = 248366, upload-time = "2025-11-18T13:31:58.362Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/2d/4a/d86b837923878424c72458c5b25e899a3c5ca73e663082a915f5b3c4d749/coverage-7.12.0-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:4b59b501455535e2e5dde5881739897967b272ba25988c89145c12d772810ccb", size = 245366, upload-time = "2025-11-18T13:31:59.572Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/e6/c2/2adec557e0aa9721875f06ced19730fdb7fc58e31b02b5aa56f2ebe4944d/coverage-7.12.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:d8842f17095b9868a05837b7b1b73495293091bed870e099521ada176aa3e00e", size = 246408, upload-time = "2025-11-18T13:32:00.784Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/5a/4b/8bd1f1148260df11c618e535fdccd1e5aaf646e55b50759006a4f41d8a26/coverage-7.12.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:c5a6f20bf48b8866095c6820641e7ffbe23f2ac84a2efc218d91235e404c7777", size = 244416, upload-time = "2025-11-18T13:32:01.963Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/0e/13/3a248dd6a83df90414c54a4e121fd081fb20602ca43955fbe1d60e2312a9/coverage-7.12.0-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:5f3738279524e988d9da2893f307c2093815c623f8d05a8f79e3eff3a7a9e553", size = 244681, upload-time = "2025-11-18T13:32:03.408Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/76/30/aa833827465a5e8c938935f5d91ba055f70516941078a703740aaf1aa41f/coverage-7.12.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:e0d68c1f7eabbc8abe582d11fa393ea483caf4f44b0af86881174769f185c94d", size = 245300, upload-time = "2025-11-18T13:32:04.686Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/38/24/f85b3843af1370fb3739fa7571819b71243daa311289b31214fe3e8c9d68/coverage-7.12.0-cp310-cp310-win32.whl", hash = "sha256:7670d860e18b1e3ee5930b17a7d55ae6287ec6e55d9799982aa103a2cc1fa2ef", size = 220008, upload-time = "2025-11-18T13:32:05.806Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/3a/a2/c7da5b9566f7164db9eefa133d17761ecb2c2fde9385d754e5b5c80f710d/coverage-7.12.0-cp310-cp310-win_amd64.whl", hash = "sha256:f999813dddeb2a56aab5841e687b68169da0d3f6fc78ccf50952fa2463746022", size = 220943, upload-time = "2025-11-18T13:32:07.166Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/5a/0c/0dfe7f0487477d96432e4815537263363fb6dd7289743a796e8e51eabdf2/coverage-7.12.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:aa124a3683d2af98bd9d9c2bfa7a5076ca7e5ab09fdb96b81fa7d89376ae928f", size = 217535, upload-time = "2025-11-18T13:32:08.812Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/9b/f5/f9a4a053a5bbff023d3bec259faac8f11a1e5a6479c2ccf586f910d8dac7/coverage-7.12.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d93fbf446c31c0140208dcd07c5d882029832e8ed7891a39d6d44bd65f2316c3", size = 218044, upload-time = "2025-11-18T13:32:10.329Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/95/c5/84fc3697c1fa10cd8571919bf9693f693b7373278daaf3b73e328d502bc8/coverage-7.12.0-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:52ca620260bd8cd6027317bdd8b8ba929be1d741764ee765b42c4d79a408601e", size = 248440, upload-time = "2025-11-18T13:32:12.536Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/f4/36/2d93fbf6a04670f3874aed397d5a5371948a076e3249244a9e84fb0e02d6/coverage-7.12.0-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:f3433ffd541380f3a0e423cff0f4926d55b0cc8c1d160fdc3be24a4c03aa65f7", size = 250361, upload-time = "2025-11-18T13:32:13.852Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/5d/49/66dc65cc456a6bfc41ea3d0758c4afeaa4068a2b2931bf83be6894cf1058/coverage-7.12.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f7bbb321d4adc9f65e402c677cd1c8e4c2d0105d3ce285b51b4d87f1d5db5245", size = 252472, upload-time = "2025-11-18T13:32:15.068Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/35/1f/ebb8a18dffd406db9fcd4b3ae42254aedcaf612470e8712f12041325930f/coverage-7.12.0-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:22a7aade354a72dff3b59c577bfd18d6945c61f97393bc5fb7bd293a4237024b", size = 248592, upload-time = "2025-11-18T13:32:16.328Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/da/a8/67f213c06e5ea3b3d4980df7dc344d7fea88240b5fe878a5dcbdfe0e2315/coverage-7.12.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:3ff651dcd36d2fea66877cd4a82de478004c59b849945446acb5baf9379a1b64", size = 250167, upload-time = "2025-11-18T13:32:17.687Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/f0/00/e52aef68154164ea40cc8389c120c314c747fe63a04b013a5782e989b77f/coverage-7.12.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:31b8b2e38391a56e3cea39d22a23faaa7c3fc911751756ef6d2621d2a9daf742", size = 248238, upload-time = "2025-11-18T13:32:19.2Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/1f/a4/4d88750bcf9d6d66f77865e5a05a20e14db44074c25fd22519777cb69025/coverage-7.12.0-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:297bc2da28440f5ae51c845a47c8175a4db0553a53827886e4fb25c66633000c", size = 247964, upload-time = "2025-11-18T13:32:21.027Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/a7/6b/b74693158899d5b47b0bf6238d2c6722e20ba749f86b74454fac0696bb00/coverage-7.12.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6ff7651cc01a246908eac162a6a86fc0dbab6de1ad165dfb9a1e2ec660b44984", size = 248862, upload-time = "2025-11-18T13:32:22.304Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/18/de/6af6730227ce0e8ade307b1cc4a08e7f51b419a78d02083a86c04ccceb29/coverage-7.12.0-cp311-cp311-win32.whl", hash = "sha256:313672140638b6ddb2c6455ddeda41c6a0b208298034544cfca138978c6baed6", size = 220033, upload-time = "2025-11-18T13:32:23.714Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/e2/a1/e7f63021a7c4fe20994359fcdeae43cbef4a4d0ca36a5a1639feeea5d9e1/coverage-7.12.0-cp311-cp311-win_amd64.whl", hash = "sha256:a1783ed5bd0d5938d4435014626568dc7f93e3cb99bc59188cc18857c47aa3c4", size = 220966, upload-time = "2025-11-18T13:32:25.599Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/77/e8/deae26453f37c20c3aa0c4433a1e32cdc169bf415cce223a693117aa3ddd/coverage-7.12.0-cp311-cp311-win_arm64.whl", hash = "sha256:4648158fd8dd9381b5847622df1c90ff314efbfc1df4550092ab6013c238a5fc", size = 219637, upload-time = "2025-11-18T13:32:27.265Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/02/bf/638c0427c0f0d47638242e2438127f3c8ee3cfc06c7fdeb16778ed47f836/coverage-7.12.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:29644c928772c78512b48e14156b81255000dcfd4817574ff69def189bcb3647", size = 217704, upload-time = "2025-11-18T13:32:28.906Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/08/e1/706fae6692a66c2d6b871a608bbde0da6281903fa0e9f53a39ed441da36a/coverage-7.12.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8638cbb002eaa5d7c8d04da667813ce1067080b9a91099801a0053086e52b736", size = 218064, upload-time = "2025-11-18T13:32:30.161Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/a9/8b/eb0231d0540f8af3ffda39720ff43cb91926489d01524e68f60e961366e4/coverage-7.12.0-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:083631eeff5eb9992c923e14b810a179798bb598e6a0dd60586819fc23be6e60", size = 249560, upload-time = "2025-11-18T13:32:31.835Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/e9/a1/67fb52af642e974d159b5b379e4d4c59d0ebe1288677fbd04bbffe665a82/coverage-7.12.0-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:99d5415c73ca12d558e07776bd957c4222c687b9f1d26fa0e1b57e3598bdcde8", size = 252318, upload-time = "2025-11-18T13:32:33.178Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/41/e5/38228f31b2c7665ebf9bdfdddd7a184d56450755c7e43ac721c11a4b8dab/coverage-7.12.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e949ebf60c717c3df63adb4a1a366c096c8d7fd8472608cd09359e1bd48ef59f", size = 253403, upload-time = "2025-11-18T13:32:34.45Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ec/4b/df78e4c8188f9960684267c5a4897836f3f0f20a20c51606ee778a1d9749/coverage-7.12.0-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:6d907ddccbca819afa2cd014bc69983b146cca2735a0b1e6259b2a6c10be1e70", size = 249984, upload-time = "2025-11-18T13:32:35.747Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ba/51/bb163933d195a345c6f63eab9e55743413d064c291b6220df754075c2769/coverage-7.12.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b1518ecbad4e6173f4c6e6c4a46e49555ea5679bf3feda5edb1b935c7c44e8a0", size = 251339, upload-time = "2025-11-18T13:32:37.352Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/15/40/c9b29cdb8412c837cdcbc2cfa054547dd83affe6cbbd4ce4fdb92b6ba7d1/coverage-7.12.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:51777647a749abdf6f6fd8c7cffab12de68ab93aab15efc72fbbb83036c2a068", size = 249489, upload-time = "2025-11-18T13:32:39.212Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/c8/da/b3131e20ba07a0de4437a50ef3b47840dfabf9293675b0cd5c2c7f66dd61/coverage-7.12.0-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:42435d46d6461a3b305cdfcad7cdd3248787771f53fe18305548cba474e6523b", size = 249070, upload-time = "2025-11-18T13:32:40.598Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/70/81/b653329b5f6302c08d683ceff6785bc60a34be9ae92a5c7b63ee7ee7acec/coverage-7.12.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:5bcead88c8423e1855e64b8057d0544e33e4080b95b240c2a355334bb7ced937", size = 250929, upload-time = "2025-11-18T13:32:42.915Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/a3/00/250ac3bca9f252a5fb1338b5ad01331ebb7b40223f72bef5b1b2cb03aa64/coverage-7.12.0-cp312-cp312-win32.whl", hash = "sha256:dcbb630ab034e86d2a0f79aefd2be07e583202f41e037602d438c80044957baa", size = 220241, upload-time = "2025-11-18T13:32:44.665Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/64/1c/77e79e76d37ce83302f6c21980b45e09f8aa4551965213a10e62d71ce0ab/coverage-7.12.0-cp312-cp312-win_amd64.whl", hash = "sha256:2fd8354ed5d69775ac42986a691fbf68b4084278710cee9d7c3eaa0c28fa982a", size = 221051, upload-time = "2025-11-18T13:32:46.008Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/31/f5/641b8a25baae564f9e52cac0e2667b123de961985709a004e287ee7663cc/coverage-7.12.0-cp312-cp312-win_arm64.whl", hash = "sha256:737c3814903be30695b2de20d22bcc5428fdae305c61ba44cdc8b3252984c49c", size = 219692, upload-time = "2025-11-18T13:32:47.372Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ce/a3/43b749004e3c09452e39bb56347a008f0a0668aad37324a99b5c8ca91d9e/coverage-7.12.0-py3-none-any.whl", hash = "sha256:159d50c0b12e060b15ed3d39f87ed43d4f7f7ad40b8a534f4dd331adbb51104a", size = 209503, upload-time = "2025-11-18T13:34:18.892Z" },
]

[package.optional-dependencies]
toml = [
{ name = "tomli", marker = "python_full_version <= '3.11'" },
]

[[package]]
name = "cramjam"
version = "2.11.0"
@ -1573,15 +1512,6 @@ wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/8a/0e/97c33bf5009bdbac74fd2beace167cab3f978feb69cc36f1ef79360d6c4e/exceptiongroup-1.3.1-py3-none-any.whl", hash = "sha256:a7a39a3bd276781e98394987d3a5701d0c4edffb633bb7a5144577f82c773598", size = 16740, upload-time = "2025-11-21T23:01:53.443Z" },
]

[[package]]
name = "execnet"
version = "2.1.2"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/bf/89/780e11f9588d9e7128a3f87788354c7946a9cbb1401ad38a48c4db9a4f07/execnet-2.1.2.tar.gz", hash = "sha256:63d83bfdd9a23e35b9c6a3261412324f964c2ec8dcd8d3c6916ee9373e0befcd", size = 166622, upload-time = "2025-11-12T09:56:37.75Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ab/84/02fc1827e8cdded4aa65baef11296a9bbe595c474f0d6d758af082d849fd/execnet-2.1.2-py3-none-any.whl", hash = "sha256:67fba928dd5a544b783f6056f449e5e3931a5c378b128bc18501f7ea79e296ec", size = 40708, upload-time = "2025-11-12T09:56:36.333Z" },
]

[[package]]
name = "extract-msg"
version = "0.41.5"
@ -1980,11 +1910,11 @@ wheels = [

[[package]]
name = "fsspec"
version = "2025.12.0"
version = "2025.10.0"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b6/27/954057b0d1f53f086f681755207dda6de6c660ce133c829158e8e8fe7895/fsspec-2025.12.0.tar.gz", hash = "sha256:c505de011584597b1060ff778bb664c1bc022e87921b0e4f10cc9c44f9635973", size = 309748, upload-time = "2025-12-03T15:23:42.687Z" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/24/7f/2747c0d332b9acfa75dc84447a066fdf812b5a6b8d30472b74d309bfe8cb/fsspec-2025.10.0.tar.gz", hash = "sha256:b6789427626f068f9a83ca4e8a3cc050850b6c0f71f99ddb4f542b8266a26a59", size = 309285, upload-time = "2025-10-30T14:58:44.036Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/51/c7/b64cae5dba3a1b138d7123ec36bb5ccd39d39939f18454407e5468f4763f/fsspec-2025.12.0-py3-none-any.whl", hash = "sha256:8bf1fe301b7d8acfa6e8571e3b1c3d158f909666642431cc78a1b7b4dbc5ec5b", size = 201422, upload-time = "2025-12-03T15:23:41.434Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/eb/02/a6b21098b1d5d6249b7c5ab69dde30108a71e4e819d4a9778f1de1d5b70d/fsspec-2025.10.0-py3-none-any.whl", hash = "sha256:7c7712353ae7d875407f97715f0e1ffcc21e33d5b24556cb1e090ae9409ec61d", size = 200966, upload-time = "2025-10-30T14:58:42.53Z" },
]

[[package]]
@ -2092,21 +2022,16 @@ wheels = [

[[package]]
name = "google-auth"
version = "2.43.0"
version = "2.41.1"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
dependencies = [
{ name = "cachetools" },
{ name = "pyasn1-modules" },
{ name = "rsa" },
]
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ff/ef/66d14cf0e01b08d2d51ffc3c20410c4e134a1548fc246a6081eae585a4fe/google_auth-2.43.0.tar.gz", hash = "sha256:88228eee5fc21b62a1b5fe773ca15e67778cb07dc8363adcb4a8827b52d81483", size = 296359, upload-time = "2025-11-06T00:13:36.587Z" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a8/af/5129ce5b2f9688d2fa49b463e544972a7c82b0fdb50980dafee92e121d9f/google_auth-2.41.1.tar.gz", hash = "sha256:b76b7b1f9e61f0cb7e88870d14f6a94aeef248959ef6992670efee37709cbfd2", size = 292284, upload-time = "2025-09-30T22:51:26.363Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/6f/d1/385110a9ae86d91cc14c5282c61fe9f4dc41c0b9f7d423c6ad77038c4448/google_auth-2.43.0-py2.py3-none-any.whl", hash = "sha256:af628ba6fa493f75c7e9dbe9373d148ca9f4399b5ea29976519e0a3848eddd16", size = 223114, upload-time = "2025-11-06T00:13:35.209Z" },
]

[package.optional-dependencies]
requests = [
{ name = "requests" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/be/a4/7319a2a8add4cc352be9e3efeff5e2aacee917c85ca2fa1647e29089983c/google_auth-2.41.1-py2.py3-none-any.whl", hash = "sha256:754843be95575b9a19c604a848a41be03f7f2afd8c019f716dc1f51ee41c639d", size = 221302, upload-time = "2025-09-30T22:51:24.212Z" },
]

[[package]]
@ -2124,15 +2049,15 @@ wheels = [

[[package]]
name = "google-auth-oauthlib"
version = "1.2.2"
version = "1.2.3"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
dependencies = [
{ name = "google-auth" },
{ name = "requests-oauthlib" },
]
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/fb/87/e10bf24f7bcffc1421b84d6f9c3377c30ec305d082cd737ddaa6d8f77f7c/google_auth_oauthlib-1.2.2.tar.gz", hash = "sha256:11046fb8d3348b296302dd939ace8af0a724042e8029c1b872d87fabc9f41684", size = 20955, upload-time = "2025-04-22T16:40:29.172Z" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/86/a6/c6336a6ceb682709a4aa39e2e6b5754a458075ca92359512b6cbfcb25ae3/google_auth_oauthlib-1.2.3.tar.gz", hash = "sha256:eb09e450d3cc789ecbc2b3529cb94a713673fd5f7a22c718ad91cf75aedc2ea4", size = 21265, upload-time = "2025-10-30T21:28:19.105Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ac/84/40ee070be95771acd2f4418981edb834979424565c3eec3cd88b6aa09d24/google_auth_oauthlib-1.2.2-py3-none-any.whl", hash = "sha256:fd619506f4b3908b5df17b65f39ca8d66ea56986e5472eb5978fd8f3786f00a2", size = 19072, upload-time = "2025-04-22T16:40:28.174Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/38/07/a54c100da461ffc5968457823fcc665a48fb4b875c68bcfecbfe24a10dbe/google_auth_oauthlib-1.2.3-py3-none-any.whl", hash = "sha256:7c0940e037677f25e71999607493640d071212e7f3c15aa0febea4c47a5a0680", size = 19184, upload-time = "2025-10-30T21:28:17.88Z" },
]

[[package]]
@ -2252,11 +2177,11 @@ wheels = [

[[package]]
name = "google-genai"
version = "1.53.0"
version = "1.52.0"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
dependencies = [
{ name = "anyio" },
{ name = "google-auth", extra = ["requests"] },
{ name = "google-auth" },
{ name = "httpx" },
{ name = "pydantic" },
{ name = "requests" },
@ -2264,9 +2189,9 @@ dependencies = [
{ name = "typing-extensions" },
{ name = "websockets" },
]
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/de/b3/36fbfde2e21e6d3bc67780b61da33632f495ab1be08076cf0a16af74098f/google_genai-1.53.0.tar.gz", hash = "sha256:938a26d22f3fd32c6eeeb4276ef204ef82884e63af9842ce3eac05ceb39cbd8d", size = 260102, upload-time = "2025-12-03T17:21:23.233Z" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/09/4e/0ad8585d05312074bb69711b2d81cfed69ce0ae441913d57bf169bed20a7/google_genai-1.52.0.tar.gz", hash = "sha256:a74e8a4b3025f23aa98d6a0f84783119012ca6c336fd68f73c5d2b11465d7fc5", size = 258743, upload-time = "2025-11-21T02:18:55.742Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/40/f2/97fefdd1ad1f3428321bac819ae7a83ccc59f6439616054736b7819fa56c/google_genai-1.53.0-py3-none-any.whl", hash = "sha256:65a3f99e5c03c372d872cda7419f5940e723374bb12a2f3ffd5e3e56e8eb2094", size = 262015, upload-time = "2025-12-03T17:21:21.934Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ec/66/03f663e7bca7abe9ccfebe6cb3fe7da9a118fd723a5abb278d6117e7990e/google_genai-1.52.0-py3-none-any.whl", hash = "sha256:c8352b9f065ae14b9322b949c7debab8562982f03bf71d44130cd2b798c20743", size = 261219, upload-time = "2025-11-21T02:18:54.515Z" },
]

[[package]]
@ -2851,7 +2776,7 @@ wheels = [

[[package]]
name = "infinity-sdk"
version = "0.6.10"
version = "0.6.8"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
dependencies = [
{ name = "datrie" },
@ -2870,9 +2795,9 @@ dependencies = [
{ name = "sqlglot", extra = ["rs"] },
{ name = "thrift" },
]
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b4/e5/88fdcfe42835c5494a08f02b64762a98e04dae4ad49f7dfabac18ee01928/infinity_sdk-0.6.10.tar.gz", hash = "sha256:b55c296ca3b2c8c2f4568f359dd8a50772e9432f09b64667140e9804bf780436", size = 29502969, upload-time = "2025-12-04T02:42:17.882Z" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a3/2d/4b699d62202319e5cbbcb4a7d9e87a86dde7ba7c767d0af4ebbee3de8419/infinity_sdk-0.6.8.tar.gz", hash = "sha256:e91c1f6cdf2fa41bc615c72be2a9e981211bd05b34522c1d27f1b825b905b125", size = 72669, upload-time = "2025-12-02T05:09:29.377Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/97/99/8857ea0805bd83fe092f5dca914a31f9fcc731c3800264657bd3ba950a1d/infinity_sdk-0.6.10-py3-none-any.whl", hash = "sha256:8f605039ec73d1b05d219105fbabef186e0178fddbad058c2c06c4873be48651", size = 29722107, upload-time = "2025-12-04T02:42:04.101Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ab/08/59ed1261ee80d3b2c5a80313a013a94cae83ce90ff1da1ef488055944a7b/infinity_sdk-0.6.8-py3-none-any.whl", hash = "sha256:392f942a2073a5b545261dad9859b217c6a0331ede606c8894e7ae335f2ead5e", size = 81564, upload-time = "2025-12-02T05:09:27.784Z" },
]

[[package]]
@ -3152,7 +3077,7 @@ wheels = [

[[package]]
name = "langfuse"
version = "3.10.5"
version = "3.10.3"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
dependencies = [
{ name = "backoff" },
@ -3166,9 +3091,9 @@ dependencies = [
{ name = "requests" },
{ name = "wrapt" },
]
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/69/21/dff0434290512484436bfa108e36f0adc3457eb4117767de70e76a411cac/langfuse-3.10.5.tar.gz", hash = "sha256:14eb767663f7e7480cd1cd1b3ca457022817c129e666efe97e5c80adb8c5aac0", size = 223142, upload-time = "2025-12-03T17:49:39.747Z" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7b/03/c4316cb0a91cff97118c21b973b3089c2fe1bdbcad02f3623d6ac572e954/langfuse-3.10.3.tar.gz", hash = "sha256:69d6eaf573212f8cdc1cebd2d6b47f271bfe76c7eb5a3c5d6766bb0d9bf0004c", size = 226617, upload-time = "2025-12-01T18:01:02.607Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/64/6f/dc15775f82d38da62cd2015110f5802bb175a9ee731a4533fe2a0cdf75b6/langfuse-3.10.5-py3-none-any.whl", hash = "sha256:0223a64109a4293b9bd9b2e0e3229f53b75291cd96341e42cc3eba186973fcdb", size = 398888, upload-time = "2025-12-03T17:49:38.171Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/fd/04/f07c2a23f2822f73f8576b1ba7348c014c4be65127384b4bee475f913f3b/langfuse-3.10.3-py3-none-any.whl", hash = "sha256:b9a2e6506f8f0923c2f4b8c9e3fa355231994197a17f75509a37f335660ce334", size = 399062, upload-time = "2025-12-01T18:01:00.688Z" },
]

[[package]]
@ -3995,12 +3920,12 @@ name = "onnxruntime-gpu"
version = "1.19.2"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
dependencies = [
{ name = "coloredlogs", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
{ name = "flatbuffers", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
{ name = "numpy", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
{ name = "packaging", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
{ name = "protobuf", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
{ name = "sympy", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
{ name = "coloredlogs" },
{ name = "flatbuffers" },
{ name = "numpy" },
{ name = "packaging" },
{ name = "protobuf" },
{ name = "sympy" },
]
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/d0/9c/3fa310e0730643051eb88e884f19813a6c8b67d0fbafcda610d960e589db/onnxruntime_gpu-1.19.2-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a49740e079e7c5215830d30cde3df792e903df007aa0b0fd7aa797937061b27a", size = 226178508, upload-time = "2024-09-04T06:43:40.83Z" },
@ -4118,32 +4043,32 @@ wheels = [

[[package]]
name = "opentelemetry-api"
version = "1.39.0"
version = "1.38.0"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
dependencies = [
{ name = "importlib-metadata" },
{ name = "typing-extensions" },
]
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c0/0b/e5428c009d4d9af0515b0a8371a8aaae695371af291f45e702f7969dce6b/opentelemetry_api-1.39.0.tar.gz", hash = "sha256:6130644268c5ac6bdffaf660ce878f10906b3e789f7e2daa5e169b047a2933b9", size = 65763, upload-time = "2025-12-03T13:19:56.378Z" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/08/d8/0f354c375628e048bd0570645b310797299754730079853095bf000fba69/opentelemetry_api-1.38.0.tar.gz", hash = "sha256:f4c193b5e8acb0912b06ac5b16321908dd0843d75049c091487322284a3eea12", size = 65242, upload-time = "2025-10-16T08:35:50.25Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/05/85/d831a9bc0a9e0e1a304ff3d12c1489a5fbc9bf6690a15dcbdae372bbca45/opentelemetry_api-1.39.0-py3-none-any.whl", hash = "sha256:3c3b3ca5c5687b1b5b37e5c5027ff68eacea8675241b29f13110a8ffbb8f0459", size = 66357, upload-time = "2025-12-03T13:19:33.043Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ae/a2/d86e01c28300bd41bab8f18afd613676e2bd63515417b77636fc1add426f/opentelemetry_api-1.38.0-py3-none-any.whl", hash = "sha256:2891b0197f47124454ab9f0cf58f3be33faca394457ac3e09daba13ff50aa582", size = 65947, upload-time = "2025-10-16T08:35:30.23Z" },
]

[[package]]
name = "opentelemetry-exporter-otlp-proto-common"
version = "1.39.0"
version = "1.38.0"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
dependencies = [
{ name = "opentelemetry-proto" },
]
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/11/cb/3a29ce606b10c76d413d6edd42d25a654af03e73e50696611e757d2602f3/opentelemetry_exporter_otlp_proto_common-1.39.0.tar.gz", hash = "sha256:a135fceed1a6d767f75be65bd2845da344dd8b9258eeed6bc48509d02b184409", size = 20407, upload-time = "2025-12-03T13:19:59.003Z" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/19/83/dd4660f2956ff88ed071e9e0e36e830df14b8c5dc06722dbde1841accbe8/opentelemetry_exporter_otlp_proto_common-1.38.0.tar.gz", hash = "sha256:e333278afab4695aa8114eeb7bf4e44e65c6607d54968271a249c180b2cb605c", size = 20431, upload-time = "2025-10-16T08:35:53.285Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ef/c6/215edba62d13a3948c718b289539f70e40965bc37fc82ecd55bb0b749c1a/opentelemetry_exporter_otlp_proto_common-1.39.0-py3-none-any.whl", hash = "sha256:3d77be7c4bdf90f1a76666c934368b8abed730b5c6f0547a2ec57feb115849ac", size = 18367, upload-time = "2025-12-03T13:19:36.906Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/a7/9e/55a41c9601191e8cd8eb626b54ee6827b9c9d4a46d736f32abc80d8039fc/opentelemetry_exporter_otlp_proto_common-1.38.0-py3-none-any.whl", hash = "sha256:03cb76ab213300fe4f4c62b7d8f17d97fcfd21b89f0b5ce38ea156327ddda74a", size = 18359, upload-time = "2025-10-16T08:35:34.099Z" },
]

[[package]]
name = "opentelemetry-exporter-otlp-proto-http"
version = "1.39.0"
version = "1.38.0"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
dependencies = [
{ name = "googleapis-common-protos" },
@ -4154,48 +4079,48 @@ dependencies = [
{ name = "requests" },
{ name = "typing-extensions" },
]
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/81/dc/1e9bf3f6a28e29eba516bc0266e052996d02bc7e92675f3cd38169607609/opentelemetry_exporter_otlp_proto_http-1.39.0.tar.gz", hash = "sha256:28d78fc0eb82d5a71ae552263d5012fa3ebad18dfd189bf8d8095ba0e65ee1ed", size = 17287, upload-time = "2025-12-03T13:20:01.134Z" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/81/0a/debcdfb029fbd1ccd1563f7c287b89a6f7bef3b2902ade56797bfd020854/opentelemetry_exporter_otlp_proto_http-1.38.0.tar.gz", hash = "sha256:f16bd44baf15cbe07633c5112ffc68229d0edbeac7b37610be0b2def4e21e90b", size = 17282, upload-time = "2025-10-16T08:35:54.422Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/bc/46/e4a102e17205bb05a50dbf24ef0e92b66b648cd67db9a68865af06a242fd/opentelemetry_exporter_otlp_proto_http-1.39.0-py3-none-any.whl", hash = "sha256:5789cb1375a8b82653328c0ce13a054d285f774099faf9d068032a49de4c7862", size = 19639, upload-time = "2025-12-03T13:19:39.536Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/e5/77/154004c99fb9f291f74aa0822a2f5bbf565a72d8126b3a1b63ed8e5f83c7/opentelemetry_exporter_otlp_proto_http-1.38.0-py3-none-any.whl", hash = "sha256:84b937305edfc563f08ec69b9cb2298be8188371217e867c1854d77198d0825b", size = 19579, upload-time = "2025-10-16T08:35:36.269Z" },
]

[[package]]
name = "opentelemetry-proto"
version = "1.39.0"
version = "1.38.0"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
dependencies = [
{ name = "protobuf" },
]
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/48/b5/64d2f8c3393cd13ea2092106118f7b98461ba09333d40179a31444c6f176/opentelemetry_proto-1.39.0.tar.gz", hash = "sha256:c1fa48678ad1a1624258698e59be73f990b7fc1f39e73e16a9d08eef65dd838c", size = 46153, upload-time = "2025-12-03T13:20:08.729Z" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/51/14/f0c4f0f6371b9cb7f9fa9ee8918bfd59ac7040c7791f1e6da32a1839780d/opentelemetry_proto-1.38.0.tar.gz", hash = "sha256:88b161e89d9d372ce723da289b7da74c3a8354a8e5359992be813942969ed468", size = 46152, upload-time = "2025-10-16T08:36:01.612Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/e3/4d/d500e1862beed68318705732d1976c390f4a72ca8009c4983ff627acff20/opentelemetry_proto-1.39.0-py3-none-any.whl", hash = "sha256:1e086552ac79acb501485ff0ce75533f70f3382d43d0a30728eeee594f7bf818", size = 72534, upload-time = "2025-12-03T13:19:50.251Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/b6/6a/82b68b14efca5150b2632f3692d627afa76b77378c4999f2648979409528/opentelemetry_proto-1.38.0-py3-none-any.whl", hash = "sha256:b6ebe54d3217c42e45462e2a1ae28c3e2bf2ec5a5645236a490f55f45f1a0a18", size = 72535, upload-time = "2025-10-16T08:35:45.749Z" },
]

[[package]]
name = "opentelemetry-sdk"
version = "1.39.0"
version = "1.38.0"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
dependencies = [
{ name = "opentelemetry-api" },
{ name = "opentelemetry-semantic-conventions" },
{ name = "typing-extensions" },
]
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/51/e3/7cd989003e7cde72e0becfe830abff0df55c69d237ee7961a541e0167833/opentelemetry_sdk-1.39.0.tar.gz", hash = "sha256:c22204f12a0529e07aa4d985f1bca9d6b0e7b29fe7f03e923548ae52e0e15dde", size = 171322, upload-time = "2025-12-03T13:20:09.651Z" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/85/cb/f0eee1445161faf4c9af3ba7b848cc22a50a3d3e2515051ad8628c35ff80/opentelemetry_sdk-1.38.0.tar.gz", hash = "sha256:93df5d4d871ed09cb4272305be4d996236eedb232253e3ab864c8620f051cebe", size = 171942, upload-time = "2025-10-16T08:36:02.257Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/a4/b4/2adc8bc83eb1055ecb592708efb6f0c520cc2eb68970b02b0f6ecda149cf/opentelemetry_sdk-1.39.0-py3-none-any.whl", hash = "sha256:90cfb07600dfc0d2de26120cebc0c8f27e69bf77cd80ef96645232372709a514", size = 132413, upload-time = "2025-12-03T13:19:51.364Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/2f/2e/e93777a95d7d9c40d270a371392b6d6f1ff170c2a3cb32d6176741b5b723/opentelemetry_sdk-1.38.0-py3-none-any.whl", hash = "sha256:1c66af6564ecc1553d72d811a01df063ff097cdc82ce188da9951f93b8d10f6b", size = 132349, upload-time = "2025-10-16T08:35:46.995Z" },
]

[[package]]
name = "opentelemetry-semantic-conventions"
version = "0.60b0"
version = "0.59b0"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
dependencies = [
{ name = "opentelemetry-api" },
{ name = "typing-extensions" },
]
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/71/0e/176a7844fe4e3cb5de604212094dffaed4e18b32f1c56b5258bcbcba85c2/opentelemetry_semantic_conventions-0.60b0.tar.gz", hash = "sha256:227d7aa73cbb8a2e418029d6b6465553aa01cf7e78ec9d0bc3255c7b3ac5bf8f", size = 137935, upload-time = "2025-12-03T13:20:12.395Z" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/40/bc/8b9ad3802cd8ac6583a4eb7de7e5d7db004e89cb7efe7008f9c8a537ee75/opentelemetry_semantic_conventions-0.59b0.tar.gz", hash = "sha256:7a6db3f30d70202d5bf9fa4b69bc866ca6a30437287de6c510fb594878aed6b0", size = 129861, upload-time = "2025-10-16T08:36:03.346Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/d0/56/af0306666f91bae47db14d620775604688361f0f76a872e0005277311131/opentelemetry_semantic_conventions-0.60b0-py3-none-any.whl", hash = "sha256:069530852691136018087b52688857d97bba61cd641d0f8628d2d92788c4f78a", size = 219981, upload-time = "2025-12-03T13:19:53.585Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/24/7d/c88d7b15ba8fe5c6b8f93be50fc11795e9fc05386c44afaf6b76fe191f9b/opentelemetry_semantic_conventions-0.59b0-py3-none-any.whl", hash = "sha256:35d3b8833ef97d614136e253c1da9342b4c3c083bbaf29ce31d572a1c3825eed", size = 207954, upload-time = "2025-10-16T08:35:48.054Z" },
]

[[package]]
@ -5064,6 +4989,12 @@ wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" },
]

[[package]]
name = "pyicu"
version = "2.16"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/11/c3/8d558b30deb33eb583c0bcae3e64d6db8316b69461a04bb9db5ff63d3f6e/pyicu-2.16.tar.gz", hash = "sha256:42b3a8062e3b23e927ca727e6b5e1730d86c70279834e4887152895d2eb012d9", size = 268126, upload-time = "2025-11-04T23:33:00.006Z" }

[[package]]
name = "pyjwt"
version = "2.8.0"
@ -5264,47 +5195,6 @@ wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/a8/a4/20da314d277121d6534b3a980b29035dcd51e6744bd79075a6ce8fa4eb8d/pytest-8.4.2-py3-none-any.whl", hash = "sha256:872f880de3fc3a5bdc88a11b39c9710c3497a547cfa9320bc3c5e62fbf272e79", size = 365750, upload-time = "2025-09-04T14:34:20.226Z" },
]

[[package]]
name = "pytest-asyncio"
version = "1.3.0"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
dependencies = [
{ name = "backports-asyncio-runner", marker = "python_full_version < '3.11'" },
{ name = "pytest" },
{ name = "typing-extensions" },
]
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/90/2c/8af215c0f776415f3590cac4f9086ccefd6fd463befeae41cd4d3f193e5a/pytest_asyncio-1.3.0.tar.gz", hash = "sha256:d7f52f36d231b80ee124cd216ffb19369aa168fc10095013c6b014a34d3ee9e5", size = 50087, upload-time = "2025-11-10T16:07:47.256Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/e5/35/f8b19922b6a25bc0880171a2f1a003eaeb93657475193ab516fd87cac9da/pytest_asyncio-1.3.0-py3-none-any.whl", hash = "sha256:611e26147c7f77640e6d0a92a38ed17c3e9848063698d5c93d5aa7aa11cebff5", size = 15075, upload-time = "2025-11-10T16:07:45.537Z" },
]

[[package]]
name = "pytest-cov"
version = "7.0.0"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
dependencies = [
{ name = "coverage", extra = ["toml"] },
{ name = "pluggy" },
{ name = "pytest" },
]
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/5e/f7/c933acc76f5208b3b00089573cf6a2bc26dc80a8aece8f52bb7d6b1855ca/pytest_cov-7.0.0.tar.gz", hash = "sha256:33c97eda2e049a0c5298e91f519302a1334c26ac65c1a483d6206fd458361af1", size = 54328, upload-time = "2025-09-09T10:57:02.113Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ee/49/1377b49de7d0c1ce41292161ea0f721913fa8722c19fb9c1e3aa0367eecb/pytest_cov-7.0.0-py3-none-any.whl", hash = "sha256:3b8e9558b16cc1479da72058bdecf8073661c7f57f7d3c5f22a1c23507f2d861", size = 22424, upload-time = "2025-09-09T10:57:00.695Z" },
]

[[package]]
name = "pytest-xdist"
version = "3.8.0"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
dependencies = [
{ name = "execnet" },
{ name = "pytest" },
]
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/78/b4/439b179d1ff526791eb921115fca8e44e596a13efeda518b9d845a619450/pytest_xdist-3.8.0.tar.gz", hash = "sha256:7e578125ec9bc6050861aa93f2d59f1d8d085595d6551c2c90b6f4fad8d3a9f1", size = 88069, upload-time = "2025-07-01T13:30:59.346Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ca/31/d4e37e9e550c2b92a9cbc2e4d0b7420a27224968580b5a447f420847c975/pytest_xdist-3.8.0-py3-none-any.whl", hash = "sha256:202ca578cfeb7370784a8c33d6d05bc6e13b4f25b5053c30a152269fd10f0b88", size = 46396, upload-time = "2025-07-01T13:30:56.632Z" },
]

[[package]]
name = "python-calamine"
version = "0.6.1"
@ -5668,6 +5558,7 @@ dependencies = [
{ name = "psycopg2-binary" },
{ name = "pyclipper" },
{ name = "pycryptodomex" },
{ name = "pyicu" },
{ name = "pymysql" },
{ name = "pyobvector" },
{ name = "pyodbc" },
@ -5729,9 +5620,6 @@ test = [
{ name = "openpyxl" },
{ name = "pillow" },
{ name = "pytest" },
{ name = "pytest-asyncio" },
{ name = "pytest-cov" },
{ name = "pytest-xdist" },
{ name = "python-docx" },
{ name = "python-pptx" },
{ name = "reportlab" },
@ -5795,7 +5683,7 @@ requires-dist = [
{ name = "huggingface-hub", specifier = ">=0.25.0,<0.26.0" },
{ name = "imageio-ffmpeg", specifier = ">=0.6.0" },
{ name = "infinity-emb", specifier = ">=0.0.66,<0.0.67" },
{ name = "infinity-sdk", specifier = "==0.6.10" },
{ name = "infinity-sdk", specifier = "==0.6.8" },
{ name = "itsdangerous", specifier = "==2.1.2" },
{ name = "jira", specifier = "==3.10.5" },
{ name = "json-repair", specifier = "==0.35.0" },
@ -5835,6 +5723,7 @@ requires-dist = [
{ name = "psycopg2-binary", specifier = "==2.9.9" },
{ name = "pyclipper", specifier = "==1.3.0.post5" },
{ name = "pycryptodomex", specifier = "==3.20.0" },
{ name = "pyicu", specifier = ">=2.15.3,<3.0.0" },
{ name = "pymysql", specifier = ">=1.1.1,<2.0.0" },
{ name = "pyobvector", specifier = "==0.2.18" },
{ name = "pyodbc", specifier = ">=5.2.0,<6.0.0" },
@ -5896,9 +5785,6 @@ test = [
{ name = "openpyxl", specifier = ">=3.1.5" },
{ name = "pillow", specifier = ">=10.4.0" },
{ name = "pytest", specifier = ">=8.3.5" },
{ name = "pytest-asyncio", specifier = ">=1.3.0" },
{ name = "pytest-cov", specifier = ">=7.0.0" },
{ name = "pytest-xdist", specifier = ">=3.8.0" },
{ name = "python-docx", specifier = ">=1.1.2" },
{ name = "python-pptx", specifier = ">=1.0.2" },
{ name = "reportlab", specifier = ">=4.4.1" },
@ -6826,11 +6712,11 @@ wheels = [

[[package]]
name = "sqlglot"
version = "28.1.0"
version = "28.0.0"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7e/49/cda1fc4e610ed5764de2842bb2f362f4aba267b4a7d05a3a217a25b39004/sqlglot-28.1.0.tar.gz", hash = "sha256:a3ef7344359667b51cf95e840aac70a49f847602c61c9fbaeb847f74f7877fe1", size = 5546281, upload-time = "2025-12-02T16:52:28.387Z" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/52/8d/9ce5904aca760b81adf821c77a1dcf07c98f9caaa7e3b5c991c541ff89d2/sqlglot-28.0.0.tar.gz", hash = "sha256:cc9a651ef4182e61dac58aa955e5fb21845a5865c6a4d7d7b5a7857450285ad4", size = 5520798, upload-time = "2025-11-17T10:34:57.016Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/2c/e8/bd016214348f65ba31107c1b81af70fc7662d96758052d5d59b516fd3858/sqlglot-28.1.0-py3-none-any.whl", hash = "sha256:2a895a31666ba947c686caa980624c82bcd0e6fdf59b4fdb9e47108bd092d1ac", size = 547889, upload-time = "2025-12-02T16:52:26.019Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/56/6d/86de134f40199105d2fee1b066741aa870b3ce75ee74018d9c8508bbb182/sqlglot-28.0.0-py3-none-any.whl", hash = "sha256:ac1778e7fa4812f4f7e5881b260632fc167b00ca4c1226868891fb15467122e4", size = 536127, upload-time = "2025-11-17T10:34:55.192Z" },
]

[package.optional-dependencies]
@ -6840,40 +6726,40 @@ rs = [

[[package]]
name = "sqlglotrs"
version = "0.8.0"
version = "0.7.3"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d5/37/118f24c367fde662e6c1181327dc9c16d08914108904c69bac3a6ba12c52/sqlglotrs-0.8.0.tar.gz", hash = "sha256:2b9a23c580d82be2388ee23496230cfc667f280ed0ed7eaa099d0da8d718cbf2", size = 15706, upload-time = "2025-12-02T16:58:38.197Z" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/87/5a/46d8efeda45be6ce1c630229455f000cafedea6129b47e6cfab39ff462f5/sqlglotrs-0.7.3.tar.gz", hash = "sha256:caadc572c8a194f99d6ba44d02f9ada0110e3d47cca3330c81f4aa608f1143eb", size = 15888, upload-time = "2025-10-13T06:33:57.322Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/d7/88/7fc59c146118603e06abf69dc19c237ef496a8dd936e5c224fdffc7df120/sqlglotrs-0.8.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:3db8f75b8efe5b94ed5540c13b80ef0a3e64c0d15864b05a6bccf5554c6e6008", size = 318097, upload-time = "2025-12-02T16:58:30.763Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/44/9a/7c0103f02b371f49f6ade420519d54c11c7e3ae4dcf22a855b9c71ccb546/sqlglotrs-0.8.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:37d00b69814fdabd4256be955d66e699afa1c50740f03369503d85f90245af35", size = 306820, upload-time = "2025-12-02T16:58:23.714Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/4a/cf/52de2a02a52976dfbd863ec57a3fafaf018a9536114f195404d51717501d/sqlglotrs-0.8.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:631da494550442ec2c7139993f59d854e4d4a44282b568594b5fc50818bc4736", size = 341540, upload-time = "2025-12-02T16:57:33.009Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/b8/89/072a295c3b98322a3d08d85ed47551c1f080309f2cde2d2fa75bd1964621/sqlglotrs-0.8.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6b624e0650067cc006d8a0595e07be3ac91599187ee353313eb9f114ca434e44", size = 350048, upload-time = "2025-12-02T16:57:41.477Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/fd/b2/fbc05eef045124a9e5820812ddd641ec42add5e52f12126a85d942b0f166/sqlglotrs-0.8.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6c0c5ae335b1917aa101d7cfe1aacbedf3b54f489d2038e94c8f42ffe5bd304a", size = 474032, upload-time = "2025-12-02T16:58:00.344Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/b2/a8/1472a5d5f849803fb2ad566ae43db8e5c9f3b1686b104dda245e4acfd963/sqlglotrs-0.8.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:21d145e9fef6e2e53fdf17f9b6ab7e7fbba26064365c56d2103a41e95053d1d4", size = 365233, upload-time = "2025-12-02T16:58:08.102Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ab/c8/ea700f277cba380c7919136a16e03f9f990f29da34c5404b861fbb8b6fd5/sqlglotrs-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9ed5d7afd8b6b244c33316cc292122f26c20bf9677907bc5790c1b053097aff4", size = 348452, upload-time = "2025-12-02T16:58:15.863Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/60/f7/ba63c7cabcd71abed855e7a4cecb4b0df297bf17d315ff39eacf94926378/sqlglotrs-0.8.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:185442ad85a125719bf365a238c2b357c079cb5a13392adbbde172b1a0073410", size = 371656, upload-time = "2025-12-02T16:57:51.329Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/5c/dc/1ba05670afe7f4c7e651f972f4738dc4508525bb67b9151cdf463b0ef55b/sqlglotrs-0.8.0-cp310-cp310-win32.whl", hash = "sha256:a7d3f36d9c53090842ae18de6d96bd7634d73584255014983aad998f2b7dc95f", size = 188554, upload-time = "2025-12-02T16:58:39.078Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/33/fc/a393a837a9e09411da87cf8ee2d9f190e3bad37d289cd385e3791356a788/sqlglotrs-0.8.0-cp310-cp310-win_amd64.whl", hash = "sha256:c8a5e3c8870323666e9695be7cc65f710ed437ceea572e69e2b14e63b70f21b2", size = 200973, upload-time = "2025-12-02T16:58:46.02Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/3d/be/a6a8e41e59813663baf02b23534d822b62521d018ee740f132b4547c4239/sqlglotrs-0.8.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:0267b0121073669d1184bc0441779559e6b0c6067a12571b63befa2a9b4b0f77", size = 318016, upload-time = "2025-12-02T16:58:32.555Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/08/02/bf65a608b2caf268d81073171196f93beed8d32731ebda1288153dec2b73/sqlglotrs-0.8.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:30c1a2fa22a3ae4b38c7df9abbf14b2473f7e71c859c95bc270bd4a169688380", size = 306527, upload-time = "2025-12-02T16:58:24.853Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/39/98/32de2ad5ea9310e220baabfb6b2ee1e3c7ebb3b83a1db9bd2acdf72de6a5/sqlglotrs-0.8.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e7df3d2117c92004aa20082d71fbbd1735f063f123354d32d0b2b602ab4e1353", size = 341821, upload-time = "2025-12-02T16:57:34.854Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/3b/99/64247cb3b9f99ca09aafa11791fe250326d498b194795af91cc957003852/sqlglotrs-0.8.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ecd7fdfd1be44828a8a8046ee743ffbaf93a972d7a125ff13e4673bb659fcf2c", size = 350003, upload-time = "2025-12-02T16:57:42.659Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/d0/91/bc15e4d2322cc28f4f94e519b2ae927ba42844830efaacf973ff774d8e06/sqlglotrs-0.8.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:171df6454f3dc064b89895c51cfb713163188493b36b845bf7c17df0e5702095", size = 474163, upload-time = "2025-12-02T16:58:01.554Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/93/8e/736451fc39f68f1e394a90d768dd9c8135412669ea3460e47033308cbb2e/sqlglotrs-0.8.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:497472ed07445a693e2699fd6f1b8ed5b8320488ade6a4a8e476664ee93ea51c", size = 365088, upload-time = "2025-12-02T16:58:09.604Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/97/2c/214f352fe03652b08873dcb8f4e6799a02be71446bdf9fea99ce13a502f3/sqlglotrs-0.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b2be9add4daed501e28564208b30d4a772dfd6aaa1ad10dadd2d49f4e851f9fa", size = 348368, upload-time = "2025-12-02T16:58:17.363Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/e6/22/c445428a52d053a6f6b31858ac817afb997316e9f0ab2ee3187a10bd85a4/sqlglotrs-0.8.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:871d5ee6414f2d7116b670d0430c16f5b3d5a96480c274f7f3d50d97dbea7601", size = 371720, upload-time = "2025-12-02T16:57:52.71Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/20/b2/301261db4ac543891f897b58a036e87ff33158ea4eda050ee0e08ae0083a/sqlglotrs-0.8.0-cp311-cp311-win32.whl", hash = "sha256:1bbe94effd9d64a8bdca12e0f14b28388059cb5a381561bac07aafedc8c63761", size = 188284, upload-time = "2025-12-02T16:58:40.21Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/c8/a1/0534075d3b8a7c8ab8eff4ea7ba0338a2ef76e3d2e49105b189049430e99/sqlglotrs-0.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:05a5098ec2836799c4c43b06df7c68a2b4c19c0fce042a66706fe3edc957459d", size = 201117, upload-time = "2025-12-02T16:58:47.14Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/fd/20/7beddfd545aaebbfee10a77ac8ef8a205ff597f9ce041c4b0437d0194392/sqlglotrs-0.8.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:fcb53f27cf4b9cae8a66c5777b84eeb3d079e96bcb4277b627fd90bfd1a591b5", size = 314699, upload-time = "2025-12-02T16:58:33.82Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/47/6f/6223a1946fe24a979b8af3c7ae2d16c5451d8f35f2468782bd4af2c122da/sqlglotrs-0.8.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4da1480cc288e02bd459e4638f212fa86a1fef81eb2cd69e6fdbdeb64e3df729", size = 303385, upload-time = "2025-12-02T16:58:26.052Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/a0/98/55050208ef839cad740df6ca86f2f3ca895d469f6ce2040cba32d0b6c4a0/sqlglotrs-0.8.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dc4a77df178b0ba242aba0e7cd775c3f9aef0fa79dfc31c6e642431ce690f51f", size = 341580, upload-time = "2025-12-02T16:57:36.197Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/eb/f2/6f1d207e629fd4810cc826cf419acc386f3d43d32987684730fbc2399503/sqlglotrs-0.8.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a8647d20cc5a9ff39071786169b3f1acf56f266483fa55386111783bca335f04", size = 348451, upload-time = "2025-12-02T16:57:43.756Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/df/1b/fa8a0907471fe7be3754bac683a21c984b17672eef6958206473f683b63a/sqlglotrs-0.8.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1afdd6a0fa915b3aef7c801cbdc815bb39b3d6aecc4d5b04c4ce54d3f73d0013", size = 475703, upload-time = "2025-12-02T16:58:02.843Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/92/56/f020c9c48d68883f6e24d69d18fe386eafc5963bc3982cc45013ec9b1ba0/sqlglotrs-0.8.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3b4c1edeb80f572cf3586b9a23d15f18f48ac8dc481eceabdbb85dc7dbf8a2ce", size = 365842, upload-time = "2025-12-02T16:58:10.847Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/61/7b/091464f8aa2232a2f33028f9c9a2cbea7c4e5719400656f203592d46264d/sqlglotrs-0.8.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9b6d819f2753804d55b10e4320df08350cd2739556572a97ed1b1d7fc939f194", size = 348397, upload-time = "2025-12-02T16:58:18.567Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/b7/1b/1b0cf0d41e8412786d1e80695778db799520223acf85c3ddc53c1200731f/sqlglotrs-0.8.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:dcf2cce002969cefb1466f2837c716d20fc9eac62b05043523fda25b3de4c444", size = 369756, upload-time = "2025-12-02T16:57:53.85Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/31/6e/d9e50472aa92736751abf3d6fcad1c793f0701f17a553ae787e4a7581a1d/sqlglotrs-0.8.0-cp312-cp312-win32.whl", hash = "sha256:5459235a25b30eae508bcaea8bc6ebc04610acd87e985ba4d602981a94078384", size = 187891, upload-time = "2025-12-02T16:58:41.57Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/3b/a2/21d09ff2065a7e883f8f68dcea57fb23f6f04ba7a193f2ac2895b5dfafae/sqlglotrs-0.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:1e0de4fa8e6c54419bd63a1205f3218feb5e2649d72f1bc69c5261b6c333e63b", size = 200842, upload-time = "2025-12-02T16:58:48.181Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/9d/95/f08e01f54e521a286fcd9f7a8bdd178eabcddd9dbc6d6c15dc983c7be8dd/sqlglotrs-0.7.3-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:7acc6dba37af53d9cf1e3217fdd719878dbfaaf2a578ad7b3fbc07ef9dadd035", size = 314621, upload-time = "2025-10-13T06:33:48.917Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/98/7d/01a5db15e413ab587816448f1222286d3a10f0465954d21f5d2915aaeed5/sqlglotrs-0.7.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3cbfb42071422afbd7376d70b93a969e86fb74752efe98dd66ee6d2ae27a9665", size = 300189, upload-time = "2025-10-13T06:33:40.963Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/08/21/94d1fb647a394afcb09a9174f7bff078452bb956e6898093dd9ee459ef2b/sqlglotrs-0.7.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:07500421de9dea8dfc0cd6769145df754178fc2ae5a3692bdbf5d37aebc0712a", size = 332771, upload-time = "2025-10-13T06:32:45.992Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/29/d1/ccade8e794304c925e9b94e1d7bff4c56896f571a291a03bfd96048c4a0f/sqlglotrs-0.7.3-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:792eb179a742d7d72d1d47c9a50e073078f0133e9191bd07920945dcc9170844", size = 342960, upload-time = "2025-10-13T06:32:55.493Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/1e/2f/2ff3cfe7d91ac3762100e511c4eff0c98824970d7c27e18e88c44a4d4567/sqlglotrs-0.7.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f4c3849992e33e47403c2517d464564e4b4cf6a080ad761141504e271ab2c7cd", size = 487268, upload-time = "2025-10-13T06:33:13.784Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/3f/d7/a95fbdd26f20b7bd5781bb5a4c51616fdd59f1c521010f668ffd54e59f5d/sqlglotrs-0.7.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:016f51409ed3d87c33ca5a31dd6766e75a809909e418a0ffd2965e0ae7b84a7b", size = 365853, upload-time = "2025-10-13T06:33:23.415Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/53/7a/5d50d0b1167c79a509957d58a6bf9f6450f894e0bc233987cb85ccaec50f/sqlglotrs-0.7.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:94dd711ea2ba76e664dab3e7f7b08cb5517cf5164fd94a552598acfd1f6df59a", size = 343697, upload-time = "2025-10-13T06:33:32.542Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/e5/89/85acbd412a5c7ef39ee5a96f5be28d6d38bce2c4521a264c747361b4c021/sqlglotrs-0.7.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:517198977f3baece667513326e42545b00b2878719922c58fcbfa21553f1338d", size = 363446, upload-time = "2025-10-13T06:33:03.995Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/37/4d/0a04f29731b6fda327bd11495c143ce70d1a7446b22440a32d8571408a06/sqlglotrs-0.7.3-cp310-cp310-win32.whl", hash = "sha256:1e9121ef3a64dc7d18e500e5e93df458a9bb6f4111b8f8569d5e4f8db21e61d2", size = 183997, upload-time = "2025-10-13T06:33:58.579Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/0e/16/0e95fa77409da059c951c6be11d4d73311c60bb5ed82f1d40a4afc9a1aa9/sqlglotrs-0.7.3-cp310-cp310-win_amd64.whl", hash = "sha256:48fd7e9efef56331e1ef7402b6d65113c087da1cfe2ef80d143ee62046d49056", size = 195923, upload-time = "2025-10-13T06:34:06.676Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/82/41/fcd87de298b562947cb2592feb9df5794886a8fa24eab8a080a552aa0e4d/sqlglotrs-0.7.3-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:f2144fc8e472de2b165665c1370e7f0ca7f9400f60ca5e78c7aedbb3233bc8d7", size = 314465, upload-time = "2025-10-13T06:33:50.219Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/14/81/22cf241e22f364c414d57893fad9cfea869f8866189e75575a3862f1d329/sqlglotrs-0.7.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:93cb74928b205da3f29f2b9c728d2c6656ad30e1ef500560f6c851bca2129fbc", size = 300129, upload-time = "2025-10-13T06:33:42.205Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/a5/90/4e4220f8605c6fbca77dfad2052cdebf195099c99fd0684723677dcbf091/sqlglotrs-0.7.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a918137bacfa31529802063e349a99d5352f74c914beceb14689cd2864c5e4d0", size = 332735, upload-time = "2025-10-13T06:32:48.095Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/3b/35/abe3cb6aa197b5193fcb446ab69465b5927e09e281b2c05f4e12249fd335/sqlglotrs-0.7.3-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c3fd0edbd957d136c67919ead10c90d832da1aedbbedc6da899d173fe78bf600", size = 342779, upload-time = "2025-10-13T06:32:56.782Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/22/71/670ad31f4dbfe594592a1992c4e7a62003dc47dffb15d96b2fec4137f933/sqlglotrs-0.7.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5a361a1dd8c55fbc57f81db738658141cab723509cc1b3edcc871bccfbba0cfb", size = 487344, upload-time = "2025-10-13T06:33:15.095Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/f4/73/86e46b762b615c7cdec489e4b0670d2a04ea6fab0c0be30a5756e95f108f/sqlglotrs-0.7.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c698af6379475c243a8f190845bf1d1267a2c9867011a4567d5cfdcc5b0eb094", size = 366062, upload-time = "2025-10-13T06:33:25.183Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/dc/07/b4dd7315df7d975c4b82d09106eb73ea2ee8f3734f764889913636e9d68c/sqlglotrs-0.7.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75d63ed29058c56f153912c90811d8af1706d81f0c759883baeb21acb6322969", size = 343642, upload-time = "2025-10-13T06:33:33.826Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/37/84/2e834fc665236ef6b0fced14d75c8e9eb0db471d96fde539d8c37ce3a10f/sqlglotrs-0.7.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4e19dee6dc46c4d84c556ae456fa0c6400edb157528fd369670b3d041b54ef21", size = 363731, upload-time = "2025-10-13T06:33:05.913Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ad/db/b7063b1240a1c39bc5627880dbb80c9e3f7b5548a17962d3a6bf98239171/sqlglotrs-0.7.3-cp311-cp311-win32.whl", hash = "sha256:f1276d0f02eaefbdd149b614f6c21fb9be372d7e1137f19c3d5f9e50662367b3", size = 183607, upload-time = "2025-10-13T06:33:59.858Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/09/98/e9cb2b3dd4abb34d2ae71747f113bf12f741a86fa29e661f1f09ba8376d0/sqlglotrs-0.7.3-cp311-cp311-win_amd64.whl", hash = "sha256:ccf05fc6e89523cf5819982fab12b8fe07a9656dbb5356fc4b56b562e734c202", size = 196050, upload-time = "2025-10-13T06:34:07.921Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/23/3f/3b059058e198b2fb6612d0ddaad5431a796d7081d40b21f12273ea1b26dc/sqlglotrs-0.7.3-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:2e7be55bf719b5ebdc344a996b6d27b9a0ba9bae0a09462900805e2f7dc4dca5", size = 310987, upload-time = "2025-10-13T06:33:51.874Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/47/b6/0058b2fe4f4813d9f3280d65ace97a637e8edd152be2a13bb1782c5c2eff/sqlglotrs-0.7.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6fef415993e1843201a57916f310b49e79669db379ff38094161fa93be2ffdf2", size = 296829, upload-time = "2025-10-13T06:33:43.838Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/f3/a8/35c593b03bf498876aea68ea944a7e7bb9cf648e68984f55795181c928dd/sqlglotrs-0.7.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e980354e576e852c53e0bb5444b04ebb6459054074bce8012cc3385dd3d116ed", size = 332313, upload-time = "2025-10-13T06:32:49.343Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/e9/bc/534e21a233846d33d6b55100485bf1844d301b0b75deded5310ef9cd171f/sqlglotrs-0.7.3-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1444b260c040cc80697956629f3fd3adece0bdb4f83bae22cd618ca3f18c4de8", size = 342309, upload-time = "2025-10-13T06:32:58.031Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/71/63/1d7bd7de87f01adb43cd1710d3fd5b9d5b0b3fea160bbeadc340fe1a9132/sqlglotrs-0.7.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3202c6f00145b8adb4632c1bb5071be5aa362829054653bac058dbcdbc6228e7", size = 484954, upload-time = "2025-10-13T06:33:16.697Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/6f/bd/10126c9f59fb4f8fa51bf3f0ad17895b953bd09e1687986d5d9e110758c8/sqlglotrs-0.7.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:17ae27e895f0ed960e28e76028c84758ff00df24e598654df3b5f22de8c7fc30", size = 366874, upload-time = "2025-10-13T06:33:26.888Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/a3/fa/f12a1eb9c22cdce962bafebefea58e898c19bae3d21e9b79d6e811a2951d/sqlglotrs-0.7.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a36c3d55b913c09dc31572ca7d5b423e85d761f1b3c9d8f86e2a1433a2f20d5", size = 342990, upload-time = "2025-10-13T06:33:35.478Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/86/1d/2bd1c8900d7a081a61a1c424785fd1a1452def751bc212630251423d80ce/sqlglotrs-0.7.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:94875611a2a953c06e8152b1d485f4d20ec61b72ebd848f96c04aca66b30f189", size = 362603, upload-time = "2025-10-13T06:33:07.507Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/18/3a/9c176a7f9b93d78168b3d137db4704a703cb51b32edb29d231b615130b47/sqlglotrs-0.7.3-cp312-cp312-win32.whl", hash = "sha256:64a708c87d1bea1f4bdb3edf0a3e94ea1b908ed229502da93621a4a50ef20189", size = 183180, upload-time = "2025-10-13T06:34:01.017Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/df/ea/37757060d3caadb22509d349087e1e16a2dcc4c1649a00d2d6b501f8ff50/sqlglotrs-0.7.3-cp312-cp312-win_amd64.whl", hash = "sha256:fe1ef1bedbb34b87dfb4e99a911026f8895ff2514b222cfd82cd08033406de2e", size = 195746, upload-time = "2025-10-13T06:34:09.478Z" },
]

[[package]]

97
web/package-lock.json
generated
@ -76,7 +76,6 @@
        "pptx-preview": "^1.0.5",
        "rc-tween-one": "^3.0.6",
        "react": "^18.2.0",
        "react-audio-voice-recorder": "^2.2.0",
        "react-copy-to-clipboard": "^5.1.0",
        "react-day-picker": "^9.8.0",
        "react-dom": "^18.2.0",
@ -2853,69 +2852,6 @@
        "node": "^12.22.0 || ^14.17.0 || >=16.0.0"
      }
    },
    "node_modules/@ffmpeg/ffmpeg": {
      "version": "0.11.6",
      "resolved": "https://registry.npmmirror.com/@ffmpeg/ffmpeg/-/ffmpeg-0.11.6.tgz",
      "integrity": "sha512-uN8J8KDjADEavPhNva6tYO9Fj0lWs9z82swF3YXnTxWMBoFLGq3LZ6FLlIldRKEzhOBKnkVfA8UnFJuvGvNxcA==",
      "license": "MIT",
      "dependencies": {
        "is-url": "^1.2.4",
        "node-fetch": "^2.6.1",
        "regenerator-runtime": "^0.13.7",
        "resolve-url": "^0.2.1"
      },
      "engines": {
        "node": ">=12.16.1"
      }
    },
    "node_modules/@ffmpeg/ffmpeg/node_modules/node-fetch": {
      "version": "2.7.0",
      "resolved": "https://registry.npmmirror.com/node-fetch/-/node-fetch-2.7.0.tgz",
      "integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==",
      "license": "MIT",
      "dependencies": {
        "whatwg-url": "^5.0.0"
      },
      "engines": {
        "node": "4.x || >=6.0.0"
      },
      "peerDependencies": {
        "encoding": "^0.1.0"
      },
      "peerDependenciesMeta": {
        "encoding": {
          "optional": true
        }
      }
    },
    "node_modules/@ffmpeg/ffmpeg/node_modules/regenerator-runtime": {
      "version": "0.13.11",
      "resolved": "https://registry.npmmirror.com/regenerator-runtime/-/regenerator-runtime-0.13.11.tgz",
      "integrity": "sha512-kY1AZVr2Ra+t+piVaJ4gxaFaReZVH40AKNo7UCX6W+dEwBo/2oZJzqfuN1qLq1oL45o56cPaTXELwrTh8Fpggg==",
      "license": "MIT"
    },
    "node_modules/@ffmpeg/ffmpeg/node_modules/tr46": {
      "version": "0.0.3",
      "resolved": "https://registry.npmmirror.com/tr46/-/tr46-0.0.3.tgz",
      "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==",
      "license": "MIT"
    },
    "node_modules/@ffmpeg/ffmpeg/node_modules/webidl-conversions": {
      "version": "3.0.1",
      "resolved": "https://registry.npmmirror.com/webidl-conversions/-/webidl-conversions-3.0.1.tgz",
      "integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==",
      "license": "BSD-2-Clause"
    },
    "node_modules/@ffmpeg/ffmpeg/node_modules/whatwg-url": {
      "version": "5.0.0",
      "resolved": "https://registry.npmmirror.com/whatwg-url/-/whatwg-url-5.0.0.tgz",
      "integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==",
      "license": "MIT",
      "dependencies": {
        "tr46": "~0.0.3",
        "webidl-conversions": "^3.0.0"
      }
    },
    "node_modules/@floating-ui/core": {
      "version": "0.6.2",
      "resolved": "https://registry.npmmirror.com/@floating-ui/core/-/core-0.6.2.tgz",
@ -21717,12 +21653,6 @@
        "node": ">= 0.4"
      }
    },
    "node_modules/is-url": {
      "version": "1.2.4",
      "resolved": "https://registry.npmmirror.com/is-url/-/is-url-1.2.4.tgz",
      "integrity": "sha512-ITvGim8FhRiYe4IQ5uHSkj7pVaPDrCTkNd3yq3cV7iZAcJdHTUMPMEHcqSOy9xZ9qFenQCvi+2wjH9a1nXqHww==",
      "license": "MIT"
    },
    "node_modules/is-weakmap": {
      "version": "2.0.1",
      "resolved": "https://registry.npmmirror.com/is-weakmap/-/is-weakmap-2.0.1.tgz",
@ -29700,30 +29630,6 @@
        "node": ">=0.10.0"
      }
    },
    "node_modules/react-audio-visualize": {
      "version": "1.2.0",
      "resolved": "https://registry.npmmirror.com/react-audio-visualize/-/react-audio-visualize-1.2.0.tgz",
      "integrity": "sha512-rfO5nmT0fp23gjU0y2WQT6+ZOq2ZsuPTMphchwX1PCz1Di4oaIr6x7JZII8MLrbHdG7UB0OHfGONTIsWdh67kQ==",
      "license": "MIT",
      "peerDependencies": {
        "react": ">=16.2.0",
        "react-dom": ">=16.2.0"
      }
    },
    "node_modules/react-audio-voice-recorder": {
      "version": "2.2.0",
      "resolved": "https://registry.npmmirror.com/react-audio-voice-recorder/-/react-audio-voice-recorder-2.2.0.tgz",
      "integrity": "sha512-Hq+143Zs99vJojT/uFvtpxUuiIKoLbMhxhA7qgxe5v8hNXrh5/qTnvYP92hFaE5V+GyoCXlESONa0ufk7t5kHQ==",
      "license": "MIT",
      "dependencies": {
        "@ffmpeg/ffmpeg": "^0.11.6",
        "react-audio-visualize": "^1.1.3"
      },
      "peerDependencies": {
        "react": ">=16.2.0",
        "react-dom": ">=16.2.0"
      }
    },
    "node_modules/react-copy-to-clipboard": {
      "version": "5.1.0",
      "resolved": "https://registry.npmmirror.com/react-copy-to-clipboard/-/react-copy-to-clipboard-5.1.0.tgz",
@ -32196,7 +32102,8 @@
      "version": "0.2.1",
      "resolved": "https://registry.npmmirror.com/resolve-url/-/resolve-url-0.2.1.tgz",
      "integrity": "sha512-ZuF55hVUQaaczgOIwqWzkEcEidmlD/xl44x1UZnhOXcYuFN2S6+rcxpG+C1N3So0wvNI3DmJICUFfu2SxhBmvg==",
      "deprecated": "https://github.com/lydell/resolve-url#deprecated"
      "deprecated": "https://github.com/lydell/resolve-url#deprecated",
      "dev": true
    },
    "node_modules/resolve.exports": {
      "version": "2.0.2",

@ -89,7 +89,6 @@
    "pptx-preview": "^1.0.5",
    "rc-tween-one": "^3.0.6",
    "react": "^18.2.0",
    "react-audio-voice-recorder": "^2.2.0",
    "react-copy-to-clipboard": "^5.1.0",
    "react-day-picker": "^9.8.0",
    "react-dom": "^18.2.0",

48
web/src/components/auto-keywords-item.tsx
Normal file
@ -0,0 +1,48 @@
import { useTranslate } from '@/hooks/common-hooks';
import { Flex, Form, InputNumber, Slider } from 'antd';

export const AutoKeywordsItem = () => {
  const { t } = useTranslate('knowledgeDetails');

  return (
    <Form.Item label={t('autoKeywords')} tooltip={t('autoKeywordsTip')}>
      <Flex gap={20} align="center">
        <Flex flex={1}>
          <Form.Item
            name={['parser_config', 'auto_keywords']}
            noStyle
            initialValue={0}
          >
            <Slider max={30} style={{ width: '100%' }} />
          </Form.Item>
        </Flex>
        <Form.Item name={['parser_config', 'auto_keywords']} noStyle>
          <InputNumber max={30} min={0} />
        </Form.Item>
      </Flex>
    </Form.Item>
  );
};

export const AutoQuestionsItem = () => {
  const { t } = useTranslate('knowledgeDetails');

  return (
    <Form.Item label={t('autoQuestions')} tooltip={t('autoQuestionsTip')}>
      <Flex gap={20} align="center">
        <Flex flex={1}>
          <Form.Item
            name={['parser_config', 'auto_questions']}
            noStyle
            initialValue={0}
          >
            <Slider max={10} style={{ width: '100%' }} />
          </Form.Item>
        </Flex>
        <Form.Item name={['parser_config', 'auto_questions']} noStyle>
          <InputNumber max={10} min={0} />
        </Form.Item>
      </Flex>
    </Form.Item>
  );
};
161
web/src/components/chunk-method-modal/hooks.ts
Normal file
@ -0,0 +1,161 @@
import { DocumentParserType } from '@/constants/knowledge';
import { useHandleChunkMethodSelectChange } from '@/hooks/logic-hooks';
import { useSelectParserList } from '@/hooks/use-user-setting-request';
import { FormInstance } from 'antd';
import { useCallback, useEffect, useMemo, useState } from 'react';

const ParserListMap = new Map([
  [
    ['pdf'],
    [
      DocumentParserType.Naive,
      DocumentParserType.Resume,
      DocumentParserType.Manual,
      DocumentParserType.Paper,
      DocumentParserType.Book,
      DocumentParserType.Laws,
      DocumentParserType.Presentation,
      DocumentParserType.One,
      DocumentParserType.Qa,
      DocumentParserType.KnowledgeGraph,
    ],
  ],
  [
    ['doc', 'docx'],
    [
      DocumentParserType.Naive,
      DocumentParserType.Resume,
      DocumentParserType.Book,
      DocumentParserType.Laws,
      DocumentParserType.One,
      DocumentParserType.Qa,
      DocumentParserType.Manual,
      DocumentParserType.KnowledgeGraph,
    ],
  ],
  [
    ['xlsx', 'xls'],
    [
      DocumentParserType.Naive,
      DocumentParserType.Qa,
      DocumentParserType.Table,
      DocumentParserType.One,
      DocumentParserType.KnowledgeGraph,
    ],
  ],
  [['ppt', 'pptx'], [DocumentParserType.Presentation]],
  [
    ['jpg', 'jpeg', 'png', 'gif', 'bmp', 'tif', 'tiff', 'webp', 'svg', 'ico'],
    [DocumentParserType.Picture],
  ],
  [
    ['txt'],
    [
      DocumentParserType.Naive,
      DocumentParserType.Resume,
      DocumentParserType.Book,
      DocumentParserType.Laws,
      DocumentParserType.One,
      DocumentParserType.Qa,
      DocumentParserType.Table,
      DocumentParserType.KnowledgeGraph,
    ],
  ],
  [
    ['csv'],
    [
      DocumentParserType.Naive,
      DocumentParserType.Resume,
      DocumentParserType.Book,
      DocumentParserType.Laws,
      DocumentParserType.One,
      DocumentParserType.Qa,
      DocumentParserType.Table,
      DocumentParserType.KnowledgeGraph,
    ],
  ],
  [
    ['md'],
    [
      DocumentParserType.Naive,
      DocumentParserType.Qa,
      DocumentParserType.KnowledgeGraph,
    ],
  ],
  [['json'], [DocumentParserType.Naive, DocumentParserType.KnowledgeGraph]],
  [['eml'], [DocumentParserType.Email]],
]);

const getParserList = (
  values: string[],
  parserList: Array<{
    value: string;
    label: string;
  }>,
) => {
  return parserList.filter((x) => values?.some((y) => y === x.value));
};

export const useFetchParserListOnMount = (
  documentId: string,
  parserId: DocumentParserType,
  documentExtension: string,
  form: FormInstance,
) => {
  const [selectedTag, setSelectedTag] = useState<DocumentParserType>();
  const parserList = useSelectParserList();
  const handleChunkMethodSelectChange = useHandleChunkMethodSelectChange(form);

  const nextParserList = useMemo(() => {
    const key = [...ParserListMap.keys()].find((x) =>
      x.some((y) => y === documentExtension),
    );
    if (key) {
      const values = ParserListMap.get(key);
      return getParserList(values ?? [], parserList);
    }

    return getParserList(
      [
        DocumentParserType.Naive,
        DocumentParserType.Resume,
        DocumentParserType.Book,
        DocumentParserType.Laws,
        DocumentParserType.One,
        DocumentParserType.Qa,
        DocumentParserType.Table,
      ],
      parserList,
    );
  }, [parserList, documentExtension]);

  useEffect(() => {
    setSelectedTag(parserId);
  }, [parserId, documentId]);

  const handleChange = (tag: string) => {
    handleChunkMethodSelectChange(tag);
    setSelectedTag(tag as DocumentParserType);
  };

  return { parserList: nextParserList, handleChange, selectedTag };
};

const hideAutoKeywords = [
  DocumentParserType.Qa,
  DocumentParserType.Table,
  DocumentParserType.Resume,
  DocumentParserType.KnowledgeGraph,
  DocumentParserType.Tag,
];

export const useShowAutoKeywords = () => {
  const showAutoKeywords = useCallback(
    (selectedTag: DocumentParserType | undefined) => {
      return hideAutoKeywords.every((x) => selectedTag !== x);
    },
    [],
  );

  return showAutoKeywords;
};
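The ParserListMap above keys the Map on arrays of extensions, so a lookup is a linear scan over the keys rather than a direct Map.get. A minimal standalone sketch of that lookup pattern, with illustrative names (demoMap, findParsers) that are not part of the file:

// Sketch of the Map-of-array-keys lookup used by useFetchParserListOnMount.
// demoMap and findParsers are illustrative names, not from the file.
const demoMap = new Map<string[], string[]>([
  [['pdf'], ['naive', 'paper']],
  [['doc', 'docx'], ['naive', 'book']],
]);

function findParsers(extension: string): string[] {
  // Keys are arrays, so scan them and test membership.
  const key = [...demoMap.keys()].find((exts) => exts.includes(extension));
  return key ? (demoMap.get(key) ?? []) : [];
}

console.log(findParsers('docx')); // ['naive', 'book']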
14
web/src/components/chunk-method-modal/index.less
Normal file
@ -0,0 +1,14 @@
.pageInputNumber {
  width: 220px;
}

.questionIcon {
  margin-inline-start: 4px;
  color: rgba(0, 0, 0, 0.45);
  cursor: help;
  writing-mode: horizontal-tb;
}

.chunkMethod {
  margin-bottom: 0;
}
350
web/src/components/chunk-method-modal/index.tsx
Normal file
@ -0,0 +1,350 @@
import MaxTokenNumber from '@/components/max-token-number';
import { IModalManagerChildrenProps } from '@/components/modal-manager';
import {
  MinusCircleOutlined,
  PlusOutlined,
  QuestionCircleOutlined,
} from '@ant-design/icons';
import {
  Button,
  Divider,
  Form,
  InputNumber,
  Modal,
  Select,
  Space,
  Tooltip,
} from 'antd';
import omit from 'lodash/omit';
import React, { useEffect, useMemo } from 'react';
import { useFetchParserListOnMount, useShowAutoKeywords } from './hooks';

import { DocumentParserType } from '@/constants/knowledge';
import { useTranslate } from '@/hooks/common-hooks';
import { useFetchKnowledgeBaseConfiguration } from '@/hooks/use-knowledge-request';
import { IParserConfig } from '@/interfaces/database/document';
import { IChangeParserConfigRequestBody } from '@/interfaces/request/document';
import { get } from 'lodash';
import { AutoKeywordsItem, AutoQuestionsItem } from '../auto-keywords-item';
import { DatasetConfigurationContainer } from '../dataset-configuration-container';
import Delimiter from '../delimiter';
import EntityTypesItem from '../entity-types-item';
import ExcelToHtml from '../excel-to-html';
import LayoutRecognize from '../layout-recognize';
import ParseConfiguration, {
  showRaptorParseConfiguration,
} from '../parse-configuration';
import {
  UseGraphRagItem,
  showGraphRagItems,
} from '../parse-configuration/graph-rag-items';
import styles from './index.less';

interface IProps extends Omit<IModalManagerChildrenProps, 'showModal'> {
  loading: boolean;
  onOk: (
    parserId: DocumentParserType | undefined,
    parserConfig: IChangeParserConfigRequestBody,
  ) => void;
  showModal?(): void;
  parserId: DocumentParserType;
  parserConfig: IParserConfig;
  documentExtension: string;
  documentId: string;
}

const hidePagesChunkMethods = [
  DocumentParserType.Qa,
  DocumentParserType.Table,
  DocumentParserType.Picture,
  DocumentParserType.Resume,
  DocumentParserType.One,
  DocumentParserType.KnowledgeGraph,
];

const ChunkMethodModal: React.FC<IProps> = ({
  documentId,
  parserId,
  onOk,
  hideModal,
  visible,
  documentExtension,
  parserConfig,
  loading,
}) => {
  const [form] = Form.useForm();
  const { parserList, handleChange, selectedTag } = useFetchParserListOnMount(
    documentId,
    parserId,
    documentExtension,
    form,
  );
  const { t } = useTranslate('knowledgeDetails');
  const { data: knowledgeDetails } = useFetchKnowledgeBaseConfiguration();

  const useGraphRag = useMemo(() => {
    return knowledgeDetails.parser_config?.graphrag?.use_graphrag;
  }, [knowledgeDetails.parser_config?.graphrag?.use_graphrag]);

  const handleOk = async () => {
    const values = await form.validateFields();
    const parser_config = {
      ...values.parser_config,
      pages: values.pages?.map((x: any) => [x.from, x.to]) ?? [],
    };
    onOk(selectedTag, parser_config);
  };

  const isPdf = documentExtension === 'pdf';

  const showPages = useMemo(() => {
    return isPdf && hidePagesChunkMethods.every((x) => x !== selectedTag);
  }, [selectedTag, isPdf]);

  const showOne = useMemo(() => {
    return (
      isPdf &&
      hidePagesChunkMethods
        .filter((x) => x !== DocumentParserType.One)
        .every((x) => x !== selectedTag)
    );
  }, [selectedTag, isPdf]);

  const showMaxTokenNumber =
    selectedTag === DocumentParserType.Naive ||
    selectedTag === DocumentParserType.KnowledgeGraph;

  const showEntityTypes = selectedTag === DocumentParserType.KnowledgeGraph;

  const showExcelToHtml =
    selectedTag === DocumentParserType.Naive && documentExtension === 'xlsx';

  const showAutoKeywords = useShowAutoKeywords();

  const afterClose = () => {
    form.resetFields();
  };

  useEffect(() => {
    if (visible) {
      const pages =
        parserConfig?.pages?.map((x) => ({ from: x[0], to: x[1] })) ?? [];
      form.setFieldsValue({
        pages: pages.length > 0 ? pages : [{ from: 1, to: 1024 }],
        parser_config: {
          ...omit(parserConfig, 'pages'),
          graphrag: {
            use_graphrag: get(
              parserConfig,
              'graphrag.use_graphrag',
              useGraphRag,
            ),
          },
        },
      });
    }
  }, [
    form,
    knowledgeDetails.parser_config,
    parserConfig,
    useGraphRag,
    visible,
  ]);

  return (
    <Modal
      title={t('chunkMethod')}
      open={visible}
      onOk={handleOk}
      onCancel={hideModal}
      afterClose={afterClose}
      confirmLoading={loading}
      width={700}
    >
      <Space size={[0, 8]} wrap>
        <Form.Item label={t('chunkMethod')} className={styles.chunkMethod}>
          <Select
            style={{ width: 160 }}
            onChange={handleChange}
            value={selectedTag}
            options={parserList}
          />
        </Form.Item>
      </Space>
      <Divider></Divider>
      <Form
        name="dynamic_form_nest_item"
        autoComplete="off"
        form={form}
        className="space-y-4"
      >
        {showPages && (
          <>
            <Space>
              <p>{t('pageRanges')}:</p>
              <Tooltip title={t('pageRangesTip')}>
                <QuestionCircleOutlined
                  className={styles.questionIcon}
                ></QuestionCircleOutlined>
              </Tooltip>
            </Space>
            <Form.List name="pages">
              {(fields, { add, remove }) => (
                <>
                  {fields.map(({ key, name, ...restField }) => (
                    <Space
                      key={key}
                      style={{
                        display: 'flex',
                      }}
                      align="baseline"
                    >
                      <Form.Item
                        {...restField}
                        name={[name, 'from']}
                        dependencies={name > 0 ? [name - 1, 'to'] : []}
                        rules={[
                          {
                            required: true,
                            message: t('fromMessage'),
                          },
                          ({ getFieldValue }) => ({
                            validator(_, value) {
                              if (
                                name === 0 ||
                                !value ||
                                getFieldValue(['pages', name - 1, 'to']) < value
                              ) {
                                return Promise.resolve();
                              }
                              return Promise.reject(
                                new Error(t('greaterThanPrevious')),
                              );
                            },
                          }),
                        ]}
                      >
                        <InputNumber
                          placeholder={t('fromPlaceholder')}
                          min={0}
                          precision={0}
                          className={styles.pageInputNumber}
                        />
                      </Form.Item>
                      <Form.Item
                        {...restField}
                        name={[name, 'to']}
                        dependencies={[name, 'from']}
                        rules={[
                          {
                            required: true,
                            message: t('toMessage'),
                          },
                          ({ getFieldValue }) => ({
                            validator(_, value) {
                              if (
                                !value ||
                                getFieldValue(['pages', name, 'from']) < value
                              ) {
                                return Promise.resolve();
                              }
                              return Promise.reject(
                                new Error(t('greaterThan')),
                              );
                            },
                          }),
                        ]}
                      >
                        <InputNumber
                          placeholder={t('toPlaceholder')}
                          min={0}
                          precision={0}
                          className={styles.pageInputNumber}
                        />
                      </Form.Item>
                      {name > 0 && (
                        <MinusCircleOutlined onClick={() => remove(name)} />
                      )}
                    </Space>
                  ))}
                  <Form.Item>
                    <Button
                      type="dashed"
                      onClick={() => add()}
                      block
                      icon={<PlusOutlined />}
                    >
                      {t('addPage')}
                    </Button>
                  </Form.Item>
                </>
              )}
            </Form.List>
          </>
        )}

        {showPages && (
          <Form.Item
            noStyle
            dependencies={[['parser_config', 'layout_recognize']]}
          >
            {({ getFieldValue }) =>
              getFieldValue(['parser_config', 'layout_recognize']) && (
                <Form.Item
                  name={['parser_config', 'task_page_size']}
                  label={t('taskPageSize')}
                  tooltip={t('taskPageSizeTip')}
                  initialValue={12}
                  rules={[
                    {
                      required: true,
                      message: t('taskPageSizeMessage'),
                    },
                  ]}
                >
                  <InputNumber min={1} max={128} />
                </Form.Item>
              )
            }
          </Form.Item>
        )}
        <DatasetConfigurationContainer show={showOne || showMaxTokenNumber}>
          {showOne && <LayoutRecognize></LayoutRecognize>}
          {showMaxTokenNumber && (
            <>
              <MaxTokenNumber
                max={
                  selectedTag === DocumentParserType.KnowledgeGraph
                    ? 8192 * 2
                    : 2048
                }
              ></MaxTokenNumber>
              <Delimiter></Delimiter>
            </>
          )}
        </DatasetConfigurationContainer>
        <DatasetConfigurationContainer
          show={showAutoKeywords(selectedTag) || showExcelToHtml}
        >
          {showAutoKeywords(selectedTag) && (
            <>
              <AutoKeywordsItem></AutoKeywordsItem>
              <AutoQuestionsItem></AutoQuestionsItem>
            </>
          )}
          {showExcelToHtml && <ExcelToHtml></ExcelToHtml>}
        </DatasetConfigurationContainer>
        {showRaptorParseConfiguration(selectedTag) && (
          <DatasetConfigurationContainer>
            <ParseConfiguration></ParseConfiguration>
          </DatasetConfigurationContainer>
        )}
        {showGraphRagItems(selectedTag) && useGraphRag && (
          <UseGraphRagItem></UseGraphRagItem>
        )}
        {showEntityTypes && <EntityTypesItem></EntityTypesItem>}
      </Form>
    </Modal>
  );
};
export default ChunkMethodModal;
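The two Form.List validators above enforce that each page range is increasing and starts after the previous range ends. The same invariant in plain TypeScript, as a sketch (isValidPageRanges is an illustrative name, not part of the component):

// Sketch: the invariant checked by the 'from'/'to' validators above.
type PageRange = { from: number; to: number };

function isValidPageRanges(ranges: PageRange[]): boolean {
  return ranges.every(
    (r, i) => r.from < r.to && (i === 0 || ranges[i - 1].to < r.from),
  );
}

console.log(isValidPageRanges([{ from: 1, to: 10 }, { from: 11, to: 20 }])); // true
console.log(isValidPageRanges([{ from: 1, to: 10 }, { from: 5, to: 20 }])); // false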
40
web/src/components/cross-language-item.tsx
Normal file
@ -0,0 +1,40 @@
import { Select as AntSelect, Form } from 'antd';
import { useTranslation } from 'react-i18next';

const Languages = [
  'English',
  'Chinese',
  'Spanish',
  'French',
  'German',
  'Japanese',
  'Korean',
  'Vietnamese',
];

const options = Languages.map((x) => ({ label: x, value: x }));

type CrossLanguageItemProps = {
  name?: string | Array<string>;
};

export const CrossLanguageItem = ({
  name = ['prompt_config', 'cross_languages'],
}: CrossLanguageItemProps) => {
  const { t } = useTranslation();

  return (
    <Form.Item
      label={t('chat.crossLanguage')}
      name={name}
      tooltip={t('chat.crossLanguageTip')}
    >
      <AntSelect
        options={options}
        allowClear
        placeholder={t('common.languagePlaceholder')}
        mode="multiple"
      />
    </Form.Item>
  );
};
42
web/src/components/delimiter.tsx
Normal file
@ -0,0 +1,42 @@
import { Form, Input } from 'antd';
import { useTranslation } from 'react-i18next';

interface IProps {
  value?: string | undefined;
  onChange?: (val: string | undefined) => void;
  maxLength?: number;
}

export const DelimiterInput = ({ value, onChange, maxLength }: IProps) => {
  const nextValue = value?.replaceAll('\n', '\\n');
  const handleInputChange = (e: React.ChangeEvent<HTMLInputElement>) => {
    const val = e.target.value;
    const nextValue = val.replaceAll('\\n', '\n');
    onChange?.(nextValue);
  };
  return (
    <Input
      value={nextValue}
      onChange={handleInputChange}
      maxLength={maxLength}
    ></Input>
  );
};

const Delimiter = () => {
  const { t } = useTranslation();

  return (
    <Form.Item
      name={['parser_config', 'delimiter']}
      label={t('knowledgeDetails.delimiter')}
      initialValue={`\n`}
      rules={[{ required: true }]}
      tooltip={t('knowledgeDetails.delimiterTip')}
    >
      <DelimiterInput />
    </Form.Item>
  );
};

export default Delimiter;
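DelimiterInput stores the literal control character in form state while showing the escaped form in the input box. A minimal sketch of that round trip, with illustrative helper names (toDisplay, toStored):

// Sketch: display form ("\n" as two characters) vs. stored form (a real newline).
const toDisplay = (stored: string) => stored.replaceAll('\n', '\\n');
const toStored = (display: string) => display.replaceAll('\\n', '\n');

const stored = '\n';
console.log(toDisplay(stored)); // prints the two characters \n
console.log(toStored(toDisplay(stored)) === stored); // true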
@ -1,5 +1,7 @@
import message from '@/components/ui/message';
import { Spin } from '@/components/ui/spin';
import { Authorization } from '@/constants/authorization';
import { getAuthorization } from '@/utils/authorization-util';
import request from '@/utils/request';
import classNames from 'classnames';
import mammoth from 'mammoth';
@ -10,87 +12,26 @@ interface DocPreviewerProps {
  url: string;
}

// Word document preview component. Behavior:
// 1) Fetches the document as a Blob.
// 2) Detects .docx input via a ZIP header probe.
// 3) Renders .docx using Mammoth; presents a controlled "unsupported" notice for non-ZIP payloads.
export const DocPreviewer: React.FC<DocPreviewerProps> = ({
  className,
  url,
}) => {
  // const url = useGetDocumentUrl();
  const [htmlContent, setHtmlContent] = useState<string>('');
  const [loading, setLoading] = useState(false);

  // Determines whether the Blob represents a .docx document by checking for the ZIP
  // file signature ("PK") in the initial bytes. A valid .docx file is a ZIP container
  // and always begins with:
  //   50 4B 03 04 ("PK..")
  //
  // Legacy .doc files use the CFBF binary format, commonly starting with:
  //   D0 CF 11 E0 A1 B1 1A E1
  //
  // Note that some files distributed with a “.doc” extension may internally be .docx
  // documents (e.g., renamed files or files produced by systems that export .docx
  // content under a .doc filename). These files will still present the ZIP signature
  // and are therefore treated as supported .docx payloads. The header inspection
  // ensures correct routing regardless of filename or reported extension.
  const isZipLikeBlob = async (blob: Blob): Promise<boolean> => {
    try {
      const headerSlice = blob.slice(0, 4);
      const buf = await headerSlice.arrayBuffer();
      const bytes = new Uint8Array(buf);

      // ZIP files start with "PK" (0x50, 0x4B)
      return bytes.length >= 2 && bytes[0] === 0x50 && bytes[1] === 0x4b;
    } catch (e) {
      console.error('Failed to inspect blob header', e);
      return false;
    }
  };

  const fetchDocument = async () => {
    if (!url) return;

    setLoading(true);

    const res = await request(url, {
      method: 'GET',
      responseType: 'blob',
      headers: { [Authorization]: getAuthorization() },
      onError: () => {
        message.error('Document parsing failed');
        console.error('Error loading document:', url);
      },
    });

    try {
      const blob: Blob = res.data;
      const contentType: string =
        blob.type || (res as any).headers?.['content-type'] || '';

      // Execution path selection: ZIP-like payloads are treated as .docx and rendered via Mammoth;
      // non-ZIP payloads receive an explicit unsupported notice.
      const looksLikeZip = await isZipLikeBlob(blob);

      if (!looksLikeZip) {
        // Non-ZIP payload (likely legacy .doc or another format): skip Mammoth processing.
        setHtmlContent(`
          <div class="flex h-full items-center justify-center">
            <div class="border border-dashed border-border-normal rounded-xl p-8 max-w-2xl text-center">
              <p class="text-2xl font-bold mb-4">
                Preview is not available for this Word document
              </p>
              <p class="italic text-sm text-muted-foreground leading-relaxed">
                Mammoth supports modern <code>.docx</code> files only.<br/>
                The file header does not indicate a <code>.docx</code> ZIP archive.
              </p>
            </div>
          </div>
        `);
        return;
      }

      // ZIP-like payload: parse as .docx with Mammoth
      const arrayBuffer = await blob.arrayBuffer();
      const arrayBuffer = await res.data.arrayBuffer();
      const result = await mammoth.convertToHtml(
        { arrayBuffer },
        { includeDefaultStyleMap: true },
@ -102,11 +43,10 @@ export const DocPreviewer: React.FC<DocPreviewerProps> = ({

      setHtmlContent(styledContent);
    } catch (err) {
      message.error('Failed to parse document.');
      message.error('Document parsing failed');
      console.error('Error parsing document:', err);
    } finally {
      setLoading(false);
    }
    setLoading(false);
  };

  useEffect(() => {
@ -114,7 +54,6 @@ export const DocPreviewer: React.FC<DocPreviewerProps> = ({
      fetchDocument();
    }
  }, [url]);

  return (
    <div
      className={classNames(
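The header probe above only ever reads the first few bytes of the Blob. A standalone version of the same magic-byte check might look like the following; this is a sketch, not the component's exported API:

// Sketch: detect a ZIP container (hence a .docx candidate) by its magic bytes.
async function looksLikeZip(blob: Blob): Promise<boolean> {
  const bytes = new Uint8Array(await blob.slice(0, 4).arrayBuffer());
  return bytes.length >= 2 && bytes[0] === 0x50 && bytes[1] === 0x4b; // "PK"
}

// A .docx payload starts 50 4B 03 04 and resolves to true; a legacy CFBF .doc
// header (D0 CF 11 E0 ...) resolves to false and triggers the notice above.
looksLikeZip(new Blob([new Uint8Array([0x50, 0x4b, 0x03, 0x04])])).then(console.log); // true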
33
web/src/components/entity-types-item.tsx
Normal file
@ -0,0 +1,33 @@
import { useTranslate } from '@/hooks/common-hooks';
import { Form } from 'antd';
import EditTag from './edit-tag';

const initialEntityTypes = [
  'organization',
  'person',
  'geo',
  'event',
  'category',
];

type IProps = {
  field?: string[];
};

const EntityTypesItem = ({
  field = ['parser_config', 'entity_types'],
}: IProps) => {
  const { t } = useTranslate('knowledgeConfiguration');
  return (
    <Form.Item
      name={field}
      label={t('entityTypes')}
      rules={[{ required: true }]}
      initialValue={initialEntityTypes}
    >
      <EditTag />
    </Form.Item>
  );
};

export default EntityTypesItem;
19
web/src/components/excel-to-html.tsx
Normal file
@ -0,0 +1,19 @@
import { useTranslate } from '@/hooks/common-hooks';
import { Form, Switch } from 'antd';

const ExcelToHtml = () => {
  const { t } = useTranslate('knowledgeDetails');
  return (
    <Form.Item
      name={['parser_config', 'html4excel']}
      label={t('html4excel')}
      initialValue={false}
      valuePropName="checked"
      tooltip={t('html4excelTip')}
    >
      <Switch />
    </Form.Item>
  );
};

export default ExcelToHtml;
@ -1,77 +0,0 @@
import { IModalProps } from '@/interfaces/common';
import { IFeedbackRequestBody } from '@/interfaces/request/chat';
import { zodResolver } from '@hookform/resolvers/zod';
import { useCallback } from 'react';
import { useForm } from 'react-hook-form';
import { useTranslation } from 'react-i18next';
import { z } from 'zod';
import { RAGFlowFormItem } from './ragflow-form';
import { ButtonLoading } from './ui/button';
import {
  Dialog,
  DialogContent,
  DialogFooter,
  DialogHeader,
  DialogTitle,
} from './ui/dialog';
import { Form } from './ui/form';
import { Textarea } from './ui/textarea';

const FormId = 'feedback-dialog';

const FeedbackDialog = ({
  visible,
  hideModal,
  onOk,
  loading,
}: IModalProps<IFeedbackRequestBody>) => {
  const { t } = useTranslation();
  const FormSchema = z.object({
    feedback: z
      .string()
      .min(1, {
        message: t('common.namePlaceholder'),
      })
      .trim(),
  });

  const form = useForm<z.infer<typeof FormSchema>>({
    resolver: zodResolver(FormSchema),
    defaultValues: { feedback: '' },
  });

  const handleOk = useCallback(
    async (data: z.infer<typeof FormSchema>) => {
      return onOk?.({ thumbup: false, feedback: data.feedback });
    },
    [onOk],
  );

  return (
    <Dialog open={visible} onOpenChange={hideModal}>
      <DialogContent className="sm:max-w-[425px]">
        <DialogHeader>
          <DialogTitle>Feedback</DialogTitle>
        </DialogHeader>
        <Form {...form}>
          <form
            onSubmit={form.handleSubmit(handleOk)}
            className="space-y-6"
            id={FormId}
          >
            <RAGFlowFormItem name="feedback">
              <Textarea> </Textarea>
            </RAGFlowFormItem>
          </form>
        </Form>
        <DialogFooter>
          <ButtonLoading type="submit" form={FormId} loading={loading}>
            {t('common.save')}
          </ButtonLoading>
        </DialogFooter>
      </DialogContent>
    </Dialog>
  );
};

export default FeedbackDialog;
13
web/src/components/file-upload-modal/index.less
Normal file
@ -0,0 +1,13 @@
.uploader {
  :global {
    .ant-upload-list {
      max-height: 40vh;
      overflow-y: auto;
    }
  }
}

.uploadLimit {
  color: red;
  font-size: 12px;
}
191
web/src/components/file-upload-modal/index.tsx
Normal file
@ -0,0 +1,191 @@
import { useTranslate } from '@/hooks/common-hooks';
import { IModalProps } from '@/interfaces/common';
import { InboxOutlined } from '@ant-design/icons';
import {
  Checkbox,
  Flex,
  Modal,
  Progress,
  Segmented,
  Tabs,
  TabsProps,
  Upload,
  UploadFile,
  UploadProps,
} from 'antd';
import { Dispatch, SetStateAction, useState } from 'react';

import styles from './index.less';

const { Dragger } = Upload;

const FileUpload = ({
  directory,
  fileList,
  setFileList,
  uploadProgress,
}: {
  directory: boolean;
  fileList: UploadFile[];
  setFileList: Dispatch<SetStateAction<UploadFile[]>>;
  uploadProgress?: number;
}) => {
  const { t } = useTranslate('fileManager');
  const props: UploadProps = {
    multiple: true,
    onRemove: (file) => {
      const index = fileList.indexOf(file);
      const newFileList = fileList.slice();
      newFileList.splice(index, 1);
      setFileList(newFileList);
    },
    beforeUpload: (file: UploadFile) => {
      setFileList((pre) => {
        return [...pre, file];
      });

      return false;
    },
    directory,
    fileList,
    progress: {
      strokeWidth: 2,
    },
  };

  return (
    <>
      <Progress percent={uploadProgress} showInfo={false} />
      <Dragger {...props} className={styles.uploader}>
        <p className="ant-upload-drag-icon">
          <InboxOutlined />
        </p>
        <p className="ant-upload-text">{t('uploadTitle')}</p>
        <p className="ant-upload-hint">{t('uploadDescription')}</p>
        {false && <p className={styles.uploadLimit}>{t('uploadLimit')}</p>}
      </Dragger>
    </>
  );
};

interface IFileUploadModalProps
  extends IModalProps<
    { parseOnCreation: boolean; directoryFileList: UploadFile[] } | UploadFile[]
  > {
  uploadFileList?: UploadFile[];
  setUploadFileList?: Dispatch<SetStateAction<UploadFile[]>>;
  uploadProgress?: number;
  setUploadProgress?: Dispatch<SetStateAction<number>>;
}

const FileUploadModal = ({
  visible,
  hideModal,
  loading,
  onOk: onFileUploadOk,
  uploadFileList: fileList,
  setUploadFileList: setFileList,
  uploadProgress,
  setUploadProgress,
}: IFileUploadModalProps) => {
  const { t } = useTranslate('fileManager');
  const [value, setValue] = useState<string | number>('local');
  const [parseOnCreation, setParseOnCreation] = useState(false);
  const [currentFileList, setCurrentFileList] = useState<UploadFile[]>([]);
  const [directoryFileList, setDirectoryFileList] = useState<UploadFile[]>([]);

  const clearFileList = () => {
    if (setFileList) {
      setFileList([]);
      setUploadProgress?.(0);
    } else {
      setCurrentFileList([]);
    }
    setDirectoryFileList([]);
  };

  const onOk = async () => {
    if (uploadProgress === 100) {
      hideModal?.();
      return;
    }

    const ret = await onFileUploadOk?.(
      fileList
        ? { parseOnCreation, directoryFileList }
        : [...currentFileList, ...directoryFileList],
    );
    return ret;
  };

  const afterClose = () => {
    clearFileList();
  };

  const items: TabsProps['items'] = [
    {
      key: '1',
      label: t('file'),
      children: (
        <FileUpload
          directory={false}
          fileList={fileList ? fileList : currentFileList}
          setFileList={setFileList ? setFileList : setCurrentFileList}
          uploadProgress={uploadProgress}
        ></FileUpload>
      ),
    },
    {
      key: '2',
      label: t('directory'),
      children: (
        <FileUpload
          directory
          fileList={directoryFileList}
          setFileList={setDirectoryFileList}
          uploadProgress={uploadProgress}
        ></FileUpload>
      ),
    },
  ];

  return (
    <>
      <Modal
        title={t('uploadFile')}
        open={visible}
        onOk={onOk}
        onCancel={hideModal}
        confirmLoading={loading}
        afterClose={afterClose}
      >
        <Flex gap={'large'} vertical>
          <Segmented
            options={[
              { label: t('local'), value: 'local' },
              { label: t('s3'), value: 's3' },
            ]}
            block
            value={value}
            onChange={setValue}
          />
          {value === 'local' ? (
            <>
              <Checkbox
                checked={parseOnCreation}
                onChange={(e) => setParseOnCreation(e.target.checked)}
              >
                {t('parseOnCreation')}
              </Checkbox>
              <Tabs defaultActiveKey="1" items={items} />
            </>
          ) : (
            t('comingSoon', { keyPrefix: 'common' })
          )}
        </Flex>
      </Modal>
    </>
  );
};

export default FileUploadModal;
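Because beforeUpload returns false, antd's Upload never posts anything on its own; files are only accumulated into state and submitted when the modal's OK handler runs. A reduced sketch of that manual-upload pattern outside React (makeCollector is an illustrative name):

// Sketch: a beforeUpload that collects instead of uploading, as in FileUpload above.
type UploadFileLike = { name: string };

function makeCollector() {
  const fileList: UploadFileLike[] = [];
  return {
    beforeUpload(file: UploadFileLike): boolean {
      fileList.push(file); // accumulate into local state
      return false; // antd convention: false disables the automatic upload
    },
    get files() {
      return [...fileList];
    },
  };
}

const collector = makeCollector();
collector.beforeUpload({ name: 'a.pdf' });
console.log(collector.files.length); // 1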
@ -1,9 +1,8 @@
import React, { useEffect, useRef, useState } from 'react';
import React, { useEffect, useRef } from 'react';
import { useTranslation } from 'react-i18next';
import './css/cloud9_night.less';
import './css/index.less';
import { JsonEditorOptions, JsonEditorProps } from './interface';

const defaultConfig: JsonEditorOptions = {
  mode: 'code',
  modes: ['tree', 'code'],
@ -15,7 +14,6 @@ const defaultConfig: JsonEditorOptions = {
  enableTransform: false,
  indentation: 2,
};

const JsonEditor: React.FC<JsonEditorProps> = ({
  value,
  onChange,
@ -27,62 +25,43 @@ const JsonEditor: React.FC<JsonEditorProps> = ({
  const editorRef = useRef<any>(null);
  const { i18n } = useTranslation();
  const currentLanguageRef = useRef<string>(i18n.language);
  const [isLoading, setIsLoading] = useState(true);

  useEffect(() => {
    let isMounted = true;
    if (typeof window !== 'undefined') {
      const JSONEditor = require('jsoneditor');
      import('jsoneditor/dist/jsoneditor.min.css');

    const initEditor = async () => {
      if (typeof window !== 'undefined') {
        try {
          const JSONEditorModule = await import('jsoneditor');
          const JSONEditor = JSONEditorModule.default || JSONEditorModule;

          await import('jsoneditor/dist/jsoneditor.min.css');

          if (isMounted && containerRef.current) {
            // Default configuration options
            const defaultOptions: JsonEditorOptions = {
              ...defaultConfig,
              language: i18n.language === 'zh' ? 'zh-CN' : 'en',
              onChange: () => {
                if (editorRef.current && onChange) {
                  try {
                    const updatedJson = editorRef.current.get();
                    onChange(updatedJson);
                  } catch (err) {
                    // Do not trigger onChange when parsing error occurs
                    console.error(err);
                  }
                }
              },
              ...options, // Merge user provided options with defaults
            };

            editorRef.current = new JSONEditor(
              containerRef.current,
              defaultOptions,
            );

            if (value) {
              editorRef.current.set(value);
      if (containerRef.current) {
        // Default configuration options
        const defaultOptions: JsonEditorOptions = {
          ...defaultConfig,
          language: i18n.language === 'zh' ? 'zh-CN' : 'en',
          onChange: () => {
            if (editorRef.current && onChange) {
              try {
                const updatedJson = editorRef.current.get();
                onChange(updatedJson);
              } catch (err) {
                // Do not trigger onChange when parsing error occurs
                console.error(err);
              }
            }
          },
          ...options, // Merge user provided options with defaults
        };

            setIsLoading(false);
          }
        } catch (error) {
          console.error('Failed to load jsoneditor:', error);
          if (isMounted) {
            setIsLoading(false);
          }
        editorRef.current = new JSONEditor(
          containerRef.current,
          defaultOptions,
        );

        if (value) {
          editorRef.current.set(value);
        }
      }
    };

    initEditor();
    }

    return () => {
      isMounted = false;
      if (editorRef.current) {
        if (typeof editorRef.current.destroy === 'function') {
          editorRef.current.destroy();
@ -113,38 +92,26 @@ const JsonEditor: React.FC<JsonEditorProps> = ({
    }

    // Recreate the editor with new language
    const initEditorWithNewLanguage = async () => {
      try {
        const JSONEditorModule = await import('jsoneditor');
        const JSONEditor = JSONEditorModule.default || JSONEditorModule;
    const JSONEditor = require('jsoneditor');

        const newOptions: JsonEditorOptions = {
          ...defaultConfig,
          language: i18n.language === 'zh' ? 'zh-CN' : 'en',
          onChange: () => {
            if (editorRef.current && onChange) {
              try {
                const updatedJson = editorRef.current.get();
                onChange(updatedJson);
              } catch (err) {
                // Do not trigger onChange when parsing error occurs
              }
            }
          },
          ...options, // Merge user provided options with defaults
        };

        editorRef.current = new JSONEditor(containerRef.current, newOptions);
        editorRef.current.set(currentData);
      } catch (error) {
        console.error(
          'Failed to reload jsoneditor with new language:',
          error,
        );
      }
    const newOptions: JsonEditorOptions = {
      ...defaultConfig,
      language: i18n.language === 'zh' ? 'zh-CN' : 'en',
      onChange: () => {
        if (editorRef.current && onChange) {
          try {
            const updatedJson = editorRef.current.get();
            onChange(updatedJson);
          } catch (err) {
            // Do not trigger onChange when parsing error occurs
          }
        }
      },
      ...options, // Merge user provided options with defaults
    };

    initEditorWithNewLanguage();
    editorRef.current = new JSONEditor(containerRef.current, newOptions);
    editorRef.current.set(currentData);
    }
  }, [i18n.language, value, onChange, options]);

@ -168,13 +135,7 @@ const JsonEditor: React.FC<JsonEditorProps> = ({
      ref={containerRef}
      style={{ height }}
      className={`ace-tomorrow-night w-full border border-border-button rounded-lg overflow-hidden bg-bg-input ${className} `}
    >
      {isLoading && (
        <div className="flex items-center justify-center h-full">
          <div className="text-text-secondary">Loading editor...</div>
        </div>
      )}
    </div>
    />
  );
};
55
web/src/components/layout-recognize.tsx
Normal file
@ -0,0 +1,55 @@
import { LlmModelType } from '@/constants/knowledge';
import { useTranslate } from '@/hooks/common-hooks';
import { useSelectLlmOptionsByModelType } from '@/hooks/use-llm-request';
import { Form, Select } from 'antd';
import { camelCase } from 'lodash';
import { useMemo } from 'react';

const enum DocumentType {
  DeepDOC = 'DeepDOC',
  PlainText = 'Plain Text',
}

const LayoutRecognize = () => {
  const { t } = useTranslate('knowledgeDetails');
  const allOptions = useSelectLlmOptionsByModelType();

  const options = useMemo(() => {
    const list = [DocumentType.DeepDOC, DocumentType.PlainText].map((x) => ({
      label: x === DocumentType.PlainText ? t(camelCase(x)) : 'DeepDoc',
      value: x,
    }));

    const image2TextList = allOptions[LlmModelType.Image2text].map((x) => {
      return {
        ...x,
        options: x.options.map((y) => {
          return {
            ...y,
            label: (
              <div className="flex justify-between items-center gap-2">
                {y.label}
                <span className="text-red-500 text-sm">Experimental</span>
              </div>
            ),
          };
        }),
      };
    });

    return [...list, ...image2TextList];
  }, [allOptions, t]);

  return (
    <Form.Item
      name={['parser_config', 'layout_recognize']}
      label={t('layoutRecognize')}
      initialValue={DocumentType.DeepDOC}
      tooltip={t('layoutRecognizeTip')}
    >
      <Select options={options} popupMatchSelectWidth={false} />
    </Form.Item>
  );
};

export default LayoutRecognize;
51
web/src/components/llm-tools-select.tsx
Normal file
@ -0,0 +1,51 @@
import { useTranslate } from '@/hooks/common-hooks';
import { useLlmToolsList } from '@/hooks/plugin-hooks';
import { Select, Space } from 'antd';

interface IProps {
  value?: string;
  onChange?: (value: string) => void;
  disabled?: boolean;
}

const LLMToolsSelect = ({ value, onChange, disabled }: IProps) => {
  const { t } = useTranslate("llmTools");
  const tools = useLlmToolsList();

  function wrapTranslation(text: string): string {
    if (!text) {
      return text;
    }

    if (text.startsWith("$t:")) {
      return t(text.substring(3));
    }

    return text;
  }

  const toolOptions = tools.map(t => ({
    label: wrapTranslation(t.displayName),
    description: wrapTranslation(t.displayDescription),
    value: t.name,
    title: wrapTranslation(t.displayDescription),
  }));

  return (
    <Select
      mode="multiple"
      options={toolOptions}
      optionRender={option => (
        <Space size="large">
          {option.label}
          {option.data.description}
        </Space>
      )}
      onChange={onChange}
      value={value}
      disabled={disabled}
    ></Select>
  );
};

export default LLMToolsSelect;
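The "$t:" prefix convention in wrapTranslation lets a tool ship either a literal display string or an i18n key. A reduced sketch of that dispatch, where the t stub stands in for the real translator:

// Sketch of the "$t:" convention: strings starting with "$t:" are i18n keys.
const t = (key: string) => `translated(${key})`; // stand-in translator

function wrapTranslation(text: string): string {
  if (!text) return text;
  return text.startsWith('$t:') ? t(text.slice(3)) : text;
}

console.log(wrapTranslation('$t:search')); // translated(search)
console.log(wrapTranslation('Web Search')); // Web Search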
@ -217,23 +217,20 @@ const MarkdownContent = ({
  const docType = chunkItem?.doc_type;

  return showImage(docType) ? (
    <section>
      <Image
        id={imageId}
        className={styles.referenceInnerChunkImage}
        onClick={
          documentId
            ? handleDocumentButtonClick(
                documentId,
                chunkItem,
                fileExtension === 'pdf',
                documentUrl,
              )
            : () => {}
        }
      ></Image>
      <span className="text-accent-primary"> {imageId}</span>
    </section>
    <Image
      id={imageId}
      className={styles.referenceInnerChunkImage}
      onClick={
        documentId
          ? handleDocumentButtonClick(
              documentId,
              chunkItem,
              fileExtension === 'pdf',
              documentUrl,
            )
          : () => {}
      }
    ></Image>
  ) : (
    <HoverCard key={i}>
      <HoverCardTrigger>
37
web/src/components/max-token-number.tsx
Normal file
@ -0,0 +1,37 @@
import { useTranslate } from '@/hooks/common-hooks';
import { Flex, Form, InputNumber, Slider } from 'antd';

interface IProps {
  initialValue?: number;
  max?: number;
}

const MaxTokenNumber = ({ initialValue = 512, max = 2048 }: IProps) => {
  const { t } = useTranslate('knowledgeConfiguration');

  return (
    <Form.Item label={t('chunkTokenNumber')} tooltip={t('chunkTokenNumberTip')}>
      <Flex gap={20} align="center">
        <Flex flex={1}>
          <Form.Item
            name={['parser_config', 'chunk_token_num']}
            noStyle
            initialValue={initialValue}
            rules={[{ required: true, message: t('chunkTokenNumberMessage') }]}
          >
            <Slider max={max} style={{ width: '100%' }} />
          </Form.Item>
        </Flex>
        <Form.Item
          name={['parser_config', 'chunk_token_num']}
          noStyle
          rules={[{ required: true, message: t('chunkTokenNumberMessage') }]}
        >
          <InputNumber max={max} min={0} />
        </Form.Item>
      </Flex>
    </Form.Item>
  );
};

export default MaxTokenNumber;
@ -1,3 +1,4 @@
import { Form, InputNumber } from 'antd';
import { useFormContext } from 'react-hook-form';
import { useTranslation } from 'react-i18next';
import {
@ -9,6 +10,27 @@ import {
} from './ui/form';
import { NumberInput } from './ui/input';

const MessageHistoryWindowSizeItem = ({
  initialValue,
}: {
  initialValue: number;
}) => {
  const { t } = useTranslation();

  return (
    <Form.Item
      name={'message_history_window_size'}
      label={t('flow.messageHistoryWindowSize')}
      initialValue={initialValue}
      tooltip={t('flow.messageHistoryWindowSizeTip')}
    >
      <InputNumber style={{ width: '100%' }} />
    </Form.Item>
  );
};

export default MessageHistoryWindowSizeItem;

export function MessageHistoryWindowSizeFormField() {
  const form = useFormContext();
  const { t } = useTranslation();

@ -18,9 +18,7 @@ import { cn } from '@/lib/utils';
import { t } from 'i18next';
import { CircleStop, Paperclip, Send, Upload, X } from 'lucide-react';
import * as React from 'react';
import { useEffect } from 'react';
import { toast } from 'sonner';
import { AudioButton } from '../ui/audio-button';

interface IProps {
  disabled: boolean;
@ -54,22 +52,6 @@ export function NextMessageInput({
  removeFile,
}: IProps) {
  const [files, setFiles] = React.useState<File[]>([]);
  const [audioInputValue, setAudioInputValue] = React.useState<string | null>(
    null,
  );

  useEffect(() => {
    if (audioInputValue !== null) {
      onInputChange({
        target: { value: audioInputValue },
      } as React.ChangeEvent<HTMLTextAreaElement>);

      setTimeout(() => {
        onPressEnter();
        setAudioInputValue(null);
      }, 0);
    }
  }, [audioInputValue, onInputChange, onPressEnter]);

  const onFileReject = React.useCallback((file: File, message: string) => {
    toast(message, {
@ -189,24 +171,15 @@ export function NextMessageInput({
            <CircleStop />
          </Button>
        ) : (
          <div className="flex items-center gap-3">
            {/* <div className="bg-bg-input rounded-md hover:bg-bg-card p-1"> */}
            <AudioButton
              onOk={(value) => {
                setAudioInputValue(value);
              }}
            />
            {/* </div> */}
            <Button
              className="size-5 rounded-sm"
              disabled={
                sendDisabled || isUploading || sendLoading || !value.trim()
              }
            >
              <Send />
              <span className="sr-only">Send message</span>
            </Button>
          </div>
          <Button
            className="size-5 rounded-sm"
            disabled={
              sendDisabled || isUploading || sendLoading || !value.trim()
            }
          >
            <Send />
            <span className="sr-only">Send message</span>
          </Button>
        )}
      </div>
    </form>

51
web/src/components/message-item/feedback-modal.tsx
Normal file
@ -0,0 +1,51 @@
import { Form, Input, Modal } from 'antd';

import { IModalProps } from '@/interfaces/common';
import { IFeedbackRequestBody } from '@/interfaces/request/chat';
import { useCallback } from 'react';

type FieldType = {
  feedback?: string;
};

const FeedbackModal = ({
  visible,
  hideModal,
  onOk,
  loading,
}: IModalProps<IFeedbackRequestBody>) => {
  const [form] = Form.useForm();

  const handleOk = useCallback(async () => {
    const ret = await form.validateFields();
    return onOk?.({ thumbup: false, feedback: ret.feedback });
  }, [onOk, form]);

  return (
    <Modal
      title="Feedback"
      open={visible}
      onOk={handleOk}
      onCancel={hideModal}
      confirmLoading={loading}
    >
      <Form
        name="basic"
        labelCol={{ span: 0 }}
        wrapperCol={{ span: 24 }}
        style={{ maxWidth: 600 }}
        autoComplete="off"
        form={form}
      >
        <Form.Item<FieldType>
          name="feedback"
          rules={[{ required: true, message: 'Please input your feedback!' }]}
        >
          <Input.TextArea rows={8} placeholder="Please input your feedback!" />
        </Form.Item>
      </Form>
    </Modal>
  );
};

export default FeedbackModal;
@@ -13,9 +13,9 @@ import {
import { Radio, Tooltip } from 'antd';
import { useCallback } from 'react';
import { useTranslation } from 'react-i18next';
import FeedbackDialog from '../feedback-dialog';
import { PromptDialog } from '../prompt-dialog';
import FeedbackModal from './feedback-modal';
import { useRemoveMessage, useSendFeedback, useSpeech } from './hooks';
import PromptModal from './prompt-modal';

interface IProps {
  messageId: string;
@@ -79,19 +79,19 @@ export const AssistantGroupButton = ({
        )}
      </Radio.Group>
      {visible && (
        <FeedbackDialog
        <FeedbackModal
          visible={visible}
          hideModal={hideModal}
          onOk={onFeedbackOk}
          loading={loading}
        ></FeedbackDialog>
        ></FeedbackModal>
      )}
      {promptVisible && (
        <PromptDialog
        <PromptModal
          visible={promptVisible}
          hideModal={hidePromptModal}
          prompt={prompt}
        ></PromptDialog>
        ></PromptModal>
      )}
    </>
  );
30 web/src/components/message-item/prompt-modal.tsx (new file)
@@ -0,0 +1,30 @@
import { IModalProps } from '@/interfaces/common';
import { IFeedbackRequestBody } from '@/interfaces/request/chat';
import { Modal, Space } from 'antd';
import HightLightMarkdown from '../highlight-markdown';
import SvgIcon from '../svg-icon';

const PromptModal = ({
  visible,
  hideModal,
  prompt,
}: IModalProps<IFeedbackRequestBody> & { prompt?: string }) => {
  return (
    <Modal
      title={
        <Space>
          <SvgIcon name={`prompt`} width={18}></SvgIcon>
          Prompt
        </Space>
      }
      width={'80%'}
      open={visible}
      onCancel={hideModal}
      footer={null}
    >
      <HightLightMarkdown>{prompt}</HightLightMarkdown>
    </Modal>
  );
};

export default PromptModal;
@@ -220,23 +220,20 @@ function MarkdownContent({
      const docType = chunkItem?.doc_type;

      return showImage(docType) ? (
        <section>
          <Image
            id={imageId}
            className={styles.referenceInnerChunkImage}
            onClick={
              documentId
                ? handleDocumentButtonClick(
                    documentId,
                    chunkItem,
                    fileExtension === 'pdf',
                    documentUrl,
                  )
                : () => {}
            }
          ></Image>
          <span className="text-accent-primary">{imageId}</span>
        </section>
        <Image
          id={imageId}
          className={styles.referenceInnerChunkImage}
          onClick={
            documentId
              ? handleDocumentButtonClick(
                  documentId,
                  chunkItem,
                  fileExtension === 'pdf',
                  documentUrl,
                )
              : () => {}
          }
        ></Image>
      ) : (
        <HoverCard key={i}>
          <HoverCardTrigger>
51 web/src/components/next-message-item/feedback-modal.tsx (new file)
@@ -0,0 +1,51 @@
import { Form, Input, Modal } from 'antd';

import { IModalProps } from '@/interfaces/common';
import { IFeedbackRequestBody } from '@/interfaces/request/chat';
import { useCallback } from 'react';

type FieldType = {
  feedback?: string;
};

const FeedbackModal = ({
  visible,
  hideModal,
  onOk,
  loading,
}: IModalProps<IFeedbackRequestBody>) => {
  const [form] = Form.useForm();

  const handleOk = useCallback(async () => {
    const ret = await form.validateFields();
    return onOk?.({ thumbup: false, feedback: ret.feedback });
  }, [onOk, form]);

  return (
    <Modal
      title="Feedback"
      open={visible}
      onOk={handleOk}
      onCancel={hideModal}
      confirmLoading={loading}
    >
      <Form
        name="basic"
        labelCol={{ span: 0 }}
        wrapperCol={{ span: 24 }}
        style={{ maxWidth: 600 }}
        autoComplete="off"
        form={form}
      >
        <Form.Item<FieldType>
          name="feedback"
          rules={[{ required: true, message: 'Please input your feedback!' }]}
        >
          <Input.TextArea rows={8} placeholder="Please input your feedback!" />
        </Form.Item>
      </Form>
    </Modal>
  );
};

export default FeedbackModal;
@@ -3,8 +3,6 @@ import CopyToClipboard from '@/components/copy-to-clipboard';
import { useSetModalState } from '@/hooks/common-hooks';
import { IRemoveMessageById } from '@/hooks/logic-hooks';
import { AgentChatContext } from '@/pages/agent/context';
import { downloadFile } from '@/services/file-manager-service';
import { downloadFileFromBlob } from '@/utils/file-util';
import {
  DeleteOutlined,
  DislikeOutlined,
@@ -14,13 +12,13 @@ import {
  SyncOutlined,
} from '@ant-design/icons';
import { Radio, Tooltip } from 'antd';
import { Download, NotebookText } from 'lucide-react';
import { NotebookText } from 'lucide-react';
import { useCallback, useContext } from 'react';
import { useTranslation } from 'react-i18next';
import FeedbackDialog from '../feedback-dialog';
import { PromptDialog } from '../prompt-dialog';
import { ToggleGroup, ToggleGroupItem } from '../ui/toggle-group';
import FeedbackModal from './feedback-modal';
import { useRemoveMessage, useSendFeedback, useSpeech } from './hooks';
import PromptModal from './prompt-modal';

interface IProps {
  messageId: string;
@@ -30,11 +28,6 @@ interface IProps {
  audioBinary?: string;
  showLoudspeaker?: boolean;
  showLog?: boolean;
  attachment?: {
    file_name: string;
    doc_id: string;
    format: string;
  };
}

export const AssistantGroupButton = ({
@@ -45,7 +38,6 @@ export const AssistantGroupButton = ({
  showLikeButton,
  showLoudspeaker = true,
  showLog = true,
  attachment,
}: IProps) => {
  const { visible, hideModal, showModal, onFeedbackOk, loading } =
    useSendFeedback(messageId);
@@ -106,42 +98,21 @@ export const AssistantGroupButton = ({
            <NotebookText className="size-4" />
          </ToggleGroupItem>
        )}
        {!!attachment?.doc_id && (
          <ToggleGroupItem
            value="g"
            onClick={async () => {
              try {
                const response = await downloadFile({
                  docId: attachment.doc_id,
                  ext: attachment.format,
                });
                const blob = new Blob([response.data], {
                  type: response.data.type,
                });
                downloadFileFromBlob(blob, attachment.file_name);
              } catch (error) {
                console.error('Download failed:', error);
              }
            }}
          >
            <Download size={16} />
          </ToggleGroupItem>
        )}
      </ToggleGroup>
      {visible && (
        <FeedbackDialog
        <FeedbackModal
          visible={visible}
          hideModal={hideModal}
          onOk={onFeedbackOk}
          loading={loading}
        ></FeedbackDialog>
        ></FeedbackModal>
      )}
      {promptVisible && (
        <PromptDialog
        <PromptModal
          visible={promptVisible}
          hideModal={hidePromptModal}
          prompt={prompt}
        ></PromptDialog>
        ></PromptModal>
      )}
    </>
  );
@@ -21,8 +21,10 @@ import { INodeEvent, MessageEventType } from '@/hooks/use-send-message';
import { cn } from '@/lib/utils';
import { AgentChatContext } from '@/pages/agent/context';
import { WorkFlowTimeline } from '@/pages/agent/log-sheet/workflow-timeline';
import { downloadFile } from '@/services/file-manager-service';
import { downloadFileFromBlob } from '@/utils/file-util';
import { isEmpty } from 'lodash';
import { Atom, ChevronDown, ChevronUp } from 'lucide-react';
import { Atom, ChevronDown, ChevronUp, Download } from 'lucide-react';
import MarkdownContent from '../next-markdown-content';
import { RAGFlowAvatar } from '../ragflow-avatar';
import { useTheme } from '../theme-provider';
@@ -174,7 +176,6 @@ function MessageItem({
              audioBinary={item.audio_binary}
              showLoudspeaker={showLoudspeaker}
              showLog={showLog}
              attachment={item.attachment}
            ></AssistantGroupButton>
          )}
          {!isShare && (
@@ -186,7 +187,6 @@ function MessageItem({
              audioBinary={item.audio_binary}
              showLoudspeaker={showLoudspeaker}
              showLog={showLog}
              attachment={item.attachment}
            ></AssistantGroupButton>
          )}
        </>
@@ -250,7 +250,7 @@ function MessageItem({
          {isUser && (
            <UploadedMessageFiles files={item.files}></UploadedMessageFiles>
          )}
          {/* {isAssistant && item.attachment && item.attachment.doc_id && (
          {isAssistant && item.attachment && item.attachment.doc_id && (
            <div className="w-full flex items-center justify-end">
              <Button
                variant="link"
@@ -275,7 +275,7 @@ function MessageItem({
                <Download size={16} />
              </Button>
            </div>
          )} */}
          )}
        </section>
      </div>
    </section>
30 web/src/components/next-message-item/prompt-modal.tsx (new file)
@@ -0,0 +1,30 @@
import { IModalProps } from '@/interfaces/common';
import { IFeedbackRequestBody } from '@/interfaces/request/chat';
import { Modal, Space } from 'antd';
import HightLightMarkdown from '../highlight-markdown';
import SvgIcon from '../svg-icon';

const PromptModal = ({
  visible,
  hideModal,
  prompt,
}: IModalProps<IFeedbackRequestBody> & { prompt?: string }) => {
  return (
    <Modal
      title={
        <Space>
          <SvgIcon name={`prompt`} width={18}></SvgIcon>
          Prompt
        </Space>
      }
      width={'80%'}
      open={visible}
      onCancel={hideModal}
      footer={null}
    >
      <HightLightMarkdown>{prompt}</HightLightMarkdown>
    </Modal>
  );
};

export default PromptModal;
4 web/src/components/operate-dropdown/index.less (new file)
@@ -0,0 +1,4 @@
.delete {
  // height: 24px;
  display: inline-block;
}
90 web/src/components/operate-dropdown/index.tsx (new file)
@@ -0,0 +1,90 @@
import { useShowDeleteConfirm } from '@/hooks/common-hooks';
import { DeleteOutlined, MoreOutlined } from '@ant-design/icons';
import { Dropdown, MenuProps, Space } from 'antd';
import { useTranslation } from 'react-i18next';

import React, { useMemo } from 'react';
import styles from './index.less';

interface IProps {
  deleteItem: () => Promise<any> | void;
  iconFontSize?: number;
  iconFontColor?: string;
  items?: MenuProps['items'];
  height?: number;
  needsDeletionValidation?: boolean;
  showDeleteItems?: boolean;
}

const OperateDropdown = ({
  deleteItem,
  children,
  iconFontSize = 30,
  iconFontColor = 'gray',
  items: otherItems = [],
  height = 24,
  needsDeletionValidation = true,
  showDeleteItems = true,
}: React.PropsWithChildren<IProps>) => {
  const { t } = useTranslation();
  const showDeleteConfirm = useShowDeleteConfirm();

  const handleDelete = () => {
    if (needsDeletionValidation) {
      showDeleteConfirm({ onOk: deleteItem });
    } else {
      deleteItem();
    }
  };

  const handleDropdownMenuClick: MenuProps['onClick'] = ({ domEvent, key }) => {
    domEvent.preventDefault();
    domEvent.stopPropagation();
    if (key === '1') {
      handleDelete();
    }
  };

  const items: MenuProps['items'] = useMemo(() => {
    const items = [];

    if (showDeleteItems) {
      items.push({
        key: '1',
        label: (
          <Space>
            {t('common.delete')}
            <DeleteOutlined />
          </Space>
        ),
      });
    }

    return [...items, ...otherItems];
  }, [showDeleteItems, otherItems, t]);

  return (
    <Dropdown
      menu={{
        items,
        onClick: handleDropdownMenuClick,
      }}
    >
      {children || (
        <span className={styles.delete}>
          <MoreOutlined
            rotate={90}
            style={{
              fontSize: iconFontSize,
              color: iconFontColor,
              cursor: 'pointer',
              height,
            }}
          />
        </span>
      )}
    </Dropdown>
  );
};

export default OperateDropdown;
28 web/src/components/page-rank.tsx (new file)
@@ -0,0 +1,28 @@
import { useTranslate } from '@/hooks/common-hooks';
import { Flex, Form, InputNumber, Slider } from 'antd';

const PageRank = () => {
  const { t } = useTranslate('knowledgeConfiguration');

  return (
    <Form.Item label={t('pageRank')} tooltip={t('pageRankTip')}>
      <Flex gap={20} align="center">
        <Flex flex={1}>
          <Form.Item
            name={['pagerank']}
            noStyle
            initialValue={0}
            rules={[{ required: true }]}
          >
            <Slider max={100} style={{ width: '100%' }} />
          </Form.Item>
        </Flex>
        <Form.Item name={['pagerank']} noStyle rules={[{ required: true }]}>
          <InputNumber max={100} min={0} />
        </Form.Item>
      </Flex>
    </Form.Item>
  );
};

export default PageRank;
138 web/src/components/parse-configuration/graph-rag-items.tsx (new file)
@@ -0,0 +1,138 @@
import { DocumentParserType } from '@/constants/knowledge';
import { useTranslate } from '@/hooks/common-hooks';
import { cn } from '@/lib/utils';
import { Form, Select, Switch } from 'antd';
import { upperFirst } from 'lodash';
import { useCallback, useMemo } from 'react';
import { DatasetConfigurationContainer } from '../dataset-configuration-container';
import EntityTypesItem from '../entity-types-item';

const excludedTagParseMethods = [
  DocumentParserType.Table,
  DocumentParserType.KnowledgeGraph,
  DocumentParserType.Tag,
];

export const showTagItems = (parserId: DocumentParserType) => {
  return !excludedTagParseMethods.includes(parserId);
};

const enum MethodValue {
  General = 'general',
  Light = 'light',
}

export const excludedParseMethods = [
  DocumentParserType.Table,
  DocumentParserType.Resume,
  DocumentParserType.Picture,
  DocumentParserType.KnowledgeGraph,
  DocumentParserType.Qa,
  DocumentParserType.Tag,
];

export const showGraphRagItems = (parserId: DocumentParserType | undefined) => {
  return !excludedParseMethods.some((x) => x === parserId);
};

type GraphRagItemsProps = {
  marginBottom?: boolean;
};

export function UseGraphRagItem() {
  const { t } = useTranslate('knowledgeConfiguration');

  return (
    <Form.Item
      name={['parser_config', 'graphrag', 'use_graphrag']}
      label={t('useGraphRag')}
      initialValue={false}
      valuePropName="checked"
      tooltip={t('useGraphRagTip')}
    >
      <Switch />
    </Form.Item>
  );
}

// The three types "table", "resume" and "one" do not display this configuration.
const GraphRagItems = ({ marginBottom = false }: GraphRagItemsProps) => {
  const { t } = useTranslate('knowledgeConfiguration');

  const methodOptions = useMemo(() => {
    return [MethodValue.Light, MethodValue.General].map((x) => ({
      value: x,
      label: upperFirst(x),
    }));
  }, []);

  const renderWideTooltip = useCallback(
    (title: React.ReactNode | string) => {
      return {
        title: typeof title === 'string' ? t(title) : title,
        overlayInnerStyle: { width: '32vw' },
      };
    },
    [t],
  );

  return (
    <DatasetConfigurationContainer className={cn({ 'mb-4': marginBottom })}>
      <UseGraphRagItem></UseGraphRagItem>
      <Form.Item
        shouldUpdate={(prevValues, curValues) =>
          prevValues.parser_config.graphrag.use_graphrag !==
          curValues.parser_config.graphrag.use_graphrag
        }
      >
        {({ getFieldValue }) => {
          const useRaptor = getFieldValue([
            'parser_config',
            'graphrag',
            'use_graphrag',
          ]);

          return (
            useRaptor && (
              <>
                <EntityTypesItem
                  field={['parser_config', 'graphrag', 'entity_types']}
                ></EntityTypesItem>
                <Form.Item
                  name={['parser_config', 'graphrag', 'method']}
                  label={t('graphRagMethod')}
                  tooltip={renderWideTooltip(
                    <div
                      dangerouslySetInnerHTML={{
                        __html: t('graphRagMethodTip'),
                      }}
                    ></div>,
                  )}
                  initialValue={MethodValue.Light}
                >
                  <Select options={methodOptions} />
                </Form.Item>
                <Form.Item
                  name={['parser_config', 'graphrag', 'resolution']}
                  label={t('resolution')}
                  tooltip={renderWideTooltip('resolutionTip')}
                >
                  <Switch />
                </Form.Item>
                <Form.Item
                  name={['parser_config', 'graphrag', 'community']}
                  label={t('community')}
                  tooltip={renderWideTooltip('communityTip')}
                >
                  <Switch />
                </Form.Item>
              </>
            )
          );
        }}
      </Form.Item>
    </DatasetConfigurationContainer>
  );
};

export default GraphRagItems;
@ -1,33 +0,0 @@
|
||||
import { IModalProps } from '@/interfaces/common';
|
||||
import { IFeedbackRequestBody } from '@/interfaces/request/chat';
|
||||
import HightLightMarkdown from './highlight-markdown';
|
||||
import SvgIcon from './svg-icon';
|
||||
import { Dialog, DialogContent, DialogHeader, DialogTitle } from './ui/dialog';
|
||||
|
||||
type PromptDialogProps = IModalProps<IFeedbackRequestBody> & {
|
||||
prompt?: string;
|
||||
};
|
||||
|
||||
export function PromptDialog({
|
||||
visible,
|
||||
hideModal,
|
||||
prompt,
|
||||
}: PromptDialogProps) {
|
||||
return (
|
||||
<Dialog open={visible} onOpenChange={hideModal}>
|
||||
<DialogContent className="max-w-[80vw]">
|
||||
<DialogHeader>
|
||||
<DialogTitle>
|
||||
<div className="space-x-2">
|
||||
<SvgIcon name={`prompt`} width={18}></SvgIcon>
|
||||
<span> Prompt</span>
|
||||
</div>
|
||||
</DialogTitle>
|
||||
</DialogHeader>
|
||||
<section className="max-h-[80vh] overflow-auto">
|
||||
<HightLightMarkdown>{prompt}</HightLightMarkdown>
|
||||
</section>
|
||||
</DialogContent>
|
||||
</Dialog>
|
||||
);
|
||||
}
|
||||
82 web/src/components/rename-modal/index.tsx (new file)
@@ -0,0 +1,82 @@
import { useTranslate } from '@/hooks/common-hooks';
import { Form, Input, Modal } from 'antd';
import { useEffect } from 'react';
import { IModalManagerChildrenProps } from '../modal-manager';

interface IProps extends Omit<IModalManagerChildrenProps, 'showModal'> {
  loading: boolean;
  initialName: string;
  onOk: (name: string) => void;
  showModal?(): void;
}

const RenameModal = ({
  visible,
  hideModal,
  loading,
  initialName,
  onOk,
}: IProps) => {
  const [form] = Form.useForm();
  const { t } = useTranslate('common');

  type FieldType = {
    name?: string;
  };

  const handleOk = async () => {
    const ret = await form.validateFields();

    return onOk(ret.name);
  };

  const handleCancel = () => {
    hideModal();
  };

  const onFinish = (values: any) => {
    console.log('Success:', values);
  };

  const onFinishFailed = (errorInfo: any) => {
    console.log('Failed:', errorInfo);
  };

  useEffect(() => {
    if (visible) {
      form.setFieldValue('name', initialName);
    }
  }, [initialName, form, visible]);

  return (
    <Modal
      title={t('rename')}
      open={visible}
      onOk={handleOk}
      onCancel={handleCancel}
      okButtonProps={{ loading }}
      confirmLoading={loading}
    >
      <Form
        name="basic"
        labelCol={{ span: 4 }}
        wrapperCol={{ span: 20 }}
        style={{ maxWidth: 600 }}
        onFinish={onFinish}
        onFinishFailed={onFinishFailed}
        autoComplete="off"
        form={form}
      >
        <Form.Item<FieldType>
          label={t('name')}
          name="name"
          rules={[{ required: true, message: t('namePlaceholder') }]}
        >
          <Input />
        </Form.Item>
      </Form>
    </Modal>
  );
};

export default RenameModal;
Some files were not shown because too many files have changed in this diff.