Mirror of https://github.com/infiniflow/ragflow.git (synced 2025-12-08 20:42:30 +08:00)

Compare commits: revert-116...nightly
41 commits
| SHA1 |
|---|
| 8de6b97806 |
| e4e0a88053 |
| 7719fd6350 |
| 15ef6dd72f |
| 5b5f19cbc1 |
| ea38e12d42 |
| 885eb2eab9 |
| 6587acef88 |
| ad03ede7cd |
| 468e4042c2 |
| af1344033d |
| 4012d65b3c |
| e2bc1a3478 |
| 6c2c447a72 |
| e7022db9a4 |
| ca4a0ee1b2 |
| 27b0550876 |
| 797e03f843 |
| b4e06237ef |
| 751a13fb64 |
| fa7b857aa9 |
| 257af75ece |
| cbdacf21f6 |
| b1f3130519 |
| 3c224c817b |
| a3c9402218 |
| a7d40e9132 |
| 648342b62f |
| 4870d42949 |
| caaf7043cc |
| 237a66913b |
| 3c50c7d3ac |
| b44e65a12e |
| e3f40db963 |
| b5ad7b7062 |
| 6fc7def562 |
| c8f608b2dd |
| 5c81e01de5 |
| 83fac6d0a0 |
| a6681d6366 |
| 1388c4420d |
.github/workflows/tests.yml (vendored, 8 changed lines)
@@ -127,6 +127,14 @@ jobs:
          fi
          fi

      - name: Run unit test
        run: |
          uv sync --python 3.10 --group test --frozen
          source .venv/bin/activate
          which pytest || echo "pytest not in PATH"
          echo "Start to run unit test"
          python3 run_tests.py

      - name: Build ragflow:nightly
        run: |
          RUNNER_WORKSPACE_PREFIX=${RUNNER_WORKSPACE_PREFIX:-${HOME}}
@@ -10,11 +10,10 @@ WORKDIR /ragflow
# Copy models downloaded via download_deps.py
RUN mkdir -p /ragflow/rag/res/deepdoc /root/.ragflow
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/huggingface.co,target=/huggingface.co \
    cp /huggingface.co/InfiniFlow/huqie/huqie.txt.trie /ragflow/rag/res/ && \
    tar --exclude='.*' -cf - \
        /huggingface.co/InfiniFlow/text_concat_xgb_v1.0 \
        /huggingface.co/InfiniFlow/deepdoc \
    | tar -xf - --strip-components=3 -C /ragflow/rag/res/deepdoc
    | tar -xf - --strip-components=3 -C /ragflow/rag/res/deepdoc

# https://github.com/chrismattmann/tika-python
# This is the only way to run python-tika without internet access. Without this set, the default is to check the tika version and pull latest every time from Apache.
@@ -14,5 +14,5 @@
# limitations under the License.
#

from beartype.claw import beartype_this_package
beartype_this_package()
# from beartype.claw import beartype_this_package
# beartype_this_package()
agent/canvas.py (120 changed lines)
@@ -16,6 +16,7 @@
import asyncio
import base64
import inspect
import binascii
import json
import logging
import re
@@ -28,7 +29,9 @@ from typing import Any, Union, Tuple
from agent.component import component_class
from agent.component.base import ComponentBase
from api.db.services.file_service import FileService
from api.db.services.llm_service import LLMBundle
from api.db.services.task_service import has_canceled
from common.constants import LLMType
from common.misc_utils import get_uuid, hash_str2int
from common.exceptions import TaskCanceledException
from rag.prompts.generator import chunks_format
@@ -88,9 +91,6 @@ class Graph:
    def load(self):
        self.components = self.dsl["components"]
        cpn_nms = set([])
        for k, cpn in self.components.items():
            cpn_nms.add(cpn["obj"]["component_name"])

        for k, cpn in self.components.items():
            cpn_nms.add(cpn["obj"]["component_name"])
            param = component_class(cpn["obj"]["component_name"] + "Param")()
@@ -356,8 +356,6 @@ class Canvas(Graph):
                self.globals[k] = ""
            else:
                self.globals[k] = ""
        print(self.globals)


    async def run(self, **kwargs):
        st = time.perf_counter()
@@ -415,6 +413,7 @@ class Canvas(Graph):

        loop = asyncio.get_running_loop()
        tasks = []

        def _run_async_in_thread(coro_func, **call_kwargs):
            return asyncio.run(coro_func(**call_kwargs))
@@ -466,6 +465,7 @@ class Canvas(Graph):
        self.error = ""
        idx = len(self.path) - 1
        partials = []
        tts_mdl = None
        while idx < len(self.path):
            to = len(self.path)
            for i in range(idx, to):
@@ -483,41 +483,63 @@ class Canvas(Graph):
                cpn = self.get_component(self.path[i])
                cpn_obj = self.get_component_obj(self.path[i])
                if cpn_obj.component_name.lower() == "message":
                    if cpn_obj.get_param("auto_play"):
                        tts_mdl = LLMBundle(self._tenant_id, LLMType.TTS)
                    if isinstance(cpn_obj.output("content"), partial):
                        _m = ""
                        buff_m = ""
                        stream = cpn_obj.output("content")()

                        async def _process_stream(m):
                            nonlocal buff_m, _m, tts_mdl
                            if not m:
                                return
                            if m == "<think>":
                                return decorate("message", {"content": "", "start_to_think": True})
                            elif m == "</think>":
                                return decorate("message", {"content": "", "end_to_think": True})
                            buff_m += m
                            _m += m
                            if len(buff_m) > 16:
                                ev = decorate(
                                    "message",
                                    {
                                        "content": m,
                                        "audio_binary": self.tts(tts_mdl, buff_m)
                                    }
                                )
                                buff_m = ""
                                return ev
                            return decorate("message", {"content": m})

                        if inspect.isasyncgen(stream):
                            async for m in stream:
                                if not m:
                                    continue
                                if m == "<think>":
                                    yield decorate("message", {"content": "", "start_to_think": True})
                                elif m == "</think>":
                                    yield decorate("message", {"content": "", "end_to_think": True})
                                else:
                                    yield decorate("message", {"content": m})
                                _m += m
                                ev = await _process_stream(m)
                                if ev:
                                    yield ev
                        else:
                            for m in stream:
                                if not m:
                                    continue
                                if m == "<think>":
                                    yield decorate("message", {"content": "", "start_to_think": True})
                                elif m == "</think>":
                                    yield decorate("message", {"content": "", "end_to_think": True})
                                else:
                                    yield decorate("message", {"content": m})
                                _m += m
                                ev = await _process_stream(m)
                                if ev:
                                    yield ev
                        if buff_m:
                            yield decorate("message", {"content": "", "audio_binary": self.tts(tts_mdl, buff_m)})
                            buff_m = ""
                        cpn_obj.set_output("content", _m)
                        cite = re.search(r"\[ID:[ 0-9]+\]", _m)
                    else:
                        yield decorate("message", {"content": cpn_obj.output("content")})
                        cite = re.search(r"\[ID:[ 0-9]+\]", cpn_obj.output("content"))

                    if isinstance(cpn_obj.output("attachment"), tuple):
                        yield decorate("message", {"attachment": cpn_obj.output("attachment")})

                    yield decorate("message_end", {"reference": self.get_reference() if cite else None})
                    message_end = {}
                    if isinstance(cpn_obj.output("attachment"), dict):
                        message_end["attachment"] = cpn_obj.output("attachment")
                    if cite:
                        message_end["reference"] = self.get_reference()
                    yield decorate("message_end", message_end)

            while partials:
                _cpn_obj = self.get_component_obj(partials[0])
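The auto-play path above batches streamed text before synthesis: each chunk is appended to buff_m, audio is only generated once more than 16 characters have accumulated, and a final flush runs after the stream ends. A minimal, self-contained sketch of that batching pattern (the synthesize callback and the helper name are illustrative stand-ins, not the project's API):

from typing import Callable, Iterable, Iterator, Optional, Tuple

def batch_for_tts(chunks: Iterable[str],
                  synthesize: Callable[[str], Optional[bytes]],
                  min_chars: int = 16) -> Iterator[Tuple[str, Optional[bytes]]]:
    """Yield (text_chunk, audio_or_None); audio covers the buffered text so far."""
    buff = ""
    for m in chunks:
        if not m:
            continue
        buff += m
        if len(buff) > min_chars:
            yield m, synthesize(buff)   # emit audio for the whole buffer, like self.tts(tts_mdl, buff_m)
            buff = ""
        else:
            yield m, None               # text only, keep buffering
    if buff:
        yield "", synthesize(buff)      # final flush, mirrors the trailing `if buff_m:` branch

# Example with a dummy synthesizer:
# list(batch_for_tts(["Hello ", "there, ", "how are you?"], lambda t: t.encode()))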
@@ -628,6 +650,50 @@ class Canvas(Graph):
            return False
        return True

    def tts(self, tts_mdl, text):
        def clean_tts_text(text: str) -> str:
            if not text:
                return ""

            text = text.encode("utf-8", "ignore").decode("utf-8", "ignore")

            text = re.sub(r"[\x00-\x08\x0B-\x0C\x0E-\x1F\x7F]", "", text)

            emoji_pattern = re.compile(
                "[\U0001F600-\U0001F64F"
                "\U0001F300-\U0001F5FF"
                "\U0001F680-\U0001F6FF"
                "\U0001F1E0-\U0001F1FF"
                "\U00002700-\U000027BF"
                "\U0001F900-\U0001F9FF"
                "\U0001FA70-\U0001FAFF"
                "\U0001FAD0-\U0001FAFF]+",
                flags=re.UNICODE
            )
            text = emoji_pattern.sub("", text)

            text = re.sub(r"\s+", " ", text).strip()

            MAX_LEN = 500
            if len(text) > MAX_LEN:
                text = text[:MAX_LEN]

            return text

        if not tts_mdl or not text:
            return None
        text = clean_tts_text(text)
        if not text:
            return None
        bin = b""
        try:
            for chunk in tts_mdl.tts(text):
                bin += chunk
        except Exception as e:
            logging.error(f"TTS failed: {e}, text={text!r}")
            return None
        return binascii.hexlify(bin).decode("utf-8")

    def get_history(self, window_size):
        convs = []
        if window_size <= 0:
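tts() above returns the synthesized audio as a hex string (binascii.hexlify(...).decode("utf-8")), so whatever consumes the audio_binary field of the streamed message events has to reverse that encoding before playback. A minimal sketch, assuming only the payload shape shown in the hunk above:

import binascii

def decode_audio_event(event: dict) -> bytes | None:
    """Turn the hex-encoded `audio_binary` field back into raw audio bytes."""
    audio_hex = event.get("audio_binary")
    if not audio_hex:
        return None
    return binascii.unhexlify(audio_hex)

# The recovered bytes can then be written to a file or fed to a player;
# the audio format depends on the configured TTS model.
# decode_audio_event({"audio_binary": "48656c6c6f"}) == b"Hello"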
@ -18,7 +18,6 @@ import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from copy import deepcopy
|
||||
from functools import partial
|
||||
from typing import Any
|
||||
@ -30,8 +29,8 @@ from api.db.services.llm_service import LLMBundle
|
||||
from api.db.services.tenant_llm_service import TenantLLMService
|
||||
from api.db.services.mcp_server_service import MCPServerService
|
||||
from common.connection_utils import timeout
|
||||
from rag.prompts.generator import next_step, COMPLETE_TASK, analyze_task, \
|
||||
citation_prompt, reflect, rank_memories, kb_prompt, citation_plus, full_question, message_fit_in, structured_output_prompt
|
||||
from rag.prompts.generator import next_step_async, COMPLETE_TASK, analyze_task_async, \
|
||||
citation_prompt, reflect_async, kb_prompt, citation_plus, full_question, message_fit_in, structured_output_prompt
|
||||
from common.mcp_tool_call_conn import MCPToolCallSession, mcp_tool_metadata_to_openai_tool
|
||||
from agent.component.llm import LLMParam, LLM
|
||||
|
||||
@ -154,16 +153,19 @@ class Agent(LLM, ToolBase):
|
||||
|
||||
return None
|
||||
|
||||
def _force_format_to_schema(self, text: str, schema_prompt: str) -> str:
|
||||
async def _force_format_to_schema_async(self, text: str, schema_prompt: str) -> str:
|
||||
fmt_msgs = [
|
||||
{"role": "system", "content": schema_prompt + "\nIMPORTANT: Output ONLY valid JSON. No markdown, no extra text."},
|
||||
{"role": "user", "content": text},
|
||||
]
|
||||
_, fmt_msgs = message_fit_in(fmt_msgs, int(self.chat_mdl.max_length * 0.97))
|
||||
return self._generate(fmt_msgs)
|
||||
return await self._generate_async(fmt_msgs)
|
||||
|
||||
def _invoke(self, **kwargs):
|
||||
return asyncio.run(self._invoke_async(**kwargs))
|
||||
|
||||
@timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 20*60)))
|
||||
def _invoke(self, **kwargs):
|
||||
async def _invoke_async(self, **kwargs):
|
||||
if self.check_if_canceled("Agent processing"):
|
||||
return
|
||||
|
||||
@ -182,7 +184,7 @@ class Agent(LLM, ToolBase):
|
||||
if not self.tools:
|
||||
if self.check_if_canceled("Agent processing"):
|
||||
return
|
||||
return LLM._invoke(self, **kwargs)
|
||||
return await LLM._invoke_async(self, **kwargs)
|
||||
|
||||
prompt, msg, user_defined_prompt = self._prepare_prompt_variables()
|
||||
output_schema = self._get_output_schema()
|
||||
@ -194,13 +196,13 @@ class Agent(LLM, ToolBase):
|
||||
downstreams = self._canvas.get_component(self._id)["downstream"] if self._canvas.get_component(self._id) else []
|
||||
ex = self.exception_handler()
|
||||
if any([self._canvas.get_component_obj(cid).component_name.lower()=="message" for cid in downstreams]) and not (ex and ex["goto"]) and not output_schema:
|
||||
self.set_output("content", partial(self.stream_output_with_tools, prompt, msg, user_defined_prompt))
|
||||
self.set_output("content", partial(self.stream_output_with_tools_async, prompt, deepcopy(msg), user_defined_prompt))
|
||||
return
|
||||
|
||||
_, msg = message_fit_in([{"role": "system", "content": prompt}, *msg], int(self.chat_mdl.max_length * 0.97))
|
||||
use_tools = []
|
||||
ans = ""
|
||||
for delta_ans, tk in self._react_with_tools_streamly(prompt, msg, use_tools, user_defined_prompt,schema_prompt=schema_prompt):
|
||||
async for delta_ans, _tk in self._react_with_tools_streamly_async(prompt, msg, use_tools, user_defined_prompt,schema_prompt=schema_prompt):
|
||||
if self.check_if_canceled("Agent processing"):
|
||||
return
|
||||
ans += delta_ans
|
||||
@ -228,7 +230,7 @@ class Agent(LLM, ToolBase):
|
||||
return obj
|
||||
except Exception:
|
||||
error = "The answer cannot be parsed as JSON"
|
||||
ans = self._force_format_to_schema(ans, schema_prompt)
|
||||
ans = await self._force_format_to_schema_async(ans, schema_prompt)
|
||||
if ans.find("**ERROR**") >= 0:
|
||||
continue
|
||||
|
||||
@ -240,108 +242,6 @@ class Agent(LLM, ToolBase):
|
||||
self.set_output("use_tools", use_tools)
|
||||
return ans
|
||||
|
||||
async def invoke_async(self, **kwargs):
|
||||
"""
|
||||
Async entry: reuse existing logic but offload heavy sync parts via async wrappers to reduce blocking.
|
||||
"""
|
||||
if self.check_if_canceled("Agent processing"):
|
||||
return
|
||||
|
||||
if kwargs.get("user_prompt"):
|
||||
usr_pmt = ""
|
||||
if kwargs.get("reasoning"):
|
||||
usr_pmt += "\nREASONING:\n{}\n".format(kwargs["reasoning"])
|
||||
if kwargs.get("context"):
|
||||
usr_pmt += "\nCONTEXT:\n{}\n".format(kwargs["context"])
|
||||
if usr_pmt:
|
||||
usr_pmt += "\nQUERY:\n{}\n".format(str(kwargs["user_prompt"]))
|
||||
else:
|
||||
usr_pmt = str(kwargs["user_prompt"])
|
||||
self._param.prompts = [{"role": "user", "content": usr_pmt}]
|
||||
|
||||
if not self.tools:
|
||||
if self.check_if_canceled("Agent processing"):
|
||||
return
|
||||
return await asyncio.to_thread(LLM._invoke, self, **kwargs)
|
||||
|
||||
prompt, msg, user_defined_prompt = self._prepare_prompt_variables()
|
||||
output_schema = self._get_output_schema()
|
||||
schema_prompt = ""
|
||||
if output_schema:
|
||||
schema = json.dumps(output_schema, ensure_ascii=False, indent=2)
|
||||
schema_prompt = structured_output_prompt(schema)
|
||||
|
||||
downstreams = self._canvas.get_component(self._id)["downstream"] if self._canvas.get_component(self._id) else []
|
||||
ex = self.exception_handler()
|
||||
if any([self._canvas.get_component_obj(cid).component_name.lower()=="message" for cid in downstreams]) and not (ex and ex["goto"]) and not output_schema:
|
||||
self.set_output("content", partial(self.stream_output_with_tools_async, prompt, msg, user_defined_prompt))
|
||||
return
|
||||
|
||||
_, msg = message_fit_in([{"role": "system", "content": prompt}, *msg], int(self.chat_mdl.max_length * 0.97))
|
||||
use_tools = []
|
||||
ans = ""
|
||||
async for delta_ans, tk in self._react_with_tools_streamly_async(prompt, msg, use_tools, user_defined_prompt, schema_prompt=schema_prompt):
|
||||
if self.check_if_canceled("Agent processing"):
|
||||
return
|
||||
ans += delta_ans
|
||||
|
||||
if ans.find("**ERROR**") >= 0:
|
||||
logging.error(f"Agent._chat got error. response: {ans}")
|
||||
if self.get_exception_default_value():
|
||||
self.set_output("content", self.get_exception_default_value())
|
||||
else:
|
||||
self.set_output("_ERROR", ans)
|
||||
return
|
||||
|
||||
if output_schema:
|
||||
error = ""
|
||||
for _ in range(self._param.max_retries + 1):
|
||||
try:
|
||||
def clean_formated_answer(ans: str) -> str:
|
||||
ans = re.sub(r"^.*</think>", "", ans, flags=re.DOTALL)
|
||||
ans = re.sub(r"^.*```json", "", ans, flags=re.DOTALL)
|
||||
return re.sub(r"```\n*$", "", ans, flags=re.DOTALL)
|
||||
obj = json_repair.loads(clean_formated_answer(ans))
|
||||
self.set_output("structured", obj)
|
||||
if use_tools:
|
||||
self.set_output("use_tools", use_tools)
|
||||
return obj
|
||||
except Exception:
|
||||
error = "The answer cannot be parsed as JSON"
|
||||
ans = self._force_format_to_schema(ans, schema_prompt)
|
||||
if ans.find("**ERROR**") >= 0:
|
||||
continue
|
||||
|
||||
self.set_output("_ERROR", error)
|
||||
return
|
||||
|
||||
self.set_output("content", ans)
|
||||
if use_tools:
|
||||
self.set_output("use_tools", use_tools)
|
||||
return ans
|
||||
|
||||
def stream_output_with_tools(self, prompt, msg, user_defined_prompt={}):
|
||||
_, msg = message_fit_in([{"role": "system", "content": prompt}, *msg], int(self.chat_mdl.max_length * 0.97))
|
||||
answer_without_toolcall = ""
|
||||
use_tools = []
|
||||
for delta_ans,_ in self._react_with_tools_streamly(prompt, msg, use_tools, user_defined_prompt):
|
||||
if self.check_if_canceled("Agent streaming"):
|
||||
return
|
||||
|
||||
if delta_ans.find("**ERROR**") >= 0:
|
||||
if self.get_exception_default_value():
|
||||
self.set_output("content", self.get_exception_default_value())
|
||||
yield self.get_exception_default_value()
|
||||
else:
|
||||
self.set_output("_ERROR", delta_ans)
|
||||
return
|
||||
answer_without_toolcall += delta_ans
|
||||
yield delta_ans
|
||||
|
||||
self.set_output("content", answer_without_toolcall)
|
||||
if use_tools:
|
||||
self.set_output("use_tools", use_tools)
|
||||
|
||||
async def stream_output_with_tools_async(self, prompt, msg, user_defined_prompt={}):
|
||||
_, msg = message_fit_in([{"role": "system", "content": prompt}, *msg], int(self.chat_mdl.max_length * 0.97))
|
||||
answer_without_toolcall = ""
|
||||
@ -365,64 +265,22 @@ class Agent(LLM, ToolBase):
|
||||
self.set_output("use_tools", use_tools)
|
||||
|
||||
async def _react_with_tools_streamly_async(self, prompt, history: list[dict], use_tools, user_defined_prompt={}, schema_prompt: str = ""):
|
||||
"""
|
||||
Async wrapper that offloads synchronous flow to a thread, yielding results without blocking the event loop.
|
||||
"""
|
||||
loop = asyncio.get_running_loop()
|
||||
queue: asyncio.Queue = asyncio.Queue()
|
||||
|
||||
def worker():
|
||||
try:
|
||||
for delta_ans, tk in self._react_with_tools_streamly(prompt, history, use_tools, user_defined_prompt, schema_prompt=schema_prompt):
|
||||
asyncio.run_coroutine_threadsafe(queue.put((delta_ans, tk)), loop)
|
||||
except Exception as e:
|
||||
asyncio.run_coroutine_threadsafe(queue.put(e), loop)
|
||||
finally:
|
||||
asyncio.run_coroutine_threadsafe(queue.put(StopAsyncIteration), loop)
|
||||
|
||||
await asyncio.to_thread(worker)
|
||||
|
||||
while True:
|
||||
item = await queue.get()
|
||||
if item is StopAsyncIteration:
|
||||
break
|
||||
if isinstance(item, Exception):
|
||||
raise item
|
||||
yield item
|
||||
|
||||
def _gen_citations(self, text):
|
||||
retrievals = self._canvas.get_reference()
|
||||
retrievals = {"chunks": list(retrievals["chunks"].values()), "doc_aggs": list(retrievals["doc_aggs"].values())}
|
||||
formated_refer = kb_prompt(retrievals, self.chat_mdl.max_length, True)
|
||||
for delta_ans in self._generate_streamly([{"role": "system", "content": citation_plus("\n\n".join(formated_refer))},
|
||||
{"role": "user", "content": text}
|
||||
]):
|
||||
yield delta_ans
|
||||
|
||||
def _react_with_tools_streamly(self, prompt, history: list[dict], use_tools, user_defined_prompt={}, schema_prompt: str = ""):
|
||||
token_count = 0
|
||||
tool_metas = self.tool_meta
|
||||
hist = deepcopy(history)
|
||||
last_calling = ""
|
||||
if len(hist) > 3:
|
||||
st = timer()
|
||||
user_request = full_question(messages=history, chat_mdl=self.chat_mdl)
|
||||
user_request = await asyncio.to_thread(full_question, messages=history, chat_mdl=self.chat_mdl)
|
||||
self.callback("Multi-turn conversation optimization", {}, user_request, elapsed_time=timer()-st)
|
||||
else:
|
||||
user_request = history[-1]["content"]
|
||||
|
||||
def use_tool(name, args):
|
||||
nonlocal hist, use_tools, token_count,last_calling,user_request
|
||||
async def use_tool_async(name, args):
|
||||
nonlocal hist, use_tools, last_calling
|
||||
logging.info(f"{last_calling=} == {name=}")
|
||||
# Summarize of function calling
|
||||
#if all([
|
||||
# isinstance(self.toolcall_session.get_tool_obj(name), Agent),
|
||||
# last_calling,
|
||||
# last_calling != name
|
||||
#]):
|
||||
# self.toolcall_session.get_tool_obj(name).add2system_prompt(f"The chat history with other agents are as following: \n" + self.get_useful_memory(user_request, str(args["user_prompt"]),user_defined_prompt))
|
||||
last_calling = name
|
||||
tool_response = self.toolcall_session.tool_call(name, args)
|
||||
tool_response = await self.toolcall_session.tool_call_async(name, args)
|
||||
use_tools.append({
|
||||
"name": name,
|
||||
"arguments": args,
|
||||
@ -433,7 +291,7 @@ class Agent(LLM, ToolBase):
|
||||
|
||||
return name, tool_response
|
||||
|
||||
def complete():
|
||||
async def complete():
|
||||
nonlocal hist
|
||||
need2cite = self._param.cite and self._canvas.get_reference()["chunks"] and self._id.find("-->") < 0
|
||||
if schema_prompt:
|
||||
@ -451,7 +309,7 @@ class Agent(LLM, ToolBase):
|
||||
if len(hist) > 12:
|
||||
_hist = [hist[0], hist[1], *hist[-10:]]
|
||||
entire_txt = ""
|
||||
for delta_ans in self._generate_streamly(_hist):
|
||||
async for delta_ans in self._generate_streamly_async(_hist):
|
||||
if not need2cite or cited:
|
||||
yield delta_ans, 0
|
||||
entire_txt += delta_ans
|
||||
@ -460,7 +318,7 @@ class Agent(LLM, ToolBase):
|
||||
|
||||
st = timer()
|
||||
txt = ""
|
||||
for delta_ans in self._gen_citations(entire_txt):
|
||||
async for delta_ans in self._gen_citations_async(entire_txt):
|
||||
if self.check_if_canceled("Agent streaming"):
|
||||
return
|
||||
yield delta_ans, 0
|
||||
@ -475,14 +333,14 @@ class Agent(LLM, ToolBase):
|
||||
hist.append({"role": "user", "content": content})
|
||||
|
||||
st = timer()
|
||||
task_desc = analyze_task(self.chat_mdl, prompt, user_request, tool_metas, user_defined_prompt)
|
||||
task_desc = await analyze_task_async(self.chat_mdl, prompt, user_request, tool_metas, user_defined_prompt)
|
||||
self.callback("analyze_task", {}, task_desc, elapsed_time=timer()-st)
|
||||
for _ in range(self._param.max_rounds + 1):
|
||||
if self.check_if_canceled("Agent streaming"):
|
||||
return
|
||||
response, tk = next_step(self.chat_mdl, hist, tool_metas, task_desc, user_defined_prompt)
|
||||
response, tk = await next_step_async(self.chat_mdl, hist, tool_metas, task_desc, user_defined_prompt)
|
||||
# self.callback("next_step", {}, str(response)[:256]+"...")
|
||||
token_count += tk
|
||||
token_count += tk or 0
|
||||
hist.append({"role": "assistant", "content": response})
|
||||
try:
|
||||
functions = json_repair.loads(re.sub(r"```.*", "", response))
|
||||
@ -491,23 +349,24 @@ class Agent(LLM, ToolBase):
|
||||
for f in functions:
|
||||
if not isinstance(f, dict):
|
||||
raise TypeError(f"An object type should be returned, but `{f}`")
|
||||
with ThreadPoolExecutor(max_workers=5) as executor:
|
||||
thr = []
|
||||
for func in functions:
|
||||
name = func["name"]
|
||||
args = func["arguments"]
|
||||
if name == COMPLETE_TASK:
|
||||
append_user_content(hist, f"Respond with a formal answer. FORGET(DO NOT mention) about `{COMPLETE_TASK}`. The language for the response MUST be as the same as the first user request.\n")
|
||||
for txt, tkcnt in complete():
|
||||
yield txt, tkcnt
|
||||
return
|
||||
|
||||
thr.append(executor.submit(use_tool, name, args))
|
||||
tool_tasks = []
|
||||
for func in functions:
|
||||
name = func["name"]
|
||||
args = func["arguments"]
|
||||
if name == COMPLETE_TASK:
|
||||
append_user_content(hist, f"Respond with a formal answer. FORGET(DO NOT mention) about `{COMPLETE_TASK}`. The language for the response MUST be as the same as the first user request.\n")
|
||||
async for txt, tkcnt in complete():
|
||||
yield txt, tkcnt
|
||||
return
|
||||
|
||||
st = timer()
|
||||
reflection = reflect(self.chat_mdl, hist, [th.result() for th in thr], user_defined_prompt)
|
||||
append_user_content(hist, reflection)
|
||||
self.callback("reflection", {}, str(reflection), elapsed_time=timer()-st)
|
||||
tool_tasks.append(asyncio.create_task(use_tool_async(name, args)))
|
||||
|
||||
results = await asyncio.gather(*tool_tasks) if tool_tasks else []
|
||||
st = timer()
|
||||
reflection = await reflect_async(self.chat_mdl, hist, results, user_defined_prompt)
|
||||
append_user_content(hist, reflection)
|
||||
self.callback("reflection", {}, str(reflection), elapsed_time=timer()-st)
|
||||
|
||||
except Exception as e:
|
||||
logging.exception(msg=f"Wrong JSON argument format in LLM ReAct response: {e}")
|
||||
@ -531,21 +390,17 @@ Respond immediately with your final comprehensive answer.
|
||||
return
|
||||
append_user_content(hist, final_instruction)
|
||||
|
||||
for txt, tkcnt in complete():
|
||||
async for txt, tkcnt in complete():
|
||||
yield txt, tkcnt
|
||||
|
||||
def get_useful_memory(self, goal: str, sub_goal:str, topn=3, user_defined_prompt:dict={}) -> str:
|
||||
# self.callback("get_useful_memory", {"topn": 3}, "...")
|
||||
mems = self._canvas.get_memory()
|
||||
rank = rank_memories(self.chat_mdl, goal, sub_goal, [summ for (user, assist, summ) in mems], user_defined_prompt)
|
||||
try:
|
||||
rank = json_repair.loads(re.sub(r"```.*", "", rank))[:topn]
|
||||
mems = [mems[r] for r in rank]
|
||||
return "\n\n".join([f"User: {u}\nAgent: {a}" for u, a,_ in mems])
|
||||
except Exception as e:
|
||||
logging.exception(e)
|
||||
|
||||
return "Error occurred."
|
||||
async def _gen_citations_async(self, text):
|
||||
retrievals = self._canvas.get_reference()
|
||||
retrievals = {"chunks": list(retrievals["chunks"].values()), "doc_aggs": list(retrievals["doc_aggs"].values())}
|
||||
formated_refer = kb_prompt(retrievals, self.chat_mdl.max_length, True)
|
||||
async for delta_ans in self._generate_streamly_async([{"role": "system", "content": citation_plus("\n\n".join(formated_refer))},
|
||||
{"role": "user", "content": text}
|
||||
]):
|
||||
yield delta_ans
|
||||
|
||||
def reset(self, only_output=False):
|
||||
"""
|
||||
|
||||
@@ -14,6 +14,7 @@
# limitations under the License.
#

import asyncio
import re
import time
from abc import ABC
@@ -445,6 +446,34 @@ class ComponentBase(ABC):
            self.set_output("_elapsed_time", time.perf_counter() - self.output("_created_time"))
        return self.output()

    async def invoke_async(self, **kwargs) -> dict[str, Any]:
        """
        Async wrapper for component invocation.
        Prefers coroutine `_invoke_async` if present; otherwise falls back to `_invoke`.
        Handles timing and error recording consistently with `invoke`.
        """
        self.set_output("_created_time", time.perf_counter())
        try:
            if self.check_if_canceled("Component processing"):
                return

            fn_async = getattr(self, "_invoke_async", None)
            if fn_async and asyncio.iscoroutinefunction(fn_async):
                await fn_async(**kwargs)
            elif asyncio.iscoroutinefunction(self._invoke):
                await self._invoke(**kwargs)
            else:
                await asyncio.to_thread(self._invoke, **kwargs)
        except Exception as e:
            if self.get_exception_default_value():
                self.set_exception_default_value()
            else:
                self.set_output("_ERROR", str(e))
            logging.exception(e)
        self._param.debug_inputs = {}
        self.set_output("_elapsed_time", time.perf_counter() - self.output("_created_time"))
        return self.output()

    @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10*60)))
    def _invoke(self, **kwargs):
        raise NotImplementedError()
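The new invoke_async wrapper dispatches to a coroutine _invoke_async when the subclass defines one, awaits _invoke if that is itself a coroutine function, and otherwise runs the blocking _invoke in a worker thread via asyncio.to_thread. A minimal sketch of a component taking the first path (the subclass and its output key are hypothetical, for illustration only):

import asyncio
from agent.component.base import ComponentBase  # import path as used elsewhere in this diff

class EchoComponent(ComponentBase):  # hypothetical subclass
    async def _invoke_async(self, **kwargs):
        # Pure-async work: invoke_async awaits this directly, no thread hop needed.
        await asyncio.sleep(0)
        self.set_output("content", kwargs.get("text", ""))

# Inside a running event loop the caller simply awaits the shared wrapper:
#     outputs = await component.invoke_async(text="hello")
# A component that only implements a blocking _invoke still works: invoke_async
# falls back to asyncio.to_thread(self._invoke, **kwargs) so the loop is not blocked.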
@ -18,6 +18,7 @@ import re
|
||||
from functools import partial
|
||||
|
||||
from agent.component.base import ComponentParamBase, ComponentBase
|
||||
from api.db.services.file_service import FileService
|
||||
|
||||
|
||||
class UserFillUpParam(ComponentParamBase):
|
||||
@ -63,6 +64,13 @@ class UserFillUp(ComponentBase):
|
||||
for k, v in kwargs.get("inputs", {}).items():
|
||||
if self.check_if_canceled("UserFillUp processing"):
|
||||
return
|
||||
if isinstance(v, dict) and v.get("type", "").lower().find("file") >=0:
|
||||
if v.get("optional") and v.get("value", None) is None:
|
||||
v = None
|
||||
else:
|
||||
v = FileService.get_files([v["value"]])
|
||||
else:
|
||||
v = v.get("value")
|
||||
self.set_output(k, v)
|
||||
|
||||
def thoughts(self) -> str:
|
||||
|
||||
@ -215,7 +215,6 @@ class LLM(ComponentBase):
|
||||
yield delta(txt)
|
||||
|
||||
async def _generate_streamly_async(self, msg: list[dict], **kwargs) -> AsyncGenerator[str, None]:
|
||||
# Prefer async chat_streamly if available
|
||||
async def delta_wrapper(txt_iter):
|
||||
ans = ""
|
||||
last_idx = 0
|
||||
@ -256,7 +255,7 @@ class LLM(ComponentBase):
|
||||
yield t
|
||||
return
|
||||
|
||||
# Fallback: run sync stream in thread, bridge results
|
||||
# fallback
|
||||
loop = asyncio.get_running_loop()
|
||||
queue: asyncio.Queue = asyncio.Queue()
|
||||
|
||||
@ -328,7 +327,7 @@ class LLM(ComponentBase):
|
||||
self.set_output("content", answer)
|
||||
|
||||
@timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10*60)))
|
||||
def _invoke(self, **kwargs):
|
||||
async def _invoke_async(self, **kwargs):
|
||||
if self.check_if_canceled("LLM processing"):
|
||||
return
|
||||
|
||||
@ -339,22 +338,25 @@ class LLM(ComponentBase):
|
||||
|
||||
prompt, msg, _ = self._prepare_prompt_variables()
|
||||
error: str = ""
|
||||
output_structure=None
|
||||
output_structure = None
|
||||
try:
|
||||
output_structure = self._param.outputs['structured']
|
||||
output_structure = self._param.outputs["structured"]
|
||||
except Exception:
|
||||
pass
|
||||
if output_structure and isinstance(output_structure, dict) and output_structure.get("properties") and len(output_structure["properties"]) > 0:
|
||||
schema=json.dumps(output_structure, ensure_ascii=False, indent=2)
|
||||
prompt += structured_output_prompt(schema)
|
||||
for _ in range(self._param.max_retries+1):
|
||||
schema = json.dumps(output_structure, ensure_ascii=False, indent=2)
|
||||
prompt_with_schema = prompt + structured_output_prompt(schema)
|
||||
for _ in range(self._param.max_retries + 1):
|
||||
if self.check_if_canceled("LLM processing"):
|
||||
return
|
||||
|
||||
_, msg = message_fit_in([{"role": "system", "content": prompt}, *msg], int(self.chat_mdl.max_length * 0.97))
|
||||
_, msg_fit = message_fit_in(
|
||||
[{"role": "system", "content": prompt_with_schema}, *deepcopy(msg)],
|
||||
int(self.chat_mdl.max_length * 0.97),
|
||||
)
|
||||
error = ""
|
||||
ans = self._generate(msg)
|
||||
msg.pop(0)
|
||||
ans = await self._generate_async(msg_fit)
|
||||
msg_fit.pop(0)
|
||||
if ans.find("**ERROR**") >= 0:
|
||||
logging.error(f"LLM response error: {ans}")
|
||||
error = ans
|
||||
@ -363,7 +365,7 @@ class LLM(ComponentBase):
|
||||
self.set_output("structured", json_repair.loads(clean_formated_answer(ans)))
|
||||
return
|
||||
except Exception:
|
||||
msg.append({"role": "user", "content": "The answer can't not be parsed as JSON"})
|
||||
msg_fit.append({"role": "user", "content": "The answer can't not be parsed as JSON"})
|
||||
error = "The answer can't not be parsed as JSON"
|
||||
if error:
|
||||
self.set_output("_ERROR", error)
|
||||
@ -371,18 +373,23 @@ class LLM(ComponentBase):
|
||||
|
||||
downstreams = self._canvas.get_component(self._id)["downstream"] if self._canvas.get_component(self._id) else []
|
||||
ex = self.exception_handler()
|
||||
if any([self._canvas.get_component_obj(cid).component_name.lower()=="message" for cid in downstreams]) and not (ex and ex["goto"]):
|
||||
self.set_output("content", partial(self._stream_output_async, prompt, msg))
|
||||
if any([self._canvas.get_component_obj(cid).component_name.lower() == "message" for cid in downstreams]) and not (
|
||||
ex and ex["goto"]
|
||||
):
|
||||
self.set_output("content", partial(self._stream_output_async, prompt, deepcopy(msg)))
|
||||
return
|
||||
|
||||
for _ in range(self._param.max_retries+1):
|
||||
error = ""
|
||||
for _ in range(self._param.max_retries + 1):
|
||||
if self.check_if_canceled("LLM processing"):
|
||||
return
|
||||
|
||||
_, msg = message_fit_in([{"role": "system", "content": prompt}, *msg], int(self.chat_mdl.max_length * 0.97))
|
||||
_, msg_fit = message_fit_in(
|
||||
[{"role": "system", "content": prompt}, *deepcopy(msg)], int(self.chat_mdl.max_length * 0.97)
|
||||
)
|
||||
error = ""
|
||||
ans = self._generate(msg)
|
||||
msg.pop(0)
|
||||
ans = await self._generate_async(msg_fit)
|
||||
msg_fit.pop(0)
|
||||
if ans.find("**ERROR**") >= 0:
|
||||
logging.error(f"LLM response error: {ans}")
|
||||
error = ans
|
||||
@ -396,23 +403,9 @@ class LLM(ComponentBase):
|
||||
else:
|
||||
self.set_output("_ERROR", error)
|
||||
|
||||
def _stream_output(self, prompt, msg):
|
||||
_, msg = message_fit_in([{"role": "system", "content": prompt}, *msg], int(self.chat_mdl.max_length * 0.97))
|
||||
answer = ""
|
||||
for ans in self._generate_streamly(msg):
|
||||
if self.check_if_canceled("LLM streaming"):
|
||||
return
|
||||
|
||||
if ans.find("**ERROR**") >= 0:
|
||||
if self.get_exception_default_value():
|
||||
self.set_output("content", self.get_exception_default_value())
|
||||
yield self.get_exception_default_value()
|
||||
else:
|
||||
self.set_output("_ERROR", ans)
|
||||
return
|
||||
yield ans
|
||||
answer += ans
|
||||
self.set_output("content", answer)
|
||||
@timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10*60)))
|
||||
def _invoke(self, **kwargs):
|
||||
return asyncio.run(self._invoke_async(**kwargs))
|
||||
|
||||
def add_memory(self, user:str, assist:str, func_name: str, params: dict, results: str, user_defined_prompt:dict={}):
|
||||
summ = tool_call_summary(self.chat_mdl, func_name, params, results, user_defined_prompt)
|
||||
|
||||
@@ -49,16 +49,19 @@ class LLMToolPluginCallSession(ToolCallSession):
        self.callback = callback

    def tool_call(self, name: str, arguments: dict[str, Any]) -> Any:
        return asyncio.run(self.tool_call_async(name, arguments))

    async def tool_call_async(self, name: str, arguments: dict[str, Any]) -> Any:
        assert name in self.tools_map, f"LLM tool {name} does not exist"
        st = timer()
        tool_obj = self.tools_map[name]
        if isinstance(tool_obj, MCPToolCallSession):
            resp = tool_obj.tool_call(name, arguments, 60)
            resp = await asyncio.to_thread(tool_obj.tool_call, name, arguments, 60)
        else:
            if hasattr(tool_obj, "invoke_async") and asyncio.iscoroutinefunction(tool_obj.invoke_async):
                resp = asyncio.run(tool_obj.invoke_async(**arguments))
                resp = await tool_obj.invoke_async(**arguments)
            else:
                resp = asyncio.run(asyncio.to_thread(tool_obj.invoke, **arguments))
                resp = await asyncio.to_thread(tool_obj.invoke, **arguments)

        self.callback(name, arguments, resp, elapsed_time=timer()-st)
        return resp
@@ -155,7 +158,10 @@ class ToolBase(ComponentBase):

        self.set_output("_created_time", time.perf_counter())
        try:
            if asyncio.iscoroutinefunction(self._invoke):
            fn_async = getattr(self, "_invoke_async", None)
            if fn_async and asyncio.iscoroutinefunction(fn_async):
                res = await fn_async(**kwargs)
            elif asyncio.iscoroutinefunction(self._invoke):
                res = await self._invoke(**kwargs)
            else:
                res = await asyncio.to_thread(self._invoke, **kwargs)
@@ -198,6 +198,7 @@ class Retrieval(ToolBase, ABC):
            return
        if cks:
            kbinfos["chunks"] = cks
            kbinfos["chunks"] = settings.retriever.retrieval_by_children(kbinfos["chunks"], [kb.tenant_id for kb in kbs])
        if self._param.use_kg:
            ck = settings.kg_retriever.retrieval(query,
                                                 [kb.tenant_id for kb in kbs],
@@ -14,5 +14,5 @@
# limitations under the License.
#

from beartype.claw import beartype_this_package
beartype_this_package()
# from beartype.claw import beartype_this_package
# beartype_this_package()
api/apps/evaluation_app.py (new file, 479 lines)
@@ -0,0 +1,479 @@
#
#  Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#

"""
RAG Evaluation API Endpoints

Provides REST API for RAG evaluation functionality including:
- Dataset management
- Test case management
- Evaluation execution
- Results retrieval
- Configuration recommendations
"""

from quart import request
from api.apps import login_required, current_user
from api.db.services.evaluation_service import EvaluationService
from api.utils.api_utils import (
    get_data_error_result,
    get_json_result,
    get_request_json,
    server_error_response,
    validate_request
)
from common.constants import RetCode


# ==================== Dataset Management ====================

@manager.route('/dataset/create', methods=['POST'])  # noqa: F821
@login_required
@validate_request("name", "kb_ids")
async def create_dataset():
    """
    Create a new evaluation dataset.

    Request body:
    {
        "name": "Dataset name",
        "description": "Optional description",
        "kb_ids": ["kb_id1", "kb_id2"]
    }
    """
    try:
        req = await get_request_json()
        name = req.get("name", "").strip()
        description = req.get("description", "")
        kb_ids = req.get("kb_ids", [])

        if not name:
            return get_data_error_result(message="Dataset name cannot be empty")

        if not kb_ids or not isinstance(kb_ids, list):
            return get_data_error_result(message="kb_ids must be a non-empty list")

        success, result = EvaluationService.create_dataset(
            name=name,
            description=description,
            kb_ids=kb_ids,
            tenant_id=current_user.id,
            user_id=current_user.id
        )

        if not success:
            return get_data_error_result(message=result)

        return get_json_result(data={"dataset_id": result})
    except Exception as e:
        return server_error_response(e)

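A quick way to exercise the endpoint above is a small client script. The sketch below assumes the evaluation blueprint is mounted under /v1/evaluation and that the response follows get_json_result's code/message/data envelope; the base URL, port, and credential are placeholders to adjust for a real deployment (these routes use @login_required, so an authenticated session or token is required):

import requests  # third-party HTTP client, assumed available

BASE = "http://localhost:9380/v1/evaluation"     # assumed mount point, adjust to your deployment
HEADERS = {"Authorization": "Bearer <API_KEY>"}  # placeholder credential

payload = {
    "name": "smoke-test dataset",
    "description": "hand-written questions for regression checks",
    "kb_ids": ["<kb_id>"],
}
resp = requests.post(f"{BASE}/dataset/create", json=payload, headers=HEADERS, timeout=30)
resp.raise_for_status()
print("created evaluation dataset:", resp.json()["data"]["dataset_id"])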
@manager.route('/dataset/list', methods=['GET']) # noqa: F821
|
||||
@login_required
|
||||
async def list_datasets():
|
||||
"""
|
||||
List evaluation datasets for current tenant.
|
||||
|
||||
Query params:
|
||||
- page: Page number (default: 1)
|
||||
- page_size: Items per page (default: 20)
|
||||
"""
|
||||
try:
|
||||
page = int(request.args.get("page", 1))
|
||||
page_size = int(request.args.get("page_size", 20))
|
||||
|
||||
result = EvaluationService.list_datasets(
|
||||
tenant_id=current_user.id,
|
||||
user_id=current_user.id,
|
||||
page=page,
|
||||
page_size=page_size
|
||||
)
|
||||
|
||||
return get_json_result(data=result)
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
|
||||
|
||||
@manager.route('/dataset/<dataset_id>', methods=['GET']) # noqa: F821
|
||||
@login_required
|
||||
async def get_dataset(dataset_id):
|
||||
"""Get dataset details by ID"""
|
||||
try:
|
||||
dataset = EvaluationService.get_dataset(dataset_id)
|
||||
if not dataset:
|
||||
return get_data_error_result(
|
||||
message="Dataset not found",
|
||||
code=RetCode.DATA_ERROR
|
||||
)
|
||||
|
||||
return get_json_result(data=dataset)
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
|
||||
|
||||
@manager.route('/dataset/<dataset_id>', methods=['PUT']) # noqa: F821
|
||||
@login_required
|
||||
async def update_dataset(dataset_id):
|
||||
"""
|
||||
Update dataset.
|
||||
|
||||
Request body:
|
||||
{
|
||||
"name": "New name",
|
||||
"description": "New description",
|
||||
"kb_ids": ["kb_id1", "kb_id2"]
|
||||
}
|
||||
"""
|
||||
try:
|
||||
req = await get_request_json()
|
||||
|
||||
# Remove fields that shouldn't be updated
|
||||
req.pop("id", None)
|
||||
req.pop("tenant_id", None)
|
||||
req.pop("created_by", None)
|
||||
req.pop("create_time", None)
|
||||
|
||||
success = EvaluationService.update_dataset(dataset_id, **req)
|
||||
|
||||
if not success:
|
||||
return get_data_error_result(message="Failed to update dataset")
|
||||
|
||||
return get_json_result(data={"dataset_id": dataset_id})
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
|
||||
|
||||
@manager.route('/dataset/<dataset_id>', methods=['DELETE']) # noqa: F821
|
||||
@login_required
|
||||
async def delete_dataset(dataset_id):
|
||||
"""Delete dataset (soft delete)"""
|
||||
try:
|
||||
success = EvaluationService.delete_dataset(dataset_id)
|
||||
|
||||
if not success:
|
||||
return get_data_error_result(message="Failed to delete dataset")
|
||||
|
||||
return get_json_result(data={"dataset_id": dataset_id})
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
|
||||
|
||||
# ==================== Test Case Management ====================
|
||||
|
||||
@manager.route('/dataset/<dataset_id>/case/add', methods=['POST']) # noqa: F821
|
||||
@login_required
|
||||
@validate_request("question")
|
||||
async def add_test_case(dataset_id):
|
||||
"""
|
||||
Add a test case to a dataset.
|
||||
|
||||
Request body:
|
||||
{
|
||||
"question": "Test question",
|
||||
"reference_answer": "Optional ground truth answer",
|
||||
"relevant_doc_ids": ["doc_id1", "doc_id2"],
|
||||
"relevant_chunk_ids": ["chunk_id1", "chunk_id2"],
|
||||
"metadata": {"key": "value"}
|
||||
}
|
||||
"""
|
||||
try:
|
||||
req = await get_request_json()
|
||||
question = req.get("question", "").strip()
|
||||
|
||||
if not question:
|
||||
return get_data_error_result(message="Question cannot be empty")
|
||||
|
||||
success, result = EvaluationService.add_test_case(
|
||||
dataset_id=dataset_id,
|
||||
question=question,
|
||||
reference_answer=req.get("reference_answer"),
|
||||
relevant_doc_ids=req.get("relevant_doc_ids"),
|
||||
relevant_chunk_ids=req.get("relevant_chunk_ids"),
|
||||
metadata=req.get("metadata")
|
||||
)
|
||||
|
||||
if not success:
|
||||
return get_data_error_result(message=result)
|
||||
|
||||
return get_json_result(data={"case_id": result})
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
|
||||
|
||||
@manager.route('/dataset/<dataset_id>/case/import', methods=['POST']) # noqa: F821
|
||||
@login_required
|
||||
@validate_request("cases")
|
||||
async def import_test_cases(dataset_id):
|
||||
"""
|
||||
Bulk import test cases.
|
||||
|
||||
Request body:
|
||||
{
|
||||
"cases": [
|
||||
{
|
||||
"question": "Question 1",
|
||||
"reference_answer": "Answer 1",
|
||||
...
|
||||
},
|
||||
{
|
||||
"question": "Question 2",
|
||||
...
|
||||
}
|
||||
]
|
||||
}
|
||||
"""
|
||||
try:
|
||||
req = await get_request_json()
|
||||
cases = req.get("cases", [])
|
||||
|
||||
if not cases or not isinstance(cases, list):
|
||||
return get_data_error_result(message="cases must be a non-empty list")
|
||||
|
||||
success_count, failure_count = EvaluationService.import_test_cases(
|
||||
dataset_id=dataset_id,
|
||||
cases=cases
|
||||
)
|
||||
|
||||
return get_json_result(data={
|
||||
"success_count": success_count,
|
||||
"failure_count": failure_count,
|
||||
"total": len(cases)
|
||||
})
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
|
||||
|
||||
@manager.route('/dataset/<dataset_id>/cases', methods=['GET']) # noqa: F821
|
||||
@login_required
|
||||
async def get_test_cases(dataset_id):
|
||||
"""Get all test cases for a dataset"""
|
||||
try:
|
||||
cases = EvaluationService.get_test_cases(dataset_id)
|
||||
return get_json_result(data={"cases": cases, "total": len(cases)})
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
|
||||
|
||||
@manager.route('/case/<case_id>', methods=['DELETE']) # noqa: F821
|
||||
@login_required
|
||||
async def delete_test_case(case_id):
|
||||
"""Delete a test case"""
|
||||
try:
|
||||
success = EvaluationService.delete_test_case(case_id)
|
||||
|
||||
if not success:
|
||||
return get_data_error_result(message="Failed to delete test case")
|
||||
|
||||
return get_json_result(data={"case_id": case_id})
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
|
||||
|
||||
# ==================== Evaluation Execution ====================
|
||||
|
||||
@manager.route('/run/start', methods=['POST']) # noqa: F821
|
||||
@login_required
|
||||
@validate_request("dataset_id", "dialog_id")
|
||||
async def start_evaluation():
|
||||
"""
|
||||
Start an evaluation run.
|
||||
|
||||
Request body:
|
||||
{
|
||||
"dataset_id": "dataset_id",
|
||||
"dialog_id": "dialog_id",
|
||||
"name": "Optional run name"
|
||||
}
|
||||
"""
|
||||
try:
|
||||
req = await get_request_json()
|
||||
dataset_id = req.get("dataset_id")
|
||||
dialog_id = req.get("dialog_id")
|
||||
name = req.get("name")
|
||||
|
||||
success, result = EvaluationService.start_evaluation(
|
||||
dataset_id=dataset_id,
|
||||
dialog_id=dialog_id,
|
||||
user_id=current_user.id,
|
||||
name=name
|
||||
)
|
||||
|
||||
if not success:
|
||||
return get_data_error_result(message=result)
|
||||
|
||||
return get_json_result(data={"run_id": result})
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
|
||||
|
||||
@manager.route('/run/<run_id>', methods=['GET']) # noqa: F821
|
||||
@login_required
|
||||
async def get_evaluation_run(run_id):
|
||||
"""Get evaluation run details"""
|
||||
try:
|
||||
result = EvaluationService.get_run_results(run_id)
|
||||
|
||||
if not result:
|
||||
return get_data_error_result(
|
||||
message="Evaluation run not found",
|
||||
code=RetCode.DATA_ERROR
|
||||
)
|
||||
|
||||
return get_json_result(data=result)
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
|
||||
|
||||
@manager.route('/run/<run_id>/results', methods=['GET']) # noqa: F821
|
||||
@login_required
|
||||
async def get_run_results(run_id):
|
||||
"""Get detailed results for an evaluation run"""
|
||||
try:
|
||||
result = EvaluationService.get_run_results(run_id)
|
||||
|
||||
if not result:
|
||||
return get_data_error_result(
|
||||
message="Evaluation run not found",
|
||||
code=RetCode.DATA_ERROR
|
||||
)
|
||||
|
||||
return get_json_result(data=result)
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
|
||||
|
||||
@manager.route('/run/list', methods=['GET']) # noqa: F821
|
||||
@login_required
|
||||
async def list_evaluation_runs():
|
||||
"""
|
||||
List evaluation runs.
|
||||
|
||||
Query params:
|
||||
- dataset_id: Filter by dataset (optional)
|
||||
- dialog_id: Filter by dialog (optional)
|
||||
- page: Page number (default: 1)
|
||||
- page_size: Items per page (default: 20)
|
||||
"""
|
||||
try:
|
||||
# TODO: Implement list_runs in EvaluationService
|
||||
return get_json_result(data={"runs": [], "total": 0})
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
|
||||
|
||||
@manager.route('/run/<run_id>', methods=['DELETE']) # noqa: F821
|
||||
@login_required
|
||||
async def delete_evaluation_run(run_id):
|
||||
"""Delete an evaluation run"""
|
||||
try:
|
||||
# TODO: Implement delete_run in EvaluationService
|
||||
return get_json_result(data={"run_id": run_id})
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
|
||||
|
||||
# ==================== Analysis & Recommendations ====================
|
||||
|
||||
@manager.route('/run/<run_id>/recommendations', methods=['GET']) # noqa: F821
|
||||
@login_required
|
||||
async def get_recommendations(run_id):
|
||||
"""Get configuration recommendations based on evaluation results"""
|
||||
try:
|
||||
recommendations = EvaluationService.get_recommendations(run_id)
|
||||
return get_json_result(data={"recommendations": recommendations})
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
|
||||
|
||||
@manager.route('/compare', methods=['POST']) # noqa: F821
|
||||
@login_required
|
||||
@validate_request("run_ids")
|
||||
async def compare_runs():
|
||||
"""
|
||||
Compare multiple evaluation runs.
|
||||
|
||||
Request body:
|
||||
{
|
||||
"run_ids": ["run_id1", "run_id2", "run_id3"]
|
||||
}
|
||||
"""
|
||||
try:
|
||||
req = await get_request_json()
|
||||
run_ids = req.get("run_ids", [])
|
||||
|
||||
if not run_ids or not isinstance(run_ids, list) or len(run_ids) < 2:
|
||||
return get_data_error_result(
|
||||
message="run_ids must be a list with at least 2 run IDs"
|
||||
)
|
||||
|
||||
# TODO: Implement compare_runs in EvaluationService
|
||||
return get_json_result(data={"comparison": {}})
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
|
||||
|
||||
@manager.route('/run/<run_id>/export', methods=['GET']) # noqa: F821
|
||||
@login_required
|
||||
async def export_results(run_id):
|
||||
"""Export evaluation results as JSON/CSV"""
|
||||
try:
|
||||
# format_type = request.args.get("format", "json") # TODO: Use for CSV export
|
||||
|
||||
result = EvaluationService.get_run_results(run_id)
|
||||
|
||||
if not result:
|
||||
return get_data_error_result(
|
||||
message="Evaluation run not found",
|
||||
code=RetCode.DATA_ERROR
|
||||
)
|
||||
|
||||
# TODO: Implement CSV export
|
||||
return get_json_result(data=result)
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
|
||||
|
||||
# ==================== Real-time Evaluation ====================
|
||||
|
||||
@manager.route('/evaluate_single', methods=['POST']) # noqa: F821
|
||||
@login_required
|
||||
@validate_request("question", "dialog_id")
|
||||
async def evaluate_single():
|
||||
"""
|
||||
Evaluate a single question-answer pair in real-time.
|
||||
|
||||
Request body:
|
||||
{
|
||||
"question": "Test question",
|
||||
"dialog_id": "dialog_id",
|
||||
"reference_answer": "Optional ground truth",
|
||||
"relevant_chunk_ids": ["chunk_id1", "chunk_id2"]
|
||||
}
|
||||
"""
|
||||
try:
|
||||
# req = await get_request_json() # TODO: Use for single evaluation implementation
|
||||
|
||||
# TODO: Implement single evaluation
|
||||
# This would execute the RAG pipeline and return metrics immediately
|
||||
|
||||
return get_json_result(data={
|
||||
"answer": "",
|
||||
"metrics": {},
|
||||
"retrieved_chunks": []
|
||||
})
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
@ -33,7 +33,7 @@ from api.db.services.file_service import FileService
|
||||
from api.db.services.knowledgebase_service import KnowledgebaseService
|
||||
from api.db.services.llm_service import LLMBundle
|
||||
from api.db.services.tenant_llm_service import TenantLLMService
|
||||
from api.db.services.task_service import TaskService, queue_tasks
|
||||
from api.db.services.task_service import TaskService, queue_tasks, cancel_all_task_of
|
||||
from api.db.services.dialog_service import meta_filter, convert_conditions
|
||||
from api.utils.api_utils import check_duplicate_ids, construct_json_result, get_error_data_result, get_parser_config, get_result, server_error_response, token_required, \
|
||||
get_request_json
|
||||
@ -321,9 +321,7 @@ async def update_doc(tenant_id, dataset_id, document_id):
|
||||
try:
|
||||
if not DocumentService.update_by_id(doc.id, {"status": str(status)}):
|
||||
return get_error_data_result(message="Database error (Document update)!")
|
||||
|
||||
settings.docStoreConn.update({"doc_id": doc.id}, {"available_int": status}, search.index_name(kb.tenant_id), doc.kb_id)
|
||||
return get_result(data=True)
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
|
||||
@ -350,12 +348,10 @@ async def update_doc(tenant_id, dataset_id, document_id):
|
||||
}
|
||||
renamed_doc = {}
|
||||
for key, value in doc.to_dict().items():
|
||||
if key == "run":
|
||||
renamed_doc["run"] = run_mapping.get(str(value))
|
||||
new_key = key_mapping.get(key, key)
|
||||
renamed_doc[new_key] = value
|
||||
if key == "run":
|
||||
renamed_doc["run"] = run_mapping.get(value)
|
||||
renamed_doc["run"] = run_mapping.get(str(value))
|
||||
|
||||
return get_result(data=renamed_doc)
|
||||
|
||||
@ -839,6 +835,8 @@ async def stop_parsing(tenant_id, dataset_id):
|
||||
return get_error_data_result(message=f"You don't own the document {id}.")
|
||||
if int(doc[0].progress) == 1 or doc[0].progress == 0:
|
||||
return get_error_data_result("Can't stop parsing document with progress at 0 or 1")
|
||||
# Send cancellation signal via Redis to stop background task
|
||||
cancel_all_task_of(id)
|
||||
info = {"run": "2", "progress": 0, "chunk_num": 0}
|
||||
DocumentService.update_by_id(id, info)
|
||||
settings.docStoreConn.delete({"doc_id": doc[0].id}, search.index_name(tenant_id), dataset_id)
|
||||
|
||||
@ -14,7 +14,7 @@
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
|
||||
import asyncio
|
||||
import pathlib
|
||||
import re
|
||||
from quart import request, make_response
|
||||
@ -29,6 +29,7 @@ from api.db import FileType
|
||||
from api.db.services import duplicate_name
|
||||
from api.db.services.file_service import FileService
|
||||
from api.utils.file_utils import filename_type
|
||||
from api.utils.web_utils import CONTENT_TYPE_MAP
|
||||
from common import settings
|
||||
from common.constants import RetCode
|
||||
|
||||
@ -39,7 +40,7 @@ async def upload(tenant_id):
|
||||
Upload a file to the system.
|
||||
---
|
||||
tags:
|
||||
- File Management
|
||||
- File
|
||||
security:
|
||||
- ApiKeyAuth: []
|
||||
parameters:
|
||||
@ -155,7 +156,7 @@ async def create(tenant_id):
|
||||
Create a new file or folder.
|
||||
---
|
||||
tags:
|
||||
- File Management
|
||||
- File
|
||||
security:
|
||||
- ApiKeyAuth: []
|
||||
parameters:
|
||||
@ -233,7 +234,7 @@ async def list_files(tenant_id):
|
||||
List files under a specific folder.
|
||||
---
|
||||
tags:
|
||||
- File Management
|
||||
- File
|
||||
security:
|
||||
- ApiKeyAuth: []
|
||||
parameters:
|
||||
@ -325,7 +326,7 @@ async def get_root_folder(tenant_id):
|
||||
Get user's root folder.
|
||||
---
|
||||
tags:
|
||||
- File Management
|
||||
- File
|
||||
security:
|
||||
- ApiKeyAuth: []
|
||||
responses:
|
||||
@ -361,7 +362,7 @@ async def get_parent_folder():
|
||||
Get parent folder info of a file.
|
||||
---
|
||||
tags:
|
||||
- File Management
|
||||
- File
|
||||
security:
|
||||
- ApiKeyAuth: []
|
||||
parameters:
|
||||
@ -406,7 +407,7 @@ async def get_all_parent_folders(tenant_id):
|
||||
Get all parent folders of a file.
|
||||
---
|
||||
tags:
|
||||
- File Management
|
||||
- File
|
||||
security:
|
||||
- ApiKeyAuth: []
|
||||
parameters:
|
||||
@ -454,7 +455,7 @@ async def rm(tenant_id):
|
||||
Delete one or multiple files/folders.
|
||||
---
|
||||
tags:
|
||||
- File Management
|
||||
- File
|
||||
security:
|
||||
- ApiKeyAuth: []
|
||||
parameters:
|
||||
@ -528,7 +529,7 @@ async def rename(tenant_id):
|
||||
Rename a file.
|
||||
---
|
||||
tags:
|
||||
- File Management
|
||||
- File
|
||||
security:
|
||||
- ApiKeyAuth: []
|
||||
parameters:
|
||||
@ -589,7 +590,7 @@ async def get(tenant_id, file_id):
|
||||
Download a file.
|
||||
---
|
||||
tags:
|
||||
- File Management
|
||||
- File
|
||||
security:
|
||||
- ApiKeyAuth: []
|
||||
produces:
|
||||
@ -629,6 +630,19 @@ async def get(tenant_id, file_id):
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
|
||||
@manager.route("/file/download/<attachment_id>", methods=["GET"]) # noqa: F821
|
||||
@token_required
|
||||
async def download_attachment(tenant_id,attachment_id):
|
||||
try:
|
||||
ext = request.args.get("ext", "markdown")
|
||||
data = await asyncio.to_thread(settings.STORAGE_IMPL.get, tenant_id, attachment_id)
|
||||
response = await make_response(data)
|
||||
response.headers.set("Content-Type", CONTENT_TYPE_MAP.get(ext, f"application/{ext}"))
|
||||
|
||||
return response
|
||||
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
|
||||
@manager.route('/file/mv', methods=['POST']) # noqa: F821
|
||||
@token_required
|
||||
@ -637,7 +651,7 @@ async def move(tenant_id):
|
||||
Move one or multiple files to another folder.
|
||||
---
|
||||
tags:
|
||||
- File Management
|
||||
- File
|
||||
security:
|
||||
- ApiKeyAuth: []
|
||||
parameters:
|
||||
|
||||
@ -1113,6 +1113,70 @@ class SyncLogs(DataBaseModel):
|
||||
db_table = "sync_logs"
|
||||
|
||||
|
||||
class EvaluationDataset(DataBaseModel):
    """Ground truth dataset for RAG evaluation"""
    id = CharField(max_length=32, primary_key=True)
    tenant_id = CharField(max_length=32, null=False, index=True, help_text="tenant ID")
    name = CharField(max_length=255, null=False, index=True, help_text="dataset name")
    description = TextField(null=True, help_text="dataset description")
    kb_ids = JSONField(null=False, help_text="knowledge base IDs to evaluate against")
    created_by = CharField(max_length=32, null=False, index=True, help_text="creator user ID")
    create_time = BigIntegerField(null=False, index=True, help_text="creation timestamp")
    update_time = BigIntegerField(null=False, help_text="last update timestamp")
    status = IntegerField(null=False, default=1, help_text="1=valid, 0=invalid")

    class Meta:
        db_table = "evaluation_datasets"


class EvaluationCase(DataBaseModel):
    """Individual test case in an evaluation dataset"""
    id = CharField(max_length=32, primary_key=True)
    dataset_id = CharField(max_length=32, null=False, index=True, help_text="FK to evaluation_datasets")
    question = TextField(null=False, help_text="test question")
    reference_answer = TextField(null=True, help_text="optional ground truth answer")
    relevant_doc_ids = JSONField(null=True, help_text="expected relevant document IDs")
    relevant_chunk_ids = JSONField(null=True, help_text="expected relevant chunk IDs")
    metadata = JSONField(null=True, help_text="additional context/tags")
    create_time = BigIntegerField(null=False, help_text="creation timestamp")

    class Meta:
        db_table = "evaluation_cases"


class EvaluationRun(DataBaseModel):
    """A single evaluation run"""
    id = CharField(max_length=32, primary_key=True)
    dataset_id = CharField(max_length=32, null=False, index=True, help_text="FK to evaluation_datasets")
    dialog_id = CharField(max_length=32, null=False, index=True, help_text="dialog configuration being evaluated")
    name = CharField(max_length=255, null=False, help_text="run name")
    config_snapshot = JSONField(null=False, help_text="dialog config at time of evaluation")
    metrics_summary = JSONField(null=True, help_text="aggregated metrics")
    status = CharField(max_length=32, null=False, default="PENDING", help_text="PENDING/RUNNING/COMPLETED/FAILED")
    created_by = CharField(max_length=32, null=False, index=True, help_text="user who started the run")
    create_time = BigIntegerField(null=False, index=True, help_text="creation timestamp")
    complete_time = BigIntegerField(null=True, help_text="completion timestamp")

    class Meta:
        db_table = "evaluation_runs"


class EvaluationResult(DataBaseModel):
    """Result for a single test case in an evaluation run"""
    id = CharField(max_length=32, primary_key=True)
    run_id = CharField(max_length=32, null=False, index=True, help_text="FK to evaluation_runs")
    case_id = CharField(max_length=32, null=False, index=True, help_text="FK to evaluation_cases")
    generated_answer = TextField(null=False, help_text="generated answer")
    retrieved_chunks = JSONField(null=False, help_text="chunks that were retrieved")
    metrics = JSONField(null=False, help_text="all computed metrics")
    execution_time = FloatField(null=False, help_text="response time in seconds")
    token_usage = JSONField(null=True, help_text="prompt/completion tokens")
    create_time = BigIntegerField(null=False, help_text="creation timestamp")

    class Meta:
        db_table = "evaluation_results"


def migrate_db():
    logging.disable(logging.ERROR)
    migrator = DatabaseMigrator[settings.DATABASE_TYPE.upper()].value(DB)
@ -1293,4 +1357,43 @@ def migrate_db():
        migrate(migrator.add_column("llm_factories", "rank", IntegerField(default=0, index=False)))
    except Exception:
        pass

    # RAG Evaluation tables
    try:
        migrate(migrator.add_column("evaluation_datasets", "id", CharField(max_length=32, primary_key=True)))
    except Exception:
        pass
    try:
        migrate(migrator.add_column("evaluation_datasets", "tenant_id", CharField(max_length=32, null=False, index=True)))
    except Exception:
        pass
    try:
        migrate(migrator.add_column("evaluation_datasets", "name", CharField(max_length=255, null=False, index=True)))
    except Exception:
        pass
    try:
        migrate(migrator.add_column("evaluation_datasets", "description", TextField(null=True)))
    except Exception:
        pass
    try:
        migrate(migrator.add_column("evaluation_datasets", "kb_ids", JSONField(null=False)))
    except Exception:
        pass
    try:
        migrate(migrator.add_column("evaluation_datasets", "created_by", CharField(max_length=32, null=False, index=True)))
    except Exception:
        pass
    try:
        migrate(migrator.add_column("evaluation_datasets", "create_time", BigIntegerField(null=False, index=True)))
    except Exception:
        pass
    try:
        migrate(migrator.add_column("evaluation_datasets", "update_time", BigIntegerField(null=False)))
    except Exception:
        pass
    try:
        migrate(migrator.add_column("evaluation_datasets", "status", IntegerField(null=False, default=1)))
    except Exception:
        pass

    logging.disable(logging.NOTSET)
|
||||
@ -761,13 +761,48 @@ Please write the SQL, only SQL, without any other explanations or text.
        "prompt": sys_prompt,
    }


def clean_tts_text(text: str) -> str:
    if not text:
        return ""

    text = text.encode("utf-8", "ignore").decode("utf-8", "ignore")

    text = re.sub(r"[\x00-\x08\x0B-\x0C\x0E-\x1F\x7F]", "", text)

    emoji_pattern = re.compile(
        "[\U0001F600-\U0001F64F"
        "\U0001F300-\U0001F5FF"
        "\U0001F680-\U0001F6FF"
        "\U0001F1E0-\U0001F1FF"
        "\U00002700-\U000027BF"
        "\U0001F900-\U0001F9FF"
        "\U0001FA70-\U0001FAFF"
        "\U0001FAD0-\U0001FAFF]+",
        flags=re.UNICODE
    )
    text = emoji_pattern.sub("", text)

    text = re.sub(r"\s+", " ", text).strip()

    MAX_LEN = 500
    if len(text) > MAX_LEN:
        text = text[:MAX_LEN]

    return text


def tts(tts_mdl, text):
    if not tts_mdl or not text:
        return None
    text = clean_tts_text(text)
    if not text:
        return None
    bin = b""
    for chunk in tts_mdl.tts(text):
        bin += chunk
    try:
        for chunk in tts_mdl.tts(text):
            bin += chunk
    except Exception as e:
        logging.error(f"TTS failed: {e}, text={text!r}")
        return None
    return binascii.hexlify(bin).decode("utf-8")

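The text sanitisation above is easiest to see on a concrete input. Below is a minimal sketch of the same steps (a trimmed-down copy of `clean_tts_text`, shown purely for illustration; the sample string is invented):

```python
import re

def clean_tts_text_sketch(text: str) -> str:
    if not text:
        return ""
    text = re.sub(r"[\x00-\x08\x0B-\x0C\x0E-\x1F\x7F]", "", text)               # strip control chars
    text = re.sub(r"[\U0001F300-\U0001FAFF\U00002700-\U000027BF]+", "", text)    # rough emoji strip
    text = re.sub(r"\s+", " ", text).strip()                                     # collapse whitespace
    return text[:500]                                                            # cap length for one TTS call

print(clean_tts_text_sketch("Hello \x07 world 🚀🚀   this is\n a test"))
# -> "Hello world this is a test"
```

The 500-character cap mirrors `MAX_LEN` above, so longer answers are simply truncated before synthesis.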
598 api/db/services/evaluation_service.py Normal file
@ -0,0 +1,598 @@
|
||||
#
|
||||
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
"""
|
||||
RAG Evaluation Service
|
||||
|
||||
Provides functionality for evaluating RAG system performance including:
|
||||
- Dataset management
|
||||
- Test case management
|
||||
- Evaluation execution
|
||||
- Metrics computation
|
||||
- Configuration recommendations
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import List, Dict, Any, Optional, Tuple
|
||||
from datetime import datetime
|
||||
from timeit import default_timer as timer
|
||||
|
||||
from api.db.db_models import EvaluationDataset, EvaluationCase, EvaluationRun, EvaluationResult
|
||||
from api.db.services.common_service import CommonService
|
||||
from api.db.services.dialog_service import DialogService, chat
|
||||
from common.misc_utils import get_uuid
|
||||
from common.time_utils import current_timestamp
|
||||
from common.constants import StatusEnum
|
||||
|
||||
|
||||
class EvaluationService(CommonService):
|
||||
"""Service for managing RAG evaluations"""
|
||||
|
||||
model = EvaluationDataset
|
||||
|
||||
# ==================== Dataset Management ====================
|
||||
|
||||
@classmethod
|
||||
def create_dataset(cls, name: str, description: str, kb_ids: List[str],
|
||||
tenant_id: str, user_id: str) -> Tuple[bool, str]:
|
||||
"""
|
||||
Create a new evaluation dataset.
|
||||
|
||||
Args:
|
||||
name: Dataset name
|
||||
description: Dataset description
|
||||
kb_ids: List of knowledge base IDs to evaluate against
|
||||
tenant_id: Tenant ID
|
||||
user_id: User ID who creates the dataset
|
||||
|
||||
Returns:
|
||||
(success, dataset_id or error_message)
|
||||
"""
|
||||
try:
|
||||
dataset_id = get_uuid()
|
||||
dataset = {
|
||||
"id": dataset_id,
|
||||
"tenant_id": tenant_id,
|
||||
"name": name,
|
||||
"description": description,
|
||||
"kb_ids": kb_ids,
|
||||
"created_by": user_id,
|
||||
"create_time": current_timestamp(),
|
||||
"update_time": current_timestamp(),
|
||||
"status": StatusEnum.VALID.value
|
||||
}
|
||||
|
||||
if not EvaluationDataset.create(**dataset):
|
||||
return False, "Failed to create dataset"
|
||||
|
||||
return True, dataset_id
|
||||
except Exception as e:
|
||||
logging.error(f"Error creating evaluation dataset: {e}")
|
||||
return False, str(e)
|
||||
|
||||
@classmethod
|
||||
def get_dataset(cls, dataset_id: str) -> Optional[Dict[str, Any]]:
|
||||
"""Get dataset by ID"""
|
||||
try:
|
||||
dataset = EvaluationDataset.get_by_id(dataset_id)
|
||||
if dataset:
|
||||
return dataset.to_dict()
|
||||
return None
|
||||
except Exception as e:
|
||||
logging.error(f"Error getting dataset {dataset_id}: {e}")
|
||||
return None
|
||||
|
||||
@classmethod
|
||||
def list_datasets(cls, tenant_id: str, user_id: str,
|
||||
page: int = 1, page_size: int = 20) -> Dict[str, Any]:
|
||||
"""List datasets for a tenant"""
|
||||
try:
|
||||
query = EvaluationDataset.select().where(
|
||||
(EvaluationDataset.tenant_id == tenant_id) &
|
||||
(EvaluationDataset.status == StatusEnum.VALID.value)
|
||||
).order_by(EvaluationDataset.create_time.desc())
|
||||
|
||||
total = query.count()
|
||||
datasets = query.paginate(page, page_size)
|
||||
|
||||
return {
|
||||
"total": total,
|
||||
"datasets": [d.to_dict() for d in datasets]
|
||||
}
|
||||
except Exception as e:
|
||||
logging.error(f"Error listing datasets: {e}")
|
||||
return {"total": 0, "datasets": []}
|
||||
|
||||
@classmethod
|
||||
def update_dataset(cls, dataset_id: str, **kwargs) -> bool:
|
||||
"""Update dataset"""
|
||||
try:
|
||||
kwargs["update_time"] = current_timestamp()
|
||||
return EvaluationDataset.update(**kwargs).where(
|
||||
EvaluationDataset.id == dataset_id
|
||||
).execute() > 0
|
||||
except Exception as e:
|
||||
logging.error(f"Error updating dataset {dataset_id}: {e}")
|
||||
return False
|
||||
|
||||
@classmethod
|
||||
def delete_dataset(cls, dataset_id: str) -> bool:
|
||||
"""Soft delete dataset"""
|
||||
try:
|
||||
return EvaluationDataset.update(
|
||||
status=StatusEnum.INVALID.value,
|
||||
update_time=current_timestamp()
|
||||
).where(EvaluationDataset.id == dataset_id).execute() > 0
|
||||
except Exception as e:
|
||||
logging.error(f"Error deleting dataset {dataset_id}: {e}")
|
||||
return False
|
||||
|
||||
# ==================== Test Case Management ====================
|
||||
|
||||
@classmethod
|
||||
def add_test_case(cls, dataset_id: str, question: str,
|
||||
reference_answer: Optional[str] = None,
|
||||
relevant_doc_ids: Optional[List[str]] = None,
|
||||
relevant_chunk_ids: Optional[List[str]] = None,
|
||||
metadata: Optional[Dict[str, Any]] = None) -> Tuple[bool, str]:
|
||||
"""
|
||||
Add a test case to a dataset.
|
||||
|
||||
Args:
|
||||
dataset_id: Dataset ID
|
||||
question: Test question
|
||||
reference_answer: Optional ground truth answer
|
||||
relevant_doc_ids: Optional list of relevant document IDs
|
||||
relevant_chunk_ids: Optional list of relevant chunk IDs
|
||||
metadata: Optional additional metadata
|
||||
|
||||
Returns:
|
||||
(success, case_id or error_message)
|
||||
"""
|
||||
try:
|
||||
case_id = get_uuid()
|
||||
case = {
|
||||
"id": case_id,
|
||||
"dataset_id": dataset_id,
|
||||
"question": question,
|
||||
"reference_answer": reference_answer,
|
||||
"relevant_doc_ids": relevant_doc_ids,
|
||||
"relevant_chunk_ids": relevant_chunk_ids,
|
||||
"metadata": metadata,
|
||||
"create_time": current_timestamp()
|
||||
}
|
||||
|
||||
if not EvaluationCase.create(**case):
|
||||
return False, "Failed to create test case"
|
||||
|
||||
return True, case_id
|
||||
except Exception as e:
|
||||
logging.error(f"Error adding test case: {e}")
|
||||
return False, str(e)
|
||||
|
||||
@classmethod
|
||||
def get_test_cases(cls, dataset_id: str) -> List[Dict[str, Any]]:
|
||||
"""Get all test cases for a dataset"""
|
||||
try:
|
||||
cases = EvaluationCase.select().where(
|
||||
EvaluationCase.dataset_id == dataset_id
|
||||
).order_by(EvaluationCase.create_time)
|
||||
|
||||
return [c.to_dict() for c in cases]
|
||||
except Exception as e:
|
||||
logging.error(f"Error getting test cases for dataset {dataset_id}: {e}")
|
||||
return []
|
||||
|
||||
@classmethod
|
||||
def delete_test_case(cls, case_id: str) -> bool:
|
||||
"""Delete a test case"""
|
||||
try:
|
||||
return EvaluationCase.delete().where(
|
||||
EvaluationCase.id == case_id
|
||||
).execute() > 0
|
||||
except Exception as e:
|
||||
logging.error(f"Error deleting test case {case_id}: {e}")
|
||||
return False
|
||||
|
||||
@classmethod
|
||||
def import_test_cases(cls, dataset_id: str, cases: List[Dict[str, Any]]) -> Tuple[int, int]:
|
||||
"""
|
||||
Bulk import test cases from a list.
|
||||
|
||||
Args:
|
||||
dataset_id: Dataset ID
|
||||
cases: List of test case dictionaries
|
||||
|
||||
Returns:
|
||||
(success_count, failure_count)
|
||||
"""
|
||||
success_count = 0
|
||||
failure_count = 0
|
||||
|
||||
for case_data in cases:
|
||||
success, _ = cls.add_test_case(
|
||||
dataset_id=dataset_id,
|
||||
question=case_data.get("question", ""),
|
||||
reference_answer=case_data.get("reference_answer"),
|
||||
relevant_doc_ids=case_data.get("relevant_doc_ids"),
|
||||
relevant_chunk_ids=case_data.get("relevant_chunk_ids"),
|
||||
metadata=case_data.get("metadata")
|
||||
)
|
||||
|
||||
if success:
|
||||
success_count += 1
|
||||
else:
|
||||
failure_count += 1
|
||||
|
||||
return success_count, failure_count
|
||||
|
||||
# ==================== Evaluation Execution ====================
|
||||
|
||||
@classmethod
|
||||
def start_evaluation(cls, dataset_id: str, dialog_id: str,
|
||||
user_id: str, name: Optional[str] = None) -> Tuple[bool, str]:
|
||||
"""
|
||||
Start an evaluation run.
|
||||
|
||||
Args:
|
||||
dataset_id: Dataset ID
|
||||
dialog_id: Dialog configuration to evaluate
|
||||
user_id: User ID who starts the run
|
||||
name: Optional run name
|
||||
|
||||
Returns:
|
||||
(success, run_id or error_message)
|
||||
"""
|
||||
try:
|
||||
# Get dialog configuration
|
||||
success, dialog = DialogService.get_by_id(dialog_id)
|
||||
if not success:
|
||||
return False, "Dialog not found"
|
||||
|
||||
# Create evaluation run
|
||||
run_id = get_uuid()
|
||||
if not name:
|
||||
name = f"Evaluation Run {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
|
||||
|
||||
run = {
|
||||
"id": run_id,
|
||||
"dataset_id": dataset_id,
|
||||
"dialog_id": dialog_id,
|
||||
"name": name,
|
||||
"config_snapshot": dialog.to_dict(),
|
||||
"metrics_summary": None,
|
||||
"status": "RUNNING",
|
||||
"created_by": user_id,
|
||||
"create_time": current_timestamp(),
|
||||
"complete_time": None
|
||||
}
|
||||
|
||||
if not EvaluationRun.create(**run):
|
||||
return False, "Failed to create evaluation run"
|
||||
|
||||
# Execute evaluation asynchronously (in production, use task queue)
|
||||
# For now, we'll execute synchronously
|
||||
cls._execute_evaluation(run_id, dataset_id, dialog)
|
||||
|
||||
return True, run_id
|
||||
except Exception as e:
|
||||
logging.error(f"Error starting evaluation: {e}")
|
||||
return False, str(e)
|
||||
|
||||
@classmethod
|
||||
def _execute_evaluation(cls, run_id: str, dataset_id: str, dialog: Any):
|
||||
"""
|
||||
Execute evaluation for all test cases.
|
||||
|
||||
This method runs the RAG pipeline for each test case and computes metrics.
|
||||
"""
|
||||
try:
|
||||
# Get all test cases
|
||||
test_cases = cls.get_test_cases(dataset_id)
|
||||
|
||||
if not test_cases:
|
||||
EvaluationRun.update(
|
||||
status="FAILED",
|
||||
complete_time=current_timestamp()
|
||||
).where(EvaluationRun.id == run_id).execute()
|
||||
return
|
||||
|
||||
# Execute each test case
|
||||
results = []
|
||||
for case in test_cases:
|
||||
result = cls._evaluate_single_case(run_id, case, dialog)
|
||||
if result:
|
||||
results.append(result)
|
||||
|
||||
# Compute summary metrics
|
||||
metrics_summary = cls._compute_summary_metrics(results)
|
||||
|
||||
# Update run status
|
||||
EvaluationRun.update(
|
||||
status="COMPLETED",
|
||||
metrics_summary=metrics_summary,
|
||||
complete_time=current_timestamp()
|
||||
).where(EvaluationRun.id == run_id).execute()
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"Error executing evaluation {run_id}: {e}")
|
||||
EvaluationRun.update(
|
||||
status="FAILED",
|
||||
complete_time=current_timestamp()
|
||||
).where(EvaluationRun.id == run_id).execute()
|
||||
|
||||
@classmethod
|
||||
def _evaluate_single_case(cls, run_id: str, case: Dict[str, Any],
|
||||
dialog: Any) -> Optional[Dict[str, Any]]:
|
||||
"""
|
||||
Evaluate a single test case.
|
||||
|
||||
Args:
|
||||
run_id: Evaluation run ID
|
||||
case: Test case dictionary
|
||||
dialog: Dialog configuration
|
||||
|
||||
Returns:
|
||||
Result dictionary or None if failed
|
||||
"""
|
||||
try:
|
||||
# Prepare messages
|
||||
messages = [{"role": "user", "content": case["question"]}]
|
||||
|
||||
# Execute RAG pipeline
|
||||
start_time = timer()
|
||||
answer = ""
|
||||
retrieved_chunks = []
|
||||
|
||||
for ans in chat(dialog, messages, stream=False):
|
||||
if isinstance(ans, dict):
|
||||
answer = ans.get("answer", "")
|
||||
retrieved_chunks = ans.get("reference", {}).get("chunks", [])
|
||||
break
|
||||
|
||||
execution_time = timer() - start_time
|
||||
|
||||
# Compute metrics
|
||||
metrics = cls._compute_metrics(
|
||||
question=case["question"],
|
||||
generated_answer=answer,
|
||||
reference_answer=case.get("reference_answer"),
|
||||
retrieved_chunks=retrieved_chunks,
|
||||
relevant_chunk_ids=case.get("relevant_chunk_ids"),
|
||||
dialog=dialog
|
||||
)
|
||||
|
||||
# Save result
|
||||
result_id = get_uuid()
|
||||
result = {
|
||||
"id": result_id,
|
||||
"run_id": run_id,
|
||||
"case_id": case["id"],
|
||||
"generated_answer": answer,
|
||||
"retrieved_chunks": retrieved_chunks,
|
||||
"metrics": metrics,
|
||||
"execution_time": execution_time,
|
||||
"token_usage": None, # TODO: Track token usage
|
||||
"create_time": current_timestamp()
|
||||
}
|
||||
|
||||
EvaluationResult.create(**result)
|
||||
|
||||
return result
|
||||
except Exception as e:
|
||||
logging.error(f"Error evaluating case {case.get('id')}: {e}")
|
||||
return None
|
||||
|
||||
@classmethod
|
||||
def _compute_metrics(cls, question: str, generated_answer: str,
|
||||
reference_answer: Optional[str],
|
||||
retrieved_chunks: List[Dict[str, Any]],
|
||||
relevant_chunk_ids: Optional[List[str]],
|
||||
dialog: Any) -> Dict[str, float]:
|
||||
"""
|
||||
Compute evaluation metrics for a single test case.
|
||||
|
||||
Returns:
|
||||
Dictionary of metric names to values
|
||||
"""
|
||||
metrics = {}
|
||||
|
||||
# Retrieval metrics (if ground truth chunks provided)
|
||||
if relevant_chunk_ids:
|
||||
retrieved_ids = [c.get("chunk_id") for c in retrieved_chunks]
|
||||
metrics.update(cls._compute_retrieval_metrics(retrieved_ids, relevant_chunk_ids))
|
||||
|
||||
# Generation metrics
|
||||
if generated_answer:
|
||||
# Basic metrics
|
||||
metrics["answer_length"] = len(generated_answer)
|
||||
metrics["has_answer"] = 1.0 if generated_answer.strip() else 0.0
|
||||
|
||||
# TODO: Implement advanced metrics using LLM-as-judge
|
||||
# - Faithfulness (hallucination detection)
|
||||
# - Answer relevance
|
||||
# - Context relevance
|
||||
# - Semantic similarity (if reference answer provided)
|
||||
|
||||
return metrics
|
||||
|
||||
    @classmethod
    def _compute_retrieval_metrics(cls, retrieved_ids: List[str],
                                   relevant_ids: List[str]) -> Dict[str, float]:
        """
        Compute retrieval metrics.

        Args:
            retrieved_ids: List of retrieved chunk IDs
            relevant_ids: List of relevant chunk IDs (ground truth)

        Returns:
            Dictionary of retrieval metrics
        """
        if not relevant_ids:
            return {}

        retrieved_set = set(retrieved_ids)
        relevant_set = set(relevant_ids)

        # Precision: proportion of retrieved that are relevant
        precision = len(retrieved_set & relevant_set) / len(retrieved_set) if retrieved_set else 0.0

        # Recall: proportion of relevant that were retrieved
        recall = len(retrieved_set & relevant_set) / len(relevant_set) if relevant_set else 0.0

        # F1 score
        f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0.0

        # Hit rate: whether any relevant chunk was retrieved
        hit_rate = 1.0 if (retrieved_set & relevant_set) else 0.0

        # MRR (Mean Reciprocal Rank): position of first relevant chunk
        mrr = 0.0
        for i, chunk_id in enumerate(retrieved_ids, 1):
            if chunk_id in relevant_set:
                mrr = 1.0 / i
                break

        return {
            "precision": precision,
            "recall": recall,
            "f1_score": f1,
            "hit_rate": hit_rate,
            "mrr": mrr
        }

@classmethod
|
||||
def _compute_summary_metrics(cls, results: List[Dict[str, Any]]) -> Dict[str, Any]:
|
||||
"""
|
||||
Compute summary metrics across all test cases.
|
||||
|
||||
Args:
|
||||
results: List of result dictionaries
|
||||
|
||||
Returns:
|
||||
Summary metrics dictionary
|
||||
"""
|
||||
if not results:
|
||||
return {}
|
||||
|
||||
# Aggregate metrics
|
||||
metric_sums = {}
|
||||
metric_counts = {}
|
||||
|
||||
for result in results:
|
||||
metrics = result.get("metrics", {})
|
||||
for key, value in metrics.items():
|
||||
if isinstance(value, (int, float)):
|
||||
metric_sums[key] = metric_sums.get(key, 0) + value
|
||||
metric_counts[key] = metric_counts.get(key, 0) + 1
|
||||
|
||||
# Compute averages
|
||||
summary = {
|
||||
"total_cases": len(results),
|
||||
"avg_execution_time": sum(r.get("execution_time", 0) for r in results) / len(results)
|
||||
}
|
||||
|
||||
for key in metric_sums:
|
||||
summary[f"avg_{key}"] = metric_sums[key] / metric_counts[key]
|
||||
|
||||
return summary
|
||||
|
||||
# ==================== Results & Analysis ====================
|
||||
|
||||
@classmethod
|
||||
def get_run_results(cls, run_id: str) -> Dict[str, Any]:
|
||||
"""Get results for an evaluation run"""
|
||||
try:
|
||||
run = EvaluationRun.get_by_id(run_id)
|
||||
if not run:
|
||||
return {}
|
||||
|
||||
results = EvaluationResult.select().where(
|
||||
EvaluationResult.run_id == run_id
|
||||
).order_by(EvaluationResult.create_time)
|
||||
|
||||
return {
|
||||
"run": run.to_dict(),
|
||||
"results": [r.to_dict() for r in results]
|
||||
}
|
||||
except Exception as e:
|
||||
logging.error(f"Error getting run results {run_id}: {e}")
|
||||
return {}
|
||||
|
||||
@classmethod
|
||||
def get_recommendations(cls, run_id: str) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Analyze evaluation results and provide configuration recommendations.
|
||||
|
||||
Args:
|
||||
run_id: Evaluation run ID
|
||||
|
||||
Returns:
|
||||
List of recommendation dictionaries
|
||||
"""
|
||||
try:
|
||||
run = EvaluationRun.get_by_id(run_id)
|
||||
if not run or not run.metrics_summary:
|
||||
return []
|
||||
|
||||
metrics = run.metrics_summary
|
||||
recommendations = []
|
||||
|
||||
# Low precision: retrieving irrelevant chunks
|
||||
if metrics.get("avg_precision", 1.0) < 0.7:
|
||||
recommendations.append({
|
||||
"issue": "Low Precision",
|
||||
"severity": "high",
|
||||
"description": "System is retrieving many irrelevant chunks",
|
||||
"suggestions": [
|
||||
"Increase similarity_threshold to filter out less relevant chunks",
|
||||
"Enable reranking to improve chunk ordering",
|
||||
"Reduce top_k to return fewer chunks"
|
||||
]
|
||||
})
|
||||
|
||||
# Low recall: missing relevant chunks
|
||||
if metrics.get("avg_recall", 1.0) < 0.7:
|
||||
recommendations.append({
|
||||
"issue": "Low Recall",
|
||||
"severity": "high",
|
||||
"description": "System is missing relevant chunks",
|
||||
"suggestions": [
|
||||
"Increase top_k to retrieve more chunks",
|
||||
"Lower similarity_threshold to be more inclusive",
|
||||
"Enable hybrid search (keyword + semantic)",
|
||||
"Check chunk size - may be too large or too small"
|
||||
]
|
||||
})
|
||||
|
||||
# Slow response time
|
||||
if metrics.get("avg_execution_time", 0) > 5.0:
|
||||
recommendations.append({
|
||||
"issue": "Slow Response Time",
|
||||
"severity": "medium",
|
||||
"description": f"Average response time is {metrics['avg_execution_time']:.2f}s",
|
||||
"suggestions": [
|
||||
"Reduce top_k to retrieve fewer chunks",
|
||||
"Optimize embedding model selection",
|
||||
"Consider caching frequently asked questions"
|
||||
]
|
||||
})
|
||||
|
||||
return recommendations
|
||||
except Exception as e:
|
||||
logging.error(f"Error generating recommendations for run {run_id}: {e}")
|
||||
return []
|
||||
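The retrieval metrics used by `_compute_retrieval_metrics` above are plain set and rank computations. A small sketch of what they yield on a toy case (a standalone re-statement of the same formulas; the chunk IDs are made up):

```python
retrieved_ids = ["c1", "c7", "c3", "c9"]   # order as returned by the retriever
relevant_ids = ["c3", "c4"]                # ground-truth chunks for the question

retrieved, relevant = set(retrieved_ids), set(relevant_ids)
precision = len(retrieved & relevant) / len(retrieved)             # 1/4 = 0.25
recall = len(retrieved & relevant) / len(relevant)                 # 1/2 = 0.50
f1 = 2 * precision * recall / (precision + recall)                 # ~0.333
hit_rate = 1.0 if retrieved & relevant else 0.0                    # 1.0
# first relevant chunk appears at rank 3 -> reciprocal rank ~0.333
mrr = next((1.0 / i for i, c in enumerate(retrieved_ids, 1) if c in relevant), 0.0)

print(precision, recall, round(f1, 3), hit_rate, round(mrr, 3))
```

`_compute_summary_metrics` then averages these per-case values across the run, alongside the average execution time.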
@ -385,6 +385,7 @@ class LLMBundle(LLM4Tenant):
|
||||
|
||||
async def async_chat_streamly(self, system: str, history: list, gen_conf: dict = {}, **kwargs):
|
||||
total_tokens = 0
|
||||
ans = ""
|
||||
if self.is_tools and self.mdl.is_tools:
|
||||
stream_fn = getattr(self.mdl, "async_chat_streamly_with_tools", None)
|
||||
else:
|
||||
@ -397,7 +398,15 @@ class LLMBundle(LLM4Tenant):
|
||||
if isinstance(txt, int):
|
||||
total_tokens = txt
|
||||
break
|
||||
yield txt
|
||||
|
||||
if txt.endswith("</think>"):
|
||||
ans = ans[: -len("</think>")]
|
||||
|
||||
if not self.verbose_tool_use:
|
||||
txt = re.sub(r"<tool_call>.*?</tool_call>", "", txt, flags=re.DOTALL)
|
||||
|
||||
ans += txt
|
||||
yield ans
|
||||
if total_tokens and not TenantLLMService.increase_usage(self.tenant_id, self.llm_type, total_tokens, self.llm_name):
|
||||
logging.error("LLMBundle.async_chat_streamly can't update token usage for {}/CHAT llm_name: {}, used_tokens: {}".format(self.tenant_id, self.llm_name, total_tokens))
|
||||
return
|
||||
|
||||
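For reference, the `re.sub` call in the hunk above strips complete `<tool_call>…</tool_call>` spans from the streamed text when `verbose_tool_use` is off. A minimal sketch of that behaviour (the sample string is invented):

```python
import re

txt = 'Let me check.<tool_call>{"name": "search", "args": {}}</tool_call> Here is the answer.'
cleaned = re.sub(r"<tool_call>.*?</tool_call>", "", txt, flags=re.DOTALL)
print(cleaned)  # -> "Let me check. Here is the answer."
```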
@ -331,6 +331,7 @@ class RaptorConfig(Base):
|
||||
threshold: Annotated[float, Field(default=0.1, ge=0.0, le=1.0)]
|
||||
max_cluster: Annotated[int, Field(default=64, ge=1, le=1024)]
|
||||
random_seed: Annotated[int, Field(default=0, ge=0)]
|
||||
auto_disable_for_structured_data: Annotated[bool, Field(default=True)]
|
||||
|
||||
|
||||
class GraphragConfig(Base):
|
||||
|
||||
@ -148,6 +148,7 @@ class Storage(Enum):
|
||||
AWS_S3 = 4
|
||||
OSS = 5
|
||||
OPENDAL = 6
|
||||
GCS = 7
|
||||
|
||||
# environment
|
||||
# ENV_STRONG_TEST_COUNT = "STRONG_TEST_COUNT"
|
||||
|
||||
@ -190,6 +190,11 @@ class WebDAVConnector(LoadConnector, PollConnector):
        files = self._list_files_recursive(self.remote_path, start, end)
        logging.info(f"Found {len(files)} files matching time criteria")

        filename_counts: dict[str, int] = {}
        for file_path, _ in files:
            file_name = os.path.basename(file_path)
            filename_counts[file_name] = filename_counts.get(file_name, 0) + 1

        batch: list[Document] = []
        for file_path, file_info in files:
            file_name = os.path.basename(file_path)
@ -237,12 +242,22 @@ class WebDAVConnector(LoadConnector, PollConnector):
            else:
                modified = datetime.now(timezone.utc)

            if filename_counts.get(file_name, 0) > 1:
                relative_path = file_path
                if file_path.startswith(self.remote_path):
                    relative_path = file_path[len(self.remote_path):]
                if relative_path.startswith('/'):
                    relative_path = relative_path[1:]
                semantic_id = relative_path.replace('/', ' / ') if relative_path else file_name
            else:
                semantic_id = file_name

            batch.append(
                Document(
                    id=f"webdav:{self.base_url}:{file_path}",
                    blob=blob,
                    source=DocumentSource.WEBDAV,
                    semantic_identifier=file_name,
                    semantic_identifier=semantic_id,
                    extension=get_file_ext(file_name),
                    doc_updated_at=modified,
                    size_bytes=size_bytes if size_bytes else 0
|
||||
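The change above only falls back to path-based identifiers when the same base name occurs more than once in a sync. A hedged sketch of the resulting `semantic_identifier` values (the remote root and paths are hypothetical):

```python
import os

remote_path = "/docs"
files = ["/docs/a/report.pdf", "/docs/b/report.pdf", "/docs/readme.md"]

counts = {}
for p in files:
    name = os.path.basename(p)
    counts[name] = counts.get(name, 0) + 1

for p in files:
    name = os.path.basename(p)
    if counts[name] > 1:
        rel = p[len(remote_path):].lstrip("/")
        semantic_id = rel.replace("/", " / ") if rel else name
    else:
        semantic_id = name
    print(p, "->", semantic_id)

# /docs/a/report.pdf -> a / report.pdf
# /docs/b/report.pdf -> b / report.pdf
# /docs/readme.md    -> readme.md
```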
@ -153,7 +153,7 @@ def parse_mineru_paths() -> Dict[str, Path]:


@once
def install_mineru() -> None:
def check_and_install_mineru() -> None:
    """
    Ensure MinerU is installed.

@ -173,8 +173,8 @@ def install_mineru() -> None:
    Logging is used to indicate status.
    """
    # Check if MinerU is enabled
    use_mineru = os.getenv("USE_MINERU", "").strip().lower()
    if use_mineru == "false":
    use_mineru = os.getenv("USE_MINERU", "false").strip().lower()
    if use_mineru != "true":
        logging.info("USE_MINERU=%r. Skipping MinerU installation.", use_mineru)
        return

@ -31,6 +31,7 @@ import rag.utils.ob_conn
|
||||
import rag.utils.opensearch_conn
|
||||
from rag.utils.azure_sas_conn import RAGFlowAzureSasBlob
|
||||
from rag.utils.azure_spn_conn import RAGFlowAzureSpnBlob
|
||||
from rag.utils.gcs_conn import RAGFlowGCS
|
||||
from rag.utils.minio_conn import RAGFlowMinio
|
||||
from rag.utils.opendal_conn import OpenDALStorage
|
||||
from rag.utils.s3_conn import RAGFlowS3
|
||||
@ -109,6 +110,7 @@ MINIO = {}
|
||||
OB = {}
|
||||
OSS = {}
|
||||
OS = {}
|
||||
GCS = {}
|
||||
|
||||
DOC_MAXIMUM_SIZE: int = 128 * 1024 * 1024
|
||||
DOC_BULK_SIZE: int = 4
|
||||
@ -151,7 +153,8 @@ class StorageFactory:
|
||||
Storage.AZURE_SAS: RAGFlowAzureSasBlob,
|
||||
Storage.AWS_S3: RAGFlowS3,
|
||||
Storage.OSS: RAGFlowOSS,
|
||||
Storage.OPENDAL: OpenDALStorage
|
||||
Storage.OPENDAL: OpenDALStorage,
|
||||
Storage.GCS: RAGFlowGCS,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
@ -250,7 +253,7 @@ def init_settings():
|
||||
else:
|
||||
raise Exception(f"Not supported doc engine: {DOC_ENGINE}")
|
||||
|
||||
global AZURE, S3, MINIO, OSS
|
||||
global AZURE, S3, MINIO, OSS, GCS
|
||||
if STORAGE_IMPL_TYPE in ['AZURE_SPN', 'AZURE_SAS']:
|
||||
AZURE = get_base_config("azure", {})
|
||||
elif STORAGE_IMPL_TYPE == 'AWS_S3':
|
||||
@ -259,6 +262,8 @@ def init_settings():
|
||||
MINIO = decrypt_database_config(name="minio")
|
||||
elif STORAGE_IMPL_TYPE == 'OSS':
|
||||
OSS = get_base_config("oss", {})
|
||||
elif STORAGE_IMPL_TYPE == 'GCS':
|
||||
GCS = get_base_config("gcs", {})
|
||||
|
||||
global STORAGE_IMPL
|
||||
STORAGE_IMPL = StorageFactory.create(Storage[STORAGE_IMPL_TYPE])
|
||||
|
||||
@ -60,6 +60,8 @@ user_default_llm:
|
||||
# access_key: 'access_key'
|
||||
# secret_key: 'secret_key'
|
||||
# region: 'region'
|
||||
#gcs:
|
||||
# bucket: 'bridgtl-edm-d-bucket-ragflow'
|
||||
# oss:
|
||||
# access_key: 'access_key'
|
||||
# secret_key: 'secret_key'
|
||||
|
||||
@ -25,6 +25,8 @@ from rag.prompts.generator import vision_llm_figure_describe_prompt
|
||||
|
||||
|
||||
def vision_figure_parser_figure_data_wrapper(figures_data_without_positions):
|
||||
if not figures_data_without_positions:
|
||||
return []
|
||||
return [
|
||||
(
|
||||
(figure_data[1], [figure_data[0]]),
|
||||
@ -35,7 +37,9 @@ def vision_figure_parser_figure_data_wrapper(figures_data_without_positions):
|
||||
]
|
||||
|
||||
|
||||
def vision_figure_parser_docx_wrapper(sections,tbls,callback=None,**kwargs):
|
||||
def vision_figure_parser_docx_wrapper(sections, tbls, callback=None,**kwargs):
|
||||
if not tbls:
|
||||
return []
|
||||
try:
|
||||
vision_model = LLMBundle(kwargs["tenant_id"], LLMType.IMAGE2TEXT)
|
||||
callback(0.7, "Visual model detected. Attempting to enhance figure extraction...")
|
||||
@ -53,6 +57,8 @@ def vision_figure_parser_docx_wrapper(sections,tbls,callback=None,**kwargs):
|
||||
|
||||
|
||||
def vision_figure_parser_pdf_wrapper(tbls, callback=None, **kwargs):
|
||||
if not tbls:
|
||||
return []
|
||||
try:
|
||||
vision_model = LLMBundle(kwargs["tenant_id"], LLMType.IMAGE2TEXT)
|
||||
callback(0.7, "Visual model detected. Attempting to enhance figure extraction...")
|
||||
|
||||
@ -63,6 +63,7 @@ class MinerUParser(RAGFlowPdfParser):
|
||||
self.logger = logging.getLogger(self.__class__.__name__)
|
||||
|
||||
def _extract_zip_no_root(self, zip_path, extract_to, root_dir):
|
||||
self.logger.info(f"[MinerU] Extract zip: zip_path={zip_path}, extract_to={extract_to}, root_hint={root_dir}")
|
||||
with zipfile.ZipFile(zip_path, "r") as zip_ref:
|
||||
if not root_dir:
|
||||
files = zip_ref.namelist()
|
||||
@ -72,7 +73,7 @@ class MinerUParser(RAGFlowPdfParser):
|
||||
root_dir = None
|
||||
|
||||
if not root_dir or not root_dir.endswith("/"):
|
||||
self.logger.info(f"[MinerU] No root directory found, extracting all...fff{root_dir}")
|
||||
self.logger.info(f"[MinerU] No root directory found, extracting all (root_hint={root_dir})")
|
||||
zip_ref.extractall(extract_to)
|
||||
return
|
||||
|
||||
@ -108,7 +109,7 @@ class MinerUParser(RAGFlowPdfParser):
|
||||
valid_backends = ["pipeline", "vlm-http-client", "vlm-transformers", "vlm-vllm-engine"]
|
||||
if backend not in valid_backends:
|
||||
reason = "[MinerU] Invalid backend '{backend}'. Valid backends are: {valid_backends}"
|
||||
logging.warning(reason)
|
||||
self.logger.warning(reason)
|
||||
return False, reason
|
||||
|
||||
subprocess_kwargs = {
|
||||
@ -128,40 +129,40 @@ class MinerUParser(RAGFlowPdfParser):
|
||||
if backend == "vlm-http-client" and server_url:
|
||||
try:
|
||||
server_accessible = self._is_http_endpoint_valid(server_url + "/openapi.json")
|
||||
logging.info(f"[MinerU] vlm-http-client server check: {server_accessible}")
|
||||
self.logger.info(f"[MinerU] vlm-http-client server check: {server_accessible}")
|
||||
if server_accessible:
|
||||
self.using_api = False # We are using http client, not API
|
||||
return True, reason
|
||||
else:
|
||||
reason = f"[MinerU] vlm-http-client server not accessible: {server_url}"
|
||||
logging.warning(f"[MinerU] vlm-http-client server not accessible: {server_url}")
|
||||
self.logger.warning(f"[MinerU] vlm-http-client server not accessible: {server_url}")
|
||||
return False, reason
|
||||
except Exception as e:
|
||||
logging.warning(f"[MinerU] vlm-http-client server check failed: {e}")
|
||||
self.logger.warning(f"[MinerU] vlm-http-client server check failed: {e}")
|
||||
try:
|
||||
response = requests.get(server_url, timeout=5)
|
||||
logging.info(f"[MinerU] vlm-http-client server connection check: success with status {response.status_code}")
|
||||
self.logger.info(f"[MinerU] vlm-http-client server connection check: success with status {response.status_code}")
|
||||
self.using_api = False
|
||||
return True, reason
|
||||
except Exception as e:
|
||||
reason = f"[MinerU] vlm-http-client server connection check failed: {server_url}: {e}"
|
||||
logging.warning(f"[MinerU] vlm-http-client server connection check failed: {server_url}: {e}")
|
||||
self.logger.warning(f"[MinerU] vlm-http-client server connection check failed: {server_url}: {e}")
|
||||
return False, reason
|
||||
|
||||
try:
|
||||
result = subprocess.run([str(self.mineru_path), "--version"], **subprocess_kwargs)
|
||||
version_info = result.stdout.strip()
|
||||
if version_info:
|
||||
logging.info(f"[MinerU] Detected version: {version_info}")
|
||||
self.logger.info(f"[MinerU] Detected version: {version_info}")
|
||||
else:
|
||||
logging.info("[MinerU] Detected MinerU, but version info is empty.")
|
||||
self.logger.info("[MinerU] Detected MinerU, but version info is empty.")
|
||||
return True, reason
|
||||
except subprocess.CalledProcessError as e:
|
||||
logging.warning(f"[MinerU] Execution failed (exit code {e.returncode}).")
|
||||
self.logger.warning(f"[MinerU] Execution failed (exit code {e.returncode}).")
|
||||
except FileNotFoundError:
|
||||
logging.warning("[MinerU] MinerU not found. Please install it via: pip install -U 'mineru[core]'")
|
||||
self.logger.warning("[MinerU] MinerU not found. Please install it via: pip install -U 'mineru[core]'")
|
||||
except Exception as e:
|
||||
logging.error(f"[MinerU] Unexpected error during installation check: {e}")
|
||||
self.logger.error(f"[MinerU] Unexpected error during installation check: {e}")
|
||||
|
||||
# If executable check fails, try API check
|
||||
try:
|
||||
@ -171,14 +172,14 @@ class MinerUParser(RAGFlowPdfParser):
|
||||
if not openapi_exists:
|
||||
reason = "[MinerU] Failed to detect vaild MinerU API server"
|
||||
return openapi_exists, reason
|
||||
logging.info(f"[MinerU] Detected {self.mineru_api}/openapi.json: {openapi_exists}")
|
||||
self.logger.info(f"[MinerU] Detected {self.mineru_api}/openapi.json: {openapi_exists}")
|
||||
self.using_api = openapi_exists
|
||||
return openapi_exists, reason
|
||||
else:
|
||||
logging.info("[MinerU] api not exists.")
|
||||
self.logger.info("[MinerU] api not exists.")
|
||||
except Exception as e:
|
||||
reason = f"[MinerU] Unexpected error during api check: {e}"
|
||||
logging.error(f"[MinerU] Unexpected error during api check: {e}")
|
||||
self.logger.error(f"[MinerU] Unexpected error during api check: {e}")
|
||||
return False, reason
|
||||
|
||||
def _run_mineru(
|
||||
@ -314,7 +315,7 @@ class MinerUParser(RAGFlowPdfParser):
|
||||
except Exception as e:
|
||||
self.page_images = None
|
||||
self.total_page = 0
|
||||
logging.exception(e)
|
||||
self.logger.exception(e)
|
||||
|
||||
def _line_tag(self, bx):
|
||||
pn = [bx["page_idx"] + 1]
|
||||
@ -480,15 +481,49 @@ class MinerUParser(RAGFlowPdfParser):
|
||||
|
||||
json_file = None
|
||||
subdir = None
|
||||
attempted = []
|
||||
|
||||
# mirror MinerU's sanitize_filename to align ZIP naming
|
||||
def _sanitize_filename(name: str) -> str:
|
||||
sanitized = re.sub(r"[/\\\.]{2,}|[/\\]", "", name)
|
||||
sanitized = re.sub(r"[^\w.-]", "_", sanitized, flags=re.UNICODE)
|
||||
if sanitized.startswith("."):
|
||||
sanitized = "_" + sanitized[1:]
|
||||
return sanitized or "unnamed"
|
||||
|
||||
safe_stem = _sanitize_filename(file_stem)
|
||||
allowed_names = {f"{file_stem}_content_list.json", f"{safe_stem}_content_list.json"}
|
||||
self.logger.info(f"[MinerU] Expected output files: {', '.join(sorted(allowed_names))}")
|
||||
self.logger.info(f"[MinerU] Searching output candidates: {', '.join(str(c) for c in candidates)}")
|
||||
|
||||
for sub in candidates:
|
||||
jf = sub / f"{file_stem}_content_list.json"
|
||||
self.logger.info(f"[MinerU] Trying original path: {jf}")
|
||||
attempted.append(jf)
|
||||
if jf.exists():
|
||||
subdir = sub
|
||||
json_file = jf
|
||||
break
|
||||
|
||||
# MinerU API sanitizes non-ASCII filenames inside the ZIP root and file names.
|
||||
alt = sub / f"{safe_stem}_content_list.json"
|
||||
self.logger.info(f"[MinerU] Trying sanitized filename: {alt}")
|
||||
attempted.append(alt)
|
||||
if alt.exists():
|
||||
subdir = sub
|
||||
json_file = alt
|
||||
break
|
||||
|
||||
nested_alt = sub / safe_stem / f"{safe_stem}_content_list.json"
|
||||
self.logger.info(f"[MinerU] Trying sanitized nested path: {nested_alt}")
|
||||
attempted.append(nested_alt)
|
||||
if nested_alt.exists():
|
||||
subdir = nested_alt.parent
|
||||
json_file = nested_alt
|
||||
break
|
||||
|
||||
if not json_file:
|
||||
raise FileNotFoundError(f"[MinerU] Missing output file, tried: {', '.join(str(c / (file_stem + '_content_list.json')) for c in candidates)}")
|
||||
raise FileNotFoundError(f"[MinerU] Missing output file, tried: {', '.join(str(p) for p in attempted)}")
|
||||
|
||||
with open(json_file, "r", encoding="utf-8") as f:
|
||||
data = json.load(f)
|
||||
|
||||
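The `_sanitize_filename` helper above mirrors MinerU's own sanitisation, which is why the parser also probes for the sanitised stem and the sanitised nested path. A hedged sketch of its effect (the sample stems are invented; this is a copy of the helper for illustration only):

```python
import re

def _sanitize_filename(name: str) -> str:
    sanitized = re.sub(r"[/\\\.]{2,}|[/\\]", "", name)          # drop path separators and dot runs
    sanitized = re.sub(r"[^\w.-]", "_", sanitized, flags=re.UNICODE)  # replace other specials with "_"
    if sanitized.startswith("."):
        sanitized = "_" + sanitized[1:]
    return sanitized or "unnamed"

print(_sanitize_filename("年度 报告(v2).pdf"))  # -> "年度_报告_v2_.pdf"
print(_sanitize_filename("../secret"))           # -> "secret"
```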
@ -170,7 +170,7 @@ TZ=Asia/Shanghai
|
||||
# Uncomment the following line if your operating system is MacOS:
|
||||
# MACOS=1
|
||||
|
||||
# The maximum file size limit (in bytes) for each upload to your knowledge base or File Management.
|
||||
# The maximum file size limit (in bytes) for each upload to your dataset or RAGFlow's File system.
|
||||
# To change the 1GB file size limit, uncomment the line below and update as needed.
|
||||
# MAX_CONTENT_LENGTH=1073741824
|
||||
# After updating, ensure `client_max_body_size` in nginx/nginx.conf is updated accordingly.
|
||||
|
||||
@ -23,7 +23,7 @@ services:
|
||||
env_file: .env
|
||||
networks:
|
||||
- ragflow
|
||||
restart: on-failure
|
||||
restart: unless-stopped
|
||||
# https://docs.docker.com/engine/daemon/prometheus/#create-a-prometheus-configuration
|
||||
# If you're using Docker Desktop, the --add-host flag is optional. This flag makes sure that the host's internal IP gets exposed to the Prometheus container.
|
||||
extra_hosts:
|
||||
@ -48,7 +48,7 @@ services:
|
||||
env_file: .env
|
||||
networks:
|
||||
- ragflow
|
||||
restart: on-failure
|
||||
restart: unless-stopped
|
||||
# https://docs.docker.com/engine/daemon/prometheus/#create-a-prometheus-configuration
|
||||
# If you're using Docker Desktop, the --add-host flag is optional. This flag makes sure that the host's internal IP gets exposed to the Prometheus container.
|
||||
extra_hosts:
|
||||
|
||||
@ -31,7 +31,7 @@ services:
|
||||
retries: 120
|
||||
networks:
|
||||
- ragflow
|
||||
restart: on-failure
|
||||
restart: unless-stopped
|
||||
|
||||
opensearch01:
|
||||
profiles:
|
||||
@ -67,12 +67,12 @@ services:
|
||||
retries: 120
|
||||
networks:
|
||||
- ragflow
|
||||
restart: on-failure
|
||||
restart: unless-stopped
|
||||
|
||||
infinity:
|
||||
profiles:
|
||||
- infinity
|
||||
image: infiniflow/infinity:v0.6.8
|
||||
image: infiniflow/infinity:v0.6.10
|
||||
volumes:
|
||||
- infinity_data:/var/infinity
|
||||
- ./infinity_conf.toml:/infinity_conf.toml
|
||||
@ -94,7 +94,7 @@ services:
|
||||
interval: 10s
|
||||
timeout: 10s
|
||||
retries: 120
|
||||
restart: on-failure
|
||||
restart: unless-stopped
|
||||
|
||||
oceanbase:
|
||||
profiles:
|
||||
@ -119,7 +119,7 @@ services:
|
||||
timeout: 10s
|
||||
networks:
|
||||
- ragflow
|
||||
restart: on-failure
|
||||
restart: unless-stopped
|
||||
|
||||
sandbox-executor-manager:
|
||||
profiles:
|
||||
@ -147,7 +147,7 @@ services:
|
||||
interval: 10s
|
||||
timeout: 10s
|
||||
retries: 120
|
||||
restart: on-failure
|
||||
restart: unless-stopped
|
||||
|
||||
mysql:
|
||||
# mysql:5.7 linux/arm64 image is unavailable.
|
||||
@ -175,7 +175,7 @@ services:
|
||||
interval: 10s
|
||||
timeout: 10s
|
||||
retries: 120
|
||||
restart: on-failure
|
||||
restart: unless-stopped
|
||||
|
||||
minio:
|
||||
image: quay.io/minio/minio:RELEASE.2025-06-13T11-33-47Z
|
||||
@ -191,7 +191,7 @@ services:
|
||||
- minio_data:/data
|
||||
networks:
|
||||
- ragflow
|
||||
restart: on-failure
|
||||
restart: unless-stopped
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"]
|
||||
interval: 10s
|
||||
@ -209,7 +209,7 @@ services:
|
||||
- redis_data:/data
|
||||
networks:
|
||||
- ragflow
|
||||
restart: on-failure
|
||||
restart: unless-stopped
|
||||
healthcheck:
|
||||
test: ["CMD", "redis-cli", "-a", "${REDIS_PASSWORD}", "ping"]
|
||||
interval: 10s
|
||||
@ -228,7 +228,7 @@ services:
|
||||
networks:
|
||||
- ragflow
|
||||
command: ["--model-id", "/data/${TEI_MODEL}", "--auto-truncate"]
|
||||
restart: on-failure
|
||||
restart: unless-stopped
|
||||
|
||||
|
||||
tei-gpu:
|
||||
@ -249,7 +249,7 @@ services:
|
||||
- driver: nvidia
|
||||
count: all
|
||||
capabilities: [gpu]
|
||||
restart: on-failure
|
||||
restart: unless-stopped
|
||||
|
||||
|
||||
kibana:
|
||||
@ -271,7 +271,7 @@ services:
|
||||
retries: 120
|
||||
networks:
|
||||
- ragflow
|
||||
restart: on-failure
|
||||
restart: unless-stopped
|
||||
|
||||
|
||||
volumes:
|
||||
|
||||
@ -22,7 +22,7 @@ services:
|
||||
env_file: .env
|
||||
networks:
|
||||
- ragflow
|
||||
restart: on-failure
|
||||
restart: unless-stopped
|
||||
# https://docs.docker.com/engine/daemon/prometheus/#create-a-prometheus-configuration
|
||||
# If you're using Docker Desktop, the --add-host flag is optional. This flag makes sure that the host's internal IP gets exposed to the Prometheus container.
|
||||
extra_hosts:
|
||||
@ -39,7 +39,7 @@ services:
|
||||
# entrypoint: "/ragflow/entrypoint_task_executor.sh 1 3"
|
||||
# networks:
|
||||
# - ragflow
|
||||
# restart: on-failure
|
||||
# restart: unless-stopped
|
||||
# # https://docs.docker.com/engine/daemon/prometheus/#create-a-prometheus-configuration
|
||||
# # If you're using Docker Desktop, the --add-host flag is optional. This flag makes sure that the host's internal IP gets exposed to the Prometheus container.
|
||||
# extra_hosts:
|
||||
|
||||
@ -45,7 +45,7 @@ services:
|
||||
env_file: .env
|
||||
networks:
|
||||
- ragflow
|
||||
restart: on-failure
|
||||
restart: unless-stopped
|
||||
# https://docs.docker.com/engine/daemon/prometheus/#create-a-prometheus-configuration
|
||||
# If you use Docker Desktop, the --add-host flag is optional. This flag ensures that the host's internal IP is exposed to the Prometheus container.
|
||||
extra_hosts:
|
||||
@ -94,7 +94,7 @@ services:
|
||||
env_file: .env
|
||||
networks:
|
||||
- ragflow
|
||||
restart: on-failure
|
||||
restart: unless-stopped
|
||||
# https://docs.docker.com/engine/daemon/prometheus/#create-a-prometheus-configuration
|
||||
# If you use Docker Desktop, the --add-host flag is optional. This flag ensures that the host's internal IP is exposed to the Prometheus container.
|
||||
extra_hosts:
|
||||
@ -120,7 +120,7 @@ services:
|
||||
# entrypoint: "/ragflow/entrypoint_task_executor.sh 1 3"
|
||||
# networks:
|
||||
# - ragflow
|
||||
# restart: on-failure
|
||||
# restart: unless-stopped
|
||||
# # https://docs.docker.com/engine/daemon/prometheus/#create-a-prometheus-configuration
|
||||
# # If you're using Docker Desktop, the --add-host flag is optional. This flag makes sure that the host's internal IP gets exposed to the Prometheus container.
|
||||
# extra_hosts:
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
[general]
|
||||
version = "0.6.8"
|
||||
version = "0.6.10"
|
||||
time_zone = "utc-8"
|
||||
|
||||
[network]
|
||||
|
||||
@ -76,5 +76,5 @@ No. Files uploaded to an agent as input are not stored in a dataset and hence wi
|
||||
There is no _specific_ file size limit for a file uploaded to an agent. However, note that model providers typically have a default or explicit maximum token setting, which can range from 8196 to 128k: The plain text part of the uploaded file will be passed in as the key value, but if the file's token count exceeds this limit, the string will be truncated and incomplete.
|
||||
|
||||
:::tip NOTE
|
||||
The variables `MAX_CONTENT_LENGTH` in `/docker/.env` and `client_max_body_size` in `/docker/nginx/nginx.conf` set the file size limit for each upload to a dataset or **File Management**. These settings DO NOT apply in this scenario.
|
||||
The variables `MAX_CONTENT_LENGTH` in `/docker/.env` and `client_max_body_size` in `/docker/nginx/nginx.conf` set the file size limit for each upload to a dataset or RAGFlow's File system. These settings DO NOT apply in this scenario.
|
||||
:::
|
||||
|
||||
@ -9,7 +9,7 @@ Initiate an AI-powered chat with a configured chat assistant.
|
||||
|
||||
---
|
||||
|
||||
Knowledge base, hallucination-free chat, and file management are the three pillars of RAGFlow. Chats in RAGFlow are based on a particular dataset or multiple datasets. Once you have created your dataset, finished file parsing, and [run a retrieval test](../dataset/run_retrieval_test.md), you can go ahead and start an AI conversation.
|
||||
Chats in RAGFlow are based on a particular dataset or multiple datasets. Once you have created your dataset, finished file parsing, and [run a retrieval test](../dataset/run_retrieval_test.md), you can go ahead and start an AI conversation.
|
||||
|
||||
## Start an AI chat
|
||||
|
||||
|
||||
@ -5,7 +5,7 @@ slug: /configure_knowledge_base
|
||||
|
||||
# Configure dataset
|
||||
|
||||
Most of RAGFlow's chat assistants and Agents are based on datasets. Each of RAGFlow's datasets serves as a knowledge source, *parsing* files uploaded from your local machine and file references generated in **File Management** into the real 'knowledge' for future AI chats. This guide demonstrates some basic usages of the dataset feature, covering the following topics:
|
||||
Most of RAGFlow's chat assistants and Agents are based on datasets. Each of RAGFlow's datasets serves as a knowledge source, *parsing* files uploaded from your local machine and file references generated in RAGFlow's File system into the real 'knowledge' for future AI chats. This guide demonstrates some basic usages of the dataset feature, covering the following topics:
|
||||
|
||||
- Create a dataset
|
||||
- Configure a dataset
|
||||
@ -82,10 +82,10 @@ Some embedding models are optimized for specific languages, so performance may b
|
||||
|
||||
### Upload file
|
||||
|
||||
- RAGFlow's **File Management** allows you to link a file to multiple datasets, in which case each target dataset holds a reference to the file.
|
||||
- RAGFlow's File system allows you to link a file to multiple datasets, in which case each target dataset holds a reference to the file.
|
||||
- In **Knowledge Base**, you are also given the option of uploading a single file or a folder of files (bulk upload) from your local machine to a dataset, in which case the dataset holds file copies.
|
||||
|
||||
While uploading files directly to a dataset seems more convenient, we *highly* recommend uploading files to **File Management** and then linking them to the target datasets. This way, you can avoid permanently deleting files uploaded to the dataset.
|
||||
While uploading files directly to a dataset seems more convenient, we *highly* recommend uploading files to RAGFlow's File system and then linking them to the target datasets. This way, you can avoid permanently deleting files uploaded to the dataset.
|
||||
|
||||
### Parse file
|
||||
|
||||
@ -142,6 +142,6 @@ As of RAGFlow v0.22.1, the search feature is still in a rudimentary form, suppor
|
||||
You are allowed to delete a dataset. Hover your mouse over the three dot of the intended dataset card and the **Delete** option appears. Once you delete a dataset, the associated folder under **root/.knowledge** directory is AUTOMATICALLY REMOVED. The consequence is:
|
||||
|
||||
- The files uploaded directly to the dataset are gone;
|
||||
- The file references, which you created from within **File Management**, are gone, but the associated files still exist in **File Management**.
|
||||
- The file references, which you created from within RAGFlow's File system, are gone, but the associated files still exist.
|
||||
|
||||

|
||||
|
||||
@ -419,17 +419,11 @@ Creates a dataset.
|
||||
- `"embedding_model"`: `string`
|
||||
- `"permission"`: `string`
|
||||
- `"chunk_method"`: `string`
|
||||
- "parser_config": `object`
|
||||
- "parse_type": `int`
|
||||
- "pipeline_id": `string`
|
||||
- `"parser_config"`: `object`
|
||||
- `"parse_type"`: `int`
|
||||
- `"pipeline_id"`: `string`
|
||||
|
||||
Note: Choose exactly one ingestion mode when creating a dataset.
|
||||
- Chunking method: provide `"chunk_method"` (optionally with `"parser_config"`).
|
||||
- Ingestion pipeline: provide both `"parse_type"` and `"pipeline_id"` and do not provide `"chunk_method"`.
|
||||
|
||||
These options are mutually exclusive. If all three of `chunk_method`, `parse_type`, and `pipeline_id` are omitted, the system defaults to `chunk_method = "naive"`.
|
||||
|
||||
##### Request example
|
||||
##### A basic request example
|
||||
|
||||
```bash
|
||||
curl --request POST \
|
||||
@ -441,9 +435,11 @@ curl --request POST \
|
||||
}'
|
||||
```
|
||||
|
||||
##### Request example (ingestion pipeline)
|
||||
##### A request example specifying ingestion pipeline
|
||||
|
||||
Use this form when specifying an ingestion pipeline (do not include `chunk_method`).
|
||||
:::caution WARNING
|
||||
You must *not* include `"chunk_method"` or `"parser_config"` when specifying an ingestion pipeline.
|
||||
:::
|
||||
|
||||
```bash
|
||||
curl --request POST \
|
||||
@ -452,15 +448,11 @@ curl --request POST \
|
||||
--header 'Authorization: Bearer <YOUR_API_KEY>' \
|
||||
--data '{
|
||||
"name": "test-sdk",
|
||||
"parse_type": <NUMBER_OF_FORMATS_IN_PARSE>,
|
||||
"parse_type": <NUMBER_OF_PARSERS_IN_YOUR_PARSER_COMPONENT>,
|
||||
"pipeline_id": "<PIPELINE_ID_32_HEX>"
|
||||
}'
|
||||
```
|
||||
|
||||
Notes:
|
||||
- `parse_type` is an integer. Replace `<NUMBER_OF_FORMATS_IN_PARSE>` with your pipeline's parse-type value.
|
||||
- `pipeline_id` must be a 32-character lowercase hexadecimal string.
|
||||
|
||||
##### Request parameters
|
||||
|
||||
- `"name"`: (*Body parameter*), `string`, *Required*
|
||||
@ -488,7 +480,8 @@ Notes:
|
||||
- `"team"`: All team members can manage the dataset.
|
||||
|
||||
- `"chunk_method"`: (*Body parameter*), `enum<string>`
|
||||
The chunking method of the dataset to create. Available options:
|
||||
The default chunk method of the dataset to create. Mutually exclusive with `"parse_type"` and `"pipeline_id"`. If you set `"chunk_method"`, do not include `"parse_type"` or `"pipeline_id"`.
|
||||
Available options:
|
||||
- `"naive"`: General (default)
|
||||
- `"book"`: Book
|
||||
- `"email"`: Email
|
||||
@ -501,7 +494,6 @@ Notes:
|
||||
- `"qa"`: Q&A
|
||||
- `"table"`: Table
|
||||
- `"tag"`: Tag
|
||||
- Mutually exclusive with `parse_type` and `pipeline_id`. If you set `chunk_method`, do not include `parse_type` or `pipeline_id`.
|
||||
|
||||
- `"parser_config"`: (*Body parameter*), `object`
|
||||
The configuration settings for the dataset parser. The attributes in this JSON object vary with the selected `"chunk_method"`:
|
||||
@ -520,13 +512,16 @@ Notes:
|
||||
- Maximum: `2048`
|
||||
- `"delimiter"`: `string`
|
||||
- Defaults to `"\n"`.
|
||||
- `"html4excel"`: `bool` Indicates whether to convert Excel documents into HTML format.
|
||||
- `"html4excel"`: `bool`
|
||||
- Whether to convert Excel documents into HTML format.
|
||||
- Defaults to `false`
|
||||
- `"layout_recognize"`: `string`
|
||||
- Defaults to `DeepDOC`
|
||||
- `"tag_kb_ids"`: `array<string>` refer to [Use tag set](https://ragflow.io/docs/dev/use_tag_sets)
|
||||
- Must include a list of dataset IDs, where each dataset is parsed using the Tag Chunking Method
|
||||
- `"task_page_size"`: `int` For PDF only.
|
||||
- `"tag_kb_ids"`: `array<string>`
|
||||
- IDs of datasets to be parsed using the Tag chunk method.
|
||||
- Before setting this, ensure a tag set is created and properly configured. For details, see [Use tag set](https://ragflow.io/docs/dev/use_tag_sets).
|
||||
- `"task_page_size"`: `int`
|
||||
- For PDFs only.
|
||||
- Defaults to `12`
|
||||
- Minimum: `1`
|
||||
- `"raptor"`: `object` RAPTOR-specific settings.
|
||||
@ -538,14 +533,25 @@ Notes:
|
||||
- Defaults to: `{"use_raptor": false}`.
|
||||
- If `"chunk_method"` is `"table"`, `"picture"`, `"one"`, or `"email"`, `"parser_config"` is an empty JSON object.
|
||||
|
||||
- "parse_type": (*Body parameter*), `int`
|
||||
The ingestion pipeline parse type identifier. Required if and only if you are using an ingestion pipeline (together with `"pipeline_id"`). Must not be provided when `"chunk_method"` is set.
|
||||
- `"parse_type"`: (*Body parameter*), `int`
|
||||
The ingestion pipeline parse type identifier, i.e., the number of parsers in your **Parser** component.
|
||||
- Required (along with `"pipeline_id"`) if specifying an ingestion pipeline.
|
||||
- Must not be included when `"chunk_method"` is specified.
|
||||
|
||||
- "pipeline_id": (*Body parameter*), `string`
|
||||
The ingestion pipeline ID. Required if and only if you are using an ingestion pipeline (together with `"parse_type"`).
|
||||
- Must not be provided when `"chunk_method"` is set.
|
||||
- `"pipeline_id"`: (*Body parameter*), `string`
|
||||
The ingestion pipeline ID. Can be found in the corresponding URL in the RAGFlow UI.
|
||||
- Required (along with `"parse_type"`) if specifying an ingestion pipeline.
|
||||
- Must be a 32-character lowercase hexadecimal string, e.g., `"d0bebe30ae2211f0970942010a8e0005"`.
|
||||
- Must not be included when `"chunk_method"` is specified.
|
||||
|
||||
Note: If none of `chunk_method`, `parse_type`, and `pipeline_id` are provided, the system will default to `chunk_method = "naive"`.
|
||||
:::caution WARNING
|
||||
You can choose either of the following ingestion options when creating a dataset, but *not* both:
|
||||
|
||||
- Use a built-in chunk method -- specify `"chunk_method"` (optionally with `"parser_config"`).
|
||||
- Use an ingestion pipeline -- specify both `"parse_type"` and `"pipeline_id"`.
|
||||
|
||||
If none of `"chunk_method"`, `"parse_type"`, or `"pipeline_id"` are provided, the system defaults to `chunk_method = "naive"`.
|
||||
:::
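
If you prefer to script the same request in Python rather than `curl`, a minimal sketch follows. The base URL, the use of the `requests` library, and the placeholder values are assumptions to adapt to your own deployment; the payload fields mirror the example above.

```python
# Minimal sketch: create a dataset bound to an ingestion pipeline over the HTTP API.
# BASE_URL and the concrete values below are placeholders, not verified defaults.
import requests

BASE_URL = "http://<RAGFLOW_ADDRESS>/api/v1"   # assumption: adjust to your deployment
API_KEY = "<YOUR_API_KEY>"

payload = {
    "name": "test-sdk",
    "parse_type": 1,  # number of parsers in your Parser component
    "pipeline_id": "d0bebe30ae2211f0970942010a8e0005",  # 32-char lowercase hex, from the pipeline URL in the UI
    # Do NOT include "chunk_method" or "parser_config" together with the two fields above.
}

response = requests.post(
    f"{BASE_URL}/datasets",
    headers={"Authorization": f"Bearer {API_KEY}"},
    json=payload,
)
print(response.json())
```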
|
||||
|
||||
#### Response
|
||||
|
||||
|
||||
@ -43,7 +43,6 @@ def get_urls(use_china_mirrors=False) -> list[Union[str, list[str]]]:
|
||||
repos = [
|
||||
"InfiniFlow/text_concat_xgb_v1.0",
|
||||
"InfiniFlow/deepdoc",
|
||||
"InfiniFlow/huqie",
|
||||
]
|
||||
|
||||
|
||||
|
||||
@ -57,7 +57,7 @@ async def run_graphrag(
|
||||
start = trio.current_time()
|
||||
tenant_id, kb_id, doc_id = row["tenant_id"], str(row["kb_id"]), row["doc_id"]
|
||||
chunks = []
|
||||
for d in settings.retriever.chunk_list(doc_id, tenant_id, [kb_id], fields=["content_with_weight", "doc_id"], sort_by_position=True):
|
||||
for d in settings.retriever.chunk_list(doc_id, tenant_id, [kb_id], max_count=10000, fields=["content_with_weight", "doc_id"], sort_by_position=True):
|
||||
chunks.append(d["content_with_weight"])
|
||||
|
||||
with trio.fail_after(max(120, len(chunks) * 60 * 10) if enable_timeout_assertion else 10000000000):
|
||||
@ -174,13 +174,19 @@ async def run_graphrag_for_kb(
|
||||
chunks = []
|
||||
current_chunk = ""
|
||||
|
||||
for d in settings.retriever.chunk_list(
|
||||
# DEBUG: Obtener todos los chunks primero
|
||||
raw_chunks = list(settings.retriever.chunk_list(
|
||||
doc_id,
|
||||
tenant_id,
|
||||
[kb_id],
|
||||
max_count=10000, # FIX: Aumentar límite para procesar todos los chunks
|
||||
fields=fields_for_chunks,
|
||||
sort_by_position=True,
|
||||
):
|
||||
))
|
||||
|
||||
callback(msg=f"[DEBUG] chunk_list() returned {len(raw_chunks)} raw chunks for doc {doc_id}")
|
||||
|
||||
for d in raw_chunks:
|
||||
content = d["content_with_weight"]
|
||||
if num_tokens_from_string(current_chunk + content) < 1024:
|
||||
current_chunk += content
|
||||
|
||||
@ -96,7 +96,7 @@ ragflow:
|
||||
infinity:
|
||||
image:
|
||||
repository: infiniflow/infinity
|
||||
tag: v0.6.8
|
||||
tag: v0.6.10
|
||||
pullPolicy: IfNotPresent
|
||||
pullSecrets: []
|
||||
storage:
|
||||
|
||||
@ -57,7 +57,6 @@ JSON_RESPONSE = True
|
||||
|
||||
class RAGFlowConnector:
|
||||
_MAX_DATASET_CACHE = 32
|
||||
_MAX_DOCUMENT_CACHE = 128
|
||||
_CACHE_TTL = 300
|
||||
|
||||
_dataset_metadata_cache: OrderedDict[str, tuple[dict, float | int]] = OrderedDict() # "dataset_id" -> (metadata, expiry_ts)
|
||||
@ -116,8 +115,6 @@ class RAGFlowConnector:
|
||||
def _set_cached_document_metadata_by_dataset(self, dataset_id, doc_id_meta_list):
|
||||
self._document_metadata_cache[dataset_id] = (doc_id_meta_list, self._get_expiry_timestamp())
|
||||
self._document_metadata_cache.move_to_end(dataset_id)
|
||||
if len(self._document_metadata_cache) > self._MAX_DOCUMENT_CACHE:
|
||||
self._document_metadata_cache.popitem(last=False)
|
||||
|
||||
def list_datasets(self, page: int = 1, page_size: int = 1000, orderby: str = "create_time", desc: bool = True, id: str | None = None, name: str | None = None):
|
||||
res = self._get("/datasets", {"page": page, "page_size": page_size, "orderby": orderby, "desc": desc, "id": id, "name": name})
|
||||
@ -240,46 +237,46 @@ class RAGFlowConnector:
|
||||
|
||||
docs = None if force_refresh else self._get_cached_document_metadata_by_dataset(dataset_id)
|
||||
if docs is None:
|
||||
docs_res = self._get(f"/datasets/{dataset_id}/documents")
|
||||
docs_data = docs_res.json()
|
||||
if docs_data.get("code") == 0 and docs_data.get("data", {}).get("docs"):
|
||||
doc_id_meta_list = []
|
||||
docs = {}
|
||||
for doc in docs_data["data"]["docs"]:
|
||||
doc_id = doc.get("id")
|
||||
if not doc_id:
|
||||
continue
|
||||
doc_meta = {
|
||||
"document_id": doc_id,
|
||||
"name": doc.get("name", ""),
|
||||
"location": doc.get("location", ""),
|
||||
"type": doc.get("type", ""),
|
||||
"size": doc.get("size"),
|
||||
"chunk_count": doc.get("chunk_count"),
|
||||
# "chunk_method": doc.get("chunk_method", ""),
|
||||
"create_date": doc.get("create_date", ""),
|
||||
"update_date": doc.get("update_date", ""),
|
||||
# "process_begin_at": doc.get("process_begin_at", ""),
|
||||
# "process_duration": doc.get("process_duration"),
|
||||
# "progress": doc.get("progress"),
|
||||
# "progress_msg": doc.get("progress_msg", ""),
|
||||
# "status": doc.get("status", ""),
|
||||
# "run": doc.get("run", ""),
|
||||
"token_count": doc.get("token_count"),
|
||||
# "source_type": doc.get("source_type", ""),
|
||||
"thumbnail": doc.get("thumbnail", ""),
|
||||
"dataset_id": doc.get("dataset_id", dataset_id),
|
||||
"meta_fields": doc.get("meta_fields", {}),
|
||||
# "parser_config": doc.get("parser_config", {})
|
||||
}
|
||||
doc_id_meta_list.append((doc_id, doc_meta))
|
||||
docs[doc_id] = doc_meta
|
||||
page = 1
|
||||
page_size = 30
|
||||
doc_id_meta_list = []
|
||||
docs = {}
|
||||
while page:
|
||||
docs_res = self._get(f"/datasets/{dataset_id}/documents?page={page}")
|
||||
docs_data = docs_res.json()
|
||||
if docs_data.get("code") == 0 and docs_data.get("data", {}).get("docs"):
|
||||
for doc in docs_data["data"]["docs"]:
|
||||
doc_id = doc.get("id")
|
||||
if not doc_id:
|
||||
continue
|
||||
doc_meta = {
|
||||
"document_id": doc_id,
|
||||
"name": doc.get("name", ""),
|
||||
"location": doc.get("location", ""),
|
||||
"type": doc.get("type", ""),
|
||||
"size": doc.get("size"),
|
||||
"chunk_count": doc.get("chunk_count"),
|
||||
"create_date": doc.get("create_date", ""),
|
||||
"update_date": doc.get("update_date", ""),
|
||||
"token_count": doc.get("token_count"),
|
||||
"thumbnail": doc.get("thumbnail", ""),
|
||||
"dataset_id": doc.get("dataset_id", dataset_id),
|
||||
"meta_fields": doc.get("meta_fields", {}),
|
||||
}
|
||||
doc_id_meta_list.append((doc_id, doc_meta))
|
||||
docs[doc_id] = doc_meta
|
||||
|
||||
page += 1
|
||||
if docs_data.get("data", {}).get("total", 0) - (page - 1) * page_size <= 0:
|
||||
page = None
|
||||
|
||||
self._set_cached_document_metadata_by_dataset(dataset_id, doc_id_meta_list)
|
||||
if docs:
|
||||
document_cache.update(docs)
|
||||
|
||||
except Exception:
|
||||
except Exception as e:
|
||||
# Gracefully handle metadata cache failures
|
||||
logging.error(f"Problem building the document metadata cache: {str(e)}")
|
||||
pass
|
||||
|
||||
return document_cache, dataset_cache
|
||||
|
||||
@ -49,7 +49,7 @@ dependencies = [
|
||||
"html-text==0.6.2",
|
||||
"httpx[socks]>=0.28.1,<0.29.0",
|
||||
"huggingface-hub>=0.25.0,<0.26.0",
|
||||
"infinity-sdk==0.6.8",
|
||||
"infinity-sdk==0.6.10",
|
||||
"infinity-emb>=0.0.66,<0.0.67",
|
||||
"itsdangerous==2.1.2",
|
||||
"json-repair==0.35.0",
|
||||
@ -131,7 +131,6 @@ dependencies = [
|
||||
"graspologic @ git+https://github.com/yuzhichang/graspologic.git@38e680cab72bc9fb68a7992c3bcc2d53b24e42fd",
|
||||
"mini-racer>=0.12.4,<0.13.0",
|
||||
"pyodbc>=5.2.0,<6.0.0",
|
||||
"pyicu>=2.15.3,<3.0.0",
|
||||
"flasgger>=0.9.7.1,<0.10.0",
|
||||
"xxhash>=3.5.0,<4.0.0",
|
||||
"trio>=0.17.0,<0.29.0",
|
||||
@ -163,6 +162,9 @@ test = [
|
||||
"openpyxl>=3.1.5",
|
||||
"pillow>=10.4.0",
|
||||
"pytest>=8.3.5",
|
||||
"pytest-asyncio>=1.3.0",
|
||||
"pytest-xdist>=3.8.0",
|
||||
"pytest-cov>=7.0.0",
|
||||
"python-docx>=1.1.2",
|
||||
"python-pptx>=1.0.2",
|
||||
"reportlab>=4.4.1",
|
||||
@ -195,8 +197,83 @@ extend-select = ["ASYNC", "ASYNC1"]
|
||||
ignore = ["E402"]
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
pythonpath = [
|
||||
"."
|
||||
]
|
||||
|
||||
testpaths = ["test"]
|
||||
python_files = ["test_*.py"]
|
||||
python_classes = ["Test*"]
|
||||
python_functions = ["test_*"]
|
||||
|
||||
markers = [
|
||||
"p1: high priority test cases",
|
||||
"p2: medium priority test cases",
|
||||
"p3: low priority test cases",
|
||||
]
|
||||
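
As a side note, the `p1`/`p2`/`p3` entries registered above are ordinary pytest markers. A purely illustrative sketch of declaring and selecting one (the test name below is made up):

```python
# Illustrative only: using the priority markers declared in [tool.pytest.ini_options].
import pytest

@pytest.mark.p1
def test_high_priority_path():
    assert 1 + 1 == 2

# Select only high-priority cases from the command line:
#   pytest -m p1
```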
|
||||
# Test collection and runtime configuration
|
||||
filterwarnings = [
|
||||
"error", # Treat warnings as errors
|
||||
"ignore::DeprecationWarning", # Ignore specific warnings
|
||||
]
|
||||
|
||||
# Command line options
|
||||
addopts = [
|
||||
"-v", # Verbose output
|
||||
"--strict-markers", # Enforce marker definitions
|
||||
"--tb=short", # Simplified traceback
|
||||
"--disable-warnings", # Disable warnings
|
||||
"--color=yes" # Colored output
|
||||
]
|
||||
|
||||
|
||||
# Coverage configuration
|
||||
[tool.coverage.run]
|
||||
# Source paths - adjust according to your project structure
|
||||
source = [
|
||||
# "../../api/db/services",
|
||||
# Add more directories if needed:
|
||||
"../../common",
|
||||
# "../../utils",
|
||||
]
|
||||
|
||||
# Files/directories to exclude
|
||||
omit = [
|
||||
"*/tests/*",
|
||||
"*/test_*",
|
||||
"*/__pycache__/*",
|
||||
"*/.pytest_cache/*",
|
||||
"*/venv/*",
|
||||
"*/.venv/*",
|
||||
"*/env/*",
|
||||
"*/site-packages/*",
|
||||
"*/dist/*",
|
||||
"*/build/*",
|
||||
"*/migrations/*",
|
||||
"setup.py"
|
||||
]
|
||||
|
||||
[tool.coverage.report]
|
||||
# Report configuration
|
||||
precision = 2
|
||||
show_missing = true
|
||||
skip_covered = false
|
||||
fail_under = 0 # Minimum coverage requirement (0-100)
|
||||
|
||||
# Lines to exclude (optional)
|
||||
exclude_lines = [
|
||||
# "pragma: no cover",
|
||||
# "def __repr__",
|
||||
# "raise AssertionError",
|
||||
# "raise NotImplementedError",
|
||||
# "if __name__ == .__main__.:",
|
||||
# "if TYPE_CHECKING:",
|
||||
"pass"
|
||||
]
|
||||
|
||||
[tool.coverage.html]
|
||||
# HTML report configuration
|
||||
directory = "htmlcov"
|
||||
title = "Test Coverage Report"
|
||||
# extra_css = "custom.css" # Optional custom CSS
|
||||
@ -14,5 +14,5 @@
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
from beartype.claw import beartype_this_package
|
||||
beartype_this_package()
|
||||
# from beartype.claw import beartype_this_package
|
||||
# beartype_this_package()
|
||||
|
||||
@ -219,23 +219,27 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
||||
)
|
||||
|
||||
def _normalize_section(section):
|
||||
# pad section to length 3: (txt, sec_id, poss)
|
||||
if len(section) == 1:
|
||||
# Pad/normalize to (txt, layout, positions)
|
||||
if not isinstance(section, (list, tuple)):
|
||||
section = (section, "", [])
|
||||
elif len(section) == 1:
|
||||
section = (section[0], "", [])
|
||||
elif len(section) == 2:
|
||||
section = (section[0], "", section[1])
|
||||
elif len(section) != 3:
|
||||
raise ValueError(f"Unexpected section length: {len(section)} (value={section!r})")
|
||||
else:
|
||||
section = (section[0], section[1], section[2])
|
||||
|
||||
txt, layoutno, poss = section
|
||||
if isinstance(poss, str):
|
||||
poss = pdf_parser.extract_positions(poss)
|
||||
first = poss[0] # tuple: ([pn], x1, x2, y1, y2)
|
||||
pn = first[0]
|
||||
|
||||
if isinstance(pn, list):
|
||||
pn = pn[0] # [pn] -> pn
|
||||
if poss:
|
||||
first = poss[0] # tuple: ([pn], x1, x2, y1, y2)
|
||||
pn = first[0]
|
||||
if isinstance(pn, list) and pn:
|
||||
pn = pn[0] # [pn] -> pn
|
||||
poss[0] = (pn, *first[1:])
|
||||
if not poss:
|
||||
poss = []
|
||||
|
||||
return (txt, layoutno, poss)
|
||||
|
||||
|
||||
@ -86,9 +86,11 @@ class Pdf(PdfParser):
|
||||
|
||||
# (A) Add text
|
||||
for b in self.boxes:
|
||||
if not (from_page < b["page_number"] <= to_page + from_page):
|
||||
# b["page_number"] is relative page number,must + from_page
|
||||
global_page_num = b["page_number"] + from_page
|
||||
if not (from_page < global_page_num <= to_page + from_page):
|
||||
continue
|
||||
page_items[b["page_number"]].append({
|
||||
page_items[global_page_num].append({
|
||||
"top": b["top"],
|
||||
"x0": b["x0"],
|
||||
"text": b["text"],
|
||||
@ -100,7 +102,6 @@ class Pdf(PdfParser):
|
||||
if not positions:
|
||||
continue
|
||||
|
||||
# Handle content type (list vs str)
|
||||
if isinstance(content, list):
|
||||
final_text = "\n".join(content)
|
||||
elif isinstance(content, str):
|
||||
@ -109,10 +110,11 @@ class Pdf(PdfParser):
|
||||
final_text = str(content)
|
||||
|
||||
try:
|
||||
# Parse positions
|
||||
pn_index = positions[0][0]
|
||||
if isinstance(pn_index, list):
|
||||
pn_index = pn_index[0]
|
||||
|
||||
# pn_index in tbls is absolute page number
|
||||
current_page_num = int(pn_index) + 1
|
||||
except Exception as e:
|
||||
print(f"Error parsing position: {e}")
|
||||
|
||||
@ -12,10 +12,17 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import json
|
||||
import logging
|
||||
import random
|
||||
from copy import deepcopy
|
||||
from copy import deepcopy, copy
|
||||
|
||||
import trio
|
||||
import xxhash
|
||||
|
||||
from agent.component.llm import LLMParam, LLM
|
||||
from rag.flow.base import ProcessBase, ProcessParamBase
|
||||
from rag.prompts.generator import run_toc_from_text
|
||||
|
||||
|
||||
class ExtractorParam(ProcessParamBase, LLMParam):
|
||||
@ -31,6 +38,38 @@ class ExtractorParam(ProcessParamBase, LLMParam):
|
||||
class Extractor(ProcessBase, LLM):
|
||||
component_name = "Extractor"
|
||||
|
||||
def _build_TOC(self, docs):
|
||||
self.callback(message="Start to generate table of content ...")
|
||||
docs = sorted(docs, key=lambda d:(
|
||||
d.get("page_num_int", 0)[0] if isinstance(d.get("page_num_int", 0), list) else d.get("page_num_int", 0),
|
||||
d.get("top_int", 0)[0] if isinstance(d.get("top_int", 0), list) else d.get("top_int", 0)
|
||||
))
|
||||
toc: list[dict] = trio.run(run_toc_from_text, [d["text"] for d in docs], self.chat_mdl)
|
||||
logging.info("------------ T O C -------------\n"+json.dumps(toc, ensure_ascii=False, indent=' '))
|
||||
ii = 0
|
||||
while ii < len(toc):
|
||||
try:
|
||||
idx = int(toc[ii]["chunk_id"])
|
||||
del toc[ii]["chunk_id"]
|
||||
toc[ii]["ids"] = [docs[idx]["id"]]
|
||||
if ii == len(toc) -1:
|
||||
break
|
||||
for jj in range(idx+1, int(toc[ii+1]["chunk_id"])+1):
|
||||
toc[ii]["ids"].append(docs[jj]["id"])
|
||||
except Exception as e:
|
||||
logging.exception(e)
|
||||
ii += 1
|
||||
|
||||
if toc:
|
||||
d = deepcopy(docs[-1])
|
||||
d["content_with_weight"] = json.dumps(toc, ensure_ascii=False)
|
||||
d["toc_kwd"] = "toc"
|
||||
d["available_int"] = 0
|
||||
d["page_num_int"] = [100000000]
|
||||
d["id"] = xxhash.xxh64((d["content_with_weight"] + str(d["doc_id"])).encode("utf-8", "surrogatepass")).hexdigest()
|
||||
return d
|
||||
return None
|
||||
|
||||
async def _invoke(self, **kwargs):
|
||||
self.set_output("output_format", "chunks")
|
||||
self.callback(random.randint(1, 5) / 100.0, "Start to generate.")
|
||||
@ -45,6 +84,12 @@ class Extractor(ProcessBase, LLM):
|
||||
chunks_key = k
|
||||
|
||||
if chunks:
|
||||
if self._param.field_name == "toc":
|
||||
toc = self._build_TOC(chunks)
|
||||
chunks.append(toc)
|
||||
self.set_output("chunks", chunks)
|
||||
return
|
||||
|
||||
prog = 0
|
||||
for i, ck in enumerate(chunks):
|
||||
args[chunks_key] = ck["text"]
|
||||
|
||||
@ -537,7 +537,8 @@ class Dealer:
|
||||
doc["id"] = id
|
||||
if dict_chunks:
|
||||
res.extend(dict_chunks.values())
|
||||
if len(dict_chunks.values()) < bs:
|
||||
# FIX: only stop when no chunks are returned, not when fewer than bs
|
||||
if len(dict_chunks.values()) == 0:
|
||||
break
|
||||
return res
|
||||
|
||||
|
||||
@ -343,7 +343,8 @@ def form_history(history, limit=-6):
|
||||
return context
|
||||
|
||||
|
||||
def analyze_task(chat_mdl, prompt, task_name, tools_description: list[dict], user_defined_prompts: dict={}):
|
||||
|
||||
async def analyze_task_async(chat_mdl, prompt, task_name, tools_description: list[dict], user_defined_prompts: dict={}):
|
||||
tools_desc = tool_schema(tools_description)
|
||||
context = ""
|
||||
|
||||
@ -352,7 +353,7 @@ def analyze_task(chat_mdl, prompt, task_name, tools_description: list[dict], use
|
||||
else:
|
||||
template = PROMPT_JINJA_ENV.from_string(ANALYZE_TASK_SYSTEM + "\n\n" + ANALYZE_TASK_USER)
|
||||
context = template.render(task=task_name, context=context, agent_prompt=prompt, tools_desc=tools_desc)
|
||||
kwd = chat_mdl.chat(context, [{"role": "user", "content": "Please analyze it."}])
|
||||
kwd = await _chat_async(chat_mdl, context, [{"role": "user", "content": "Please analyze it."}])
|
||||
if isinstance(kwd, tuple):
|
||||
kwd = kwd[0]
|
||||
kwd = re.sub(r"^.*</think>", "", kwd, flags=re.DOTALL)
|
||||
@ -361,13 +362,17 @@ def analyze_task(chat_mdl, prompt, task_name, tools_description: list[dict], use
|
||||
return kwd
|
||||
|
||||
|
||||
async def analyze_task_async(chat_mdl, prompt, task_name, tools_description: list[dict], user_defined_prompts: dict={}):
|
||||
return await asyncio.to_thread(analyze_task, chat_mdl, prompt, task_name, tools_description, user_defined_prompts)
|
||||
async def _chat_async(chat_mdl, system: str, history: list, **kwargs):
|
||||
chat_async = getattr(chat_mdl, "async_chat", None)
|
||||
if chat_async and asyncio.iscoroutinefunction(chat_async):
|
||||
return await chat_async(system, history, **kwargs)
|
||||
return await asyncio.to_thread(chat_mdl.chat, system, history, **kwargs)
|
||||
|
||||
|
||||
def next_step(chat_mdl, history:list, tools_description: list[dict], task_desc, user_defined_prompts: dict={}):
|
||||
|
||||
async def next_step_async(chat_mdl, history:list, tools_description: list[dict], task_desc, user_defined_prompts: dict={}):
|
||||
if not tools_description:
|
||||
return ""
|
||||
return "", 0
|
||||
desc = tool_schema(tools_description)
|
||||
template = PROMPT_JINJA_ENV.from_string(user_defined_prompts.get("plan_generation", NEXT_STEP))
|
||||
user_prompt = "\nWhat's the next tool to call? If ready OR IMPOSSIBLE TO BE READY, then call `complete_task`."
|
||||
@ -376,18 +381,18 @@ def next_step(chat_mdl, history:list, tools_description: list[dict], task_desc,
|
||||
hist[-1]["content"] += user_prompt
|
||||
else:
|
||||
hist.append({"role": "user", "content": user_prompt})
|
||||
json_str = chat_mdl.chat(template.render(task_analysis=task_desc, desc=desc, today=datetime.datetime.now().strftime("%Y-%m-%d")),
|
||||
hist[1:], stop=["<|stop|>"])
|
||||
json_str = await _chat_async(
|
||||
chat_mdl,
|
||||
template.render(task_analysis=task_desc, desc=desc, today=datetime.datetime.now().strftime("%Y-%m-%d")),
|
||||
hist[1:],
|
||||
stop=["<|stop|>"],
|
||||
)
|
||||
tk_cnt = num_tokens_from_string(json_str)
|
||||
json_str = re.sub(r"^.*</think>", "", json_str, flags=re.DOTALL)
|
||||
return json_str, tk_cnt
|
||||
|
||||
|
||||
async def next_step_async(chat_mdl, history:list, tools_description: list[dict], task_desc, user_defined_prompts: dict={}):
|
||||
return await asyncio.to_thread(next_step, chat_mdl, history, tools_description, task_desc, user_defined_prompts)
|
||||
|
||||
|
||||
def reflect(chat_mdl, history: list[dict], tool_call_res: list[Tuple], user_defined_prompts: dict={}):
|
||||
async def reflect_async(chat_mdl, history: list[dict], tool_call_res: list[Tuple], user_defined_prompts: dict={}):
|
||||
tool_calls = [{"name": p[0], "result": p[1]} for p in tool_call_res]
|
||||
goal = history[1]["content"]
|
||||
template = PROMPT_JINJA_ENV.from_string(user_defined_prompts.get("reflection", REFLECT))
|
||||
@ -398,7 +403,7 @@ def reflect(chat_mdl, history: list[dict], tool_call_res: list[Tuple], user_defi
|
||||
else:
|
||||
hist.append({"role": "user", "content": user_prompt})
|
||||
_, msg = message_fit_in(hist, chat_mdl.max_length)
|
||||
ans = chat_mdl.chat(msg[0]["content"], msg[1:])
|
||||
ans = await _chat_async(chat_mdl, msg[0]["content"], msg[1:])
|
||||
ans = re.sub(r"^.*</think>", "", ans, flags=re.DOTALL)
|
||||
return """
|
||||
**Observation**
|
||||
@ -429,23 +434,15 @@ def tool_call_summary(chat_mdl, name: str, params: dict, result: str, user_defin
|
||||
return re.sub(r"^.*</think>", "", ans, flags=re.DOTALL)
|
||||
|
||||
|
||||
def rank_memories(chat_mdl, goal:str, sub_goal:str, tool_call_summaries: list[str], user_defined_prompts: dict={}):
|
||||
async def rank_memories_async(chat_mdl, goal:str, sub_goal:str, tool_call_summaries: list[str], user_defined_prompts: dict={}):
|
||||
template = PROMPT_JINJA_ENV.from_string(RANK_MEMORY)
|
||||
system_prompt = template.render(goal=goal, sub_goal=sub_goal, results=[{"i": i, "content": s} for i,s in enumerate(tool_call_summaries)])
|
||||
user_prompt = " → rank: "
|
||||
_, msg = message_fit_in(form_message(system_prompt, user_prompt), chat_mdl.max_length)
|
||||
ans = chat_mdl.chat(msg[0]["content"], msg[1:], stop="<|stop|>")
|
||||
ans = await _chat_async(chat_mdl, msg[0]["content"], msg[1:], stop="<|stop|>")
|
||||
return re.sub(r"^.*</think>", "", ans, flags=re.DOTALL)
|
||||
|
||||
|
||||
async def reflect_async(chat_mdl, history: list[dict], tool_call_res: list[Tuple], user_defined_prompts: dict={}):
|
||||
return await asyncio.to_thread(reflect, chat_mdl, history, tool_call_res, user_defined_prompts)
|
||||
|
||||
|
||||
async def rank_memories_async(chat_mdl, goal:str, sub_goal:str, tool_call_summaries: list[str], user_defined_prompts: dict={}):
|
||||
return await asyncio.to_thread(rank_memories, chat_mdl, goal, sub_goal, tool_call_summaries, user_defined_prompts)
|
||||
|
||||
|
||||
def gen_meta_filter(chat_mdl, meta_data:dict, query: str) -> dict:
|
||||
meta_data_structure = {}
|
||||
for key, values in meta_data.items():
|
||||
@ -514,7 +511,7 @@ def toc_index_extractor(toc:list[dict], content:str, chat_mdl):
|
||||
|
||||
The structure variable is the numeric system which represents the index of the hierarchy section in the table of contents. For example, the first section has structure index 1, the first subsection has structure index 1.1, the second subsection has structure index 1.2, etc.
|
||||
|
||||
The response should be in the following JSON format:
|
||||
The response should be in the following JSON format:
|
||||
[
|
||||
{
|
||||
"structure": <structure index, "x.x.x" or None> (string),
|
||||
@ -641,8 +638,8 @@ def toc_transformer(toc_pages, chat_mdl):
|
||||
|
||||
The `structure` is the numeric system which represents the index of the hierarchy section in the table of contents. For example, the first section has structure index 1, the first subsection has structure index 1.1, the second subsection has structure index 1.2, etc.
|
||||
The `title` is a short phrase or a several-words term.
|
||||
|
||||
The response should be in the following JSON format:
|
||||
|
||||
The response should be in the following JSON format:
|
||||
[
|
||||
{
|
||||
"structure": <structure index, "x.x.x" or None> (string),
|
||||
@ -667,7 +664,7 @@ def toc_transformer(toc_pages, chat_mdl):
|
||||
while not (if_complete == "yes"):
|
||||
prompt = f"""
|
||||
Your task is to continue the table of contents json structure, directly output the remaining part of the json structure.
|
||||
The response should be in the following JSON format:
|
||||
The response should be in the following JSON format:
|
||||
|
||||
The raw table of contents json structure is:
|
||||
{toc_content}
|
||||
@ -756,7 +753,7 @@ async def run_toc_from_text(chunks, chat_mdl, callback=None):
|
||||
|
||||
for chunk in chunks_res:
|
||||
titles.extend(chunk.get("toc", []))
|
||||
|
||||
|
||||
# Filter out entries with title == -1
|
||||
prune = len(titles) > 512
|
||||
max_len = 12 if prune else 22
|
||||
|
||||
555629
rag/res/huqie.txt
File diff suppressed because it is too large
@ -157,11 +157,30 @@ class Confluence(SyncBase):
|
||||
from common.data_source.config import DocumentSource
|
||||
from common.data_source.interfaces import StaticCredentialsProvider
|
||||
|
||||
index_mode = (self.conf.get("index_mode") or "everything").lower()
|
||||
if index_mode not in {"everything", "space", "page"}:
|
||||
index_mode = "everything"
|
||||
|
||||
space = ""
|
||||
page_id = ""
|
||||
|
||||
index_recursively = False
|
||||
if index_mode == "space":
|
||||
space = (self.conf.get("space") or "").strip()
|
||||
if not space:
|
||||
raise ValueError("Space Key is required when indexing a specific Confluence space.")
|
||||
elif index_mode == "page":
|
||||
page_id = (self.conf.get("page_id") or "").strip()
|
||||
if not page_id:
|
||||
raise ValueError("Page ID is required when indexing a specific Confluence page.")
|
||||
index_recursively = bool(self.conf.get("index_recursively", False))
|
||||
|
||||
self.connector = ConfluenceConnector(
|
||||
wiki_base=self.conf["wiki_base"],
|
||||
space=self.conf.get("space", ""),
|
||||
is_cloud=self.conf.get("is_cloud", True),
|
||||
# page_id=self.conf.get("page_id", ""),
|
||||
space=space,
|
||||
page_id=page_id,
|
||||
index_recursively=index_recursively,
|
||||
)
|
||||
|
||||
credentials_provider = StaticCredentialsProvider(tenant_id=task["tenant_id"], connector_name=DocumentSource.CONFLUENCE, credential_json=self.conf["credentials"])
|
||||
|
||||
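
For context, a hedged sketch of the configuration dictionary the code above reads from `self.conf`; the field names follow the diff, while the concrete values and the credential key names are assumptions:

```python
# Illustrative configuration for the Confluence sync above (values are placeholders).
conf = {
    "wiki_base": "https://example.atlassian.net/wiki",
    "is_cloud": True,
    "index_mode": "page",        # one of "everything", "space", "page"; anything else falls back to "everything"
    "page_id": "123456",         # required when index_mode == "page"
    "index_recursively": True,   # only consulted in page mode
    # "space": "ENG",            # required instead when index_mode == "space"
    "credentials": {"confluence_api_token": "<SECRET>"},  # assumption: key names depend on your Confluence auth setup
}
```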
@ -29,6 +29,7 @@ from api.db.services.knowledgebase_service import KnowledgebaseService
|
||||
from api.db.services.pipeline_operation_log_service import PipelineOperationLogService
|
||||
from common.connection_utils import timeout
|
||||
from rag.utils.base64_image import image2id
|
||||
from rag.utils.raptor_utils import should_skip_raptor, get_skip_reason
|
||||
from common.log_utils import init_root_logger
|
||||
from common.config_utils import show_configs
|
||||
from graphrag.general.index import run_graphrag_for_kb
|
||||
@ -68,7 +69,7 @@ from common.signal_utils import start_tracemalloc_and_snapshot, stop_tracemalloc
|
||||
from common.exceptions import TaskCanceledException
|
||||
from common import settings
|
||||
from common.constants import PAGERANK_FLD, TAG_FLD, SVR_CONSUMER_GROUP_NAME
|
||||
from common.misc_utils import install_mineru
|
||||
from common.misc_utils import check_and_install_mineru
|
||||
|
||||
BATCH_SIZE = 64
|
||||
|
||||
@ -853,6 +854,17 @@ async def do_handle_task(task):
|
||||
progress_callback(prog=-1.0, msg="Internal error: Invalid RAPTOR configuration")
|
||||
return
|
||||
|
||||
# Check if Raptor should be skipped for structured data
|
||||
file_type = task.get("type", "")
|
||||
parser_id = task.get("parser_id", "")
|
||||
raptor_config = kb_parser_config.get("raptor", {})
|
||||
|
||||
if should_skip_raptor(file_type, parser_id, task_parser_config, raptor_config):
|
||||
skip_reason = get_skip_reason(file_type, parser_id, task_parser_config)
|
||||
logging.info(f"Skipping Raptor for document {task_document_name}: {skip_reason}")
|
||||
progress_callback(prog=1.0, msg=f"Raptor skipped: {skip_reason}")
|
||||
return
|
||||
|
||||
# bind LLM for raptor
|
||||
chat_model = LLMBundle(task_tenant_id, LLMType.CHAT, llm_name=task_llm_id, lang=task_language)
|
||||
# run RAPTOR
|
||||
@ -944,7 +956,7 @@ async def do_handle_task(task):
|
||||
logging.info(progress_message)
|
||||
progress_callback(msg=progress_message)
|
||||
if task["parser_id"].lower() == "naive" and task["parser_config"].get("toc_extraction", False):
|
||||
toc_thread = executor.submit(build_TOC,task, chunks, progress_callback)
|
||||
toc_thread = executor.submit(build_TOC, task, chunks, progress_callback)
|
||||
|
||||
chunk_count = len(set([chunk["id"] for chunk in chunks]))
|
||||
start_ts = timer()
|
||||
@ -1101,8 +1113,8 @@ async def main():
|
||||
show_configs()
|
||||
settings.init_settings()
|
||||
settings.check_and_install_torch()
|
||||
install_mineru()
|
||||
logging.info(f'settings.EMBEDDING_CFG: {settings.EMBEDDING_CFG}')
|
||||
check_and_install_mineru()
|
||||
logging.info(f'default embedding config: {settings.EMBEDDING_CFG}')
|
||||
settings.print_rag_settings()
|
||||
if sys.platform != "win32":
|
||||
signal.signal(signal.SIGUSR1, start_tracemalloc_and_snapshot)
|
||||
|
||||
207
rag/utils/gcs_conn.py
Normal file
@ -0,0 +1,207 @@
|
||||
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
import logging
|
||||
import time
|
||||
import datetime
|
||||
from io import BytesIO
|
||||
from google.cloud import storage
|
||||
from google.api_core.exceptions import NotFound
|
||||
from common.decorator import singleton
|
||||
from common import settings
|
||||
|
||||
|
||||
@singleton
|
||||
class RAGFlowGCS:
|
||||
def __init__(self):
|
||||
self.client = None
|
||||
self.bucket_name = None
|
||||
self.__open__()
|
||||
|
||||
def __open__(self):
|
||||
try:
|
||||
if self.client:
|
||||
self.client = None
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
self.client = storage.Client()
|
||||
self.bucket_name = settings.GCS["bucket"]
|
||||
except Exception:
|
||||
logging.exception("Fail to connect to GCS")
|
||||
|
||||
def _get_blob_path(self, folder, filename):
|
||||
"""Helper to construct the path: folder/filename"""
|
||||
if not folder:
|
||||
return filename
|
||||
return f"{folder}/{filename}"
|
||||
|
||||
def health(self):
|
||||
folder, fnm, binary = "ragflow-health", "health_check", b"_t@@@1"
|
||||
try:
|
||||
bucket_obj = self.client.bucket(self.bucket_name)
|
||||
if not bucket_obj.exists():
|
||||
logging.error(f"Health check failed: Main bucket '{self.bucket_name}' does not exist.")
|
||||
return False
|
||||
|
||||
blob_path = self._get_blob_path(folder, fnm)
|
||||
blob = bucket_obj.blob(blob_path)
|
||||
blob.upload_from_file(BytesIO(binary), content_type='application/octet-stream')
|
||||
return True
|
||||
except Exception as e:
|
||||
logging.exception(f"Health check failed: {e}")
|
||||
return False
|
||||
|
||||
def put(self, bucket, fnm, binary, tenant_id=None):
|
||||
# RENAMED PARAMETER: bucket_name -> bucket (to match interface)
|
||||
for _ in range(3):
|
||||
try:
|
||||
bucket_obj = self.client.bucket(self.bucket_name)
|
||||
blob_path = self._get_blob_path(bucket, fnm)
|
||||
blob = bucket_obj.blob(blob_path)
|
||||
|
||||
blob.upload_from_file(BytesIO(binary), content_type='application/octet-stream')
|
||||
return True
|
||||
except NotFound:
|
||||
logging.error(f"Fail to put: Main bucket {self.bucket_name} does not exist.")
|
||||
return False
|
||||
except Exception:
|
||||
logging.exception(f"Fail to put {bucket}/{fnm}:")
|
||||
self.__open__()
|
||||
time.sleep(1)
|
||||
return False
|
||||
|
||||
def rm(self, bucket, fnm, tenant_id=None):
|
||||
# RENAMED PARAMETER: bucket_name -> bucket
|
||||
try:
|
||||
bucket_obj = self.client.bucket(self.bucket_name)
|
||||
blob_path = self._get_blob_path(bucket, fnm)
|
||||
blob = bucket_obj.blob(blob_path)
|
||||
blob.delete()
|
||||
except NotFound:
|
||||
pass
|
||||
except Exception:
|
||||
logging.exception(f"Fail to remove {bucket}/{fnm}:")
|
||||
|
||||
def get(self, bucket, filename, tenant_id=None):
|
||||
# RENAMED PARAMETER: bucket_name -> bucket
|
||||
for _ in range(1):
|
||||
try:
|
||||
bucket_obj = self.client.bucket(self.bucket_name)
|
||||
blob_path = self._get_blob_path(bucket, filename)
|
||||
blob = bucket_obj.blob(blob_path)
|
||||
return blob.download_as_bytes()
|
||||
except NotFound:
|
||||
logging.warning(f"File not found {bucket}/{filename} in {self.bucket_name}")
|
||||
return None
|
||||
except Exception:
|
||||
logging.exception(f"Fail to get {bucket}/{filename}")
|
||||
self.__open__()
|
||||
time.sleep(1)
|
||||
return None
|
||||
|
||||
def obj_exist(self, bucket, filename, tenant_id=None):
|
||||
# RENAMED PARAMETER: bucket_name -> bucket
|
||||
try:
|
||||
bucket_obj = self.client.bucket(self.bucket_name)
|
||||
blob_path = self._get_blob_path(bucket, filename)
|
||||
blob = bucket_obj.blob(blob_path)
|
||||
return blob.exists()
|
||||
except Exception:
|
||||
logging.exception(f"obj_exist {bucket}/{filename} got exception")
|
||||
return False
|
||||
|
||||
def bucket_exists(self, bucket):
|
||||
# RENAMED PARAMETER: bucket_name -> bucket
|
||||
try:
|
||||
bucket_obj = self.client.bucket(self.bucket_name)
|
||||
return bucket_obj.exists()
|
||||
except Exception:
|
||||
logging.exception(f"bucket_exist check for {self.bucket_name} got exception")
|
||||
return False
|
||||
|
||||
def get_presigned_url(self, bucket, fnm, expires, tenant_id=None):
|
||||
# RENAMED PARAMETER: bucket_name -> bucket
|
||||
for _ in range(10):
|
||||
try:
|
||||
bucket_obj = self.client.bucket(self.bucket_name)
|
||||
blob_path = self._get_blob_path(bucket, fnm)
|
||||
blob = bucket_obj.blob(blob_path)
|
||||
|
||||
expiration = expires
|
||||
if isinstance(expires, int):
|
||||
expiration = datetime.timedelta(seconds=expires)
|
||||
|
||||
url = blob.generate_signed_url(
|
||||
version="v4",
|
||||
expiration=expiration,
|
||||
method="GET"
|
||||
)
|
||||
return url
|
||||
except Exception:
|
||||
logging.exception(f"Fail to get_presigned {bucket}/{fnm}:")
|
||||
self.__open__()
|
||||
time.sleep(1)
|
||||
return None
|
||||
|
||||
def remove_bucket(self, bucket):
|
||||
# RENAMED PARAMETER: bucket_name -> bucket
|
||||
try:
|
||||
bucket_obj = self.client.bucket(self.bucket_name)
|
||||
prefix = f"{bucket}/"
|
||||
|
||||
blobs = list(self.client.list_blobs(self.bucket_name, prefix=prefix))
|
||||
|
||||
if blobs:
|
||||
bucket_obj.delete_blobs(blobs)
|
||||
except Exception:
|
||||
logging.exception(f"Fail to remove virtual bucket (folder) {bucket}")
|
||||
|
||||
def copy(self, src_bucket, src_path, dest_bucket, dest_path):
|
||||
# RENAMED PARAMETERS to match original interface
|
||||
try:
|
||||
bucket_obj = self.client.bucket(self.bucket_name)
|
||||
|
||||
src_blob_path = self._get_blob_path(src_bucket, src_path)
|
||||
dest_blob_path = self._get_blob_path(dest_bucket, dest_path)
|
||||
|
||||
src_blob = bucket_obj.blob(src_blob_path)
|
||||
|
||||
if not src_blob.exists():
|
||||
logging.error(f"Source object not found: {src_blob_path}")
|
||||
return False
|
||||
|
||||
bucket_obj.copy_blob(src_blob, bucket_obj, dest_blob_path)
|
||||
return True
|
||||
|
||||
except NotFound:
|
||||
logging.error(f"Copy failed: Main bucket {self.bucket_name} does not exist.")
|
||||
return False
|
||||
except Exception:
|
||||
logging.exception(f"Fail to copy {src_bucket}/{src_path} -> {dest_bucket}/{dest_path}")
|
||||
return False
|
||||
|
||||
def move(self, src_bucket, src_path, dest_bucket, dest_path):
|
||||
try:
|
||||
if self.copy(src_bucket, src_path, dest_bucket, dest_path):
|
||||
self.rm(src_bucket, src_path)
|
||||
return True
|
||||
else:
|
||||
logging.error(f"Copy failed, move aborted: {src_bucket}/{src_path}")
|
||||
return False
|
||||
except Exception:
|
||||
logging.exception(f"Fail to move {src_bucket}/{src_path} -> {dest_bucket}/{dest_path}")
|
||||
return False
|
||||
145
rag/utils/raptor_utils.py
Normal file
@ -0,0 +1,145 @@
|
||||
#
|
||||
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
"""
|
||||
Utility functions for Raptor processing decisions.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Optional
|
||||
|
||||
|
||||
# File extensions for structured data types
|
||||
EXCEL_EXTENSIONS = {".xls", ".xlsx", ".xlsm", ".xlsb"}
|
||||
CSV_EXTENSIONS = {".csv", ".tsv"}
|
||||
STRUCTURED_EXTENSIONS = EXCEL_EXTENSIONS | CSV_EXTENSIONS
|
||||
|
||||
|
||||
def is_structured_file_type(file_type: Optional[str]) -> bool:
|
||||
"""
|
||||
Check if a file type is structured data (Excel, CSV, etc.)
|
||||
|
||||
Args:
|
||||
file_type: File extension (e.g., ".xlsx", ".csv")
|
||||
|
||||
Returns:
|
||||
True if file is structured data type
|
||||
"""
|
||||
if not file_type:
|
||||
return False
|
||||
|
||||
# Normalize to lowercase and ensure leading dot
|
||||
file_type = file_type.lower()
|
||||
if not file_type.startswith("."):
|
||||
file_type = f".{file_type}"
|
||||
|
||||
return file_type in STRUCTURED_EXTENSIONS
|
||||
|
||||
|
||||
def is_tabular_pdf(parser_id: str = "", parser_config: Optional[dict] = None) -> bool:
|
||||
"""
|
||||
Check if a PDF is being parsed as tabular data.
|
||||
|
||||
Args:
|
||||
parser_id: Parser ID (e.g., "table", "naive")
|
||||
parser_config: Parser configuration dict
|
||||
|
||||
Returns:
|
||||
True if PDF is being parsed as tabular data
|
||||
"""
|
||||
parser_config = parser_config or {}
|
||||
|
||||
# If using table parser, it's tabular
|
||||
if parser_id and parser_id.lower() == "table":
|
||||
return True
|
||||
|
||||
# Check if html4excel is enabled (Excel-like table parsing)
|
||||
if parser_config.get("html4excel", False):
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
def should_skip_raptor(
|
||||
file_type: Optional[str] = None,
|
||||
parser_id: str = "",
|
||||
parser_config: Optional[dict] = None,
|
||||
raptor_config: Optional[dict] = None
|
||||
) -> bool:
|
||||
"""
|
||||
Determine if Raptor should be skipped for a given document.
|
||||
|
||||
This function implements the logic to automatically disable Raptor for:
|
||||
1. Excel files (.xls, .xlsx, .csv, etc.)
|
||||
2. PDFs with tabular data (using table parser or html4excel)
|
||||
|
||||
Args:
|
||||
file_type: File extension (e.g., ".xlsx", ".pdf")
|
||||
parser_id: Parser ID being used
|
||||
parser_config: Parser configuration dict
|
||||
raptor_config: Raptor configuration dict (can override with auto_disable_for_structured_data)
|
||||
|
||||
Returns:
|
||||
True if Raptor should be skipped, False otherwise
|
||||
"""
|
||||
parser_config = parser_config or {}
|
||||
raptor_config = raptor_config or {}
|
||||
|
||||
# Check if auto-disable is explicitly disabled in config
|
||||
if raptor_config.get("auto_disable_for_structured_data", True) is False:
|
||||
logging.info("Raptor auto-disable is turned off via configuration")
|
||||
return False
|
||||
|
||||
# Check for Excel/CSV files
|
||||
if is_structured_file_type(file_type):
|
||||
logging.info(f"Skipping Raptor for structured file type: {file_type}")
|
||||
return True
|
||||
|
||||
# Check for tabular PDFs
|
||||
if file_type and file_type.lower() in [".pdf", "pdf"]:
|
||||
if is_tabular_pdf(parser_id, parser_config):
|
||||
logging.info(f"Skipping Raptor for tabular PDF (parser_id={parser_id})")
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
def get_skip_reason(
|
||||
file_type: Optional[str] = None,
|
||||
parser_id: str = "",
|
||||
parser_config: Optional[dict] = None
|
||||
) -> str:
|
||||
"""
|
||||
Get a human-readable reason why Raptor was skipped.
|
||||
|
||||
Args:
|
||||
file_type: File extension
|
||||
parser_id: Parser ID being used
|
||||
parser_config: Parser configuration dict
|
||||
|
||||
Returns:
|
||||
Reason string, or empty string if Raptor should not be skipped
|
||||
"""
|
||||
parser_config = parser_config or {}
|
||||
|
||||
if is_structured_file_type(file_type):
|
||||
return f"Structured data file ({file_type}) - Raptor auto-disabled"
|
||||
|
||||
if file_type and file_type.lower() in [".pdf", "pdf"]:
|
||||
if is_tabular_pdf(parser_id, parser_config):
|
||||
return f"Tabular PDF (parser={parser_id}) - Raptor auto-disabled"
|
||||
|
||||
return ""
|
||||
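
A short, illustrative usage sketch of the helpers defined above (the input values are made up; the call pattern mirrors the worker change earlier in this set):

```python
# Illustrative only: deciding whether to skip RAPTOR for an Excel upload.
from rag.utils.raptor_utils import should_skip_raptor, get_skip_reason

if should_skip_raptor(".xlsx", parser_id="naive", parser_config={}, raptor_config={"use_raptor": True}):
    print(get_skip_reason(".xlsx", "naive", {}))
    # -> "Structured data file (.xlsx) - Raptor auto-disabled"
```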
275
run_tests.py
Executable file
@ -0,0 +1,275 @@
|
||||
#!/usr/bin/env python3
|
||||
#
|
||||
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import sys
|
||||
import os
|
||||
import argparse
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
|
||||
|
||||
class Colors:
|
||||
"""ANSI color codes for terminal output"""
|
||||
RED = '\033[0;31m'
|
||||
GREEN = '\033[0;32m'
|
||||
YELLOW = '\033[1;33m'
|
||||
BLUE = '\033[0;34m'
|
||||
NC = '\033[0m' # No Color
|
||||
|
||||
|
||||
class TestRunner:
|
||||
"""RAGFlow Unit Test Runner"""
|
||||
|
||||
def __init__(self):
|
||||
self.project_root = Path(__file__).parent.resolve()
|
||||
self.ut_dir = Path(self.project_root / 'test' / 'unit_test')
|
||||
# Default options
|
||||
self.coverage = False
|
||||
self.parallel = False
|
||||
self.verbose = False
|
||||
self.markers = ""
|
||||
|
||||
# Python interpreter path
|
||||
self.python = sys.executable
|
||||
|
||||
@staticmethod
|
||||
def print_info(message: str) -> None:
|
||||
"""Print informational message"""
|
||||
print(f"{Colors.BLUE}[INFO]{Colors.NC} {message}")
|
||||
|
||||
@staticmethod
|
||||
def print_error(message: str) -> None:
|
||||
"""Print error message"""
|
||||
print(f"{Colors.RED}[ERROR]{Colors.NC} {message}")
|
||||
|
||||
@staticmethod
|
||||
def show_usage() -> None:
|
||||
"""Display usage information"""
|
||||
usage = """
|
||||
RAGFlow Unit Test Runner
|
||||
Usage: python run_tests.py [OPTIONS]
|
||||
|
||||
OPTIONS:
|
||||
-h, --help Show this help message
|
||||
-c, --coverage Run tests with coverage report
|
||||
-p, --parallel Run tests in parallel (requires pytest-xdist)
|
||||
-v, --verbose Verbose output
|
||||
-t, --test FILE Run specific test file or directory
|
||||
-m, --markers MARKERS Run tests with specific markers (e.g., "unit", "integration")
|
||||
|
||||
EXAMPLES:
|
||||
# Run all tests
|
||||
python run_tests.py
|
||||
|
||||
# Run with coverage
|
||||
python run_tests.py --coverage
|
||||
|
||||
# Run in parallel
|
||||
python run_tests.py --parallel
|
||||
|
||||
# Run specific test file
|
||||
python run_tests.py --test services/test_dialog_service.py
|
||||
|
||||
# Run only unit tests
|
||||
python run_tests.py --markers "unit"
|
||||
|
||||
# Run tests with coverage and parallel execution
|
||||
python run_tests.py --coverage --parallel
|
||||
|
||||
"""
|
||||
print(usage)
|
||||
|
||||
def build_pytest_command(self) -> List[str]:
|
||||
"""Build the pytest command arguments"""
|
||||
cmd = ["pytest", str(self.ut_dir)]
|
||||
|
||||
# Add test path
|
||||
|
||||
# Add markers
|
||||
if self.markers:
|
||||
cmd.extend(["-m", self.markers])
|
||||
|
||||
# Add verbose flag
|
||||
if self.verbose:
|
||||
cmd.extend(["-vv"])
|
||||
else:
|
||||
cmd.append("-v")
|
||||
|
||||
# Add coverage
|
||||
if self.coverage:
|
||||
# Relative path from test directory to source code
|
||||
source_path = str(self.project_root / "common")
|
||||
cmd.extend([
|
||||
"--cov", source_path,
|
||||
"--cov-report", "html",
|
||||
"--cov-report", "term"
|
||||
])
|
||||
|
||||
# Add parallel execution
|
||||
if self.parallel:
|
||||
# Try to get number of CPU cores
|
||||
try:
|
||||
import multiprocessing
|
||||
cpu_count = multiprocessing.cpu_count()
|
||||
cmd.extend(["-n", str(cpu_count)])
|
||||
except ImportError:
|
||||
# Fallback to auto if multiprocessing not available
|
||||
cmd.extend(["-n", "auto"])
|
||||
|
||||
# Add default options from pyproject.toml if it exists
|
||||
pyproject_path = self.project_root / "pyproject.toml"
|
||||
if pyproject_path.exists():
|
||||
cmd.extend(["--config-file", str(pyproject_path)])
|
||||
|
||||
return cmd
|
||||
|
||||
def run_tests(self) -> bool:
|
||||
"""Execute the pytest command"""
|
||||
# Change to test directory
|
||||
os.chdir(self.project_root)
|
||||
|
||||
# Build command
|
||||
cmd = self.build_pytest_command()
|
||||
|
||||
# Print test configuration
|
||||
self.print_info("Running RAGFlow Unit Tests")
|
||||
self.print_info("=" * 40)
|
||||
self.print_info(f"Test Directory: {self.ut_dir}")
|
||||
self.print_info(f"Coverage: {self.coverage}")
|
||||
self.print_info(f"Parallel: {self.parallel}")
|
||||
self.print_info(f"Verbose: {self.verbose}")
|
||||
|
||||
if self.markers:
|
||||
self.print_info(f"Markers: {self.markers}")
|
||||
|
||||
print(f"\n{Colors.BLUE}[EXECUTING]{Colors.NC} {' '.join(cmd)}\n")
|
||||
|
||||
# Run pytest
|
||||
try:
|
||||
result = subprocess.run(cmd, check=False)
|
||||
|
||||
if result.returncode == 0:
|
||||
print(f"\n{Colors.GREEN}[SUCCESS]{Colors.NC} All tests passed!")
|
||||
|
||||
if self.coverage:
|
||||
coverage_dir = self.ut_dir / "htmlcov"
|
||||
if coverage_dir.exists():
|
||||
index_file = coverage_dir / "index.html"
|
||||
print(f"\n{Colors.BLUE}[INFO]{Colors.NC} Coverage report generated:")
|
||||
print(f" {index_file}")
|
||||
print("\nOpen with:")
|
||||
print(f" - Windows: start {index_file}")
|
||||
print(f" - macOS: open {index_file}")
|
||||
print(f" - Linux: xdg-open {index_file}")
|
||||
|
||||
return True
|
||||
else:
|
||||
print(f"\n{Colors.RED}[FAILURE]{Colors.NC} Some tests failed!")
|
||||
return False
|
||||
|
||||
except KeyboardInterrupt:
|
||||
print(f"\n{Colors.YELLOW}[INTERRUPTED]{Colors.NC} Test execution interrupted by user")
|
||||
return False
|
||||
except Exception as e:
|
||||
self.print_error(f"Failed to execute tests: {e}")
|
||||
return False
|
||||
|
||||
def parse_arguments(self) -> bool:
|
||||
"""Parse command line arguments"""
|
||||
parser = argparse.ArgumentParser(
|
||||
description="RAGFlow Unit Test Runner",
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||
epilog="""
|
||||
Examples:
|
||||
python run_tests.py # Run all tests
|
||||
python run_tests.py --coverage # Run with coverage
|
||||
python run_tests.py --parallel # Run in parallel
|
||||
python run_tests.py --test services/test_dialog_service.py # Run specific test
|
||||
python run_tests.py --markers "unit" # Run only unit tests
|
||||
"""
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"-c", "--coverage",
|
||||
action="store_true",
|
||||
help="Run tests with coverage report"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"-p", "--parallel",
|
||||
action="store_true",
|
||||
help="Run tests in parallel (requires pytest-xdist)"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"-v", "--verbose",
|
||||
action="store_true",
|
||||
help="Verbose output"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"-t", "--test",
|
||||
type=str,
|
||||
default="",
|
||||
help="Run specific test file or directory"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"-m", "--markers",
|
||||
type=str,
|
||||
default="",
|
||||
help="Run tests with specific markers (e.g., 'unit', 'integration')"
|
||||
)
|
||||
|
||||
try:
|
||||
args = parser.parse_args()
|
||||
|
||||
# Set options
|
||||
self.coverage = args.coverage
|
||||
self.parallel = args.parallel
|
||||
self.verbose = args.verbose
|
||||
self.markers = args.markers
|
||||
|
||||
return True
|
||||
|
||||
except SystemExit:
|
||||
# argparse already printed help, just exit
|
||||
return False
|
||||
except Exception as e:
|
||||
self.print_error(f"Error parsing arguments: {e}")
|
||||
return False
|
||||
|
||||
def run(self) -> int:
|
||||
"""Main execution method"""
|
||||
# Parse command line arguments
|
||||
if not self.parse_arguments():
|
||||
return 1
|
||||
|
||||
# Run tests
|
||||
success = self.run_tests()
|
||||
|
||||
return 0 if success else 1
|
||||
|
||||
|
||||
def main():
|
||||
"""Entry point"""
|
||||
runner = TestRunner()
|
||||
return runner.run()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
@ -37,4 +37,4 @@ __all__ = [
|
||||
"Document",
|
||||
"Chunk",
|
||||
"Agent"
|
||||
]
|
||||
]
|
||||
|
||||
287
test/unit_test/utils/test_raptor_utils.py
Normal file
@ -0,0 +1,287 @@
|
||||
#
|
||||
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
"""
|
||||
Unit tests for Raptor utility functions.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
from rag.utils.raptor_utils import (
|
||||
is_structured_file_type,
|
||||
is_tabular_pdf,
|
||||
should_skip_raptor,
|
||||
get_skip_reason,
|
||||
EXCEL_EXTENSIONS,
|
||||
CSV_EXTENSIONS,
|
||||
STRUCTURED_EXTENSIONS
|
||||
)
|
||||
|
||||
|
||||
class TestIsStructuredFileType:
|
||||
"""Test file type detection for structured data"""
|
||||
|
||||
@pytest.mark.parametrize("file_type,expected", [
|
||||
(".xlsx", True),
|
||||
(".xls", True),
|
||||
(".xlsm", True),
|
||||
(".xlsb", True),
|
||||
(".csv", True),
|
||||
(".tsv", True),
|
||||
("xlsx", True), # Without leading dot
|
||||
("XLSX", True), # Uppercase
|
||||
(".pdf", False),
|
||||
(".docx", False),
|
||||
(".txt", False),
|
||||
("", False),
|
||||
(None, False),
|
||||
])
|
||||
def test_file_type_detection(self, file_type, expected):
|
||||
"""Test detection of various file types"""
|
||||
assert is_structured_file_type(file_type) == expected
|
||||
|
||||
def test_excel_extensions_defined(self):
|
||||
"""Test that Excel extensions are properly defined"""
|
||||
assert ".xlsx" in EXCEL_EXTENSIONS
|
||||
assert ".xls" in EXCEL_EXTENSIONS
|
||||
assert len(EXCEL_EXTENSIONS) >= 4
|
||||
|
||||
def test_csv_extensions_defined(self):
|
||||
"""Test that CSV extensions are properly defined"""
|
||||
assert ".csv" in CSV_EXTENSIONS
|
||||
assert ".tsv" in CSV_EXTENSIONS
|
||||
|
||||
def test_structured_extensions_combined(self):
|
||||
"""Test that structured extensions include both Excel and CSV"""
|
||||
assert EXCEL_EXTENSIONS.issubset(STRUCTURED_EXTENSIONS)
|
||||
assert CSV_EXTENSIONS.issubset(STRUCTURED_EXTENSIONS)
|
||||
|
||||
|
||||
class TestIsTabularPDF:
|
||||
"""Test tabular PDF detection"""
|
||||
|
||||
def test_table_parser_detected(self):
|
||||
"""Test that table parser is detected as tabular"""
|
||||
assert is_tabular_pdf("table", {}) is True
|
||||
assert is_tabular_pdf("TABLE", {}) is True
|
||||
|
||||
def test_html4excel_detected(self):
|
||||
"""Test that html4excel config is detected as tabular"""
|
||||
assert is_tabular_pdf("naive", {"html4excel": True}) is True
|
||||
assert is_tabular_pdf("", {"html4excel": True}) is True
|
||||
|
||||
def test_non_tabular_pdf(self):
|
||||
"""Test that non-tabular PDFs are not detected"""
|
||||
assert is_tabular_pdf("naive", {}) is False
|
||||
assert is_tabular_pdf("naive", {"html4excel": False}) is False
|
||||
assert is_tabular_pdf("", {}) is False
|
||||
|
||||
def test_combined_conditions(self):
|
||||
"""Test combined table parser and html4excel"""
|
||||
assert is_tabular_pdf("table", {"html4excel": True}) is True
|
||||
assert is_tabular_pdf("table", {"html4excel": False}) is True
|
||||
|
||||
|
||||
class TestShouldSkipRaptor:
|
||||
"""Test Raptor skip logic"""
|
||||
|
||||
def test_skip_excel_files(self):
|
||||
"""Test that Excel files skip Raptor"""
|
||||
assert should_skip_raptor(".xlsx") is True
|
||||
assert should_skip_raptor(".xls") is True
|
||||
assert should_skip_raptor(".xlsm") is True
|
||||
|
||||
def test_skip_csv_files(self):
|
||||
"""Test that CSV files skip Raptor"""
|
||||
assert should_skip_raptor(".csv") is True
|
||||
assert should_skip_raptor(".tsv") is True
|
||||
|
||||
def test_skip_tabular_pdf_with_table_parser(self):
|
||||
"""Test that tabular PDFs skip Raptor"""
|
||||
assert should_skip_raptor(".pdf", parser_id="table") is True
|
||||
assert should_skip_raptor("pdf", parser_id="TABLE") is True
|
||||
|
||||
def test_skip_tabular_pdf_with_html4excel(self):
|
||||
"""Test that PDFs with html4excel skip Raptor"""
|
||||
assert should_skip_raptor(".pdf", parser_config={"html4excel": True}) is True
|
||||
|
||||
def test_dont_skip_regular_pdf(self):
|
||||
"""Test that regular PDFs don't skip Raptor"""
|
||||
assert should_skip_raptor(".pdf", parser_id="naive") is False
|
||||
assert should_skip_raptor(".pdf", parser_config={}) is False
|
||||
|
||||
def test_dont_skip_text_files(self):
|
||||
"""Test that text files don't skip Raptor"""
|
||||
assert should_skip_raptor(".txt") is False
|
||||
assert should_skip_raptor(".docx") is False
|
||||
assert should_skip_raptor(".md") is False
|
||||
|
||||
def test_override_with_config(self):
|
||||
"""Test that auto-disable can be overridden"""
|
||||
raptor_config = {"auto_disable_for_structured_data": False}
|
||||
|
||||
# Should not skip even for Excel files
|
||||
assert should_skip_raptor(".xlsx", raptor_config=raptor_config) is False
|
||||
assert should_skip_raptor(".csv", raptor_config=raptor_config) is False
|
||||
assert should_skip_raptor(".pdf", parser_id="table", raptor_config=raptor_config) is False
|
||||
|
||||
def test_default_auto_disable_enabled(self):
|
||||
"""Test that auto-disable is enabled by default"""
|
||||
# Empty raptor_config should default to auto_disable=True
|
||||
assert should_skip_raptor(".xlsx", raptor_config={}) is True
|
||||
assert should_skip_raptor(".xlsx", raptor_config=None) is True
|
||||
|
||||
def test_explicit_auto_disable_enabled(self):
|
||||
"""Test explicit auto-disable enabled"""
|
||||
raptor_config = {"auto_disable_for_structured_data": True}
|
||||
assert should_skip_raptor(".xlsx", raptor_config=raptor_config) is True
|
||||
|
||||
|
||||
class TestGetSkipReason:
|
||||
"""Test skip reason generation"""
|
||||
|
||||
def test_excel_skip_reason(self):
|
||||
"""Test skip reason for Excel files"""
|
||||
reason = get_skip_reason(".xlsx")
|
||||
assert "Structured data file" in reason
|
||||
assert ".xlsx" in reason
|
||||
assert "auto-disabled" in reason.lower()
|
||||
|
||||
def test_csv_skip_reason(self):
|
||||
"""Test skip reason for CSV files"""
|
||||
reason = get_skip_reason(".csv")
|
||||
assert "Structured data file" in reason
|
||||
assert ".csv" in reason
|
||||
|
||||
def test_tabular_pdf_skip_reason(self):
|
||||
"""Test skip reason for tabular PDFs"""
|
||||
reason = get_skip_reason(".pdf", parser_id="table")
|
||||
assert "Tabular PDF" in reason
|
||||
assert "table" in reason.lower()
|
||||
assert "auto-disabled" in reason.lower()
|
||||
|
||||
def test_html4excel_skip_reason(self):
|
||||
"""Test skip reason for html4excel PDFs"""
|
||||
reason = get_skip_reason(".pdf", parser_config={"html4excel": True})
|
||||
assert "Tabular PDF" in reason
|
||||
|
||||
def test_no_skip_reason_for_regular_files(self):
|
||||
"""Test that regular files have no skip reason"""
|
||||
assert get_skip_reason(".txt") == ""
|
||||
assert get_skip_reason(".docx") == ""
|
||||
assert get_skip_reason(".pdf", parser_id="naive") == ""
|
||||
|
||||
|
||||
class TestEdgeCases:
|
||||
"""Test edge cases and error handling"""
|
||||
|
||||
def test_none_values(self):
|
||||
"""Test handling of None values"""
|
||||
assert should_skip_raptor(None) is False
|
||||
assert should_skip_raptor("") is False
|
||||
assert get_skip_reason(None) == ""
|
||||
|
||||
def test_empty_strings(self):
|
||||
"""Test handling of empty strings"""
|
||||
assert should_skip_raptor("") is False
|
||||
assert get_skip_reason("") == ""
|
||||
|
||||
def test_case_insensitivity(self):
|
||||
"""Test case insensitive handling"""
|
||||
assert is_structured_file_type("XLSX") is True
|
||||
assert is_structured_file_type("XlSx") is True
|
||||
assert is_tabular_pdf("TABLE", {}) is True
|
||||
assert is_tabular_pdf("TaBlE", {}) is True
|
||||
|
||||
def test_with_and_without_dot(self):
|
||||
"""Test file extensions with and without leading dot"""
|
||||
assert should_skip_raptor(".xlsx") is True
|
||||
assert should_skip_raptor("xlsx") is True
|
||||
assert should_skip_raptor(".CSV") is True
|
||||
assert should_skip_raptor("csv") is True
|
||||
|
||||
|
||||
class TestIntegrationScenarios:
|
||||
"""Test real-world integration scenarios"""
|
||||
|
||||
def test_financial_excel_report(self):
|
||||
"""Test scenario: Financial quarterly Excel report"""
|
||||
file_type = ".xlsx"
|
||||
parser_id = "naive"
|
||||
parser_config = {}
|
||||
raptor_config = {"use_raptor": True}
|
||||
|
||||
# Should skip Raptor
|
||||
assert should_skip_raptor(file_type, parser_id, parser_config, raptor_config) is True
|
||||
reason = get_skip_reason(file_type, parser_id, parser_config)
|
||||
assert "Structured data file" in reason
|
||||
|
||||
def test_scientific_csv_data(self):
|
||||
"""Test scenario: Scientific experimental CSV results"""
|
||||
file_type = ".csv"
|
||||
|
||||
# Should skip Raptor
|
||||
assert should_skip_raptor(file_type) is True
|
||||
reason = get_skip_reason(file_type)
|
||||
assert ".csv" in reason
|
||||
|
||||
def test_legal_contract_with_tables(self):
|
||||
"""Test scenario: Legal contract PDF with tables"""
|
||||
file_type = ".pdf"
|
||||
parser_id = "table"
|
||||
parser_config = {}
|
||||
|
||||
# Should skip Raptor
|
||||
assert should_skip_raptor(file_type, parser_id, parser_config) is True
|
||||
reason = get_skip_reason(file_type, parser_id, parser_config)
|
||||
assert "Tabular PDF" in reason
|
||||
|
||||
def test_text_heavy_pdf_document(self):
|
||||
"""Test scenario: Text-heavy PDF document"""
|
||||
file_type = ".pdf"
|
||||
parser_id = "naive"
|
||||
parser_config = {}
|
||||
|
||||
# Should NOT skip Raptor
|
||||
assert should_skip_raptor(file_type, parser_id, parser_config) is False
|
||||
reason = get_skip_reason(file_type, parser_id, parser_config)
|
||||
assert reason == ""
|
||||
|
||||
def test_mixed_dataset_processing(self):
|
||||
"""Test scenario: Mixed dataset with various file types"""
|
||||
files = [
|
||||
(".xlsx", "naive", {}, True), # Excel - skip
|
||||
(".csv", "naive", {}, True), # CSV - skip
|
||||
(".pdf", "table", {}, True), # Tabular PDF - skip
|
||||
(".pdf", "naive", {}, False), # Regular PDF - don't skip
|
||||
(".docx", "naive", {}, False), # Word doc - don't skip
|
||||
(".txt", "naive", {}, False), # Text file - don't skip
|
||||
]
|
||||
|
||||
for file_type, parser_id, parser_config, expected_skip in files:
|
||||
result = should_skip_raptor(file_type, parser_id, parser_config)
|
||||
assert result == expected_skip, f"Failed for {file_type}"
|
||||
|
||||
def test_override_for_special_excel(self):
|
||||
"""Test scenario: Override auto-disable for special Excel processing"""
|
||||
file_type = ".xlsx"
|
||||
raptor_config = {"auto_disable_for_structured_data": False}
|
||||
|
||||
# Should NOT skip when explicitly disabled
|
||||
assert should_skip_raptor(file_type, raptor_config=raptor_config) is False
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
pytest.main([__file__, "-v"])
|
||||
300
uv.lock
generated
300
uv.lock
generated
@ -443,27 +443,36 @@ wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/df/73/b6e24bd22e6720ca8ee9a85a0c4a2971af8497d8f3193fa05390cbd46e09/backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8", size = 15148, upload-time = "2022-10-05T19:19:30.546Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "backports-asyncio-runner"
|
||||
version = "1.2.0"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/8e/ff/70dca7d7cb1cbc0edb2c6cc0c38b65cba36cccc491eca64cabd5fe7f8670/backports_asyncio_runner-1.2.0.tar.gz", hash = "sha256:a5aa7b2b7d8f8bfcaa2b57313f70792df84e32a2a746f585213373f900b42162", size = 69893, upload-time = "2025-07-02T02:27:15.685Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/a0/59/76ab57e3fe74484f48a53f8e337171b4a2349e506eabe136d7e01d059086/backports_asyncio_runner-1.2.0-py3-none-any.whl", hash = "sha256:0da0a936a8aeb554eccb426dc55af3ba63bcdc69fa1a600b5bb305413a4477b5", size = 12313, upload-time = "2025-07-02T02:27:14.263Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bce-python-sdk"
|
||||
version = "0.9.54"
|
||||
version = "0.9.55"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
dependencies = [
|
||||
{ name = "future" },
|
||||
{ name = "pycryptodome" },
|
||||
{ name = "six" },
|
||||
]
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b3/c8/1c3bc30aa745ad4c3d073f150bddaf1d43ee6ee33f0b8ec60068494f511e/bce_python_sdk-0.9.54.tar.gz", hash = "sha256:f68026f40f11ea38ef445f50a7756009d5b703c7253438b138b30fb3b83be275", size = 275698, upload-time = "2025-11-27T02:28:50.24Z" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ff/ae/f31ee3ccae94e1a07d8886a413f08c1581349e6cb45bf8b3c608fbf173e4/bce_python_sdk-0.9.55.tar.gz", hash = "sha256:bed63f8a0975f2e9daecf53417c3d5b803232ad87f35a0b16e25850710ce209c", size = 275733, upload-time = "2025-12-02T12:02:38.041Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/69/a7/b8806c8505bb830cc863837ef8b42695170dd9561605c61262250df066d3/bce_python_sdk-0.9.54-py3-none-any.whl", hash = "sha256:a084eee577931f15a55280a7401bea2474115989ee79ebbca131610bdce81c99", size = 390447, upload-time = "2025-11-27T02:28:48.603Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/8f/01/1b13a627e5f0239f24b168138d9a948e876d4b387c03f59d31699578c960/bce_python_sdk-0.9.55-py3-none-any.whl", hash = "sha256:6045d19d783b548644cce50a2f41ef5242da6654fb91b2c21629f309ca6dbf4c", size = 390463, upload-time = "2025-12-02T12:02:36.417Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "beartype"
|
||||
version = "0.22.7"
|
||||
version = "0.22.8"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2c/49/e28a77f8a3868b1c9ff6a030678e84de24c4783bae4c12cec9443cf8fb54/beartype-0.22.7.tar.gz", hash = "sha256:c7269855b71e32b7c9f0fc662baade752eb525107266e053338c2f6e8873826b", size = 1599627, upload-time = "2025-11-29T06:49:56.751Z" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/8c/1d/794ae2acaa67c8b216d91d5919da2606c2bb14086849ffde7f5555f3a3a5/beartype-0.22.8.tar.gz", hash = "sha256:b19b21c9359722ee3f7cc433f063b3e13997b27ae8226551ea5062e621f61165", size = 1602262, upload-time = "2025-12-03T05:11:10.766Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/f6/0c/a764253610513295b7f57904b91fae1d99c7afd1b16b6eaae06fdfb71fb5/beartype-0.22.7-py3-none-any.whl", hash = "sha256:e13430ac07c61fa4bc54d375970438aeb9aa47a482c529a6f438ce52e18e6f50", size = 1330771, upload-time = "2025-11-29T06:49:54.545Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/14/2a/fbcbf5a025d3e71ddafad7efd43e34ec4362f4d523c3c471b457148fb211/beartype-0.22.8-py3-none-any.whl", hash = "sha256:b832882d04e41a4097bab9f63e6992bc6de58c414ee84cba9b45b67314f5ab2e", size = 1331895, upload-time = "2025-12-03T05:11:08.373Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -1025,6 +1034,58 @@ wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/0c/58/bd257695f39d05594ca4ad60df5bcb7e32247f9951fd09a9b8edb82d1daa/contourpy-1.3.3-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:3d1a3799d62d45c18bafd41c5fa05120b96a28079f2393af559b843d1a966a77", size = 225315, upload-time = "2025-07-26T12:02:58.801Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "coverage"
|
||||
version = "7.12.0"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/89/26/4a96807b193b011588099c3b5c89fbb05294e5b90e71018e065465f34eb6/coverage-7.12.0.tar.gz", hash = "sha256:fc11e0a4e372cb5f282f16ef90d4a585034050ccda536451901abfb19a57f40c", size = 819341, upload-time = "2025-11-18T13:34:20.766Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/26/4a/0dc3de1c172d35abe512332cfdcc43211b6ebce629e4cc42e6cd25ed8f4d/coverage-7.12.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:32b75c2ba3f324ee37af3ccee5b30458038c50b349ad9b88cee85096132a575b", size = 217409, upload-time = "2025-11-18T13:31:53.122Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/01/c3/086198b98db0109ad4f84241e8e9ea7e5fb2db8c8ffb787162d40c26cc76/coverage-7.12.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cb2a1b6ab9fe833714a483a915de350abc624a37149649297624c8d57add089c", size = 217927, upload-time = "2025-11-18T13:31:54.458Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/5d/5f/34614dbf5ce0420828fc6c6f915126a0fcb01e25d16cf141bf5361e6aea6/coverage-7.12.0-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:5734b5d913c3755e72f70bf6cc37a0518d4f4745cde760c5d8e12005e62f9832", size = 244678, upload-time = "2025-11-18T13:31:55.805Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/55/7b/6b26fb32e8e4a6989ac1d40c4e132b14556131493b1d06bc0f2be169c357/coverage-7.12.0-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:b527a08cdf15753279b7afb2339a12073620b761d79b81cbe2cdebdb43d90daa", size = 246507, upload-time = "2025-11-18T13:31:57.05Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/06/42/7d70e6603d3260199b90fb48b537ca29ac183d524a65cc31366b2e905fad/coverage-7.12.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9bb44c889fb68004e94cab71f6a021ec83eac9aeabdbb5a5a88821ec46e1da73", size = 248366, upload-time = "2025-11-18T13:31:58.362Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/2d/4a/d86b837923878424c72458c5b25e899a3c5ca73e663082a915f5b3c4d749/coverage-7.12.0-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:4b59b501455535e2e5dde5881739897967b272ba25988c89145c12d772810ccb", size = 245366, upload-time = "2025-11-18T13:31:59.572Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/e6/c2/2adec557e0aa9721875f06ced19730fdb7fc58e31b02b5aa56f2ebe4944d/coverage-7.12.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:d8842f17095b9868a05837b7b1b73495293091bed870e099521ada176aa3e00e", size = 246408, upload-time = "2025-11-18T13:32:00.784Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/5a/4b/8bd1f1148260df11c618e535fdccd1e5aaf646e55b50759006a4f41d8a26/coverage-7.12.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:c5a6f20bf48b8866095c6820641e7ffbe23f2ac84a2efc218d91235e404c7777", size = 244416, upload-time = "2025-11-18T13:32:01.963Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/0e/13/3a248dd6a83df90414c54a4e121fd081fb20602ca43955fbe1d60e2312a9/coverage-7.12.0-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:5f3738279524e988d9da2893f307c2093815c623f8d05a8f79e3eff3a7a9e553", size = 244681, upload-time = "2025-11-18T13:32:03.408Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/76/30/aa833827465a5e8c938935f5d91ba055f70516941078a703740aaf1aa41f/coverage-7.12.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:e0d68c1f7eabbc8abe582d11fa393ea483caf4f44b0af86881174769f185c94d", size = 245300, upload-time = "2025-11-18T13:32:04.686Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/38/24/f85b3843af1370fb3739fa7571819b71243daa311289b31214fe3e8c9d68/coverage-7.12.0-cp310-cp310-win32.whl", hash = "sha256:7670d860e18b1e3ee5930b17a7d55ae6287ec6e55d9799982aa103a2cc1fa2ef", size = 220008, upload-time = "2025-11-18T13:32:05.806Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/3a/a2/c7da5b9566f7164db9eefa133d17761ecb2c2fde9385d754e5b5c80f710d/coverage-7.12.0-cp310-cp310-win_amd64.whl", hash = "sha256:f999813dddeb2a56aab5841e687b68169da0d3f6fc78ccf50952fa2463746022", size = 220943, upload-time = "2025-11-18T13:32:07.166Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/5a/0c/0dfe7f0487477d96432e4815537263363fb6dd7289743a796e8e51eabdf2/coverage-7.12.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:aa124a3683d2af98bd9d9c2bfa7a5076ca7e5ab09fdb96b81fa7d89376ae928f", size = 217535, upload-time = "2025-11-18T13:32:08.812Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/9b/f5/f9a4a053a5bbff023d3bec259faac8f11a1e5a6479c2ccf586f910d8dac7/coverage-7.12.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d93fbf446c31c0140208dcd07c5d882029832e8ed7891a39d6d44bd65f2316c3", size = 218044, upload-time = "2025-11-18T13:32:10.329Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/95/c5/84fc3697c1fa10cd8571919bf9693f693b7373278daaf3b73e328d502bc8/coverage-7.12.0-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:52ca620260bd8cd6027317bdd8b8ba929be1d741764ee765b42c4d79a408601e", size = 248440, upload-time = "2025-11-18T13:32:12.536Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/f4/36/2d93fbf6a04670f3874aed397d5a5371948a076e3249244a9e84fb0e02d6/coverage-7.12.0-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:f3433ffd541380f3a0e423cff0f4926d55b0cc8c1d160fdc3be24a4c03aa65f7", size = 250361, upload-time = "2025-11-18T13:32:13.852Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/5d/49/66dc65cc456a6bfc41ea3d0758c4afeaa4068a2b2931bf83be6894cf1058/coverage-7.12.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f7bbb321d4adc9f65e402c677cd1c8e4c2d0105d3ce285b51b4d87f1d5db5245", size = 252472, upload-time = "2025-11-18T13:32:15.068Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/35/1f/ebb8a18dffd406db9fcd4b3ae42254aedcaf612470e8712f12041325930f/coverage-7.12.0-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:22a7aade354a72dff3b59c577bfd18d6945c61f97393bc5fb7bd293a4237024b", size = 248592, upload-time = "2025-11-18T13:32:16.328Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/da/a8/67f213c06e5ea3b3d4980df7dc344d7fea88240b5fe878a5dcbdfe0e2315/coverage-7.12.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:3ff651dcd36d2fea66877cd4a82de478004c59b849945446acb5baf9379a1b64", size = 250167, upload-time = "2025-11-18T13:32:17.687Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/f0/00/e52aef68154164ea40cc8389c120c314c747fe63a04b013a5782e989b77f/coverage-7.12.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:31b8b2e38391a56e3cea39d22a23faaa7c3fc911751756ef6d2621d2a9daf742", size = 248238, upload-time = "2025-11-18T13:32:19.2Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/1f/a4/4d88750bcf9d6d66f77865e5a05a20e14db44074c25fd22519777cb69025/coverage-7.12.0-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:297bc2da28440f5ae51c845a47c8175a4db0553a53827886e4fb25c66633000c", size = 247964, upload-time = "2025-11-18T13:32:21.027Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/a7/6b/b74693158899d5b47b0bf6238d2c6722e20ba749f86b74454fac0696bb00/coverage-7.12.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6ff7651cc01a246908eac162a6a86fc0dbab6de1ad165dfb9a1e2ec660b44984", size = 248862, upload-time = "2025-11-18T13:32:22.304Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/18/de/6af6730227ce0e8ade307b1cc4a08e7f51b419a78d02083a86c04ccceb29/coverage-7.12.0-cp311-cp311-win32.whl", hash = "sha256:313672140638b6ddb2c6455ddeda41c6a0b208298034544cfca138978c6baed6", size = 220033, upload-time = "2025-11-18T13:32:23.714Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/e2/a1/e7f63021a7c4fe20994359fcdeae43cbef4a4d0ca36a5a1639feeea5d9e1/coverage-7.12.0-cp311-cp311-win_amd64.whl", hash = "sha256:a1783ed5bd0d5938d4435014626568dc7f93e3cb99bc59188cc18857c47aa3c4", size = 220966, upload-time = "2025-11-18T13:32:25.599Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/77/e8/deae26453f37c20c3aa0c4433a1e32cdc169bf415cce223a693117aa3ddd/coverage-7.12.0-cp311-cp311-win_arm64.whl", hash = "sha256:4648158fd8dd9381b5847622df1c90ff314efbfc1df4550092ab6013c238a5fc", size = 219637, upload-time = "2025-11-18T13:32:27.265Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/02/bf/638c0427c0f0d47638242e2438127f3c8ee3cfc06c7fdeb16778ed47f836/coverage-7.12.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:29644c928772c78512b48e14156b81255000dcfd4817574ff69def189bcb3647", size = 217704, upload-time = "2025-11-18T13:32:28.906Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/08/e1/706fae6692a66c2d6b871a608bbde0da6281903fa0e9f53a39ed441da36a/coverage-7.12.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8638cbb002eaa5d7c8d04da667813ce1067080b9a91099801a0053086e52b736", size = 218064, upload-time = "2025-11-18T13:32:30.161Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/a9/8b/eb0231d0540f8af3ffda39720ff43cb91926489d01524e68f60e961366e4/coverage-7.12.0-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:083631eeff5eb9992c923e14b810a179798bb598e6a0dd60586819fc23be6e60", size = 249560, upload-time = "2025-11-18T13:32:31.835Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/e9/a1/67fb52af642e974d159b5b379e4d4c59d0ebe1288677fbd04bbffe665a82/coverage-7.12.0-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:99d5415c73ca12d558e07776bd957c4222c687b9f1d26fa0e1b57e3598bdcde8", size = 252318, upload-time = "2025-11-18T13:32:33.178Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/41/e5/38228f31b2c7665ebf9bdfdddd7a184d56450755c7e43ac721c11a4b8dab/coverage-7.12.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e949ebf60c717c3df63adb4a1a366c096c8d7fd8472608cd09359e1bd48ef59f", size = 253403, upload-time = "2025-11-18T13:32:34.45Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ec/4b/df78e4c8188f9960684267c5a4897836f3f0f20a20c51606ee778a1d9749/coverage-7.12.0-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:6d907ddccbca819afa2cd014bc69983b146cca2735a0b1e6259b2a6c10be1e70", size = 249984, upload-time = "2025-11-18T13:32:35.747Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ba/51/bb163933d195a345c6f63eab9e55743413d064c291b6220df754075c2769/coverage-7.12.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b1518ecbad4e6173f4c6e6c4a46e49555ea5679bf3feda5edb1b935c7c44e8a0", size = 251339, upload-time = "2025-11-18T13:32:37.352Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/15/40/c9b29cdb8412c837cdcbc2cfa054547dd83affe6cbbd4ce4fdb92b6ba7d1/coverage-7.12.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:51777647a749abdf6f6fd8c7cffab12de68ab93aab15efc72fbbb83036c2a068", size = 249489, upload-time = "2025-11-18T13:32:39.212Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/c8/da/b3131e20ba07a0de4437a50ef3b47840dfabf9293675b0cd5c2c7f66dd61/coverage-7.12.0-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:42435d46d6461a3b305cdfcad7cdd3248787771f53fe18305548cba474e6523b", size = 249070, upload-time = "2025-11-18T13:32:40.598Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/70/81/b653329b5f6302c08d683ceff6785bc60a34be9ae92a5c7b63ee7ee7acec/coverage-7.12.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:5bcead88c8423e1855e64b8057d0544e33e4080b95b240c2a355334bb7ced937", size = 250929, upload-time = "2025-11-18T13:32:42.915Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/a3/00/250ac3bca9f252a5fb1338b5ad01331ebb7b40223f72bef5b1b2cb03aa64/coverage-7.12.0-cp312-cp312-win32.whl", hash = "sha256:dcbb630ab034e86d2a0f79aefd2be07e583202f41e037602d438c80044957baa", size = 220241, upload-time = "2025-11-18T13:32:44.665Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/64/1c/77e79e76d37ce83302f6c21980b45e09f8aa4551965213a10e62d71ce0ab/coverage-7.12.0-cp312-cp312-win_amd64.whl", hash = "sha256:2fd8354ed5d69775ac42986a691fbf68b4084278710cee9d7c3eaa0c28fa982a", size = 221051, upload-time = "2025-11-18T13:32:46.008Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/31/f5/641b8a25baae564f9e52cac0e2667b123de961985709a004e287ee7663cc/coverage-7.12.0-cp312-cp312-win_arm64.whl", hash = "sha256:737c3814903be30695b2de20d22bcc5428fdae305c61ba44cdc8b3252984c49c", size = 219692, upload-time = "2025-11-18T13:32:47.372Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ce/a3/43b749004e3c09452e39bb56347a008f0a0668aad37324a99b5c8ca91d9e/coverage-7.12.0-py3-none-any.whl", hash = "sha256:159d50c0b12e060b15ed3d39f87ed43d4f7f7ad40b8a534f4dd331adbb51104a", size = 209503, upload-time = "2025-11-18T13:34:18.892Z" },
|
||||
]
|
||||
|
||||
[package.optional-dependencies]
|
||||
toml = [
|
||||
{ name = "tomli", marker = "python_full_version <= '3.11'" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cramjam"
|
||||
version = "2.11.0"
|
||||
@ -1512,6 +1573,15 @@ wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/8a/0e/97c33bf5009bdbac74fd2beace167cab3f978feb69cc36f1ef79360d6c4e/exceptiongroup-1.3.1-py3-none-any.whl", hash = "sha256:a7a39a3bd276781e98394987d3a5701d0c4edffb633bb7a5144577f82c773598", size = 16740, upload-time = "2025-11-21T23:01:53.443Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "execnet"
|
||||
version = "2.1.2"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/bf/89/780e11f9588d9e7128a3f87788354c7946a9cbb1401ad38a48c4db9a4f07/execnet-2.1.2.tar.gz", hash = "sha256:63d83bfdd9a23e35b9c6a3261412324f964c2ec8dcd8d3c6916ee9373e0befcd", size = 166622, upload-time = "2025-11-12T09:56:37.75Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ab/84/02fc1827e8cdded4aa65baef11296a9bbe595c474f0d6d758af082d849fd/execnet-2.1.2-py3-none-any.whl", hash = "sha256:67fba928dd5a544b783f6056f449e5e3931a5c378b128bc18501f7ea79e296ec", size = 40708, upload-time = "2025-11-12T09:56:36.333Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "extract-msg"
|
||||
version = "0.41.5"
|
||||
@ -1910,11 +1980,11 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "fsspec"
|
||||
version = "2025.10.0"
|
||||
version = "2025.12.0"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/24/7f/2747c0d332b9acfa75dc84447a066fdf812b5a6b8d30472b74d309bfe8cb/fsspec-2025.10.0.tar.gz", hash = "sha256:b6789427626f068f9a83ca4e8a3cc050850b6c0f71f99ddb4f542b8266a26a59", size = 309285, upload-time = "2025-10-30T14:58:44.036Z" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b6/27/954057b0d1f53f086f681755207dda6de6c660ce133c829158e8e8fe7895/fsspec-2025.12.0.tar.gz", hash = "sha256:c505de011584597b1060ff778bb664c1bc022e87921b0e4f10cc9c44f9635973", size = 309748, upload-time = "2025-12-03T15:23:42.687Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/eb/02/a6b21098b1d5d6249b7c5ab69dde30108a71e4e819d4a9778f1de1d5b70d/fsspec-2025.10.0-py3-none-any.whl", hash = "sha256:7c7712353ae7d875407f97715f0e1ffcc21e33d5b24556cb1e090ae9409ec61d", size = 200966, upload-time = "2025-10-30T14:58:42.53Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/51/c7/b64cae5dba3a1b138d7123ec36bb5ccd39d39939f18454407e5468f4763f/fsspec-2025.12.0-py3-none-any.whl", hash = "sha256:8bf1fe301b7d8acfa6e8571e3b1c3d158f909666642431cc78a1b7b4dbc5ec5b", size = 201422, upload-time = "2025-12-03T15:23:41.434Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -2022,16 +2092,21 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "google-auth"
|
||||
version = "2.41.1"
|
||||
version = "2.43.0"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
dependencies = [
|
||||
{ name = "cachetools" },
|
||||
{ name = "pyasn1-modules" },
|
||||
{ name = "rsa" },
|
||||
]
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a8/af/5129ce5b2f9688d2fa49b463e544972a7c82b0fdb50980dafee92e121d9f/google_auth-2.41.1.tar.gz", hash = "sha256:b76b7b1f9e61f0cb7e88870d14f6a94aeef248959ef6992670efee37709cbfd2", size = 292284, upload-time = "2025-09-30T22:51:26.363Z" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ff/ef/66d14cf0e01b08d2d51ffc3c20410c4e134a1548fc246a6081eae585a4fe/google_auth-2.43.0.tar.gz", hash = "sha256:88228eee5fc21b62a1b5fe773ca15e67778cb07dc8363adcb4a8827b52d81483", size = 296359, upload-time = "2025-11-06T00:13:36.587Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/be/a4/7319a2a8add4cc352be9e3efeff5e2aacee917c85ca2fa1647e29089983c/google_auth-2.41.1-py2.py3-none-any.whl", hash = "sha256:754843be95575b9a19c604a848a41be03f7f2afd8c019f716dc1f51ee41c639d", size = 221302, upload-time = "2025-09-30T22:51:24.212Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/6f/d1/385110a9ae86d91cc14c5282c61fe9f4dc41c0b9f7d423c6ad77038c4448/google_auth-2.43.0-py2.py3-none-any.whl", hash = "sha256:af628ba6fa493f75c7e9dbe9373d148ca9f4399b5ea29976519e0a3848eddd16", size = 223114, upload-time = "2025-11-06T00:13:35.209Z" },
|
||||
]
|
||||
|
||||
[package.optional-dependencies]
|
||||
requests = [
|
||||
{ name = "requests" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -2049,15 +2124,15 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "google-auth-oauthlib"
|
||||
version = "1.2.3"
|
||||
version = "1.2.2"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
dependencies = [
|
||||
{ name = "google-auth" },
|
||||
{ name = "requests-oauthlib" },
|
||||
]
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/86/a6/c6336a6ceb682709a4aa39e2e6b5754a458075ca92359512b6cbfcb25ae3/google_auth_oauthlib-1.2.3.tar.gz", hash = "sha256:eb09e450d3cc789ecbc2b3529cb94a713673fd5f7a22c718ad91cf75aedc2ea4", size = 21265, upload-time = "2025-10-30T21:28:19.105Z" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/fb/87/e10bf24f7bcffc1421b84d6f9c3377c30ec305d082cd737ddaa6d8f77f7c/google_auth_oauthlib-1.2.2.tar.gz", hash = "sha256:11046fb8d3348b296302dd939ace8af0a724042e8029c1b872d87fabc9f41684", size = 20955, upload-time = "2025-04-22T16:40:29.172Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/38/07/a54c100da461ffc5968457823fcc665a48fb4b875c68bcfecbfe24a10dbe/google_auth_oauthlib-1.2.3-py3-none-any.whl", hash = "sha256:7c0940e037677f25e71999607493640d071212e7f3c15aa0febea4c47a5a0680", size = 19184, upload-time = "2025-10-30T21:28:17.88Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ac/84/40ee070be95771acd2f4418981edb834979424565c3eec3cd88b6aa09d24/google_auth_oauthlib-1.2.2-py3-none-any.whl", hash = "sha256:fd619506f4b3908b5df17b65f39ca8d66ea56986e5472eb5978fd8f3786f00a2", size = 19072, upload-time = "2025-04-22T16:40:28.174Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -2177,11 +2252,11 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "google-genai"
|
||||
version = "1.52.0"
|
||||
version = "1.53.0"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
dependencies = [
|
||||
{ name = "anyio" },
|
||||
{ name = "google-auth" },
|
||||
{ name = "google-auth", extra = ["requests"] },
|
||||
{ name = "httpx" },
|
||||
{ name = "pydantic" },
|
||||
{ name = "requests" },
|
||||
@ -2189,9 +2264,9 @@ dependencies = [
|
||||
{ name = "typing-extensions" },
|
||||
{ name = "websockets" },
|
||||
]
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/09/4e/0ad8585d05312074bb69711b2d81cfed69ce0ae441913d57bf169bed20a7/google_genai-1.52.0.tar.gz", hash = "sha256:a74e8a4b3025f23aa98d6a0f84783119012ca6c336fd68f73c5d2b11465d7fc5", size = 258743, upload-time = "2025-11-21T02:18:55.742Z" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/de/b3/36fbfde2e21e6d3bc67780b61da33632f495ab1be08076cf0a16af74098f/google_genai-1.53.0.tar.gz", hash = "sha256:938a26d22f3fd32c6eeeb4276ef204ef82884e63af9842ce3eac05ceb39cbd8d", size = 260102, upload-time = "2025-12-03T17:21:23.233Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ec/66/03f663e7bca7abe9ccfebe6cb3fe7da9a118fd723a5abb278d6117e7990e/google_genai-1.52.0-py3-none-any.whl", hash = "sha256:c8352b9f065ae14b9322b949c7debab8562982f03bf71d44130cd2b798c20743", size = 261219, upload-time = "2025-11-21T02:18:54.515Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/40/f2/97fefdd1ad1f3428321bac819ae7a83ccc59f6439616054736b7819fa56c/google_genai-1.53.0-py3-none-any.whl", hash = "sha256:65a3f99e5c03c372d872cda7419f5940e723374bb12a2f3ffd5e3e56e8eb2094", size = 262015, upload-time = "2025-12-03T17:21:21.934Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -2776,7 +2851,7 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "infinity-sdk"
|
||||
version = "0.6.8"
|
||||
version = "0.6.10"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
dependencies = [
|
||||
{ name = "datrie" },
|
||||
@ -2795,9 +2870,9 @@ dependencies = [
|
||||
{ name = "sqlglot", extra = ["rs"] },
|
||||
{ name = "thrift" },
|
||||
]
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a3/2d/4b699d62202319e5cbbcb4a7d9e87a86dde7ba7c767d0af4ebbee3de8419/infinity_sdk-0.6.8.tar.gz", hash = "sha256:e91c1f6cdf2fa41bc615c72be2a9e981211bd05b34522c1d27f1b825b905b125", size = 72669, upload-time = "2025-12-02T05:09:29.377Z" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b4/e5/88fdcfe42835c5494a08f02b64762a98e04dae4ad49f7dfabac18ee01928/infinity_sdk-0.6.10.tar.gz", hash = "sha256:b55c296ca3b2c8c2f4568f359dd8a50772e9432f09b64667140e9804bf780436", size = 29502969, upload-time = "2025-12-04T02:42:17.882Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ab/08/59ed1261ee80d3b2c5a80313a013a94cae83ce90ff1da1ef488055944a7b/infinity_sdk-0.6.8-py3-none-any.whl", hash = "sha256:392f942a2073a5b545261dad9859b217c6a0331ede606c8894e7ae335f2ead5e", size = 81564, upload-time = "2025-12-02T05:09:27.784Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/97/99/8857ea0805bd83fe092f5dca914a31f9fcc731c3800264657bd3ba950a1d/infinity_sdk-0.6.10-py3-none-any.whl", hash = "sha256:8f605039ec73d1b05d219105fbabef186e0178fddbad058c2c06c4873be48651", size = 29722107, upload-time = "2025-12-04T02:42:04.101Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -3077,7 +3152,7 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "langfuse"
|
||||
version = "3.10.3"
|
||||
version = "3.10.5"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
dependencies = [
|
||||
{ name = "backoff" },
|
||||
@ -3091,9 +3166,9 @@ dependencies = [
|
||||
{ name = "requests" },
|
||||
{ name = "wrapt" },
|
||||
]
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7b/03/c4316cb0a91cff97118c21b973b3089c2fe1bdbcad02f3623d6ac572e954/langfuse-3.10.3.tar.gz", hash = "sha256:69d6eaf573212f8cdc1cebd2d6b47f271bfe76c7eb5a3c5d6766bb0d9bf0004c", size = 226617, upload-time = "2025-12-01T18:01:02.607Z" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/69/21/dff0434290512484436bfa108e36f0adc3457eb4117767de70e76a411cac/langfuse-3.10.5.tar.gz", hash = "sha256:14eb767663f7e7480cd1cd1b3ca457022817c129e666efe97e5c80adb8c5aac0", size = 223142, upload-time = "2025-12-03T17:49:39.747Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/fd/04/f07c2a23f2822f73f8576b1ba7348c014c4be65127384b4bee475f913f3b/langfuse-3.10.3-py3-none-any.whl", hash = "sha256:b9a2e6506f8f0923c2f4b8c9e3fa355231994197a17f75509a37f335660ce334", size = 399062, upload-time = "2025-12-01T18:01:00.688Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/64/6f/dc15775f82d38da62cd2015110f5802bb175a9ee731a4533fe2a0cdf75b6/langfuse-3.10.5-py3-none-any.whl", hash = "sha256:0223a64109a4293b9bd9b2e0e3229f53b75291cd96341e42cc3eba186973fcdb", size = 398888, upload-time = "2025-12-03T17:49:38.171Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -3920,12 +3995,12 @@ name = "onnxruntime-gpu"
|
||||
version = "1.19.2"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
dependencies = [
|
||||
{ name = "coloredlogs" },
|
||||
{ name = "flatbuffers" },
|
||||
{ name = "numpy" },
|
||||
{ name = "packaging" },
|
||||
{ name = "protobuf" },
|
||||
{ name = "sympy" },
|
||||
{ name = "coloredlogs", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
|
||||
{ name = "flatbuffers", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
|
||||
{ name = "numpy", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
|
||||
{ name = "packaging", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
|
||||
{ name = "protobuf", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
|
||||
{ name = "sympy", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
|
||||
]
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/d0/9c/3fa310e0730643051eb88e884f19813a6c8b67d0fbafcda610d960e589db/onnxruntime_gpu-1.19.2-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a49740e079e7c5215830d30cde3df792e903df007aa0b0fd7aa797937061b27a", size = 226178508, upload-time = "2024-09-04T06:43:40.83Z" },
|
||||
@ -4043,32 +4118,32 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "opentelemetry-api"
|
||||
version = "1.38.0"
|
||||
version = "1.39.0"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
dependencies = [
|
||||
{ name = "importlib-metadata" },
|
||||
{ name = "typing-extensions" },
|
||||
]
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/08/d8/0f354c375628e048bd0570645b310797299754730079853095bf000fba69/opentelemetry_api-1.38.0.tar.gz", hash = "sha256:f4c193b5e8acb0912b06ac5b16321908dd0843d75049c091487322284a3eea12", size = 65242, upload-time = "2025-10-16T08:35:50.25Z" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c0/0b/e5428c009d4d9af0515b0a8371a8aaae695371af291f45e702f7969dce6b/opentelemetry_api-1.39.0.tar.gz", hash = "sha256:6130644268c5ac6bdffaf660ce878f10906b3e789f7e2daa5e169b047a2933b9", size = 65763, upload-time = "2025-12-03T13:19:56.378Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ae/a2/d86e01c28300bd41bab8f18afd613676e2bd63515417b77636fc1add426f/opentelemetry_api-1.38.0-py3-none-any.whl", hash = "sha256:2891b0197f47124454ab9f0cf58f3be33faca394457ac3e09daba13ff50aa582", size = 65947, upload-time = "2025-10-16T08:35:30.23Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/05/85/d831a9bc0a9e0e1a304ff3d12c1489a5fbc9bf6690a15dcbdae372bbca45/opentelemetry_api-1.39.0-py3-none-any.whl", hash = "sha256:3c3b3ca5c5687b1b5b37e5c5027ff68eacea8675241b29f13110a8ffbb8f0459", size = 66357, upload-time = "2025-12-03T13:19:33.043Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "opentelemetry-exporter-otlp-proto-common"
|
||||
version = "1.38.0"
|
||||
version = "1.39.0"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
dependencies = [
|
||||
{ name = "opentelemetry-proto" },
|
||||
]
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/19/83/dd4660f2956ff88ed071e9e0e36e830df14b8c5dc06722dbde1841accbe8/opentelemetry_exporter_otlp_proto_common-1.38.0.tar.gz", hash = "sha256:e333278afab4695aa8114eeb7bf4e44e65c6607d54968271a249c180b2cb605c", size = 20431, upload-time = "2025-10-16T08:35:53.285Z" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/11/cb/3a29ce606b10c76d413d6edd42d25a654af03e73e50696611e757d2602f3/opentelemetry_exporter_otlp_proto_common-1.39.0.tar.gz", hash = "sha256:a135fceed1a6d767f75be65bd2845da344dd8b9258eeed6bc48509d02b184409", size = 20407, upload-time = "2025-12-03T13:19:59.003Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/a7/9e/55a41c9601191e8cd8eb626b54ee6827b9c9d4a46d736f32abc80d8039fc/opentelemetry_exporter_otlp_proto_common-1.38.0-py3-none-any.whl", hash = "sha256:03cb76ab213300fe4f4c62b7d8f17d97fcfd21b89f0b5ce38ea156327ddda74a", size = 18359, upload-time = "2025-10-16T08:35:34.099Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ef/c6/215edba62d13a3948c718b289539f70e40965bc37fc82ecd55bb0b749c1a/opentelemetry_exporter_otlp_proto_common-1.39.0-py3-none-any.whl", hash = "sha256:3d77be7c4bdf90f1a76666c934368b8abed730b5c6f0547a2ec57feb115849ac", size = 18367, upload-time = "2025-12-03T13:19:36.906Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "opentelemetry-exporter-otlp-proto-http"
|
||||
version = "1.38.0"
|
||||
version = "1.39.0"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
dependencies = [
|
||||
{ name = "googleapis-common-protos" },
|
||||
@ -4079,48 +4154,48 @@ dependencies = [
|
||||
{ name = "requests" },
|
||||
{ name = "typing-extensions" },
|
||||
]
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/81/0a/debcdfb029fbd1ccd1563f7c287b89a6f7bef3b2902ade56797bfd020854/opentelemetry_exporter_otlp_proto_http-1.38.0.tar.gz", hash = "sha256:f16bd44baf15cbe07633c5112ffc68229d0edbeac7b37610be0b2def4e21e90b", size = 17282, upload-time = "2025-10-16T08:35:54.422Z" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/81/dc/1e9bf3f6a28e29eba516bc0266e052996d02bc7e92675f3cd38169607609/opentelemetry_exporter_otlp_proto_http-1.39.0.tar.gz", hash = "sha256:28d78fc0eb82d5a71ae552263d5012fa3ebad18dfd189bf8d8095ba0e65ee1ed", size = 17287, upload-time = "2025-12-03T13:20:01.134Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/e5/77/154004c99fb9f291f74aa0822a2f5bbf565a72d8126b3a1b63ed8e5f83c7/opentelemetry_exporter_otlp_proto_http-1.38.0-py3-none-any.whl", hash = "sha256:84b937305edfc563f08ec69b9cb2298be8188371217e867c1854d77198d0825b", size = 19579, upload-time = "2025-10-16T08:35:36.269Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/bc/46/e4a102e17205bb05a50dbf24ef0e92b66b648cd67db9a68865af06a242fd/opentelemetry_exporter_otlp_proto_http-1.39.0-py3-none-any.whl", hash = "sha256:5789cb1375a8b82653328c0ce13a054d285f774099faf9d068032a49de4c7862", size = 19639, upload-time = "2025-12-03T13:19:39.536Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "opentelemetry-proto"
|
||||
version = "1.38.0"
|
||||
version = "1.39.0"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
dependencies = [
|
||||
{ name = "protobuf" },
|
||||
]
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/51/14/f0c4f0f6371b9cb7f9fa9ee8918bfd59ac7040c7791f1e6da32a1839780d/opentelemetry_proto-1.38.0.tar.gz", hash = "sha256:88b161e89d9d372ce723da289b7da74c3a8354a8e5359992be813942969ed468", size = 46152, upload-time = "2025-10-16T08:36:01.612Z" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/48/b5/64d2f8c3393cd13ea2092106118f7b98461ba09333d40179a31444c6f176/opentelemetry_proto-1.39.0.tar.gz", hash = "sha256:c1fa48678ad1a1624258698e59be73f990b7fc1f39e73e16a9d08eef65dd838c", size = 46153, upload-time = "2025-12-03T13:20:08.729Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/b6/6a/82b68b14efca5150b2632f3692d627afa76b77378c4999f2648979409528/opentelemetry_proto-1.38.0-py3-none-any.whl", hash = "sha256:b6ebe54d3217c42e45462e2a1ae28c3e2bf2ec5a5645236a490f55f45f1a0a18", size = 72535, upload-time = "2025-10-16T08:35:45.749Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/e3/4d/d500e1862beed68318705732d1976c390f4a72ca8009c4983ff627acff20/opentelemetry_proto-1.39.0-py3-none-any.whl", hash = "sha256:1e086552ac79acb501485ff0ce75533f70f3382d43d0a30728eeee594f7bf818", size = 72534, upload-time = "2025-12-03T13:19:50.251Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "opentelemetry-sdk"
|
||||
version = "1.38.0"
|
||||
version = "1.39.0"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
dependencies = [
|
||||
{ name = "opentelemetry-api" },
|
||||
{ name = "opentelemetry-semantic-conventions" },
|
||||
{ name = "typing-extensions" },
|
||||
]
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/85/cb/f0eee1445161faf4c9af3ba7b848cc22a50a3d3e2515051ad8628c35ff80/opentelemetry_sdk-1.38.0.tar.gz", hash = "sha256:93df5d4d871ed09cb4272305be4d996236eedb232253e3ab864c8620f051cebe", size = 171942, upload-time = "2025-10-16T08:36:02.257Z" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/51/e3/7cd989003e7cde72e0becfe830abff0df55c69d237ee7961a541e0167833/opentelemetry_sdk-1.39.0.tar.gz", hash = "sha256:c22204f12a0529e07aa4d985f1bca9d6b0e7b29fe7f03e923548ae52e0e15dde", size = 171322, upload-time = "2025-12-03T13:20:09.651Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/2f/2e/e93777a95d7d9c40d270a371392b6d6f1ff170c2a3cb32d6176741b5b723/opentelemetry_sdk-1.38.0-py3-none-any.whl", hash = "sha256:1c66af6564ecc1553d72d811a01df063ff097cdc82ce188da9951f93b8d10f6b", size = 132349, upload-time = "2025-10-16T08:35:46.995Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/a4/b4/2adc8bc83eb1055ecb592708efb6f0c520cc2eb68970b02b0f6ecda149cf/opentelemetry_sdk-1.39.0-py3-none-any.whl", hash = "sha256:90cfb07600dfc0d2de26120cebc0c8f27e69bf77cd80ef96645232372709a514", size = 132413, upload-time = "2025-12-03T13:19:51.364Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "opentelemetry-semantic-conventions"
|
||||
version = "0.59b0"
|
||||
version = "0.60b0"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
dependencies = [
|
||||
{ name = "opentelemetry-api" },
|
||||
{ name = "typing-extensions" },
|
||||
]
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/40/bc/8b9ad3802cd8ac6583a4eb7de7e5d7db004e89cb7efe7008f9c8a537ee75/opentelemetry_semantic_conventions-0.59b0.tar.gz", hash = "sha256:7a6db3f30d70202d5bf9fa4b69bc866ca6a30437287de6c510fb594878aed6b0", size = 129861, upload-time = "2025-10-16T08:36:03.346Z" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/71/0e/176a7844fe4e3cb5de604212094dffaed4e18b32f1c56b5258bcbcba85c2/opentelemetry_semantic_conventions-0.60b0.tar.gz", hash = "sha256:227d7aa73cbb8a2e418029d6b6465553aa01cf7e78ec9d0bc3255c7b3ac5bf8f", size = 137935, upload-time = "2025-12-03T13:20:12.395Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/24/7d/c88d7b15ba8fe5c6b8f93be50fc11795e9fc05386c44afaf6b76fe191f9b/opentelemetry_semantic_conventions-0.59b0-py3-none-any.whl", hash = "sha256:35d3b8833ef97d614136e253c1da9342b4c3c083bbaf29ce31d572a1c3825eed", size = 207954, upload-time = "2025-10-16T08:35:48.054Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/d0/56/af0306666f91bae47db14d620775604688361f0f76a872e0005277311131/opentelemetry_semantic_conventions-0.60b0-py3-none-any.whl", hash = "sha256:069530852691136018087b52688857d97bba61cd641d0f8628d2d92788c4f78a", size = 219981, upload-time = "2025-12-03T13:19:53.585Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -4989,12 +5064,6 @@ wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pyicu"
|
||||
version = "2.16"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/11/c3/8d558b30deb33eb583c0bcae3e64d6db8316b69461a04bb9db5ff63d3f6e/pyicu-2.16.tar.gz", hash = "sha256:42b3a8062e3b23e927ca727e6b5e1730d86c70279834e4887152895d2eb012d9", size = 268126, upload-time = "2025-11-04T23:33:00.006Z" }
|
||||
|
||||
[[package]]
|
||||
name = "pyjwt"
|
||||
version = "2.8.0"
|
||||
@ -5195,6 +5264,47 @@ wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/a8/a4/20da314d277121d6534b3a980b29035dcd51e6744bd79075a6ce8fa4eb8d/pytest-8.4.2-py3-none-any.whl", hash = "sha256:872f880de3fc3a5bdc88a11b39c9710c3497a547cfa9320bc3c5e62fbf272e79", size = 365750, upload-time = "2025-09-04T14:34:20.226Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pytest-asyncio"
|
||||
version = "1.3.0"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
dependencies = [
|
||||
{ name = "backports-asyncio-runner", marker = "python_full_version < '3.11'" },
|
||||
{ name = "pytest" },
|
||||
{ name = "typing-extensions" },
|
||||
]
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/90/2c/8af215c0f776415f3590cac4f9086ccefd6fd463befeae41cd4d3f193e5a/pytest_asyncio-1.3.0.tar.gz", hash = "sha256:d7f52f36d231b80ee124cd216ffb19369aa168fc10095013c6b014a34d3ee9e5", size = 50087, upload-time = "2025-11-10T16:07:47.256Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/e5/35/f8b19922b6a25bc0880171a2f1a003eaeb93657475193ab516fd87cac9da/pytest_asyncio-1.3.0-py3-none-any.whl", hash = "sha256:611e26147c7f77640e6d0a92a38ed17c3e9848063698d5c93d5aa7aa11cebff5", size = 15075, upload-time = "2025-11-10T16:07:45.537Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pytest-cov"
|
||||
version = "7.0.0"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
dependencies = [
|
||||
{ name = "coverage", extra = ["toml"] },
|
||||
{ name = "pluggy" },
|
||||
{ name = "pytest" },
|
||||
]
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/5e/f7/c933acc76f5208b3b00089573cf6a2bc26dc80a8aece8f52bb7d6b1855ca/pytest_cov-7.0.0.tar.gz", hash = "sha256:33c97eda2e049a0c5298e91f519302a1334c26ac65c1a483d6206fd458361af1", size = 54328, upload-time = "2025-09-09T10:57:02.113Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ee/49/1377b49de7d0c1ce41292161ea0f721913fa8722c19fb9c1e3aa0367eecb/pytest_cov-7.0.0-py3-none-any.whl", hash = "sha256:3b8e9558b16cc1479da72058bdecf8073661c7f57f7d3c5f22a1c23507f2d861", size = 22424, upload-time = "2025-09-09T10:57:00.695Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pytest-xdist"
|
||||
version = "3.8.0"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
dependencies = [
|
||||
{ name = "execnet" },
|
||||
{ name = "pytest" },
|
||||
]
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/78/b4/439b179d1ff526791eb921115fca8e44e596a13efeda518b9d845a619450/pytest_xdist-3.8.0.tar.gz", hash = "sha256:7e578125ec9bc6050861aa93f2d59f1d8d085595d6551c2c90b6f4fad8d3a9f1", size = 88069, upload-time = "2025-07-01T13:30:59.346Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ca/31/d4e37e9e550c2b92a9cbc2e4d0b7420a27224968580b5a447f420847c975/pytest_xdist-3.8.0-py3-none-any.whl", hash = "sha256:202ca578cfeb7370784a8c33d6d05bc6e13b4f25b5053c30a152269fd10f0b88", size = 46396, upload-time = "2025-07-01T13:30:56.632Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "python-calamine"
|
||||
version = "0.6.1"
|
||||
@ -5558,7 +5668,6 @@ dependencies = [
|
||||
{ name = "psycopg2-binary" },
|
||||
{ name = "pyclipper" },
|
||||
{ name = "pycryptodomex" },
|
||||
{ name = "pyicu" },
|
||||
{ name = "pymysql" },
|
||||
{ name = "pyobvector" },
|
||||
{ name = "pyodbc" },
|
||||
@ -5620,6 +5729,9 @@ test = [
|
||||
{ name = "openpyxl" },
|
||||
{ name = "pillow" },
|
||||
{ name = "pytest" },
|
||||
{ name = "pytest-asyncio" },
|
||||
{ name = "pytest-cov" },
|
||||
{ name = "pytest-xdist" },
|
||||
{ name = "python-docx" },
|
||||
{ name = "python-pptx" },
|
||||
{ name = "reportlab" },
|
||||
@ -5683,7 +5795,7 @@ requires-dist = [
|
||||
{ name = "huggingface-hub", specifier = ">=0.25.0,<0.26.0" },
|
||||
{ name = "imageio-ffmpeg", specifier = ">=0.6.0" },
|
||||
{ name = "infinity-emb", specifier = ">=0.0.66,<0.0.67" },
|
||||
{ name = "infinity-sdk", specifier = "==0.6.8" },
|
||||
{ name = "infinity-sdk", specifier = "==0.6.10" },
|
||||
{ name = "itsdangerous", specifier = "==2.1.2" },
|
||||
{ name = "jira", specifier = "==3.10.5" },
|
||||
{ name = "json-repair", specifier = "==0.35.0" },
|
||||
@ -5723,7 +5835,6 @@ requires-dist = [
|
||||
{ name = "psycopg2-binary", specifier = "==2.9.9" },
|
||||
{ name = "pyclipper", specifier = "==1.3.0.post5" },
|
||||
{ name = "pycryptodomex", specifier = "==3.20.0" },
|
||||
{ name = "pyicu", specifier = ">=2.15.3,<3.0.0" },
|
||||
{ name = "pymysql", specifier = ">=1.1.1,<2.0.0" },
|
||||
{ name = "pyobvector", specifier = "==0.2.18" },
|
||||
{ name = "pyodbc", specifier = ">=5.2.0,<6.0.0" },
|
||||
@ -5785,6 +5896,9 @@ test = [
|
||||
{ name = "openpyxl", specifier = ">=3.1.5" },
|
||||
{ name = "pillow", specifier = ">=10.4.0" },
|
||||
{ name = "pytest", specifier = ">=8.3.5" },
|
||||
{ name = "pytest-asyncio", specifier = ">=1.3.0" },
|
||||
{ name = "pytest-cov", specifier = ">=7.0.0" },
|
||||
{ name = "pytest-xdist", specifier = ">=3.8.0" },
|
||||
{ name = "python-docx", specifier = ">=1.1.2" },
|
||||
{ name = "python-pptx", specifier = ">=1.0.2" },
|
||||
{ name = "reportlab", specifier = ">=4.4.1" },
|
||||
@ -6712,11 +6826,11 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "sqlglot"
|
||||
version = "28.0.0"
|
||||
version = "28.1.0"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/52/8d/9ce5904aca760b81adf821c77a1dcf07c98f9caaa7e3b5c991c541ff89d2/sqlglot-28.0.0.tar.gz", hash = "sha256:cc9a651ef4182e61dac58aa955e5fb21845a5865c6a4d7d7b5a7857450285ad4", size = 5520798, upload-time = "2025-11-17T10:34:57.016Z" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7e/49/cda1fc4e610ed5764de2842bb2f362f4aba267b4a7d05a3a217a25b39004/sqlglot-28.1.0.tar.gz", hash = "sha256:a3ef7344359667b51cf95e840aac70a49f847602c61c9fbaeb847f74f7877fe1", size = 5546281, upload-time = "2025-12-02T16:52:28.387Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/56/6d/86de134f40199105d2fee1b066741aa870b3ce75ee74018d9c8508bbb182/sqlglot-28.0.0-py3-none-any.whl", hash = "sha256:ac1778e7fa4812f4f7e5881b260632fc167b00ca4c1226868891fb15467122e4", size = 536127, upload-time = "2025-11-17T10:34:55.192Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/2c/e8/bd016214348f65ba31107c1b81af70fc7662d96758052d5d59b516fd3858/sqlglot-28.1.0-py3-none-any.whl", hash = "sha256:2a895a31666ba947c686caa980624c82bcd0e6fdf59b4fdb9e47108bd092d1ac", size = 547889, upload-time = "2025-12-02T16:52:26.019Z" },
|
||||
]
|
||||
|
||||
[package.optional-dependencies]
|
||||
@ -6726,40 +6840,40 @@ rs = [
|
||||
|
||||
[[package]]
|
||||
name = "sqlglotrs"
|
||||
version = "0.7.3"
|
||||
version = "0.8.0"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/87/5a/46d8efeda45be6ce1c630229455f000cafedea6129b47e6cfab39ff462f5/sqlglotrs-0.7.3.tar.gz", hash = "sha256:caadc572c8a194f99d6ba44d02f9ada0110e3d47cca3330c81f4aa608f1143eb", size = 15888, upload-time = "2025-10-13T06:33:57.322Z" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d5/37/118f24c367fde662e6c1181327dc9c16d08914108904c69bac3a6ba12c52/sqlglotrs-0.8.0.tar.gz", hash = "sha256:2b9a23c580d82be2388ee23496230cfc667f280ed0ed7eaa099d0da8d718cbf2", size = 15706, upload-time = "2025-12-02T16:58:38.197Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/9d/95/f08e01f54e521a286fcd9f7a8bdd178eabcddd9dbc6d6c15dc983c7be8dd/sqlglotrs-0.7.3-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:7acc6dba37af53d9cf1e3217fdd719878dbfaaf2a578ad7b3fbc07ef9dadd035", size = 314621, upload-time = "2025-10-13T06:33:48.917Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/98/7d/01a5db15e413ab587816448f1222286d3a10f0465954d21f5d2915aaeed5/sqlglotrs-0.7.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3cbfb42071422afbd7376d70b93a969e86fb74752efe98dd66ee6d2ae27a9665", size = 300189, upload-time = "2025-10-13T06:33:40.963Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/08/21/94d1fb647a394afcb09a9174f7bff078452bb956e6898093dd9ee459ef2b/sqlglotrs-0.7.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:07500421de9dea8dfc0cd6769145df754178fc2ae5a3692bdbf5d37aebc0712a", size = 332771, upload-time = "2025-10-13T06:32:45.992Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/29/d1/ccade8e794304c925e9b94e1d7bff4c56896f571a291a03bfd96048c4a0f/sqlglotrs-0.7.3-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:792eb179a742d7d72d1d47c9a50e073078f0133e9191bd07920945dcc9170844", size = 342960, upload-time = "2025-10-13T06:32:55.493Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/1e/2f/2ff3cfe7d91ac3762100e511c4eff0c98824970d7c27e18e88c44a4d4567/sqlglotrs-0.7.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f4c3849992e33e47403c2517d464564e4b4cf6a080ad761141504e271ab2c7cd", size = 487268, upload-time = "2025-10-13T06:33:13.784Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/3f/d7/a95fbdd26f20b7bd5781bb5a4c51616fdd59f1c521010f668ffd54e59f5d/sqlglotrs-0.7.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:016f51409ed3d87c33ca5a31dd6766e75a809909e418a0ffd2965e0ae7b84a7b", size = 365853, upload-time = "2025-10-13T06:33:23.415Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/53/7a/5d50d0b1167c79a509957d58a6bf9f6450f894e0bc233987cb85ccaec50f/sqlglotrs-0.7.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:94dd711ea2ba76e664dab3e7f7b08cb5517cf5164fd94a552598acfd1f6df59a", size = 343697, upload-time = "2025-10-13T06:33:32.542Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/e5/89/85acbd412a5c7ef39ee5a96f5be28d6d38bce2c4521a264c747361b4c021/sqlglotrs-0.7.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:517198977f3baece667513326e42545b00b2878719922c58fcbfa21553f1338d", size = 363446, upload-time = "2025-10-13T06:33:03.995Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/37/4d/0a04f29731b6fda327bd11495c143ce70d1a7446b22440a32d8571408a06/sqlglotrs-0.7.3-cp310-cp310-win32.whl", hash = "sha256:1e9121ef3a64dc7d18e500e5e93df458a9bb6f4111b8f8569d5e4f8db21e61d2", size = 183997, upload-time = "2025-10-13T06:33:58.579Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/0e/16/0e95fa77409da059c951c6be11d4d73311c60bb5ed82f1d40a4afc9a1aa9/sqlglotrs-0.7.3-cp310-cp310-win_amd64.whl", hash = "sha256:48fd7e9efef56331e1ef7402b6d65113c087da1cfe2ef80d143ee62046d49056", size = 195923, upload-time = "2025-10-13T06:34:06.676Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/82/41/fcd87de298b562947cb2592feb9df5794886a8fa24eab8a080a552aa0e4d/sqlglotrs-0.7.3-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:f2144fc8e472de2b165665c1370e7f0ca7f9400f60ca5e78c7aedbb3233bc8d7", size = 314465, upload-time = "2025-10-13T06:33:50.219Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/14/81/22cf241e22f364c414d57893fad9cfea869f8866189e75575a3862f1d329/sqlglotrs-0.7.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:93cb74928b205da3f29f2b9c728d2c6656ad30e1ef500560f6c851bca2129fbc", size = 300129, upload-time = "2025-10-13T06:33:42.205Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/a5/90/4e4220f8605c6fbca77dfad2052cdebf195099c99fd0684723677dcbf091/sqlglotrs-0.7.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a918137bacfa31529802063e349a99d5352f74c914beceb14689cd2864c5e4d0", size = 332735, upload-time = "2025-10-13T06:32:48.095Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/3b/35/abe3cb6aa197b5193fcb446ab69465b5927e09e281b2c05f4e12249fd335/sqlglotrs-0.7.3-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c3fd0edbd957d136c67919ead10c90d832da1aedbbedc6da899d173fe78bf600", size = 342779, upload-time = "2025-10-13T06:32:56.782Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/22/71/670ad31f4dbfe594592a1992c4e7a62003dc47dffb15d96b2fec4137f933/sqlglotrs-0.7.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5a361a1dd8c55fbc57f81db738658141cab723509cc1b3edcc871bccfbba0cfb", size = 487344, upload-time = "2025-10-13T06:33:15.095Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/f4/73/86e46b762b615c7cdec489e4b0670d2a04ea6fab0c0be30a5756e95f108f/sqlglotrs-0.7.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c698af6379475c243a8f190845bf1d1267a2c9867011a4567d5cfdcc5b0eb094", size = 366062, upload-time = "2025-10-13T06:33:25.183Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/dc/07/b4dd7315df7d975c4b82d09106eb73ea2ee8f3734f764889913636e9d68c/sqlglotrs-0.7.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75d63ed29058c56f153912c90811d8af1706d81f0c759883baeb21acb6322969", size = 343642, upload-time = "2025-10-13T06:33:33.826Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/37/84/2e834fc665236ef6b0fced14d75c8e9eb0db471d96fde539d8c37ce3a10f/sqlglotrs-0.7.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4e19dee6dc46c4d84c556ae456fa0c6400edb157528fd369670b3d041b54ef21", size = 363731, upload-time = "2025-10-13T06:33:05.913Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ad/db/b7063b1240a1c39bc5627880dbb80c9e3f7b5548a17962d3a6bf98239171/sqlglotrs-0.7.3-cp311-cp311-win32.whl", hash = "sha256:f1276d0f02eaefbdd149b614f6c21fb9be372d7e1137f19c3d5f9e50662367b3", size = 183607, upload-time = "2025-10-13T06:33:59.858Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/09/98/e9cb2b3dd4abb34d2ae71747f113bf12f741a86fa29e661f1f09ba8376d0/sqlglotrs-0.7.3-cp311-cp311-win_amd64.whl", hash = "sha256:ccf05fc6e89523cf5819982fab12b8fe07a9656dbb5356fc4b56b562e734c202", size = 196050, upload-time = "2025-10-13T06:34:07.921Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/23/3f/3b059058e198b2fb6612d0ddaad5431a796d7081d40b21f12273ea1b26dc/sqlglotrs-0.7.3-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:2e7be55bf719b5ebdc344a996b6d27b9a0ba9bae0a09462900805e2f7dc4dca5", size = 310987, upload-time = "2025-10-13T06:33:51.874Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/47/b6/0058b2fe4f4813d9f3280d65ace97a637e8edd152be2a13bb1782c5c2eff/sqlglotrs-0.7.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6fef415993e1843201a57916f310b49e79669db379ff38094161fa93be2ffdf2", size = 296829, upload-time = "2025-10-13T06:33:43.838Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/f3/a8/35c593b03bf498876aea68ea944a7e7bb9cf648e68984f55795181c928dd/sqlglotrs-0.7.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e980354e576e852c53e0bb5444b04ebb6459054074bce8012cc3385dd3d116ed", size = 332313, upload-time = "2025-10-13T06:32:49.343Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/e9/bc/534e21a233846d33d6b55100485bf1844d301b0b75deded5310ef9cd171f/sqlglotrs-0.7.3-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1444b260c040cc80697956629f3fd3adece0bdb4f83bae22cd618ca3f18c4de8", size = 342309, upload-time = "2025-10-13T06:32:58.031Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/71/63/1d7bd7de87f01adb43cd1710d3fd5b9d5b0b3fea160bbeadc340fe1a9132/sqlglotrs-0.7.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3202c6f00145b8adb4632c1bb5071be5aa362829054653bac058dbcdbc6228e7", size = 484954, upload-time = "2025-10-13T06:33:16.697Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/6f/bd/10126c9f59fb4f8fa51bf3f0ad17895b953bd09e1687986d5d9e110758c8/sqlglotrs-0.7.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:17ae27e895f0ed960e28e76028c84758ff00df24e598654df3b5f22de8c7fc30", size = 366874, upload-time = "2025-10-13T06:33:26.888Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/a3/fa/f12a1eb9c22cdce962bafebefea58e898c19bae3d21e9b79d6e811a2951d/sqlglotrs-0.7.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a36c3d55b913c09dc31572ca7d5b423e85d761f1b3c9d8f86e2a1433a2f20d5", size = 342990, upload-time = "2025-10-13T06:33:35.478Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/86/1d/2bd1c8900d7a081a61a1c424785fd1a1452def751bc212630251423d80ce/sqlglotrs-0.7.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:94875611a2a953c06e8152b1d485f4d20ec61b72ebd848f96c04aca66b30f189", size = 362603, upload-time = "2025-10-13T06:33:07.507Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/18/3a/9c176a7f9b93d78168b3d137db4704a703cb51b32edb29d231b615130b47/sqlglotrs-0.7.3-cp312-cp312-win32.whl", hash = "sha256:64a708c87d1bea1f4bdb3edf0a3e94ea1b908ed229502da93621a4a50ef20189", size = 183180, upload-time = "2025-10-13T06:34:01.017Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/df/ea/37757060d3caadb22509d349087e1e16a2dcc4c1649a00d2d6b501f8ff50/sqlglotrs-0.7.3-cp312-cp312-win_amd64.whl", hash = "sha256:fe1ef1bedbb34b87dfb4e99a911026f8895ff2514b222cfd82cd08033406de2e", size = 195746, upload-time = "2025-10-13T06:34:09.478Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/d7/88/7fc59c146118603e06abf69dc19c237ef496a8dd936e5c224fdffc7df120/sqlglotrs-0.8.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:3db8f75b8efe5b94ed5540c13b80ef0a3e64c0d15864b05a6bccf5554c6e6008", size = 318097, upload-time = "2025-12-02T16:58:30.763Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/44/9a/7c0103f02b371f49f6ade420519d54c11c7e3ae4dcf22a855b9c71ccb546/sqlglotrs-0.8.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:37d00b69814fdabd4256be955d66e699afa1c50740f03369503d85f90245af35", size = 306820, upload-time = "2025-12-02T16:58:23.714Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/4a/cf/52de2a02a52976dfbd863ec57a3fafaf018a9536114f195404d51717501d/sqlglotrs-0.8.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:631da494550442ec2c7139993f59d854e4d4a44282b568594b5fc50818bc4736", size = 341540, upload-time = "2025-12-02T16:57:33.009Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/b8/89/072a295c3b98322a3d08d85ed47551c1f080309f2cde2d2fa75bd1964621/sqlglotrs-0.8.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6b624e0650067cc006d8a0595e07be3ac91599187ee353313eb9f114ca434e44", size = 350048, upload-time = "2025-12-02T16:57:41.477Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/fd/b2/fbc05eef045124a9e5820812ddd641ec42add5e52f12126a85d942b0f166/sqlglotrs-0.8.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6c0c5ae335b1917aa101d7cfe1aacbedf3b54f489d2038e94c8f42ffe5bd304a", size = 474032, upload-time = "2025-12-02T16:58:00.344Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/b2/a8/1472a5d5f849803fb2ad566ae43db8e5c9f3b1686b104dda245e4acfd963/sqlglotrs-0.8.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:21d145e9fef6e2e53fdf17f9b6ab7e7fbba26064365c56d2103a41e95053d1d4", size = 365233, upload-time = "2025-12-02T16:58:08.102Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ab/c8/ea700f277cba380c7919136a16e03f9f990f29da34c5404b861fbb8b6fd5/sqlglotrs-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9ed5d7afd8b6b244c33316cc292122f26c20bf9677907bc5790c1b053097aff4", size = 348452, upload-time = "2025-12-02T16:58:15.863Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/60/f7/ba63c7cabcd71abed855e7a4cecb4b0df297bf17d315ff39eacf94926378/sqlglotrs-0.8.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:185442ad85a125719bf365a238c2b357c079cb5a13392adbbde172b1a0073410", size = 371656, upload-time = "2025-12-02T16:57:51.329Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/5c/dc/1ba05670afe7f4c7e651f972f4738dc4508525bb67b9151cdf463b0ef55b/sqlglotrs-0.8.0-cp310-cp310-win32.whl", hash = "sha256:a7d3f36d9c53090842ae18de6d96bd7634d73584255014983aad998f2b7dc95f", size = 188554, upload-time = "2025-12-02T16:58:39.078Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/33/fc/a393a837a9e09411da87cf8ee2d9f190e3bad37d289cd385e3791356a788/sqlglotrs-0.8.0-cp310-cp310-win_amd64.whl", hash = "sha256:c8a5e3c8870323666e9695be7cc65f710ed437ceea572e69e2b14e63b70f21b2", size = 200973, upload-time = "2025-12-02T16:58:46.02Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/3d/be/a6a8e41e59813663baf02b23534d822b62521d018ee740f132b4547c4239/sqlglotrs-0.8.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:0267b0121073669d1184bc0441779559e6b0c6067a12571b63befa2a9b4b0f77", size = 318016, upload-time = "2025-12-02T16:58:32.555Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/08/02/bf65a608b2caf268d81073171196f93beed8d32731ebda1288153dec2b73/sqlglotrs-0.8.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:30c1a2fa22a3ae4b38c7df9abbf14b2473f7e71c859c95bc270bd4a169688380", size = 306527, upload-time = "2025-12-02T16:58:24.853Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/39/98/32de2ad5ea9310e220baabfb6b2ee1e3c7ebb3b83a1db9bd2acdf72de6a5/sqlglotrs-0.8.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e7df3d2117c92004aa20082d71fbbd1735f063f123354d32d0b2b602ab4e1353", size = 341821, upload-time = "2025-12-02T16:57:34.854Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/3b/99/64247cb3b9f99ca09aafa11791fe250326d498b194795af91cc957003852/sqlglotrs-0.8.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ecd7fdfd1be44828a8a8046ee743ffbaf93a972d7a125ff13e4673bb659fcf2c", size = 350003, upload-time = "2025-12-02T16:57:42.659Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/d0/91/bc15e4d2322cc28f4f94e519b2ae927ba42844830efaacf973ff774d8e06/sqlglotrs-0.8.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:171df6454f3dc064b89895c51cfb713163188493b36b845bf7c17df0e5702095", size = 474163, upload-time = "2025-12-02T16:58:01.554Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/93/8e/736451fc39f68f1e394a90d768dd9c8135412669ea3460e47033308cbb2e/sqlglotrs-0.8.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:497472ed07445a693e2699fd6f1b8ed5b8320488ade6a4a8e476664ee93ea51c", size = 365088, upload-time = "2025-12-02T16:58:09.604Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/97/2c/214f352fe03652b08873dcb8f4e6799a02be71446bdf9fea99ce13a502f3/sqlglotrs-0.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b2be9add4daed501e28564208b30d4a772dfd6aaa1ad10dadd2d49f4e851f9fa", size = 348368, upload-time = "2025-12-02T16:58:17.363Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/e6/22/c445428a52d053a6f6b31858ac817afb997316e9f0ab2ee3187a10bd85a4/sqlglotrs-0.8.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:871d5ee6414f2d7116b670d0430c16f5b3d5a96480c274f7f3d50d97dbea7601", size = 371720, upload-time = "2025-12-02T16:57:52.71Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/20/b2/301261db4ac543891f897b58a036e87ff33158ea4eda050ee0e08ae0083a/sqlglotrs-0.8.0-cp311-cp311-win32.whl", hash = "sha256:1bbe94effd9d64a8bdca12e0f14b28388059cb5a381561bac07aafedc8c63761", size = 188284, upload-time = "2025-12-02T16:58:40.21Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/c8/a1/0534075d3b8a7c8ab8eff4ea7ba0338a2ef76e3d2e49105b189049430e99/sqlglotrs-0.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:05a5098ec2836799c4c43b06df7c68a2b4c19c0fce042a66706fe3edc957459d", size = 201117, upload-time = "2025-12-02T16:58:47.14Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/fd/20/7beddfd545aaebbfee10a77ac8ef8a205ff597f9ce041c4b0437d0194392/sqlglotrs-0.8.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:fcb53f27cf4b9cae8a66c5777b84eeb3d079e96bcb4277b627fd90bfd1a591b5", size = 314699, upload-time = "2025-12-02T16:58:33.82Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/47/6f/6223a1946fe24a979b8af3c7ae2d16c5451d8f35f2468782bd4af2c122da/sqlglotrs-0.8.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4da1480cc288e02bd459e4638f212fa86a1fef81eb2cd69e6fdbdeb64e3df729", size = 303385, upload-time = "2025-12-02T16:58:26.052Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/a0/98/55050208ef839cad740df6ca86f2f3ca895d469f6ce2040cba32d0b6c4a0/sqlglotrs-0.8.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dc4a77df178b0ba242aba0e7cd775c3f9aef0fa79dfc31c6e642431ce690f51f", size = 341580, upload-time = "2025-12-02T16:57:36.197Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/eb/f2/6f1d207e629fd4810cc826cf419acc386f3d43d32987684730fbc2399503/sqlglotrs-0.8.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a8647d20cc5a9ff39071786169b3f1acf56f266483fa55386111783bca335f04", size = 348451, upload-time = "2025-12-02T16:57:43.756Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/df/1b/fa8a0907471fe7be3754bac683a21c984b17672eef6958206473f683b63a/sqlglotrs-0.8.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1afdd6a0fa915b3aef7c801cbdc815bb39b3d6aecc4d5b04c4ce54d3f73d0013", size = 475703, upload-time = "2025-12-02T16:58:02.843Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/92/56/f020c9c48d68883f6e24d69d18fe386eafc5963bc3982cc45013ec9b1ba0/sqlglotrs-0.8.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3b4c1edeb80f572cf3586b9a23d15f18f48ac8dc481eceabdbb85dc7dbf8a2ce", size = 365842, upload-time = "2025-12-02T16:58:10.847Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/61/7b/091464f8aa2232a2f33028f9c9a2cbea7c4e5719400656f203592d46264d/sqlglotrs-0.8.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9b6d819f2753804d55b10e4320df08350cd2739556572a97ed1b1d7fc939f194", size = 348397, upload-time = "2025-12-02T16:58:18.567Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/b7/1b/1b0cf0d41e8412786d1e80695778db799520223acf85c3ddc53c1200731f/sqlglotrs-0.8.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:dcf2cce002969cefb1466f2837c716d20fc9eac62b05043523fda25b3de4c444", size = 369756, upload-time = "2025-12-02T16:57:53.85Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/31/6e/d9e50472aa92736751abf3d6fcad1c793f0701f17a553ae787e4a7581a1d/sqlglotrs-0.8.0-cp312-cp312-win32.whl", hash = "sha256:5459235a25b30eae508bcaea8bc6ebc04610acd87e985ba4d602981a94078384", size = 187891, upload-time = "2025-12-02T16:58:41.57Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/3b/a2/21d09ff2065a7e883f8f68dcea57fb23f6f04ba7a193f2ac2895b5dfafae/sqlglotrs-0.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:1e0de4fa8e6c54419bd63a1205f3218feb5e2649d72f1bc69c5261b6c333e63b", size = 200842, upload-time = "2025-12-02T16:58:48.181Z" },
|
||||
]
[[package]]
97
web/package-lock.json
generated
@ -76,6 +76,7 @@
|
||||
"pptx-preview": "^1.0.5",
|
||||
"rc-tween-one": "^3.0.6",
|
||||
"react": "^18.2.0",
|
||||
"react-audio-voice-recorder": "^2.2.0",
|
||||
"react-copy-to-clipboard": "^5.1.0",
|
||||
"react-day-picker": "^9.8.0",
|
||||
"react-dom": "^18.2.0",
|
||||
@ -2852,6 +2853,69 @@
|
||||
"node": "^12.22.0 || ^14.17.0 || >=16.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@ffmpeg/ffmpeg": {
|
||||
"version": "0.11.6",
|
||||
"resolved": "https://registry.npmmirror.com/@ffmpeg/ffmpeg/-/ffmpeg-0.11.6.tgz",
|
||||
"integrity": "sha512-uN8J8KDjADEavPhNva6tYO9Fj0lWs9z82swF3YXnTxWMBoFLGq3LZ6FLlIldRKEzhOBKnkVfA8UnFJuvGvNxcA==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"is-url": "^1.2.4",
|
||||
"node-fetch": "^2.6.1",
|
||||
"regenerator-runtime": "^0.13.7",
|
||||
"resolve-url": "^0.2.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12.16.1"
|
||||
}
|
||||
},
|
||||
"node_modules/@ffmpeg/ffmpeg/node_modules/node-fetch": {
|
||||
"version": "2.7.0",
|
||||
"resolved": "https://registry.npmmirror.com/node-fetch/-/node-fetch-2.7.0.tgz",
|
||||
"integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"whatwg-url": "^5.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": "4.x || >=6.0.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"encoding": "^0.1.0"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"encoding": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/@ffmpeg/ffmpeg/node_modules/regenerator-runtime": {
|
||||
"version": "0.13.11",
|
||||
"resolved": "https://registry.npmmirror.com/regenerator-runtime/-/regenerator-runtime-0.13.11.tgz",
|
||||
"integrity": "sha512-kY1AZVr2Ra+t+piVaJ4gxaFaReZVH40AKNo7UCX6W+dEwBo/2oZJzqfuN1qLq1oL45o56cPaTXELwrTh8Fpggg==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@ffmpeg/ffmpeg/node_modules/tr46": {
|
||||
"version": "0.0.3",
|
||||
"resolved": "https://registry.npmmirror.com/tr46/-/tr46-0.0.3.tgz",
|
||||
"integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@ffmpeg/ffmpeg/node_modules/webidl-conversions": {
|
||||
"version": "3.0.1",
|
||||
"resolved": "https://registry.npmmirror.com/webidl-conversions/-/webidl-conversions-3.0.1.tgz",
|
||||
"integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==",
|
||||
"license": "BSD-2-Clause"
|
||||
},
|
||||
"node_modules/@ffmpeg/ffmpeg/node_modules/whatwg-url": {
|
||||
"version": "5.0.0",
|
||||
"resolved": "https://registry.npmmirror.com/whatwg-url/-/whatwg-url-5.0.0.tgz",
|
||||
"integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"tr46": "~0.0.3",
|
||||
"webidl-conversions": "^3.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@floating-ui/core": {
|
||||
"version": "0.6.2",
|
||||
"resolved": "https://registry.npmmirror.com/@floating-ui/core/-/core-0.6.2.tgz",
|
||||
@ -21653,6 +21717,12 @@
|
||||
"node": ">= 0.4"
|
||||
}
|
||||
},
|
||||
"node_modules/is-url": {
|
||||
"version": "1.2.4",
|
||||
"resolved": "https://registry.npmmirror.com/is-url/-/is-url-1.2.4.tgz",
|
||||
"integrity": "sha512-ITvGim8FhRiYe4IQ5uHSkj7pVaPDrCTkNd3yq3cV7iZAcJdHTUMPMEHcqSOy9xZ9qFenQCvi+2wjH9a1nXqHww==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/is-weakmap": {
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmmirror.com/is-weakmap/-/is-weakmap-2.0.1.tgz",
|
||||
@ -29630,6 +29700,30 @@
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/react-audio-visualize": {
|
||||
"version": "1.2.0",
|
||||
"resolved": "https://registry.npmmirror.com/react-audio-visualize/-/react-audio-visualize-1.2.0.tgz",
|
||||
"integrity": "sha512-rfO5nmT0fp23gjU0y2WQT6+ZOq2ZsuPTMphchwX1PCz1Di4oaIr6x7JZII8MLrbHdG7UB0OHfGONTIsWdh67kQ==",
|
||||
"license": "MIT",
|
||||
"peerDependencies": {
|
||||
"react": ">=16.2.0",
|
||||
"react-dom": ">=16.2.0"
|
||||
}
|
||||
},
|
||||
"node_modules/react-audio-voice-recorder": {
|
||||
"version": "2.2.0",
|
||||
"resolved": "https://registry.npmmirror.com/react-audio-voice-recorder/-/react-audio-voice-recorder-2.2.0.tgz",
|
||||
"integrity": "sha512-Hq+143Zs99vJojT/uFvtpxUuiIKoLbMhxhA7qgxe5v8hNXrh5/qTnvYP92hFaE5V+GyoCXlESONa0ufk7t5kHQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@ffmpeg/ffmpeg": "^0.11.6",
|
||||
"react-audio-visualize": "^1.1.3"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"react": ">=16.2.0",
|
||||
"react-dom": ">=16.2.0"
|
||||
}
|
||||
},
|
||||
"node_modules/react-copy-to-clipboard": {
|
||||
"version": "5.1.0",
|
||||
"resolved": "https://registry.npmmirror.com/react-copy-to-clipboard/-/react-copy-to-clipboard-5.1.0.tgz",
|
||||
@ -32102,8 +32196,7 @@
|
||||
"version": "0.2.1",
|
||||
"resolved": "https://registry.npmmirror.com/resolve-url/-/resolve-url-0.2.1.tgz",
|
||||
"integrity": "sha512-ZuF55hVUQaaczgOIwqWzkEcEidmlD/xl44x1UZnhOXcYuFN2S6+rcxpG+C1N3So0wvNI3DmJICUFfu2SxhBmvg==",
|
||||
"deprecated": "https://github.com/lydell/resolve-url#deprecated",
|
||||
"dev": true
|
||||
"deprecated": "https://github.com/lydell/resolve-url#deprecated"
|
||||
},
|
||||
"node_modules/resolve.exports": {
|
||||
"version": "2.0.2",
|
||||
|
||||
@ -89,6 +89,7 @@
|
||||
"pptx-preview": "^1.0.5",
|
||||
"rc-tween-one": "^3.0.6",
|
||||
"react": "^18.2.0",
|
||||
"react-audio-voice-recorder": "^2.2.0",
|
||||
"react-copy-to-clipboard": "^5.1.0",
|
||||
"react-day-picker": "^9.8.0",
|
||||
"react-dom": "^18.2.0",
|
||||
|
||||
@ -1,48 +0,0 @@
import { useTranslate } from '@/hooks/common-hooks';
import { Flex, Form, InputNumber, Slider } from 'antd';

export const AutoKeywordsItem = () => {
  const { t } = useTranslate('knowledgeDetails');

  return (
    <Form.Item label={t('autoKeywords')} tooltip={t('autoKeywordsTip')}>
      <Flex gap={20} align="center">
        <Flex flex={1}>
          <Form.Item
            name={['parser_config', 'auto_keywords']}
            noStyle
            initialValue={0}
          >
            <Slider max={30} style={{ width: '100%' }} />
          </Form.Item>
        </Flex>
        <Form.Item name={['parser_config', 'auto_keywords']} noStyle>
          <InputNumber max={30} min={0} />
        </Form.Item>
      </Flex>
    </Form.Item>
  );
};

export const AutoQuestionsItem = () => {
  const { t } = useTranslate('knowledgeDetails');

  return (
    <Form.Item label={t('autoQuestions')} tooltip={t('autoQuestionsTip')}>
      <Flex gap={20} align="center">
        <Flex flex={1}>
          <Form.Item
            name={['parser_config', 'auto_questions']}
            noStyle
            initialValue={0}
          >
            <Slider max={10} style={{ width: '100%' }} />
          </Form.Item>
        </Flex>
        <Form.Item name={['parser_config', 'auto_questions']} noStyle>
          <InputNumber max={10} min={0} />
        </Form.Item>
      </Flex>
    </Form.Item>
  );
};
@ -1,161 +0,0 @@
|
||||
import { DocumentParserType } from '@/constants/knowledge';
|
||||
import { useHandleChunkMethodSelectChange } from '@/hooks/logic-hooks';
|
||||
import { useSelectParserList } from '@/hooks/use-user-setting-request';
|
||||
import { FormInstance } from 'antd';
|
||||
import { useCallback, useEffect, useMemo, useState } from 'react';
|
||||
|
||||
const ParserListMap = new Map([
|
||||
[
|
||||
['pdf'],
|
||||
[
|
||||
DocumentParserType.Naive,
|
||||
DocumentParserType.Resume,
|
||||
DocumentParserType.Manual,
|
||||
DocumentParserType.Paper,
|
||||
DocumentParserType.Book,
|
||||
DocumentParserType.Laws,
|
||||
DocumentParserType.Presentation,
|
||||
DocumentParserType.One,
|
||||
DocumentParserType.Qa,
|
||||
DocumentParserType.KnowledgeGraph,
|
||||
],
|
||||
],
|
||||
[
|
||||
['doc', 'docx'],
|
||||
[
|
||||
DocumentParserType.Naive,
|
||||
DocumentParserType.Resume,
|
||||
DocumentParserType.Book,
|
||||
DocumentParserType.Laws,
|
||||
DocumentParserType.One,
|
||||
DocumentParserType.Qa,
|
||||
DocumentParserType.Manual,
|
||||
DocumentParserType.KnowledgeGraph,
|
||||
],
|
||||
],
|
||||
[
|
||||
['xlsx', 'xls'],
|
||||
[
|
||||
DocumentParserType.Naive,
|
||||
DocumentParserType.Qa,
|
||||
DocumentParserType.Table,
|
||||
DocumentParserType.One,
|
||||
DocumentParserType.KnowledgeGraph,
|
||||
],
|
||||
],
|
||||
[['ppt', 'pptx'], [DocumentParserType.Presentation]],
|
||||
[
|
||||
['jpg', 'jpeg', 'png', 'gif', 'bmp', 'tif', 'tiff', 'webp', 'svg', 'ico'],
|
||||
[DocumentParserType.Picture],
|
||||
],
|
||||
[
|
||||
['txt'],
|
||||
[
|
||||
DocumentParserType.Naive,
|
||||
DocumentParserType.Resume,
|
||||
DocumentParserType.Book,
|
||||
DocumentParserType.Laws,
|
||||
DocumentParserType.One,
|
||||
DocumentParserType.Qa,
|
||||
DocumentParserType.Table,
|
||||
DocumentParserType.KnowledgeGraph,
|
||||
],
|
||||
],
|
||||
[
|
||||
['csv'],
|
||||
[
|
||||
DocumentParserType.Naive,
|
||||
DocumentParserType.Resume,
|
||||
DocumentParserType.Book,
|
||||
DocumentParserType.Laws,
|
||||
DocumentParserType.One,
|
||||
DocumentParserType.Qa,
|
||||
DocumentParserType.Table,
|
||||
DocumentParserType.KnowledgeGraph,
|
||||
],
|
||||
],
|
||||
[
|
||||
['md'],
|
||||
[
|
||||
DocumentParserType.Naive,
|
||||
DocumentParserType.Qa,
|
||||
DocumentParserType.KnowledgeGraph,
|
||||
],
|
||||
],
|
||||
[['json'], [DocumentParserType.Naive, DocumentParserType.KnowledgeGraph]],
|
||||
[['eml'], [DocumentParserType.Email]],
|
||||
]);
|
||||
|
||||
const getParserList = (
|
||||
values: string[],
|
||||
parserList: Array<{
|
||||
value: string;
|
||||
label: string;
|
||||
}>,
|
||||
) => {
|
||||
return parserList.filter((x) => values?.some((y) => y === x.value));
|
||||
};
|
||||
|
||||
export const useFetchParserListOnMount = (
|
||||
documentId: string,
|
||||
parserId: DocumentParserType,
|
||||
documentExtension: string,
|
||||
form: FormInstance,
|
||||
) => {
|
||||
const [selectedTag, setSelectedTag] = useState<DocumentParserType>();
|
||||
const parserList = useSelectParserList();
|
||||
const handleChunkMethodSelectChange = useHandleChunkMethodSelectChange(form);
|
||||
|
||||
const nextParserList = useMemo(() => {
|
||||
const key = [...ParserListMap.keys()].find((x) =>
|
||||
x.some((y) => y === documentExtension),
|
||||
);
|
||||
if (key) {
|
||||
const values = ParserListMap.get(key);
|
||||
return getParserList(values ?? [], parserList);
|
||||
}
|
||||
|
||||
return getParserList(
|
||||
[
|
||||
DocumentParserType.Naive,
|
||||
DocumentParserType.Resume,
|
||||
DocumentParserType.Book,
|
||||
DocumentParserType.Laws,
|
||||
DocumentParserType.One,
|
||||
DocumentParserType.Qa,
|
||||
DocumentParserType.Table,
|
||||
],
|
||||
parserList,
|
||||
);
|
||||
}, [parserList, documentExtension]);
|
||||
|
||||
useEffect(() => {
|
||||
setSelectedTag(parserId);
|
||||
}, [parserId, documentId]);
|
||||
|
||||
const handleChange = (tag: string) => {
|
||||
handleChunkMethodSelectChange(tag);
|
||||
setSelectedTag(tag as DocumentParserType);
|
||||
};
|
||||
|
||||
return { parserList: nextParserList, handleChange, selectedTag };
|
||||
};
|
||||
|
||||
const hideAutoKeywords = [
|
||||
DocumentParserType.Qa,
|
||||
DocumentParserType.Table,
|
||||
DocumentParserType.Resume,
|
||||
DocumentParserType.KnowledgeGraph,
|
||||
DocumentParserType.Tag,
|
||||
];
|
||||
|
||||
export const useShowAutoKeywords = () => {
|
||||
const showAutoKeywords = useCallback(
|
||||
(selectedTag: DocumentParserType | undefined) => {
|
||||
return hideAutoKeywords.every((x) => selectedTag !== x);
|
||||
},
|
||||
[],
|
||||
);
|
||||
|
||||
return showAutoKeywords;
|
||||
};
|
||||
@ -1,14 +0,0 @@
.pageInputNumber {
  width: 220px;
}

.questionIcon {
  margin-inline-start: 4px;
  color: rgba(0, 0, 0, 0.45);
  cursor: help;
  writing-mode: horizontal-tb;
}

.chunkMethod {
  margin-bottom: 0;
}
@ -1,350 +0,0 @@
|
||||
import MaxTokenNumber from '@/components/max-token-number';
|
||||
import { IModalManagerChildrenProps } from '@/components/modal-manager';
|
||||
import {
|
||||
MinusCircleOutlined,
|
||||
PlusOutlined,
|
||||
QuestionCircleOutlined,
|
||||
} from '@ant-design/icons';
|
||||
import {
|
||||
Button,
|
||||
Divider,
|
||||
Form,
|
||||
InputNumber,
|
||||
Modal,
|
||||
Select,
|
||||
Space,
|
||||
Tooltip,
|
||||
} from 'antd';
|
||||
import omit from 'lodash/omit';
|
||||
import React, { useEffect, useMemo } from 'react';
|
||||
import { useFetchParserListOnMount, useShowAutoKeywords } from './hooks';
|
||||
|
||||
import { DocumentParserType } from '@/constants/knowledge';
|
||||
import { useTranslate } from '@/hooks/common-hooks';
|
||||
import { useFetchKnowledgeBaseConfiguration } from '@/hooks/use-knowledge-request';
|
||||
import { IParserConfig } from '@/interfaces/database/document';
|
||||
import { IChangeParserConfigRequestBody } from '@/interfaces/request/document';
|
||||
import { get } from 'lodash';
|
||||
import { AutoKeywordsItem, AutoQuestionsItem } from '../auto-keywords-item';
|
||||
import { DatasetConfigurationContainer } from '../dataset-configuration-container';
|
||||
import Delimiter from '../delimiter';
|
||||
import EntityTypesItem from '../entity-types-item';
|
||||
import ExcelToHtml from '../excel-to-html';
|
||||
import LayoutRecognize from '../layout-recognize';
|
||||
import ParseConfiguration, {
|
||||
showRaptorParseConfiguration,
|
||||
} from '../parse-configuration';
|
||||
import {
|
||||
UseGraphRagItem,
|
||||
showGraphRagItems,
|
||||
} from '../parse-configuration/graph-rag-items';
|
||||
import styles from './index.less';
|
||||
|
||||
interface IProps extends Omit<IModalManagerChildrenProps, 'showModal'> {
|
||||
loading: boolean;
|
||||
onOk: (
|
||||
parserId: DocumentParserType | undefined,
|
||||
parserConfig: IChangeParserConfigRequestBody,
|
||||
) => void;
|
||||
showModal?(): void;
|
||||
parserId: DocumentParserType;
|
||||
parserConfig: IParserConfig;
|
||||
documentExtension: string;
|
||||
documentId: string;
|
||||
}
|
||||
|
||||
const hidePagesChunkMethods = [
|
||||
DocumentParserType.Qa,
|
||||
DocumentParserType.Table,
|
||||
DocumentParserType.Picture,
|
||||
DocumentParserType.Resume,
|
||||
DocumentParserType.One,
|
||||
DocumentParserType.KnowledgeGraph,
|
||||
];
|
||||
|
||||
const ChunkMethodModal: React.FC<IProps> = ({
|
||||
documentId,
|
||||
parserId,
|
||||
onOk,
|
||||
hideModal,
|
||||
visible,
|
||||
documentExtension,
|
||||
parserConfig,
|
||||
loading,
|
||||
}) => {
|
||||
const [form] = Form.useForm();
|
||||
const { parserList, handleChange, selectedTag } = useFetchParserListOnMount(
|
||||
documentId,
|
||||
parserId,
|
||||
documentExtension,
|
||||
form,
|
||||
);
|
||||
const { t } = useTranslate('knowledgeDetails');
|
||||
const { data: knowledgeDetails } = useFetchKnowledgeBaseConfiguration();
|
||||
|
||||
const useGraphRag = useMemo(() => {
|
||||
return knowledgeDetails.parser_config?.graphrag?.use_graphrag;
|
||||
}, [knowledgeDetails.parser_config?.graphrag?.use_graphrag]);
|
||||
|
||||
const handleOk = async () => {
|
||||
const values = await form.validateFields();
|
||||
const parser_config = {
|
||||
...values.parser_config,
|
||||
pages: values.pages?.map((x: any) => [x.from, x.to]) ?? [],
|
||||
};
|
||||
onOk(selectedTag, parser_config);
|
||||
};
|
||||
|
||||
const isPdf = documentExtension === 'pdf';
|
||||
|
||||
const showPages = useMemo(() => {
|
||||
return isPdf && hidePagesChunkMethods.every((x) => x !== selectedTag);
|
||||
}, [selectedTag, isPdf]);
|
||||
|
||||
const showOne = useMemo(() => {
|
||||
return (
|
||||
isPdf &&
|
||||
hidePagesChunkMethods
|
||||
.filter((x) => x !== DocumentParserType.One)
|
||||
.every((x) => x !== selectedTag)
|
||||
);
|
||||
}, [selectedTag, isPdf]);
|
||||
|
||||
const showMaxTokenNumber =
|
||||
selectedTag === DocumentParserType.Naive ||
|
||||
selectedTag === DocumentParserType.KnowledgeGraph;
|
||||
|
||||
const showEntityTypes = selectedTag === DocumentParserType.KnowledgeGraph;
|
||||
|
||||
const showExcelToHtml =
|
||||
selectedTag === DocumentParserType.Naive && documentExtension === 'xlsx';
|
||||
|
||||
const showAutoKeywords = useShowAutoKeywords();
|
||||
|
||||
const afterClose = () => {
|
||||
form.resetFields();
|
||||
};
|
||||
|
||||
useEffect(() => {
|
||||
if (visible) {
|
||||
const pages =
|
||||
parserConfig?.pages?.map((x) => ({ from: x[0], to: x[1] })) ?? [];
|
||||
form.setFieldsValue({
|
||||
pages: pages.length > 0 ? pages : [{ from: 1, to: 1024 }],
|
||||
parser_config: {
|
||||
...omit(parserConfig, 'pages'),
|
||||
graphrag: {
|
||||
use_graphrag: get(
|
||||
parserConfig,
|
||||
'graphrag.use_graphrag',
|
||||
useGraphRag,
|
||||
),
|
||||
},
|
||||
},
|
||||
});
|
||||
}
|
||||
}, [
|
||||
form,
|
||||
knowledgeDetails.parser_config,
|
||||
parserConfig,
|
||||
useGraphRag,
|
||||
visible,
|
||||
]);
|
||||
|
||||
return (
|
||||
<Modal
|
||||
title={t('chunkMethod')}
|
||||
open={visible}
|
||||
onOk={handleOk}
|
||||
onCancel={hideModal}
|
||||
afterClose={afterClose}
|
||||
confirmLoading={loading}
|
||||
width={700}
|
||||
>
|
||||
<Space size={[0, 8]} wrap>
|
||||
<Form.Item label={t('chunkMethod')} className={styles.chunkMethod}>
|
||||
<Select
|
||||
style={{ width: 160 }}
|
||||
onChange={handleChange}
|
||||
value={selectedTag}
|
||||
options={parserList}
|
||||
/>
|
||||
</Form.Item>
|
||||
</Space>
|
||||
<Divider></Divider>
|
||||
<Form
|
||||
name="dynamic_form_nest_item"
|
||||
autoComplete="off"
|
||||
form={form}
|
||||
className="space-y-4"
|
||||
>
|
||||
{showPages && (
|
||||
<>
|
||||
<Space>
|
||||
<p>{t('pageRanges')}:</p>
|
||||
<Tooltip title={t('pageRangesTip')}>
|
||||
<QuestionCircleOutlined
|
||||
className={styles.questionIcon}
|
||||
></QuestionCircleOutlined>
|
||||
</Tooltip>
|
||||
</Space>
|
||||
<Form.List name="pages">
|
||||
{(fields, { add, remove }) => (
|
||||
<>
|
||||
{fields.map(({ key, name, ...restField }) => (
|
||||
<Space
|
||||
key={key}
|
||||
style={{
|
||||
display: 'flex',
|
||||
}}
|
||||
align="baseline"
|
||||
>
|
||||
<Form.Item
|
||||
{...restField}
|
||||
name={[name, 'from']}
|
||||
dependencies={name > 0 ? [name - 1, 'to'] : []}
|
||||
rules={[
|
||||
{
|
||||
required: true,
|
||||
message: t('fromMessage'),
|
||||
},
|
||||
({ getFieldValue }) => ({
|
||||
validator(_, value) {
|
||||
if (
|
||||
name === 0 ||
|
||||
!value ||
|
||||
getFieldValue(['pages', name - 1, 'to']) < value
|
||||
) {
|
||||
return Promise.resolve();
|
||||
}
|
||||
return Promise.reject(
|
||||
new Error(t('greaterThanPrevious')),
|
||||
);
|
||||
},
|
||||
}),
|
||||
]}
|
||||
>
|
||||
<InputNumber
|
||||
placeholder={t('fromPlaceholder')}
|
||||
min={0}
|
||||
precision={0}
|
||||
className={styles.pageInputNumber}
|
||||
/>
|
||||
</Form.Item>
|
||||
<Form.Item
|
||||
{...restField}
|
||||
name={[name, 'to']}
|
||||
dependencies={[name, 'from']}
|
||||
rules={[
|
||||
{
|
||||
required: true,
|
||||
message: t('toMessage'),
|
||||
},
|
||||
({ getFieldValue }) => ({
|
||||
validator(_, value) {
|
||||
if (
|
||||
!value ||
|
||||
getFieldValue(['pages', name, 'from']) < value
|
||||
) {
|
||||
return Promise.resolve();
|
||||
}
|
||||
return Promise.reject(
|
||||
new Error(t('greaterThan')),
|
||||
);
|
||||
},
|
||||
}),
|
||||
]}
|
||||
>
|
||||
<InputNumber
|
||||
placeholder={t('toPlaceholder')}
|
||||
min={0}
|
||||
precision={0}
|
||||
className={styles.pageInputNumber}
|
||||
/>
|
||||
</Form.Item>
|
||||
{name > 0 && (
|
||||
<MinusCircleOutlined onClick={() => remove(name)} />
|
||||
)}
|
||||
</Space>
|
||||
))}
|
||||
<Form.Item>
|
||||
<Button
|
||||
type="dashed"
|
||||
onClick={() => add()}
|
||||
block
|
||||
icon={<PlusOutlined />}
|
||||
>
|
||||
{t('addPage')}
|
||||
</Button>
|
||||
</Form.Item>
|
||||
</>
|
||||
)}
|
||||
</Form.List>
|
||||
</>
|
||||
)}
|
||||
|
||||
{showPages && (
|
||||
<Form.Item
|
||||
noStyle
|
||||
dependencies={[['parser_config', 'layout_recognize']]}
|
||||
>
|
||||
{({ getFieldValue }) =>
|
||||
getFieldValue(['parser_config', 'layout_recognize']) && (
|
||||
<Form.Item
|
||||
name={['parser_config', 'task_page_size']}
|
||||
label={t('taskPageSize')}
|
||||
tooltip={t('taskPageSizeTip')}
|
||||
initialValue={12}
|
||||
rules={[
|
||||
{
|
||||
required: true,
|
||||
message: t('taskPageSizeMessage'),
|
||||
},
|
||||
]}
|
||||
>
|
||||
<InputNumber min={1} max={128} />
|
||||
</Form.Item>
|
||||
)
|
||||
}
|
||||
</Form.Item>
|
||||
)}
|
||||
<DatasetConfigurationContainer show={showOne || showMaxTokenNumber}>
|
||||
{showOne && <LayoutRecognize></LayoutRecognize>}
|
||||
{showMaxTokenNumber && (
|
||||
<>
|
||||
<MaxTokenNumber
|
||||
max={
|
||||
selectedTag === DocumentParserType.KnowledgeGraph
|
||||
? 8192 * 2
|
||||
: 2048
|
||||
}
|
||||
></MaxTokenNumber>
|
||||
<Delimiter></Delimiter>
|
||||
</>
|
||||
)}
|
||||
</DatasetConfigurationContainer>
|
||||
<DatasetConfigurationContainer
|
||||
show={showAutoKeywords(selectedTag) || showExcelToHtml}
|
||||
>
|
||||
{showAutoKeywords(selectedTag) && (
|
||||
<>
|
||||
<AutoKeywordsItem></AutoKeywordsItem>
|
||||
<AutoQuestionsItem></AutoQuestionsItem>
|
||||
</>
|
||||
)}
|
||||
{showExcelToHtml && <ExcelToHtml></ExcelToHtml>}
|
||||
</DatasetConfigurationContainer>
|
||||
{showRaptorParseConfiguration(selectedTag) && (
|
||||
<DatasetConfigurationContainer>
|
||||
<ParseConfiguration></ParseConfiguration>
|
||||
</DatasetConfigurationContainer>
|
||||
)}
|
||||
{showGraphRagItems(selectedTag) && useGraphRag && (
|
||||
<UseGraphRagItem></UseGraphRagItem>
|
||||
)}
|
||||
{showEntityTypes && <EntityTypesItem></EntityTypesItem>}
|
||||
</Form>
|
||||
</Modal>
|
||||
);
|
||||
};
|
||||
export default ChunkMethodModal;
|
||||
@ -1,40 +0,0 @@
import { Select as AntSelect, Form } from 'antd';
import { useTranslation } from 'react-i18next';

const Languages = [
  'English',
  'Chinese',
  'Spanish',
  'French',
  'German',
  'Japanese',
  'Korean',
  'Vietnamese',
];

const options = Languages.map((x) => ({ label: x, value: x }));

type CrossLanguageItemProps = {
  name?: string | Array<string>;
};

export const CrossLanguageItem = ({
  name = ['prompt_config', 'cross_languages'],
}: CrossLanguageItemProps) => {
  const { t } = useTranslation();

  return (
    <Form.Item
      label={t('chat.crossLanguage')}
      name={name}
      tooltip={t('chat.crossLanguageTip')}
    >
      <AntSelect
        options={options}
        allowClear
        placeholder={t('common.languagePlaceholder')}
        mode="multiple"
      />
    </Form.Item>
  );
};
@ -1,42 +0,0 @@
import { Form, Input } from 'antd';
import { useTranslation } from 'react-i18next';

interface IProps {
  value?: string | undefined;
  onChange?: (val: string | undefined) => void;
  maxLength?: number;
}

export const DelimiterInput = ({ value, onChange, maxLength }: IProps) => {
  const nextValue = value?.replaceAll('\n', '\\n');
  const handleInputChange = (e: React.ChangeEvent<HTMLInputElement>) => {
    const val = e.target.value;
    const nextValue = val.replaceAll('\\n', '\n');
    onChange?.(nextValue);
  };
  return (
    <Input
      value={nextValue}
      onChange={handleInputChange}
      maxLength={maxLength}
    ></Input>
  );
};

const Delimiter = () => {
  const { t } = useTranslation();

  return (
    <Form.Item
      name={['parser_config', 'delimiter']}
      label={t('knowledgeDetails.delimiter')}
      initialValue={`\n`}
      rules={[{ required: true }]}
      tooltip={t('knowledgeDetails.delimiterTip')}
    >
      <DelimiterInput />
    </Form.Item>
  );
};

export default Delimiter;
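For readers skimming the deleted DelimiterInput above: its two replaceAll calls implement a display/store round-trip, with real newlines kept in the form value and the escaped form shown in the input. A minimal standalone sketch of that round-trip (illustrative only, not part of this diff):

// Stored form value uses real newlines; the input displays them as a literal "\n".
const stored = 'a\nb';
const displayed = stored.replaceAll('\n', '\\n'); // what the <Input> shows
const backToStored = displayed.replaceAll('\\n', '\n'); // what onChange writes back
console.assert(backToStored === stored);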
@ -1,7 +1,5 @@
import message from '@/components/ui/message';
import { Spin } from '@/components/ui/spin';
import { Authorization } from '@/constants/authorization';
import { getAuthorization } from '@/utils/authorization-util';
import request from '@/utils/request';
import classNames from 'classnames';
import mammoth from 'mammoth';
@ -12,26 +10,87 @@ interface DocPreviewerProps {
  url: string;
}

// Word document preview component. Behavior:
// 1) Fetches the document as a Blob.
// 2) Detects .docx input via a ZIP header probe.
// 3) Renders .docx using Mammoth; presents a controlled "unsupported" notice for non-ZIP payloads.
export const DocPreviewer: React.FC<DocPreviewerProps> = ({
  className,
  url,
}) => {
  // const url = useGetDocumentUrl();
  const [htmlContent, setHtmlContent] = useState<string>('');
  const [loading, setLoading] = useState(false);

  // Determines whether the Blob represents a .docx document by checking for the ZIP
  // file signature ("PK") in the initial bytes. A valid .docx file is a ZIP container
  // and always begins with:
  //   50 4B 03 04 ("PK..")
  //
  // Legacy .doc files use the CFBF binary format, commonly starting with:
  //   D0 CF 11 E0 A1 B1 1A E1
  //
  // Note that some files distributed with a ".doc" extension may internally be .docx
  // documents (e.g., renamed files or files produced by systems that export .docx
  // content under a .doc filename). These files will still present the ZIP signature
  // and are therefore treated as supported .docx payloads. The header inspection
  // ensures correct routing regardless of filename or reported extension.
  const isZipLikeBlob = async (blob: Blob): Promise<boolean> => {
    try {
      const headerSlice = blob.slice(0, 4);
      const buf = await headerSlice.arrayBuffer();
      const bytes = new Uint8Array(buf);

      // ZIP files start with "PK" (0x50, 0x4B)
      return bytes.length >= 2 && bytes[0] === 0x50 && bytes[1] === 0x4b;
    } catch (e) {
      console.error('Failed to inspect blob header', e);
      return false;
    }
  };

  const fetchDocument = async () => {
    if (!url) return;

    setLoading(true);

    const res = await request(url, {
      method: 'GET',
      responseType: 'blob',
      headers: { [Authorization]: getAuthorization() },
      onError: () => {
        message.error('Document parsing failed');
        console.error('Error loading document:', url);
      },
    });

    try {
      const arrayBuffer = await res.data.arrayBuffer();
      const blob: Blob = res.data;
      const contentType: string =
        blob.type || (res as any).headers?.['content-type'] || '';

      // Execution path selection: ZIP-like payloads are treated as .docx and rendered via Mammoth;
      // non-ZIP payloads receive an explicit unsupported notice.
      const looksLikeZip = await isZipLikeBlob(blob);

      if (!looksLikeZip) {
        // Non-ZIP payload (likely legacy .doc or another format): skip Mammoth processing.
        setHtmlContent(`
          <div class="flex h-full items-center justify-center">
            <div class="border border-dashed border-border-normal rounded-xl p-8 max-w-2xl text-center">
              <p class="text-2xl font-bold mb-4">
                Preview is not available for this Word document
              </p>
              <p class="italic text-sm text-muted-foreground leading-relaxed">
                Mammoth supports modern <code>.docx</code> files only.<br/>
                The file header does not indicate a <code>.docx</code> ZIP archive.
              </p>
            </div>
          </div>
        `);
        return;
      }

      // ZIP-like payload: parse as .docx with Mammoth
      const arrayBuffer = await blob.arrayBuffer();
      const result = await mammoth.convertToHtml(
        { arrayBuffer },
        { includeDefaultStyleMap: true },
@ -43,10 +102,11 @@ export const DocPreviewer: React.FC<DocPreviewerProps> = ({

      setHtmlContent(styledContent);
    } catch (err) {
      message.error('Document parsing failed');
      message.error('Failed to parse document.');
      console.error('Error parsing document:', err);
    } finally {
      setLoading(false);
    }
    setLoading(false);
  };

  useEffect(() => {
@ -54,6 +114,7 @@ export const DocPreviewer: React.FC<DocPreviewerProps> = ({
      fetchDocument();
    }
  }, [url]);

  return (
    <div
      className={classNames(
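The header probe added in the diff above is the core of the new preview routing. As a standalone illustration (not part of the committed change), the same check can be written as a plain function; the helper name sniffWordFormat and the 'legacy-doc'/'unknown' labels are illustrative, while the magic numbers come straight from the comments in the diff.

// Illustrative sketch, assuming a browser environment with Blob available.
async function sniffWordFormat(blob: Blob): Promise<'docx' | 'legacy-doc' | 'unknown'> {
  const bytes = new Uint8Array(await blob.slice(0, 8).arrayBuffer());
  // .docx is a ZIP container: 50 4B 03 04 ("PK..")
  if (bytes.length >= 2 && bytes[0] === 0x50 && bytes[1] === 0x4b) {
    return 'docx';
  }
  // Legacy .doc is CFBF: D0 CF 11 E0 A1 B1 1A E1
  const cfbf = [0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1];
  if (bytes.length >= 8 && cfbf.every((b, i) => bytes[i] === b)) {
    return 'legacy-doc';
  }
  return 'unknown';
}

Because the probe inspects only file content, a .docx renamed to .doc is still routed to Mammoth, which is exactly the case the comment block in the diff calls out.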
@ -1,33 +0,0 @@
import { useTranslate } from '@/hooks/common-hooks';
import { Form } from 'antd';
import EditTag from './edit-tag';

const initialEntityTypes = [
  'organization',
  'person',
  'geo',
  'event',
  'category',
];

type IProps = {
  field?: string[];
};

const EntityTypesItem = ({
  field = ['parser_config', 'entity_types'],
}: IProps) => {
  const { t } = useTranslate('knowledgeConfiguration');
  return (
    <Form.Item
      name={field}
      label={t('entityTypes')}
      rules={[{ required: true }]}
      initialValue={initialEntityTypes}
    >
      <EditTag />
    </Form.Item>
  );
};

export default EntityTypesItem;
@ -1,19 +0,0 @@
import { useTranslate } from '@/hooks/common-hooks';
import { Form, Switch } from 'antd';

const ExcelToHtml = () => {
  const { t } = useTranslate('knowledgeDetails');
  return (
    <Form.Item
      name={['parser_config', 'html4excel']}
      label={t('html4excel')}
      initialValue={false}
      valuePropName="checked"
      tooltip={t('html4excelTip')}
    >
      <Switch />
    </Form.Item>
  );
};

export default ExcelToHtml;
77
web/src/components/feedback-dialog.tsx
Normal file
@ -0,0 +1,77 @@
import { IModalProps } from '@/interfaces/common';
import { IFeedbackRequestBody } from '@/interfaces/request/chat';
import { zodResolver } from '@hookform/resolvers/zod';
import { useCallback } from 'react';
import { useForm } from 'react-hook-form';
import { useTranslation } from 'react-i18next';
import { z } from 'zod';
import { RAGFlowFormItem } from './ragflow-form';
import { ButtonLoading } from './ui/button';
import {
  Dialog,
  DialogContent,
  DialogFooter,
  DialogHeader,
  DialogTitle,
} from './ui/dialog';
import { Form } from './ui/form';
import { Textarea } from './ui/textarea';

const FormId = 'feedback-dialog';

const FeedbackDialog = ({
  visible,
  hideModal,
  onOk,
  loading,
}: IModalProps<IFeedbackRequestBody>) => {
  const { t } = useTranslation();
  const FormSchema = z.object({
    feedback: z
      .string()
      .min(1, {
        message: t('common.namePlaceholder'),
      })
      .trim(),
  });

  const form = useForm<z.infer<typeof FormSchema>>({
    resolver: zodResolver(FormSchema),
    defaultValues: { feedback: '' },
  });

  const handleOk = useCallback(
    async (data: z.infer<typeof FormSchema>) => {
      return onOk?.({ thumbup: false, feedback: data.feedback });
    },
    [onOk],
  );

  return (
    <Dialog open={visible} onOpenChange={hideModal}>
      <DialogContent className="sm:max-w-[425px]">
        <DialogHeader>
          <DialogTitle>Feedback</DialogTitle>
        </DialogHeader>
        <Form {...form}>
          <form
            onSubmit={form.handleSubmit(handleOk)}
            className="space-y-6"
            id={FormId}
          >
            <RAGFlowFormItem name="feedback">
              <Textarea> </Textarea>
            </RAGFlowFormItem>
          </form>
        </Form>
        <DialogFooter>
          <ButtonLoading type="submit" form={FormId} loading={loading}>
            {t('common.save')}
          </ButtonLoading>
        </DialogFooter>
      </DialogContent>
    </Dialog>
  );
};

export default FeedbackDialog;
@ -1,13 +0,0 @@
.uploader {
  :global {
    .ant-upload-list {
      max-height: 40vh;
      overflow-y: auto;
    }
  }
}

.uploadLimit {
  color: red;
  font-size: 12px;
}
@ -1,191 +0,0 @@
|
||||
import { useTranslate } from '@/hooks/common-hooks';
|
||||
import { IModalProps } from '@/interfaces/common';
|
||||
import { InboxOutlined } from '@ant-design/icons';
|
||||
import {
|
||||
Checkbox,
|
||||
Flex,
|
||||
Modal,
|
||||
Progress,
|
||||
Segmented,
|
||||
Tabs,
|
||||
TabsProps,
|
||||
Upload,
|
||||
UploadFile,
|
||||
UploadProps,
|
||||
} from 'antd';
|
||||
import { Dispatch, SetStateAction, useState } from 'react';
|
||||
|
||||
import styles from './index.less';
|
||||
|
||||
const { Dragger } = Upload;
|
||||
|
||||
const FileUpload = ({
|
||||
directory,
|
||||
fileList,
|
||||
setFileList,
|
||||
uploadProgress,
|
||||
}: {
|
||||
directory: boolean;
|
||||
fileList: UploadFile[];
|
||||
setFileList: Dispatch<SetStateAction<UploadFile[]>>;
|
||||
uploadProgress?: number;
|
||||
}) => {
|
||||
const { t } = useTranslate('fileManager');
|
||||
const props: UploadProps = {
|
||||
multiple: true,
|
||||
onRemove: (file) => {
|
||||
const index = fileList.indexOf(file);
|
||||
const newFileList = fileList.slice();
|
||||
newFileList.splice(index, 1);
|
||||
setFileList(newFileList);
|
||||
},
|
||||
beforeUpload: (file: UploadFile) => {
|
||||
setFileList((pre) => {
|
||||
return [...pre, file];
|
||||
});
|
||||
|
||||
return false;
|
||||
},
|
||||
directory,
|
||||
fileList,
|
||||
progress: {
|
||||
strokeWidth: 2,
|
||||
},
|
||||
};
|
||||
|
||||
return (
|
||||
<>
|
||||
<Progress percent={uploadProgress} showInfo={false} />
|
||||
<Dragger {...props} className={styles.uploader}>
|
||||
<p className="ant-upload-drag-icon">
|
||||
<InboxOutlined />
|
||||
</p>
|
||||
<p className="ant-upload-text">{t('uploadTitle')}</p>
|
||||
<p className="ant-upload-hint">{t('uploadDescription')}</p>
|
||||
{false && <p className={styles.uploadLimit}>{t('uploadLimit')}</p>}
|
||||
</Dragger>
|
||||
</>
|
||||
);
|
||||
};
|
||||
|
||||
interface IFileUploadModalProps
|
||||
extends IModalProps<
|
||||
{ parseOnCreation: boolean; directoryFileList: UploadFile[] } | UploadFile[]
|
||||
> {
|
||||
uploadFileList?: UploadFile[];
|
||||
setUploadFileList?: Dispatch<SetStateAction<UploadFile[]>>;
|
||||
uploadProgress?: number;
|
||||
setUploadProgress?: Dispatch<SetStateAction<number>>;
|
||||
}
|
||||
|
||||
const FileUploadModal = ({
|
||||
visible,
|
||||
hideModal,
|
||||
loading,
|
||||
onOk: onFileUploadOk,
|
||||
uploadFileList: fileList,
|
||||
setUploadFileList: setFileList,
|
||||
uploadProgress,
|
||||
setUploadProgress,
|
||||
}: IFileUploadModalProps) => {
|
||||
const { t } = useTranslate('fileManager');
|
||||
const [value, setValue] = useState<string | number>('local');
|
||||
const [parseOnCreation, setParseOnCreation] = useState(false);
|
||||
const [currentFileList, setCurrentFileList] = useState<UploadFile[]>([]);
|
||||
const [directoryFileList, setDirectoryFileList] = useState<UploadFile[]>([]);
|
||||
|
||||
const clearFileList = () => {
|
||||
if (setFileList) {
|
||||
setFileList([]);
|
||||
setUploadProgress?.(0);
|
||||
} else {
|
||||
setCurrentFileList([]);
|
||||
}
|
||||
setDirectoryFileList([]);
|
||||
};
|
||||
|
||||
const onOk = async () => {
|
||||
if (uploadProgress === 100) {
|
||||
hideModal?.();
|
||||
return;
|
||||
}
|
||||
|
||||
const ret = await onFileUploadOk?.(
|
||||
fileList
|
||||
? { parseOnCreation, directoryFileList }
|
||||
: [...currentFileList, ...directoryFileList],
|
||||
);
|
||||
return ret;
|
||||
};
|
||||
|
||||
const afterClose = () => {
|
||||
clearFileList();
|
||||
};
|
||||
|
||||
const items: TabsProps['items'] = [
|
||||
{
|
||||
key: '1',
|
||||
label: t('file'),
|
||||
children: (
|
||||
<FileUpload
|
||||
directory={false}
|
||||
fileList={fileList ? fileList : currentFileList}
|
||||
setFileList={setFileList ? setFileList : setCurrentFileList}
|
||||
uploadProgress={uploadProgress}
|
||||
></FileUpload>
|
||||
),
|
||||
},
|
||||
{
|
||||
key: '2',
|
||||
label: t('directory'),
|
||||
children: (
|
||||
<FileUpload
|
||||
directory
|
||||
fileList={directoryFileList}
|
||||
setFileList={setDirectoryFileList}
|
||||
uploadProgress={uploadProgress}
|
||||
></FileUpload>
|
||||
),
|
||||
},
|
||||
];
|
||||
|
||||
return (
|
||||
<>
|
||||
<Modal
|
||||
title={t('uploadFile')}
|
||||
open={visible}
|
||||
onOk={onOk}
|
||||
onCancel={hideModal}
|
||||
confirmLoading={loading}
|
||||
afterClose={afterClose}
|
||||
>
|
||||
<Flex gap={'large'} vertical>
|
||||
<Segmented
|
||||
options={[
|
||||
{ label: t('local'), value: 'local' },
|
||||
{ label: t('s3'), value: 's3' },
|
||||
]}
|
||||
block
|
||||
value={value}
|
||||
onChange={setValue}
|
||||
/>
|
||||
{value === 'local' ? (
|
||||
<>
|
||||
<Checkbox
|
||||
checked={parseOnCreation}
|
||||
onChange={(e) => setParseOnCreation(e.target.checked)}
|
||||
>
|
||||
{t('parseOnCreation')}
|
||||
</Checkbox>
|
||||
<Tabs defaultActiveKey="1" items={items} />
|
||||
</>
|
||||
) : (
|
||||
t('comingSoon', { keyPrefix: 'common' })
|
||||
)}
|
||||
</Flex>
|
||||
</Modal>
|
||||
</>
|
||||
);
|
||||
};
|
||||
|
||||
export default FileUploadModal;
|
||||
@ -1,8 +1,9 @@
|
||||
import React, { useEffect, useRef } from 'react';
|
||||
import React, { useEffect, useRef, useState } from 'react';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
import './css/cloud9_night.less';
|
||||
import './css/index.less';
|
||||
import { JsonEditorOptions, JsonEditorProps } from './interface';
|
||||
|
||||
const defaultConfig: JsonEditorOptions = {
|
||||
mode: 'code',
|
||||
modes: ['tree', 'code'],
|
||||
@ -14,6 +15,7 @@ const defaultConfig: JsonEditorOptions = {
|
||||
enableTransform: false,
|
||||
indentation: 2,
|
||||
};
|
||||
|
||||
const JsonEditor: React.FC<JsonEditorProps> = ({
|
||||
value,
|
||||
onChange,
|
||||
@ -25,43 +27,62 @@ const JsonEditor: React.FC<JsonEditorProps> = ({
|
||||
const editorRef = useRef<any>(null);
|
||||
const { i18n } = useTranslation();
|
||||
const currentLanguageRef = useRef<string>(i18n.language);
|
||||
const [isLoading, setIsLoading] = useState(true);
|
||||
|
||||
useEffect(() => {
|
||||
if (typeof window !== 'undefined') {
|
||||
const JSONEditor = require('jsoneditor');
|
||||
import('jsoneditor/dist/jsoneditor.min.css');
|
||||
let isMounted = true;
|
||||
|
||||
if (containerRef.current) {
|
||||
// Default configuration options
|
||||
const defaultOptions: JsonEditorOptions = {
|
||||
...defaultConfig,
|
||||
language: i18n.language === 'zh' ? 'zh-CN' : 'en',
|
||||
onChange: () => {
|
||||
if (editorRef.current && onChange) {
|
||||
try {
|
||||
const updatedJson = editorRef.current.get();
|
||||
onChange(updatedJson);
|
||||
} catch (err) {
|
||||
// Do not trigger onChange when parsing error occurs
|
||||
console.error(err);
|
||||
}
|
||||
const initEditor = async () => {
|
||||
if (typeof window !== 'undefined') {
|
||||
try {
|
||||
const JSONEditorModule = await import('jsoneditor');
|
||||
const JSONEditor = JSONEditorModule.default || JSONEditorModule;
|
||||
|
||||
await import('jsoneditor/dist/jsoneditor.min.css');
|
||||
|
||||
if (isMounted && containerRef.current) {
|
||||
// Default configuration options
|
||||
const defaultOptions: JsonEditorOptions = {
|
||||
...defaultConfig,
|
||||
language: i18n.language === 'zh' ? 'zh-CN' : 'en',
|
||||
onChange: () => {
|
||||
if (editorRef.current && onChange) {
|
||||
try {
|
||||
const updatedJson = editorRef.current.get();
|
||||
onChange(updatedJson);
|
||||
} catch (err) {
|
||||
// Do not trigger onChange when parsing error occurs
|
||||
console.error(err);
|
||||
}
|
||||
}
|
||||
},
|
||||
...options, // Merge user provided options with defaults
|
||||
};
|
||||
|
||||
editorRef.current = new JSONEditor(
|
||||
containerRef.current,
|
||||
defaultOptions,
|
||||
);
|
||||
|
||||
if (value) {
|
||||
editorRef.current.set(value);
|
||||
}
|
||||
},
|
||||
...options, // Merge user provided options with defaults
|
||||
};
|
||||
|
||||
editorRef.current = new JSONEditor(
|
||||
containerRef.current,
|
||||
defaultOptions,
|
||||
);
|
||||
|
||||
if (value) {
|
||||
editorRef.current.set(value);
|
||||
setIsLoading(false);
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Failed to load jsoneditor:', error);
|
||||
if (isMounted) {
|
||||
setIsLoading(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
initEditor();
|
||||
|
||||
return () => {
|
||||
isMounted = false;
|
||||
if (editorRef.current) {
|
||||
if (typeof editorRef.current.destroy === 'function') {
|
||||
editorRef.current.destroy();
|
||||
@ -92,26 +113,38 @@ const JsonEditor: React.FC<JsonEditorProps> = ({
|
||||
}
|
||||
|
||||
// Recreate the editor with new language
|
||||
const JSONEditor = require('jsoneditor');
|
||||
const initEditorWithNewLanguage = async () => {
|
||||
try {
|
||||
const JSONEditorModule = await import('jsoneditor');
|
||||
const JSONEditor = JSONEditorModule.default || JSONEditorModule;
|
||||
|
||||
const newOptions: JsonEditorOptions = {
|
||||
...defaultConfig,
|
||||
language: i18n.language === 'zh' ? 'zh-CN' : 'en',
|
||||
onChange: () => {
|
||||
if (editorRef.current && onChange) {
|
||||
try {
|
||||
const updatedJson = editorRef.current.get();
|
||||
onChange(updatedJson);
|
||||
} catch (err) {
|
||||
// Do not trigger onChange when parsing error occurs
|
||||
}
|
||||
}
|
||||
},
|
||||
...options, // Merge user provided options with defaults
|
||||
const newOptions: JsonEditorOptions = {
|
||||
...defaultConfig,
|
||||
language: i18n.language === 'zh' ? 'zh-CN' : 'en',
|
||||
onChange: () => {
|
||||
if (editorRef.current && onChange) {
|
||||
try {
|
||||
const updatedJson = editorRef.current.get();
|
||||
onChange(updatedJson);
|
||||
} catch (err) {
|
||||
// Do not trigger onChange when parsing error occurs
|
||||
}
|
||||
}
|
||||
},
|
||||
...options, // Merge user provided options with defaults
|
||||
};
|
||||
|
||||
editorRef.current = new JSONEditor(containerRef.current, newOptions);
|
||||
editorRef.current.set(currentData);
|
||||
} catch (error) {
|
||||
console.error(
|
||||
'Failed to reload jsoneditor with new language:',
|
||||
error,
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
editorRef.current = new JSONEditor(containerRef.current, newOptions);
|
||||
editorRef.current.set(currentData);
|
||||
initEditorWithNewLanguage();
|
||||
}
|
||||
}, [i18n.language, value, onChange, options]);
|
||||
|
||||
@ -135,7 +168,13 @@ const JsonEditor: React.FC<JsonEditorProps> = ({
|
||||
ref={containerRef}
|
||||
style={{ height }}
|
||||
className={`ace-tomorrow-night w-full border border-border-button rounded-lg overflow-hidden bg-bg-input ${className} `}
|
||||
/>
|
||||
>
|
||||
{isLoading && (
|
||||
<div className="flex items-center justify-center h-full">
|
||||
<div className="text-text-secondary">Loading editor...</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
};
|
||||
|
||||
|
||||
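The JsonEditor hunks above swap the synchronous `require('jsoneditor')` call for an awaited dynamic import and guard every state or DOM update with an `isMounted` flag plus an `isLoading` placeholder. A minimal sketch of that load-then-guard pattern in isolation, assuming a generic `jsoneditor`-style module; the hook name and option shape here are illustrative, not taken from the diff:

import { useEffect, useRef } from 'react';

// Minimal sketch: dynamically import an editor library on mount and
// ignore the result if the component unmounted before it resolved.
export function useAsyncEditor(container: React.RefObject<HTMLDivElement>) {
  const editorRef = useRef<any>(null);

  useEffect(() => {
    let isMounted = true;

    const init = async () => {
      try {
        // Dynamic import keeps the heavy editor bundle out of the initial chunk.
        const mod = await import('jsoneditor');
        const JSONEditor = (mod as any).default || mod;
        if (isMounted && container.current) {
          editorRef.current = new JSONEditor(container.current, { mode: 'code' });
        }
      } catch (err) {
        console.error('Failed to load editor:', err);
      }
    };

    init();

    return () => {
      // Flip the flag first so a late-resolving import() is ignored.
      isMounted = false;
      editorRef.current?.destroy?.();
    };
  }, [container]);

  return editorRef;
}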
@ -1,55 +0,0 @@
|
||||
import { LlmModelType } from '@/constants/knowledge';
|
||||
import { useTranslate } from '@/hooks/common-hooks';
|
||||
import { useSelectLlmOptionsByModelType } from '@/hooks/use-llm-request';
|
||||
import { Form, Select } from 'antd';
|
||||
import { camelCase } from 'lodash';
|
||||
import { useMemo } from 'react';
|
||||
|
||||
const enum DocumentType {
|
||||
DeepDOC = 'DeepDOC',
|
||||
PlainText = 'Plain Text',
|
||||
}
|
||||
|
||||
const LayoutRecognize = () => {
|
||||
const { t } = useTranslate('knowledgeDetails');
|
||||
const allOptions = useSelectLlmOptionsByModelType();
|
||||
|
||||
const options = useMemo(() => {
|
||||
const list = [DocumentType.DeepDOC, DocumentType.PlainText].map((x) => ({
|
||||
label: x === DocumentType.PlainText ? t(camelCase(x)) : 'DeepDoc',
|
||||
value: x,
|
||||
}));
|
||||
|
||||
const image2TextList = allOptions[LlmModelType.Image2text].map((x) => {
|
||||
return {
|
||||
...x,
|
||||
options: x.options.map((y) => {
|
||||
return {
|
||||
...y,
|
||||
label: (
|
||||
<div className="flex justify-between items-center gap-2">
|
||||
{y.label}
|
||||
<span className="text-red-500 text-sm">Experimental</span>
|
||||
</div>
|
||||
),
|
||||
};
|
||||
}),
|
||||
};
|
||||
});
|
||||
|
||||
return [...list, ...image2TextList];
|
||||
}, [allOptions, t]);
|
||||
|
||||
return (
|
||||
<Form.Item
|
||||
name={['parser_config', 'layout_recognize']}
|
||||
label={t('layoutRecognize')}
|
||||
initialValue={DocumentType.DeepDOC}
|
||||
tooltip={t('layoutRecognizeTip')}
|
||||
>
|
||||
<Select options={options} popupMatchSelectWidth={false} />
|
||||
</Form.Item>
|
||||
);
|
||||
};
|
||||
|
||||
export default LayoutRecognize;
|
||||
@ -1,51 +0,0 @@
|
||||
import { useTranslate } from '@/hooks/common-hooks';
|
||||
import { useLlmToolsList } from '@/hooks/plugin-hooks';
|
||||
import { Select, Space } from 'antd';
|
||||
|
||||
interface IProps {
|
||||
value?: string;
|
||||
onChange?: (value: string) => void;
|
||||
disabled?: boolean;
|
||||
}
|
||||
|
||||
const LLMToolsSelect = ({ value, onChange, disabled }: IProps) => {
|
||||
const { t } = useTranslate("llmTools");
|
||||
const tools = useLlmToolsList();
|
||||
|
||||
function wrapTranslation(text: string): string {
|
||||
if (!text) {
|
||||
return text;
|
||||
}
|
||||
|
||||
if (text.startsWith("$t:")) {
|
||||
return t(text.substring(3));
|
||||
}
|
||||
|
||||
return text;
|
||||
}
|
||||
|
||||
const toolOptions = tools.map(t => ({
|
||||
label: wrapTranslation(t.displayName),
|
||||
description: wrapTranslation(t.displayDescription),
|
||||
value: t.name,
|
||||
title: wrapTranslation(t.displayDescription),
|
||||
}));
|
||||
|
||||
return (
|
||||
<Select
|
||||
mode="multiple"
|
||||
options={toolOptions}
|
||||
optionRender={option => (
|
||||
<Space size="large">
|
||||
{option.label}
|
||||
{option.data.description}
|
||||
</Space>
|
||||
)}
|
||||
onChange={onChange}
|
||||
value={value}
|
||||
disabled={disabled}
|
||||
></Select>
|
||||
);
|
||||
};
|
||||
|
||||
export default LLMToolsSelect;
|
||||
@ -217,20 +217,23 @@ const MarkdownContent = ({
|
||||
const docType = chunkItem?.doc_type;
|
||||
|
||||
return showImage(docType) ? (
|
||||
<Image
|
||||
id={imageId}
|
||||
className={styles.referenceInnerChunkImage}
|
||||
onClick={
|
||||
documentId
|
||||
? handleDocumentButtonClick(
|
||||
documentId,
|
||||
chunkItem,
|
||||
fileExtension === 'pdf',
|
||||
documentUrl,
|
||||
)
|
||||
: () => {}
|
||||
}
|
||||
></Image>
|
||||
<section>
|
||||
<Image
|
||||
id={imageId}
|
||||
className={styles.referenceInnerChunkImage}
|
||||
onClick={
|
||||
documentId
|
||||
? handleDocumentButtonClick(
|
||||
documentId,
|
||||
chunkItem,
|
||||
fileExtension === 'pdf',
|
||||
documentUrl,
|
||||
)
|
||||
: () => {}
|
||||
}
|
||||
></Image>
|
||||
<span className="text-accent-primary"> {imageId}</span>
|
||||
</section>
|
||||
) : (
|
||||
<HoverCard key={i}>
|
||||
<HoverCardTrigger>
|
||||
|
||||
@ -1,37 +0,0 @@
|
||||
import { useTranslate } from '@/hooks/common-hooks';
|
||||
import { Flex, Form, InputNumber, Slider } from 'antd';
|
||||
|
||||
interface IProps {
|
||||
initialValue?: number;
|
||||
max?: number;
|
||||
}
|
||||
|
||||
const MaxTokenNumber = ({ initialValue = 512, max = 2048 }: IProps) => {
|
||||
const { t } = useTranslate('knowledgeConfiguration');
|
||||
|
||||
return (
|
||||
<Form.Item label={t('chunkTokenNumber')} tooltip={t('chunkTokenNumberTip')}>
|
||||
<Flex gap={20} align="center">
|
||||
<Flex flex={1}>
|
||||
<Form.Item
|
||||
name={['parser_config', 'chunk_token_num']}
|
||||
noStyle
|
||||
initialValue={initialValue}
|
||||
rules={[{ required: true, message: t('chunkTokenNumberMessage') }]}
|
||||
>
|
||||
<Slider max={max} style={{ width: '100%' }} />
|
||||
</Form.Item>
|
||||
</Flex>
|
||||
<Form.Item
|
||||
name={['parser_config', 'chunk_token_num']}
|
||||
noStyle
|
||||
rules={[{ required: true, message: t('chunkTokenNumberMessage') }]}
|
||||
>
|
||||
<InputNumber max={max} min={0} />
|
||||
</Form.Item>
|
||||
</Flex>
|
||||
</Form.Item>
|
||||
);
|
||||
};
|
||||
|
||||
export default MaxTokenNumber;
|
||||
@ -1,4 +1,3 @@
|
||||
import { Form, InputNumber } from 'antd';
|
||||
import { useFormContext } from 'react-hook-form';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
import {
|
||||
@ -10,27 +9,6 @@ import {
|
||||
} from './ui/form';
|
||||
import { NumberInput } from './ui/input';
|
||||
|
||||
const MessageHistoryWindowSizeItem = ({
|
||||
initialValue,
|
||||
}: {
|
||||
initialValue: number;
|
||||
}) => {
|
||||
const { t } = useTranslation();
|
||||
|
||||
return (
|
||||
<Form.Item
|
||||
name={'message_history_window_size'}
|
||||
label={t('flow.messageHistoryWindowSize')}
|
||||
initialValue={initialValue}
|
||||
tooltip={t('flow.messageHistoryWindowSizeTip')}
|
||||
>
|
||||
<InputNumber style={{ width: '100%' }} />
|
||||
</Form.Item>
|
||||
);
|
||||
};
|
||||
|
||||
export default MessageHistoryWindowSizeItem;
|
||||
|
||||
export function MessageHistoryWindowSizeFormField() {
|
||||
const form = useFormContext();
|
||||
const { t } = useTranslation();
|
||||
|
||||
@ -18,7 +18,9 @@ import { cn } from '@/lib/utils';
|
||||
import { t } from 'i18next';
|
||||
import { CircleStop, Paperclip, Send, Upload, X } from 'lucide-react';
|
||||
import * as React from 'react';
|
||||
import { useEffect } from 'react';
|
||||
import { toast } from 'sonner';
|
||||
import { AudioButton } from '../ui/audio-button';
|
||||
|
||||
interface IProps {
|
||||
disabled: boolean;
|
||||
@ -52,6 +54,22 @@ export function NextMessageInput({
|
||||
removeFile,
|
||||
}: IProps) {
|
||||
const [files, setFiles] = React.useState<File[]>([]);
|
||||
const [audioInputValue, setAudioInputValue] = React.useState<string | null>(
|
||||
null,
|
||||
);
|
||||
|
||||
useEffect(() => {
|
||||
if (audioInputValue !== null) {
|
||||
onInputChange({
|
||||
target: { value: audioInputValue },
|
||||
} as React.ChangeEvent<HTMLTextAreaElement>);
|
||||
|
||||
setTimeout(() => {
|
||||
onPressEnter();
|
||||
setAudioInputValue(null);
|
||||
}, 0);
|
||||
}
|
||||
}, [audioInputValue, onInputChange, onPressEnter]);
|
||||
|
||||
const onFileReject = React.useCallback((file: File, message: string) => {
|
||||
toast(message, {
|
||||
@ -171,15 +189,24 @@ export function NextMessageInput({
|
||||
<CircleStop />
|
||||
</Button>
|
||||
) : (
|
||||
<Button
|
||||
className="size-5 rounded-sm"
|
||||
disabled={
|
||||
sendDisabled || isUploading || sendLoading || !value.trim()
|
||||
}
|
||||
>
|
||||
<Send />
|
||||
<span className="sr-only">Send message</span>
|
||||
</Button>
|
||||
<div className="flex items-center gap-3">
|
||||
{/* <div className="bg-bg-input rounded-md hover:bg-bg-card p-1"> */}
|
||||
<AudioButton
|
||||
onOk={(value) => {
|
||||
setAudioInputValue(value);
|
||||
}}
|
||||
/>
|
||||
{/* </div> */}
|
||||
<Button
|
||||
className="size-5 rounded-sm"
|
||||
disabled={
|
||||
sendDisabled || isUploading || sendLoading || !value.trim()
|
||||
}
|
||||
>
|
||||
<Send />
|
||||
<span className="sr-only">Send message</span>
|
||||
</Button>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</form>
|
||||
|
||||
@ -1,51 +0,0 @@
|
||||
import { Form, Input, Modal } from 'antd';
|
||||
|
||||
import { IModalProps } from '@/interfaces/common';
|
||||
import { IFeedbackRequestBody } from '@/interfaces/request/chat';
|
||||
import { useCallback } from 'react';
|
||||
|
||||
type FieldType = {
|
||||
feedback?: string;
|
||||
};
|
||||
|
||||
const FeedbackModal = ({
|
||||
visible,
|
||||
hideModal,
|
||||
onOk,
|
||||
loading,
|
||||
}: IModalProps<IFeedbackRequestBody>) => {
|
||||
const [form] = Form.useForm();
|
||||
|
||||
const handleOk = useCallback(async () => {
|
||||
const ret = await form.validateFields();
|
||||
return onOk?.({ thumbup: false, feedback: ret.feedback });
|
||||
}, [onOk, form]);
|
||||
|
||||
return (
|
||||
<Modal
|
||||
title="Feedback"
|
||||
open={visible}
|
||||
onOk={handleOk}
|
||||
onCancel={hideModal}
|
||||
confirmLoading={loading}
|
||||
>
|
||||
<Form
|
||||
name="basic"
|
||||
labelCol={{ span: 0 }}
|
||||
wrapperCol={{ span: 24 }}
|
||||
style={{ maxWidth: 600 }}
|
||||
autoComplete="off"
|
||||
form={form}
|
||||
>
|
||||
<Form.Item<FieldType>
|
||||
name="feedback"
|
||||
rules={[{ required: true, message: 'Please input your feedback!' }]}
|
||||
>
|
||||
<Input.TextArea rows={8} placeholder="Please input your feedback!" />
|
||||
</Form.Item>
|
||||
</Form>
|
||||
</Modal>
|
||||
);
|
||||
};
|
||||
|
||||
export default FeedbackModal;
|
||||
@ -13,9 +13,9 @@ import {
|
||||
import { Radio, Tooltip } from 'antd';
|
||||
import { useCallback } from 'react';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
import FeedbackModal from './feedback-modal';
|
||||
import FeedbackDialog from '../feedback-dialog';
|
||||
import { PromptDialog } from '../prompt-dialog';
|
||||
import { useRemoveMessage, useSendFeedback, useSpeech } from './hooks';
|
||||
import PromptModal from './prompt-modal';
|
||||
|
||||
interface IProps {
|
||||
messageId: string;
|
||||
@ -79,19 +79,19 @@ export const AssistantGroupButton = ({
|
||||
)}
|
||||
</Radio.Group>
|
||||
{visible && (
|
||||
<FeedbackModal
|
||||
<FeedbackDialog
|
||||
visible={visible}
|
||||
hideModal={hideModal}
|
||||
onOk={onFeedbackOk}
|
||||
loading={loading}
|
||||
></FeedbackModal>
|
||||
></FeedbackDialog>
|
||||
)}
|
||||
{promptVisible && (
|
||||
<PromptModal
|
||||
<PromptDialog
|
||||
visible={promptVisible}
|
||||
hideModal={hidePromptModal}
|
||||
prompt={prompt}
|
||||
></PromptModal>
|
||||
></PromptDialog>
|
||||
)}
|
||||
</>
|
||||
);
|
||||
|
||||
@ -1,30 +0,0 @@
|
||||
import { IModalProps } from '@/interfaces/common';
|
||||
import { IFeedbackRequestBody } from '@/interfaces/request/chat';
|
||||
import { Modal, Space } from 'antd';
|
||||
import HightLightMarkdown from '../highlight-markdown';
|
||||
import SvgIcon from '../svg-icon';
|
||||
|
||||
const PromptModal = ({
|
||||
visible,
|
||||
hideModal,
|
||||
prompt,
|
||||
}: IModalProps<IFeedbackRequestBody> & { prompt?: string }) => {
|
||||
return (
|
||||
<Modal
|
||||
title={
|
||||
<Space>
|
||||
<SvgIcon name={`prompt`} width={18}></SvgIcon>
|
||||
Prompt
|
||||
</Space>
|
||||
}
|
||||
width={'80%'}
|
||||
open={visible}
|
||||
onCancel={hideModal}
|
||||
footer={null}
|
||||
>
|
||||
<HightLightMarkdown>{prompt}</HightLightMarkdown>
|
||||
</Modal>
|
||||
);
|
||||
};
|
||||
|
||||
export default PromptModal;
|
||||
@ -220,20 +220,23 @@ function MarkdownContent({
|
||||
const docType = chunkItem?.doc_type;
|
||||
|
||||
return showImage(docType) ? (
|
||||
<Image
|
||||
id={imageId}
|
||||
className={styles.referenceInnerChunkImage}
|
||||
onClick={
|
||||
documentId
|
||||
? handleDocumentButtonClick(
|
||||
documentId,
|
||||
chunkItem,
|
||||
fileExtension === 'pdf',
|
||||
documentUrl,
|
||||
)
|
||||
: () => {}
|
||||
}
|
||||
></Image>
|
||||
<section>
|
||||
<Image
|
||||
id={imageId}
|
||||
className={styles.referenceInnerChunkImage}
|
||||
onClick={
|
||||
documentId
|
||||
? handleDocumentButtonClick(
|
||||
documentId,
|
||||
chunkItem,
|
||||
fileExtension === 'pdf',
|
||||
documentUrl,
|
||||
)
|
||||
: () => {}
|
||||
}
|
||||
></Image>
|
||||
<span className="text-accent-primary">{imageId}</span>
|
||||
</section>
|
||||
) : (
|
||||
<HoverCard key={i}>
|
||||
<HoverCardTrigger>
|
||||
|
||||
@ -1,51 +0,0 @@
|
||||
import { Form, Input, Modal } from 'antd';
|
||||
|
||||
import { IModalProps } from '@/interfaces/common';
|
||||
import { IFeedbackRequestBody } from '@/interfaces/request/chat';
|
||||
import { useCallback } from 'react';
|
||||
|
||||
type FieldType = {
|
||||
feedback?: string;
|
||||
};
|
||||
|
||||
const FeedbackModal = ({
|
||||
visible,
|
||||
hideModal,
|
||||
onOk,
|
||||
loading,
|
||||
}: IModalProps<IFeedbackRequestBody>) => {
|
||||
const [form] = Form.useForm();
|
||||
|
||||
const handleOk = useCallback(async () => {
|
||||
const ret = await form.validateFields();
|
||||
return onOk?.({ thumbup: false, feedback: ret.feedback });
|
||||
}, [onOk, form]);
|
||||
|
||||
return (
|
||||
<Modal
|
||||
title="Feedback"
|
||||
open={visible}
|
||||
onOk={handleOk}
|
||||
onCancel={hideModal}
|
||||
confirmLoading={loading}
|
||||
>
|
||||
<Form
|
||||
name="basic"
|
||||
labelCol={{ span: 0 }}
|
||||
wrapperCol={{ span: 24 }}
|
||||
style={{ maxWidth: 600 }}
|
||||
autoComplete="off"
|
||||
form={form}
|
||||
>
|
||||
<Form.Item<FieldType>
|
||||
name="feedback"
|
||||
rules={[{ required: true, message: 'Please input your feedback!' }]}
|
||||
>
|
||||
<Input.TextArea rows={8} placeholder="Please input your feedback!" />
|
||||
</Form.Item>
|
||||
</Form>
|
||||
</Modal>
|
||||
);
|
||||
};
|
||||
|
||||
export default FeedbackModal;
|
||||
@ -3,6 +3,8 @@ import CopyToClipboard from '@/components/copy-to-clipboard';
|
||||
import { useSetModalState } from '@/hooks/common-hooks';
|
||||
import { IRemoveMessageById } from '@/hooks/logic-hooks';
|
||||
import { AgentChatContext } from '@/pages/agent/context';
|
||||
import { downloadFile } from '@/services/file-manager-service';
|
||||
import { downloadFileFromBlob } from '@/utils/file-util';
|
||||
import {
|
||||
DeleteOutlined,
|
||||
DislikeOutlined,
|
||||
@ -12,13 +14,13 @@ import {
|
||||
SyncOutlined,
|
||||
} from '@ant-design/icons';
|
||||
import { Radio, Tooltip } from 'antd';
|
||||
import { NotebookText } from 'lucide-react';
|
||||
import { Download, NotebookText } from 'lucide-react';
|
||||
import { useCallback, useContext } from 'react';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
import FeedbackDialog from '../feedback-dialog';
|
||||
import { PromptDialog } from '../prompt-dialog';
|
||||
import { ToggleGroup, ToggleGroupItem } from '../ui/toggle-group';
|
||||
import FeedbackModal from './feedback-modal';
|
||||
import { useRemoveMessage, useSendFeedback, useSpeech } from './hooks';
|
||||
import PromptModal from './prompt-modal';
|
||||
|
||||
interface IProps {
|
||||
messageId: string;
|
||||
@ -28,6 +30,11 @@ interface IProps {
|
||||
audioBinary?: string;
|
||||
showLoudspeaker?: boolean;
|
||||
showLog?: boolean;
|
||||
attachment?: {
|
||||
file_name: string;
|
||||
doc_id: string;
|
||||
format: string;
|
||||
};
|
||||
}
|
||||
|
||||
export const AssistantGroupButton = ({
|
||||
@ -38,6 +45,7 @@ export const AssistantGroupButton = ({
|
||||
showLikeButton,
|
||||
showLoudspeaker = true,
|
||||
showLog = true,
|
||||
attachment,
|
||||
}: IProps) => {
|
||||
const { visible, hideModal, showModal, onFeedbackOk, loading } =
|
||||
useSendFeedback(messageId);
|
||||
@ -98,21 +106,42 @@ export const AssistantGroupButton = ({
|
||||
<NotebookText className="size-4" />
|
||||
</ToggleGroupItem>
|
||||
)}
|
||||
{!!attachment?.doc_id && (
|
||||
<ToggleGroupItem
|
||||
value="g"
|
||||
onClick={async () => {
|
||||
try {
|
||||
const response = await downloadFile({
|
||||
docId: attachment.doc_id,
|
||||
ext: attachment.format,
|
||||
});
|
||||
const blob = new Blob([response.data], {
|
||||
type: response.data.type,
|
||||
});
|
||||
downloadFileFromBlob(blob, attachment.file_name);
|
||||
} catch (error) {
|
||||
console.error('Download failed:', error);
|
||||
}
|
||||
}}
|
||||
>
|
||||
<Download size={16} />
|
||||
</ToggleGroupItem>
|
||||
)}
|
||||
</ToggleGroup>
|
||||
{visible && (
|
||||
<FeedbackModal
|
||||
<FeedbackDialog
|
||||
visible={visible}
|
||||
hideModal={hideModal}
|
||||
onOk={onFeedbackOk}
|
||||
loading={loading}
|
||||
></FeedbackModal>
|
||||
></FeedbackDialog>
|
||||
)}
|
||||
{promptVisible && (
|
||||
<PromptModal
|
||||
<PromptDialog
|
||||
visible={promptVisible}
|
||||
hideModal={hidePromptModal}
|
||||
prompt={prompt}
|
||||
></PromptModal>
|
||||
></PromptDialog>
|
||||
)}
|
||||
</>
|
||||
);
|
||||
|
||||
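The attachment-download handler added above fetches the file through the file-manager service, wraps the response in a Blob, and hands it to `downloadFileFromBlob`. A generic sketch of that final "save a Blob from the browser" step; the helper name and body below are an assumption for illustration, not the repo's implementation:

// Minimal sketch of a client-side "save Blob as file" helper.
export function downloadBlob(blob: Blob, fileName: string): void {
  const url = URL.createObjectURL(blob);
  const anchor = document.createElement('a');
  anchor.href = url;
  anchor.download = fileName;
  document.body.appendChild(anchor);
  anchor.click();
  anchor.remove();
  // Release the object URL once the download has been handed to the browser.
  URL.revokeObjectURL(url);
}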
@ -21,10 +21,8 @@ import { INodeEvent, MessageEventType } from '@/hooks/use-send-message';
|
||||
import { cn } from '@/lib/utils';
|
||||
import { AgentChatContext } from '@/pages/agent/context';
|
||||
import { WorkFlowTimeline } from '@/pages/agent/log-sheet/workflow-timeline';
|
||||
import { downloadFile } from '@/services/file-manager-service';
|
||||
import { downloadFileFromBlob } from '@/utils/file-util';
|
||||
import { isEmpty } from 'lodash';
|
||||
import { Atom, ChevronDown, ChevronUp, Download } from 'lucide-react';
|
||||
import { Atom, ChevronDown, ChevronUp } from 'lucide-react';
|
||||
import MarkdownContent from '../next-markdown-content';
|
||||
import { RAGFlowAvatar } from '../ragflow-avatar';
|
||||
import { useTheme } from '../theme-provider';
|
||||
@ -176,6 +174,7 @@ function MessageItem({
|
||||
audioBinary={item.audio_binary}
|
||||
showLoudspeaker={showLoudspeaker}
|
||||
showLog={showLog}
|
||||
attachment={item.attachment}
|
||||
></AssistantGroupButton>
|
||||
)}
|
||||
{!isShare && (
|
||||
@ -187,6 +186,7 @@ function MessageItem({
|
||||
audioBinary={item.audio_binary}
|
||||
showLoudspeaker={showLoudspeaker}
|
||||
showLog={showLog}
|
||||
attachment={item.attachment}
|
||||
></AssistantGroupButton>
|
||||
)}
|
||||
</>
|
||||
@ -250,7 +250,7 @@ function MessageItem({
|
||||
{isUser && (
|
||||
<UploadedMessageFiles files={item.files}></UploadedMessageFiles>
|
||||
)}
|
||||
{isAssistant && item.attachment && item.attachment.doc_id && (
|
||||
{/* {isAssistant && item.attachment && item.attachment.doc_id && (
|
||||
<div className="w-full flex items-center justify-end">
|
||||
<Button
|
||||
variant="link"
|
||||
@ -275,7 +275,7 @@ function MessageItem({
|
||||
<Download size={16} />
|
||||
</Button>
|
||||
</div>
|
||||
)}
|
||||
)} */}
|
||||
</section>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
@ -1,30 +0,0 @@
|
||||
import { IModalProps } from '@/interfaces/common';
|
||||
import { IFeedbackRequestBody } from '@/interfaces/request/chat';
|
||||
import { Modal, Space } from 'antd';
|
||||
import HightLightMarkdown from '../highlight-markdown';
|
||||
import SvgIcon from '../svg-icon';
|
||||
|
||||
const PromptModal = ({
|
||||
visible,
|
||||
hideModal,
|
||||
prompt,
|
||||
}: IModalProps<IFeedbackRequestBody> & { prompt?: string }) => {
|
||||
return (
|
||||
<Modal
|
||||
title={
|
||||
<Space>
|
||||
<SvgIcon name={`prompt`} width={18}></SvgIcon>
|
||||
Prompt
|
||||
</Space>
|
||||
}
|
||||
width={'80%'}
|
||||
open={visible}
|
||||
onCancel={hideModal}
|
||||
footer={null}
|
||||
>
|
||||
<HightLightMarkdown>{prompt}</HightLightMarkdown>
|
||||
</Modal>
|
||||
);
|
||||
};
|
||||
|
||||
export default PromptModal;
|
||||
@ -1,4 +0,0 @@
.delete {
  // height: 24px;
  display: inline-block;
}
@ -1,90 +0,0 @@
|
||||
import { useShowDeleteConfirm } from '@/hooks/common-hooks';
|
||||
import { DeleteOutlined, MoreOutlined } from '@ant-design/icons';
|
||||
import { Dropdown, MenuProps, Space } from 'antd';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
|
||||
import React, { useMemo } from 'react';
|
||||
import styles from './index.less';
|
||||
|
||||
interface IProps {
|
||||
deleteItem: () => Promise<any> | void;
|
||||
iconFontSize?: number;
|
||||
iconFontColor?: string;
|
||||
items?: MenuProps['items'];
|
||||
height?: number;
|
||||
needsDeletionValidation?: boolean;
|
||||
showDeleteItems?: boolean;
|
||||
}
|
||||
|
||||
const OperateDropdown = ({
|
||||
deleteItem,
|
||||
children,
|
||||
iconFontSize = 30,
|
||||
iconFontColor = 'gray',
|
||||
items: otherItems = [],
|
||||
height = 24,
|
||||
needsDeletionValidation = true,
|
||||
showDeleteItems = true,
|
||||
}: React.PropsWithChildren<IProps>) => {
|
||||
const { t } = useTranslation();
|
||||
const showDeleteConfirm = useShowDeleteConfirm();
|
||||
|
||||
const handleDelete = () => {
|
||||
if (needsDeletionValidation) {
|
||||
showDeleteConfirm({ onOk: deleteItem });
|
||||
} else {
|
||||
deleteItem();
|
||||
}
|
||||
};
|
||||
|
||||
const handleDropdownMenuClick: MenuProps['onClick'] = ({ domEvent, key }) => {
|
||||
domEvent.preventDefault();
|
||||
domEvent.stopPropagation();
|
||||
if (key === '1') {
|
||||
handleDelete();
|
||||
}
|
||||
};
|
||||
|
||||
const items: MenuProps['items'] = useMemo(() => {
|
||||
const items = [];
|
||||
|
||||
if (showDeleteItems) {
|
||||
items.push({
|
||||
key: '1',
|
||||
label: (
|
||||
<Space>
|
||||
{t('common.delete')}
|
||||
<DeleteOutlined />
|
||||
</Space>
|
||||
),
|
||||
});
|
||||
}
|
||||
|
||||
return [...items, ...otherItems];
|
||||
}, [showDeleteItems, otherItems, t]);
|
||||
|
||||
return (
|
||||
<Dropdown
|
||||
menu={{
|
||||
items,
|
||||
onClick: handleDropdownMenuClick,
|
||||
}}
|
||||
>
|
||||
{children || (
|
||||
<span className={styles.delete}>
|
||||
<MoreOutlined
|
||||
rotate={90}
|
||||
style={{
|
||||
fontSize: iconFontSize,
|
||||
color: iconFontColor,
|
||||
cursor: 'pointer',
|
||||
height,
|
||||
}}
|
||||
/>
|
||||
</span>
|
||||
)}
|
||||
</Dropdown>
|
||||
);
|
||||
};
|
||||
|
||||
export default OperateDropdown;
|
||||
@ -1,28 +0,0 @@
|
||||
import { useTranslate } from '@/hooks/common-hooks';
|
||||
import { Flex, Form, InputNumber, Slider } from 'antd';
|
||||
|
||||
const PageRank = () => {
|
||||
const { t } = useTranslate('knowledgeConfiguration');
|
||||
|
||||
return (
|
||||
<Form.Item label={t('pageRank')} tooltip={t('pageRankTip')}>
|
||||
<Flex gap={20} align="center">
|
||||
<Flex flex={1}>
|
||||
<Form.Item
|
||||
name={['pagerank']}
|
||||
noStyle
|
||||
initialValue={0}
|
||||
rules={[{ required: true }]}
|
||||
>
|
||||
<Slider max={100} style={{ width: '100%' }} />
|
||||
</Form.Item>
|
||||
</Flex>
|
||||
<Form.Item name={['pagerank']} noStyle rules={[{ required: true }]}>
|
||||
<InputNumber max={100} min={0} />
|
||||
</Form.Item>
|
||||
</Flex>
|
||||
</Form.Item>
|
||||
);
|
||||
};
|
||||
|
||||
export default PageRank;
|
||||
@ -1,138 +0,0 @@
|
||||
import { DocumentParserType } from '@/constants/knowledge';
|
||||
import { useTranslate } from '@/hooks/common-hooks';
|
||||
import { cn } from '@/lib/utils';
|
||||
import { Form, Select, Switch } from 'antd';
|
||||
import { upperFirst } from 'lodash';
|
||||
import { useCallback, useMemo } from 'react';
|
||||
import { DatasetConfigurationContainer } from '../dataset-configuration-container';
|
||||
import EntityTypesItem from '../entity-types-item';
|
||||
|
||||
const excludedTagParseMethods = [
|
||||
DocumentParserType.Table,
|
||||
DocumentParserType.KnowledgeGraph,
|
||||
DocumentParserType.Tag,
|
||||
];
|
||||
|
||||
export const showTagItems = (parserId: DocumentParserType) => {
|
||||
return !excludedTagParseMethods.includes(parserId);
|
||||
};
|
||||
|
||||
const enum MethodValue {
|
||||
General = 'general',
|
||||
Light = 'light',
|
||||
}
|
||||
|
||||
export const excludedParseMethods = [
|
||||
DocumentParserType.Table,
|
||||
DocumentParserType.Resume,
|
||||
DocumentParserType.Picture,
|
||||
DocumentParserType.KnowledgeGraph,
|
||||
DocumentParserType.Qa,
|
||||
DocumentParserType.Tag,
|
||||
];
|
||||
|
||||
export const showGraphRagItems = (parserId: DocumentParserType | undefined) => {
|
||||
return !excludedParseMethods.some((x) => x === parserId);
|
||||
};
|
||||
|
||||
type GraphRagItemsProps = {
|
||||
marginBottom?: boolean;
|
||||
};
|
||||
|
||||
export function UseGraphRagItem() {
|
||||
const { t } = useTranslate('knowledgeConfiguration');
|
||||
|
||||
return (
|
||||
<Form.Item
|
||||
name={['parser_config', 'graphrag', 'use_graphrag']}
|
||||
label={t('useGraphRag')}
|
||||
initialValue={false}
|
||||
valuePropName="checked"
|
||||
tooltip={t('useGraphRagTip')}
|
||||
>
|
||||
<Switch />
|
||||
</Form.Item>
|
||||
);
|
||||
}
|
||||
|
||||
// The three types "table", "resume" and "one" do not display this configuration.
|
||||
const GraphRagItems = ({ marginBottom = false }: GraphRagItemsProps) => {
|
||||
const { t } = useTranslate('knowledgeConfiguration');
|
||||
|
||||
const methodOptions = useMemo(() => {
|
||||
return [MethodValue.Light, MethodValue.General].map((x) => ({
|
||||
value: x,
|
||||
label: upperFirst(x),
|
||||
}));
|
||||
}, []);
|
||||
|
||||
const renderWideTooltip = useCallback(
|
||||
(title: React.ReactNode | string) => {
|
||||
return {
|
||||
title: typeof title === 'string' ? t(title) : title,
|
||||
overlayInnerStyle: { width: '32vw' },
|
||||
};
|
||||
},
|
||||
[t],
|
||||
);
|
||||
|
||||
return (
|
||||
<DatasetConfigurationContainer className={cn({ 'mb-4': marginBottom })}>
|
||||
<UseGraphRagItem></UseGraphRagItem>
|
||||
<Form.Item
|
||||
shouldUpdate={(prevValues, curValues) =>
|
||||
prevValues.parser_config.graphrag.use_graphrag !==
|
||||
curValues.parser_config.graphrag.use_graphrag
|
||||
}
|
||||
>
|
||||
{({ getFieldValue }) => {
|
||||
const useRaptor = getFieldValue([
|
||||
'parser_config',
|
||||
'graphrag',
|
||||
'use_graphrag',
|
||||
]);
|
||||
|
||||
return (
|
||||
useRaptor && (
|
||||
<>
|
||||
<EntityTypesItem
|
||||
field={['parser_config', 'graphrag', 'entity_types']}
|
||||
></EntityTypesItem>
|
||||
<Form.Item
|
||||
name={['parser_config', 'graphrag', 'method']}
|
||||
label={t('graphRagMethod')}
|
||||
tooltip={renderWideTooltip(
|
||||
<div
|
||||
dangerouslySetInnerHTML={{
|
||||
__html: t('graphRagMethodTip'),
|
||||
}}
|
||||
></div>,
|
||||
)}
|
||||
initialValue={MethodValue.Light}
|
||||
>
|
||||
<Select options={methodOptions} />
|
||||
</Form.Item>
|
||||
<Form.Item
|
||||
name={['parser_config', 'graphrag', 'resolution']}
|
||||
label={t('resolution')}
|
||||
tooltip={renderWideTooltip('resolutionTip')}
|
||||
>
|
||||
<Switch />
|
||||
</Form.Item>
|
||||
<Form.Item
|
||||
name={['parser_config', 'graphrag', 'community']}
|
||||
label={t('community')}
|
||||
tooltip={renderWideTooltip('communityTip')}
|
||||
>
|
||||
<Switch />
|
||||
</Form.Item>
|
||||
</>
|
||||
)
|
||||
);
|
||||
}}
|
||||
</Form.Item>
|
||||
</DatasetConfigurationContainer>
|
||||
);
|
||||
};
|
||||
|
||||
export default GraphRagItems;
|
||||
33
web/src/components/prompt-dialog.tsx
Normal file
@ -0,0 +1,33 @@
import { IModalProps } from '@/interfaces/common';
import { IFeedbackRequestBody } from '@/interfaces/request/chat';
import HightLightMarkdown from './highlight-markdown';
import SvgIcon from './svg-icon';
import { Dialog, DialogContent, DialogHeader, DialogTitle } from './ui/dialog';

type PromptDialogProps = IModalProps<IFeedbackRequestBody> & {
  prompt?: string;
};

export function PromptDialog({
  visible,
  hideModal,
  prompt,
}: PromptDialogProps) {
  return (
    <Dialog open={visible} onOpenChange={hideModal}>
      <DialogContent className="max-w-[80vw]">
        <DialogHeader>
          <DialogTitle>
            <div className="space-x-2">
              <SvgIcon name={`prompt`} width={18}></SvgIcon>
              <span> Prompt</span>
            </div>
          </DialogTitle>
        </DialogHeader>
        <section className="max-h-[80vh] overflow-auto">
          <HightLightMarkdown>{prompt}</HightLightMarkdown>
        </section>
      </DialogContent>
    </Dialog>
  );
}
@ -1,82 +0,0 @@
|
||||
import { useTranslate } from '@/hooks/common-hooks';
|
||||
import { Form, Input, Modal } from 'antd';
|
||||
import { useEffect } from 'react';
|
||||
import { IModalManagerChildrenProps } from '../modal-manager';
|
||||
|
||||
interface IProps extends Omit<IModalManagerChildrenProps, 'showModal'> {
|
||||
loading: boolean;
|
||||
initialName: string;
|
||||
onOk: (name: string) => void;
|
||||
showModal?(): void;
|
||||
}
|
||||
|
||||
const RenameModal = ({
|
||||
visible,
|
||||
hideModal,
|
||||
loading,
|
||||
initialName,
|
||||
onOk,
|
||||
}: IProps) => {
|
||||
const [form] = Form.useForm();
|
||||
const { t } = useTranslate('common');
|
||||
|
||||
type FieldType = {
|
||||
name?: string;
|
||||
};
|
||||
|
||||
const handleOk = async () => {
|
||||
const ret = await form.validateFields();
|
||||
|
||||
return onOk(ret.name);
|
||||
};
|
||||
|
||||
const handleCancel = () => {
|
||||
hideModal();
|
||||
};
|
||||
|
||||
const onFinish = (values: any) => {
|
||||
console.log('Success:', values);
|
||||
};
|
||||
|
||||
const onFinishFailed = (errorInfo: any) => {
|
||||
console.log('Failed:', errorInfo);
|
||||
};
|
||||
|
||||
useEffect(() => {
|
||||
if (visible) {
|
||||
form.setFieldValue('name', initialName);
|
||||
}
|
||||
}, [initialName, form, visible]);
|
||||
|
||||
return (
|
||||
<Modal
|
||||
title={t('rename')}
|
||||
open={visible}
|
||||
onOk={handleOk}
|
||||
onCancel={handleCancel}
|
||||
okButtonProps={{ loading }}
|
||||
confirmLoading={loading}
|
||||
>
|
||||
<Form
|
||||
name="basic"
|
||||
labelCol={{ span: 4 }}
|
||||
wrapperCol={{ span: 20 }}
|
||||
style={{ maxWidth: 600 }}
|
||||
onFinish={onFinish}
|
||||
onFinishFailed={onFinishFailed}
|
||||
autoComplete="off"
|
||||
form={form}
|
||||
>
|
||||
<Form.Item<FieldType>
|
||||
label={t('name')}
|
||||
name="name"
|
||||
rules={[{ required: true, message: t('namePlaceholder') }]}
|
||||
>
|
||||
<Input />
|
||||
</Form.Item>
|
||||
</Form>
|
||||
</Modal>
|
||||
);
|
||||
};
|
||||
|
||||
export default RenameModal;
|
||||
@ -1,8 +1,6 @@
|
||||
import { LlmModelType } from '@/constants/knowledge';
|
||||
import { useTranslate } from '@/hooks/common-hooks';
|
||||
import { useSelectLlmOptionsByModelType } from '@/hooks/use-llm-request';
|
||||
import { Select as AntSelect, Form, message, Slider } from 'antd';
|
||||
import { useCallback } from 'react';
|
||||
import { useFormContext } from 'react-hook-form';
|
||||
import { z } from 'zod';
|
||||
import { SelectWithSearch } from './originui/select-with-search';
|
||||
@ -15,47 +13,6 @@ import {
|
||||
FormMessage,
|
||||
} from './ui/form';
|
||||
|
||||
type FieldType = {
|
||||
rerank_id?: string;
|
||||
top_k?: number;
|
||||
};
|
||||
|
||||
export const RerankItem = () => {
|
||||
const { t } = useTranslate('knowledgeDetails');
|
||||
const allOptions = useSelectLlmOptionsByModelType();
|
||||
const [messageApi, contextHolder] = message.useMessage();
|
||||
|
||||
const handleChange = useCallback(
|
||||
(val: string) => {
|
||||
if (val) {
|
||||
messageApi.open({
|
||||
type: 'warning',
|
||||
content: t('reRankModelWaring'),
|
||||
});
|
||||
}
|
||||
},
|
||||
[messageApi, t],
|
||||
);
|
||||
|
||||
return (
|
||||
<>
|
||||
{contextHolder}
|
||||
<Form.Item
|
||||
label={t('rerankModel')}
|
||||
name={'rerank_id'}
|
||||
tooltip={t('rerankTip')}
|
||||
>
|
||||
<AntSelect
|
||||
options={allOptions[LlmModelType.Rerank]}
|
||||
allowClear
|
||||
placeholder={t('rerankPlaceholder')}
|
||||
onChange={handleChange}
|
||||
/>
|
||||
</Form.Item>
|
||||
</>
|
||||
);
|
||||
};
|
||||
|
||||
export const topKSchema = {
|
||||
top_k: z.number().optional(),
|
||||
};
|
||||
@ -64,35 +21,6 @@ export const initialTopKValue = {
|
||||
top_k: 1024,
|
||||
};
|
||||
|
||||
const Rerank = () => {
|
||||
const { t } = useTranslate('knowledgeDetails');
|
||||
|
||||
return (
|
||||
<>
|
||||
<RerankItem></RerankItem>
|
||||
<Form.Item noStyle dependencies={['rerank_id']}>
|
||||
{({ getFieldValue }) => {
|
||||
const rerankId = getFieldValue('rerank_id');
|
||||
return (
|
||||
rerankId && (
|
||||
<Form.Item<FieldType>
|
||||
label={t('topK')}
|
||||
name={'top_k'}
|
||||
initialValue={1024}
|
||||
tooltip={t('topKTip')}
|
||||
>
|
||||
<Slider max={2048} min={1} />
|
||||
</Form.Item>
|
||||
)
|
||||
);
|
||||
}}
|
||||
</Form.Item>
|
||||
</>
|
||||
);
|
||||
};
|
||||
|
||||
export default Rerank;
|
||||
|
||||
const RerankId = 'rerank_id';
|
||||
|
||||
function RerankFormField() {
|
||||
|
||||
@ -1,11 +0,0 @@
|
||||
.selectFilesCollapse {
|
||||
:global(.ant-collapse-header) {
|
||||
padding-left: 22px;
|
||||
}
|
||||
margin-bottom: 32px;
|
||||
overflow-y: auto;
|
||||
}
|
||||
|
||||
.selectFilesTitle {
|
||||
padding-right: 10px;
|
||||
}
|
||||
@ -1,66 +0,0 @@
|
||||
import { ReactComponent as SelectedFilesCollapseIcon } from '@/assets/svg/selected-files-collapse.svg';
|
||||
import { Collapse, Flex, Space } from 'antd';
|
||||
import SelectFiles from './select-files';
|
||||
|
||||
import {
|
||||
useAllTestingResult,
|
||||
useSelectTestingResult,
|
||||
} from '@/hooks/use-knowledge-request';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
import styles from './index.less';
|
||||
|
||||
interface IProps {
|
||||
onTesting(documentIds: string[]): void;
|
||||
setSelectedDocumentIds(documentIds: string[]): void;
|
||||
selectedDocumentIds: string[];
|
||||
}
|
||||
|
||||
const RetrievalDocuments = ({
|
||||
onTesting,
|
||||
selectedDocumentIds,
|
||||
setSelectedDocumentIds,
|
||||
}: IProps) => {
|
||||
const { t } = useTranslation();
|
||||
const { documents: documentsAll } = useAllTestingResult();
|
||||
const { documents } = useSelectTestingResult();
|
||||
const { documents: useDocuments } = {
|
||||
documents:
|
||||
documentsAll?.length > documents?.length ? documentsAll : documents,
|
||||
};
|
||||
|
||||
return (
|
||||
<Collapse
|
||||
expandIcon={() => <SelectedFilesCollapseIcon></SelectedFilesCollapseIcon>}
|
||||
className={styles.selectFilesCollapse}
|
||||
items={[
|
||||
{
|
||||
key: '1',
|
||||
label: (
|
||||
<Flex
|
||||
justify={'space-between'}
|
||||
align="center"
|
||||
className={styles.selectFilesTitle}
|
||||
>
|
||||
<Space>
|
||||
<span>
|
||||
{selectedDocumentIds?.length ?? 0}/{useDocuments?.length ?? 0}
|
||||
</span>
|
||||
{t('knowledgeDetails.filesSelected')}
|
||||
</Space>
|
||||
</Flex>
|
||||
),
|
||||
children: (
|
||||
<div>
|
||||
<SelectFiles
|
||||
setSelectedDocumentIds={setSelectedDocumentIds}
|
||||
handleTesting={onTesting}
|
||||
></SelectFiles>
|
||||
</div>
|
||||
),
|
||||
},
|
||||
]}
|
||||
/>
|
||||
);
|
||||
};
|
||||
|
||||
export default RetrievalDocuments;
|
||||
@ -1,79 +0,0 @@
|
||||
import NewDocumentLink from '@/components/new-document-link';
|
||||
import { useTranslate } from '@/hooks/common-hooks';
|
||||
import {
|
||||
useAllTestingResult,
|
||||
useSelectTestingResult,
|
||||
} from '@/hooks/use-knowledge-request';
|
||||
import { ITestingDocument } from '@/interfaces/database/knowledge';
|
||||
import { EyeOutlined } from '@ant-design/icons';
|
||||
import { Button, Table, TableProps, Tooltip } from 'antd';
|
||||
|
||||
interface IProps {
|
||||
handleTesting: (ids: string[]) => void;
|
||||
setSelectedDocumentIds: (ids: string[]) => void;
|
||||
}
|
||||
|
||||
const SelectFiles = ({ setSelectedDocumentIds, handleTesting }: IProps) => {
|
||||
const { documents } = useSelectTestingResult();
|
||||
const { documents: documentsAll } = useAllTestingResult();
|
||||
const useDocuments =
|
||||
documentsAll?.length > documents?.length ? documentsAll : documents;
|
||||
const { t } = useTranslate('fileManager');
|
||||
|
||||
const columns: TableProps<ITestingDocument>['columns'] = [
|
||||
{
|
||||
title: 'Name',
|
||||
dataIndex: 'doc_name',
|
||||
key: 'doc_name',
|
||||
render: (text) => <p>{text}</p>,
|
||||
},
|
||||
|
||||
{
|
||||
title: 'Hits',
|
||||
dataIndex: 'count',
|
||||
key: 'count',
|
||||
width: 80,
|
||||
},
|
||||
{
|
||||
title: 'View',
|
||||
key: 'view',
|
||||
width: 50,
|
||||
render: (_, { doc_id, doc_name }) => (
|
||||
<NewDocumentLink
|
||||
documentName={doc_name}
|
||||
documentId={doc_id}
|
||||
prefix="document"
|
||||
>
|
||||
<Tooltip title={t('preview')}>
|
||||
<Button type="text">
|
||||
<EyeOutlined size={20} />
|
||||
</Button>
|
||||
</Tooltip>
|
||||
</NewDocumentLink>
|
||||
),
|
||||
},
|
||||
];
|
||||
|
||||
const rowSelection = {
|
||||
onChange: (selectedRowKeys: React.Key[]) => {
|
||||
handleTesting(selectedRowKeys as string[]);
|
||||
setSelectedDocumentIds(selectedRowKeys as string[]);
|
||||
},
|
||||
getCheckboxProps: (record: ITestingDocument) => ({
|
||||
disabled: record.doc_name === 'Disabled User', // Column configuration not to be checked
|
||||
name: record.doc_name,
|
||||
}),
|
||||
};
|
||||
|
||||
return (
|
||||
<Table
|
||||
columns={columns}
|
||||
dataSource={useDocuments}
|
||||
showHeader={false}
|
||||
rowSelection={rowSelection}
|
||||
rowKey={'doc_id'}
|
||||
/>
|
||||
);
|
||||
};
|
||||
|
||||
export default SelectFiles;
|
||||
@ -1,7 +1,6 @@
|
||||
import { FormLayout } from '@/constants/form';
|
||||
import { useTranslate } from '@/hooks/common-hooks';
|
||||
import { cn } from '@/lib/utils';
|
||||
import { Form, Slider } from 'antd';
|
||||
import { useFormContext } from 'react-hook-form';
|
||||
import { z } from 'zod';
|
||||
import { SliderInputFormField } from '../slider-input-form-field';
|
||||
@ -15,46 +14,6 @@ import {
|
||||
} from '../ui/form';
|
||||
import { NumberInput } from '../ui/input';
|
||||
|
||||
type FieldType = {
|
||||
similarity_threshold?: number;
|
||||
// vector_similarity_weight?: number;
|
||||
};
|
||||
|
||||
interface IProps {
|
||||
isTooltipShown?: boolean;
|
||||
vectorSimilarityWeightName?: string;
|
||||
}
|
||||
|
||||
const SimilaritySlider = ({
|
||||
isTooltipShown = false,
|
||||
vectorSimilarityWeightName = 'vector_similarity_weight',
|
||||
}: IProps) => {
|
||||
const { t } = useTranslate('knowledgeDetails');
|
||||
|
||||
return (
|
||||
<>
|
||||
<Form.Item<FieldType>
|
||||
label={t('similarityThreshold')}
|
||||
name={'similarity_threshold'}
|
||||
tooltip={isTooltipShown && t('similarityThresholdTip')}
|
||||
initialValue={0.2}
|
||||
>
|
||||
<Slider max={1} step={0.01} />
|
||||
</Form.Item>
|
||||
<Form.Item
|
||||
label={t('vectorSimilarityWeight')}
|
||||
name={vectorSimilarityWeightName}
|
||||
initialValue={1 - 0.3}
|
||||
tooltip={isTooltipShown && t('vectorSimilarityWeightTip')}
|
||||
>
|
||||
<Slider max={1} step={0.01} />
|
||||
</Form.Item>
|
||||
</>
|
||||
);
|
||||
};
|
||||
|
||||
export default SimilaritySlider;
|
||||
|
||||
interface SimilaritySliderFormFieldProps {
|
||||
similarityName?: string;
|
||||
vectorSimilarityWeightName?: string;
|
||||
|
||||
@ -1,28 +0,0 @@
|
||||
import { useTranslate } from '@/hooks/common-hooks';
|
||||
import { Form, Input, Typography } from 'antd';
|
||||
|
||||
interface IProps {
|
||||
name?: string | string[];
|
||||
}
|
||||
|
||||
export function TavilyItem({
|
||||
name = ['prompt_config', 'tavily_api_key'],
|
||||
}: IProps) {
|
||||
const { t } = useTranslate('chat');
|
||||
|
||||
return (
|
||||
<Form.Item label={'Tavily API Key'} tooltip={t('tavilyApiKeyTip')}>
|
||||
<div className="flex flex-col gap-1">
|
||||
<Form.Item name={name} noStyle>
|
||||
<Input.Password
|
||||
placeholder={t('tavilyApiKeyMessage')}
|
||||
autoComplete="new-password"
|
||||
/>
|
||||
</Form.Item>
|
||||
<Typography.Link href="https://app.tavily.com/home" target={'_blank'}>
|
||||
{t('tavilyApiKeyHelp')}
|
||||
</Typography.Link>
|
||||
</div>
|
||||
</Form.Item>
|
||||
);
|
||||
}
|
||||
@ -1,35 +1,8 @@
|
||||
import { FormLayout } from '@/constants/form';
|
||||
import { useTranslate } from '@/hooks/common-hooks';
|
||||
import { Form, Slider } from 'antd';
|
||||
import { z } from 'zod';
|
||||
import { SliderInputFormField } from './slider-input-form-field';
|
||||
|
||||
type FieldType = {
|
||||
top_n?: number;
|
||||
};
|
||||
|
||||
interface IProps {
|
||||
initialValue?: number;
|
||||
max?: number;
|
||||
}
|
||||
|
||||
const TopNItem = ({ initialValue = 8, max = 30 }: IProps) => {
|
||||
const { t } = useTranslate('chat');
|
||||
|
||||
return (
|
||||
<Form.Item<FieldType>
|
||||
label={t('topN')}
|
||||
name={'top_n'}
|
||||
initialValue={initialValue}
|
||||
tooltip={t('topNTip')}
|
||||
>
|
||||
<Slider max={max} />
|
||||
</Form.Item>
|
||||
);
|
||||
};
|
||||
|
||||
export default TopNItem;
|
||||
|
||||
interface SimilaritySliderFormFieldProps {
|
||||
max?: number;
|
||||
}
|
||||
|
||||
422
web/src/components/ui/audio-button.tsx
Normal file
@ -0,0 +1,422 @@
|
||||
import { AudioRecorder, useAudioRecorder } from 'react-audio-voice-recorder';
|
||||
|
||||
import { Button } from '@/components/ui/button';
|
||||
import { Authorization } from '@/constants/authorization';
|
||||
import { cn } from '@/lib/utils';
|
||||
import api from '@/utils/api';
|
||||
import { getAuthorization } from '@/utils/authorization-util';
|
||||
import { Loader2, Mic, Square } from 'lucide-react';
|
||||
import { useEffect, useRef, useState } from 'react';
|
||||
import { useIsDarkTheme } from '../theme-provider';
|
||||
import { Input } from './input';
|
||||
import { Popover, PopoverContent, PopoverTrigger } from './popover';
|
||||
const VoiceVisualizer = ({ isRecording }: { isRecording: boolean }) => {
|
||||
const canvasRef = useRef<HTMLCanvasElement>(null);
|
||||
const audioContextRef = useRef<AudioContext | null>(null);
|
||||
const analyserRef = useRef<AnalyserNode | null>(null);
|
||||
const animationFrameRef = useRef<number>(0);
|
||||
const streamRef = useRef<MediaStream | null>(null);
|
||||
const isDark = useIsDarkTheme();
|
||||
|
||||
const startVisualization = async () => {
|
||||
try {
|
||||
// Check if the browser supports getUserMedia
|
||||
if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
|
||||
console.error('Browser does not support getUserMedia API');
|
||||
return;
|
||||
}
|
||||
// Request microphone permission
|
||||
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
|
||||
streamRef.current = stream;
|
||||
|
||||
// Create audio context and analyzer
|
||||
const audioContext = new (window.AudioContext ||
|
||||
(window as any).webkitAudioContext)();
|
||||
audioContextRef.current = audioContext;
|
||||
|
||||
const analyser = audioContext.createAnalyser();
|
||||
analyserRef.current = analyser;
|
||||
analyser.fftSize = 32;
|
||||
|
||||
// Connect audio nodes
|
||||
const source = audioContext.createMediaStreamSource(stream);
|
||||
source.connect(analyser);
|
||||
|
||||
// Start drawing
|
||||
draw();
|
||||
} catch (error) {
|
||||
console.error(
|
||||
'Unable to access microphone for voice visualization:',
|
||||
error,
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
const stopVisualization = () => {
|
||||
// Stop animation frame
|
||||
if (animationFrameRef.current) {
|
||||
cancelAnimationFrame(animationFrameRef.current);
|
||||
}
|
||||
|
||||
// Stop audio stream
|
||||
if (streamRef.current) {
|
||||
streamRef.current.getTracks().forEach((track) => track.stop());
|
||||
}
|
||||
|
||||
// Close audio context
|
||||
if (audioContextRef.current && audioContextRef.current.state !== 'closed') {
|
||||
audioContextRef.current.close();
|
||||
}
|
||||
|
||||
// Clear canvas
|
||||
const canvas = canvasRef.current;
|
||||
if (canvas) {
|
||||
const ctx = canvas.getContext('2d');
|
||||
if (ctx) {
|
||||
ctx.clearRect(0, 0, canvas.width, canvas.height);
|
||||
}
|
||||
}
|
||||
};
|
||||
useEffect(() => {
|
||||
if (isRecording) {
|
||||
startVisualization();
|
||||
} else {
|
||||
stopVisualization();
|
||||
}
|
||||
|
||||
return () => {
|
||||
stopVisualization();
|
||||
};
|
||||
}, [isRecording]);
|
||||
const draw = () => {
|
||||
const canvas = canvasRef.current;
|
||||
if (!canvas) return;
|
||||
|
||||
const ctx = canvas.getContext('2d');
|
||||
if (!ctx) return;
|
||||
|
||||
const analyser = analyserRef.current;
|
||||
if (!analyser) return;
|
||||
|
||||
// Set canvas dimensions
|
||||
const width = canvas.clientWidth;
|
||||
const height = canvas.clientHeight;
|
||||
const centerY = height / 2;
|
||||
|
||||
if (canvas.width !== width || canvas.height !== height) {
|
||||
canvas.width = width;
|
||||
canvas.height = height;
|
||||
}
|
||||
|
||||
// Clear canvas
|
||||
ctx.clearRect(0, 0, width, height);
|
||||
|
||||
// Get frequency data
|
||||
const bufferLength = analyser.frequencyBinCount;
|
||||
const dataArray = new Uint8Array(bufferLength);
|
||||
analyser.getByteFrequencyData(dataArray);
|
||||
|
||||
// Draw waveform
|
||||
const barWidth = (width / bufferLength) * 1.5;
|
||||
let x = 0;
|
||||
|
||||
for (let i = 0; i < bufferLength; i = i + 2) {
|
||||
const barHeight = (dataArray[i] / 255) * centerY;
|
||||
|
||||
// Create gradient
|
||||
const gradient = ctx.createLinearGradient(
|
||||
0,
|
||||
centerY - barHeight,
|
||||
0,
|
||||
centerY + barHeight,
|
||||
);
|
||||
gradient.addColorStop(0, '#3ba05c'); // Blue
|
||||
gradient.addColorStop(1, '#3ba05c'); // Light blue
|
||||
// gradient.addColorStop(0, isDark ? '#fff' : '#000'); // Blue
|
||||
// gradient.addColorStop(1, isDark ? '#eee' : '#eee'); // Light blue
|
||||
|
||||
ctx.fillStyle = gradient;
|
||||
ctx.fillRect(x, centerY - barHeight, barWidth, barHeight * 2);
|
||||
|
||||
x += barWidth + 2;
|
||||
}
|
||||
|
||||
animationFrameRef.current = requestAnimationFrame(draw);
|
||||
};
|
||||
|
||||
return (
|
||||
<div className="w-full h-6 bg-transparent flex items-center justify-center overflow-hidden ">
|
||||
<canvas ref={canvasRef} className="w-full h-full" />
|
||||
</div>
|
||||
);
|
||||
};
|
||||
|
||||
const VoiceInputBox = ({
|
||||
isRecording,
|
||||
onStop,
|
||||
recordingTime,
|
||||
value,
|
||||
}: {
|
||||
value: string;
|
||||
isRecording: boolean;
|
||||
onStop: () => void;
|
||||
recordingTime: number;
|
||||
}) => {
|
||||
// Format recording time
|
||||
const formatTime = (seconds: number) => {
|
||||
const mins = Math.floor(seconds / 60);
|
||||
const secs = seconds % 60;
|
||||
return `${mins.toString().padStart(2, '0')}:${secs.toString().padStart(2, '0')}`;
|
||||
};
|
||||
|
||||
return (
|
||||
<div className="w-full">
|
||||
<div className=" absolute w-full h-6 translate-y-full">
|
||||
<VoiceVisualizer isRecording={isRecording} />
|
||||
</div>
|
||||
<Input
|
||||
rootClassName="w-full"
|
||||
className="flex-1 "
|
||||
readOnly
|
||||
value={value}
|
||||
suffix={
|
||||
<div className="flex justify-end px-1 items-center gap-1 w-20">
|
||||
<Button
|
||||
variant={'ghost'}
|
||||
size="sm"
|
||||
className="text-text-primary p-1 border-none hover:bg-transparent"
|
||||
onClick={onStop}
|
||||
>
|
||||
<Square className="text-text-primary" size={12} />
|
||||
</Button>
|
||||
<span className="text-xs text-text-secondary">
|
||||
{formatTime(recordingTime)}
|
||||
</span>
|
||||
</div>
|
||||
}
|
||||
/>
|
||||
</div>
|
||||
);
|
||||
};
|
||||
export const AudioButton = ({
|
||||
onOk,
|
||||
}: {
|
||||
onOk?: (transcript: string) => void;
|
||||
}) => {
|
||||
// const [showInputBox, setShowInputBox] = useState(false);
|
||||
const [isRecording, setIsRecording] = useState(false);
|
||||
const [isProcessing, setIsProcessing] = useState(false);
|
||||
const [recordingTime, setRecordingTime] = useState(0);
|
||||
const [transcript, setTranscript] = useState('');
|
||||
const [popoverOpen, setPopoverOpen] = useState(false);
|
||||
const recorderControls = useAudioRecorder();
|
||||
const intervalRef = useRef<NodeJS.Timeout | null>(null);
|
||||
// Handle logic after recording is complete
|
||||
const handleRecordingComplete = async (blob: Blob) => {
|
||||
setIsRecording(false);
|
||||
|
||||
// const url = URL.createObjectURL(blob);
|
||||
// const a = document.createElement('a');
|
||||
// a.href = url;
|
||||
// a.download = 'recording.webm';
|
||||
// document.body.appendChild(a);
|
||||
// a.click();
|
||||
|
||||
setIsProcessing(true);
|
||||
if (intervalRef.current) {
|
||||
clearInterval(intervalRef.current);
|
||||
intervalRef.current = null;
|
||||
}
|
||||
try {
|
||||
const audioFile = new File([blob], 'recording.webm', {
|
||||
type: blob.type || 'audio/webm',
|
||||
// type: 'audio/mpeg',
|
||||
});
|
||||
|
||||
const formData = new FormData();
|
||||
formData.append('file', audioFile);
|
||||
formData.append('stream', 'false');
|
||||
|
||||
const response = await fetch(api.sequence2txt, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
[Authorization]: getAuthorization(),
|
||||
// 'Content-Type': blob.type || 'audio/webm',
|
||||
},
|
||||
body: formData,
|
||||
});
|
||||
|
||||
// if (!response.ok) {
|
||||
// throw new Error(`HTTP error! status: ${response.status}`);
|
||||
// }
|
||||
|
||||
// if (!response.body) {
|
||||
// throw new Error('ReadableStream not supported in this browser');
|
||||
// }
|
||||
|
||||
console.log('Response:', response);
|
||||
const { data, code } = await response.json();
|
||||
if (code === 0 && data && data.text) {
|
||||
setTranscript(data.text);
|
||||
console.log('Transcript:', data.text);
|
||||
onOk?.(data.text);
|
||||
}
|
||||
setPopoverOpen(false);
|
||||
} catch (error) {
|
||||
console.error('Failed to process audio:', error);
|
||||
// setTranscript(t('voiceRecorder.processingError'));
|
||||
} finally {
|
||||
setIsProcessing(false);
|
||||
}
|
||||
};
|
||||
|
||||
// Start recording
|
||||
const startRecording = () => {
|
||||
recorderControls.startRecording();
|
||||
setIsRecording(true);
|
||||
// setShowInputBox(true);
|
||||
setPopoverOpen(true);
|
||||
setRecordingTime(0);
|
||||
|
||||
// Start timing
|
||||
if (intervalRef.current) {
|
||||
clearInterval(intervalRef.current);
|
||||
}
|
||||
intervalRef.current = setInterval(() => {
|
||||
setRecordingTime((prev) => prev + 1);
|
||||
}, 1000);
|
||||
};
|
||||
|
||||
// Stop recording
|
||||
const stopRecording = () => {
|
||||
recorderControls.stopRecording();
|
||||
setIsRecording(false);
|
||||
// setShowInputBox(false);
|
||||
setPopoverOpen(false);
|
||||
setRecordingTime(0);
|
||||
|
||||
// Clear timer
|
||||
if (intervalRef.current) {
|
||||
clearInterval(intervalRef.current);
|
||||
intervalRef.current = null;
|
||||
}
|
||||
};
|
||||
|
||||
// Clear transcription content
|
||||
// const clearTranscript = () => {
|
||||
// setTranscript('');
|
||||
// };
|
||||
|
||||
useEffect(() => {
|
||||
return () => {
|
||||
if (intervalRef.current) {
|
||||
clearInterval(intervalRef.current);
|
||||
}
|
||||
};
|
||||
}, []);
|
||||
return (
|
||||
<div>
|
||||
{false && (
|
||||
<div className="flex flex-col items-center space-y-4">
|
||||
<div className="relative">
|
||||
<Popover
|
||||
open={popoverOpen}
|
||||
onOpenChange={(open) => {
|
||||
setPopoverOpen(true);
|
||||
}}
|
||||
>
|
||||
<PopoverTrigger asChild>
|
||||
<Button
|
||||
variant="outline"
|
||||
size="sm"
|
||||
onClick={() => {
|
||||
if (isRecording) {
|
||||
stopRecording();
|
||||
} else {
|
||||
startRecording();
|
||||
}
|
||||
}}
|
||||
className={`w-6 h-6 p-2 rounded-full border-none bg-transparent hover:bg-transparent ${
|
||||
isRecording ? 'animate-pulse' : ''
|
||||
}`}
|
||||
disabled={isProcessing}
|
||||
>
|
||||
<Mic size={16} className="text-text-primary" />
|
||||
</Button>
|
||||
</PopoverTrigger>
|
||||
<PopoverContent
|
||||
align="end"
|
||||
sideOffset={-20}
|
||||
className="p-0 border-none"
|
||||
>
|
||||
<VoiceInputBox
|
||||
isRecording={isRecording}
|
||||
value={transcript}
|
||||
onStop={stopRecording}
|
||||
recordingTime={recordingTime}
|
||||
/>
|
||||
</PopoverContent>
|
||||
</Popover>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
<div className=" relative w-6 h-6 flex items-center justify-center">
|
||||
{isRecording && (
|
||||
<div
|
||||
className={cn(
|
||||
'absolute inset-0 w-full h-6 rounded-full overflow-hidden flex items-center justify-center p-1',
|
||||
{ 'bg-state-success-5': isRecording },
|
||||
)}
|
||||
>
|
||||
<VoiceVisualizer isRecording={isRecording} />
|
||||
</div>
|
||||
)}
|
||||
{isRecording && (
|
||||
<div className="absolute inset-0 rounded-full border-2 border-state-success animate-ping opacity-75"></div>
|
||||
)}
|
||||
<Button
|
||||
variant="outline"
|
||||
size="sm"
|
||||
// onMouseDown={() => {
|
||||
// startRecording();
|
||||
// }}
|
||||
// onMouseUp={() => {
|
||||
// stopRecording();
|
||||
// }}
|
||||
onClick={() => {
|
||||
if (isRecording) {
|
||||
stopRecording();
|
||||
} else {
|
||||
startRecording();
|
||||
}
|
||||
}}
|
||||
className={`w-6 h-6 p-2 rounded-md border-none bg-transparent hover:bg-state-success-5 ${
|
||||
isRecording
|
||||
? 'animate-pulse bg-state-success-5 text-state-success'
|
||||
: ''
|
||||
}`}
|
||||
disabled={isProcessing}
|
||||
>
|
||||
{isProcessing ? (
|
||||
<Loader2 size={16} className=" animate-spin" />
|
||||
) : isRecording ? (
|
||||
<></>
|
||||
) : (
|
||||
// <Mic size={16} className="text-text-primary" />
|
||||
// <Square size={12} className="text-text-primary" />
|
||||
<Mic size={16} />
|
||||
)}
|
||||
</Button>
|
||||
</div>
|
||||
|
||||
{/* Hide original component */}
|
||||
<div className="hidden">
|
||||
<AudioRecorder
|
||||
onRecordingComplete={handleRecordingComplete}
|
||||
recorderControls={recorderControls}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
};
|
||||
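The new AudioButton component above posts the recorded Blob as multipart form data to the speech-to-text endpoint and reads `data.text` out of the JSON reply. A stripped-down sketch of that upload step, assuming an endpoint that returns the same `{ code, data: { text } }` shape; the function name, endpoint URL, and auth header value are placeholders, not part of the diff:

// Minimal sketch: send a recorded audio Blob for transcription.
async function transcribeRecording(
  blob: Blob,
  endpoint: string,
  authHeader: string,
): Promise<string | null> {
  const formData = new FormData();
  formData.append(
    'file',
    new File([blob], 'recording.webm', { type: blob.type || 'audio/webm' }),
  );
  formData.append('stream', 'false');

  const response = await fetch(endpoint, {
    method: 'POST',
    headers: { Authorization: authHeader },
    body: formData,
  });

  const { code, data } = await response.json();
  // Success is assumed to look like { code: 0, data: { text: '...' } }.
  return code === 0 && data?.text ? data.text : null;
}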
@ -9,10 +9,23 @@ export interface InputProps
|
||||
value?: string | number | readonly string[] | undefined;
|
||||
prefix?: React.ReactNode;
|
||||
suffix?: React.ReactNode;
|
||||
rootClassName?: string;
|
||||
}
|
||||
|
||||
const Input = React.forwardRef<HTMLInputElement, InputProps>(
|
||||
({ className, type, value, onChange, prefix, suffix, ...props }, ref) => {
|
||||
(
|
||||
{
|
||||
className,
|
||||
rootClassName,
|
||||
type,
|
||||
value,
|
||||
onChange,
|
||||
prefix,
|
||||
suffix,
|
||||
...props
|
||||
},
|
||||
ref,
|
||||
) => {
|
||||
const isControlled = value !== undefined;
|
||||
const { defaultValue, ...restProps } = props;
|
||||
const inputValue = isControlled ? value : defaultValue;
|
||||
@ -89,7 +102,7 @@ const Input = React.forwardRef<HTMLInputElement, InputProps>(
|
||||
|
||||
if (prefix || suffix || isPasswordInput) {
|
||||
return (
|
||||
<div className="relative">
|
||||
<div className={cn('relative', rootClassName)}>
|
||||
{prefix && (
|
||||
<span
|
||||
ref={prefixRef}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff.