Compare commits

...

3 Commits

Author SHA1 Message Date
d039d1e73d fix: Added dataset generation logging functionality #9869 (#10180)
### What problem does this PR solve?

fix: Added dataset generation logging functionality #9869

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2025-09-22 10:01:34 +08:00
d050ef568d Feat: support dataflow run. (#10182)
### What problem does this PR solve?


### Type of change

- [x] New Feature (non-breaking change which adds functionality)
2025-09-22 09:36:21 +08:00
028c2d83e9 Feat: parse email (#10181)
### What problem does this PR solve?

- Dataflow supports email.
- Fix the old email parser.
- Add a new dependency to parse .msg files.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
- [x] New Feature (non-breaking change which adds functionality)
- [x] Other (please describe): add a new dependency.
2025-09-22 09:29:38 +08:00
28 changed files with 850 additions and 203 deletions

View File

@ -144,11 +144,10 @@ def run():
if cvs.canvas_category == CanvasCategory.DataFlow:
task_id = get_uuid()
flow_id = get_uuid()
ok, error_message = queue_dataflow(dsl=cvs.dsl, tenant_id=user_id, file=files[0], task_id=task_id, flow_id=flow_id, priority=0)
ok, error_message = queue_dataflow(tenant_id=user_id, flow_id=req["id"], task_id=task_id, file=files[0], priority=0)
if not ok:
return server_error_response(error_message)
return get_json_result(data={"task_id": task_id, "message_id": flow_id})
return get_data_error_result(message=error_message)
return get_json_result(data={"message_id": task_id})
try:
canvas = Canvas(cvs.dsl, current_user.id, req["id"])

View File

@ -496,7 +496,7 @@ class FileService(CommonService):
return ParserType.AUDIO.value
if re.search(r"\.(ppt|pptx|pages)$", filename):
return ParserType.PRESENTATION.value
if re.search(r"\.(eml)$", filename):
if re.search(r"\.(msg|eml)$", filename):
return ParserType.EMAIL.value
return default

View File

@ -472,14 +472,10 @@ def has_canceled(task_id):
return False
def queue_dataflow(dsl:str, tenant_id:str, task_id:str, flow_id:str=None, doc_id:str=None, file:dict=None, priority: int=0, callback=None) -> tuple[bool, str]:
"""
Returns a tuple (success: bool, error_message: str).
"""
_ = callback
def queue_dataflow(tenant_id:str, flow_id:str, task_id:str, doc_id:str="x", file:dict=None, priority: int=0) -> tuple[bool, str]:
task = dict(
id=get_uuid() if not task_id else task_id,
id=task_id,
doc_id=doc_id,
from_page=0,
to_page=100000000,
@ -490,15 +486,10 @@ def queue_dataflow(dsl:str, tenant_id:str, task_id:str, flow_id:str=None, doc_id
TaskService.model.delete().where(TaskService.model.id == task["id"]).execute()
bulk_insert_into_db(model=Task, data_source=[task], replace_on_conflict=True)
kb_id = DocumentService.get_knowledgebase_id(doc_id)
if not kb_id:
return False, f"Can't find KB of this document: {doc_id}"
task["kb_id"] = kb_id
task["kb_id"] = DocumentService.get_knowledgebase_id(doc_id)
task["tenant_id"] = tenant_id
task["task_type"] = "dataflow"
task["dsl"] = dsl
task["dataflow_id"] = get_uuid() if not flow_id else flow_id
task["dataflow_id"] = flow_id
task["file"] = file
if not REDIS_CONN.queue_product(

View File

@ -155,7 +155,7 @@ def filename_type(filename):
if re.match(r".*\.pdf$", filename):
return FileType.PDF.value
if re.match(r".*\.(eml|doc|docx|ppt|pptx|yml|xml|htm|json|jsonl|ldjson|csv|txt|ini|xls|xlsx|wps|rtf|hlp|pages|numbers|key|md|py|js|java|c|cpp|h|php|go|ts|sh|cs|kt|html|sql)$", filename):
if re.match(r".*\.(msg|eml|doc|docx|ppt|pptx|yml|xml|htm|json|jsonl|ldjson|csv|txt|ini|xls|xlsx|wps|rtf|hlp|pages|numbers|key|md|py|js|java|c|cpp|h|php|go|ts|sh|cs|kt|html|sql)$", filename):
return FileType.DOC.value
if re.match(r".*\.(wav|flac|ape|alac|wavpack|wv|mp3|aac|ogg|vorbis|opus)$", filename):

View File

@ -34,6 +34,7 @@ dependencies = [
"elastic-transport==8.12.0",
"elasticsearch==8.12.1",
"elasticsearch-dsl==8.12.0",
"extract-msg>=0.39.0",
"filelock==3.15.4",
"flask==3.0.3",
"flask-cors==5.0.0",

View File

@ -78,7 +78,7 @@ def chunk(
_add_content(msg, msg.get_content_type())
sections = TxtParser.parser_txt("\n".join(text_txt)) + [
(line, "") for line in HtmlParser.parser_txt("\n".join(html_txt)) if line
(line, "") for line in HtmlParser.parser_txt("\n".join(html_txt), chunk_token_num=parser_config["chunk_token_num"]) if line
]
st = timer()

View File

@ -35,9 +35,9 @@ class ProcessBase(ComponentBase):
def __init__(self, pipeline, id, param: ProcessParamBase):
super().__init__(pipeline, id, param)
if hasattr(self._canvas, "callback"):
self.callback = partial(self._canvas.callback, self.component_name)
self.callback = partial(self._canvas.callback, id)
else:
self.callback = partial(lambda *args, **kwargs: None, self.component_name)
self.callback = partial(lambda *args, **kwargs: None, id)
async def invoke(self, **kwargs) -> dict[str, Any]:
self.set_output("_created_time", time.perf_counter())

View File

@ -13,7 +13,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import io
import json
import logging
import os
import random
from functools import partial
@ -57,7 +59,10 @@ class ParserParam(ProcessParamBase):
"image": [
"text"
],
"email": [],
"email": [
"text",
"json"
],
"text": [
"text",
"json"
@ -71,7 +76,6 @@ class ParserParam(ProcessParamBase):
self.setups = {
"pdf": {
"parse_method": "deepdoc", # deepdoc/plain_text/vlm
"llm_id": "",
"lang": "Chinese",
"suffix": [
"pdf",
@ -93,8 +97,8 @@ class ParserParam(ProcessParamBase):
],
"output_format": "json",
},
"markdown": {
"suffix": ["md", "markdown", "mdx"],
"text&markdown": {
"suffix": ["md", "markdown", "mdx", "txt"],
"output_format": "json",
},
"slides": {
@ -112,7 +116,11 @@ class ParserParam(ProcessParamBase):
"output_format": "json",
},
"email": {
"fields": []
"suffix": [
"eml", "msg"
],
"fields": ["from", "to", "cc", "bcc", "date", "subject", "body", "attachments", "metadata"],
"output_format": "json",
},
"text": {
"suffix": [
@ -147,13 +155,10 @@ class ParserParam(ProcessParamBase):
pdf_config = self.setups.get("pdf", {})
if pdf_config:
pdf_parse_method = pdf_config.get("parse_method", "")
self.check_valid_value(pdf_parse_method.lower(), "Parse method abnormal.", ["deepdoc", "plain_text", "vlm"])
self.check_empty(pdf_parse_method, "Parse method abnormal.")
if pdf_parse_method not in ["deepdoc", "plain_text"]:
self.check_empty(pdf_config.get("llm_id"), "VLM")
pdf_language = pdf_config.get("lang", "")
self.check_empty(pdf_language, "Language")
if pdf_parse_method.lower() not in ["deepdoc", "plain_text"]:
self.check_empty(pdf_config.get("lang", ""), "Language")
pdf_output_format = pdf_config.get("output_format", "")
self.check_valid_value(pdf_output_format, "PDF output format abnormal.", self.allowed_output_format["pdf"])
@ -194,6 +199,11 @@ class ParserParam(ProcessParamBase):
audio_language = audio_config.get("lang", "")
self.check_empty(audio_language, "Language")
email_config = self.setups.get("email", "")
if email_config:
email_output_format = email_config.get("output_format", "")
self.check_valid_value(email_output_format, "Email output format abnormal.", self.allowed_output_format["email"])
def get_input_form(self) -> dict[str, dict]:
return {}
@ -212,8 +222,7 @@ class Parser(ProcessBase):
lines, _ = PlainParser()(blob)
bboxes = [{"text": t} for t, _ in lines]
else:
assert conf.get("llm_id")
vision_model = LLMBundle(self._canvas._tenant_id, LLMType.IMAGE2TEXT, llm_name=conf.get("llm_id"), lang=self._param.setups["pdf"].get("lang"))
vision_model = LLMBundle(self._canvas._tenant_id, LLMType.IMAGE2TEXT, llm_name=conf.get("parse_method"), lang=self._param.setups["pdf"].get("lang"))
lines, _ = VisionParser(vision_model=vision_model)(blob, callback=self.callback)
bboxes = []
for t, poss in lines:
@ -222,6 +231,7 @@ class Parser(ProcessBase):
if conf.get("output_format") == "json":
self.set_output("json", bboxes)
if conf.get("output_format") == "markdown":
mkdn = ""
for b in bboxes:
@ -285,7 +295,6 @@ class Parser(ProcessBase):
def _markdown(self, name, blob):
from functools import reduce
from rag.app.naive import Markdown as naive_markdown_parser
from rag.nlp import concat_img
@ -316,22 +325,6 @@ class Parser(ProcessBase):
else:
self.set_output("text", "\n".join([section_text for section_text, _ in sections]))
def _text(self, name, blob):
    """Decode a plain-text blob and publish it in the configured format.

    Reads the ``"text"`` entry of ``self._param.setups``, records its
    ``output_format`` as an output, and then emits the decoded content
    either as a one-element JSON list (``[{"text": ...}]``) or as a raw
    ``"text"`` output.

    Args:
        name: File name handed to ``get_text`` together with the bytes.
        blob: Raw binary content of the file.
    """
    from deepdoc.parser.utils import get_text

    self.callback(random.randint(1, 5) / 100.0, "Start to work on a text.")
    text_conf = self._param.setups["text"]
    self.set_output("output_format", text_conf["output_format"])

    # Turn the raw bytes into text before choosing the output channel.
    decoded = get_text(name, binary=blob)

    if text_conf.get("output_format") != "json":
        self.set_output("text", decoded)
        return
    self.set_output("json", [{"text": decoded}])
def _image(self, from_upstream: ParserFromUpstream):
from deepdoc.vision import OCR
@ -353,7 +346,7 @@ class Parser(ProcessBase):
else:
# use VLM to describe the picture
cv_model = LLMBundle(self._canvas.get_tenant_id(), LLMType.IMAGE2TEXT, llm_name=conf["llm_id"], lang=lang)
cv_model = LLMBundle(self._canvas.get_tenant_id(), LLMType.IMAGE2TEXT, llm_name=conf["llm_id"],lang=lang)
img_binary = io.BytesIO()
img.save(img_binary, format="JPEG")
img_binary.seek(0)
@ -384,16 +377,134 @@ class Parser(ProcessBase):
self.set_output("text", txt)
def _email(self, from_upstream: ParserFromUpstream):
    """Parse an ``.eml`` or ``.msg`` email and publish it as JSON or text.

    The file extension of ``from_upstream.name`` selects the parser:
    ``.eml`` (case-insensitive) is handled with the standard-library
    ``email`` package, anything else is handed to ``extract_msg``.
    Only the fields listed in ``self._param.setups["email"]["fields"]``
    are extracted; a ``metadata`` dict is always produced.

    Collected keys in the intermediate ``email_content`` dict:
        * header fields such as ``from``/``to``/``cc``/``bcc``/``date``/
          ``subject`` (when requested),
        * ``metadata`` - remaining headers (eml) or message ids (msg),
        * ``text`` / ``text_html`` - body content (when ``body`` is requested),
        * ``attachments`` - list of ``{"filename", "payload"}`` dicts
          (when ``attachments`` is requested).

    Outputs:
        ``json``: ``[email_content]`` when ``output_format`` is ``"json"``.
        ``text``: a flattened ``key:value`` rendering otherwise.

    Args:
        from_upstream: Upstream payload; ``blob`` (raw bytes) and ``name``
            (file name) are read from it.
    """
    self.callback(random.randint(1, 5) / 100.0, "Start to work on an email.")
    blob = from_upstream.blob
    name = from_upstream.name

    email_content = {}
    conf = self._param.setups["email"]
    target_fields = conf["fields"]

    def _decode_part(part):
        """Return the decoded text of a single non-multipart MIME part."""
        payload = part.get_payload(decode=True)
        if payload is None:
            return ""
        # Parts without a declared charset return None here; decoding with
        # None raises TypeError, so fall back to UTF-8.
        charset = part.get_content_charset() or "utf-8"
        try:
            return payload.decode(charset, errors="replace")
        except LookupError:
            # The message declares a codec Python does not know.
            return payload.decode("utf-8", errors="replace")

    _, ext = os.path.splitext(name)
    # Compare case-insensitively so "MAIL.EML" is not routed to the msg parser.
    if ext.lower() == ".eml":
        # handle eml file
        from email import policy
        from email.parser import BytesParser

        msg = BytesParser(policy=policy.default).parse(io.BytesIO(blob))
        email_content["metadata"] = {}

        # handle header info
        for header, value in msg.items():
            key = header.lower()
            if key in target_fields:
                # requested fields like from, to, cc, bcc, date, subject
                email_content[key] = value
            elif key not in ["from", "to", "cc", "bcc", "date", "subject"]:
                # everything that is not a basic field is kept as metadata
                email_content["metadata"][key] = value

        # get body
        if "body" in target_fields:
            body_text, body_html = [], []

            def _collect_bodies(part):
                content_type = part.get_content_type()
                if content_type == "text/plain":
                    body_text.append(_decode_part(part))
                elif content_type == "text/html":
                    body_html.append(_decode_part(part))
                elif "multipart" in content_type and part.is_multipart():
                    for sub_part in part.iter_parts():
                        _collect_bodies(sub_part)

            _collect_bodies(msg)
            email_content["text"] = body_text
            email_content["text_html"] = body_html

        # get attachments
        if "attachments" in target_fields:
            # get_content_disposition() returns the lower-cased disposition
            # without parameters (or None), replacing the manual split(";").
            email_content["attachments"] = [
                {
                    "filename": part.get_filename(),
                    "payload": part.get_payload(decode=True),
                }
                for part in msg.iter_attachments()
                if part.get_content_disposition() == "attachment"
            ]
    else:
        # handle msg file
        import extract_msg

        logging.info("Handle a msg file: %s", name)
        msg = extract_msg.Message(blob)

        # handle header info
        basic_content = {
            "from": msg.sender,
            "to": msg.to,
            "cc": msg.cc,
            "bcc": msg.bcc,
            "date": msg.date,
            "subject": msg.subject,
        }
        email_content.update({k: v for k, v in basic_content.items() if k in target_fields})

        # get metadata
        email_content["metadata"] = {
            "message_id": msg.messageId,
            "in_reply_to": msg.inReplyTo,
        }

        # get body
        if "body" in target_fields:
            email_content["text"] = msg.body  # usually empty. try text_html instead
            email_content["text_html"] = msg.htmlBody

        # get attachments
        if "attachments" in target_fields:
            email_content["attachments"] = [
                {
                    "filename": attachment.name,
                    "payload": attachment.data,  # binary
                }
                for attachment in msg.attachments
            ]

    if conf["output_format"] == "json":
        self.set_output("json", [email_content])
        return

    # Flatten to text; collect pieces and join once instead of repeated "+=".
    pieces = []
    for k, v in email_content.items():
        if isinstance(v, str):
            # basic info
            pieces.append(f"{k}:{v}\n")
        elif isinstance(v, dict):
            # metadata; keep non-ASCII header values readable
            pieces.append(f"{k}:{json.dumps(v, ensure_ascii=False)}\n")
        elif isinstance(v, list):
            # attachments or others
            for fb in v:
                if isinstance(fb, dict):
                    # attachments
                    pieces.append(f'{fb["filename"]}:{fb["payload"]}\n')
                else:
                    # str, usually plain text
                    pieces.append(fb)
        elif v is not None:
            # Non-string scalars (e.g. a datetime "date" coming from a .msg
            # file) were previously dropped from the text output.
            pieces.append(f"{k}:{v}\n")
    self.set_output("text", "".join(pieces))
async def _invoke(self, **kwargs):
function_map = {
"pdf": self._pdf,
"markdown": self._markdown,
"text&markdown": self._markdown,
"spreadsheet": self._spreadsheet,
"slides": self._slides,
"word": self._word,
"text": self._text,
"image": self._image,
"audio": self._audio,
"email": self._email,
}
try:
from_upstream = ParserFromUpstream.model_validate(kwargs)

View File

@ -18,7 +18,7 @@ import json
import logging
import random
import time
from timeit import default_timer as timer
import trio
from agent.canvas import Graph
@ -38,25 +38,26 @@ class Pipeline(Graph):
def callback(self, component_name: str, progress: float | int | None = None, message: str = "") -> None:
log_key = f"{self._flow_id}-{self.task_id}-logs"
timestamp = timer()
try:
bin = REDIS_CONN.get(log_key)
obj = json.loads(bin.encode("utf-8"))
if obj:
if obj[-1]["component_name"] == component_name:
obj[-1]["trace"].append({"progress": progress, "message": message, "datetime": datetime.datetime.now().strftime("%H:%M:%S")})
if obj[-1]["component_id"] == component_name:
obj[-1]["trace"].append({"progress": progress, "message": message, "datetime": datetime.datetime.now().strftime("%H:%M:%S"), "timestamp": timestamp, "elapsed_time": timestamp-obj[-1]["trace"][-1]["timestamp"]})
else:
obj.append({"component_name": component_name, "trace": [{"progress": progress, "message": message, "datetime": datetime.datetime.now().strftime("%H:%M:%S")}]})
obj.append({"component_id": component_name, "trace": [{"progress": progress, "message": message, "datetime": datetime.datetime.now().strftime("%H:%M:%S"), "timestamp": timestamp, "elapsed_time": 0}]})
else:
obj = [{"component_name": component_name, "trace": [{"progress": progress, "message": message, "datetime": datetime.datetime.now().strftime("%H:%M:%S")}]}]
obj = [{"component_id": component_name, "trace": [{"progress": progress, "message": message, "datetime": datetime.datetime.now().strftime("%H:%M:%S"), "timestamp": timestamp, "elapsed_time": 0}]}]
REDIS_CONN.set_obj(log_key, obj, 60 * 30)
if self._doc_id:
percentage = 1./len(self.components.items())
msg = ""
finished = 0.
for o in obj:
if o['component_name'] == "END":
if o['component_id'] == "END":
continue
msg += f"\n[{o['component_name']}]:\n"
msg += f"\n[{o['component_id']}]:\n"
for t in o["trace"]:
msg += "%s: %s\n"%(t["datetime"], t["message"])
if t["progress"] < 0:

View File

@ -30,7 +30,7 @@ def print_logs(pipeline: Pipeline):
while True:
time.sleep(5)
logs = pipeline.fetch_logs()
logs_str = json.dumps(logs)
logs_str = json.dumps(logs, ensure_ascii=False)
if logs_str != last_logs:
print(logs_str)
last_logs = logs_str

View File

@ -89,6 +89,22 @@
"lang": "Chinese",
"llm_id": "SenseVoiceSmall",
"output_format": "json"
},
"email": {
"suffix": [
"msg"
],
"fields": [
"from",
"to",
"cc",
"bcc",
"date",
"subject",
"body",
"attachments"
],
"output_format": "json"
}
}
}

View File

@ -20,8 +20,7 @@ import random
import sys
import threading
import time
from api.utils import get_uuid
from api.db.services.canvas_service import UserCanvasService
from api.utils.api_utils import timeout
from api.utils.base64_image import image2id
from api.utils.log_utils import init_root_logger, get_project_base_directory
@ -29,7 +28,6 @@ from graphrag.general.index import run_graphrag
from graphrag.utils import get_llm_cache, set_llm_cache, get_tags_from_cache, set_tags_to_cache
from rag.flow.pipeline import Pipeline
from rag.prompts import keyword_extraction, question_proposal, content_tagging
import logging
import os
from datetime import datetime
@ -45,10 +43,8 @@ import signal
import trio
import exceptiongroup
import faulthandler
import numpy as np
from peewee import DoesNotExist
from api.db import LLMType, ParserType
from api.db.services.document_service import DocumentService
from api.db.services.llm_service import LLMBundle
@ -216,7 +212,11 @@ async def collect():
return None, None
canceled = False
task = TaskService.get_task(msg["id"])
if msg.get("doc_id", "") == "x":
task = msg
else:
task = TaskService.get_task(msg["id"])
if task:
canceled = has_canceled(task["id"])
if not task or canceled:
@ -229,9 +229,8 @@ async def collect():
task_type = msg.get("task_type", "")
task["task_type"] = task_type
if task_type == "dataflow":
task["tenant_id"]=msg.get("tenant_id", "")
task["dsl"] = msg.get("dsl", "")
task["dataflow_id"] = msg.get("dataflow_id", get_uuid())
task["tenant_id"] = msg["tenant_id"]
task["dataflow_id"] = msg["dataflow_id"]
task["kb_id"] = msg.get("kb_id", "")
return redis_msg, task
@ -460,13 +459,12 @@ async def embedding(docs, mdl, parser_config=None, callback=None):
return tk_count, vector_size
async def run_dataflow(dsl:str, tenant_id:str, doc_id:str, task_id:str, flow_id:str, callback=None):
_ = callback
pipeline = Pipeline(dsl=dsl, tenant_id=tenant_id, doc_id=doc_id, task_id=task_id, flow_id=flow_id)
async def run_dataflow(task: dict):
dataflow_id = task["dataflow_id"]
e, cvs = UserCanvasService.get_by_id(dataflow_id)
pipeline = Pipeline(cvs.dsl, tenant_id=task["tenant_id"], doc_id=task["doc_id"], task_id=task["id"], flow_id=dataflow_id)
pipeline.reset()
await pipeline.run()
await pipeline.run(file=task.get("file"))
@timeout(3600)
@ -513,6 +511,12 @@ async def run_raptor(row, chat_mdl, embd_mdl, vector_size, callback=None):
@timeout(60*60*2, 1)
async def do_handle_task(task):
task_type = task.get("task_type", "")
if task_type == "dataflow" and task.get("doc_id", "") == "x":
await run_dataflow(task)
return
task_id = task["id"]
task_from_page = task["from_page"]
task_to_page = task["to_page"]
@ -526,6 +530,7 @@ async def do_handle_task(task):
task_parser_config = task["parser_config"]
task_start_ts = timer()
# prepare the progress callback function
progress_callback = partial(set_progress, task_id, task_from_page, task_to_page)
@ -554,13 +559,11 @@ async def do_handle_task(task):
init_kb(task, vector_size)
task_type = task.get("task_type", "")
if task_type == "dataflow":
task_dataflow_dsl = task["dsl"]
task_dataflow_id = task["dataflow_id"]
await run_dataflow(dsl=task_dataflow_dsl, tenant_id=task_tenant_id, doc_id=task_doc_id, task_id=task_id, flow_id=task_dataflow_id, callback=None)
await run_dataflow(task)
return
elif task_type == "raptor":
if task_type == "raptor":
# bind LLM for raptor
chat_model = LLMBundle(task_tenant_id, LLMType.CHAT, llm_name=task_llm_id, lang=task_language)
# run RAPTOR

155
uv.lock generated
View File

@ -1,5 +1,5 @@
version = 1
revision = 1
revision = 3
requires-python = ">=3.10, <3.13"
resolution-markers = [
"python_full_version >= '3.12' and sys_platform == 'darwin'",
@ -861,6 +861,15 @@ wheels = [
{ url = "https://mirrors.aliyun.com/pypi/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6" },
]
[[package]]
name = "colorclass"
version = "2.2.2"
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/d7/1a/31ff00a33569a3b59d65bbdc445c73e12f92ad28195b7ace299f68b9af70/colorclass-2.2.2.tar.gz", hash = "sha256:6d4fe287766166a98ca7bc6f6312daf04a0481b1eda43e7173484051c0ab4366" }
wheels = [
{ url = "https://mirrors.aliyun.com/pypi/packages/30/b6/daf3e2976932da4ed3579cff7a30a53d22ea9323ee4f0d8e43be60454897/colorclass-2.2.2-py2.py3-none-any.whl", hash = "sha256:6f10c273a0ef7a1150b1120b6095cbdd68e5cf36dfd5d0fc957a2500bbf99a55" },
]
[[package]]
name = "coloredlogs"
version = "15.0.1"
@ -873,6 +882,15 @@ wheels = [
{ url = "https://mirrors.aliyun.com/pypi/packages/a7/06/3d6badcf13db419e25b07041d9c7b4a2c331d3f4e7134445ec5df57714cd/coloredlogs-15.0.1-py2.py3-none-any.whl", hash = "sha256:612ee75c546f53e92e70049c9dbfcc18c935a2b9a53b66085ce9ef6a6e5c0934" },
]
[[package]]
name = "compressed-rtf"
version = "1.0.7"
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/b7/0c/929a4e8ef9d7143f54d77dadb5f370cc7b98534b1bd6e1124d0abe8efb24/compressed_rtf-1.0.7.tar.gz", hash = "sha256:7c30859334839f3cdc7d10796af5b434bb326b9df7cb5a65e95a8eacb2951b0e" }
wheels = [
{ url = "https://mirrors.aliyun.com/pypi/packages/07/1d/62f5bf92e12335eb63517f42671ed78512d48bbc69e02a942dd7b90f03f0/compressed_rtf-1.0.7-py3-none-any.whl", hash = "sha256:b7904921d78c67a0a4b7fff9fb361a00ae2b447b6edca010ce321cd98fa0fcc0" },
]
[[package]]
name = "contourpy"
version = "1.3.2"
@ -1322,6 +1340,23 @@ wheels = [
{ url = "https://mirrors.aliyun.com/pypi/packages/fc/da/8376678b4a9ae0f9418d93df9c9cf851dced49c95ceb38daac6651e38f7a/duckduckgo_search-7.5.5-py3-none-any.whl", hash = "sha256:c71a0661aa436f215d9a05d653af424affb58825ab3e79f3b788053cbdee9ebc" },
]
[[package]]
name = "easygui"
version = "0.98.3"
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/cc/ad/e35f7a30272d322be09dc98592d2f55d27cc933a7fde8baccbbeb2bd9409/easygui-0.98.3.tar.gz", hash = "sha256:d653ff79ee1f42f63b5a090f2f98ce02335d86ad8963b3ce2661805cafe99a04" }
wheels = [
{ url = "https://mirrors.aliyun.com/pypi/packages/8e/a7/b276ff776533b423710a285c8168b52551cb2ab0855443131fdc7fd8c16f/easygui-0.98.3-py2.py3-none-any.whl", hash = "sha256:33498710c68b5376b459cd3fc48d1d1f33822139eb3ed01defbc0528326da3ba" },
]
[[package]]
name = "ebcdic"
version = "1.1.1"
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
wheels = [
{ url = "https://mirrors.aliyun.com/pypi/packages/0d/2f/633031205333bee5f9f93761af8268746aa75f38754823aabb8570eb245b/ebcdic-1.1.1-py2.py3-none-any.whl", hash = "sha256:33b4cb729bc2d0bf46cc1847b0e5946897cb8d3f53520c5b9aa5fa98d7e735f1" },
]
[[package]]
name = "editdistance"
version = "0.8.1"
@ -1435,6 +1470,24 @@ wheels = [
{ url = "https://mirrors.aliyun.com/pypi/packages/36/f4/c6e662dade71f56cd2f3735141b265c3c79293c109549c1e6933b0651ffc/exceptiongroup-1.3.0-py3-none-any.whl", hash = "sha256:4d111e6e0c13d0644cad6ddaa7ed0261a0b36971f6d23e7ec9b4b9097da78a10" },
]
[[package]]
name = "extract-msg"
version = "0.55.0"
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
dependencies = [
{ name = "beautifulsoup4" },
{ name = "compressed-rtf" },
{ name = "ebcdic" },
{ name = "olefile" },
{ name = "red-black-tree-mod" },
{ name = "rtfde" },
{ name = "tzlocal" },
]
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/5e/65/c70afb3b119a44b3ee36b029485dc15326cf3a7c50da19a1ecbbf949c5d1/extract_msg-0.55.0.tar.gz", hash = "sha256:cf08283498c3dfcc7f894dad1579f52e3ced9fb76b865c2355cbe757af8a54e1" }
wheels = [
{ url = "https://mirrors.aliyun.com/pypi/packages/53/81/87d5241036046ea17c5c8db228f4c9e04e07e53b627015d4496a99449aaf/extract_msg-0.55.0-py3-none-any.whl", hash = "sha256:baf0cdee9a8d267b70c366bc57ceb03dbfa1e7ab2dca6824169a7fe623f0917c" },
]
[[package]]
name = "fake-http-header"
version = "0.3.5"
@ -2893,6 +2946,15 @@ wheels = [
{ url = "https://mirrors.aliyun.com/pypi/packages/92/b0/8f08df3f0fa584c4132937690c6dd33e0a116f963ecf2b35567f614e0ca7/langfuse-3.2.1-py3-none-any.whl", hash = "sha256:07a84e8c1eed6ac8e149bdda1431fd866e4aee741b66124316336fb2bc7e6a32" },
]
[[package]]
name = "lark"
version = "1.1.9"
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/2c/e1/804b6196b3fbdd0f8ba785fc62837b034782a891d6f663eea2f30ca23cfa/lark-1.1.9.tar.gz", hash = "sha256:15fa5236490824c2c4aba0e22d2d6d823575dcaf4cdd1848e34b6ad836240fba" }
wheels = [
{ url = "https://mirrors.aliyun.com/pypi/packages/e7/9c/eef7c591e6dc952f3636cfe0df712c0f9916cedf317810a3bb53ccb65cdd/lark-1.1.9-py3-none-any.whl", hash = "sha256:a0dd3a87289f8ccbb325901e4222e723e7d745dbfc1803eaf5f3d2ace19cf2db" },
]
[[package]]
name = "litellm"
version = "1.75.5.post1"
@ -3377,6 +3439,19 @@ wheels = [
{ url = "https://mirrors.aliyun.com/pypi/packages/b1/ef/27dd35a7049c9a4f4211c6cd6a8c9db0a50647546f003a5867827ec45391/msgspec-0.19.0-cp312-cp312-win_amd64.whl", hash = "sha256:067f0de1c33cfa0b6a8206562efdf6be5985b988b53dd244a8e06f993f27c8c0" },
]
[[package]]
name = "msoffcrypto-tool"
version = "5.4.2"
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
dependencies = [
{ name = "cryptography" },
{ name = "olefile" },
]
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/d2/b7/0fd6573157e0ec60c0c470e732ab3322fba4d2834fd24e1088d670522a01/msoffcrypto_tool-5.4.2.tar.gz", hash = "sha256:44b545adba0407564a0cc3d6dde6ca36b7c0fdf352b85bca51618fa1d4817370" }
wheels = [
{ url = "https://mirrors.aliyun.com/pypi/packages/03/54/7f6d3d9acad083dae8c22d9ab483b657359a1bf56fee1d7af88794677707/msoffcrypto_tool-5.4.2-py3-none-any.whl", hash = "sha256:274fe2181702d1e5a107ec1b68a4c9fea997a44972ae1cc9ae0cb4f6a50fef0e" },
]
[[package]]
name = "multidict"
version = "6.6.3"
@ -3726,6 +3801,32 @@ wheels = [
{ url = "https://mirrors.aliyun.com/pypi/packages/9e/4e/0d0c945463719429b7bd21dece907ad0bde437a2ff12b9b12fee94722ab0/nvidia_nvtx_cu12-12.6.77-py3-none-manylinux2014_x86_64.whl", hash = "sha256:6574241a3ec5fdc9334353ab8c479fe75841dbe8f4532a8fc97ce63503330ba1" },
]
[[package]]
name = "olefile"
version = "0.47"
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/69/1b/077b508e3e500e1629d366249c3ccb32f95e50258b231705c09e3c7a4366/olefile-0.47.zip", hash = "sha256:599383381a0bf3dfbd932ca0ca6515acd174ed48870cbf7fee123d698c192c1c" }
wheels = [
{ url = "https://mirrors.aliyun.com/pypi/packages/17/d3/b64c356a907242d719fc668b71befd73324e47ab46c8ebbbede252c154b2/olefile-0.47-py2.py3-none-any.whl", hash = "sha256:543c7da2a7adadf21214938bb79c83ea12b473a4b6ee4ad4bf854e7715e13d1f" },
]
[[package]]
name = "oletools"
version = "0.60.2"
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
dependencies = [
{ name = "colorclass" },
{ name = "easygui" },
{ name = "msoffcrypto-tool", marker = "(platform_python_implementation != 'PyPy' and sys_platform == 'darwin') or (platform_python_implementation != 'PyPy' and sys_platform == 'win32') or (sys_platform != 'darwin' and sys_platform != 'win32')" },
{ name = "olefile" },
{ name = "pcodedmp" },
{ name = "pyparsing" },
]
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/5c/2f/037f40e44706d542b94a2312ccc33ee2701ebfc9a83b46b55263d49ce55a/oletools-0.60.2.zip", hash = "sha256:ad452099f4695ffd8855113f453348200d195ee9fa341a09e197d66ee7e0b2c3" }
wheels = [
{ url = "https://mirrors.aliyun.com/pypi/packages/ac/ff/05257b7183279b80ecec6333744de23f48f0faeeba46c93e6d13ce835515/oletools-0.60.2-py2.py3-none-any.whl", hash = "sha256:72ad8bd748fd0c4e7b5b4733af770d11543ebb2bf2697455f99f975fcd50cc96" },
]
[[package]]
name = "ollama"
version = "0.2.1"
@ -4188,6 +4289,19 @@ wheels = [
{ url = "https://mirrors.aliyun.com/pypi/packages/87/2b/b50d3d08ea0fc419c183a84210571eba005328efa62b6b98bc28e9ead32a/patsy-1.0.1-py2.py3-none-any.whl", hash = "sha256:751fb38f9e97e62312e921a1954b81e1bb2bcda4f5eeabaf94db251ee791509c" },
]
[[package]]
name = "pcodedmp"
version = "1.2.6"
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
dependencies = [
{ name = "oletools" },
{ name = "win-unicode-console", marker = "platform_python_implementation != 'PyPy' and sys_platform == 'win32'" },
]
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/3d/20/6d461e29135f474408d0d7f95b2456a9ba245560768ee51b788af10f7429/pcodedmp-1.2.6.tar.gz", hash = "sha256:025f8c809a126f45a082ffa820893e6a8d990d9d7ddb68694b5a9f0a6dbcd955" }
wheels = [
{ url = "https://mirrors.aliyun.com/pypi/packages/ba/72/b380fb5c89d89c3afafac8cf02a71a45f4f4a4f35531ca949a34683962d1/pcodedmp-1.2.6-py2.py3-none-any.whl", hash = "sha256:4441f7c0ab4cbda27bd4668db3b14f36261d86e5059ce06c0828602cbe1c4278" },
]
[[package]]
name = "pdfminer-six"
version = "20221105"
@ -5300,6 +5414,7 @@ dependencies = [
{ name = "elastic-transport" },
{ name = "elasticsearch" },
{ name = "elasticsearch-dsl" },
{ name = "extract-msg" },
{ name = "filelock" },
{ name = "flasgger" },
{ name = "flask" },
@ -5452,6 +5567,7 @@ requires-dist = [
{ name = "elastic-transport", specifier = "==8.12.0" },
{ name = "elasticsearch", specifier = "==8.12.1" },
{ name = "elasticsearch-dsl", specifier = "==8.12.0" },
{ name = "extract-msg", specifier = ">=0.39.0" },
{ name = "fastembed", marker = "(platform_machine != 'x86_64' and extra == 'full') or (sys_platform == 'darwin' and extra == 'full')", specifier = ">=0.3.6,<0.4.0" },
{ name = "fastembed-gpu", marker = "platform_machine == 'x86_64' and sys_platform != 'darwin' and extra == 'full'", specifier = ">=0.3.6,<0.4.0" },
{ name = "filelock", specifier = "==3.15.4" },
@ -5630,6 +5746,12 @@ wheels = [
{ url = "https://mirrors.aliyun.com/pypi/packages/c2/5a/2f2e7fc026d5e64b5408aa3fbe0296a6407b8481196cae4daacacb3a3ae0/readerwriterlock-1.0.9-py3-none-any.whl", hash = "sha256:8c4b704e60d15991462081a27ef46762fea49b478aa4426644f2146754759ca7" },
]
[[package]]
name = "red-black-tree-mod"
version = "1.22"
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/48/75/bfa342a2ebfc9623b701f1c6995b9906fd6dd2cedf6bce777d09e23303ac/red-black-tree-mod-1.22.tar.gz", hash = "sha256:38e3652903a2bf96379c27c2082ca0b7b905158662dd7ef0c97f4fd93a9aa908" }
[[package]]
name = "referencing"
version = "0.36.2"
@ -5883,6 +6005,19 @@ wheels = [
{ url = "https://mirrors.aliyun.com/pypi/packages/64/8d/0133e4eb4beed9e425d9a98ed6e081a55d195481b7632472be1af08d2f6b/rsa-4.9.1-py3-none-any.whl", hash = "sha256:68635866661c6836b8d39430f97a996acbd61bfa49406748ea243539fe239762" },
]
[[package]]
name = "rtfde"
version = "0.1.2.1"
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
dependencies = [
{ name = "lark" },
{ name = "oletools" },
]
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/66/f1/3fafc33cd80cc605509ced36dbbb74c3c365d5859b0b57b6500e4a8ca8a5/rtfde-0.1.2.1.tar.gz", hash = "sha256:ea2653fb163ef1e9fdd1b0849bef88b0ba82537f860d4aca5b2c49f556efaaaa" }
wheels = [
{ url = "https://mirrors.aliyun.com/pypi/packages/b6/dd/641e9cf68d4242aaf7ce9653498009d8925080b6664993988bd50468932a/rtfde-0.1.2.1-py3-none-any.whl", hash = "sha256:c44dfa923a435c54cdbdd0e0f5352a4075542af317af061f82f2d4f032271645" },
]
[[package]]
name = "ruamel-base"
version = "1.0.0"
@ -6890,6 +7025,18 @@ wheels = [
{ url = "https://mirrors.aliyun.com/pypi/packages/5c/23/c7abc0ca0a1526a0774eca151daeb8de62ec457e77262b66b359c3c7679e/tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8" },
]
[[package]]
name = "tzlocal"
version = "5.3.1"
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
dependencies = [
{ name = "tzdata", marker = "sys_platform == 'win32'" },
]
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/8b/2e/c14812d3d4d9cd1773c6be938f89e5735a1f11a9f184ac3639b93cef35d5/tzlocal-5.3.1.tar.gz", hash = "sha256:cceffc7edecefea1f595541dbd6e990cb1ea3d19bf01b2809f362a03dd7921fd" }
wheels = [
{ url = "https://mirrors.aliyun.com/pypi/packages/c2/14/e2a54fabd4f08cd7af1c07030603c3356b74da07f7cc056e600436edfa17/tzlocal-5.3.1-py3-none-any.whl", hash = "sha256:eb1a66c3ef5847adf7a834f1be0800581b683b5608e74f86ecbcef8ab91bb85d" },
]
[[package]]
name = "umap-learn"
version = "0.5.6"
@ -7134,6 +7281,12 @@ dependencies = [
]
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/67/35/25e68fbc99e672127cc6fbb14b8ec1ba3dfef035bf1e4c90f78f24a80b7d/wikipedia-1.4.0.tar.gz", hash = "sha256:db0fad1829fdd441b1852306e9856398204dc0786d2996dd2e0c8bb8e26133b2" }
[[package]]
name = "win-unicode-console"
version = "0.5"
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/89/8d/7aad74930380c8972ab282304a2ff45f3d4927108bb6693cabcc9fc6a099/win_unicode_console-0.5.zip", hash = "sha256:d4142d4d56d46f449d6f00536a73625a871cba040f0bc1a2e305a04578f07d1e" }
[[package]]
name = "win32-setctime"
version = "1.2.0"

View File

@ -20,17 +20,10 @@ interface IProps {
isMult?: boolean;
}
const data = [
{ id: '1', name: 'data-pipeline-1' },
{ id: '2', name: 'data-pipeline-2' },
{ id: '3', name: 'data-pipeline-3' },
{ id: '4', name: 'data-pipeline-4' },
];
export function DataFlowSelect(props: IProps) {
const { toDataPipeline, formFieldName, isMult = true } = props;
const { t } = useTranslate('knowledgeConfiguration');
const form = useFormContext();
console.log('data-pipline form', form);
const toDataPipLine = () => {
toDataPipeline?.();
};

View File

@ -1,6 +1,10 @@
import { DocumentParserType } from '@/constants/knowledge';
import { useTranslate } from '@/hooks/common-hooks';
import { cn } from '@/lib/utils';
import {
GenerateLogButton,
GenerateType,
} from '@/pages/dataset/dataset/generate-button/generate';
import { upperFirst } from 'lodash';
import { useCallback, useMemo } from 'react';
import { useFormContext, useWatch } from 'react-hook-form';
@ -47,6 +51,7 @@ export const showGraphRagItems = (parserId: DocumentParserType | undefined) => {
type GraphRagItemsProps = {
marginBottom?: boolean;
className?: string;
showGenerateItem?: boolean;
};
export function UseGraphRagFormField() {
@ -88,6 +93,7 @@ export function UseGraphRagFormField() {
// The three types "table", "resume" and "one" do not display this configuration.
const GraphRagItems = ({
marginBottom = false,
showGenerateItem = false,
className = 'p-10',
}: GraphRagItemsProps) => {
const { t } = useTranslate('knowledgeConfiguration');
@ -210,6 +216,18 @@ const GraphRagItems = ({
</FormItem>
)}
/>
{showGenerateItem && (
<div className="w-full flex items-center">
<div className="text-sm whitespace-nowrap w-1/4">
{t('extractKnowledgeGraph')}
</div>
<GenerateLogButton
className="w-3/4 text-text-secondary"
status={1}
type={GenerateType.KnowledgeGraph}
/>
</div>
)}
</>
)}
</FormContainer>

View File

@ -1,6 +1,10 @@
import { FormLayout } from '@/constants/form';
import { DocumentParserType } from '@/constants/knowledge';
import { useTranslate } from '@/hooks/common-hooks';
import {
GenerateLogButton,
GenerateType,
} from '@/pages/dataset/dataset/generate-button/generate';
import random from 'lodash/random';
import { Shuffle } from 'lucide-react';
import { useCallback } from 'react';
@ -52,7 +56,11 @@ const Prompt = 'parser_config.raptor.prompt';
// The three types "table", "resume" and "one" do not display this configuration.
const RaptorFormFields = () => {
const RaptorFormFields = ({
showGenerateItem = false,
}: {
showGenerateItem?: boolean;
}) => {
const form = useFormContext();
const { t } = useTranslate('knowledgeConfiguration');
const useRaptor = useWatch({ name: UseRaptorField });
@ -211,6 +219,18 @@ const RaptorFormFields = () => {
</FormItem>
)}
/>
{showGenerateItem && (
<div className="w-full flex items-center">
<div className="text-sm whitespace-nowrap w-1/4">
{t('extractRaptor')}
</div>
<GenerateLogButton
className="w-3/4 text-text-secondary"
status={1}
type={GenerateType.Raptor}
/>
</div>
)}
</div>
)}
</>

View File

@ -75,21 +75,21 @@ const Modal: ModalType = ({
const handleCancel = useCallback(() => {
onOpenChange?.(false);
onCancel?.();
}, [onOpenChange, onCancel]);
// onCancel?.();
}, [onOpenChange]);
const handleOk = useCallback(() => {
onOpenChange?.(true);
onOk?.();
}, [onOpenChange, onOk]);
// onOk?.();
}, [onOpenChange]);
const handleChange = (open: boolean) => {
onOpenChange?.(open);
console.log('open', open, onOpenChange);
if (open) {
handleOk();
onOk?.();
}
if (!open) {
handleCancel();
onCancel?.();
}
};
const footEl = useMemo(() => {

View File

@ -102,13 +102,15 @@ export default {
noMoreData: `That's all. Nothing more.`,
},
knowledgeDetails: {
notGenerated: 'Not generated',
generatedOn: 'Generated on',
subbarFiles: 'Files',
generateKnowledgeGraph:
'This will extract entities and relationships from all your documents in this dataset. The process may take a while to complete.',
generateRaptor:
'This will extract entities and relationships from all your documents in this dataset. The process may take a while to complete.',
generate: 'Generate',
raptor: 'Raptor',
knowledgeGraph: 'Knowledge Graph',
processingType: 'Processing Type',
dataPipeline: 'Data Pipeline',
operations: 'Operations',
@ -138,12 +140,12 @@ export default {
testing: 'Retrieval testing',
files: 'files',
configuration: 'Configuration',
knowledgeGraph: 'Knowledge graph',
knowledgeGraph: 'Knowledge Graph',
name: 'Name',
namePlaceholder: 'Please input name!',
doc: 'Docs',
datasetDescription:
'😉 Please wait for your files to finish parsing before starting an AI-powered chat.',
'Please wait for your files to finish parsing before starting an AI-powered chat.',
addFile: 'Add file',
searchFiles: 'Search your files',
localFiles: 'Local files',
@ -261,6 +263,22 @@ export default {
reRankModelWaring: 'Re-rank model is very time consuming.',
},
knowledgeConfiguration: {
deleteGenerateModalContent: `
<p>Deleting the generated <strong class='text-text-primary'>{{type}}</strong> results
will remove all derived entities and relationships from this dataset.
Your original files will remain intact.</p>
<br/>
Do you want to continue?
`,
extractRaptor: 'Extract Raptor',
extractKnowledgeGraph: 'Extract Knowledge Graph',
filterPlaceholder: 'please input filter',
fileFilterTip: '',
fileFilter: 'File Filter',
setDefaultTip: '',
setDefault: 'Set as Default',
eidtLinkDataPipeline: 'Edit Data Pipeline',
linkPipelineSetTip: 'Manage data pipeline linkage with this dataset',
default: 'Default',
dataPipeline: 'Data Pipeline',
linkDataPipeline: 'Link Data Pipeline',
@ -1646,6 +1664,13 @@ This delimiter is used to split the input text into several text pieces echo of
<p>To keep them, please click Rerun to re-run the current stage.</p> `,
changeStepModalConfirmText: 'Switch Anyway',
changeStepModalCancelText: 'Cancel',
unlinkPipelineModalTitle: 'Unlink data pipeline',
unlinkPipelineModalContent: `
<p>Once unlinked, this Dataset will no longer be connected to the current Data Pipeline.</p>
<p>Files that are already being parsed will continue until completion</p>
<p>Files that are not yet parsed will no longer be processed</p> <br/>
<p>Are you sure you want to proceed?</p> `,
unlinkPipelineModalConfirmText: 'Unlink',
},
dataflow: {
parser: 'Parser',

View File

@ -94,9 +94,11 @@ export default {
noMoreData: '没有更多数据了',
},
knowledgeDetails: {
notGenerated: '未生成',
generatedOn: '生成于',
subbarFiles: '文件列表',
generate: '生成',
raptor: 'Raptor',
knowledgeGraph: '知识图谱',
processingType: '处理类型',
dataPipeline: '数据管道',
operations: '操作',
@ -130,7 +132,7 @@ export default {
name: '名称',
namePlaceholder: '请输入名称',
doc: '文档',
datasetDescription: '😉 解析成功后才能问答哦。',
datasetDescription: '解析成功后才能问答哦。',
addFile: '新增文件',
searchFiles: '搜索文件',
localFiles: '本地文件',
@ -246,6 +248,22 @@ export default {
theDocumentBeingParsedCannotBeDeleted: '正在解析的文档不能被删除',
},
knowledgeConfiguration: {
deleteGenerateModalContent: `
<p>删除生成的 <strong class='text-text-primary'>{{type}}</strong> 结果
将从此数据集中移除所有派生实体和关系。
您的原始文件将保持不变。</p>
<br/>
是否要继续?
`,
extractRaptor: '从文档中提取Raptor',
extractKnowledgeGraph: '从文档中提取知识图谱',
filterPlaceholder: '请输入',
fileFilterTip: '',
fileFilter: '正则匹配表达式',
setDefaultTip: '',
setDefault: '设置默认',
eidtLinkDataPipeline: '编辑数据流',
linkPipelineSetTip: '管理与此数据集的数据管道链接',
default: '默认',
dataPipeline: '数据流',
linkDataPipeline: '关联数据流',
@ -1556,6 +1574,13 @@ General实体和关系提取提示来自 GitHub - microsoft/graphrag基于
<p>要保留这些更改,请点击“重新运行”以重新运行当前阶段。</p> `,
changeStepModalConfirmText: '继续切换',
changeStepModalCancelText: '取消',
unlinkPipelineModalTitle: '解绑数据流',
unlinkPipelineModalContent: `
<p>一旦取消链接,该数据集将不再连接到当前数据管道。</p>
<p>正在解析的文件将继续解析,直到完成。</p>
<p>尚未解析的文件将不再被处理。</p> <br/>
<p>你确定要继续吗?</p> `,
unlinkPipelineModalConfirmText: '解绑',
},
dataflow: {
parser: '解析器',

View File

@ -2,7 +2,7 @@ import SvgIcon from '@/components/svg-icon';
import { useIsDarkTheme } from '@/components/theme-provider';
import { parseColorToRGBA } from '@/utils/common-util';
import { CircleQuestionMark } from 'lucide-react';
import { FC, useMemo, useState } from 'react';
import { FC, useEffect, useMemo, useState } from 'react';
import { useTranslation } from 'react-i18next';
import { LogTabs } from './dataset-common';
import { DatasetFilter } from './dataset-filter';
@ -74,25 +74,35 @@ const FileLogsPage: FC = () => {
const [active, setActive] = useState<(typeof LogTabs)[keyof typeof LogTabs]>(
LogTabs.FILE_LOGS,
);
const topMockData = {
const [topAllData, setTopAllData] = useState({
totalFiles: {
value: 2827,
precent: 12.5,
value: 0,
precent: 0,
},
downloads: {
value: 28,
success: 8,
failed: 2,
value: 0,
success: 0,
failed: 0,
},
processing: {
value: 156,
success: 8,
failed: 2,
value: 0,
success: 0,
failed: 0,
},
};
});
const { data: topData } = useFetchOverviewTital();
console.log('topData --> ', topData);
// Mirror the fetched overview totals into the "Processing" card.
// The functional updater reads the previous state itself, so the effect only
// depends on `topData`. Listing the state it writes (`topAllData`) as a
// dependency would make every update re-trigger the effect indefinitely.
useEffect(() => {
  setTopAllData((prev) => ({
    ...prev,
    processing: {
      value: topData?.processing || 0,
      success: topData?.finished || 0,
      failed: topData?.failed || 0,
    },
  }));
}, [topData]);
const mockData = useMemo(() => {
if (active === LogTabs.FILE_LOGS) {
@ -161,7 +171,7 @@ const FileLogsPage: FC = () => {
<div className="grid grid-cols-3 md:grid-cols-3 gap-4 mb-6">
<StatCard
title="Total Files"
value={topMockData.totalFiles.value}
value={topAllData.totalFiles.value}
icon={
isDark ? (
<SvgIcon name="data-flow/total-files-icon" width={40} />
@ -172,15 +182,15 @@ const FileLogsPage: FC = () => {
>
<div>
<span className="text-accent-primary">
{topMockData.totalFiles.precent > 0 ? '+' : ''}
{topMockData.totalFiles.precent}%{' '}
{topAllData.totalFiles.precent > 0 ? '+' : ''}
{topAllData.totalFiles.precent}%{' '}
</span>
from last week
</div>
</StatCard>
<StatCard
title="Downloading"
value={topMockData.downloads.value}
value={topAllData.downloads.value}
icon={
isDark ? (
<SvgIcon name="data-flow/data-icon" width={40} />
@ -190,13 +200,13 @@ const FileLogsPage: FC = () => {
}
>
<CardFooterProcess
success={topMockData.downloads.success}
failed={topMockData.downloads.failed}
success={topAllData.downloads.success}
failed={topAllData.downloads.failed}
/>
</StatCard>
<StatCard
title="Processing"
value={topMockData.processing.value}
value={topAllData.processing.value}
icon={
isDark ? (
<SvgIcon name="data-flow/processing-icon" width={40} />
@ -206,8 +216,8 @@ const FileLogsPage: FC = () => {
}
>
<CardFooterProcess
success={topMockData.processing.success}
failed={topMockData.processing.failed}
success={topAllData.processing.success}
failed={topAllData.processing.failed}
/>
</StatCard>
</div>

View File

@ -65,25 +65,25 @@ export const getFileLogsTableColumns = (
) => {
// const { t } = useTranslate('knowledgeDetails');
const columns: ColumnDef<DocumentLog>[] = [
{
id: 'select',
header: ({ table }) => (
<input
type="checkbox"
checked={table.getIsAllRowsSelected()}
onChange={table.getToggleAllRowsSelectedHandler()}
className="rounded bg-gray-900 text-blue-500 focus:ring-blue-500"
/>
),
cell: ({ row }) => (
<input
type="checkbox"
checked={row.getIsSelected()}
onChange={row.getToggleSelectedHandler()}
className="rounded border-gray-600 bg-gray-900 text-blue-500 focus:ring-blue-500"
/>
),
},
// {
// id: 'select',
// header: ({ table }) => (
// <input
// type="checkbox"
// checked={table.getIsAllRowsSelected()}
// onChange={table.getToggleAllRowsSelectedHandler()}
// className="rounded bg-gray-900 text-blue-500 focus:ring-blue-500"
// />
// ),
// cell: ({ row }) => (
// <input
// type="checkbox"
// checked={row.getIsSelected()}
// onChange={row.getToggleSelectedHandler()}
// className="rounded border-gray-600 bg-gray-900 text-blue-500 focus:ring-blue-500"
// />
// ),
// },
{
accessorKey: 'id',
header: 'ID',
@ -156,7 +156,7 @@ export const getFileLogsTableColumns = (
id: 'operations',
header: t('operations'),
cell: ({ row }) => (
<div className="flex justify-start space-x-2">
<div className="flex justify-start space-x-2 opacity-0 group-hover:opacity-100 transition-opacity">
<Button
variant="ghost"
size="sm"
@ -189,25 +189,25 @@ export const getDatasetLogsTableColumns = (
) => {
// const { t } = useTranslate('knowledgeDetails');
const columns: ColumnDef<DocumentLog>[] = [
{
id: 'select',
header: ({ table }) => (
<input
type="checkbox"
checked={table.getIsAllRowsSelected()}
onChange={table.getToggleAllRowsSelectedHandler()}
className="rounded bg-gray-900 text-blue-500 focus:ring-blue-500"
/>
),
cell: ({ row }) => (
<input
type="checkbox"
checked={row.getIsSelected()}
onChange={row.getToggleSelectedHandler()}
className="rounded border-gray-600 bg-gray-900 text-blue-500 focus:ring-blue-500"
/>
),
},
// {
// id: 'select',
// header: ({ table }) => (
// <input
// type="checkbox"
// checked={table.getIsAllRowsSelected()}
// onChange={table.getToggleAllRowsSelectedHandler()}
// className="rounded bg-gray-900 text-blue-500 focus:ring-blue-500"
// />
// ),
// cell: ({ row }) => (
// <input
// type="checkbox"
// checked={row.getIsSelected()}
// onChange={row.getToggleSelectedHandler()}
// className="rounded border-gray-600 bg-gray-900 text-blue-500 focus:ring-blue-500"
// />
// ),
// },
{
accessorKey: 'id',
header: 'ID',
@ -251,7 +251,7 @@ export const getDatasetLogsTableColumns = (
id: 'operations',
header: t('operations'),
cell: ({ row }) => (
<div className="flex justify-start space-x-2">
<div className="flex justify-start space-x-2 opacity-0 group-hover:opacity-100 transition-opacity">
<Button
variant="ghost"
size="sm"

View File

@ -1,21 +1,61 @@
import { IconFont } from '@/components/icon-font';
import { RAGFlowAvatar } from '@/components/ragflow-avatar';
import { Button } from '@/components/ui/button';
import { Modal } from '@/components/ui/modal/modal';
import { omit } from 'lodash';
import { Link, Settings2, Unlink } from 'lucide-react';
import { useState } from 'react';
import { useTranslation } from 'react-i18next';
import { z } from 'zod';
import { linkPiplineFormSchema } from '../form-schema';
import LinkDataPipelineModal from './link-data-pipline-modal';
interface DataPipelineItemProps {
id: string;
name: string;
avatar?: string;
isDefault?: boolean;
linked?: boolean;
openLinkModalFunc?: (open: boolean) => void;
openLinkModalFunc?: (open: boolean, data?: IDataPipelineNodeProps) => void;
}
const DataPipelineItem = (props: DataPipelineItemProps) => {
const { t } = useTranslation();
const { name, avatar, isDefault, linked, openLinkModalFunc } = props;
// Opens the "unlink data pipeline" confirmation dialog.
// NOTE: both footer buttons currently just close the dialog; no unlink
// request is issued from here.
const openUnlinkModal = () => {
  const closeModal = () => {
    Modal.hide();
  };

  // The translated body contains HTML markup, hence dangerouslySetInnerHTML.
  const content = (
    <div
      className="text-sm text-text-secondary"
      dangerouslySetInnerHTML={{
        __html: t('dataflowParser.unlinkPipelineModalContent'),
      }}
    ></div>
  );

  const footer = (
    <div className="flex justify-end gap-2">
      <Button variant={'outline'} onClick={closeModal}>
        {t('dataflowParser.changeStepModalCancelText')}
      </Button>
      <Button
        variant={'secondary'}
        className="!bg-state-error text-bg-base"
        onClick={closeModal}
      >
        {t('dataflowParser.unlinkPipelineModalConfirmText')}
      </Button>
    </div>
  );

  Modal.show({
    visible: true,
    className: '!w-[560px]',
    title: t('dataflowParser.unlinkPipelineModalTitle'),
    children: content,
    onVisibleChange: closeModal,
    footer,
  });
};
return (
<div className="flex items-center justify-between gap-1 px-2 rounded-lg border">
<div className="flex items-center gap-1">
@ -28,42 +68,89 @@ const DataPipelineItem = (props: DataPipelineItemProps) => {
)}
</div>
<div className="flex gap-1 items-center">
<Button variant={'transparent'} className="border-none">
<Button
variant={'transparent'}
className="border-none"
type="button"
onClick={() =>
openLinkModalFunc?.(true, { ...omit(props, ['openLinkModalFunc']) })
}
>
<Settings2 />
</Button>
{!isDefault && (
<Button
variant={'transparent'}
className="border-none"
onClick={() => {
openLinkModalFunc?.(true);
}}
>
{linked ? <Link /> : <Unlink />}
</Button>
<>
{linked && (
<Button
type="button"
variant={'transparent'}
className="border-none"
onClick={() => {
openUnlinkModal();
}}
>
<Unlink />
</Button>
)}
</>
)}
</div>
</div>
);
};
/**
 * Data describing one data pipeline entry in the dataset settings list.
 * This is the payload handed to the link/edit modal when an entry's settings
 * button is clicked (the item's props minus its `openLinkModalFunc` callback).
 */
export interface IDataPipelineNodeProps {
// Identifier of the pipeline entry.
id: string;
// Display name of the pipeline.
name: string;
// Optional avatar for the entry (the sample data uses image URLs).
avatar?: string;
// When true, the list item does not render an unlink button for this entry.
isDefault?: boolean;
// When true (and the entry is not the default), the unlink button is shown.
linked?: boolean;
}
const LinkDataPipeline = () => {
const { t } = useTranslation();
const [openLinkModal, setOpenLinkModal] = useState(false);
const [currentDataPipeline, setCurrentDataPipeline] =
useState<IDataPipelineNodeProps>();
const testNode = [
{
id: '1',
name: 'Data Pipeline 1',
avatar: 'https://avatars.githubusercontent.com/u/10656201?v=4',
isDefault: true,
linked: true,
},
{
id: '2',
name: 'Data Pipeline 2',
avatar: 'https://avatars.githubusercontent.com/u/10656201?v=4',
linked: false,
},
{
id: '3',
name: 'Data Pipeline 3',
avatar: 'https://avatars.githubusercontent.com/u/10656201?v=4',
linked: false,
},
{
id: '4',
name: 'Data Pipeline 4',
avatar: 'https://avatars.githubusercontent.com/u/10656201?v=4',
linked: true,
},
];
const openLinkModalFunc = (open: boolean) => {
const openLinkModalFunc = (open: boolean, data?: IDataPipelineNodeProps) => {
console.log('open', open, data);
setOpenLinkModal(open);
if (data) {
setCurrentDataPipeline(data);
} else {
setCurrentDataPipeline(undefined);
}
};
const handleLinkOrEditSubmit = (
data: z.infer<typeof linkPiplineFormSchema>,
) => {
console.log('handleLinkOrEditSubmit', data);
};
return (
<div className="flex flex-col gap-2">
@ -74,9 +161,15 @@ const LinkDataPipeline = () => {
</div>
<div className="flex justify-between items-center">
<div className="text-center text-xs text-text-secondary">
Manage data pipeline linkage with this dataset
{t('knowledgeConfiguration.linkPipelineSetTip')}
</div>
<Button variant={'transparent'}>
<Button
type="button"
variant={'transparent'}
onClick={() => {
openLinkModalFunc?.(true);
}}
>
<Link />
<span className="text-xs text-text-primary">
{t('knowledgeConfiguration.linkDataPipeline')}
@ -94,10 +187,12 @@ const LinkDataPipeline = () => {
))}
</section>
<LinkDataPipelineModal
data={currentDataPipeline}
open={openLinkModal}
setOpen={(open: boolean) => {
openLinkModalFunc(open);
}}
onSubmit={handleLinkOrEditSubmit}
/>
</div>
);

View File

@ -10,32 +10,53 @@ import {
FormMessage,
} from '@/components/ui/form';
import { Modal } from '@/components/ui/modal/modal';
import { Switch } from '@/components/ui/switch';
import { useNavigatePage } from '@/hooks/logic-hooks/navigate-hooks';
import { zodResolver } from '@hookform/resolvers/zod';
import { t } from 'i18next';
import { useForm } from 'react-hook-form';
import { z } from 'zod';
import { linkPiplineFormSchema } from '../form-schema';
import { pipelineFormSchema } from '../form-schema';
import { IDataPipelineNodeProps } from './link-data-pipeline';
const LinkDataPipelineModal = ({
data,
open,
setOpen,
onSubmit,
}: {
data: IDataPipelineNodeProps | undefined;
open: boolean;
setOpen: (open: boolean) => void;
onSubmit?: (data: any) => void;
}) => {
const form = useForm<z.infer<typeof linkPiplineFormSchema>>({
resolver: zodResolver(linkPiplineFormSchema),
defaultValues: { data_flow: ['888'], file_filter: '' },
const isEdit = !!data;
const form = useForm<z.infer<typeof pipelineFormSchema>>({
resolver: zodResolver(pipelineFormSchema),
defaultValues: {
data_flow: [],
set_default: false,
file_filter: '',
},
});
// const [open, setOpen] = useState(false);
const { navigateToAgents } = useNavigatePage();
const handleFormSubmit = (values: any) => {
console.log(values);
console.log(values, data);
const param = {
...data,
...values,
};
onSubmit?.(param);
};
return (
<Modal
title={t('knowledgeConfiguration.linkDataPipeline')}
className="!w-[560px]"
title={
!isEdit
? t('knowledgeConfiguration.linkDataPipeline')
: t('knowledgeConfiguration.eidtLinkDataPipeline')
}
open={open}
onOpenChange={setOpen}
showfooter={false}
@ -43,10 +64,12 @@ const LinkDataPipelineModal = ({
<Form {...form}>
<form onSubmit={form.handleSubmit(handleFormSubmit)}>
<div className="flex flex-col gap-4 ">
<DataFlowSelect
toDataPipeline={navigateToAgents}
formFieldName="data_flow"
/>
{!isEdit && (
<DataFlowSelect
toDataPipeline={navigateToAgents}
formFieldName="data_flow"
/>
)}
<FormField
control={form.control}
name={'file_filter'}
@ -65,7 +88,9 @@ const LinkDataPipelineModal = ({
<div className="text-muted-foreground">
<FormControl>
<Input
placeholder={t('dataFlowPlaceholder')}
placeholder={t(
'knowledgeConfiguration.filterPlaceholder',
)}
{...field}
/>
</FormControl>
@ -78,11 +103,56 @@ const LinkDataPipelineModal = ({
</FormItem>
)}
/>
{isEdit && (
<FormField
control={form.control}
name={'set_default'}
render={({ field }) => (
<FormItem className=" items-center space-y-0 ">
<div className="flex flex-col gap-1">
<div className="flex gap-2 justify-between ">
<FormLabel
tooltip={t('knowledgeConfiguration.setDefaultTip')}
className="text-sm text-text-primary whitespace-wrap "
>
{t('knowledgeConfiguration.setDefault')}
</FormLabel>
</div>
<div className="text-muted-foreground">
<FormControl>
<Switch
  /* Bind the boolean form value to `checked` so the switch is controlled by
     form state; `value` is only the string submitted with a native form. */
  checked={!!field.value}
  onCheckedChange={field.onChange}
/>
</FormControl>
</div>
</div>
<div className="flex pt-1">
<div className="w-full"></div>
<FormMessage />
</div>
</FormItem>
)}
/>
)}
<div className="flex justify-end gap-1">
<Button type="reset" variant={'outline'} className="btn-primary">
<Button
type="button"
variant={'outline'}
className="btn-primary"
onClick={() => {
setOpen(false);
}}
>
{t('modal.cancelText')}
</Button>
<Button type="submit" variant={'default'} className="btn-primary">
<Button
type="button"
variant={'default'}
className="btn-primary"
onClick={form.handleSubmit(handleFormSubmit)}
>
{t('modal.okText')}
</Button>
</div>

View File

@ -72,7 +72,17 @@ export const formSchema = z.object({
// icon: z.array(z.instanceof(File)),
});
export const linkPiplineFormSchema = z.object({
data_flow: z.array(z.string()),
// Full set of fields for the data-pipeline link/edit form. Every field is
// optional so the same schema can validate both the "link" and the "edit"
// variants of the form.
export const pipelineFormSchema = z.object({
// Selected data flow ids (array of strings).
data_flow: z.array(z.string()).optional(),
// Whether the pipeline should be marked as the default one.
set_default: z.boolean().optional(),
// Free-text file filter expression.
file_filter: z.string().optional(),
});
// Subset of pipelineFormSchema for linking a pipeline: the data flow
// selection plus the file filter (no `set_default`).
export const linkPiplineFormSchema = pipelineFormSchema.pick({
data_flow: true,
file_filter: true,
});
// Subset of pipelineFormSchema for editing an existing link: the default
// flag plus the file filter (no `data_flow`).
export const editPiplineFormSchema = pipelineFormSchema.pick({
set_default: true,
file_filter: true,
});

View File

@ -86,9 +86,12 @@ export default function DatasetSettings() {
<GeneralForm></GeneralForm>
<Divider />
<GraphRagItems className="border-none p-0"></GraphRagItems>
<GraphRagItems
className="border-none p-0"
showGenerateItem={true}
></GraphRagItems>
<Divider />
<RaptorFormFields></RaptorFormFields>
<RaptorFormFields showGenerateItem={true}></RaptorFormFields>
<Divider />
<LinkDataPipeline />
</MainContainer>

View File

@ -6,16 +6,20 @@ import {
DropdownMenuItem,
DropdownMenuTrigger,
} from '@/components/ui/dropdown-menu';
import { Modal } from '@/components/ui/modal/modal';
import { cn } from '@/lib/utils';
import { toFixed } from '@/utils/common-util';
import { t } from 'i18next';
import { lowerFirst } from 'lodash';
import { CirclePause, WandSparkles } from 'lucide-react';
import { CirclePause, Trash2, WandSparkles } from 'lucide-react';
import { useState } from 'react';
import { useTranslation } from 'react-i18next';
import { generateStatus, useFetchGenerateData } from './hook';
const MenuItem: React.FC<{ name: 'KnowledgeGraph' | 'Raptor' }> = ({
name,
}) => {
/**
 * Kinds of derived artifacts that can be generated for a dataset.
 * The string values double as lookup keys (e.g. in MenuItem's icon map), so
 * they must stay in sync with those maps.
 */
export enum GenerateType {
KnowledgeGraph = 'KnowledgeGraph',
Raptor = 'Raptor',
}
const MenuItem: React.FC<{ name: GenerateType }> = ({ name }) => {
console.log(name, 'pppp');
const iconKeyMap = {
KnowledgeGraph: 'knowledgegraph',
@ -111,3 +115,102 @@ const Generate: React.FC = () => {
};
export default Generate;
/** Props accepted by GenerateLogButton. */
export type IGenerateLogProps = {
// Identifier of the generation record; not read by GenerateLogButton itself.
id?: string;
// 0 renders the "not generated" label; 1 renders the generated row with a
// delete icon.
status: 0 | 1;
// Custom text shown instead of the default "generated on" label.
message?: string;
// Timestamp text rendered after the label when status is 1.
created_at?: string;
// Not read by GenerateLogButton itself.
updated_at?: string;
// Selects the artifact name used in the delete dialog; any value other than
// KnowledgeGraph is labelled as Raptor.
type?: GenerateType;
// Extra class names merged onto the root container.
className?: string;
// Optional handler for the delete action.
onDelete?: () => void;
};
/**
 * Shows whether a derived artifact (knowledge graph or Raptor) has been
 * generated for the dataset.
 *
 * - `status === 0`: renders the "not generated" label.
 * - `status === 1`: renders `message` (or the default "generated on" label)
 *   followed by `created_at`, plus a trash icon that opens a confirmation
 *   dialog. Confirming closes the dialog and calls `onDelete` if provided.
 *
 * @param props See IGenerateLogProps.
 */
export const GenerateLogButton = (props: IGenerateLogProps) => {
  const { t } = useTranslation();
  const { status, message, created_at, type, className, onDelete } = props;

  // Human-readable artifact name, shared by the dialog title and body.
  const typeLabel =
    type === GenerateType.KnowledgeGraph
      ? t('knowledgeDetails.knowledgeGraph')
      : t('knowledgeDetails.raptor');

  const handleDelete = () => {
    Modal.show({
      visible: true,
      className: '!w-[560px]',
      title: t('common.delete') + ' ' + typeLabel,
      children: (
        // The translated content contains HTML markup.
        <div
          className="text-sm text-text-secondary"
          dangerouslySetInnerHTML={{
            __html: t('knowledgeConfiguration.deleteGenerateModalContent', {
              type: typeLabel,
            }),
          }}
        ></div>
      ),
      onVisibleChange: () => {
        Modal.hide();
      },
      footer: (
        <div className="flex justify-end gap-2">
          <Button
            type="button"
            variant={'outline'}
            onClick={() => Modal.hide()}
          >
            {t('dataflowParser.changeStepModalCancelText')}
          </Button>
          <Button
            type="button"
            variant={'secondary'}
            className="!bg-state-error text-text-primary"
            onClick={() => {
              Modal.hide();
              // Report the confirmed deletion to the caller; previously the
              // callback was accepted but never invoked.
              onDelete?.();
            }}
          >
            {t('common.delete')}
          </Button>
        </div>
      ),
    });
  };

  return (
    <div className={cn('flex bg-bg-card rounded-md py-1 px-3', className)}>
      <div className="flex items-center justify-between w-full">
        {status === 1 && (
          <>
            <div>
              {message || t('knowledgeDetails.generatedOn')}
              {/* Separate the label from the timestamp with a space. */}
              {created_at ? ` ${created_at}` : null}
            </div>
            <Trash2
              size={14}
              className="cursor-pointer"
              onClick={(e) => {
                handleDelete();
                // Keep the click from reaching enclosing clickable containers.
                e.stopPropagation();
              }}
            />
          </>
        )}
        {status === 0 && <div>{t('knowledgeDetails.notGenerated')}</div>}
      </div>
    </div>
  );
};

View File

@ -75,7 +75,7 @@ export default function Dataset() {
filters={filters}
leftPanel={
<div className="items-start">
<div className="pb-1">{t('knowledgeDetails.dataset')}</div>
<div className="pb-1">{t('knowledgeDetails.subbarFiles')}</div>
<div className="text-text-sub-title-invert text-sm">
{t('knowledgeDetails.datasetDescription')}
</div>

View File

@ -9,7 +9,7 @@ import { cn, formatBytes } from '@/lib/utils';
import { Routes } from '@/routes';
import { formatPureDate } from '@/utils/date';
import { isEmpty } from 'lodash';
import { Banknote, Database, FileSearch2, GitGraph } from 'lucide-react';
import { Banknote, FileSearch2, FolderOpen, GitGraph } from 'lucide-react';
import { useMemo } from 'react';
import { useTranslation } from 'react-i18next';
import { useHandleMenuClick } from './hooks';
@ -34,8 +34,8 @@ export function SideBar({ refreshCount }: PropType) {
// key: Routes.DataSetOverview,
// },
{
icon: Database,
label: t(`knowledgeDetails.dataset`),
icon: FolderOpen,
label: t(`knowledgeDetails.subbarFiles`),
key: Routes.DatasetBase,
},
{