Compare commits

...

3 Commits

Author SHA1 Message Date
d039d1e73d fix: Added dataset generation logging functionality #9869 (#10180)
### What problem does this PR solve?

fix: Added dataset generation logging functionality #9869

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2025-09-22 10:01:34 +08:00
d050ef568d Feat: support dataflow run. (#10182)
### What problem does this PR solve?


### Type of change

- [x] New Feature (non-breaking change which adds functionality)
2025-09-22 09:36:21 +08:00
028c2d83e9 Feat: parse email (#10181)
### What problem does this PR solve?

- Dataflow supports email.
- Fix the old email parser.
- Add new dependencies to parse .msg files (a usage sketch follows this commit entry).

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
- [x] New Feature (non-breaking change which adds functionality)
- [x] Other (please describe): add new dependencies.
2025-09-22 09:29:38 +08:00
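For reviewers unfamiliar with `extract-msg`: the sketch below is an illustrative, stand-alone version of what the new `_email` step does for `.msg` files. The file path and the final print are made up; the real parser works on an in-memory blob and only keeps the configured `fields`.

```python
# Rough sketch of .msg parsing with extract-msg, mirroring the new _email step.
# "sample.msg" is a placeholder; the production code receives an uploaded blob.
import extract_msg

msg = extract_msg.Message("sample.msg")

email_content = {
    "from": msg.sender,
    "to": msg.to,
    "cc": msg.cc,
    "bcc": msg.bcc,
    "date": msg.date,
    "subject": msg.subject,
    "metadata": {"message_id": msg.messageId, "in_reply_to": msg.inReplyTo},
    # The plain-text body is often empty for .msg; the HTML body is the usual fallback.
    "text": msg.body,
    "text_html": msg.htmlBody,
    # Each attachment exposes a filename and its raw binary payload.
    "attachments": [{"filename": a.name, "payload": a.data} for a in msg.attachments],
}

print(email_content["subject"], len(email_content["attachments"]))
```

`.eml` files keep going through the standard library's `email` parser, as shown in the `_email` hunk further down.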
28 changed files with 850 additions and 203 deletions

View File

@ -144,11 +144,10 @@ def run():
    if cvs.canvas_category == CanvasCategory.DataFlow:
        task_id = get_uuid()
-       flow_id = get_uuid()
-       ok, error_message = queue_dataflow(dsl=cvs.dsl, tenant_id=user_id, file=files[0], task_id=task_id, flow_id=flow_id, priority=0)
+       ok, error_message = queue_dataflow(tenant_id=user_id, flow_id=req["id"], task_id=task_id, file=files[0], priority=0)
        if not ok:
-           return server_error_response(error_message)
+           return get_data_error_result(message=error_message)
-       return get_json_result(data={"task_id": task_id, "message_id": flow_id})
+       return get_json_result(data={"message_id": task_id})
    try:
        canvas = Canvas(cvs.dsl, current_user.id, req["id"])

View File

@ -496,7 +496,7 @@ class FileService(CommonService):
        return ParserType.AUDIO.value
    if re.search(r"\.(ppt|pptx|pages)$", filename):
        return ParserType.PRESENTATION.value
-   if re.search(r"\.(eml)$", filename):
+   if re.search(r"\.(msg|eml)$", filename):
        return ParserType.EMAIL.value
    return default

View File

@ -472,14 +472,10 @@ def has_canceled(task_id):
    return False
-def queue_dataflow(dsl:str, tenant_id:str, task_id:str, flow_id:str=None, doc_id:str=None, file:dict=None, priority: int=0, callback=None) -> tuple[bool, str]:
+def queue_dataflow(tenant_id:str, flow_id:str, task_id:str, doc_id:str="x", file:dict=None, priority: int=0) -> tuple[bool, str]:
-    """
-    Returns a tuple (success: bool, error_message: str).
-    """
-    _ = callback
    task = dict(
-        id=get_uuid() if not task_id else task_id,
+        id=task_id,
        doc_id=doc_id,
        from_page=0,
        to_page=100000000,
@ -490,15 +486,10 @@ def queue_dataflow(dsl:str, tenant_id:str, task_id:str, flow_id:str=None, doc_id
    TaskService.model.delete().where(TaskService.model.id == task["id"]).execute()
    bulk_insert_into_db(model=Task, data_source=[task], replace_on_conflict=True)
-    kb_id = DocumentService.get_knowledgebase_id(doc_id)
-    if not kb_id:
-        return False, f"Can't find KB of this document: {doc_id}"
-    task["kb_id"] = kb_id
+    task["kb_id"] = DocumentService.get_knowledgebase_id(doc_id)
    task["tenant_id"] = tenant_id
    task["task_type"] = "dataflow"
-    task["dsl"] = dsl
-    task["dataflow_id"] = get_uuid() if not flow_id else flow_id
+    task["dataflow_id"] = flow_id
    task["file"] = file
    if not REDIS_CONN.queue_product(

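Reading this hunk together with the canvas endpoint change above and the task-executor changes further down, the queued Redis message now looks roughly like the literal below. Every value is a made-up placeholder and the comments are inferences from this diff, not documented API.

```python
# Approximate shape of a dataflow task queued by the new queue_dataflow();
# all values are illustrative placeholders pieced together from this diff.
task = {
    "id": "0f3a7c1e-task-uuid",      # caller-supplied task id, also part of the Redis log key
    "doc_id": "x",                   # default sentinel: an ad-hoc run not tied to a document
    "from_page": 0,
    "to_page": 100000000,
    "kb_id": None,                   # presumably nothing is found for the "x" sentinel
    "tenant_id": "user-uuid",
    "task_type": "dataflow",         # routes the worker to run_dataflow()
    "dataflow_id": "canvas-uuid",    # the worker loads this canvas's DSL via UserCanvasService
    "file": {"name": "sample.pdf"},  # optional uploaded file forwarded to Pipeline.run()
}
print(task["task_type"], task["doc_id"])
```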
View File

@ -155,7 +155,7 @@ def filename_type(filename):
    if re.match(r".*\.pdf$", filename):
        return FileType.PDF.value
-   if re.match(r".*\.(eml|doc|docx|ppt|pptx|yml|xml|htm|json|jsonl|ldjson|csv|txt|ini|xls|xlsx|wps|rtf|hlp|pages|numbers|key|md|py|js|java|c|cpp|h|php|go|ts|sh|cs|kt|html|sql)$", filename):
+   if re.match(r".*\.(msg|eml|doc|docx|ppt|pptx|yml|xml|htm|json|jsonl|ldjson|csv|txt|ini|xls|xlsx|wps|rtf|hlp|pages|numbers|key|md|py|js|java|c|cpp|h|php|go|ts|sh|cs|kt|html|sql)$", filename):
        return FileType.DOC.value
    if re.match(r".*\.(wav|flac|ape|alac|wavpack|wv|mp3|aac|ogg|vorbis|opus)$", filename):

View File

@ -34,6 +34,7 @@ dependencies = [
    "elastic-transport==8.12.0",
    "elasticsearch==8.12.1",
    "elasticsearch-dsl==8.12.0",
+   "extract-msg>=0.39.0",
    "filelock==3.15.4",
    "flask==3.0.3",
    "flask-cors==5.0.0",

View File

@ -78,7 +78,7 @@ def chunk(
    _add_content(msg, msg.get_content_type())
    sections = TxtParser.parser_txt("\n".join(text_txt)) + [
-       (line, "") for line in HtmlParser.parser_txt("\n".join(html_txt)) if line
+       (line, "") for line in HtmlParser.parser_txt("\n".join(html_txt), chunk_token_num=parser_config["chunk_token_num"]) if line
    ]
    st = timer()

View File

@ -35,9 +35,9 @@ class ProcessBase(ComponentBase):
    def __init__(self, pipeline, id, param: ProcessParamBase):
        super().__init__(pipeline, id, param)
        if hasattr(self._canvas, "callback"):
-           self.callback = partial(self._canvas.callback, self.component_name)
+           self.callback = partial(self._canvas.callback, id)
        else:
-           self.callback = partial(lambda *args, **kwargs: None, self.component_name)
+           self.callback = partial(lambda *args, **kwargs: None, id)
    async def invoke(self, **kwargs) -> dict[str, Any]:
        self.set_output("_created_time", time.perf_counter())

View File

@ -13,7 +13,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import io
+import json
import logging
+import os
import random
from functools import partial
@ -57,7 +59,10 @@ class ParserParam(ProcessParamBase):
            "image": [
                "text"
            ],
-           "email": [],
+           "email": [
+               "text",
+               "json"
+           ],
            "text": [
                "text",
                "json"
@ -71,7 +76,6 @@ class ParserParam(ProcessParamBase):
        self.setups = {
            "pdf": {
                "parse_method": "deepdoc", # deepdoc/plain_text/vlm
-               "llm_id": "",
                "lang": "Chinese",
                "suffix": [
                    "pdf",
@ -93,8 +97,8 @@ class ParserParam(ProcessParamBase):
                ],
                "output_format": "json",
            },
-           "markdown": {
-               "suffix": ["md", "markdown", "mdx"],
+           "text&markdown": {
+               "suffix": ["md", "markdown", "mdx", "txt"],
                "output_format": "json",
            },
            "slides": {
@ -112,7 +116,11 @@ class ParserParam(ProcessParamBase):
                "output_format": "json",
            },
            "email": {
-               "fields": []
+               "suffix": [
+                   "eml", "msg"
+               ],
+               "fields": ["from", "to", "cc", "bcc", "date", "subject", "body", "attachments", "metadata"],
+               "output_format": "json",
            },
            "text": {
                "suffix": [
@ -147,13 +155,10 @@ class ParserParam(ProcessParamBase):
        pdf_config = self.setups.get("pdf", {})
        if pdf_config:
            pdf_parse_method = pdf_config.get("parse_method", "")
-           self.check_valid_value(pdf_parse_method.lower(), "Parse method abnormal.", ["deepdoc", "plain_text", "vlm"])
+           self.check_empty(pdf_parse_method, "Parse method abnormal.")
-           if pdf_parse_method not in ["deepdoc", "plain_text"]:
+           if pdf_parse_method.lower() not in ["deepdoc", "plain_text"]:
-               self.check_empty(pdf_config.get("llm_id"), "VLM")
-               pdf_language = pdf_config.get("lang", "")
-               self.check_empty(pdf_language, "Language")
+               self.check_empty(pdf_config.get("lang", ""), "Language")
            pdf_output_format = pdf_config.get("output_format", "")
            self.check_valid_value(pdf_output_format, "PDF output format abnormal.", self.allowed_output_format["pdf"])
@ -194,6 +199,11 @@ class ParserParam(ProcessParamBase):
            audio_language = audio_config.get("lang", "")
            self.check_empty(audio_language, "Language")
+       email_config = self.setups.get("email", "")
+       if email_config:
+           email_output_format = email_config.get("output_format", "")
+           self.check_valid_value(email_output_format, "Email output format abnormal.", self.allowed_output_format["email"])
    def get_input_form(self) -> dict[str, dict]:
        return {}
@ -212,8 +222,7 @@ class Parser(ProcessBase):
                lines, _ = PlainParser()(blob)
                bboxes = [{"text": t} for t, _ in lines]
            else:
-               assert conf.get("llm_id")
-               vision_model = LLMBundle(self._canvas._tenant_id, LLMType.IMAGE2TEXT, llm_name=conf.get("llm_id"), lang=self._param.setups["pdf"].get("lang"))
+               vision_model = LLMBundle(self._canvas._tenant_id, LLMType.IMAGE2TEXT, llm_name=conf.get("parse_method"), lang=self._param.setups["pdf"].get("lang"))
                lines, _ = VisionParser(vision_model=vision_model)(blob, callback=self.callback)
                bboxes = []
                for t, poss in lines:
@ -222,6 +231,7 @@ class Parser(ProcessBase):
            if conf.get("output_format") == "json":
                self.set_output("json", bboxes)
            if conf.get("output_format") == "markdown":
                mkdn = ""
                for b in bboxes:
@ -285,7 +295,6 @@ class Parser(ProcessBase):
    def _markdown(self, name, blob):
        from functools import reduce
        from rag.app.naive import Markdown as naive_markdown_parser
        from rag.nlp import concat_img
@ -316,22 +325,6 @@ class Parser(ProcessBase):
        else:
            self.set_output("text", "\n".join([section_text for section_text, _ in sections]))
-   def _text(self, name, blob):
-       from deepdoc.parser.utils import get_text
-       self.callback(random.randint(1, 5) / 100.0, "Start to work on a text.")
-       conf = self._param.setups["text"]
-       self.set_output("output_format", conf["output_format"])
-       # parse binary to text
-       text_content = get_text(name, binary=blob)
-       if conf.get("output_format") == "json":
-           result = [{"text": text_content}]
-           self.set_output("json", result)
-       else:
-           result = text_content
-           self.set_output("text", result)
    def _image(self, from_upstream: ParserFromUpstream):
        from deepdoc.vision import OCR
@ -353,7 +346,7 @@ class Parser(ProcessBase):
            else:
                # use VLM to describe the picture
-               cv_model = LLMBundle(self._canvas.get_tenant_id(), LLMType.IMAGE2TEXT, llm_name=conf["llm_id"], lang=lang)
+               cv_model = LLMBundle(self._canvas.get_tenant_id(), LLMType.IMAGE2TEXT, llm_name=conf["llm_id"],lang=lang)
                img_binary = io.BytesIO()
                img.save(img_binary, format="JPEG")
                img_binary.seek(0)
@ -384,16 +377,134 @@ class Parser(ProcessBase):
        self.set_output("text", txt)
def _email(self, from_upstream: ParserFromUpstream):
self.callback(random.randint(1, 5) / 100.0, "Start to work on an email.")
blob = from_upstream.blob
name = from_upstream.name
email_content = {}
conf = self._param.setups["email"]
target_fields = conf["fields"]
_, ext = os.path.splitext(name)
if ext == ".eml":
# handle eml file
from email import policy
from email.parser import BytesParser
msg = BytesParser(policy=policy.default).parse(io.BytesIO(blob))
email_content['metadata'] = {}
# handle header info
for header, value in msg.items():
# get fields like from, to, cc, bcc, date, subject
if header.lower() in target_fields:
email_content[header.lower()] = value
# get metadata
elif header.lower() not in ["from", "to", "cc", "bcc", "date", "subject"]:
email_content["metadata"][header.lower()] = value
# get body
if "body" in target_fields:
body_text, body_html = [], []
def _add_content(m, content_type):
if content_type == "text/plain":
body_text.append(
m.get_payload(decode=True).decode(m.get_content_charset())
)
elif content_type == "text/html":
body_html.append(
m.get_payload(decode=True).decode(m.get_content_charset())
)
elif "multipart" in content_type:
if m.is_multipart():
for part in m.iter_parts():
_add_content(part, part.get_content_type())
_add_content(msg, msg.get_content_type())
email_content["text"] = body_text
email_content["text_html"] = body_html
# get attachment
if "attachments" in target_fields:
attachments = []
for part in msg.iter_attachments():
content_disposition = part.get("Content-Disposition")
if content_disposition:
dispositions = content_disposition.strip().split(";")
if dispositions[0].lower() == "attachment":
filename = part.get_filename()
payload = part.get_payload(decode=True)
attachments.append({
"filename": filename,
"payload": payload,
})
email_content["attachments"] = attachments
else:
# handle msg file
import extract_msg
print("handle a msg file.")
msg = extract_msg.Message(blob)
# handle header info
basic_content = {
"from": msg.sender,
"to": msg.to,
"cc": msg.cc,
"bcc": msg.bcc,
"date": msg.date,
"subject": msg.subject,
}
email_content.update({k: v for k, v in basic_content.items() if k in target_fields})
# get metadata
email_content['metadata'] = {
'message_id': msg.messageId,
'in_reply_to': msg.inReplyTo,
}
# get body
if "body" in target_fields:
email_content["text"] = msg.body # usually empty. try text_html instead
email_content["text_html"] = msg.htmlBody
# get attachments
if "attachments" in target_fields:
attachments = []
for t in msg.attachments:
attachments.append({
"filename": t.name,
"payload": t.data # binary
})
email_content["attachments"] = attachments
if conf["output_format"] == "json":
self.set_output("json", [email_content])
else:
content_txt = ''
for k, v in email_content.items():
if isinstance(v, str):
# basic info
content_txt += f'{k}:{v}' + "\n"
elif isinstance(v, dict):
# metadata
content_txt += f'{k}:{json.dumps(v)}' + "\n"
elif isinstance(v, list):
# attachments or others
for fb in v:
if isinstance(fb, dict):
# attachments
content_txt += f'{fb["filename"]}:{fb["payload"]}' + "\n"
else:
# str, usually plain text
content_txt += fb
self.set_output("text", content_txt)
    async def _invoke(self, **kwargs):
        function_map = {
            "pdf": self._pdf,
-           "markdown": self._markdown,
+           "text&markdown": self._markdown,
            "spreadsheet": self._spreadsheet,
            "slides": self._slides,
            "word": self._word,
-           "text": self._text,
            "image": self._image,
            "audio": self._audio,
+           "email": self._email,
        }
        try:
            from_upstream = ParserFromUpstream.model_validate(kwargs)

View File

@ -18,7 +18,7 @@ import json
import logging
import random
import time
+from timeit import default_timer as timer
import trio
from agent.canvas import Graph
@ -38,25 +38,26 @@ class Pipeline(Graph):
    def callback(self, component_name: str, progress: float | int | None = None, message: str = "") -> None:
        log_key = f"{self._flow_id}-{self.task_id}-logs"
+       timestamp = timer()
        try:
            bin = REDIS_CONN.get(log_key)
            obj = json.loads(bin.encode("utf-8"))
            if obj:
-               if obj[-1]["component_name"] == component_name:
+               if obj[-1]["component_id"] == component_name:
-                   obj[-1]["trace"].append({"progress": progress, "message": message, "datetime": datetime.datetime.now().strftime("%H:%M:%S")})
+                   obj[-1]["trace"].append({"progress": progress, "message": message, "datetime": datetime.datetime.now().strftime("%H:%M:%S"), "timestamp": timestamp, "elapsed_time": timestamp-obj[-1]["trace"][-1]["timestamp"]})
                else:
-                   obj.append({"component_name": component_name, "trace": [{"progress": progress, "message": message, "datetime": datetime.datetime.now().strftime("%H:%M:%S")}]})
+                   obj.append({"component_id": component_name, "trace": [{"progress": progress, "message": message, "datetime": datetime.datetime.now().strftime("%H:%M:%S"), "timestamp": timestamp, "elapsed_time": 0}]})
            else:
-               obj = [{"component_name": component_name, "trace": [{"progress": progress, "message": message, "datetime": datetime.datetime.now().strftime("%H:%M:%S")}]}]
+               obj = [{"component_id": component_name, "trace": [{"progress": progress, "message": message, "datetime": datetime.datetime.now().strftime("%H:%M:%S"), "timestamp": timestamp, "elapsed_time": 0}]}]
            REDIS_CONN.set_obj(log_key, obj, 60 * 30)
            if self._doc_id:
                percentage = 1./len(self.components.items())
                msg = ""
                finished = 0.
                for o in obj:
-                   if o['component_name'] == "END":
+                   if o['component_id'] == "END":
                        continue
-                   msg += f"\n[{o['component_name']}]:\n"
+                   msg += f"\n[{o['component_id']}]:\n"
                    for t in o["trace"]:
                        msg += "%s: %s\n"%(t["datetime"], t["message"])
                        if t["progress"] < 0:

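With the added `timestamp` and `elapsed_time` fields, one per-component entry stored under the `{flow_id}-{task_id}-logs` key now looks roughly like the sketch below; the component id, messages, and values are illustrative.

```python
# Illustrative shape of a per-component trace entry written by Pipeline.callback().
# Timestamps come from timeit.default_timer(), so only their differences are meaningful.
log_entry = {
    "component_id": "Parser:0",   # the component id is now logged instead of its display name
    "trace": [
        {"progress": 0.03, "message": "Start to work on an email.",
         "datetime": "09:36:21", "timestamp": 5123.42, "elapsed_time": 0},
        {"progress": 1.0, "message": "Done.",
         "datetime": "09:36:24", "timestamp": 5126.08, "elapsed_time": 2.66},
    ],
}
print(log_entry["trace"][-1]["elapsed_time"])
```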
View File

@ -30,7 +30,7 @@ def print_logs(pipeline: Pipeline):
    while True:
        time.sleep(5)
        logs = pipeline.fetch_logs()
-       logs_str = json.dumps(logs)
+       logs_str = json.dumps(logs, ensure_ascii=False)
        if logs_str != last_logs:
            print(logs_str)
            last_logs = logs_str

View File

@ -89,6 +89,22 @@
        "lang": "Chinese",
        "llm_id": "SenseVoiceSmall",
        "output_format": "json"
},
"email": {
"suffix": [
"msg"
],
"fields": [
"from",
"to",
"cc",
"bcc",
"date",
"subject",
"body",
"attachments"
],
"output_format": "json"
        }
    }
}

View File

@ -20,8 +20,7 @@ import random
import sys
import threading
import time
+from api.db.services.canvas_service import UserCanvasService
-from api.utils import get_uuid
from api.utils.api_utils import timeout
from api.utils.base64_image import image2id
from api.utils.log_utils import init_root_logger, get_project_base_directory
@ -29,7 +28,6 @@ from graphrag.general.index import run_graphrag
from graphrag.utils import get_llm_cache, set_llm_cache, get_tags_from_cache, set_tags_to_cache
from rag.flow.pipeline import Pipeline
from rag.prompts import keyword_extraction, question_proposal, content_tagging
import logging
import os
from datetime import datetime
@ -45,10 +43,8 @@ import signal
import trio
import exceptiongroup
import faulthandler
import numpy as np
from peewee import DoesNotExist
from api.db import LLMType, ParserType
from api.db.services.document_service import DocumentService
from api.db.services.llm_service import LLMBundle
@ -216,7 +212,11 @@ async def collect():
        return None, None
    canceled = False
-   task = TaskService.get_task(msg["id"])
+   if msg.get("doc_id", "") == "x":
+       task = msg
+   else:
+       task = TaskService.get_task(msg["id"])
    if task:
        canceled = has_canceled(task["id"])
    if not task or canceled:
@ -229,9 +229,8 @@ async def collect():
    task_type = msg.get("task_type", "")
    task["task_type"] = task_type
    if task_type == "dataflow":
-       task["tenant_id"]=msg.get("tenant_id", "")
+       task["tenant_id"] = msg["tenant_id"]
-       task["dsl"] = msg.get("dsl", "")
-       task["dataflow_id"] = msg.get("dataflow_id", get_uuid())
+       task["dataflow_id"] = msg["dataflow_id"]
        task["kb_id"] = msg.get("kb_id", "")
    return redis_msg, task
@ -460,13 +459,12 @@ async def embedding(docs, mdl, parser_config=None, callback=None):
    return tk_count, vector_size
-async def run_dataflow(dsl:str, tenant_id:str, doc_id:str, task_id:str, flow_id:str, callback=None):
+async def run_dataflow(task: dict):
-   _ = callback
+   dataflow_id = task["dataflow_id"]
+   e, cvs = UserCanvasService.get_by_id(dataflow_id)
-   pipeline = Pipeline(dsl=dsl, tenant_id=tenant_id, doc_id=doc_id, task_id=task_id, flow_id=flow_id)
+   pipeline = Pipeline(cvs.dsl, tenant_id=task["tenant_id"], doc_id=task["doc_id"], task_id=task["id"], flow_id=dataflow_id)
    pipeline.reset()
-   await pipeline.run()
+   await pipeline.run(file=task.get("file"))
@timeout(3600)
@ -513,6 +511,12 @@ async def run_raptor(row, chat_mdl, embd_mdl, vector_size, callback=None):
@timeout(60*60*2, 1)
async def do_handle_task(task):
+   task_type = task.get("task_type", "")
+   if task_type == "dataflow" and task.get("doc_id", "") == "x":
+       await run_dataflow(task)
+       return
    task_id = task["id"]
    task_from_page = task["from_page"]
    task_to_page = task["to_page"]
@ -526,6 +530,7 @@ async def do_handle_task(task):
    task_parser_config = task["parser_config"]
    task_start_ts = timer()
    # prepare the progress callback function
    progress_callback = partial(set_progress, task_id, task_from_page, task_to_page)
@ -554,13 +559,11 @@ async def do_handle_task(task):
    init_kb(task, vector_size)
-   task_type = task.get("task_type", "")
    if task_type == "dataflow":
-       task_dataflow_dsl = task["dsl"]
-       task_dataflow_id = task["dataflow_id"]
-       await run_dataflow(dsl=task_dataflow_dsl, tenant_id=task_tenant_id, doc_id=task_doc_id, task_id=task_id, flow_id=task_dataflow_id, callback=None)
+       await run_dataflow(task)
        return
-   elif task_type == "raptor":
+   if task_type == "raptor":
        # bind LLM for raptor
        chat_model = LLMBundle(task_tenant_id, LLMType.CHAT, llm_name=task_llm_id, lang=task_language)
        # run RAPTOR

155
uv.lock generated
View File

@ -1,5 +1,5 @@
version = 1
-revision = 1
+revision = 3
requires-python = ">=3.10, <3.13"
resolution-markers = [
    "python_full_version >= '3.12' and sys_platform == 'darwin'",
@ -861,6 +861,15 @@ wheels = [
{ url = "https://mirrors.aliyun.com/pypi/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6" }, { url = "https://mirrors.aliyun.com/pypi/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6" },
] ]
[[package]]
name = "colorclass"
version = "2.2.2"
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/d7/1a/31ff00a33569a3b59d65bbdc445c73e12f92ad28195b7ace299f68b9af70/colorclass-2.2.2.tar.gz", hash = "sha256:6d4fe287766166a98ca7bc6f6312daf04a0481b1eda43e7173484051c0ab4366" }
wheels = [
{ url = "https://mirrors.aliyun.com/pypi/packages/30/b6/daf3e2976932da4ed3579cff7a30a53d22ea9323ee4f0d8e43be60454897/colorclass-2.2.2-py2.py3-none-any.whl", hash = "sha256:6f10c273a0ef7a1150b1120b6095cbdd68e5cf36dfd5d0fc957a2500bbf99a55" },
]
[[package]] [[package]]
name = "coloredlogs" name = "coloredlogs"
version = "15.0.1" version = "15.0.1"
@ -873,6 +882,15 @@ wheels = [
{ url = "https://mirrors.aliyun.com/pypi/packages/a7/06/3d6badcf13db419e25b07041d9c7b4a2c331d3f4e7134445ec5df57714cd/coloredlogs-15.0.1-py2.py3-none-any.whl", hash = "sha256:612ee75c546f53e92e70049c9dbfcc18c935a2b9a53b66085ce9ef6a6e5c0934" }, { url = "https://mirrors.aliyun.com/pypi/packages/a7/06/3d6badcf13db419e25b07041d9c7b4a2c331d3f4e7134445ec5df57714cd/coloredlogs-15.0.1-py2.py3-none-any.whl", hash = "sha256:612ee75c546f53e92e70049c9dbfcc18c935a2b9a53b66085ce9ef6a6e5c0934" },
] ]
[[package]]
name = "compressed-rtf"
version = "1.0.7"
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/b7/0c/929a4e8ef9d7143f54d77dadb5f370cc7b98534b1bd6e1124d0abe8efb24/compressed_rtf-1.0.7.tar.gz", hash = "sha256:7c30859334839f3cdc7d10796af5b434bb326b9df7cb5a65e95a8eacb2951b0e" }
wheels = [
{ url = "https://mirrors.aliyun.com/pypi/packages/07/1d/62f5bf92e12335eb63517f42671ed78512d48bbc69e02a942dd7b90f03f0/compressed_rtf-1.0.7-py3-none-any.whl", hash = "sha256:b7904921d78c67a0a4b7fff9fb361a00ae2b447b6edca010ce321cd98fa0fcc0" },
]
[[package]] [[package]]
name = "contourpy" name = "contourpy"
version = "1.3.2" version = "1.3.2"
@ -1322,6 +1340,23 @@ wheels = [
{ url = "https://mirrors.aliyun.com/pypi/packages/fc/da/8376678b4a9ae0f9418d93df9c9cf851dced49c95ceb38daac6651e38f7a/duckduckgo_search-7.5.5-py3-none-any.whl", hash = "sha256:c71a0661aa436f215d9a05d653af424affb58825ab3e79f3b788053cbdee9ebc" }, { url = "https://mirrors.aliyun.com/pypi/packages/fc/da/8376678b4a9ae0f9418d93df9c9cf851dced49c95ceb38daac6651e38f7a/duckduckgo_search-7.5.5-py3-none-any.whl", hash = "sha256:c71a0661aa436f215d9a05d653af424affb58825ab3e79f3b788053cbdee9ebc" },
] ]
[[package]]
name = "easygui"
version = "0.98.3"
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/cc/ad/e35f7a30272d322be09dc98592d2f55d27cc933a7fde8baccbbeb2bd9409/easygui-0.98.3.tar.gz", hash = "sha256:d653ff79ee1f42f63b5a090f2f98ce02335d86ad8963b3ce2661805cafe99a04" }
wheels = [
{ url = "https://mirrors.aliyun.com/pypi/packages/8e/a7/b276ff776533b423710a285c8168b52551cb2ab0855443131fdc7fd8c16f/easygui-0.98.3-py2.py3-none-any.whl", hash = "sha256:33498710c68b5376b459cd3fc48d1d1f33822139eb3ed01defbc0528326da3ba" },
]
[[package]]
name = "ebcdic"
version = "1.1.1"
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
wheels = [
{ url = "https://mirrors.aliyun.com/pypi/packages/0d/2f/633031205333bee5f9f93761af8268746aa75f38754823aabb8570eb245b/ebcdic-1.1.1-py2.py3-none-any.whl", hash = "sha256:33b4cb729bc2d0bf46cc1847b0e5946897cb8d3f53520c5b9aa5fa98d7e735f1" },
]
[[package]] [[package]]
name = "editdistance" name = "editdistance"
version = "0.8.1" version = "0.8.1"
@ -1435,6 +1470,24 @@ wheels = [
{ url = "https://mirrors.aliyun.com/pypi/packages/36/f4/c6e662dade71f56cd2f3735141b265c3c79293c109549c1e6933b0651ffc/exceptiongroup-1.3.0-py3-none-any.whl", hash = "sha256:4d111e6e0c13d0644cad6ddaa7ed0261a0b36971f6d23e7ec9b4b9097da78a10" }, { url = "https://mirrors.aliyun.com/pypi/packages/36/f4/c6e662dade71f56cd2f3735141b265c3c79293c109549c1e6933b0651ffc/exceptiongroup-1.3.0-py3-none-any.whl", hash = "sha256:4d111e6e0c13d0644cad6ddaa7ed0261a0b36971f6d23e7ec9b4b9097da78a10" },
] ]
[[package]]
name = "extract-msg"
version = "0.55.0"
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
dependencies = [
{ name = "beautifulsoup4" },
{ name = "compressed-rtf" },
{ name = "ebcdic" },
{ name = "olefile" },
{ name = "red-black-tree-mod" },
{ name = "rtfde" },
{ name = "tzlocal" },
]
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/5e/65/c70afb3b119a44b3ee36b029485dc15326cf3a7c50da19a1ecbbf949c5d1/extract_msg-0.55.0.tar.gz", hash = "sha256:cf08283498c3dfcc7f894dad1579f52e3ced9fb76b865c2355cbe757af8a54e1" }
wheels = [
{ url = "https://mirrors.aliyun.com/pypi/packages/53/81/87d5241036046ea17c5c8db228f4c9e04e07e53b627015d4496a99449aaf/extract_msg-0.55.0-py3-none-any.whl", hash = "sha256:baf0cdee9a8d267b70c366bc57ceb03dbfa1e7ab2dca6824169a7fe623f0917c" },
]
[[package]] [[package]]
name = "fake-http-header" name = "fake-http-header"
version = "0.3.5" version = "0.3.5"
@ -2893,6 +2946,15 @@ wheels = [
{ url = "https://mirrors.aliyun.com/pypi/packages/92/b0/8f08df3f0fa584c4132937690c6dd33e0a116f963ecf2b35567f614e0ca7/langfuse-3.2.1-py3-none-any.whl", hash = "sha256:07a84e8c1eed6ac8e149bdda1431fd866e4aee741b66124316336fb2bc7e6a32" }, { url = "https://mirrors.aliyun.com/pypi/packages/92/b0/8f08df3f0fa584c4132937690c6dd33e0a116f963ecf2b35567f614e0ca7/langfuse-3.2.1-py3-none-any.whl", hash = "sha256:07a84e8c1eed6ac8e149bdda1431fd866e4aee741b66124316336fb2bc7e6a32" },
] ]
[[package]]
name = "lark"
version = "1.1.9"
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/2c/e1/804b6196b3fbdd0f8ba785fc62837b034782a891d6f663eea2f30ca23cfa/lark-1.1.9.tar.gz", hash = "sha256:15fa5236490824c2c4aba0e22d2d6d823575dcaf4cdd1848e34b6ad836240fba" }
wheels = [
{ url = "https://mirrors.aliyun.com/pypi/packages/e7/9c/eef7c591e6dc952f3636cfe0df712c0f9916cedf317810a3bb53ccb65cdd/lark-1.1.9-py3-none-any.whl", hash = "sha256:a0dd3a87289f8ccbb325901e4222e723e7d745dbfc1803eaf5f3d2ace19cf2db" },
]
[[package]] [[package]]
name = "litellm" name = "litellm"
version = "1.75.5.post1" version = "1.75.5.post1"
@ -3377,6 +3439,19 @@ wheels = [
{ url = "https://mirrors.aliyun.com/pypi/packages/b1/ef/27dd35a7049c9a4f4211c6cd6a8c9db0a50647546f003a5867827ec45391/msgspec-0.19.0-cp312-cp312-win_amd64.whl", hash = "sha256:067f0de1c33cfa0b6a8206562efdf6be5985b988b53dd244a8e06f993f27c8c0" }, { url = "https://mirrors.aliyun.com/pypi/packages/b1/ef/27dd35a7049c9a4f4211c6cd6a8c9db0a50647546f003a5867827ec45391/msgspec-0.19.0-cp312-cp312-win_amd64.whl", hash = "sha256:067f0de1c33cfa0b6a8206562efdf6be5985b988b53dd244a8e06f993f27c8c0" },
] ]
[[package]]
name = "msoffcrypto-tool"
version = "5.4.2"
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
dependencies = [
{ name = "cryptography" },
{ name = "olefile" },
]
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/d2/b7/0fd6573157e0ec60c0c470e732ab3322fba4d2834fd24e1088d670522a01/msoffcrypto_tool-5.4.2.tar.gz", hash = "sha256:44b545adba0407564a0cc3d6dde6ca36b7c0fdf352b85bca51618fa1d4817370" }
wheels = [
{ url = "https://mirrors.aliyun.com/pypi/packages/03/54/7f6d3d9acad083dae8c22d9ab483b657359a1bf56fee1d7af88794677707/msoffcrypto_tool-5.4.2-py3-none-any.whl", hash = "sha256:274fe2181702d1e5a107ec1b68a4c9fea997a44972ae1cc9ae0cb4f6a50fef0e" },
]
[[package]] [[package]]
name = "multidict" name = "multidict"
version = "6.6.3" version = "6.6.3"
@ -3726,6 +3801,32 @@ wheels = [
{ url = "https://mirrors.aliyun.com/pypi/packages/9e/4e/0d0c945463719429b7bd21dece907ad0bde437a2ff12b9b12fee94722ab0/nvidia_nvtx_cu12-12.6.77-py3-none-manylinux2014_x86_64.whl", hash = "sha256:6574241a3ec5fdc9334353ab8c479fe75841dbe8f4532a8fc97ce63503330ba1" }, { url = "https://mirrors.aliyun.com/pypi/packages/9e/4e/0d0c945463719429b7bd21dece907ad0bde437a2ff12b9b12fee94722ab0/nvidia_nvtx_cu12-12.6.77-py3-none-manylinux2014_x86_64.whl", hash = "sha256:6574241a3ec5fdc9334353ab8c479fe75841dbe8f4532a8fc97ce63503330ba1" },
] ]
[[package]]
name = "olefile"
version = "0.47"
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/69/1b/077b508e3e500e1629d366249c3ccb32f95e50258b231705c09e3c7a4366/olefile-0.47.zip", hash = "sha256:599383381a0bf3dfbd932ca0ca6515acd174ed48870cbf7fee123d698c192c1c" }
wheels = [
{ url = "https://mirrors.aliyun.com/pypi/packages/17/d3/b64c356a907242d719fc668b71befd73324e47ab46c8ebbbede252c154b2/olefile-0.47-py2.py3-none-any.whl", hash = "sha256:543c7da2a7adadf21214938bb79c83ea12b473a4b6ee4ad4bf854e7715e13d1f" },
]
[[package]]
name = "oletools"
version = "0.60.2"
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
dependencies = [
{ name = "colorclass" },
{ name = "easygui" },
{ name = "msoffcrypto-tool", marker = "(platform_python_implementation != 'PyPy' and sys_platform == 'darwin') or (platform_python_implementation != 'PyPy' and sys_platform == 'win32') or (sys_platform != 'darwin' and sys_platform != 'win32')" },
{ name = "olefile" },
{ name = "pcodedmp" },
{ name = "pyparsing" },
]
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/5c/2f/037f40e44706d542b94a2312ccc33ee2701ebfc9a83b46b55263d49ce55a/oletools-0.60.2.zip", hash = "sha256:ad452099f4695ffd8855113f453348200d195ee9fa341a09e197d66ee7e0b2c3" }
wheels = [
{ url = "https://mirrors.aliyun.com/pypi/packages/ac/ff/05257b7183279b80ecec6333744de23f48f0faeeba46c93e6d13ce835515/oletools-0.60.2-py2.py3-none-any.whl", hash = "sha256:72ad8bd748fd0c4e7b5b4733af770d11543ebb2bf2697455f99f975fcd50cc96" },
]
[[package]] [[package]]
name = "ollama" name = "ollama"
version = "0.2.1" version = "0.2.1"
@ -4188,6 +4289,19 @@ wheels = [
{ url = "https://mirrors.aliyun.com/pypi/packages/87/2b/b50d3d08ea0fc419c183a84210571eba005328efa62b6b98bc28e9ead32a/patsy-1.0.1-py2.py3-none-any.whl", hash = "sha256:751fb38f9e97e62312e921a1954b81e1bb2bcda4f5eeabaf94db251ee791509c" }, { url = "https://mirrors.aliyun.com/pypi/packages/87/2b/b50d3d08ea0fc419c183a84210571eba005328efa62b6b98bc28e9ead32a/patsy-1.0.1-py2.py3-none-any.whl", hash = "sha256:751fb38f9e97e62312e921a1954b81e1bb2bcda4f5eeabaf94db251ee791509c" },
] ]
[[package]]
name = "pcodedmp"
version = "1.2.6"
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
dependencies = [
{ name = "oletools" },
{ name = "win-unicode-console", marker = "platform_python_implementation != 'PyPy' and sys_platform == 'win32'" },
]
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/3d/20/6d461e29135f474408d0d7f95b2456a9ba245560768ee51b788af10f7429/pcodedmp-1.2.6.tar.gz", hash = "sha256:025f8c809a126f45a082ffa820893e6a8d990d9d7ddb68694b5a9f0a6dbcd955" }
wheels = [
{ url = "https://mirrors.aliyun.com/pypi/packages/ba/72/b380fb5c89d89c3afafac8cf02a71a45f4f4a4f35531ca949a34683962d1/pcodedmp-1.2.6-py2.py3-none-any.whl", hash = "sha256:4441f7c0ab4cbda27bd4668db3b14f36261d86e5059ce06c0828602cbe1c4278" },
]
[[package]] [[package]]
name = "pdfminer-six" name = "pdfminer-six"
version = "20221105" version = "20221105"
@ -5300,6 +5414,7 @@ dependencies = [
    { name = "elastic-transport" },
    { name = "elasticsearch" },
    { name = "elasticsearch-dsl" },
+   { name = "extract-msg" },
    { name = "filelock" },
    { name = "flasgger" },
    { name = "flask" },
@ -5452,6 +5567,7 @@ requires-dist = [
    { name = "elastic-transport", specifier = "==8.12.0" },
    { name = "elasticsearch", specifier = "==8.12.1" },
    { name = "elasticsearch-dsl", specifier = "==8.12.0" },
+   { name = "extract-msg", specifier = ">=0.39.0" },
    { name = "fastembed", marker = "(platform_machine != 'x86_64' and extra == 'full') or (sys_platform == 'darwin' and extra == 'full')", specifier = ">=0.3.6,<0.4.0" },
    { name = "fastembed-gpu", marker = "platform_machine == 'x86_64' and sys_platform != 'darwin' and extra == 'full'", specifier = ">=0.3.6,<0.4.0" },
    { name = "filelock", specifier = "==3.15.4" },
@ -5630,6 +5746,12 @@ wheels = [
{ url = "https://mirrors.aliyun.com/pypi/packages/c2/5a/2f2e7fc026d5e64b5408aa3fbe0296a6407b8481196cae4daacacb3a3ae0/readerwriterlock-1.0.9-py3-none-any.whl", hash = "sha256:8c4b704e60d15991462081a27ef46762fea49b478aa4426644f2146754759ca7" }, { url = "https://mirrors.aliyun.com/pypi/packages/c2/5a/2f2e7fc026d5e64b5408aa3fbe0296a6407b8481196cae4daacacb3a3ae0/readerwriterlock-1.0.9-py3-none-any.whl", hash = "sha256:8c4b704e60d15991462081a27ef46762fea49b478aa4426644f2146754759ca7" },
] ]
[[package]]
name = "red-black-tree-mod"
version = "1.22"
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/48/75/bfa342a2ebfc9623b701f1c6995b9906fd6dd2cedf6bce777d09e23303ac/red-black-tree-mod-1.22.tar.gz", hash = "sha256:38e3652903a2bf96379c27c2082ca0b7b905158662dd7ef0c97f4fd93a9aa908" }
[[package]] [[package]]
name = "referencing" name = "referencing"
version = "0.36.2" version = "0.36.2"
@ -5883,6 +6005,19 @@ wheels = [
{ url = "https://mirrors.aliyun.com/pypi/packages/64/8d/0133e4eb4beed9e425d9a98ed6e081a55d195481b7632472be1af08d2f6b/rsa-4.9.1-py3-none-any.whl", hash = "sha256:68635866661c6836b8d39430f97a996acbd61bfa49406748ea243539fe239762" }, { url = "https://mirrors.aliyun.com/pypi/packages/64/8d/0133e4eb4beed9e425d9a98ed6e081a55d195481b7632472be1af08d2f6b/rsa-4.9.1-py3-none-any.whl", hash = "sha256:68635866661c6836b8d39430f97a996acbd61bfa49406748ea243539fe239762" },
] ]
[[package]]
name = "rtfde"
version = "0.1.2.1"
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
dependencies = [
{ name = "lark" },
{ name = "oletools" },
]
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/66/f1/3fafc33cd80cc605509ced36dbbb74c3c365d5859b0b57b6500e4a8ca8a5/rtfde-0.1.2.1.tar.gz", hash = "sha256:ea2653fb163ef1e9fdd1b0849bef88b0ba82537f860d4aca5b2c49f556efaaaa" }
wheels = [
{ url = "https://mirrors.aliyun.com/pypi/packages/b6/dd/641e9cf68d4242aaf7ce9653498009d8925080b6664993988bd50468932a/rtfde-0.1.2.1-py3-none-any.whl", hash = "sha256:c44dfa923a435c54cdbdd0e0f5352a4075542af317af061f82f2d4f032271645" },
]
[[package]] [[package]]
name = "ruamel-base" name = "ruamel-base"
version = "1.0.0" version = "1.0.0"
@ -6890,6 +7025,18 @@ wheels = [
{ url = "https://mirrors.aliyun.com/pypi/packages/5c/23/c7abc0ca0a1526a0774eca151daeb8de62ec457e77262b66b359c3c7679e/tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8" }, { url = "https://mirrors.aliyun.com/pypi/packages/5c/23/c7abc0ca0a1526a0774eca151daeb8de62ec457e77262b66b359c3c7679e/tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8" },
] ]
[[package]]
name = "tzlocal"
version = "5.3.1"
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
dependencies = [
{ name = "tzdata", marker = "sys_platform == 'win32'" },
]
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/8b/2e/c14812d3d4d9cd1773c6be938f89e5735a1f11a9f184ac3639b93cef35d5/tzlocal-5.3.1.tar.gz", hash = "sha256:cceffc7edecefea1f595541dbd6e990cb1ea3d19bf01b2809f362a03dd7921fd" }
wheels = [
{ url = "https://mirrors.aliyun.com/pypi/packages/c2/14/e2a54fabd4f08cd7af1c07030603c3356b74da07f7cc056e600436edfa17/tzlocal-5.3.1-py3-none-any.whl", hash = "sha256:eb1a66c3ef5847adf7a834f1be0800581b683b5608e74f86ecbcef8ab91bb85d" },
]
[[package]] [[package]]
name = "umap-learn" name = "umap-learn"
version = "0.5.6" version = "0.5.6"
@ -7134,6 +7281,12 @@ dependencies = [
] ]
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/67/35/25e68fbc99e672127cc6fbb14b8ec1ba3dfef035bf1e4c90f78f24a80b7d/wikipedia-1.4.0.tar.gz", hash = "sha256:db0fad1829fdd441b1852306e9856398204dc0786d2996dd2e0c8bb8e26133b2" } sdist = { url = "https://mirrors.aliyun.com/pypi/packages/67/35/25e68fbc99e672127cc6fbb14b8ec1ba3dfef035bf1e4c90f78f24a80b7d/wikipedia-1.4.0.tar.gz", hash = "sha256:db0fad1829fdd441b1852306e9856398204dc0786d2996dd2e0c8bb8e26133b2" }
[[package]]
name = "win-unicode-console"
version = "0.5"
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/89/8d/7aad74930380c8972ab282304a2ff45f3d4927108bb6693cabcc9fc6a099/win_unicode_console-0.5.zip", hash = "sha256:d4142d4d56d46f449d6f00536a73625a871cba040f0bc1a2e305a04578f07d1e" }
[[package]] [[package]]
name = "win32-setctime" name = "win32-setctime"
version = "1.2.0" version = "1.2.0"

View File

@ -20,17 +20,10 @@ interface IProps {
  isMult?: boolean;
}
-const data = [
-  { id: '1', name: 'data-pipeline-1' },
-  { id: '2', name: 'data-pipeline-2' },
-  { id: '3', name: 'data-pipeline-3' },
-  { id: '4', name: 'data-pipeline-4' },
-];
export function DataFlowSelect(props: IProps) {
  const { toDataPipeline, formFieldName, isMult = true } = props;
  const { t } = useTranslate('knowledgeConfiguration');
  const form = useFormContext();
-  console.log('data-pipline form', form);
  const toDataPipLine = () => {
    toDataPipeline?.();
  };

View File

@ -1,6 +1,10 @@
import { DocumentParserType } from '@/constants/knowledge'; import { DocumentParserType } from '@/constants/knowledge';
import { useTranslate } from '@/hooks/common-hooks'; import { useTranslate } from '@/hooks/common-hooks';
import { cn } from '@/lib/utils'; import { cn } from '@/lib/utils';
import {
GenerateLogButton,
GenerateType,
} from '@/pages/dataset/dataset/generate-button/generate';
import { upperFirst } from 'lodash'; import { upperFirst } from 'lodash';
import { useCallback, useMemo } from 'react'; import { useCallback, useMemo } from 'react';
import { useFormContext, useWatch } from 'react-hook-form'; import { useFormContext, useWatch } from 'react-hook-form';
@ -47,6 +51,7 @@ export const showGraphRagItems = (parserId: DocumentParserType | undefined) => {
type GraphRagItemsProps = { type GraphRagItemsProps = {
marginBottom?: boolean; marginBottom?: boolean;
className?: string; className?: string;
showGenerateItem?: boolean;
}; };
export function UseGraphRagFormField() { export function UseGraphRagFormField() {
@ -88,6 +93,7 @@ export function UseGraphRagFormField() {
// The three types "table", "resume" and "one" do not display this configuration. // The three types "table", "resume" and "one" do not display this configuration.
const GraphRagItems = ({ const GraphRagItems = ({
marginBottom = false, marginBottom = false,
showGenerateItem = false,
className = 'p-10', className = 'p-10',
}: GraphRagItemsProps) => { }: GraphRagItemsProps) => {
const { t } = useTranslate('knowledgeConfiguration'); const { t } = useTranslate('knowledgeConfiguration');
@ -210,6 +216,18 @@ const GraphRagItems = ({
</FormItem> </FormItem>
)} )}
/> />
{showGenerateItem && (
<div className="w-full flex items-center">
<div className="text-sm whitespace-nowrap w-1/4">
{t('extractKnowledgeGraph')}
</div>
<GenerateLogButton
className="w-3/4 text-text-secondary"
status={1}
type={GenerateType.KnowledgeGraph}
/>
</div>
)}
</> </>
)} )}
</FormContainer> </FormContainer>

View File

@ -1,6 +1,10 @@
import { FormLayout } from '@/constants/form'; import { FormLayout } from '@/constants/form';
import { DocumentParserType } from '@/constants/knowledge'; import { DocumentParserType } from '@/constants/knowledge';
import { useTranslate } from '@/hooks/common-hooks'; import { useTranslate } from '@/hooks/common-hooks';
import {
GenerateLogButton,
GenerateType,
} from '@/pages/dataset/dataset/generate-button/generate';
import random from 'lodash/random'; import random from 'lodash/random';
import { Shuffle } from 'lucide-react'; import { Shuffle } from 'lucide-react';
import { useCallback } from 'react'; import { useCallback } from 'react';
@ -52,7 +56,11 @@ const Prompt = 'parser_config.raptor.prompt';
// The three types "table", "resume" and "one" do not display this configuration.
-const RaptorFormFields = () => {
+const RaptorFormFields = ({
+  showGenerateItem = false,
+}: {
+  showGenerateItem?: boolean;
+}) => {
  const form = useFormContext();
  const { t } = useTranslate('knowledgeConfiguration');
  const useRaptor = useWatch({ name: UseRaptorField });
@ -211,6 +219,18 @@ const RaptorFormFields = () => {
</FormItem> </FormItem>
)} )}
/> />
{showGenerateItem && (
<div className="w-full flex items-center">
<div className="text-sm whitespace-nowrap w-1/4">
{t('extractRaptor')}
</div>
<GenerateLogButton
className="w-3/4 text-text-secondary"
status={1}
type={GenerateType.Raptor}
/>
</div>
)}
</div> </div>
)} )}
</> </>

View File

@ -75,21 +75,21 @@ const Modal: ModalType = ({
  const handleCancel = useCallback(() => {
    onOpenChange?.(false);
-   onCancel?.();
+   // onCancel?.();
- }, [onOpenChange, onCancel]);
+ }, [onOpenChange]);
  const handleOk = useCallback(() => {
    onOpenChange?.(true);
-   onOk?.();
+   // onOk?.();
- }, [onOpenChange, onOk]);
+ }, [onOpenChange]);
  const handleChange = (open: boolean) => {
    onOpenChange?.(open);
    console.log('open', open, onOpenChange);
    if (open) {
-     handleOk();
+     onOk?.();
    }
    if (!open) {
-     handleCancel();
+     onCancel?.();
    }
  };
  const footEl = useMemo(() => {

View File

@ -102,13 +102,15 @@ export default {
    noMoreData: `That's all. Nothing more.`,
  },
  knowledgeDetails: {
+   notGenerated: 'Not generated',
+   generatedOn: 'Generated on',
+   subbarFiles: 'Files',
    generateKnowledgeGraph:
      'This will extract entities and relationships from all your documents in this dataset. The process may take a while to complete.',
    generateRaptor:
      'This will extract entities and relationships from all your documents in this dataset. The process may take a while to complete.',
    generate: 'Generate',
    raptor: 'Raptor',
+   knowledgeGraph: 'Knowledge Graph',
    processingType: 'Processing Type',
    dataPipeline: 'Data Pipeline',
    operations: 'Operations',
@ -138,12 +140,12 @@ export default {
    testing: 'Retrieval testing',
    files: 'files',
    configuration: 'Configuration',
-   knowledgeGraph: 'Knowledge graph',
+   knowledgeGraph: 'Knowledge Graph',
    name: 'Name',
    namePlaceholder: 'Please input name!',
    doc: 'Docs',
    datasetDescription:
-     '😉 Please wait for your files to finish parsing before starting an AI-powered chat.',
+     'Please wait for your files to finish parsing before starting an AI-powered chat.',
    addFile: 'Add file',
    searchFiles: 'Search your files',
    localFiles: 'Local files',
@ -261,6 +263,22 @@ export default {
reRankModelWaring: 'Re-rank model is very time consuming.', reRankModelWaring: 'Re-rank model is very time consuming.',
}, },
knowledgeConfiguration: { knowledgeConfiguration: {
deleteGenerateModalContent: `
<p>Deleting the generated <strong class='text-text-primary'>{{type}}</strong> results
will remove all derived entities and relationships from this dataset.
Your original files will remain intact.<p>
<br/>
Do you want to continue?
`,
extractRaptor: 'Extract Raptor',
extractKnowledgeGraph: 'Extract Knowledge Graph',
filterPlaceholder: 'please input filter',
fileFilterTip: '',
fileFilter: 'File Filter',
setDefaultTip: '',
setDefault: 'Set as Default',
eidtLinkDataPipeline: 'Edit Data Pipeline',
linkPipelineSetTip: 'Manage data pipeline linkage with this dataset',
default: 'Default', default: 'Default',
dataPipeline: 'Data Pipeline', dataPipeline: 'Data Pipeline',
linkDataPipeline: 'Link Data Pipeline', linkDataPipeline: 'Link Data Pipeline',
@ -1646,6 +1664,13 @@ This delimiter is used to split the input text into several text pieces echo of
<p>To keep them, please click Rerun to re-run the current stage.</p> `, <p>To keep them, please click Rerun to re-run the current stage.</p> `,
changeStepModalConfirmText: 'Switch Anyway', changeStepModalConfirmText: 'Switch Anyway',
changeStepModalCancelText: 'Cancel', changeStepModalCancelText: 'Cancel',
unlinkPipelineModalTitle: 'Unlink data pipeline',
unlinkPipelineModalContent: `
<p>Once unlinked, this Dataset will no longer be connected to the current Data Pipeline.</p>
<p>Files that are already being parsed will continue until completion</p>
<p>Files that are not yet parsed will no longer be processed</p> <br/>
<p>Are you sure you want to proceed?</p> `,
unlinkPipelineModalConfirmText: 'Unlink',
}, },
dataflow: { dataflow: {
parser: 'Parser', parser: 'Parser',

View File

@ -94,9 +94,11 @@ export default {
    noMoreData: '没有更多数据了',
  },
  knowledgeDetails: {
+   notGenerated: '未生成',
+   generatedOn: '生成于',
+   subbarFiles: '文件列表',
    generate: '生成',
    raptor: 'Raptor',
+   knowledgeGraph: '知识图谱',
    processingType: '处理类型',
    dataPipeline: '数据管道',
    operations: '操作',
@ -130,7 +132,7 @@ export default {
    name: '名称',
    namePlaceholder: '请输入名称',
    doc: '文档',
-   datasetDescription: '😉 解析成功后才能问答哦。',
+   datasetDescription: '解析成功后才能问答哦。',
    addFile: '新增文件',
    searchFiles: '搜索文件',
    localFiles: '本地文件',
@ -246,6 +248,22 @@ export default {
theDocumentBeingParsedCannotBeDeleted: '正在解析的文档不能被删除', theDocumentBeingParsedCannotBeDeleted: '正在解析的文档不能被删除',
}, },
knowledgeConfiguration: { knowledgeConfiguration: {
deleteGenerateModalContent: `
<p>删除生成的 <strong class='text-text-primary'>{{type}}</strong> 结果
将从此数据集中移除所有派生实体和关系。
您的原始文件将保持不变。<p>
<br/>
是否要继续?
`,
extractRaptor: '从文档中提取Raptor',
extractKnowledgeGraph: '从文档中提取知识图谱',
filterPlaceholder: '请输入',
fileFilterTip: '',
fileFilter: '正则匹配表达式',
setDefaultTip: '',
setDefault: '设置默认',
eidtLinkDataPipeline: '编辑数据流',
linkPipelineSetTip: '管理与此数据集的数据管道链接',
default: '默认', default: '默认',
dataPipeline: '数据流', dataPipeline: '数据流',
linkDataPipeline: '关联数据流', linkDataPipeline: '关联数据流',
@ -1556,6 +1574,13 @@ General实体和关系提取提示来自 GitHub - microsoft/graphrag基于
<p>要保留这些更改,请点击“重新运行”以重新运行当前阶段。</p> `, <p>要保留这些更改,请点击“重新运行”以重新运行当前阶段。</p> `,
changeStepModalConfirmText: '继续切换', changeStepModalConfirmText: '继续切换',
changeStepModalCancelText: '取消', changeStepModalCancelText: '取消',
unlinkPipelineModalTitle: '解绑数据流',
unlinkPipelineModalContent: `
<p>一旦取消链接,该数据集将不再连接到当前数据管道。</p>
<p>正在解析的文件将继续解析,直到完成。</p>
<p>尚未解析的文件将不再被处理。</p> <br/>
<p>你确定要继续吗?</p> `,
unlinkPipelineModalConfirmText: '解绑',
}, },
dataflow: { dataflow: {
parser: '解析器', parser: '解析器',

View File

@ -2,7 +2,7 @@ import SvgIcon from '@/components/svg-icon';
import { useIsDarkTheme } from '@/components/theme-provider'; import { useIsDarkTheme } from '@/components/theme-provider';
import { parseColorToRGBA } from '@/utils/common-util'; import { parseColorToRGBA } from '@/utils/common-util';
import { CircleQuestionMark } from 'lucide-react'; import { CircleQuestionMark } from 'lucide-react';
-import { FC, useMemo, useState } from 'react';
+import { FC, useEffect, useMemo, useState } from 'react';
import { useTranslation } from 'react-i18next'; import { useTranslation } from 'react-i18next';
import { LogTabs } from './dataset-common'; import { LogTabs } from './dataset-common';
import { DatasetFilter } from './dataset-filter'; import { DatasetFilter } from './dataset-filter';
@ -74,25 +74,35 @@ const FileLogsPage: FC = () => {
  const [active, setActive] = useState<(typeof LogTabs)[keyof typeof LogTabs]>(
    LogTabs.FILE_LOGS,
  );
- const topMockData = {
+ const [topAllData, setTopAllData] = useState({
    totalFiles: {
-     value: 2827,
-     precent: 12.5,
+     value: 0,
+     precent: 0,
    },
    downloads: {
-     value: 28,
-     success: 8,
-     failed: 2,
+     value: 0,
+     success: 0,
+     failed: 0,
    },
    processing: {
-     value: 156,
-     success: 8,
-     failed: 2,
+     value: 0,
+     success: 0,
+     failed: 0,
    },
- };
+ });
  const { data: topData } = useFetchOverviewTital();
  console.log('topData --> ', topData);
+ useEffect(() => {
+   setTopAllData({
+     ...topAllData,
+     processing: {
+       value: topData?.processing || 0,
+       success: topData?.finished || 0,
+       failed: topData?.failed || 0,
+     },
+   });
+ }, [topData, topAllData]);
  const mockData = useMemo(() => {
    if (active === LogTabs.FILE_LOGS) {
@ -161,7 +171,7 @@ const FileLogsPage: FC = () => {
<div className="grid grid-cols-3 md:grid-cols-3 gap-4 mb-6"> <div className="grid grid-cols-3 md:grid-cols-3 gap-4 mb-6">
<StatCard <StatCard
title="Total Files" title="Total Files"
value={topMockData.totalFiles.value} value={topAllData.totalFiles.value}
icon={ icon={
isDark ? ( isDark ? (
<SvgIcon name="data-flow/total-files-icon" width={40} /> <SvgIcon name="data-flow/total-files-icon" width={40} />
@ -172,15 +182,15 @@ const FileLogsPage: FC = () => {
> >
<div> <div>
<span className="text-accent-primary"> <span className="text-accent-primary">
{topMockData.totalFiles.precent > 0 ? '+' : ''} {topAllData.totalFiles.precent > 0 ? '+' : ''}
{topMockData.totalFiles.precent}%{' '} {topAllData.totalFiles.precent}%{' '}
</span> </span>
from last week from last week
</div> </div>
</StatCard> </StatCard>
<StatCard <StatCard
title="Downloading" title="Downloading"
value={topMockData.downloads.value} value={topAllData.downloads.value}
icon={ icon={
isDark ? ( isDark ? (
<SvgIcon name="data-flow/data-icon" width={40} /> <SvgIcon name="data-flow/data-icon" width={40} />
@ -190,13 +200,13 @@ const FileLogsPage: FC = () => {
} }
> >
<CardFooterProcess <CardFooterProcess
success={topMockData.downloads.success} success={topAllData.downloads.success}
failed={topMockData.downloads.failed} failed={topAllData.downloads.failed}
/> />
</StatCard> </StatCard>
<StatCard <StatCard
title="Processing" title="Processing"
value={topMockData.processing.value} value={topAllData.processing.value}
icon={ icon={
isDark ? ( isDark ? (
<SvgIcon name="data-flow/processing-icon" width={40} /> <SvgIcon name="data-flow/processing-icon" width={40} />
@ -206,8 +216,8 @@ const FileLogsPage: FC = () => {
} }
> >
<CardFooterProcess <CardFooterProcess
success={topMockData.processing.success} success={topAllData.processing.success}
failed={topMockData.processing.failed} failed={topAllData.processing.failed}
/> />
</StatCard> </StatCard>
</div> </div>

View File

@ -65,25 +65,25 @@ export const getFileLogsTableColumns = (
) => { ) => {
// const { t } = useTranslate('knowledgeDetails'); // const { t } = useTranslate('knowledgeDetails');
const columns: ColumnDef<DocumentLog>[] = [ const columns: ColumnDef<DocumentLog>[] = [
{ // {
id: 'select', // id: 'select',
header: ({ table }) => ( // header: ({ table }) => (
<input // <input
type="checkbox" // type="checkbox"
checked={table.getIsAllRowsSelected()} // checked={table.getIsAllRowsSelected()}
onChange={table.getToggleAllRowsSelectedHandler()} // onChange={table.getToggleAllRowsSelectedHandler()}
className="rounded bg-gray-900 text-blue-500 focus:ring-blue-500" // className="rounded bg-gray-900 text-blue-500 focus:ring-blue-500"
/> // />
), // ),
cell: ({ row }) => ( // cell: ({ row }) => (
<input // <input
type="checkbox" // type="checkbox"
checked={row.getIsSelected()} // checked={row.getIsSelected()}
onChange={row.getToggleSelectedHandler()} // onChange={row.getToggleSelectedHandler()}
className="rounded border-gray-600 bg-gray-900 text-blue-500 focus:ring-blue-500" // className="rounded border-gray-600 bg-gray-900 text-blue-500 focus:ring-blue-500"
/> // />
), // ),
}, // },
{ {
accessorKey: 'id', accessorKey: 'id',
header: 'ID', header: 'ID',
@ -156,7 +156,7 @@ export const getFileLogsTableColumns = (
id: 'operations', id: 'operations',
header: t('operations'), header: t('operations'),
cell: ({ row }) => ( cell: ({ row }) => (
<div className="flex justify-start space-x-2"> <div className="flex justify-start space-x-2 opacity-0 group-hover:opacity-100 transition-opacity">
<Button <Button
variant="ghost" variant="ghost"
size="sm" size="sm"
@ -189,25 +189,25 @@ export const getDatasetLogsTableColumns = (
) => { ) => {
// const { t } = useTranslate('knowledgeDetails'); // const { t } = useTranslate('knowledgeDetails');
const columns: ColumnDef<DocumentLog>[] = [ const columns: ColumnDef<DocumentLog>[] = [
{ // {
id: 'select', // id: 'select',
header: ({ table }) => ( // header: ({ table }) => (
<input // <input
type="checkbox" // type="checkbox"
checked={table.getIsAllRowsSelected()} // checked={table.getIsAllRowsSelected()}
onChange={table.getToggleAllRowsSelectedHandler()} // onChange={table.getToggleAllRowsSelectedHandler()}
className="rounded bg-gray-900 text-blue-500 focus:ring-blue-500" // className="rounded bg-gray-900 text-blue-500 focus:ring-blue-500"
/> // />
), // ),
cell: ({ row }) => ( // cell: ({ row }) => (
<input // <input
type="checkbox" // type="checkbox"
checked={row.getIsSelected()} // checked={row.getIsSelected()}
onChange={row.getToggleSelectedHandler()} // onChange={row.getToggleSelectedHandler()}
className="rounded border-gray-600 bg-gray-900 text-blue-500 focus:ring-blue-500" // className="rounded border-gray-600 bg-gray-900 text-blue-500 focus:ring-blue-500"
/> // />
), // ),
}, // },
{ {
accessorKey: 'id', accessorKey: 'id',
header: 'ID', header: 'ID',
@ -251,7 +251,7 @@ export const getDatasetLogsTableColumns = (
id: 'operations', id: 'operations',
header: t('operations'), header: t('operations'),
cell: ({ row }) => ( cell: ({ row }) => (
<div className="flex justify-start space-x-2"> <div className="flex justify-start space-x-2 opacity-0 group-hover:opacity-100 transition-opacity">
<Button <Button
variant="ghost" variant="ghost"
size="sm" size="sm"

View File

@ -1,21 +1,61 @@
import { IconFont } from '@/components/icon-font'; import { IconFont } from '@/components/icon-font';
import { RAGFlowAvatar } from '@/components/ragflow-avatar'; import { RAGFlowAvatar } from '@/components/ragflow-avatar';
import { Button } from '@/components/ui/button'; import { Button } from '@/components/ui/button';
import { Modal } from '@/components/ui/modal/modal';
import { omit } from 'lodash';
import { Link, Settings2, Unlink } from 'lucide-react'; import { Link, Settings2, Unlink } from 'lucide-react';
import { useState } from 'react'; import { useState } from 'react';
import { useTranslation } from 'react-i18next'; import { useTranslation } from 'react-i18next';
import { z } from 'zod';
import { linkPiplineFormSchema } from '../form-schema';
import LinkDataPipelineModal from './link-data-pipline-modal'; import LinkDataPipelineModal from './link-data-pipline-modal';
interface DataPipelineItemProps { interface DataPipelineItemProps {
id: string;
name: string; name: string;
avatar?: string; avatar?: string;
isDefault?: boolean; isDefault?: boolean;
linked?: boolean; linked?: boolean;
openLinkModalFunc?: (open: boolean) => void; openLinkModalFunc?: (open: boolean, data?: IDataPipelineNodeProps) => void;
} }
const DataPipelineItem = (props: DataPipelineItemProps) => { const DataPipelineItem = (props: DataPipelineItemProps) => {
const { t } = useTranslation(); const { t } = useTranslation();
const { name, avatar, isDefault, linked, openLinkModalFunc } = props; const { name, avatar, isDefault, linked, openLinkModalFunc } = props;
const openUnlinkModal = () => {
Modal.show({
visible: true,
className: '!w-[560px]',
title: t('dataflowParser.unlinkPipelineModalTitle'),
children: (
<div
className="text-sm text-text-secondary"
dangerouslySetInnerHTML={{
__html: t('dataflowParser.unlinkPipelineModalContent'),
}}
></div>
),
onVisibleChange: () => {
Modal.hide();
},
footer: (
<div className="flex justify-end gap-2">
<Button variant={'outline'} onClick={() => Modal.hide()}>
{t('dataflowParser.changeStepModalCancelText')}
</Button>
<Button
variant={'secondary'}
className="!bg-state-error text-bg-base"
onClick={() => {
Modal.hide();
}}
>
{t('dataflowParser.unlinkPipelineModalConfirmText')}
</Button>
</div>
),
});
};
return ( return (
<div className="flex items-center justify-between gap-1 px-2 rounded-lg border"> <div className="flex items-center justify-between gap-1 px-2 rounded-lg border">
<div className="flex items-center gap-1"> <div className="flex items-center gap-1">
@ -28,42 +68,89 @@ const DataPipelineItem = (props: DataPipelineItemProps) => {
)} )}
</div> </div>
<div className="flex gap-1 items-center"> <div className="flex gap-1 items-center">
<Button variant={'transparent'} className="border-none"> <Button
variant={'transparent'}
className="border-none"
type="button"
onClick={() =>
openLinkModalFunc?.(true, { ...omit(props, ['openLinkModalFunc']) })
}
>
<Settings2 /> <Settings2 />
</Button> </Button>
{!isDefault && ( {!isDefault && (
<Button <>
variant={'transparent'} {linked && (
className="border-none" <Button
onClick={() => { type="button"
openLinkModalFunc?.(true); variant={'transparent'}
}} className="border-none"
> onClick={() => {
{linked ? <Link /> : <Unlink />} openUnlinkModal();
</Button> }}
>
<Unlink />
</Button>
)}
</>
)} )}
</div> </div>
</div> </div>
); );
}; };
export interface IDataPipelineNodeProps {
id: string;
name: string;
avatar?: string;
isDefault?: boolean;
linked?: boolean;
}
const LinkDataPipeline = () => { const LinkDataPipeline = () => {
const { t } = useTranslation(); const { t } = useTranslation();
const [openLinkModal, setOpenLinkModal] = useState(false); const [openLinkModal, setOpenLinkModal] = useState(false);
const [currentDataPipeline, setCurrentDataPipeline] =
useState<IDataPipelineNodeProps>();
const testNode = [ const testNode = [
{ {
id: '1',
name: 'Data Pipeline 1', name: 'Data Pipeline 1',
avatar: 'https://avatars.githubusercontent.com/u/10656201?v=4', avatar: 'https://avatars.githubusercontent.com/u/10656201?v=4',
isDefault: true, isDefault: true,
linked: true, linked: true,
}, },
{ {
id: '2',
name: 'Data Pipeline 2', name: 'Data Pipeline 2',
avatar: 'https://avatars.githubusercontent.com/u/10656201?v=4', avatar: 'https://avatars.githubusercontent.com/u/10656201?v=4',
linked: false, linked: false,
}, },
{
id: '3',
name: 'Data Pipeline 3',
avatar: 'https://avatars.githubusercontent.com/u/10656201?v=4',
linked: false,
},
{
id: '4',
name: 'Data Pipeline 4',
avatar: 'https://avatars.githubusercontent.com/u/10656201?v=4',
linked: true,
},
]; ];
const openLinkModalFunc = (open: boolean) => { const openLinkModalFunc = (open: boolean, data?: IDataPipelineNodeProps) => {
console.log('open', open, data);
setOpenLinkModal(open); setOpenLinkModal(open);
if (data) {
setCurrentDataPipeline(data);
} else {
setCurrentDataPipeline(undefined);
}
};
const handleLinkOrEditSubmit = (
data: z.infer<typeof linkPiplineFormSchema>,
) => {
console.log('handleLinkOrEditSubmit', data);
}; };
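
`handleLinkOrEditSubmit` currently only logs the merged form values. A hypothetical wiring to a dataset-link request is sketched below; the `useLinkPipelineToDataset` hook and its payload field names are assumptions for illustration, not an API introduced by this change:

// Hypothetical mutation hook — illustrative only.
const { linkPipeline } = useLinkPipelineToDataset();

const handleLinkOrEditSubmit = async (
  data: z.infer<typeof linkPiplineFormSchema> & Partial<IDataPipelineNodeProps>,
) => {
  await linkPipeline({
    pipelineId: data.id,               // set when editing an existing link
    pipelineIds: data.data_flow ?? [], // set when linking new pipelines
    fileFilter: data.file_filter,
  });
  setOpenLinkModal(false);
};
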
return ( return (
<div className="flex flex-col gap-2"> <div className="flex flex-col gap-2">
@ -74,9 +161,15 @@ const LinkDataPipeline = () => {
</div> </div>
<div className="flex justify-between items-center"> <div className="flex justify-between items-center">
<div className="text-center text-xs text-text-secondary"> <div className="text-center text-xs text-text-secondary">
Manage data pipeline linkage with this dataset {t('knowledgeConfiguration.linkPipelineSetTip')}
</div> </div>
<Button variant={'transparent'}> <Button
type="button"
variant={'transparent'}
onClick={() => {
openLinkModalFunc?.(true);
}}
>
<Link /> <Link />
<span className="text-xs text-text-primary"> <span className="text-xs text-text-primary">
{t('knowledgeConfiguration.linkDataPipeline')} {t('knowledgeConfiguration.linkDataPipeline')}
@ -94,10 +187,12 @@ const LinkDataPipeline = () => {
))} ))}
</section> </section>
<LinkDataPipelineModal <LinkDataPipelineModal
data={currentDataPipeline}
open={openLinkModal} open={openLinkModal}
setOpen={(open: boolean) => { setOpen={(open: boolean) => {
openLinkModalFunc(open); openLinkModalFunc(open);
}} }}
onSubmit={handleLinkOrEditSubmit}
/> />
</div> </div>
); );

View File

@ -10,32 +10,53 @@ import {
FormMessage, FormMessage,
} from '@/components/ui/form'; } from '@/components/ui/form';
import { Modal } from '@/components/ui/modal/modal'; import { Modal } from '@/components/ui/modal/modal';
import { Switch } from '@/components/ui/switch';
import { useNavigatePage } from '@/hooks/logic-hooks/navigate-hooks'; import { useNavigatePage } from '@/hooks/logic-hooks/navigate-hooks';
import { zodResolver } from '@hookform/resolvers/zod'; import { zodResolver } from '@hookform/resolvers/zod';
import { t } from 'i18next'; import { t } from 'i18next';
import { useForm } from 'react-hook-form'; import { useForm } from 'react-hook-form';
import { z } from 'zod'; import { z } from 'zod';
import { linkPiplineFormSchema } from '../form-schema'; import { pipelineFormSchema } from '../form-schema';
import { IDataPipelineNodeProps } from './link-data-pipeline';
const LinkDataPipelineModal = ({ const LinkDataPipelineModal = ({
data,
open, open,
setOpen, setOpen,
onSubmit,
}: { }: {
data: IDataPipelineNodeProps | undefined;
open: boolean; open: boolean;
setOpen: (open: boolean) => void; setOpen: (open: boolean) => void;
onSubmit?: (data: any) => void;
}) => { }) => {
const form = useForm<z.infer<typeof linkPiplineFormSchema>>({ const isEdit = !!data;
resolver: zodResolver(linkPiplineFormSchema), const form = useForm<z.infer<typeof pipelineFormSchema>>({
defaultValues: { data_flow: ['888'], file_filter: '' }, resolver: zodResolver(pipelineFormSchema),
defaultValues: {
data_flow: [],
set_default: false,
file_filter: '',
},
}); });
// const [open, setOpen] = useState(false); // const [open, setOpen] = useState(false);
const { navigateToAgents } = useNavigatePage(); const { navigateToAgents } = useNavigatePage();
const handleFormSubmit = (values: any) => { const handleFormSubmit = (values: any) => {
console.log(values); console.log(values, data);
const param = {
...data,
...values,
};
onSubmit?.(param);
}; };
return ( return (
<Modal <Modal
title={t('knowledgeConfiguration.linkDataPipeline')} className="!w-[560px]"
title={
!isEdit
? t('knowledgeConfiguration.linkDataPipeline')
: t('knowledgeConfiguration.eidtLinkDataPipeline')
}
open={open} open={open}
onOpenChange={setOpen} onOpenChange={setOpen}
showfooter={false} showfooter={false}
@ -43,10 +64,12 @@ const LinkDataPipelineModal = ({
<Form {...form}> <Form {...form}>
<form onSubmit={form.handleSubmit(handleFormSubmit)}> <form onSubmit={form.handleSubmit(handleFormSubmit)}>
<div className="flex flex-col gap-4 "> <div className="flex flex-col gap-4 ">
<DataFlowSelect {!isEdit && (
toDataPipeline={navigateToAgents} <DataFlowSelect
formFieldName="data_flow" toDataPipeline={navigateToAgents}
/> formFieldName="data_flow"
/>
)}
<FormField <FormField
control={form.control} control={form.control}
name={'file_filter'} name={'file_filter'}
@ -65,7 +88,9 @@ const LinkDataPipelineModal = ({
<div className="text-muted-foreground"> <div className="text-muted-foreground">
<FormControl> <FormControl>
<Input <Input
placeholder={t('dataFlowPlaceholder')} placeholder={t(
'knowledgeConfiguration.filterPlaceholder',
)}
{...field} {...field}
/> />
</FormControl> </FormControl>
@ -78,11 +103,56 @@ const LinkDataPipelineModal = ({
</FormItem> </FormItem>
)} )}
/> />
{isEdit && (
<FormField
control={form.control}
name={'set_default'}
render={({ field }) => (
<FormItem className=" items-center space-y-0 ">
<div className="flex flex-col gap-1">
<div className="flex gap-2 justify-between ">
<FormLabel
tooltip={t('knowledgeConfiguration.setDefaultTip')}
className="text-sm text-text-primary whitespace-wrap "
>
{t('knowledgeConfiguration.setDefault')}
</FormLabel>
</div>
<div className="text-muted-foreground">
<FormControl>
<Switch
value={field.value}
onCheckedChange={field.onChange}
/>
</FormControl>
</div>
</div>
<div className="flex pt-1">
<div className="w-full"></div>
<FormMessage />
</div>
</FormItem>
)}
/>
)}
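
One note on the Switch field above: if the project's `Switch` wraps the Radix primitive (as shadcn-style components usually do), the controlled prop is `checked` rather than `value`. A sketch of the controlled field under that assumption:

{/* Sketch only: `checked` drives the toggle; `value` would merely set the submitted form value. */}
<FormControl>
  <Switch checked={field.value} onCheckedChange={field.onChange} />
</FormControl>
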
<div className="flex justify-end gap-1"> <div className="flex justify-end gap-1">
<Button type="reset" variant={'outline'} className="btn-primary"> <Button
type="button"
variant={'outline'}
className="btn-primary"
onClick={() => {
setOpen(false);
}}
>
{t('modal.cancelText')} {t('modal.cancelText')}
</Button> </Button>
<Button type="submit" variant={'default'} className="btn-primary"> <Button
type="button"
variant={'default'}
className="btn-primary"
onClick={form.handleSubmit(handleFormSubmit)}
>
{t('modal.okText')} {t('modal.okText')}
</Button> </Button>
</div> </div>

View File

@ -72,7 +72,17 @@ export const formSchema = z.object({
// icon: z.array(z.instanceof(File)), // icon: z.array(z.instanceof(File)),
}); });
export const linkPiplineFormSchema = z.object({ export const pipelineFormSchema = z.object({
data_flow: z.array(z.string()), data_flow: z.array(z.string()).optional(),
set_default: z.boolean().optional(),
file_filter: z.string().optional(), file_filter: z.string().optional(),
}); });
export const linkPiplineFormSchema = pipelineFormSchema.pick({
data_flow: true,
file_filter: true,
});
export const editPiplineFormSchema = pipelineFormSchema.pick({
set_default: true,
file_filter: true,
});
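
Deriving both modal schemas from one base with zod's `.pick()` keeps the inferred form types in sync automatically. A self-contained sketch mirroring the schemas added above, with the shapes the resolver ends up seeing:

import { z } from 'zod';

// Base schema and the two picked variants, as added above.
const pipelineFormSchema = z.object({
  data_flow: z.array(z.string()).optional(),
  set_default: z.boolean().optional(),
  file_filter: z.string().optional(),
});
const linkPiplineFormSchema = pipelineFormSchema.pick({
  data_flow: true,
  file_filter: true,
});
const editPiplineFormSchema = pipelineFormSchema.pick({
  set_default: true,
  file_filter: true,
});

// Inferred shapes:
type LinkPipelineForm = z.infer<typeof linkPiplineFormSchema>;
// => { data_flow?: string[]; file_filter?: string }
type EditPipelineForm = z.infer<typeof editPiplineFormSchema>;
// => { set_default?: boolean; file_filter?: string }
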

View File

@ -86,9 +86,12 @@ export default function DatasetSettings() {
<GeneralForm></GeneralForm> <GeneralForm></GeneralForm>
<Divider /> <Divider />
<GraphRagItems className="border-none p-0"></GraphRagItems> <GraphRagItems
className="border-none p-0"
showGenerateItem={true}
></GraphRagItems>
<Divider /> <Divider />
<RaptorFormFields></RaptorFormFields> <RaptorFormFields showGenerateItem={true}></RaptorFormFields>
<Divider /> <Divider />
<LinkDataPipeline /> <LinkDataPipeline />
</MainContainer> </MainContainer>

View File

@ -6,16 +6,20 @@ import {
DropdownMenuItem, DropdownMenuItem,
DropdownMenuTrigger, DropdownMenuTrigger,
} from '@/components/ui/dropdown-menu'; } from '@/components/ui/dropdown-menu';
import { Modal } from '@/components/ui/modal/modal';
import { cn } from '@/lib/utils';
import { toFixed } from '@/utils/common-util'; import { toFixed } from '@/utils/common-util';
import { t } from 'i18next'; import { t } from 'i18next';
import { lowerFirst } from 'lodash'; import { lowerFirst } from 'lodash';
import { CirclePause, WandSparkles } from 'lucide-react'; import { CirclePause, Trash2, WandSparkles } from 'lucide-react';
import { useState } from 'react'; import { useState } from 'react';
import { useTranslation } from 'react-i18next';
import { generateStatus, useFetchGenerateData } from './hook'; import { generateStatus, useFetchGenerateData } from './hook';
export enum GenerateType {
const MenuItem: React.FC<{ name: 'KnowledgeGraph' | 'Raptor' }> = ({ KnowledgeGraph = 'KnowledgeGraph',
name, Raptor = 'Raptor',
}) => { }
const MenuItem: React.FC<{ name: GenerateType }> = ({ name }) => {
console.log(name, 'pppp'); console.log(name, 'pppp');
const iconKeyMap = { const iconKeyMap = {
KnowledgeGraph: 'knowledgegraph', KnowledgeGraph: 'knowledgegraph',
@ -111,3 +115,102 @@ const Generate: React.FC = () => {
}; };
export default Generate; export default Generate;
export type IGenerateLogProps = {
id?: string;
status: 0 | 1;
message?: string;
created_at?: string;
updated_at?: string;
type?: GenerateType;
className?: string;
onDelete?: () => void;
};
export const GenerateLogButton = (props: IGenerateLogProps) => {
const { t } = useTranslation();
const {
id,
status,
message,
created_at,
updated_at,
type,
className,
onDelete,
} = props;
const handleDelete = () => {
Modal.show({
visible: true,
className: '!w-[560px]',
title:
t('common.delete') +
' ' +
(type === GenerateType.KnowledgeGraph
? t('knowledgeDetails.knowledgeGraph')
: t('knowledgeDetails.raptor')),
children: (
<div
className="text-sm text-text-secondary"
dangerouslySetInnerHTML={{
__html: t('knowledgeConfiguration.deleteGenerateModalContent', {
type:
type === GenerateType.KnowledgeGraph
? t('knowledgeDetails.knowledgeGraph')
: t('knowledgeDetails.raptor'),
}),
}}
></div>
),
onVisibleChange: () => {
Modal.hide();
},
footer: (
<div className="flex justify-end gap-2">
<Button
type="button"
variant={'outline'}
onClick={() => Modal.hide()}
>
{t('dataflowParser.changeStepModalCancelText')}
</Button>
<Button
type="button"
variant={'secondary'}
className="!bg-state-error text-text-primary"
onClick={() => {
Modal.hide();
}}
>
{t('common.delete')}
</Button>
</div>
),
});
};
return (
<div
className={cn('flex bg-bg-card rounded-md py-1 px-3', props.className)}
>
<div className="flex items-center justify-between w-full">
{status === 1 && (
<>
<div>
{message || t('knowledgeDetails.generatedOn')}
{created_at}
</div>
<Trash2
size={14}
className="cursor-pointer"
onClick={(e) => {
console.log('delete');
handleDelete();
e.stopPropagation();
}}
/>
</>
)}
{status === 0 && <div>{t('knowledgeDetails.notGenerated')}</div>}
</div>
</div>
);
};
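
`GenerateLogButton` destructures an `onDelete` callback, although the confirm button in `handleDelete` currently only hides the modal. A hypothetical caller is sketched below; the import path, the `log` shape, and the `removeKnowledgeGraph` name are placeholders, not part of this change:

// Hypothetical usage of the exports added above.
import { GenerateLogButton, GenerateType } from './generate';

export function KnowledgeGraphLogRow({
  log,
  removeKnowledgeGraph,
}: {
  log?: { created_at?: string };
  removeKnowledgeGraph: () => void;
}) {
  return (
    <GenerateLogButton
      status={log ? 1 : 0}
      created_at={log?.created_at}
      type={GenerateType.KnowledgeGraph}
      className="mt-2"
      onDelete={removeKnowledgeGraph}
    />
  );
}
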

View File

@ -75,7 +75,7 @@ export default function Dataset() {
filters={filters} filters={filters}
leftPanel={ leftPanel={
<div className="items-start"> <div className="items-start">
<div className="pb-1">{t('knowledgeDetails.dataset')}</div> <div className="pb-1">{t('knowledgeDetails.subbarFiles')}</div>
<div className="text-text-sub-title-invert text-sm"> <div className="text-text-sub-title-invert text-sm">
{t('knowledgeDetails.datasetDescription')} {t('knowledgeDetails.datasetDescription')}
</div> </div>

View File

@ -9,7 +9,7 @@ import { cn, formatBytes } from '@/lib/utils';
import { Routes } from '@/routes'; import { Routes } from '@/routes';
import { formatPureDate } from '@/utils/date'; import { formatPureDate } from '@/utils/date';
import { isEmpty } from 'lodash'; import { isEmpty } from 'lodash';
import { Banknote, Database, FileSearch2, GitGraph } from 'lucide-react'; import { Banknote, FileSearch2, FolderOpen, GitGraph } from 'lucide-react';
import { useMemo } from 'react'; import { useMemo } from 'react';
import { useTranslation } from 'react-i18next'; import { useTranslation } from 'react-i18next';
import { useHandleMenuClick } from './hooks'; import { useHandleMenuClick } from './hooks';
@ -34,8 +34,8 @@ export function SideBar({ refreshCount }: PropType) {
// key: Routes.DataSetOverview, // key: Routes.DataSetOverview,
// }, // },
{ {
icon: Database, icon: FolderOpen,
label: t(`knowledgeDetails.dataset`), label: t(`knowledgeDetails.subbarFiles`),
key: Routes.DatasetBase, key: Routes.DatasetBase,
}, },
{ {