mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-01-04 03:25:30 +08:00
Compare commits
3 Commits
b5d6a6e8f2
...
d039d1e73d
| Author | SHA1 | Date | |
|---|---|---|---|
| d039d1e73d | |||
| d050ef568d | |||
| 028c2d83e9 |
@ -144,11 +144,10 @@ def run():
|
||||
|
||||
if cvs.canvas_category == CanvasCategory.DataFlow:
|
||||
task_id = get_uuid()
|
||||
flow_id = get_uuid()
|
||||
ok, error_message = queue_dataflow(dsl=cvs.dsl, tenant_id=user_id, file=files[0], task_id=task_id, flow_id=flow_id, priority=0)
|
||||
ok, error_message = queue_dataflow(tenant_id=user_id, flow_id=req["id"], task_id=task_id, file=files[0], priority=0)
|
||||
if not ok:
|
||||
return server_error_response(error_message)
|
||||
return get_json_result(data={"task_id": task_id, "message_id": flow_id})
|
||||
return get_data_error_result(message=error_message)
|
||||
return get_json_result(data={"message_id": task_id})
|
||||
|
||||
try:
|
||||
canvas = Canvas(cvs.dsl, current_user.id, req["id"])
|
||||
|
||||
@ -496,7 +496,7 @@ class FileService(CommonService):
|
||||
return ParserType.AUDIO.value
|
||||
if re.search(r"\.(ppt|pptx|pages)$", filename):
|
||||
return ParserType.PRESENTATION.value
|
||||
if re.search(r"\.(eml)$", filename):
|
||||
if re.search(r"\.(msg|eml)$", filename):
|
||||
return ParserType.EMAIL.value
|
||||
return default
|
||||
|
||||
|
||||
@ -472,14 +472,10 @@ def has_canceled(task_id):
|
||||
return False
|
||||
|
||||
|
||||
def queue_dataflow(dsl:str, tenant_id:str, task_id:str, flow_id:str=None, doc_id:str=None, file:dict=None, priority: int=0, callback=None) -> tuple[bool, str]:
|
||||
"""
|
||||
Returns a tuple (success: bool, error_message: str).
|
||||
"""
|
||||
_ = callback
|
||||
def queue_dataflow(tenant_id:str, flow_id:str, task_id:str, doc_id:str="x", file:dict=None, priority: int=0) -> tuple[bool, str]:
|
||||
|
||||
task = dict(
|
||||
id=get_uuid() if not task_id else task_id,
|
||||
id=task_id,
|
||||
doc_id=doc_id,
|
||||
from_page=0,
|
||||
to_page=100000000,
|
||||
@ -490,15 +486,10 @@ def queue_dataflow(dsl:str, tenant_id:str, task_id:str, flow_id:str=None, doc_id
|
||||
TaskService.model.delete().where(TaskService.model.id == task["id"]).execute()
|
||||
bulk_insert_into_db(model=Task, data_source=[task], replace_on_conflict=True)
|
||||
|
||||
kb_id = DocumentService.get_knowledgebase_id(doc_id)
|
||||
if not kb_id:
|
||||
return False, f"Can't find KB of this document: {doc_id}"
|
||||
|
||||
task["kb_id"] = kb_id
|
||||
task["kb_id"] = DocumentService.get_knowledgebase_id(doc_id)
|
||||
task["tenant_id"] = tenant_id
|
||||
task["task_type"] = "dataflow"
|
||||
task["dsl"] = dsl
|
||||
task["dataflow_id"] = get_uuid() if not flow_id else flow_id
|
||||
task["dataflow_id"] = flow_id
|
||||
task["file"] = file
|
||||
|
||||
if not REDIS_CONN.queue_product(
|
||||
|
||||
@ -155,7 +155,7 @@ def filename_type(filename):
|
||||
if re.match(r".*\.pdf$", filename):
|
||||
return FileType.PDF.value
|
||||
|
||||
if re.match(r".*\.(eml|doc|docx|ppt|pptx|yml|xml|htm|json|jsonl|ldjson|csv|txt|ini|xls|xlsx|wps|rtf|hlp|pages|numbers|key|md|py|js|java|c|cpp|h|php|go|ts|sh|cs|kt|html|sql)$", filename):
|
||||
if re.match(r".*\.(msg|eml|doc|docx|ppt|pptx|yml|xml|htm|json|jsonl|ldjson|csv|txt|ini|xls|xlsx|wps|rtf|hlp|pages|numbers|key|md|py|js|java|c|cpp|h|php|go|ts|sh|cs|kt|html|sql)$", filename):
|
||||
return FileType.DOC.value
|
||||
|
||||
if re.match(r".*\.(wav|flac|ape|alac|wavpack|wv|mp3|aac|ogg|vorbis|opus)$", filename):
|
||||
|
||||
@ -34,6 +34,7 @@ dependencies = [
|
||||
"elastic-transport==8.12.0",
|
||||
"elasticsearch==8.12.1",
|
||||
"elasticsearch-dsl==8.12.0",
|
||||
"extract-msg>=0.39.0",
|
||||
"filelock==3.15.4",
|
||||
"flask==3.0.3",
|
||||
"flask-cors==5.0.0",
|
||||
|
||||
@ -78,7 +78,7 @@ def chunk(
|
||||
_add_content(msg, msg.get_content_type())
|
||||
|
||||
sections = TxtParser.parser_txt("\n".join(text_txt)) + [
|
||||
(line, "") for line in HtmlParser.parser_txt("\n".join(html_txt)) if line
|
||||
(line, "") for line in HtmlParser.parser_txt("\n".join(html_txt), chunk_token_num=parser_config["chunk_token_num"]) if line
|
||||
]
|
||||
|
||||
st = timer()
|
||||
|
||||
@ -35,9 +35,9 @@ class ProcessBase(ComponentBase):
|
||||
def __init__(self, pipeline, id, param: ProcessParamBase):
|
||||
super().__init__(pipeline, id, param)
|
||||
if hasattr(self._canvas, "callback"):
|
||||
self.callback = partial(self._canvas.callback, self.component_name)
|
||||
self.callback = partial(self._canvas.callback, id)
|
||||
else:
|
||||
self.callback = partial(lambda *args, **kwargs: None, self.component_name)
|
||||
self.callback = partial(lambda *args, **kwargs: None, id)
|
||||
|
||||
async def invoke(self, **kwargs) -> dict[str, Any]:
|
||||
self.set_output("_created_time", time.perf_counter())
|
||||
|
||||
@ -13,7 +13,9 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import io
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import random
|
||||
from functools import partial
|
||||
|
||||
@ -57,7 +59,10 @@ class ParserParam(ProcessParamBase):
|
||||
"image": [
|
||||
"text"
|
||||
],
|
||||
"email": [],
|
||||
"email": [
|
||||
"text",
|
||||
"json"
|
||||
],
|
||||
"text": [
|
||||
"text",
|
||||
"json"
|
||||
@ -71,7 +76,6 @@ class ParserParam(ProcessParamBase):
|
||||
self.setups = {
|
||||
"pdf": {
|
||||
"parse_method": "deepdoc", # deepdoc/plain_text/vlm
|
||||
"llm_id": "",
|
||||
"lang": "Chinese",
|
||||
"suffix": [
|
||||
"pdf",
|
||||
@ -93,8 +97,8 @@ class ParserParam(ProcessParamBase):
|
||||
],
|
||||
"output_format": "json",
|
||||
},
|
||||
"markdown": {
|
||||
"suffix": ["md", "markdown", "mdx"],
|
||||
"text&markdown": {
|
||||
"suffix": ["md", "markdown", "mdx", "txt"],
|
||||
"output_format": "json",
|
||||
},
|
||||
"slides": {
|
||||
@ -112,7 +116,11 @@ class ParserParam(ProcessParamBase):
|
||||
"output_format": "json",
|
||||
},
|
||||
"email": {
|
||||
"fields": []
|
||||
"suffix": [
|
||||
"eml", "msg"
|
||||
],
|
||||
"fields": ["from", "to", "cc", "bcc", "date", "subject", "body", "attachments", "metadata"],
|
||||
"output_format": "json",
|
||||
},
|
||||
"text": {
|
||||
"suffix": [
|
||||
@ -147,13 +155,10 @@ class ParserParam(ProcessParamBase):
|
||||
pdf_config = self.setups.get("pdf", {})
|
||||
if pdf_config:
|
||||
pdf_parse_method = pdf_config.get("parse_method", "")
|
||||
self.check_valid_value(pdf_parse_method.lower(), "Parse method abnormal.", ["deepdoc", "plain_text", "vlm"])
|
||||
self.check_empty(pdf_parse_method, "Parse method abnormal.")
|
||||
|
||||
if pdf_parse_method not in ["deepdoc", "plain_text"]:
|
||||
self.check_empty(pdf_config.get("llm_id"), "VLM")
|
||||
|
||||
pdf_language = pdf_config.get("lang", "")
|
||||
self.check_empty(pdf_language, "Language")
|
||||
if pdf_parse_method.lower() not in ["deepdoc", "plain_text"]:
|
||||
self.check_empty(pdf_config.get("lang", ""), "Language")
|
||||
|
||||
pdf_output_format = pdf_config.get("output_format", "")
|
||||
self.check_valid_value(pdf_output_format, "PDF output format abnormal.", self.allowed_output_format["pdf"])
|
||||
@ -194,6 +199,11 @@ class ParserParam(ProcessParamBase):
|
||||
audio_language = audio_config.get("lang", "")
|
||||
self.check_empty(audio_language, "Language")
|
||||
|
||||
email_config = self.setups.get("email", "")
|
||||
if email_config:
|
||||
email_output_format = email_config.get("output_format", "")
|
||||
self.check_valid_value(email_output_format, "Email output format abnormal.", self.allowed_output_format["email"])
|
||||
|
||||
def get_input_form(self) -> dict[str, dict]:
|
||||
return {}
|
||||
|
||||
@ -212,8 +222,7 @@ class Parser(ProcessBase):
|
||||
lines, _ = PlainParser()(blob)
|
||||
bboxes = [{"text": t} for t, _ in lines]
|
||||
else:
|
||||
assert conf.get("llm_id")
|
||||
vision_model = LLMBundle(self._canvas._tenant_id, LLMType.IMAGE2TEXT, llm_name=conf.get("llm_id"), lang=self._param.setups["pdf"].get("lang"))
|
||||
vision_model = LLMBundle(self._canvas._tenant_id, LLMType.IMAGE2TEXT, llm_name=conf.get("parse_method"), lang=self._param.setups["pdf"].get("lang"))
|
||||
lines, _ = VisionParser(vision_model=vision_model)(blob, callback=self.callback)
|
||||
bboxes = []
|
||||
for t, poss in lines:
|
||||
@ -222,6 +231,7 @@ class Parser(ProcessBase):
|
||||
|
||||
if conf.get("output_format") == "json":
|
||||
self.set_output("json", bboxes)
|
||||
|
||||
if conf.get("output_format") == "markdown":
|
||||
mkdn = ""
|
||||
for b in bboxes:
|
||||
@ -285,7 +295,6 @@ class Parser(ProcessBase):
|
||||
|
||||
def _markdown(self, name, blob):
|
||||
from functools import reduce
|
||||
|
||||
from rag.app.naive import Markdown as naive_markdown_parser
|
||||
from rag.nlp import concat_img
|
||||
|
||||
@ -316,22 +325,6 @@ class Parser(ProcessBase):
|
||||
else:
|
||||
self.set_output("text", "\n".join([section_text for section_text, _ in sections]))
|
||||
|
||||
def _text(self, name, blob):
    """Convert a text blob to a string and publish it as JSON or plain text.

    The "text" setup's ``output_format`` selects the shape of the result:
    ``"json"`` publishes ``[{"text": <content>}]`` on the "json" output, any
    other value publishes the content string on the "text" output.  The
    chosen format is also echoed on the "output_format" output.

    Args:
        name: File name, forwarded to ``get_text``.
        blob: Raw file content, forwarded to ``get_text`` as ``binary``.
    """
    from deepdoc.parser.utils import get_text

    self.callback(random.randint(1, 5) / 100.0, "Start to work on a text.")
    text_setup = self._param.setups["text"]
    self.set_output("output_format", text_setup["output_format"])

    # Turn the binary payload into text before choosing the output shape.
    decoded = get_text(name, binary=blob)

    if text_setup.get("output_format") != "json":
        self.set_output("text", decoded)
        return
    self.set_output("json", [{"text": decoded}])
|
||||
|
||||
def _image(self, from_upstream: ParserFromUpstream):
|
||||
from deepdoc.vision import OCR
|
||||
@ -353,7 +346,7 @@ class Parser(ProcessBase):
|
||||
|
||||
else:
|
||||
# use VLM to describe the picture
|
||||
cv_model = LLMBundle(self._canvas.get_tenant_id(), LLMType.IMAGE2TEXT, llm_name=conf["llm_id"], lang=lang)
|
||||
cv_model = LLMBundle(self._canvas.get_tenant_id(), LLMType.IMAGE2TEXT, llm_name=conf["llm_id"],lang=lang)
|
||||
img_binary = io.BytesIO()
|
||||
img.save(img_binary, format="JPEG")
|
||||
img_binary.seek(0)
|
||||
@ -384,16 +377,134 @@ class Parser(ProcessBase):
|
||||
|
||||
self.set_output("text", txt)
|
||||
|
||||
def _email(self, from_upstream: ParserFromUpstream):
    """Parse an e-mail blob and publish the configured fields.

    Files whose extension is ``.eml`` (any letter case) are parsed with the
    standard-library ``email`` package; every other file is handed to
    ``extract_msg`` as an Outlook ``.msg`` message.  Only the entries named
    in the "email" setup's ``fields`` list are collected; a ``metadata`` dict
    is always attached.  When ``output_format`` is ``"json"`` the result is
    published as a one-element list on the "json" output, otherwise it is
    flattened to a string and published on the "text" output.

    Args:
        from_upstream: Upstream payload; its ``blob`` (raw bytes) and
            ``name`` (file name) attributes are read.
    """
    self.callback(random.randint(1, 5) / 100.0, "Start to work on an email.")

    blob = from_upstream.blob
    name = from_upstream.name

    conf = self._param.setups["email"]
    target_fields = conf["fields"]
    header_fields = ("from", "to", "cc", "bcc", "date", "subject")
    email_content = {}

    def _decode_part(part):
        """Return the decoded text of a non-multipart MIME part."""
        payload = part.get_payload(decode=True) or b""
        # get_content_charset() returns None when the part declares no
        # charset; bytes.decode(None) would raise TypeError.
        charset = part.get_content_charset() or "utf-8"
        try:
            return payload.decode(charset, errors="replace")
        except LookupError:
            # The declared charset is unknown to Python.
            return payload.decode("utf-8", errors="replace")

    _, ext = os.path.splitext(name)
    # Compare case-insensitively so "MAIL.EML" is not sent to the .msg parser.
    if ext.lower() == ".eml":
        from email import policy
        from email.parser import BytesParser

        msg = BytesParser(policy=policy.default).parse(io.BytesIO(blob))
        email_content["metadata"] = {}
        for header, value in msg.items():
            key = header.lower()
            if key in target_fields:
                # Requested fields such as from, to, cc, bcc, date, subject.
                email_content[key] = value
            elif key not in header_fields:
                # Any other header is kept as metadata.
                email_content["metadata"][key] = value

        if "body" in target_fields:
            body_text, body_html = [], []

            def _collect(part):
                """Walk the MIME tree, gathering plain-text and HTML bodies."""
                content_type = part.get_content_type()
                if content_type == "text/plain":
                    body_text.append(_decode_part(part))
                elif content_type == "text/html":
                    body_html.append(_decode_part(part))
                elif "multipart" in content_type and part.is_multipart():
                    for sub_part in part.iter_parts():
                        _collect(sub_part)

            _collect(msg)
            email_content["text"] = body_text
            email_content["text_html"] = body_html

        if "attachments" in target_fields:
            # get_content_disposition() yields the lower-cased disposition
            # without parameters, so "Attachment ; filename=..." also matches.
            email_content["attachments"] = [
                {
                    "filename": part.get_filename(),
                    "payload": part.get_payload(decode=True),
                }
                for part in msg.iter_attachments()
                if part.get_content_disposition() == "attachment"
            ]
    else:
        # Everything that is not .eml is treated as an Outlook .msg file.
        import extract_msg

        logging.debug("Parsing %s as an Outlook .msg file.", name)
        msg = extract_msg.Message(blob)
        try:
            basic_content = {
                "from": msg.sender,
                "to": msg.to,
                "cc": msg.cc,
                "bcc": msg.bcc,
                "date": msg.date,
                "subject": msg.subject,
            }
            email_content.update({k: v for k, v in basic_content.items() if k in target_fields})
            email_content["metadata"] = {
                "message_id": msg.messageId,
                "in_reply_to": msg.inReplyTo,
            }
            if "body" in target_fields:
                email_content["text"] = msg.body  # usually empty; text_html is the better source
                # NOTE(review): extract_msg appears to return htmlBody as bytes —
                # confirm downstream consumers of the "json" output accept that.
                email_content["text_html"] = msg.htmlBody
            if "attachments" in target_fields:
                email_content["attachments"] = [
                    {"filename": attachment.name, "payload": attachment.data}  # payload is binary
                    for attachment in msg.attachments
                ]
        finally:
            # Release the underlying OLE storage once all fields are read.
            msg.close()

    if conf["output_format"] == "json":
        self.set_output("json", [email_content])
        return

    pieces = []
    for key, value in email_content.items():
        if isinstance(value, str):
            # Basic header info.
            pieces.append(f"{key}:{value}" + "\n")
        elif isinstance(value, dict):
            # Metadata.
            pieces.append(f"{key}:{json.dumps(value)}" + "\n")
        elif isinstance(value, list):
            for item in value:
                if isinstance(item, dict):
                    # Attachments.
                    pieces.append(f'{item["filename"]}:{item["payload"]}' + "\n")
                else:
                    # Body fragments, usually plain text.
                    pieces.append(item)
        elif isinstance(value, bytes):
            # e.g. an HTML body delivered as bytes; previously dropped silently.
            pieces.append(f"{key}:{value.decode('utf-8', errors='replace')}" + "\n")
        elif value is not None:
            # e.g. a date delivered as a non-string object; previously dropped silently.
            pieces.append(f"{key}:{value}" + "\n")
    self.set_output("text", "".join(pieces))
|
||||
|
||||
async def _invoke(self, **kwargs):
|
||||
function_map = {
|
||||
"pdf": self._pdf,
|
||||
"markdown": self._markdown,
|
||||
"text&markdown": self._markdown,
|
||||
"spreadsheet": self._spreadsheet,
|
||||
"slides": self._slides,
|
||||
"word": self._word,
|
||||
"text": self._text,
|
||||
"image": self._image,
|
||||
"audio": self._audio,
|
||||
"email": self._email,
|
||||
}
|
||||
try:
|
||||
from_upstream = ParserFromUpstream.model_validate(kwargs)
|
||||
|
||||
@ -18,7 +18,7 @@ import json
|
||||
import logging
|
||||
import random
|
||||
import time
|
||||
|
||||
from timeit import default_timer as timer
|
||||
import trio
|
||||
|
||||
from agent.canvas import Graph
|
||||
@ -38,25 +38,26 @@ class Pipeline(Graph):
|
||||
|
||||
def callback(self, component_name: str, progress: float | int | None = None, message: str = "") -> None:
|
||||
log_key = f"{self._flow_id}-{self.task_id}-logs"
|
||||
timestamp = timer()
|
||||
try:
|
||||
bin = REDIS_CONN.get(log_key)
|
||||
obj = json.loads(bin.encode("utf-8"))
|
||||
if obj:
|
||||
if obj[-1]["component_name"] == component_name:
|
||||
obj[-1]["trace"].append({"progress": progress, "message": message, "datetime": datetime.datetime.now().strftime("%H:%M:%S")})
|
||||
if obj[-1]["component_id"] == component_name:
|
||||
obj[-1]["trace"].append({"progress": progress, "message": message, "datetime": datetime.datetime.now().strftime("%H:%M:%S"), "timestamp": timestamp, "elapsed_time": timestamp-obj[-1]["trace"][-1]["timestamp"]})
|
||||
else:
|
||||
obj.append({"component_name": component_name, "trace": [{"progress": progress, "message": message, "datetime": datetime.datetime.now().strftime("%H:%M:%S")}]})
|
||||
obj.append({"component_id": component_name, "trace": [{"progress": progress, "message": message, "datetime": datetime.datetime.now().strftime("%H:%M:%S"), "timestamp": timestamp, "elapsed_time": 0}]})
|
||||
else:
|
||||
obj = [{"component_name": component_name, "trace": [{"progress": progress, "message": message, "datetime": datetime.datetime.now().strftime("%H:%M:%S")}]}]
|
||||
obj = [{"component_id": component_name, "trace": [{"progress": progress, "message": message, "datetime": datetime.datetime.now().strftime("%H:%M:%S"), "timestamp": timestamp, "elapsed_time": 0}]}]
|
||||
REDIS_CONN.set_obj(log_key, obj, 60 * 30)
|
||||
if self._doc_id:
|
||||
percentage = 1./len(self.components.items())
|
||||
msg = ""
|
||||
finished = 0.
|
||||
for o in obj:
|
||||
if o['component_name'] == "END":
|
||||
if o['component_id'] == "END":
|
||||
continue
|
||||
msg += f"\n[{o['component_name']}]:\n"
|
||||
msg += f"\n[{o['component_id']}]:\n"
|
||||
for t in o["trace"]:
|
||||
msg += "%s: %s\n"%(t["datetime"], t["message"])
|
||||
if t["progress"] < 0:
|
||||
|
||||
@ -30,7 +30,7 @@ def print_logs(pipeline: Pipeline):
|
||||
while True:
|
||||
time.sleep(5)
|
||||
logs = pipeline.fetch_logs()
|
||||
logs_str = json.dumps(logs)
|
||||
logs_str = json.dumps(logs, ensure_ascii=False)
|
||||
if logs_str != last_logs:
|
||||
print(logs_str)
|
||||
last_logs = logs_str
|
||||
|
||||
@ -89,6 +89,22 @@
|
||||
"lang": "Chinese",
|
||||
"llm_id": "SenseVoiceSmall",
|
||||
"output_format": "json"
|
||||
},
|
||||
"email": {
|
||||
"suffix": [
|
||||
"msg"
|
||||
],
|
||||
"fields": [
|
||||
"from",
|
||||
"to",
|
||||
"cc",
|
||||
"bcc",
|
||||
"date",
|
||||
"subject",
|
||||
"body",
|
||||
"attachments"
|
||||
],
|
||||
"output_format": "json"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -20,8 +20,7 @@ import random
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
|
||||
from api.utils import get_uuid
|
||||
from api.db.services.canvas_service import UserCanvasService
|
||||
from api.utils.api_utils import timeout
|
||||
from api.utils.base64_image import image2id
|
||||
from api.utils.log_utils import init_root_logger, get_project_base_directory
|
||||
@ -29,7 +28,6 @@ from graphrag.general.index import run_graphrag
|
||||
from graphrag.utils import get_llm_cache, set_llm_cache, get_tags_from_cache, set_tags_to_cache
|
||||
from rag.flow.pipeline import Pipeline
|
||||
from rag.prompts import keyword_extraction, question_proposal, content_tagging
|
||||
|
||||
import logging
|
||||
import os
|
||||
from datetime import datetime
|
||||
@ -45,10 +43,8 @@ import signal
|
||||
import trio
|
||||
import exceptiongroup
|
||||
import faulthandler
|
||||
|
||||
import numpy as np
|
||||
from peewee import DoesNotExist
|
||||
|
||||
from api.db import LLMType, ParserType
|
||||
from api.db.services.document_service import DocumentService
|
||||
from api.db.services.llm_service import LLMBundle
|
||||
@ -216,7 +212,11 @@ async def collect():
|
||||
return None, None
|
||||
|
||||
canceled = False
|
||||
task = TaskService.get_task(msg["id"])
|
||||
if msg.get("doc_id", "") == "x":
|
||||
task = msg
|
||||
else:
|
||||
task = TaskService.get_task(msg["id"])
|
||||
|
||||
if task:
|
||||
canceled = has_canceled(task["id"])
|
||||
if not task or canceled:
|
||||
@ -229,9 +229,8 @@ async def collect():
|
||||
task_type = msg.get("task_type", "")
|
||||
task["task_type"] = task_type
|
||||
if task_type == "dataflow":
|
||||
task["tenant_id"]=msg.get("tenant_id", "")
|
||||
task["dsl"] = msg.get("dsl", "")
|
||||
task["dataflow_id"] = msg.get("dataflow_id", get_uuid())
|
||||
task["tenant_id"] = msg["tenant_id"]
|
||||
task["dataflow_id"] = msg["dataflow_id"]
|
||||
task["kb_id"] = msg.get("kb_id", "")
|
||||
return redis_msg, task
|
||||
|
||||
@ -460,13 +459,12 @@ async def embedding(docs, mdl, parser_config=None, callback=None):
|
||||
return tk_count, vector_size
|
||||
|
||||
|
||||
async def run_dataflow(dsl:str, tenant_id:str, doc_id:str, task_id:str, flow_id:str, callback=None):
|
||||
_ = callback
|
||||
|
||||
pipeline = Pipeline(dsl=dsl, tenant_id=tenant_id, doc_id=doc_id, task_id=task_id, flow_id=flow_id)
|
||||
async def run_dataflow(task: dict):
|
||||
dataflow_id = task["dataflow_id"]
|
||||
e, cvs = UserCanvasService.get_by_id(dataflow_id)
|
||||
pipeline = Pipeline(cvs.dsl, tenant_id=task["tenant_id"], doc_id=task["doc_id"], task_id=task["id"], flow_id=dataflow_id)
|
||||
pipeline.reset()
|
||||
|
||||
await pipeline.run()
|
||||
await pipeline.run(file=task.get("file"))
|
||||
|
||||
|
||||
@timeout(3600)
|
||||
@ -513,6 +511,12 @@ async def run_raptor(row, chat_mdl, embd_mdl, vector_size, callback=None):
|
||||
|
||||
@timeout(60*60*2, 1)
|
||||
async def do_handle_task(task):
|
||||
task_type = task.get("task_type", "")
|
||||
|
||||
if task_type == "dataflow" and task.get("doc_id", "") == "x":
|
||||
await run_dataflow(task)
|
||||
return
|
||||
|
||||
task_id = task["id"]
|
||||
task_from_page = task["from_page"]
|
||||
task_to_page = task["to_page"]
|
||||
@ -526,6 +530,7 @@ async def do_handle_task(task):
|
||||
task_parser_config = task["parser_config"]
|
||||
task_start_ts = timer()
|
||||
|
||||
|
||||
# prepare the progress callback function
|
||||
progress_callback = partial(set_progress, task_id, task_from_page, task_to_page)
|
||||
|
||||
@ -554,13 +559,11 @@ async def do_handle_task(task):
|
||||
|
||||
init_kb(task, vector_size)
|
||||
|
||||
task_type = task.get("task_type", "")
|
||||
if task_type == "dataflow":
|
||||
task_dataflow_dsl = task["dsl"]
|
||||
task_dataflow_id = task["dataflow_id"]
|
||||
await run_dataflow(dsl=task_dataflow_dsl, tenant_id=task_tenant_id, doc_id=task_doc_id, task_id=task_id, flow_id=task_dataflow_id, callback=None)
|
||||
await run_dataflow(task)
|
||||
return
|
||||
elif task_type == "raptor":
|
||||
|
||||
if task_type == "raptor":
|
||||
# bind LLM for raptor
|
||||
chat_model = LLMBundle(task_tenant_id, LLMType.CHAT, llm_name=task_llm_id, lang=task_language)
|
||||
# run RAPTOR
|
||||
|
||||
155
uv.lock
generated
155
uv.lock
generated
@ -1,5 +1,5 @@
|
||||
version = 1
|
||||
revision = 1
|
||||
revision = 3
|
||||
requires-python = ">=3.10, <3.13"
|
||||
resolution-markers = [
|
||||
"python_full_version >= '3.12' and sys_platform == 'darwin'",
|
||||
@ -861,6 +861,15 @@ wheels = [
|
||||
{ url = "https://mirrors.aliyun.com/pypi/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "colorclass"
|
||||
version = "2.2.2"
|
||||
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
|
||||
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/d7/1a/31ff00a33569a3b59d65bbdc445c73e12f92ad28195b7ace299f68b9af70/colorclass-2.2.2.tar.gz", hash = "sha256:6d4fe287766166a98ca7bc6f6312daf04a0481b1eda43e7173484051c0ab4366" }
|
||||
wheels = [
|
||||
{ url = "https://mirrors.aliyun.com/pypi/packages/30/b6/daf3e2976932da4ed3579cff7a30a53d22ea9323ee4f0d8e43be60454897/colorclass-2.2.2-py2.py3-none-any.whl", hash = "sha256:6f10c273a0ef7a1150b1120b6095cbdd68e5cf36dfd5d0fc957a2500bbf99a55" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "coloredlogs"
|
||||
version = "15.0.1"
|
||||
@ -873,6 +882,15 @@ wheels = [
|
||||
{ url = "https://mirrors.aliyun.com/pypi/packages/a7/06/3d6badcf13db419e25b07041d9c7b4a2c331d3f4e7134445ec5df57714cd/coloredlogs-15.0.1-py2.py3-none-any.whl", hash = "sha256:612ee75c546f53e92e70049c9dbfcc18c935a2b9a53b66085ce9ef6a6e5c0934" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "compressed-rtf"
|
||||
version = "1.0.7"
|
||||
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
|
||||
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/b7/0c/929a4e8ef9d7143f54d77dadb5f370cc7b98534b1bd6e1124d0abe8efb24/compressed_rtf-1.0.7.tar.gz", hash = "sha256:7c30859334839f3cdc7d10796af5b434bb326b9df7cb5a65e95a8eacb2951b0e" }
|
||||
wheels = [
|
||||
{ url = "https://mirrors.aliyun.com/pypi/packages/07/1d/62f5bf92e12335eb63517f42671ed78512d48bbc69e02a942dd7b90f03f0/compressed_rtf-1.0.7-py3-none-any.whl", hash = "sha256:b7904921d78c67a0a4b7fff9fb361a00ae2b447b6edca010ce321cd98fa0fcc0" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "contourpy"
|
||||
version = "1.3.2"
|
||||
@ -1322,6 +1340,23 @@ wheels = [
|
||||
{ url = "https://mirrors.aliyun.com/pypi/packages/fc/da/8376678b4a9ae0f9418d93df9c9cf851dced49c95ceb38daac6651e38f7a/duckduckgo_search-7.5.5-py3-none-any.whl", hash = "sha256:c71a0661aa436f215d9a05d653af424affb58825ab3e79f3b788053cbdee9ebc" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "easygui"
|
||||
version = "0.98.3"
|
||||
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
|
||||
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/cc/ad/e35f7a30272d322be09dc98592d2f55d27cc933a7fde8baccbbeb2bd9409/easygui-0.98.3.tar.gz", hash = "sha256:d653ff79ee1f42f63b5a090f2f98ce02335d86ad8963b3ce2661805cafe99a04" }
|
||||
wheels = [
|
||||
{ url = "https://mirrors.aliyun.com/pypi/packages/8e/a7/b276ff776533b423710a285c8168b52551cb2ab0855443131fdc7fd8c16f/easygui-0.98.3-py2.py3-none-any.whl", hash = "sha256:33498710c68b5376b459cd3fc48d1d1f33822139eb3ed01defbc0528326da3ba" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ebcdic"
|
||||
version = "1.1.1"
|
||||
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
|
||||
wheels = [
|
||||
{ url = "https://mirrors.aliyun.com/pypi/packages/0d/2f/633031205333bee5f9f93761af8268746aa75f38754823aabb8570eb245b/ebcdic-1.1.1-py2.py3-none-any.whl", hash = "sha256:33b4cb729bc2d0bf46cc1847b0e5946897cb8d3f53520c5b9aa5fa98d7e735f1" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "editdistance"
|
||||
version = "0.8.1"
|
||||
@ -1435,6 +1470,24 @@ wheels = [
|
||||
{ url = "https://mirrors.aliyun.com/pypi/packages/36/f4/c6e662dade71f56cd2f3735141b265c3c79293c109549c1e6933b0651ffc/exceptiongroup-1.3.0-py3-none-any.whl", hash = "sha256:4d111e6e0c13d0644cad6ddaa7ed0261a0b36971f6d23e7ec9b4b9097da78a10" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "extract-msg"
|
||||
version = "0.55.0"
|
||||
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
|
||||
dependencies = [
|
||||
{ name = "beautifulsoup4" },
|
||||
{ name = "compressed-rtf" },
|
||||
{ name = "ebcdic" },
|
||||
{ name = "olefile" },
|
||||
{ name = "red-black-tree-mod" },
|
||||
{ name = "rtfde" },
|
||||
{ name = "tzlocal" },
|
||||
]
|
||||
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/5e/65/c70afb3b119a44b3ee36b029485dc15326cf3a7c50da19a1ecbbf949c5d1/extract_msg-0.55.0.tar.gz", hash = "sha256:cf08283498c3dfcc7f894dad1579f52e3ced9fb76b865c2355cbe757af8a54e1" }
|
||||
wheels = [
|
||||
{ url = "https://mirrors.aliyun.com/pypi/packages/53/81/87d5241036046ea17c5c8db228f4c9e04e07e53b627015d4496a99449aaf/extract_msg-0.55.0-py3-none-any.whl", hash = "sha256:baf0cdee9a8d267b70c366bc57ceb03dbfa1e7ab2dca6824169a7fe623f0917c" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fake-http-header"
|
||||
version = "0.3.5"
|
||||
@ -2893,6 +2946,15 @@ wheels = [
|
||||
{ url = "https://mirrors.aliyun.com/pypi/packages/92/b0/8f08df3f0fa584c4132937690c6dd33e0a116f963ecf2b35567f614e0ca7/langfuse-3.2.1-py3-none-any.whl", hash = "sha256:07a84e8c1eed6ac8e149bdda1431fd866e4aee741b66124316336fb2bc7e6a32" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lark"
|
||||
version = "1.1.9"
|
||||
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
|
||||
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/2c/e1/804b6196b3fbdd0f8ba785fc62837b034782a891d6f663eea2f30ca23cfa/lark-1.1.9.tar.gz", hash = "sha256:15fa5236490824c2c4aba0e22d2d6d823575dcaf4cdd1848e34b6ad836240fba" }
|
||||
wheels = [
|
||||
{ url = "https://mirrors.aliyun.com/pypi/packages/e7/9c/eef7c591e6dc952f3636cfe0df712c0f9916cedf317810a3bb53ccb65cdd/lark-1.1.9-py3-none-any.whl", hash = "sha256:a0dd3a87289f8ccbb325901e4222e723e7d745dbfc1803eaf5f3d2ace19cf2db" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "litellm"
|
||||
version = "1.75.5.post1"
|
||||
@ -3377,6 +3439,19 @@ wheels = [
|
||||
{ url = "https://mirrors.aliyun.com/pypi/packages/b1/ef/27dd35a7049c9a4f4211c6cd6a8c9db0a50647546f003a5867827ec45391/msgspec-0.19.0-cp312-cp312-win_amd64.whl", hash = "sha256:067f0de1c33cfa0b6a8206562efdf6be5985b988b53dd244a8e06f993f27c8c0" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "msoffcrypto-tool"
|
||||
version = "5.4.2"
|
||||
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
|
||||
dependencies = [
|
||||
{ name = "cryptography" },
|
||||
{ name = "olefile" },
|
||||
]
|
||||
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/d2/b7/0fd6573157e0ec60c0c470e732ab3322fba4d2834fd24e1088d670522a01/msoffcrypto_tool-5.4.2.tar.gz", hash = "sha256:44b545adba0407564a0cc3d6dde6ca36b7c0fdf352b85bca51618fa1d4817370" }
|
||||
wheels = [
|
||||
{ url = "https://mirrors.aliyun.com/pypi/packages/03/54/7f6d3d9acad083dae8c22d9ab483b657359a1bf56fee1d7af88794677707/msoffcrypto_tool-5.4.2-py3-none-any.whl", hash = "sha256:274fe2181702d1e5a107ec1b68a4c9fea997a44972ae1cc9ae0cb4f6a50fef0e" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "multidict"
|
||||
version = "6.6.3"
|
||||
@ -3726,6 +3801,32 @@ wheels = [
|
||||
{ url = "https://mirrors.aliyun.com/pypi/packages/9e/4e/0d0c945463719429b7bd21dece907ad0bde437a2ff12b9b12fee94722ab0/nvidia_nvtx_cu12-12.6.77-py3-none-manylinux2014_x86_64.whl", hash = "sha256:6574241a3ec5fdc9334353ab8c479fe75841dbe8f4532a8fc97ce63503330ba1" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "olefile"
|
||||
version = "0.47"
|
||||
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
|
||||
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/69/1b/077b508e3e500e1629d366249c3ccb32f95e50258b231705c09e3c7a4366/olefile-0.47.zip", hash = "sha256:599383381a0bf3dfbd932ca0ca6515acd174ed48870cbf7fee123d698c192c1c" }
|
||||
wheels = [
|
||||
{ url = "https://mirrors.aliyun.com/pypi/packages/17/d3/b64c356a907242d719fc668b71befd73324e47ab46c8ebbbede252c154b2/olefile-0.47-py2.py3-none-any.whl", hash = "sha256:543c7da2a7adadf21214938bb79c83ea12b473a4b6ee4ad4bf854e7715e13d1f" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "oletools"
|
||||
version = "0.60.2"
|
||||
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
|
||||
dependencies = [
|
||||
{ name = "colorclass" },
|
||||
{ name = "easygui" },
|
||||
{ name = "msoffcrypto-tool", marker = "(platform_python_implementation != 'PyPy' and sys_platform == 'darwin') or (platform_python_implementation != 'PyPy' and sys_platform == 'win32') or (sys_platform != 'darwin' and sys_platform != 'win32')" },
|
||||
{ name = "olefile" },
|
||||
{ name = "pcodedmp" },
|
||||
{ name = "pyparsing" },
|
||||
]
|
||||
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/5c/2f/037f40e44706d542b94a2312ccc33ee2701ebfc9a83b46b55263d49ce55a/oletools-0.60.2.zip", hash = "sha256:ad452099f4695ffd8855113f453348200d195ee9fa341a09e197d66ee7e0b2c3" }
|
||||
wheels = [
|
||||
{ url = "https://mirrors.aliyun.com/pypi/packages/ac/ff/05257b7183279b80ecec6333744de23f48f0faeeba46c93e6d13ce835515/oletools-0.60.2-py2.py3-none-any.whl", hash = "sha256:72ad8bd748fd0c4e7b5b4733af770d11543ebb2bf2697455f99f975fcd50cc96" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ollama"
|
||||
version = "0.2.1"
|
||||
@ -4188,6 +4289,19 @@ wheels = [
|
||||
{ url = "https://mirrors.aliyun.com/pypi/packages/87/2b/b50d3d08ea0fc419c183a84210571eba005328efa62b6b98bc28e9ead32a/patsy-1.0.1-py2.py3-none-any.whl", hash = "sha256:751fb38f9e97e62312e921a1954b81e1bb2bcda4f5eeabaf94db251ee791509c" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pcodedmp"
|
||||
version = "1.2.6"
|
||||
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
|
||||
dependencies = [
|
||||
{ name = "oletools" },
|
||||
{ name = "win-unicode-console", marker = "platform_python_implementation != 'PyPy' and sys_platform == 'win32'" },
|
||||
]
|
||||
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/3d/20/6d461e29135f474408d0d7f95b2456a9ba245560768ee51b788af10f7429/pcodedmp-1.2.6.tar.gz", hash = "sha256:025f8c809a126f45a082ffa820893e6a8d990d9d7ddb68694b5a9f0a6dbcd955" }
|
||||
wheels = [
|
||||
{ url = "https://mirrors.aliyun.com/pypi/packages/ba/72/b380fb5c89d89c3afafac8cf02a71a45f4f4a4f35531ca949a34683962d1/pcodedmp-1.2.6-py2.py3-none-any.whl", hash = "sha256:4441f7c0ab4cbda27bd4668db3b14f36261d86e5059ce06c0828602cbe1c4278" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pdfminer-six"
|
||||
version = "20221105"
|
||||
@ -5300,6 +5414,7 @@ dependencies = [
|
||||
{ name = "elastic-transport" },
|
||||
{ name = "elasticsearch" },
|
||||
{ name = "elasticsearch-dsl" },
|
||||
{ name = "extract-msg" },
|
||||
{ name = "filelock" },
|
||||
{ name = "flasgger" },
|
||||
{ name = "flask" },
|
||||
@ -5452,6 +5567,7 @@ requires-dist = [
|
||||
{ name = "elastic-transport", specifier = "==8.12.0" },
|
||||
{ name = "elasticsearch", specifier = "==8.12.1" },
|
||||
{ name = "elasticsearch-dsl", specifier = "==8.12.0" },
|
||||
{ name = "extract-msg", specifier = ">=0.39.0" },
|
||||
{ name = "fastembed", marker = "(platform_machine != 'x86_64' and extra == 'full') or (sys_platform == 'darwin' and extra == 'full')", specifier = ">=0.3.6,<0.4.0" },
|
||||
{ name = "fastembed-gpu", marker = "platform_machine == 'x86_64' and sys_platform != 'darwin' and extra == 'full'", specifier = ">=0.3.6,<0.4.0" },
|
||||
{ name = "filelock", specifier = "==3.15.4" },
|
||||
@ -5630,6 +5746,12 @@ wheels = [
|
||||
{ url = "https://mirrors.aliyun.com/pypi/packages/c2/5a/2f2e7fc026d5e64b5408aa3fbe0296a6407b8481196cae4daacacb3a3ae0/readerwriterlock-1.0.9-py3-none-any.whl", hash = "sha256:8c4b704e60d15991462081a27ef46762fea49b478aa4426644f2146754759ca7" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "red-black-tree-mod"
|
||||
version = "1.22"
|
||||
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
|
||||
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/48/75/bfa342a2ebfc9623b701f1c6995b9906fd6dd2cedf6bce777d09e23303ac/red-black-tree-mod-1.22.tar.gz", hash = "sha256:38e3652903a2bf96379c27c2082ca0b7b905158662dd7ef0c97f4fd93a9aa908" }
|
||||
|
||||
[[package]]
|
||||
name = "referencing"
|
||||
version = "0.36.2"
|
||||
@ -5883,6 +6005,19 @@ wheels = [
|
||||
{ url = "https://mirrors.aliyun.com/pypi/packages/64/8d/0133e4eb4beed9e425d9a98ed6e081a55d195481b7632472be1af08d2f6b/rsa-4.9.1-py3-none-any.whl", hash = "sha256:68635866661c6836b8d39430f97a996acbd61bfa49406748ea243539fe239762" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rtfde"
|
||||
version = "0.1.2.1"
|
||||
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
|
||||
dependencies = [
|
||||
{ name = "lark" },
|
||||
{ name = "oletools" },
|
||||
]
|
||||
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/66/f1/3fafc33cd80cc605509ced36dbbb74c3c365d5859b0b57b6500e4a8ca8a5/rtfde-0.1.2.1.tar.gz", hash = "sha256:ea2653fb163ef1e9fdd1b0849bef88b0ba82537f860d4aca5b2c49f556efaaaa" }
|
||||
wheels = [
|
||||
{ url = "https://mirrors.aliyun.com/pypi/packages/b6/dd/641e9cf68d4242aaf7ce9653498009d8925080b6664993988bd50468932a/rtfde-0.1.2.1-py3-none-any.whl", hash = "sha256:c44dfa923a435c54cdbdd0e0f5352a4075542af317af061f82f2d4f032271645" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ruamel-base"
|
||||
version = "1.0.0"
|
||||
@ -6890,6 +7025,18 @@ wheels = [
|
||||
{ url = "https://mirrors.aliyun.com/pypi/packages/5c/23/c7abc0ca0a1526a0774eca151daeb8de62ec457e77262b66b359c3c7679e/tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tzlocal"
|
||||
version = "5.3.1"
|
||||
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
|
||||
dependencies = [
|
||||
{ name = "tzdata", marker = "sys_platform == 'win32'" },
|
||||
]
|
||||
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/8b/2e/c14812d3d4d9cd1773c6be938f89e5735a1f11a9f184ac3639b93cef35d5/tzlocal-5.3.1.tar.gz", hash = "sha256:cceffc7edecefea1f595541dbd6e990cb1ea3d19bf01b2809f362a03dd7921fd" }
|
||||
wheels = [
|
||||
{ url = "https://mirrors.aliyun.com/pypi/packages/c2/14/e2a54fabd4f08cd7af1c07030603c3356b74da07f7cc056e600436edfa17/tzlocal-5.3.1-py3-none-any.whl", hash = "sha256:eb1a66c3ef5847adf7a834f1be0800581b683b5608e74f86ecbcef8ab91bb85d" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "umap-learn"
|
||||
version = "0.5.6"
|
||||
@ -7134,6 +7281,12 @@ dependencies = [
|
||||
]
|
||||
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/67/35/25e68fbc99e672127cc6fbb14b8ec1ba3dfef035bf1e4c90f78f24a80b7d/wikipedia-1.4.0.tar.gz", hash = "sha256:db0fad1829fdd441b1852306e9856398204dc0786d2996dd2e0c8bb8e26133b2" }
|
||||
|
||||
[[package]]
|
||||
name = "win-unicode-console"
|
||||
version = "0.5"
|
||||
source = { registry = "https://mirrors.aliyun.com/pypi/simple" }
|
||||
sdist = { url = "https://mirrors.aliyun.com/pypi/packages/89/8d/7aad74930380c8972ab282304a2ff45f3d4927108bb6693cabcc9fc6a099/win_unicode_console-0.5.zip", hash = "sha256:d4142d4d56d46f449d6f00536a73625a871cba040f0bc1a2e305a04578f07d1e" }
|
||||
|
||||
[[package]]
|
||||
name = "win32-setctime"
|
||||
version = "1.2.0"
|
||||
|
||||
@ -20,17 +20,10 @@ interface IProps {
|
||||
isMult?: boolean;
|
||||
}
|
||||
|
||||
const data = [
|
||||
{ id: '1', name: 'data-pipeline-1' },
|
||||
{ id: '2', name: 'data-pipeline-2' },
|
||||
{ id: '3', name: 'data-pipeline-3' },
|
||||
{ id: '4', name: 'data-pipeline-4' },
|
||||
];
|
||||
export function DataFlowSelect(props: IProps) {
|
||||
const { toDataPipeline, formFieldName, isMult = true } = props;
|
||||
const { t } = useTranslate('knowledgeConfiguration');
|
||||
const form = useFormContext();
|
||||
console.log('data-pipline form', form);
|
||||
const toDataPipLine = () => {
|
||||
toDataPipeline?.();
|
||||
};
|
||||
|
||||
@ -1,6 +1,10 @@
|
||||
import { DocumentParserType } from '@/constants/knowledge';
|
||||
import { useTranslate } from '@/hooks/common-hooks';
|
||||
import { cn } from '@/lib/utils';
|
||||
import {
|
||||
GenerateLogButton,
|
||||
GenerateType,
|
||||
} from '@/pages/dataset/dataset/generate-button/generate';
|
||||
import { upperFirst } from 'lodash';
|
||||
import { useCallback, useMemo } from 'react';
|
||||
import { useFormContext, useWatch } from 'react-hook-form';
|
||||
@ -47,6 +51,7 @@ export const showGraphRagItems = (parserId: DocumentParserType | undefined) => {
|
||||
type GraphRagItemsProps = {
|
||||
marginBottom?: boolean;
|
||||
className?: string;
|
||||
showGenerateItem?: boolean;
|
||||
};
|
||||
|
||||
export function UseGraphRagFormField() {
|
||||
@ -88,6 +93,7 @@ export function UseGraphRagFormField() {
|
||||
// The three types "table", "resume" and "one" do not display this configuration.
|
||||
const GraphRagItems = ({
|
||||
marginBottom = false,
|
||||
showGenerateItem = false,
|
||||
className = 'p-10',
|
||||
}: GraphRagItemsProps) => {
|
||||
const { t } = useTranslate('knowledgeConfiguration');
|
||||
@ -210,6 +216,18 @@ const GraphRagItems = ({
|
||||
</FormItem>
|
||||
)}
|
||||
/>
|
||||
{showGenerateItem && (
|
||||
<div className="w-full flex items-center">
|
||||
<div className="text-sm whitespace-nowrap w-1/4">
|
||||
{t('extractKnowledgeGraph')}
|
||||
</div>
|
||||
<GenerateLogButton
|
||||
className="w-3/4 text-text-secondary"
|
||||
status={1}
|
||||
type={GenerateType.KnowledgeGraph}
|
||||
/>
|
||||
</div>
|
||||
)}
|
||||
</>
|
||||
)}
|
||||
</FormContainer>
|
||||
|
||||
@ -1,6 +1,10 @@
|
||||
import { FormLayout } from '@/constants/form';
|
||||
import { DocumentParserType } from '@/constants/knowledge';
|
||||
import { useTranslate } from '@/hooks/common-hooks';
|
||||
import {
|
||||
GenerateLogButton,
|
||||
GenerateType,
|
||||
} from '@/pages/dataset/dataset/generate-button/generate';
|
||||
import random from 'lodash/random';
|
||||
import { Shuffle } from 'lucide-react';
|
||||
import { useCallback } from 'react';
|
||||
@ -52,7 +56,11 @@ const Prompt = 'parser_config.raptor.prompt';
|
||||
|
||||
// The three types "table", "resume" and "one" do not display this configuration.
|
||||
|
||||
const RaptorFormFields = () => {
|
||||
const RaptorFormFields = ({
|
||||
showGenerateItem = false,
|
||||
}: {
|
||||
showGenerateItem?: boolean;
|
||||
}) => {
|
||||
const form = useFormContext();
|
||||
const { t } = useTranslate('knowledgeConfiguration');
|
||||
const useRaptor = useWatch({ name: UseRaptorField });
|
||||
@ -211,6 +219,18 @@ const RaptorFormFields = () => {
|
||||
</FormItem>
|
||||
)}
|
||||
/>
|
||||
{showGenerateItem && (
|
||||
<div className="w-full flex items-center">
|
||||
<div className="text-sm whitespace-nowrap w-1/4">
|
||||
{t('extractRaptor')}
|
||||
</div>
|
||||
<GenerateLogButton
|
||||
className="w-3/4 text-text-secondary"
|
||||
status={1}
|
||||
type={GenerateType.Raptor}
|
||||
/>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
</>
|
||||
|
||||
@ -75,21 +75,21 @@ const Modal: ModalType = ({
|
||||
|
||||
const handleCancel = useCallback(() => {
|
||||
onOpenChange?.(false);
|
||||
onCancel?.();
|
||||
}, [onOpenChange, onCancel]);
|
||||
// onCancel?.();
|
||||
}, [onOpenChange]);
|
||||
|
||||
const handleOk = useCallback(() => {
|
||||
onOpenChange?.(true);
|
||||
onOk?.();
|
||||
}, [onOpenChange, onOk]);
|
||||
// onOk?.();
|
||||
}, [onOpenChange]);
|
||||
const handleChange = (open: boolean) => {
|
||||
onOpenChange?.(open);
|
||||
console.log('open', open, onOpenChange);
|
||||
if (open) {
|
||||
handleOk();
|
||||
onOk?.();
|
||||
}
|
||||
if (!open) {
|
||||
handleCancel();
|
||||
onCancel?.();
|
||||
}
|
||||
};
|
||||
const footEl = useMemo(() => {
|
||||
|
||||
@ -102,13 +102,15 @@ export default {
|
||||
noMoreData: `That's all. Nothing more.`,
|
||||
},
|
||||
knowledgeDetails: {
|
||||
notGenerated: 'Not generated',
|
||||
generatedOn: 'Generated on',
|
||||
subbarFiles: 'Files',
|
||||
generateKnowledgeGraph:
|
||||
'This will extract entities and relationships from all your documents in this dataset. The process may take a while to complete.',
|
||||
generateRaptor:
|
||||
'This will extract entities and relationships from all your documents in this dataset. The process may take a while to complete.',
|
||||
generate: 'Generate',
|
||||
raptor: 'Raptor',
|
||||
knowledgeGraph: 'Knowledge Graph',
|
||||
processingType: 'Processing Type',
|
||||
dataPipeline: 'Data Pipeline',
|
||||
operations: 'Operations',
|
||||
@ -138,12 +140,12 @@ export default {
|
||||
testing: 'Retrieval testing',
|
||||
files: 'files',
|
||||
configuration: 'Configuration',
|
||||
knowledgeGraph: 'Knowledge graph',
|
||||
knowledgeGraph: 'Knowledge Graph',
|
||||
name: 'Name',
|
||||
namePlaceholder: 'Please input name!',
|
||||
doc: 'Docs',
|
||||
datasetDescription:
|
||||
'😉 Please wait for your files to finish parsing before starting an AI-powered chat.',
|
||||
'Please wait for your files to finish parsing before starting an AI-powered chat.',
|
||||
addFile: 'Add file',
|
||||
searchFiles: 'Search your files',
|
||||
localFiles: 'Local files',
|
||||
@ -261,6 +263,22 @@ export default {
|
||||
reRankModelWaring: 'Re-rank model is very time consuming.',
|
||||
},
|
||||
knowledgeConfiguration: {
|
||||
// HTML fragment shown in the "delete generated results" confirmation modal.
// `{{type}}` is an i18n interpolation placeholder. The paragraph must be
// closed with </p>; a second <p> opens a stray empty paragraph instead.
deleteGenerateModalContent: `
<p>Deleting the generated <strong class='text-text-primary'>{{type}}</strong> results
will remove all derived entities and relationships from this dataset.
Your original files will remain intact.</p>
<br/>
Do you want to continue?
`,
|
||||
extractRaptor: 'Extract Raptor',
|
||||
extractKnowledgeGraph: 'Extract Knowledge Graph',
|
||||
filterPlaceholder: 'please input filter',
|
||||
fileFilterTip: '',
|
||||
fileFilter: 'File Filter',
|
||||
setDefaultTip: '',
|
||||
setDefault: 'Set as Default',
|
||||
eidtLinkDataPipeline: 'Edit Data Pipeline',
|
||||
linkPipelineSetTip: 'Manage data pipeline linkage with this dataset',
|
||||
default: 'Default',
|
||||
dataPipeline: 'Data Pipeline',
|
||||
linkDataPipeline: 'Link Data Pipeline',
|
||||
@ -1646,6 +1664,13 @@ This delimiter is used to split the input text into several text pieces echo of
|
||||
<p>To keep them, please click Rerun to re-run the current stage.</p> `,
|
||||
changeStepModalConfirmText: 'Switch Anyway',
|
||||
changeStepModalCancelText: 'Cancel',
|
||||
unlinkPipelineModalTitle: 'Unlink data pipeline',
|
||||
unlinkPipelineModalContent: `
|
||||
<p>Once unlinked, this Dataset will no longer be connected to the current Data Pipeline.</p>
|
||||
<p>Files that are already being parsed will continue until completion</p>
|
||||
<p>Files that are not yet parsed will no longer be processed</p> <br/>
|
||||
<p>Are you sure you want to proceed?</p> `,
|
||||
unlinkPipelineModalConfirmText: 'Unlink',
|
||||
},
|
||||
dataflow: {
|
||||
parser: 'Parser',
|
||||
|
||||
@ -94,9 +94,11 @@ export default {
|
||||
noMoreData: '没有更多数据了',
|
||||
},
|
||||
knowledgeDetails: {
|
||||
notGenerated: '未生成',
|
||||
generatedOn: '生成于',
|
||||
subbarFiles: '文件列表',
|
||||
generate: '生成',
|
||||
raptor: 'Raptor',
|
||||
knowledgeGraph: '知识图谱',
|
||||
processingType: '处理类型',
|
||||
dataPipeline: '数据管道',
|
||||
operations: '操作',
|
||||
@ -130,7 +132,7 @@ export default {
|
||||
name: '名称',
|
||||
namePlaceholder: '请输入名称',
|
||||
doc: '文档',
|
||||
datasetDescription: '😉 解析成功后才能问答哦。',
|
||||
datasetDescription: '解析成功后才能问答哦。',
|
||||
addFile: '新增文件',
|
||||
searchFiles: '搜索文件',
|
||||
localFiles: '本地文件',
|
||||
@ -246,6 +248,22 @@ export default {
|
||||
theDocumentBeingParsedCannotBeDeleted: '正在解析的文档不能被删除',
|
||||
},
|
||||
knowledgeConfiguration: {
|
||||
// HTML fragment shown in the "delete generated results" confirmation modal.
// `{{type}}` is an i18n interpolation placeholder. The paragraph must be
// closed with </p>; a second <p> opens a stray empty paragraph instead.
deleteGenerateModalContent: `
<p>删除生成的 <strong class='text-text-primary'>{{type}}</strong> 结果
将从此数据集中移除所有派生实体和关系。
您的原始文件将保持不变。</p>
<br/>
是否要继续?
`,
|
||||
extractRaptor: '从文档中提取Raptor',
|
||||
extractKnowledgeGraph: '从文档中提取知识图谱',
|
||||
filterPlaceholder: '请输入',
|
||||
fileFilterTip: '',
|
||||
fileFilter: '正则匹配表达式',
|
||||
setDefaultTip: '',
|
||||
setDefault: '设置默认',
|
||||
eidtLinkDataPipeline: '编辑数据流',
|
||||
linkPipelineSetTip: '管理与此数据集的数据管道链接',
|
||||
default: '默认',
|
||||
dataPipeline: '数据流',
|
||||
linkDataPipeline: '关联数据流',
|
||||
@ -1556,6 +1574,13 @@ General:实体和关系提取提示来自 GitHub - microsoft/graphrag:基于
|
||||
<p>要保留这些更改,请点击“重新运行”以重新运行当前阶段。</p> `,
|
||||
changeStepModalConfirmText: '继续切换',
|
||||
changeStepModalCancelText: '取消',
|
||||
unlinkPipelineModalTitle: '解绑数据流',
|
||||
unlinkPipelineModalContent: `
|
||||
<p>一旦取消链接,该数据集将不再连接到当前数据管道。</p>
|
||||
<p>正在解析的文件将继续解析,直到完成。</p>
|
||||
<p>尚未解析的文件将不再被处理。</p> <br/>
|
||||
<p>你确定要继续吗?</p> `,
|
||||
unlinkPipelineModalConfirmText: '解绑',
|
||||
},
|
||||
dataflow: {
|
||||
parser: '解析器',
|
||||
|
||||
@ -2,7 +2,7 @@ import SvgIcon from '@/components/svg-icon';
|
||||
import { useIsDarkTheme } from '@/components/theme-provider';
|
||||
import { parseColorToRGBA } from '@/utils/common-util';
|
||||
import { CircleQuestionMark } from 'lucide-react';
|
||||
import { FC, useMemo, useState } from 'react';
|
||||
import { FC, useEffect, useMemo, useState } from 'react';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
import { LogTabs } from './dataset-common';
|
||||
import { DatasetFilter } from './dataset-filter';
|
||||
@ -74,25 +74,35 @@ const FileLogsPage: FC = () => {
|
||||
const [active, setActive] = useState<(typeof LogTabs)[keyof typeof LogTabs]>(
|
||||
LogTabs.FILE_LOGS,
|
||||
);
|
||||
const topMockData = {
|
||||
const [topAllData, setTopAllData] = useState({
|
||||
totalFiles: {
|
||||
value: 2827,
|
||||
precent: 12.5,
|
||||
value: 0,
|
||||
precent: 0,
|
||||
},
|
||||
downloads: {
|
||||
value: 28,
|
||||
success: 8,
|
||||
failed: 2,
|
||||
value: 0,
|
||||
success: 0,
|
||||
failed: 0,
|
||||
},
|
||||
processing: {
|
||||
value: 156,
|
||||
success: 8,
|
||||
failed: 2,
|
||||
value: 0,
|
||||
success: 0,
|
||||
failed: 0,
|
||||
},
|
||||
};
|
||||
});
|
||||
|
||||
const { data: topData } = useFetchOverviewTital();
|
||||
console.log('topData --> ', topData);
|
||||
// Sync the "Processing" stat card with the fetched overview counters.
// A functional updater reads the previous state inside the setter, so the
// effect depends only on `topData`. Listing `topAllData` as a dependency
// while also replacing it here would re-run the effect after every state
// update and never settle.
useEffect(() => {
  setTopAllData((prev) => ({
    ...prev,
    processing: {
      value: topData?.processing || 0,
      success: topData?.finished || 0,
      failed: topData?.failed || 0,
    },
  }));
}, [topData]);
|
||||
|
||||
const mockData = useMemo(() => {
|
||||
if (active === LogTabs.FILE_LOGS) {
|
||||
@ -161,7 +171,7 @@ const FileLogsPage: FC = () => {
|
||||
<div className="grid grid-cols-3 md:grid-cols-3 gap-4 mb-6">
|
||||
<StatCard
|
||||
title="Total Files"
|
||||
value={topMockData.totalFiles.value}
|
||||
value={topAllData.totalFiles.value}
|
||||
icon={
|
||||
isDark ? (
|
||||
<SvgIcon name="data-flow/total-files-icon" width={40} />
|
||||
@ -172,15 +182,15 @@ const FileLogsPage: FC = () => {
|
||||
>
|
||||
<div>
|
||||
<span className="text-accent-primary">
|
||||
{topMockData.totalFiles.precent > 0 ? '+' : ''}
|
||||
{topMockData.totalFiles.precent}%{' '}
|
||||
{topAllData.totalFiles.precent > 0 ? '+' : ''}
|
||||
{topAllData.totalFiles.precent}%{' '}
|
||||
</span>
|
||||
from last week
|
||||
</div>
|
||||
</StatCard>
|
||||
<StatCard
|
||||
title="Downloading"
|
||||
value={topMockData.downloads.value}
|
||||
value={topAllData.downloads.value}
|
||||
icon={
|
||||
isDark ? (
|
||||
<SvgIcon name="data-flow/data-icon" width={40} />
|
||||
@ -190,13 +200,13 @@ const FileLogsPage: FC = () => {
|
||||
}
|
||||
>
|
||||
<CardFooterProcess
|
||||
success={topMockData.downloads.success}
|
||||
failed={topMockData.downloads.failed}
|
||||
success={topAllData.downloads.success}
|
||||
failed={topAllData.downloads.failed}
|
||||
/>
|
||||
</StatCard>
|
||||
<StatCard
|
||||
title="Processing"
|
||||
value={topMockData.processing.value}
|
||||
value={topAllData.processing.value}
|
||||
icon={
|
||||
isDark ? (
|
||||
<SvgIcon name="data-flow/processing-icon" width={40} />
|
||||
@ -206,8 +216,8 @@ const FileLogsPage: FC = () => {
|
||||
}
|
||||
>
|
||||
<CardFooterProcess
|
||||
success={topMockData.processing.success}
|
||||
failed={topMockData.processing.failed}
|
||||
success={topAllData.processing.success}
|
||||
failed={topAllData.processing.failed}
|
||||
/>
|
||||
</StatCard>
|
||||
</div>
|
||||
|
||||
@ -65,25 +65,25 @@ export const getFileLogsTableColumns = (
|
||||
) => {
|
||||
// const { t } = useTranslate('knowledgeDetails');
|
||||
const columns: ColumnDef<DocumentLog>[] = [
|
||||
{
|
||||
id: 'select',
|
||||
header: ({ table }) => (
|
||||
<input
|
||||
type="checkbox"
|
||||
checked={table.getIsAllRowsSelected()}
|
||||
onChange={table.getToggleAllRowsSelectedHandler()}
|
||||
className="rounded bg-gray-900 text-blue-500 focus:ring-blue-500"
|
||||
/>
|
||||
),
|
||||
cell: ({ row }) => (
|
||||
<input
|
||||
type="checkbox"
|
||||
checked={row.getIsSelected()}
|
||||
onChange={row.getToggleSelectedHandler()}
|
||||
className="rounded border-gray-600 bg-gray-900 text-blue-500 focus:ring-blue-500"
|
||||
/>
|
||||
),
|
||||
},
|
||||
// {
|
||||
// id: 'select',
|
||||
// header: ({ table }) => (
|
||||
// <input
|
||||
// type="checkbox"
|
||||
// checked={table.getIsAllRowsSelected()}
|
||||
// onChange={table.getToggleAllRowsSelectedHandler()}
|
||||
// className="rounded bg-gray-900 text-blue-500 focus:ring-blue-500"
|
||||
// />
|
||||
// ),
|
||||
// cell: ({ row }) => (
|
||||
// <input
|
||||
// type="checkbox"
|
||||
// checked={row.getIsSelected()}
|
||||
// onChange={row.getToggleSelectedHandler()}
|
||||
// className="rounded border-gray-600 bg-gray-900 text-blue-500 focus:ring-blue-500"
|
||||
// />
|
||||
// ),
|
||||
// },
|
||||
{
|
||||
accessorKey: 'id',
|
||||
header: 'ID',
|
||||
@ -156,7 +156,7 @@ export const getFileLogsTableColumns = (
|
||||
id: 'operations',
|
||||
header: t('operations'),
|
||||
cell: ({ row }) => (
|
||||
<div className="flex justify-start space-x-2">
|
||||
<div className="flex justify-start space-x-2 opacity-0 group-hover:opacity-100 transition-opacity">
|
||||
<Button
|
||||
variant="ghost"
|
||||
size="sm"
|
||||
@ -189,25 +189,25 @@ export const getDatasetLogsTableColumns = (
|
||||
) => {
|
||||
// const { t } = useTranslate('knowledgeDetails');
|
||||
const columns: ColumnDef<DocumentLog>[] = [
|
||||
{
|
||||
id: 'select',
|
||||
header: ({ table }) => (
|
||||
<input
|
||||
type="checkbox"
|
||||
checked={table.getIsAllRowsSelected()}
|
||||
onChange={table.getToggleAllRowsSelectedHandler()}
|
||||
className="rounded bg-gray-900 text-blue-500 focus:ring-blue-500"
|
||||
/>
|
||||
),
|
||||
cell: ({ row }) => (
|
||||
<input
|
||||
type="checkbox"
|
||||
checked={row.getIsSelected()}
|
||||
onChange={row.getToggleSelectedHandler()}
|
||||
className="rounded border-gray-600 bg-gray-900 text-blue-500 focus:ring-blue-500"
|
||||
/>
|
||||
),
|
||||
},
|
||||
// {
|
||||
// id: 'select',
|
||||
// header: ({ table }) => (
|
||||
// <input
|
||||
// type="checkbox"
|
||||
// checked={table.getIsAllRowsSelected()}
|
||||
// onChange={table.getToggleAllRowsSelectedHandler()}
|
||||
// className="rounded bg-gray-900 text-blue-500 focus:ring-blue-500"
|
||||
// />
|
||||
// ),
|
||||
// cell: ({ row }) => (
|
||||
// <input
|
||||
// type="checkbox"
|
||||
// checked={row.getIsSelected()}
|
||||
// onChange={row.getToggleSelectedHandler()}
|
||||
// className="rounded border-gray-600 bg-gray-900 text-blue-500 focus:ring-blue-500"
|
||||
// />
|
||||
// ),
|
||||
// },
|
||||
{
|
||||
accessorKey: 'id',
|
||||
header: 'ID',
|
||||
@ -251,7 +251,7 @@ export const getDatasetLogsTableColumns = (
|
||||
id: 'operations',
|
||||
header: t('operations'),
|
||||
cell: ({ row }) => (
|
||||
<div className="flex justify-start space-x-2">
|
||||
<div className="flex justify-start space-x-2 opacity-0 group-hover:opacity-100 transition-opacity">
|
||||
<Button
|
||||
variant="ghost"
|
||||
size="sm"
|
||||
|
||||
@ -1,21 +1,61 @@
|
||||
import { IconFont } from '@/components/icon-font';
|
||||
import { RAGFlowAvatar } from '@/components/ragflow-avatar';
|
||||
import { Button } from '@/components/ui/button';
|
||||
import { Modal } from '@/components/ui/modal/modal';
|
||||
import { omit } from 'lodash';
|
||||
import { Link, Settings2, Unlink } from 'lucide-react';
|
||||
import { useState } from 'react';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
import { z } from 'zod';
|
||||
import { linkPiplineFormSchema } from '../form-schema';
|
||||
import LinkDataPipelineModal from './link-data-pipline-modal';
|
||||
|
||||
interface DataPipelineItemProps {
|
||||
id: string;
|
||||
name: string;
|
||||
avatar?: string;
|
||||
isDefault?: boolean;
|
||||
linked?: boolean;
|
||||
openLinkModalFunc?: (open: boolean) => void;
|
||||
openLinkModalFunc?: (open: boolean, data?: IDataPipelineNodeProps) => void;
|
||||
}
|
||||
const DataPipelineItem = (props: DataPipelineItemProps) => {
|
||||
const { t } = useTranslation();
|
||||
const { name, avatar, isDefault, linked, openLinkModalFunc } = props;
|
||||
const openUnlinkModal = () => {
|
||||
Modal.show({
|
||||
visible: true,
|
||||
className: '!w-[560px]',
|
||||
title: t('dataflowParser.unlinkPipelineModalTitle'),
|
||||
children: (
|
||||
<div
|
||||
className="text-sm text-text-secondary"
|
||||
dangerouslySetInnerHTML={{
|
||||
__html: t('dataflowParser.unlinkPipelineModalContent'),
|
||||
}}
|
||||
></div>
|
||||
),
|
||||
onVisibleChange: () => {
|
||||
Modal.hide();
|
||||
},
|
||||
footer: (
|
||||
<div className="flex justify-end gap-2">
|
||||
<Button variant={'outline'} onClick={() => Modal.hide()}>
|
||||
{t('dataflowParser.changeStepModalCancelText')}
|
||||
</Button>
|
||||
<Button
|
||||
variant={'secondary'}
|
||||
className="!bg-state-error text-bg-base"
|
||||
onClick={() => {
|
||||
Modal.hide();
|
||||
}}
|
||||
>
|
||||
{t('dataflowParser.unlinkPipelineModalConfirmText')}
|
||||
</Button>
|
||||
</div>
|
||||
),
|
||||
});
|
||||
};
|
||||
|
||||
return (
|
||||
<div className="flex items-center justify-between gap-1 px-2 rounded-lg border">
|
||||
<div className="flex items-center gap-1">
|
||||
@ -28,42 +68,89 @@ const DataPipelineItem = (props: DataPipelineItemProps) => {
|
||||
)}
|
||||
</div>
|
||||
<div className="flex gap-1 items-center">
|
||||
<Button variant={'transparent'} className="border-none">
|
||||
<Button
|
||||
variant={'transparent'}
|
||||
className="border-none"
|
||||
type="button"
|
||||
onClick={() =>
|
||||
openLinkModalFunc?.(true, { ...omit(props, ['openLinkModalFunc']) })
|
||||
}
|
||||
>
|
||||
<Settings2 />
|
||||
</Button>
|
||||
{!isDefault && (
|
||||
<Button
|
||||
variant={'transparent'}
|
||||
className="border-none"
|
||||
onClick={() => {
|
||||
openLinkModalFunc?.(true);
|
||||
}}
|
||||
>
|
||||
{linked ? <Link /> : <Unlink />}
|
||||
</Button>
|
||||
<>
|
||||
{linked && (
|
||||
<Button
|
||||
type="button"
|
||||
variant={'transparent'}
|
||||
className="border-none"
|
||||
onClick={() => {
|
||||
openUnlinkModal();
|
||||
}}
|
||||
>
|
||||
<Unlink />
|
||||
</Button>
|
||||
)}
|
||||
</>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
};
|
||||
|
||||
export interface IDataPipelineNodeProps {
|
||||
id: string;
|
||||
name: string;
|
||||
avatar?: string;
|
||||
isDefault?: boolean;
|
||||
linked?: boolean;
|
||||
}
|
||||
const LinkDataPipeline = () => {
|
||||
const { t } = useTranslation();
|
||||
const [openLinkModal, setOpenLinkModal] = useState(false);
|
||||
const [currentDataPipeline, setCurrentDataPipeline] =
|
||||
useState<IDataPipelineNodeProps>();
|
||||
const testNode = [
|
||||
{
|
||||
id: '1',
|
||||
name: 'Data Pipeline 1',
|
||||
avatar: 'https://avatars.githubusercontent.com/u/10656201?v=4',
|
||||
isDefault: true,
|
||||
linked: true,
|
||||
},
|
||||
{
|
||||
id: '2',
|
||||
name: 'Data Pipeline 2',
|
||||
avatar: 'https://avatars.githubusercontent.com/u/10656201?v=4',
|
||||
linked: false,
|
||||
},
|
||||
{
|
||||
id: '3',
|
||||
name: 'Data Pipeline 3',
|
||||
avatar: 'https://avatars.githubusercontent.com/u/10656201?v=4',
|
||||
linked: false,
|
||||
},
|
||||
{
|
||||
id: '4',
|
||||
name: 'Data Pipeline 4',
|
||||
avatar: 'https://avatars.githubusercontent.com/u/10656201?v=4',
|
||||
linked: true,
|
||||
},
|
||||
];
|
||||
const openLinkModalFunc = (open: boolean) => {
|
||||
const openLinkModalFunc = (open: boolean, data?: IDataPipelineNodeProps) => {
|
||||
console.log('open', open, data);
|
||||
setOpenLinkModal(open);
|
||||
if (data) {
|
||||
setCurrentDataPipeline(data);
|
||||
} else {
|
||||
setCurrentDataPipeline(undefined);
|
||||
}
|
||||
};
|
||||
const handleLinkOrEditSubmit = (
|
||||
data: z.infer<typeof linkPiplineFormSchema>,
|
||||
) => {
|
||||
console.log('handleLinkOrEditSubmit', data);
|
||||
};
|
||||
return (
|
||||
<div className="flex flex-col gap-2">
|
||||
@ -74,9 +161,15 @@ const LinkDataPipeline = () => {
|
||||
</div>
|
||||
<div className="flex justify-between items-center">
|
||||
<div className="text-center text-xs text-text-secondary">
|
||||
Manage data pipeline linkage with this dataset
|
||||
{t('knowledgeConfiguration.linkPipelineSetTip')}
|
||||
</div>
|
||||
<Button variant={'transparent'}>
|
||||
<Button
|
||||
type="button"
|
||||
variant={'transparent'}
|
||||
onClick={() => {
|
||||
openLinkModalFunc?.(true);
|
||||
}}
|
||||
>
|
||||
<Link />
|
||||
<span className="text-xs text-text-primary">
|
||||
{t('knowledgeConfiguration.linkDataPipeline')}
|
||||
@ -94,10 +187,12 @@ const LinkDataPipeline = () => {
|
||||
))}
|
||||
</section>
|
||||
<LinkDataPipelineModal
|
||||
data={currentDataPipeline}
|
||||
open={openLinkModal}
|
||||
setOpen={(open: boolean) => {
|
||||
openLinkModalFunc(open);
|
||||
}}
|
||||
onSubmit={handleLinkOrEditSubmit}
|
||||
/>
|
||||
</div>
|
||||
);
|
||||
|
||||
@ -10,32 +10,53 @@ import {
|
||||
FormMessage,
|
||||
} from '@/components/ui/form';
|
||||
import { Modal } from '@/components/ui/modal/modal';
|
||||
import { Switch } from '@/components/ui/switch';
|
||||
import { useNavigatePage } from '@/hooks/logic-hooks/navigate-hooks';
|
||||
import { zodResolver } from '@hookform/resolvers/zod';
|
||||
import { t } from 'i18next';
|
||||
import { useForm } from 'react-hook-form';
|
||||
import { z } from 'zod';
|
||||
import { linkPiplineFormSchema } from '../form-schema';
|
||||
import { pipelineFormSchema } from '../form-schema';
|
||||
import { IDataPipelineNodeProps } from './link-data-pipeline';
|
||||
|
||||
const LinkDataPipelineModal = ({
|
||||
data,
|
||||
open,
|
||||
setOpen,
|
||||
onSubmit,
|
||||
}: {
|
||||
data: IDataPipelineNodeProps | undefined;
|
||||
open: boolean;
|
||||
setOpen: (open: boolean) => void;
|
||||
onSubmit?: (data: any) => void;
|
||||
}) => {
|
||||
const form = useForm<z.infer<typeof linkPiplineFormSchema>>({
|
||||
resolver: zodResolver(linkPiplineFormSchema),
|
||||
defaultValues: { data_flow: ['888'], file_filter: '' },
|
||||
const isEdit = !!data;
|
||||
const form = useForm<z.infer<typeof pipelineFormSchema>>({
|
||||
resolver: zodResolver(pipelineFormSchema),
|
||||
defaultValues: {
|
||||
data_flow: [],
|
||||
set_default: false,
|
||||
file_filter: '',
|
||||
},
|
||||
});
|
||||
// const [open, setOpen] = useState(false);
|
||||
const { navigateToAgents } = useNavigatePage();
|
||||
const handleFormSubmit = (values: any) => {
|
||||
console.log(values);
|
||||
console.log(values, data);
|
||||
const param = {
|
||||
...data,
|
||||
...values,
|
||||
};
|
||||
onSubmit?.(param);
|
||||
};
|
||||
return (
|
||||
<Modal
|
||||
title={t('knowledgeConfiguration.linkDataPipeline')}
|
||||
className="!w-[560px]"
|
||||
title={
|
||||
!isEdit
|
||||
? t('knowledgeConfiguration.linkDataPipeline')
|
||||
: t('knowledgeConfiguration.eidtLinkDataPipeline')
|
||||
}
|
||||
open={open}
|
||||
onOpenChange={setOpen}
|
||||
showfooter={false}
|
||||
@ -43,10 +64,12 @@ const LinkDataPipelineModal = ({
|
||||
<Form {...form}>
|
||||
<form onSubmit={form.handleSubmit(handleFormSubmit)}>
|
||||
<div className="flex flex-col gap-4 ">
|
||||
<DataFlowSelect
|
||||
toDataPipeline={navigateToAgents}
|
||||
formFieldName="data_flow"
|
||||
/>
|
||||
{!isEdit && (
|
||||
<DataFlowSelect
|
||||
toDataPipeline={navigateToAgents}
|
||||
formFieldName="data_flow"
|
||||
/>
|
||||
)}
|
||||
<FormField
|
||||
control={form.control}
|
||||
name={'file_filter'}
|
||||
@ -65,7 +88,9 @@ const LinkDataPipelineModal = ({
|
||||
<div className="text-muted-foreground">
|
||||
<FormControl>
|
||||
<Input
|
||||
placeholder={t('dataFlowPlaceholder')}
|
||||
placeholder={t(
|
||||
'knowledgeConfiguration.filterPlaceholder',
|
||||
)}
|
||||
{...field}
|
||||
/>
|
||||
</FormControl>
|
||||
@ -78,11 +103,56 @@ const LinkDataPipelineModal = ({
|
||||
</FormItem>
|
||||
)}
|
||||
/>
|
||||
{isEdit && (
|
||||
<FormField
|
||||
control={form.control}
|
||||
name={'set_default'}
|
||||
render={({ field }) => (
|
||||
<FormItem className=" items-center space-y-0 ">
|
||||
<div className="flex flex-col gap-1">
|
||||
<div className="flex gap-2 justify-between ">
|
||||
<FormLabel
|
||||
tooltip={t('knowledgeConfiguration.setDefaultTip')}
|
||||
className="text-sm text-text-primary whitespace-wrap "
|
||||
>
|
||||
{t('knowledgeConfiguration.setDefault')}
|
||||
</FormLabel>
|
||||
</div>
|
||||
|
||||
<div className="text-muted-foreground">
|
||||
<FormControl>
|
||||
<Switch
  // Bind the boolean form value to `checked` so the toggle is controlled
  // by react-hook-form (defaults, reset); `value` does not drive the
  // on/off state of a switch.
  checked={field.value}
  onCheckedChange={field.onChange}
/>
|
||||
</FormControl>
|
||||
</div>
|
||||
</div>
|
||||
<div className="flex pt-1">
|
||||
<div className="w-full"></div>
|
||||
<FormMessage />
|
||||
</div>
|
||||
</FormItem>
|
||||
)}
|
||||
/>
|
||||
)}
|
||||
<div className="flex justify-end gap-1">
|
||||
<Button type="reset" variant={'outline'} className="btn-primary">
|
||||
<Button
|
||||
type="button"
|
||||
variant={'outline'}
|
||||
className="btn-primary"
|
||||
onClick={() => {
|
||||
setOpen(false);
|
||||
}}
|
||||
>
|
||||
{t('modal.cancelText')}
|
||||
</Button>
|
||||
<Button type="submit" variant={'default'} className="btn-primary">
|
||||
<Button
|
||||
type="button"
|
||||
variant={'default'}
|
||||
className="btn-primary"
|
||||
onClick={form.handleSubmit(handleFormSubmit)}
|
||||
>
|
||||
{t('modal.okText')}
|
||||
</Button>
|
||||
</div>
|
||||
|
||||
@ -72,7 +72,17 @@ export const formSchema = z.object({
|
||||
// icon: z.array(z.instanceof(File)),
|
||||
});
|
||||
|
||||
export const linkPiplineFormSchema = z.object({
|
||||
data_flow: z.array(z.string()),
|
||||
export const pipelineFormSchema = z.object({
|
||||
data_flow: z.array(z.string()).optional(),
|
||||
set_default: z.boolean().optional(),
|
||||
file_filter: z.string().optional(),
|
||||
});
|
||||
|
||||
export const linkPiplineFormSchema = pipelineFormSchema.pick({
|
||||
data_flow: true,
|
||||
file_filter: true,
|
||||
});
|
||||
export const editPiplineFormSchema = pipelineFormSchema.pick({
|
||||
set_default: true,
|
||||
file_filter: true,
|
||||
});
|
||||
|
||||
@ -86,9 +86,12 @@ export default function DatasetSettings() {
|
||||
<GeneralForm></GeneralForm>
|
||||
<Divider />
|
||||
|
||||
<GraphRagItems className="border-none p-0"></GraphRagItems>
|
||||
<GraphRagItems
|
||||
className="border-none p-0"
|
||||
showGenerateItem={true}
|
||||
></GraphRagItems>
|
||||
<Divider />
|
||||
<RaptorFormFields></RaptorFormFields>
|
||||
<RaptorFormFields showGenerateItem={true}></RaptorFormFields>
|
||||
<Divider />
|
||||
<LinkDataPipeline />
|
||||
</MainContainer>
|
||||
|
||||
@ -6,16 +6,20 @@ import {
|
||||
DropdownMenuItem,
|
||||
DropdownMenuTrigger,
|
||||
} from '@/components/ui/dropdown-menu';
|
||||
import { Modal } from '@/components/ui/modal/modal';
|
||||
import { cn } from '@/lib/utils';
|
||||
import { toFixed } from '@/utils/common-util';
|
||||
import { t } from 'i18next';
|
||||
import { lowerFirst } from 'lodash';
|
||||
import { CirclePause, WandSparkles } from 'lucide-react';
|
||||
import { CirclePause, Trash2, WandSparkles } from 'lucide-react';
|
||||
import { useState } from 'react';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
import { generateStatus, useFetchGenerateData } from './hook';
|
||||
|
||||
const MenuItem: React.FC<{ name: 'KnowledgeGraph' | 'Raptor' }> = ({
|
||||
name,
|
||||
}) => {
|
||||
export enum GenerateType {
|
||||
KnowledgeGraph = 'KnowledgeGraph',
|
||||
Raptor = 'Raptor',
|
||||
}
|
||||
const MenuItem: React.FC<{ name: GenerateType }> = ({ name }) => {
|
||||
console.log(name, 'pppp');
|
||||
const iconKeyMap = {
|
||||
KnowledgeGraph: 'knowledgegraph',
|
||||
@ -111,3 +115,102 @@ const Generate: React.FC = () => {
|
||||
};
|
||||
|
||||
export default Generate;
|
||||
|
||||
export type IGenerateLogProps = {
|
||||
id?: string;
|
||||
status: 0 | 1;
|
||||
message?: string;
|
||||
created_at?: string;
|
||||
updated_at?: string;
|
||||
type?: GenerateType;
|
||||
className?: string;
|
||||
onDelete?: () => void;
|
||||
};
|
||||
export const GenerateLogButton = (props: IGenerateLogProps) => {
|
||||
const { t } = useTranslation();
|
||||
const {
|
||||
id,
|
||||
status,
|
||||
message,
|
||||
created_at,
|
||||
updated_at,
|
||||
type,
|
||||
className,
|
||||
onDelete,
|
||||
} = props;
|
||||
const handleDelete = () => {
|
||||
Modal.show({
|
||||
visible: true,
|
||||
className: '!w-[560px]',
|
||||
title:
|
||||
t('common.delete') +
|
||||
' ' +
|
||||
(type === GenerateType.KnowledgeGraph
|
||||
? t('knowledgeDetails.knowledgeGraph')
|
||||
: t('knowledgeDetails.raptor')),
|
||||
children: (
|
||||
<div
|
||||
className="text-sm text-text-secondary"
|
||||
dangerouslySetInnerHTML={{
|
||||
__html: t('knowledgeConfiguration.deleteGenerateModalContent', {
|
||||
type:
|
||||
type === GenerateType.KnowledgeGraph
|
||||
? t('knowledgeDetails.knowledgeGraph')
|
||||
: t('knowledgeDetails.raptor'),
|
||||
}),
|
||||
}}
|
||||
></div>
|
||||
),
|
||||
onVisibleChange: () => {
|
||||
Modal.hide();
|
||||
},
|
||||
footer: (
|
||||
<div className="flex justify-end gap-2">
|
||||
<Button
|
||||
type="button"
|
||||
variant={'outline'}
|
||||
onClick={() => Modal.hide()}
|
||||
>
|
||||
{t('dataflowParser.changeStepModalCancelText')}
|
||||
</Button>
|
||||
<Button
|
||||
type="button"
|
||||
variant={'secondary'}
|
||||
className="!bg-state-error text-text-primary"
|
||||
onClick={() => {
|
||||
Modal.hide();
|
||||
}}
|
||||
>
|
||||
{t('common.delete')}
|
||||
</Button>
|
||||
</div>
|
||||
),
|
||||
});
|
||||
};
|
||||
return (
|
||||
<div
|
||||
className={cn('flex bg-bg-card rounded-md py-1 px-3', props.className)}
|
||||
>
|
||||
<div className="flex items-center justify-between w-full">
|
||||
{status === 1 && (
|
||||
<>
|
||||
<div>
|
||||
{message || t('knowledgeDetails.generatedOn')}
|
||||
{created_at}
|
||||
</div>
|
||||
<Trash2
|
||||
size={14}
|
||||
className="cursor-pointer"
|
||||
onClick={(e) => {
|
||||
console.log('delete');
|
||||
handleDelete();
|
||||
e.stopPropagation();
|
||||
}}
|
||||
/>
|
||||
</>
|
||||
)}
|
||||
{status === 0 && <div>{t('knowledgeDetails.notGenerated')}</div>}
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
};
|
||||
|
||||
@ -75,7 +75,7 @@ export default function Dataset() {
|
||||
filters={filters}
|
||||
leftPanel={
|
||||
<div className="items-start">
|
||||
<div className="pb-1">{t('knowledgeDetails.dataset')}</div>
|
||||
<div className="pb-1">{t('knowledgeDetails.subbarFiles')}</div>
|
||||
<div className="text-text-sub-title-invert text-sm">
|
||||
{t('knowledgeDetails.datasetDescription')}
|
||||
</div>
|
||||
|
||||
@ -9,7 +9,7 @@ import { cn, formatBytes } from '@/lib/utils';
|
||||
import { Routes } from '@/routes';
|
||||
import { formatPureDate } from '@/utils/date';
|
||||
import { isEmpty } from 'lodash';
|
||||
import { Banknote, Database, FileSearch2, GitGraph } from 'lucide-react';
|
||||
import { Banknote, FileSearch2, FolderOpen, GitGraph } from 'lucide-react';
|
||||
import { useMemo } from 'react';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
import { useHandleMenuClick } from './hooks';
|
||||
@ -34,8 +34,8 @@ export function SideBar({ refreshCount }: PropType) {
|
||||
// key: Routes.DataSetOverview,
|
||||
// },
|
||||
{
|
||||
icon: Database,
|
||||
label: t(`knowledgeDetails.dataset`),
|
||||
icon: FolderOpen,
|
||||
label: t(`knowledgeDetails.subbarFiles`),
|
||||
key: Routes.DatasetBase,
|
||||
},
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user