Mirror of https://github.com/infiniflow/ragflow.git, synced 2026-01-04 03:25:30 +08:00
Compare commits: 14 commits in range 5b2e5dd334...cd77425b87

- cd77425b87
- 544c9990e3
- 41a647fe32
- 594bf485d4
- 863c3e3d9c
- 1767039be3
- cd75fa02b1
- cfdd37820a
- 9d12380806
- 866098634b
- 8013505daf
- deb81810e9
- 6ab96287c9
- aaa4776657
@@ -18,12 +18,14 @@ import re
from abc import ABC
from agent.tools.base import ToolParamBase, ToolBase, ToolMeta
from api.db import LLMType
from api.db.services.document_service import DocumentService
from api.db.services.dialog_service import meta_filter
from api.db.services.knowledgebase_service import KnowledgebaseService
from api.db.services.llm_service import LLMBundle
from api import settings
from api.utils.api_utils import timeout
from rag.app.tag import label_question
from rag.prompts.generator import cross_languages, kb_prompt
from rag.prompts.generator import cross_languages, kb_prompt, gen_meta_filter


class RetrievalParam(ToolParamBase):

@@ -58,6 +60,7 @@ class RetrievalParam(ToolParamBase):
        self.use_kg = False
        self.cross_languages = []
        self.toc_enhance = False
        self.meta_data_filter={}

    def check(self):
        self.check_decimal_float(self.similarity_threshold, "[Retrieval] Similarity threshold")

@@ -117,6 +120,21 @@ class Retrieval(ToolBase, ABC):
        vars = self.get_input_elements_from_text(kwargs["query"])
        vars = {k:o["value"] for k,o in vars.items()}
        query = self.string_format(kwargs["query"], vars)

        doc_ids=[]
        if self._param.meta_data_filter!={}:
            metas = DocumentService.get_meta_by_kbs(kb_ids)
            if self._param.meta_data_filter.get("method") == "auto":
                chat_mdl = LLMBundle(self._canvas.get_tenant_id(), LLMType.CHAT)
                filters = gen_meta_filter(chat_mdl, metas, query)
                doc_ids.extend(meta_filter(metas, filters))
                if not doc_ids:
                    doc_ids = None
            elif self._param.meta_data_filter.get("method") == "manual":
                doc_ids.extend(meta_filter(metas, self._param.meta_data_filter["manual"]))
                if not doc_ids:
                    doc_ids = None

        if self._param.cross_languages:
            query = cross_languages(kbs[0].tenant_id, None, query, self._param.cross_languages)

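The hunk above wires document-metadata filtering into the Retrieval tool. Below is a minimal sketch of the control flow it adds, reusing the `meta_filter` and `gen_meta_filter` imports introduced earlier in this diff; the wrapper function itself is illustrative, not part of the commit:

```python
from api.db.services.dialog_service import meta_filter
from rag.prompts.generator import gen_meta_filter

def resolve_doc_ids(meta_data_filter, metas, query, chat_mdl):
    # Collect IDs of documents whose metadata matches the configured filter.
    doc_ids = []
    if meta_data_filter.get("method") == "auto":
        # "auto": let the chat model derive filter conditions from the query.
        filters = gen_meta_filter(chat_mdl, metas, query)
        doc_ids.extend(meta_filter(metas, filters))
    elif meta_data_filter.get("method") == "manual":
        doc_ids.extend(meta_filter(metas, meta_data_filter["manual"]))
    # As in the hunk: an empty match list becomes None, i.e. retrieval
    # falls back to searching all documents rather than returning nothing.
    return doc_ids or None
```
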
@@ -131,6 +149,7 @@ class Retrieval(ToolBase, ABC):
            self._param.top_n,
            self._param.similarity_threshold,
            1 - self._param.keywords_similarity_weight,
            doc_ids=doc_ids,
            aggs=False,
            rerank_mdl=rerank_mdl,
            rank_feature=label_question(query, kbs),

@@ -45,7 +45,7 @@ from api.utils.api_utils import (
from api.utils.file_utils import filename_type, get_project_base_directory, thumbnail
from api.utils.web_utils import CONTENT_TYPE_MAP, html2pdf, is_valid_url
from deepdoc.parser.html_parser import RAGFlowHtmlParser
from rag.nlp import search
from rag.nlp import search, rag_tokenizer
from rag.utils.storage_factory import STORAGE_IMPL

@@ -524,6 +524,21 @@ def rename():
        e, file = FileService.get_by_id(informs[0].file_id)
        FileService.update_by_id(file.id, {"name": req["name"]})

        tenant_id = DocumentService.get_tenant_id(req["doc_id"])
        title_tks = rag_tokenizer.tokenize(req["name"])
        es_body = {
            "docnm_kwd": req["name"],
            "title_tks": title_tks,
            "title_sm_tks": rag_tokenizer.fine_grained_tokenize(title_tks),
        }
        if settings.docStoreConn.indexExist(search.index_name(tenant_id), doc.kb_id):
            settings.docStoreConn.update(
                {"doc_id": req["doc_id"]},
                es_body,
                search.index_name(tenant_id),
                doc.kb_id,
            )

        return get_json_result(data=True)
    except Exception as e:
        return server_error_response(e)

@@ -470,6 +470,20 @@ def list_docs(dataset_id, tenant_id):
        required: false
        default: 0
        description: Unix timestamp for filtering documents created before this time. 0 means no filter.
      - in: query
        name: suffix
        type: array
        items:
          type: string
        required: false
        description: Filter by file suffix (e.g., ["pdf", "txt", "docx"]).
      - in: query
        name: run
        type: array
        items:
          type: string
        required: false
        description: Filter by document run status. Supports both numeric ("0", "1", "2", "3", "4") and text formats ("UNSTART", "RUNNING", "CANCEL", "DONE", "FAIL").
      - in: header
        name: Authorization
        type: string

@@ -512,63 +526,62 @@ def list_docs(dataset_id, tenant_id):
            description: Processing status.
    """
    if not KnowledgebaseService.accessible(kb_id=dataset_id, user_id=tenant_id):
        return get_error_data_result(message=f"You don't own the dataset {dataset_id}. ")
    id = request.args.get("id")
    name = request.args.get("name")

    if id and not DocumentService.query(id=id, kb_id=dataset_id):
        return get_error_data_result(message=f"You don't own the document {id}.")
    q = request.args
    document_id = q.get("id")
    name = q.get("name")

    if document_id and not DocumentService.query(id=document_id, kb_id=dataset_id):
        return get_error_data_result(message=f"You don't own the document {document_id}.")
    if name and not DocumentService.query(name=name, kb_id=dataset_id):
        return get_error_data_result(message=f"You don't own the document {name}.")

    page = int(request.args.get("page", 1))
    keywords = request.args.get("keywords", "")
    page_size = int(request.args.get("page_size", 30))
    orderby = request.args.get("orderby", "create_time")
    if request.args.get("desc") == "False":
        desc = False
    else:
        desc = True
    docs, tol = DocumentService.get_list(dataset_id, page, page_size, orderby, desc, keywords, id, name)
    page = int(q.get("page", 1))
    page_size = int(q.get("page_size", 30))
    orderby = q.get("orderby", "create_time")
    desc = str(q.get("desc", "true")).strip().lower() != "false"
    keywords = q.get("keywords", "")

    create_time_from = int(request.args.get("create_time_from", 0))
    create_time_to = int(request.args.get("create_time_to", 0))
    # filters - align with OpenAPI parameter names
    suffix = q.getlist("suffix")
    run_status = q.getlist("run")
    create_time_from = int(q.get("create_time_from", 0))
    create_time_to = int(q.get("create_time_to", 0))

    # map run status (accept text or numeric) - align with API parameter
    run_status_text_to_numeric = {"UNSTART": "0", "RUNNING": "1", "CANCEL": "2", "DONE": "3", "FAIL": "4"}
    run_status_converted = [run_status_text_to_numeric.get(v, v) for v in run_status]

    docs, total = DocumentService.get_list(
        dataset_id, page, page_size, orderby, desc, keywords, document_id, name, suffix, run_status_converted
    )

    # time range filter (0 means no bound)
    if create_time_from or create_time_to:
        filtered_docs = []
        for doc in docs:
            doc_create_time = doc.get("create_time", 0)
            if (create_time_from == 0 or doc_create_time >= create_time_from) and (create_time_to == 0 or doc_create_time <= create_time_to):
                filtered_docs.append(doc)
        docs = filtered_docs
        docs = [
            d for d in docs
            if (create_time_from == 0 or d.get("create_time", 0) >= create_time_from)
            and (create_time_to == 0 or d.get("create_time", 0) <= create_time_to)
        ]

    # rename key's name
    renamed_doc_list = []
    # rename keys + map run status back to text for output
    key_mapping = {
        "chunk_num": "chunk_count",
        "kb_id": "dataset_id",
        "token_num": "token_count",
        "parser_id": "chunk_method",
    }
    run_mapping = {
        "0": "UNSTART",
        "1": "RUNNING",
        "2": "CANCEL",
        "3": "DONE",
        "4": "FAIL",
    }
    for doc in docs:
        renamed_doc = {}
        for key, value in doc.items():
            if key == "run":
                renamed_doc["run"] = run_mapping.get(str(value))
            new_key = key_mapping.get(key, key)
            renamed_doc[new_key] = value
            if key == "run":
                renamed_doc["run"] = run_mapping.get(value)
        renamed_doc_list.append(renamed_doc)
    return get_result(data={"total": tol, "docs": renamed_doc_list})
    run_status_numeric_to_text = {"0": "UNSTART", "1": "RUNNING", "2": "CANCEL", "3": "DONE", "4": "FAIL"}

    output_docs = []
    for d in docs:
        renamed_doc = {key_mapping.get(k, k): v for k, v in d.items()}
        if "run" in d:
            renamed_doc["run"] = run_status_numeric_to_text.get(str(d["run"]), d["run"])
        output_docs.append(renamed_doc)

    return get_result(data={"total": total, "docs": output_docs})


@manager.route("/datasets/<dataset_id>/documents", methods=["DELETE"]) # noqa: F821
@token_required

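The rewrite above normalizes `run` filters on input and maps them back to text on output. A self-contained sketch of that round trip, with the mappings copied from the hunk:

```python
RUN_TEXT_TO_NUM = {"UNSTART": "0", "RUNNING": "1", "CANCEL": "2", "DONE": "3", "FAIL": "4"}
RUN_NUM_TO_TEXT = {v: k for k, v in RUN_TEXT_TO_NUM.items()}

def normalize_run_filters(values):
    # Accept text, numeric, or mixed input: "DONE" -> "3", "3" -> "3";
    # unknown values pass through unchanged.
    return [RUN_TEXT_TO_NUM.get(v, v) for v in values]

assert normalize_run_filters(["UNSTART", "1", "DONE"]) == ["0", "1", "3"]
assert RUN_NUM_TO_TEXT[normalize_run_filters(["DONE"])[0]] == "DONE"
```
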
@@ -79,7 +79,7 @@ class DocumentService(CommonService):
    @classmethod
    @DB.connection_context()
    def get_list(cls, kb_id, page_number, items_per_page,
                 orderby, desc, keywords, id, name):
                 orderby, desc, keywords, id, name, suffix=None, run = None):
        fields = cls.get_cls_model_fields()
        docs = cls.model.select(*[*fields, UserCanvas.title]).join(File2Document, on = (File2Document.document_id == cls.model.id))\
            .join(File, on = (File.id == File2Document.file_id))\

@@ -96,6 +96,10 @@ class DocumentService(CommonService):
            docs = docs.where(
                fn.LOWER(cls.model.name).contains(keywords.lower())
            )
        if suffix:
            docs = docs.where(cls.model.suffix.in_(suffix))
        if run:
            docs = docs.where(cls.model.run.in_(run))
        if desc:
            docs = docs.order_by(cls.model.getter_by(orderby).desc())
        else:

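For reference, a runnable peewee sketch of the two new `.in_()` filters; `Doc` here is a hypothetical stand-in for the real document model:

```python
from peewee import CharField, Model, SqliteDatabase

db = SqliteDatabase(":memory:")

class Doc(Model):  # hypothetical stand-in, not the actual model
    name = CharField()
    suffix = CharField()
    run = CharField()

    class Meta:
        database = db

db.create_tables([Doc])
Doc.insert_many([
    {"name": "a.pdf", "suffix": "pdf", "run": "3"},
    {"name": "b.txt", "suffix": "txt", "run": "1"},
]).execute()

# The same .in_() chaining as in the hunk above.
docs = Doc.select().where(Doc.suffix.in_(["pdf"])).where(Doc.run.in_(["3"]))
print([d.name for d in docs])  # ['a.pdf']
```
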
@@ -54,8 +54,8 @@ class RAGFlowExcelParser:
        try:
            file_like_object.seek(0)
            try:
                df = pd.read_excel(file_like_object)
                return RAGFlowExcelParser._dataframe_to_workbook(df)
                dfs = pd.read_excel(file_like_object, sheet_name=None)
                return RAGFlowExcelParser._dataframe_to_workbook(dfs)
            except Exception as ex:
                logging.info(f"pandas with default engine load error: {ex}, try calamine instead")
                file_like_object.seek(0)

@@ -75,6 +75,10 @@ class RAGFlowExcelParser:

    @staticmethod
    def _dataframe_to_workbook(df):
        # if contains multiple sheets use _dataframes_to_workbook
        if isinstance(df, dict) and len(df) > 1:
            return RAGFlowExcelParser._dataframes_to_workbook(df)

        df = RAGFlowExcelParser._clean_dataframe(df)
        wb = Workbook()
        ws = wb.active

@@ -88,6 +92,22 @@ class RAGFlowExcelParser:
                ws.cell(row=row_num, column=col_num, value=value)

        return wb

    @staticmethod
    def _dataframes_to_workbook(dfs: dict):
        wb = Workbook()
        default_sheet = wb.active
        wb.remove(default_sheet)

        for sheet_name, df in dfs.items():
            df = RAGFlowExcelParser._clean_dataframe(df)
            ws = wb.create_sheet(title=sheet_name)
            for col_num, column_name in enumerate(df.columns, 1):
                ws.cell(row=1, column=col_num, value=column_name)
            for row_num, row in enumerate(df.values, 2):
                for col_num, value in enumerate(row, 1):
                    ws.cell(row=row_num, column=col_num, value=value)
        return wb

    def html(self, fnm, chunk_rows=256):
        from html import escape

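The same multi-sheet conversion can be exercised standalone; a sketch with placeholder file names, relying on `pd.read_excel(..., sheet_name=None)` returning a `{sheet_name: DataFrame}` dict:

```python
import pandas as pd
from openpyxl import Workbook

def dataframes_to_workbook(dfs: dict) -> Workbook:
    wb = Workbook()
    wb.remove(wb.active)  # drop the default empty sheet, as above
    for sheet_name, df in dfs.items():
        ws = wb.create_sheet(title=sheet_name)
        ws.append([str(c) for c in df.columns])  # header row
        for row in df.itertuples(index=False):
            # .item() unwraps numpy scalars, which openpyxl may reject
            ws.append([v.item() if hasattr(v, "item") else v for v in row])
    return wb

# dfs = pd.read_excel("multi_sheet.xlsx", sheet_name=None)  # placeholder path
# dataframes_to_workbook(dfs).save("copy.xlsx")
```
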
@@ -17,6 +17,8 @@ from concurrent.futures import ThreadPoolExecutor, as_completed

from PIL import Image

from api.db import LLMType
from api.db.services.llm_service import LLMBundle
from api.utils.api_utils import timeout
from rag.app.picture import vision_llm_chunk as picture_vision_llm_chunk
from rag.prompts.generator import vision_llm_figure_describe_prompt

@@ -32,6 +34,43 @@ def vision_figure_parser_figure_data_wrapper(figures_data_without_positions):
        if isinstance(figure_data[1], Image.Image)
    ]

def vision_figure_parser_docx_wrapper(sections,tbls,callback=None,**kwargs):
    try:
        vision_model = LLMBundle(kwargs["tenant_id"], LLMType.IMAGE2TEXT)
        callback(0.7, "Visual model detected. Attempting to enhance figure extraction...")
    except Exception:
        vision_model = None
    if vision_model:
        figures_data = vision_figure_parser_figure_data_wrapper(sections)
        try:
            docx_vision_parser = VisionFigureParser(vision_model=vision_model, figures_data=figures_data, **kwargs)
            boosted_figures = docx_vision_parser(callback=callback)
            tbls.extend(boosted_figures)
        except Exception as e:
            callback(0.8, f"Visual model error: {e}. Skipping figure parsing enhancement.")
    return tbls

def vision_figure_parser_pdf_wrapper(tbls,callback=None,**kwargs):
    try:
        vision_model = LLMBundle(kwargs["tenant_id"], LLMType.IMAGE2TEXT)
        callback(0.7, "Visual model detected. Attempting to enhance figure extraction...")
    except Exception:
        vision_model = None
    if vision_model:
        def is_figure_item(item):
            return (
                isinstance(item[0][0], Image.Image) and
                isinstance(item[0][1], list)
            )
        figures_data = [item for item in tbls if is_figure_item(item)]
        try:
            docx_vision_parser = VisionFigureParser(vision_model=vision_model, figures_data=figures_data, **kwargs)
            boosted_figures = docx_vision_parser(callback=callback)
            tbls = [item for item in tbls if not is_figure_item(item)]
            tbls.extend(boosted_figures)
        except Exception as e:
            callback(0.8, f"Visual model error: {e}. Skipping figure parsing enhancement.")
    return tbls

shared_executor = ThreadPoolExecutor(max_workers=10)

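Both wrappers follow the call pattern the chunker hunks below adopt. A hypothetical invocation, where the tenant ID and the print-based progress callback are placeholders:

```python
from deepdoc.parser.figure_parser import vision_figure_parser_pdf_wrapper

def progress(ratio, message=""):
    print(f"[{ratio:.0%}] {message}")

tbls = []  # (figure, positions) items as produced by a PDF parser
tbls = vision_figure_parser_pdf_wrapper(tbls=tbls, callback=progress, tenant_id="TENANT_ID")
# If no IMAGE2TEXT model is configured, LLMBundle raises, vision_model stays
# None, and the wrapper returns tbls unchanged.
```
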
docs/guides/agent/agent_component_reference/chunker_token.md (new file, +17 lines)
@@ -0,0 +1,17 @@
---
sidebar_position: 32
slug: /chunker_token_component
---

# Parser component

A component that sets the parsing rules for your dataset.

---

A **Parser** component defines how various file types should be parsed, including parsing methods for PDFs, fields to parse for emails, and OCR methods for images.

## Scenario

A **Parser** component is auto-populated on the ingestion pipeline canvas and required in all ingestion pipeline workflows.

@@ -1198,23 +1198,24 @@ Failure:

### List documents

**GET** `/api/v1/datasets/{dataset_id}/documents?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&keywords={keywords}&id={document_id}&name={document_name}&create_time_from={timestamp}&create_time_to={timestamp}`
**GET** `/api/v1/datasets/{dataset_id}/documents?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&keywords={keywords}&id={document_id}&name={document_name}&create_time_from={timestamp}&create_time_to={timestamp}&suffix={file_suffix}&run={run_status}`

Lists documents in a specified dataset.

#### Request

- Method: GET
- URL: `/api/v1/datasets/{dataset_id}/documents?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&keywords={keywords}&id={document_id}&name={document_name}&create_time_from={timestamp}&create_time_to={timestamp}`
- URL: `/api/v1/datasets/{dataset_id}/documents?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&keywords={keywords}&id={document_id}&name={document_name}&create_time_from={timestamp}&create_time_to={timestamp}&suffix={file_suffix}&run={run_status}`
- Headers:
  - `'content-Type: application/json'`
  - `'Authorization: Bearer <YOUR_API_KEY>'`

##### Request example
##### Request examples

**A basic request with pagination:**

```bash
curl --request GET \
     --url http://{address}/api/v1/datasets/{dataset_id}/documents?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&keywords={keywords}&id={document_id}&name={document_name}&create_time_from={timestamp}&create_time_to={timestamp} \
     --url http://{address}/api/v1/datasets/{dataset_id}/documents?page=1&page_size=10 \
     --header 'Authorization: Bearer <YOUR_API_KEY>'
```

@@ -1236,10 +1237,34 @@ curl --request GET \
  Indicates whether the retrieved documents should be sorted in descending order. Defaults to `true`.
- `id`: (*Filter parameter*), `string`
  The ID of the document to retrieve.
- `create_time_from`: (*Filter parameter*), `integer`
  Unix timestamp for filtering documents created after this time. 0 means no filter. Defaults to `0`.
- `create_time_to`: (*Filter parameter*), `integer`
  Unix timestamp for filtering documents created before this time. 0 means no filter. Defaults to `0`.
- `suffix`: (*Filter parameter*), `array[string]`
  Filter by file suffix. Supports multiple values, e.g., `pdf`, `txt`, and `docx`. Defaults to all suffixes.
- `run`: (*Filter parameter*), `array[string]`
  Filter by document processing status. Supports numeric, text, and mixed formats:
  - Numeric format: `["0", "1", "2", "3", "4"]`
  - Text format: `[UNSTART, RUNNING, CANCEL, DONE, FAIL]`
  - Mixed format: `[UNSTART, 1, DONE]` (mixing numeric and text formats)
  - Status mapping:
    - `0` / `UNSTART`: Document not yet processed
    - `1` / `RUNNING`: Document is currently being processed
    - `2` / `CANCEL`: Document processing was cancelled
    - `3` / `DONE`: Document processing completed successfully
    - `4` / `FAIL`: Document processing failed

  Defaults to all statuses.

##### Usage examples

**A request with multiple filtering parameters:**

```bash
curl --request GET \
     --url 'http://{address}/api/v1/datasets/{dataset_id}/documents?suffix=pdf&run=DONE&page=1&page_size=10' \
     --header 'Authorization: Bearer <YOUR_API_KEY>'
```

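The same filtered request from Python, keeping the documentation's placeholders (`{address}`, `{dataset_id}`, `<YOUR_API_KEY>`); `requests` encodes list values as repeated query keys, which matches the array parameters above:

```python
import requests

resp = requests.get(
    "http://{address}/api/v1/datasets/{dataset_id}/documents",
    headers={"Authorization": "Bearer <YOUR_API_KEY>"},
    params={"suffix": ["pdf"], "run": ["DONE"], "page": 1, "page_size": 10},
)
print(resp.json()["data"]["total"])
```
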
#### Response

@@ -1270,7 +1295,7 @@ Success:
      "process_duration": 0.0,
      "progress": 0.0,
      "progress_msg": "",
      "run": "0",
      "run": "UNSTART",
      "size": 7,
      "source_type": "local",
      "status": "1",

@@ -20,11 +20,14 @@ import re
from io import BytesIO

from deepdoc.parser.utils import get_text
from rag.app import naive
from rag.nlp import bullets_category, is_english,remove_contents_table, \
    hierarchical_merge, make_colon_as_title, naive_merge, random_choices, tokenize_table, \
    tokenize_chunks
from rag.nlp import rag_tokenizer
from deepdoc.parser import PdfParser, DocxParser, PlainParser, HtmlParser
from deepdoc.parser import PdfParser, PlainParser, HtmlParser
from deepdoc.parser.figure_parser import vision_figure_parser_pdf_wrapper,vision_figure_parser_docx_wrapper
from PIL import Image


class Pdf(PdfParser):

@@ -81,13 +84,15 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
    sections, tbls = [], []
    if re.search(r"\.docx$", filename, re.IGNORECASE):
        callback(0.1, "Start to parse.")
        doc_parser = DocxParser()
        doc_parser = naive.Docx()
        # TODO: table of contents need to be removed
        sections, tbls = doc_parser(
            binary if binary else filename, from_page=from_page, to_page=to_page)
            filename, binary=binary, from_page=from_page, to_page=to_page)
        remove_contents_table(sections, eng=is_english(
            random_choices([t for t, _ in sections], k=200)))
        tbls = [((None, lns), None) for lns in tbls]
        tbls=vision_figure_parser_docx_wrapper(sections=sections,tbls=tbls,callback=callback,**kwargs)
        # tbls = [((None, lns), None) for lns in tbls]
        sections=[(item[0],item[1] if item[1] is not None else "") for item in sections if not isinstance(item[1], Image.Image)]
        callback(0.8, "Finish parsing.")

    elif re.search(r"\.pdf$", filename, re.IGNORECASE):

@@ -96,6 +101,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
            pdf_parser = PlainParser()
        sections, tbls = pdf_parser(filename if not binary else binary,
                                    from_page=from_page, to_page=to_page, callback=callback)
        tbls=vision_figure_parser_pdf_wrapper(tbls=tbls,callback=callback,**kwargs)

    elif re.search(r"\.txt$", filename, re.IGNORECASE):
        callback(0.1, "Start to parse.")

@@ -23,6 +23,7 @@ from io import BytesIO
from rag.nlp import rag_tokenizer, tokenize, tokenize_table, bullets_category, title_frequency, tokenize_chunks, docx_question_level
from rag.utils import num_tokens_from_string
from deepdoc.parser import PdfParser, PlainParser, DocxParser
from deepdoc.parser.figure_parser import vision_figure_parser_pdf_wrapper,vision_figure_parser_docx_wrapper
from docx import Document
from PIL import Image

@@ -252,7 +253,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
            tk_cnt = num_tokens_from_string(txt)
            if sec_id > -1:
                last_sid = sec_id

    tbls=vision_figure_parser_pdf_wrapper(tbls=tbls,callback=callback,**kwargs)
    res = tokenize_table(tbls, doc, eng)
    res.extend(tokenize_chunks(chunks, doc, eng, pdf_parser))
    return res

@@ -261,6 +262,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
        docx_parser = Docx()
        ti_list, tbls = docx_parser(filename, binary,
                                    from_page=0, to_page=10000, callback=callback)
        tbls=vision_figure_parser_docx_wrapper(sections=sections,tbls=tbls,callback=callback,**kwargs)
        res = tokenize_table(tbls, doc, eng)
        for text, image in ti_list:
            d = copy.deepcopy(doc)

@@ -32,7 +32,7 @@ from api.db import LLMType
from api.db.services.llm_service import LLMBundle
from api.utils.file_utils import extract_embed_file
from deepdoc.parser import DocxParser, ExcelParser, HtmlParser, JsonParser, MarkdownElementExtractor, MarkdownParser, PdfParser, TxtParser
from deepdoc.parser.figure_parser import VisionFigureParser, vision_figure_parser_figure_data_wrapper
from deepdoc.parser.figure_parser import VisionFigureParser,vision_figure_parser_docx_wrapper,vision_figure_parser_pdf_wrapper
from deepdoc.parser.pdf_parser import PlainParser, VisionParser
from deepdoc.parser.mineru_parser import MinerUParser
from rag.nlp import concat_img, find_codec, naive_merge, naive_merge_with_images, naive_merge_docx, rag_tokenizer, tokenize_chunks, tokenize_chunks_with_images, tokenize_table

@@ -475,24 +475,13 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
    if re.search(r"\.docx$", filename, re.IGNORECASE):
        callback(0.1, "Start to parse.")

        try:
            vision_model = LLMBundle(kwargs["tenant_id"], LLMType.IMAGE2TEXT)
            callback(0.15, "Visual model detected. Attempting to enhance figure extraction...")
        except Exception:
            vision_model = None

        # fix "There is no item named 'word/NULL' in the archive", referring to https://github.com/python-openxml/python-docx/issues/1105#issuecomment-1298075246
        _SerializedRelationships.load_from_xml = load_from_xml_v2
        sections, tables = Docx()(filename, binary)

        if vision_model:
            figures_data = vision_figure_parser_figure_data_wrapper(sections)
            try:
                docx_vision_parser = VisionFigureParser(vision_model=vision_model, figures_data=figures_data, **kwargs)
                boosted_figures = docx_vision_parser(callback=callback)
                tables.extend(boosted_figures)
            except Exception as e:
                callback(0.6, f"Visual model error: {e}. Skipping figure parsing enhancement.")
        tables=vision_figure_parser_docx_wrapper(sections=sections,tbls=tables,callback=callback,**kwargs)

        res = tokenize_table(tables, doc, is_english)
        callback(0.8, "Finish parsing.")

@@ -521,25 +510,8 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,

        if layout_recognizer == "DeepDOC":
            pdf_parser = Pdf()

            try:
                vision_model = LLMBundle(kwargs["tenant_id"], LLMType.IMAGE2TEXT)
                callback(0.15, "Visual model detected. Attempting to enhance figure extraction...")
            except Exception:
                vision_model = None

            if vision_model:
                sections, tables, figures = pdf_parser(filename if not binary else binary, from_page=from_page, to_page=to_page, callback=callback, separate_tables_figures=True)
                callback(0.5, "Basic parsing complete. Proceeding with figure enhancement...")
                try:
                    pdf_vision_parser = VisionFigureParser(vision_model=vision_model, figures_data=figures, **kwargs)
                    boosted_figures = pdf_vision_parser(callback=callback)
                    tables.extend(boosted_figures)
                except Exception as e:
                    callback(0.6, f"Visual model error: {e}. Skipping figure parsing enhancement.")
                    tables.extend(figures)
            else:
                sections, tables = pdf_parser(filename if not binary else binary, from_page=from_page, to_page=to_page, callback=callback)
            sections, tables = pdf_parser(filename if not binary else binary, from_page=from_page, to_page=to_page, callback=callback)
            tables=vision_figure_parser_pdf_wrapper(tbls=tables,callback=callback,**kwargs)

            res = tokenize_table(tables, doc, is_english)
            callback(0.8, "Finish parsing.")

@@ -23,6 +23,7 @@ from deepdoc.parser.utils import get_text
from rag.app import naive
from rag.nlp import rag_tokenizer, tokenize
from deepdoc.parser import PdfParser, ExcelParser, PlainParser, HtmlParser
from deepdoc.parser.figure_parser import vision_figure_parser_pdf_wrapper,vision_figure_parser_docx_wrapper


class Pdf(PdfParser):

@@ -57,13 +58,8 @@ class Pdf(PdfParser):

        sections = [(b["text"], self.get_position(b, zoomin))
                    for i, b in enumerate(self.boxes)]
        for (img, rows), poss in tbls:
            if not rows:
                continue
            sections.append((rows if isinstance(rows, str) else rows[0],
                             [(p[0] + 1 - from_page, p[1], p[2], p[3], p[4]) for p in poss]))
        return [(txt, "") for txt, _ in sorted(sections, key=lambda x: (
            x[-1][0][0], x[-1][0][3], x[-1][0][1]))], None
            x[-1][0][0], x[-1][0][3], x[-1][0][1]))], tbls


def chunk(filename, binary=None, from_page=0, to_page=100000,

@@ -80,6 +76,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
    if re.search(r"\.docx$", filename, re.IGNORECASE):
        callback(0.1, "Start to parse.")
        sections, tbls = naive.Docx()(filename, binary)
        tbls=vision_figure_parser_docx_wrapper(sections=sections,tbls=tbls,callback=callback,**kwargs)
        sections = [s for s, _ in sections if s]
        for (_, html), _ in tbls:
            sections.append(html)

@@ -89,8 +86,14 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
        pdf_parser = Pdf()
        if parser_config.get("layout_recognize", "DeepDOC") == "Plain Text":
            pdf_parser = PlainParser()
        sections, _ = pdf_parser(
        sections, tbls = pdf_parser(
            filename if not binary else binary, to_page=to_page, callback=callback)
        tbls=vision_figure_parser_pdf_wrapper(tbls=tbls,callback=callback,**kwargs)
        for (img, rows), poss in tbls:
            if not rows:
                continue
            sections.append((rows if isinstance(rows, str) else rows[0],
                             [(p[0] + 1 - from_page, p[1], p[2], p[3], p[4]) for p in poss]))
        sections = [s for s, _ in sections if s]

    elif re.search(r"\.xlsx?$", filename, re.IGNORECASE):

@@ -18,12 +18,12 @@ import logging
import copy
import re

from deepdoc.parser.figure_parser import vision_figure_parser_pdf_wrapper
from api.db import ParserType
from rag.nlp import rag_tokenizer, tokenize, tokenize_table, add_positions, bullets_category, title_frequency, tokenize_chunks
from deepdoc.parser import PdfParser, PlainParser
import numpy as np


class Pdf(PdfParser):
    def __init__(self):
        self.model_speciess = ParserType.PAPER.value

@@ -160,6 +160,9 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
        pdf_parser = Pdf()
        paper = pdf_parser(filename if not binary else binary,
                           from_page=from_page, to_page=to_page, callback=callback)
        tbls=paper["tables"]
        tbls=vision_figure_parser_pdf_wrapper(tbls=tbls,callback=callback,**kwargs)
        paper["tables"] = tbls
    else:
        raise NotImplementedError("file type not supported yet(pdf supported)")

@@ -13,13 +13,16 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#

import base64
import json
import os
import tempfile
import logging
from abc import ABC
from copy import deepcopy
from io import BytesIO
from pathlib import Path
from urllib.parse import urljoin
import requests
from openai import OpenAI

@@ -171,6 +174,7 @@ class GptV4(Base):
    def __init__(self, key, model_name="gpt-4-vision-preview", lang="Chinese", base_url="https://api.openai.com/v1", **kwargs):
        if not base_url:
            base_url = "https://api.openai.com/v1"
        self.api_key = key
        self.client = OpenAI(api_key=key, base_url=base_url)
        self.model_name = model_name
        self.lang = lang

@@ -224,6 +228,61 @@ class QWenCV(GptV4):
            base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1"
        super().__init__(key, model_name, lang=lang, base_url=base_url, **kwargs)

    def chat(self, system, history, gen_conf, images=[], video_bytes=None, filename=""):
        if video_bytes:
            try:
                summary, summary_num_tokens = self._process_video(video_bytes, filename)
                return summary, summary_num_tokens
            except Exception as e:
                return "**ERROR**: " + str(e), 0

        return "**ERROR**: Method chat not supported yet.", 0

    def _process_video(self, video_bytes, filename):
        from dashscope import MultiModalConversation

        video_suffix = Path(filename).suffix or ".mp4"
        with tempfile.NamedTemporaryFile(delete=False, suffix=video_suffix) as tmp:
            tmp.write(video_bytes)
            tmp_path = tmp.name

        video_path = f"file://{tmp_path}"
        messages = [
            {
                "role": "user",
                "content": [
                    {
                        "video": video_path,
                        "fps": 2,
                    },
                    {
                        "text": "Please summarize this video in proper sentences.",
                    },
                ],
            }
        ]

        def call_api():
            response = MultiModalConversation.call(
                api_key=self.api_key,
                model=self.model_name,
                messages=messages,
            )
            summary = response["output"]["choices"][0]["message"].content[0]["text"]
            return summary, num_tokens_from_string(summary)

        try:
            return call_api()
        except Exception as e1:
            import dashscope

            dashscope.base_http_api_url = "https://dashscope-intl.aliyuncs.com/api/v1"
            try:
                return call_api()
            except Exception as e2:
                raise RuntimeError(f"Both default and intl endpoint failed.\nFirst error: {e1}\nSecond error: {e2}")

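A hypothetical call into the new video path; the constructor arguments are assumptions based on the `GptV4` signature above, and the file name is a placeholder:

```python
cv_mdl = QWenCV(key="<DASHSCOPE_API_KEY>", model_name="qwen-vl-max")
with open("clip.mp4", "rb") as f:
    summary, n_tokens = cv_mdl.chat(
        system="", history=[], gen_conf={},
        video_bytes=f.read(), filename="clip.mp4",
    )
print(summary)
```
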
class HunyuanCV(GptV4):
    _FACTORY_NAME = "Tencent Hunyuan"

@@ -616,8 +675,6 @@ class GeminiCV(Base):
    def _process_video(self, video_bytes, filename):
        from google import genai
        from google.genai import types
        import tempfile
        from pathlib import Path

        video_size_mb = len(video_bytes) / (1024 * 1024)
        client = genai.Client(api_key=self.api_key)

@@ -459,12 +459,10 @@ def tree_merge(bull, sections, depth):
            return len(BULLET_PATTERN[bull])+1, text
        else:
            return len(BULLET_PATTERN[bull])+2, text

    level_set = set()
    lines = []
    for section in sections:
        level, text = get_level(bull, section)

        if not text.strip("\n"):
            continue

@@ -797,8 +795,8 @@ class Node:
    def __init__(self, level, depth=-1, texts=None):
        self.level = level
        self.depth = depth
        self.texts = texts if texts is not None else []  # holds this node's content
        self.children = []  # child nodes
        self.texts = texts or []
        self.children = []

    def add_child(self, child_node):
        self.children.append(child_node)

@@ -825,35 +823,51 @@ class Node:
        return f"Node(level={self.level}, texts={self.texts}, children={len(self.children)})"

    def build_tree(self, lines):
        stack = [self]
        for line in lines:
            level, text = line
            node = Node(level=level, texts=[text])

            if level <= self.depth or self.depth == -1:
                while stack and level <= stack[-1].get_level():
                    stack.pop()

                stack[-1].add_child(node)
                stack.append(node)
            else:
        stack = [self]
        for level, text in lines:
            if self.depth != -1 and level > self.depth:
                # Beyond target depth: merge content into the current leaf instead of creating deeper nodes
                stack[-1].add_text(text)
                return self
                continue

            # Move up until we find the proper parent whose level is strictly smaller than current
            while len(stack) > 1 and level <= stack[-1].get_level():
                stack.pop()

            node = Node(level=level, texts=[text])
            # Attach as child of current parent and descend
            stack[-1].add_child(node)
            stack.append(node)

        return self

    def get_tree(self):
        tree_list = []
        self._dfs(self, tree_list, 0, [])
        self._dfs(self, tree_list, [])
        return tree_list

    def _dfs(self, node, tree_list, current_depth, titles):
    def _dfs(self, node, tree_list, titles):
        level = node.get_level()
        texts = node.get_texts()
        child = node.get_children()

        if node.get_texts():
            if 0 < node.get_level() < self.depth:
                titles.extend(node.get_texts())
            else:
                combined_text = ["\n".join(titles + node.get_texts())]
                tree_list.append(combined_text)
        if level == 0 and texts:
            tree_list.append("\n".join(titles+texts))

        # Titles within configured depth are accumulated into the current path
        if 1 <= level <= self.depth:
            path_titles = titles + texts
        else:
            path_titles = titles

        for child in node.get_children():
            self._dfs(child, tree_list, current_depth + 1, titles.copy())
        # Body outside the depth limit becomes its own chunk under the current title path
        if level > self.depth and texts:
            tree_list.append("\n".join(path_titles + texts))

        # A leaf title within depth emits its title path as a chunk (header-only section)
        elif not child and (1 <= level <= self.depth):
            tree_list.append("\n".join(path_titles))

        # Recurse into children with the updated title path
        for c in child:
            self._dfs(c, tree_list, path_titles)

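A toy walk-through of the reworked chunking, assuming the `Node` API used in the diff (`add_text`, `get_level`, `get_children`, `get_texts`). With `depth=2`, body lines below level 2 are merged under their nearest title, and a header-only leaf still emits its title path:

```python
root = Node(level=0, depth=2)
root.build_tree([
    (1, "Chapter 1"),
    (2, "Section 1.1"),
    (3, "Body paragraph A"),  # beyond depth: merged into Section 1.1 via add_text
    (3, "Body paragraph B"),
    (2, "Section 1.2"),       # header-only leaf within depth
])
for chunk in root.get_tree():
    print(repr(chunk))
# Expected:
# 'Chapter 1\nSection 1.1\nBody paragraph A\nBody paragraph B'
# 'Chapter 1\nSection 1.2'
```
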
@@ -114,7 +114,7 @@ class RecursiveAbstractiveProcessing4TreeOrganizedRetrieval:
                    ),
                }
            ],
            {"max_tokens": self._max_token},
            {"max_tokens": max(self._max_token, 512)}, # fix issue: #10235
        )
        cnt = re.sub(
            "(······\n由于长度的原因,回答被截断了,要继续吗?|For the content length reason, it stopped, continue?)",

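The one-line fix simply puts a floor under the summarization budget, so a too-small configured token limit can no longer truncate RAPTOR summaries:

```python
max_token = 128             # illustrative, too-small user setting
print(max(max_token, 512))  # 512 -- the clamp applied by the fix above
```
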
@@ -1052,13 +1052,14 @@ async def task_manager():

async def main():
    logging.info(r"""
  ______           __      ______                     __
 /_  __/___ ______/ /__   / ____/  _____  _______  __/ /_____  _____
  / / / __ `/ ___/ //_/  / __/ | |/_/ _ \/ ___/ / / / __/ __ \/ ___/
 / / / /_/ (__  ) ,<   / /____>  </  __/ /__/ /_/ / /_/ /_/ / /
/_/  \__,_/____/_/|_| /_____/_/|_|\___/\___/\__,_/\__/\____/_/
    ____                       __  _
   / _/___  ____ ____  _____/ /_(_)___  ____   ________  ______   _____  _____
  / // __ \/ __ `/ _ \/ ___/ __/ / __ \/ __ \ / ___/ _ \/ ___/ | / / _ \/ ___/
 _/ // / / / /_/ /  __(__  ) /_/ / /_/ / / / /(__  )  __/ /   | |/ /  __/ /
/___/_/ /_/\__, /\___/____/\__/_/\____/_/ /_//____/\___/_/    |___/\___/_/
          /____/
""")
    logging.info(f'TaskExecutor: RAGFlow version: {get_ragflow_version()}')
    logging.info(f'RAGFlow version: {get_ragflow_version()}')
    settings.init_settings()
    print_rag_settings()
    if sys.platform != "win32":

@@ -83,7 +83,7 @@ class TestChunksRetrieval:
                "ValueError('Search does not support negative slicing.')",
                marks=pytest.mark.skip,
            ),
            pytest.param({"page": 2, "page_size": 2}, 0, 2, "", marks=pytest.mark.skip(reason="issues/6646")),
            ({"page": 2, "page_size": 2}, 0, 2, ""),
            ({"page": 3, "page_size": 2}, 0, 0, ""),
            ({"page": "3", "page_size": 2}, 0, 0, ""),
            pytest.param(

@@ -124,9 +124,9 @@ class TestChunksRetrieval:
                marks=pytest.mark.skip,
            ),
            # ({"page_size": 0}, 0, 0, ""),
            ({"page_size": 1}, 0, 1, ""),
            pytest.param({"page_size": 1}, 0, 1, "", marks=pytest.mark.skip(reason="issues/10692")),
            ({"page_size": 5}, 0, 4, ""),
            ({"page_size": "1"}, 0, 1, ""),
            pytest.param({"page_size": "1"}, 0, 1, "", marks=pytest.mark.skip(reason="issues/10692")),
            # ({"page_size": -1}, 0, 0, ""),
            pytest.param(
                {"page_size": "a"},

@@ -1,5 +1,4 @@
import { PlusOutlined } from '@ant-design/icons';
import { TweenOneGroup } from 'rc-tween-one';
import React, { useEffect, useRef, useState } from 'react';

import { X } from 'lucide-react';

@@ -57,7 +56,7 @@ const EditTag = React.forwardRef<HTMLDivElement, EditTagsProps>(
          <HoverCard key={tag}>
            <HoverCardContent side="top">{tag}</HoverCardContent>
            <HoverCardTrigger asChild>
              <div className="w-fit flex items-center justify-center gap-2 border-dashed border px-1 rounded-sm bg-bg-card">
              <div className="w-fit flex items-center justify-center gap-2 border-dashed border px-2 py-1 rounded-sm bg-bg-card">
                <div className="flex gap-2 items-center">
                  <div className="max-w-80 overflow-hidden text-ellipsis">
                    {tag}

@@ -84,11 +83,11 @@ const EditTag = React.forwardRef<HTMLDivElement, EditTagsProps>(

    return (
      <div>
        {inputVisible ? (
        {inputVisible && (
          <Input
            ref={inputRef}
            type="text"
            className="h-8 bg-bg-card"
            className="h-8 bg-bg-card mb-1"
            value={inputValue}
            onChange={handleInputChange}
            onBlur={handleInputConfirm}

@@ -98,36 +97,20 @@ const EditTag = React.forwardRef<HTMLDivElement, EditTagsProps>(
              }
            }}
          />
        ) : (
          <Button
            variant="dashed"
            className="w-fit flex items-center justify-center gap-2 bg-bg-card"
            onClick={showInput}
            style={tagPlusStyle}
          >
            <PlusOutlined />
          </Button>
        )}
        {Array.isArray(tagChild) && tagChild.length > 0 && (
          <TweenOneGroup
            className="flex gap-2 flex-wrap mt-2"
            enter={{
              scale: 0.8,
              opacity: 0,
              type: 'from',
              duration: 100,
            }}
            onEnd={(e) => {
              if (e.type === 'appear' || e.type === 'enter') {
                (e.target as any).style = 'display: inline-block';
              }
            }}
            leave={{ opacity: 0, width: 0, scale: 0, duration: 200 }}
            appear={false}
          >
            {tagChild}
          </TweenOneGroup>
        )}
        <div className="flex gap-2 py-1">
          {Array.isArray(tagChild) && tagChild.length > 0 && <>{tagChild}</>}
          {!inputVisible && (
            <Button
              variant="dashed"
              className="w-fit flex items-center justify-center gap-2 bg-bg-card"
              onClick={showInput}
              style={tagPlusStyle}
            >
              <PlusOutlined />
            </Button>
          )}
        </div>
      </div>
    );
  },

@@ -1533,8 +1533,8 @@ This delimiter is used to split the input text into several text pieces echo of
      'Your users will see this welcome message at the beginning.',
    modeTip: 'The mode defines how the workflow is initiated.',
    mode: 'Mode',
    conversational: 'conversational',
    task: 'task',
    conversational: 'Conversational',
    task: 'Task',
    beginInputTip:
      'By defining input parameters, this content can be accessed by other components in subsequent processes.',
    query: 'Query variables',

@@ -1605,6 +1605,119 @@ This delimiter is used to split the input text into several text pieces echo of
    ceateAgent: 'Agent flow',
    createPipeline: 'Ingestion pipeline',
    chooseAgentType: 'Choose Agent Type',
    parser: 'Parser',
    parserDescription:
      'Extracts raw text and structure from files for downstream processing.',
    tokenizer: 'Indexer',
    tokenizerRequired: 'Please add the Indexer node first',
    tokenizerDescription:
      'Transforms text into the required data structure (e.g., vector embeddings for Embedding Search) depending on the chosen search method.',
    splitter: 'Token',
    splitterDescription:
      'Split text into chunks by token length with optional delimiters and overlap.',
    hierarchicalMergerDescription:
      'Split documents into sections by title hierarchy with regex rules for finer control.',
    hierarchicalMerger: 'Title',
    extractor: 'Transformer',
    extractorDescription:
      'Use an LLM to extract structured insights from document chunks—such as summaries, classifications, etc.',
    outputFormat: 'Output format',
    fileFormats: 'File format',
    fileFormatOptions: {
      pdf: 'PDF',
      spreadsheet: 'Spreadsheet',
      image: 'Image',
      email: 'Email',
      'text&markdown': 'Text & Markup',
      word: 'Word',
      slides: 'PPT',
      audio: 'Audio',
    },
    fields: 'Field',
    addParser: 'Add Parser',
    hierarchy: 'Hierarchy',
    regularExpressions: 'Regular Expressions',
    overlappedPercent: 'Overlapped percent (%)',
    searchMethod: 'Search method',
    searchMethodTip: `Defines how the content can be searched — by full-text, embedding, or both.
The Indexer will store the content in the corresponding data structures for the selected methods.`,
    // file: 'File',
    parserMethod: 'Parsing method',
    // systemPrompt: 'System Prompt',
    systemPromptPlaceholder:
      'Enter system prompt for image analysis, if empty the system default value will be used',
    exportJson: 'Export JSON',
    viewResult: 'View result',
    running: 'Running',
    summary: 'Summary',
    keywords: 'Keywords',
    questions: 'Questions',
    metadata: 'Metadata',
    fieldName: 'Result destination',
    prompts: {
      system: {
        keywords: `Role
You are a text analyzer.

Task
Extract the most important keywords/phrases of a given piece of text content.

Requirements
- Summarize the text content, and give the top 5 important keywords/phrases.
- The keywords MUST be in the same language as the given piece of text content.
- The keywords are delimited by ENGLISH COMMA.
- Output keywords ONLY.`,
        questions: `Role
You are a text analyzer.

Task
Propose 3 questions about a given piece of text content.

Requirements
- Understand and summarize the text content, and propose the top 3 important questions.
- The questions SHOULD NOT have overlapping meanings.
- The questions SHOULD cover the main content of the text as much as possible.
- The questions MUST be in the same language as the given piece of text content.
- One question per line.
- Output questions ONLY.`,
        summary: `Act as a precise summarizer. Your task is to create a summary of the provided content that is both concise and faithful to the original.

Key Instructions:
1. Accuracy: Strictly base the summary on the information given. Do not introduce any new facts, conclusions, or interpretations that are not explicitly stated.
2. Language: Write the summary in the same language as the source text.
3. Objectivity: Present the key points without bias, preserving the original intent and tone of the content. Do not editorialize.
4. Conciseness: Focus on the most important ideas, omitting minor details and fluff.`,
        metadata: `Extract important structured information from the given content. Output ONLY a valid JSON string with no additional text. If no important structured information is found, output an empty JSON object: {}.

Important structured information may include: names, dates, locations, events, key facts, numerical data, or other extractable entities.`,
      },
      user: {
        keywords: `Text Content
[Insert text here]`,
        questions: `Text Content
[Insert text here]`,
        summary: `Text to Summarize:
[Insert text here]`,
        metadata: `Content: [INSERT CONTENT HERE]`,
      },
    },
    cancel: 'Cancel',
    swicthPromptMessage:
      'The prompt word will change. Please confirm whether to abandon the existing prompt word?',
    tokenizerSearchMethodOptions: {
      full_text: 'Full-text',
      embedding: 'Embedding',
    },
    filenameEmbeddingWeight: 'Filename embedding weight',
    tokenizerFieldsOptions: {
      text: 'Processed Text',
      keywords: 'Keywords',
      questions: 'Questions',
      summary: 'Augmented Context',
    },
    imageParseMethodOptions: {
      ocr: 'OCR',
    },
  },
  llmTools: {
    bad_calculator: {

@@ -1705,125 +1818,6 @@ This delimiter is used to split the input text into several text pieces echo of
      <p>Are you sure you want to proceed?</p> `,
    unlinkPipelineModalConfirmText: 'Unlink',
  },
  dataflow: {
    parser: 'Parser',
    parserDescription:
      'Extracts raw text and structure from files for downstream processing.',
    tokenizer: 'Indexer',
    tokenizerRequired: 'Please add the Indexer node first',
    tokenizerDescription:
      'Transforms text into the required data structure (e.g., vector embeddings for Embedding Search) depending on the chosen search method.',
    splitter: 'Token',
    splitterDescription:
      'Split text into chunks by token length with optional delimiters and overlap.',
    hierarchicalMergerDescription:
      'Split documents into sections by title hierarchy with regex rules for finer control.',
    hierarchicalMerger: 'Title',
    extractor: 'Transformer',
    extractorDescription:
      'Use an LLM to extract structured insights from document chunks—such as summaries, classifications, etc.',
    outputFormat: 'Output format',
    lang: 'Language',
    fileFormats: 'File format',
    fileFormatOptions: {
      pdf: 'PDF',
      spreadsheet: 'Spreadsheet',
      image: 'Image',
      email: 'Email',
      'text&markdown': 'Text & Markup',
      word: 'Word',
      slides: 'PPT',
      audio: 'Audio',
    },
    fields: 'Field',
    addParser: 'Add Parser',
    hierarchy: 'Hierarchy',
    regularExpressions: 'Regular Expressions',
    overlappedPercent: 'Overlapped percent (%)',
    searchMethod: 'Search method',
    searchMethodTip: `Defines how the content can be searched — by full-text, embedding, or both.
The Indexer will store the content in the corresponding data structures for the selected methods.`,
    begin: 'File',
    parserMethod: 'Parsing method',
    systemPrompt: 'System Prompt',
    systemPromptPlaceholder:
      'Enter system prompt for image analysis, if empty the system default value will be used',
    exportJson: 'Export JSON',
    viewResult: 'View result',
    running: 'Running',
    summary: 'Summary',
    keywords: 'Keywords',
    questions: 'Questions',
    metadata: 'Metadata',
    fieldName: 'Result destination',
    prompts: {
      system: {
        keywords: `Role
You are a text analyzer.

Task
Extract the most important keywords/phrases of a given piece of text content.

Requirements
- Summarize the text content, and give the top 5 important keywords/phrases.
- The keywords MUST be in the same language as the given piece of text content.
- The keywords are delimited by ENGLISH COMMA.
- Output keywords ONLY.`,
        questions: `Role
You are a text analyzer.

Task
Propose 3 questions about a given piece of text content.

Requirements
- Understand and summarize the text content, and propose the top 3 important questions.
- The questions SHOULD NOT have overlapping meanings.
- The questions SHOULD cover the main content of the text as much as possible.
- The questions MUST be in the same language as the given piece of text content.
- One question per line.
- Output questions ONLY.`,
        summary: `Act as a precise summarizer. Your task is to create a summary of the provided content that is both concise and faithful to the original.

Key Instructions:
1. Accuracy: Strictly base the summary on the information given. Do not introduce any new facts, conclusions, or interpretations that are not explicitly stated.
2. Language: Write the summary in the same language as the source text.
3. Objectivity: Present the key points without bias, preserving the original intent and tone of the content. Do not editorialize.
4. Conciseness: Focus on the most important ideas, omitting minor details and fluff.`,
        metadata: `Extract important structured information from the given content. Output ONLY a valid JSON string with no additional text. If no important structured information is found, output an empty JSON object: {}.

Important structured information may include: names, dates, locations, events, key facts, numerical data, or other extractable entities.`,
      },
      user: {
        keywords: `Text Content
[Insert text here]`,
        questions: `Text Content
[Insert text here]`,
        summary: `Text to Summarize:
[Insert text here]`,
        metadata: `Content: [INSERT CONTENT HERE]`,
      },
    },
    cancel: 'Cancel',
    swicthPromptMessage:
      'The prompt word will change. Please confirm whether to abandon the existing prompt word?',
    tokenizerSearchMethodOptions: {
      full_text: 'Full-text',
      embedding: 'Embedding',
    },
    filenameEmbeddingWeight: 'Filename embedding weight',
    tokenizerFieldsOptions: {
      text: 'Processed Text',
      keywords: 'Keywords',
      questions: 'Questions',
      summary: 'Augmented Context',
    },
    imageParseMethodOptions: {
      ocr: 'OCR',
    },
    note: 'Note',
    noteDescription: 'Note',
    notePlaceholder: 'Please enter a note',
  },
  datasetOverview: {
    downloadTip: 'Files being downloaded from data sources. ',
    processingTip: 'Files being processed by Ingestion pipeline.',

@@ -1511,6 +1511,93 @@ General:实体和关系提取提示来自 GitHub - microsoft/graphrag:基于
    createFromTemplate: '从模板创建',
    importJsonFile: '导入 JSON 文件',
    chooseAgentType: '选择智能体类型',
    parser: '解析器',
    parserDescription: '从文件中提取原始文本和结构以供下游处理。',
    tokenizer: '分词器',
    tokenizerRequired: '请先添加Tokenizer节点',
    tokenizerDescription:
      '根据所选的搜索方法,将文本转换为所需的数据结构(例如,用于嵌入搜索的向量嵌入)。',
    splitter: '按字符分割',
    splitterDescription:
      '根据分词器长度将文本拆分成块,并带有可选的分隔符和重叠。',
    hierarchicalMergerDescription:
      '使用正则表达式规则按标题层次结构将文档拆分成多个部分,以实现更精细的控制。',
    hierarchicalMerger: '按标题分割',
    extractor: '提取器',
    extractorDescription:
      '使用 LLM 从文档块(例如摘要、分类等)中提取结构化见解。',
    outputFormat: '输出格式',
    fileFormats: '文件格式',
    fields: '字段',
    addParser: '增加解析器',
    hierarchy: '层次结构',
    regularExpressions: '正则表达式',
    overlappedPercent: '重叠百分比(%)',
    searchMethod: '搜索方法',
    searchMethodTip: `决定该数据集启用的搜索方式,可选择全文、向量,或两者兼有。
Tokenizer 会根据所选方式将内容存储为对应的数据结构。`,
    filenameEmbdWeight: '文件名嵌入权重',
    parserMethod: '解析方法',
    systemPromptPlaceholder:
      '请输入用于图像分析的系统提示词,若为空则使用系统缺省值',
    exportJson: '导出 JSON',
    viewResult: '查看结果',
    running: '运行中',
    summary: '增强上下文',
    keywords: '关键词',
    questions: '问题',
    metadata: '元数据',
    fieldName: '结果目的地',
    prompts: {
      system: {
        keywords: `角色
你是一名文本分析员。

任务
从给定的文本内容中提取最重要的关键词/短语。

要求
- 总结文本内容,并给出最重要的5个关键词/短语。
- 关键词必须与给定的文本内容使用相同的语言。
- 关键词之间用英文逗号分隔。
- 仅输出关键词。`,
        questions: `角色
你是一名文本分析员。

任务
针对给定的文本内容提出3个问题。

要求
- 理解并总结文本内容,并提出最重要的3个问题。
- 问题的含义不应重叠。
- 问题应尽可能涵盖文本的主要内容。
- 问题必须与给定的文本内容使用相同的语言。
- 每行一个问题。
- 仅输出问题。`,
        summary: `扮演一个精准的摘要者。你的任务是为提供的内容创建一个简洁且忠实于原文的摘要。

关键说明:
1. 准确性:摘要必须严格基于所提供的信息。请勿引入任何未明确说明的新事实、结论或解释。
2. 语言:摘要必须使用与原文相同的语言。
3. 客观性:不带偏见地呈现要点,保留内容的原始意图和语气。请勿进行编辑。
4. 简洁性:专注于最重要的思想,省略细节和多余的内容。`,
        metadata: `从给定内容中提取重要的结构化信息。仅输出有效的 JSON 字符串,不包含任何附加文本。如果未找到重要的结构化信息,则输出一个空的 JSON 对象:{}。

重要的结构化信息可能包括:姓名、日期、地点、事件、关键事实、数字数据或其他可提取实体。`,
      },
      user: {
        keywords: `文本内容
[在此处插入文本]`,
        questions: `文本内容
[在此处插入文本]`,
        summary: `要总结的文本:
[在此处插入文本]`,
        metadata: `内容:[在此处插入内容]`,
      },
    },
    cancel: '取消',
    filenameEmbeddingWeight: '文件名嵌入权重',
    switchPromptMessage: '提示词将发生变化,请确认是否放弃已有提示词?',
  },
  footer: {
    profile: 'All rights reserved @ React',

@@ -1618,101 +1705,6 @@ General:实体和关系提取提示来自 GitHub - microsoft/graphrag:基于
      <p>你确定要继续吗?</p> `,
    unlinkPipelineModalConfirmText: '解绑',
  },
  dataflow: {
    parser: '解析器',
    parserDescription: '从文件中提取原始文本和结构以供下游处理。',
    tokenizer: '分词器',
    tokenizerRequired: '请先添加Tokenizer节点',
    tokenizerDescription:
      '根据所选的搜索方法,将文本转换为所需的数据结构(例如,用于嵌入搜索的向量嵌入)。',
    splitter: '按字符分割',
    splitterDescription:
      '根据分词器长度将文本拆分成块,并带有可选的分隔符和重叠。',
    hierarchicalMergerDescription:
      '使用正则表达式规则按标题层次结构将文档拆分成多个部分,以实现更精细的控制。',
    hierarchicalMerger: '按标题分割',
    extractor: '提取器',
    extractorDescription:
      '使用 LLM 从文档块(例如摘要、分类等)中提取结构化见解。',
    outputFormat: '输出格式',
    lang: '语言',
    fileFormats: '文件格式',
    fields: '字段',
    addParser: '增加解析器',
    hierarchy: '层次结构',
    regularExpressions: '正则表达式',
    overlappedPercent: '重叠百分比(%)',
    searchMethod: '搜索方法',
    searchMethodTip: `决定该数据集启用的搜索方式,可选择全文、向量,或两者兼有。
Tokenizer 会根据所选方式将内容存储为对应的数据结构。`,
    filenameEmbdWeight: '文件名嵌入权重',
    begin: '文件',
    parserMethod: '解析方法',
    systemPrompt: '系统提示词',
    systemPromptPlaceholder:
      '请输入用于图像分析的系统提示词,若为空则使用系统缺省值',
    exportJson: '导出 JSON',
    viewResult: '查看结果',
    running: '运行中',
    summary: '增强上下文',
    keywords: '关键词',
    questions: '问题',
    metadata: '元数据',
    fieldName: '结果目的地',
    prompts: {
      system: {
        keywords: `角色
你是一名文本分析员。

任务
从给定的文本内容中提取最重要的关键词/短语。

要求
- 总结文本内容,并给出最重要的5个关键词/短语。
- 关键词必须与给定的文本内容使用相同的语言。
- 关键词之间用英文逗号分隔。
- 仅输出关键词。`,
        questions: `角色
你是一名文本分析员。

任务
针对给定的文本内容提出3个问题。

要求
- 理解并总结文本内容,并提出最重要的3个问题。
- 问题的含义不应重叠。
- 问题应尽可能涵盖文本的主要内容。
- 问题必须与给定的文本内容使用相同的语言。
- 每行一个问题。
- 仅输出问题。`,
        summary: `扮演一个精准的摘要者。你的任务是为提供的内容创建一个简洁且忠实于原文的摘要。

关键说明:
1. 准确性:摘要必须严格基于所提供的信息。请勿引入任何未明确说明的新事实、结论或解释。
2. 语言:摘要必须使用与原文相同的语言。
3. 客观性:不带偏见地呈现要点,保留内容的原始意图和语气。请勿进行编辑。
4. 简洁性:专注于最重要的思想,省略细节和多余的内容。`,
        metadata: `从给定内容中提取重要的结构化信息。仅输出有效的 JSON 字符串,不包含任何附加文本。如果未找到重要的结构化信息,则输出一个空的 JSON 对象:{}。

重要的结构化信息可能包括:姓名、日期、地点、事件、关键事实、数字数据或其他可提取实体。`,
      },
      user: {
        keywords: `文本内容
[在此处插入文本]`,
        questions: `文本内容
[在此处插入文本]`,
        summary: `要总结的文本:
[在此处插入文本]`,
        metadata: `内容:[在此处插入内容]`,
      },
    },
    cancel: '取消',
    filenameEmbeddingWeight: '文件名嵌入权重',
    switchPromptMessage: '提示词将发生变化,请确认是否放弃已有提示词?',
    note: '注释',
    noteDescription: '注释',
    notePlaceholder: '请输入注释',
  },
  datasetOverview: {
    downloadTip: '正在从数据源下载文件。',
    processingTip: '正在由pipeline处理文件。',

@ -36,7 +36,7 @@ function InnerFileNode({ data, id, selected }: NodeProps<IBeginNode>) {
<section className="flex items-center gap-2">
<OperatorIcon name={data.label as Operator}></OperatorIcon>
<div className="truncate text-center font-semibold text-sm">
{t(`dataflow.begin`)}
{t(`flow.begin`)}
</div>
</section>
<section className={cn(styles.generateParameters, 'flex gap-2 flex-col')}>

@ -5,6 +5,8 @@ import { Plus } from 'lucide-react';
import { useMemo } from 'react';
import { NodeHandleId } from '../../constant';
import { HandleContext } from '../../context';
import { useIsPipeline } from '../../hooks/use-is-pipeline';
import useGraphStore from '../../store';
import { useDropdownManager } from '../context';
import { NextStepDropdown } from './dropdown/next-step-dropdown';

@ -14,9 +16,12 @@ export function CommonHandle({
...props
}: HandleProps & { nodeId: string }) {
const { visible, hideModal, showModal } = useSetModalState();

const { canShowDropdown, setActiveDropdown, clearActiveDropdown } =
useDropdownManager();
const { hasChildNode } = useGraphStore((state) => state);
const isPipeline = useIsPipeline();

const isConnectable = !(isPipeline && hasChildNode(nodeId)); // Using useMemo will cause isConnectable to not be updated when the subsequent connection line is deleted

const value = useMemo(
() => ({
@ -33,6 +38,7 @@ export function CommonHandle({
<HandleContext.Provider value={value}>
<Handle
{...props}
isConnectable={isConnectable}
className={cn(
'inline-flex justify-center items-center !bg-accent-primary !border-none group-hover:!size-4 group-hover:!rounded-sm',
className,
@ -40,6 +46,10 @@ export function CommonHandle({
onClick={(e) => {
e.stopPropagation();

if (!isConnectable) {
return;
}

if (!canShowDropdown()) {
return;
}

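A short sketch of the pitfall the inline comment above guards against (the memoized variant below is hypothetical, not part of this change):

// Hypothetical alternative, NOT what the diff does:
// const isConnectable = useMemo(
//   () => !(isPipeline && hasChildNode(nodeId)),
//   [isPipeline, nodeId],
// );
// hasChildNode reads the store's edge list, which is not in the dependency
// array, so deleting the node's outgoing edge would leave a stale `false`
// and the handle would stay disabled. Recomputing on every render, as the
// diff does, keeps isConnectable in sync with the current edges.
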
@ -46,7 +46,7 @@ function ParserNode({
className="flex flex-col text-text-primary gap-1"
>
<span className="text-text-secondary">Parser {idx + 1}</span>
{t(`dataflow.fileFormatOptions.${x.fileFormat}`)}
{t(`flow.fileFormatOptions.${x.fileFormat}`)}
</LabelCard>
)}
</NodeCollapsible>

@ -38,12 +38,10 @@ function TokenizerNode({
></CommonHandle>
<NodeHeader id={id} name={data.name} label={data.label}></NodeHeader>
<LabelCard className="text-text-primary flex justify-between flex-col gap-1">
<span className="text-text-secondary">
{t('dataflow.searchMethod')}
</span>
<span className="text-text-secondary">{t('flow.searchMethod')}</span>
<ul className="space-y-1">
{data.form?.search_method.map((x) => (
<li key={x}>{t(`dataflow.tokenizerSearchMethodOptions.${x}`)}</li>
<li key={x}>{t(`flow.tokenizerSearchMethodOptions.${x}`)}</li>
))}
</ul>
</LabelCard>

@ -48,13 +48,3 @@ export type HandleContextType = {
export const HandleContext = createContext<HandleContextType>(
{} as HandleContextType,
);

export type PipelineLogContextType = {
messageId: string;
setMessageId: (messageId: string) => void;
setUploadedFileData: (data: Record<string, any>) => void;
};

export const PipelineLogContext = createContext<PipelineLogContextType>(
{} as PipelineLogContextType,
);

@ -47,7 +47,7 @@ const ExtractorForm = ({ node }: INextOperatorForm) => {

const promptOptions = useBuildNodeOutputOptions(node?.id);

const options = buildOptions(ContextGeneratorFieldName, t, 'dataflow');
const options = buildOptions(ContextGeneratorFieldName, t, 'flow');

const {
handleFieldNameChange,
@ -63,7 +63,7 @@ const ExtractorForm = ({ node }: INextOperatorForm) => {
<Form {...form}>
<FormWrapper>
<LargeModelFormField></LargeModelFormField>
<RAGFlowFormItem label={t('dataflow.fieldName')} name="field_name">
<RAGFlowFormItem label={t('flow.fieldName')} name="field_name">
{(field) => (
<SelectWithSearch
onChange={(value) => {
@ -93,7 +93,7 @@ const ExtractorForm = ({ node }: INextOperatorForm) => {
</FormWrapper>
{visible && (
<ConfirmDeleteDialog
title={t('dataflow.switchPromptMessage')}
title={t('flow.switchPromptMessage')}
open
onOpenChange={hideModal}
onOk={confirmSwitch}

@ -21,7 +21,7 @@ export function useSwitchPrompt(form: UseFormReturn<ExtractorFormSchemaType>) {

const setPromptValue = useCallback(
(field: keyof ExtractorFormSchemaType, key: string, value: string) => {
form.setValue(field, t(`dataflow.prompts.${key}.${value}`), {
form.setValue(field, t(`flow.prompts.${key}.${value}`), {
shouldDirty: true,
shouldValidate: true,
});

@ -98,7 +98,7 @@ export function RegularExpressions({
</CardHeader>
<CardContent>
<FormLabel required className="mb-2 text-text-secondary">
{t('dataflow.regularExpressions')}
{t('flow.regularExpressions')}
</FormLabel>
<section className="space-y-4">
{fields.map((field, index) => (
@ -158,7 +158,7 @@ const HierarchicalMergerForm = ({ node }: INextOperatorForm) => {
return (
<Form {...form}>
<FormWrapper>
<RAGFlowFormItem name={'hierarchy'} label={t('dataflow.hierarchy')}>
<RAGFlowFormItem name={'hierarchy'} label={t('flow.hierarchy')}>
<SelectWithSearch options={HierarchyOptions}></SelectWithSearch>
</RAGFlowFormItem>
{fields.map((field, index) => (

@ -50,7 +50,7 @@ export function OutputFormatFormField({
return (
<RAGFlowFormItem
name={buildFieldNameWithPrefix(`output_format`, prefix)}
label={t('dataflow.outputFormat')}
label={t('flow.outputFormat')}
>
<SelectWithSearch
options={buildOutputOptionsFormatMap()[fileType]}
@ -69,7 +69,7 @@ export function ParserMethodFormField({
name={buildFieldNameWithPrefix(`parse_method`, prefix)}
horizontal={false}
optionsWithoutLLM={optionsWithoutLLM}
label={t('dataflow.parserMethod')}
label={t('flow.parserMethod')}
></LayoutRecognizeFormField>
);
}
@ -92,7 +92,7 @@ export function LanguageFormField({ prefix }: CommonProps) {
return (
<RAGFlowFormItem
name={buildFieldNameWithPrefix(`lang`, prefix)}
label={t('dataflow.lang')}
label={t('flow.lang')}
>
{(field) => (
<SelectWithSearch

@ -14,7 +14,7 @@ export function EmailFormFields({ prefix }: CommonProps) {
<>
<RAGFlowFormItem
name={buildFieldNameWithPrefix(`fields`, prefix)}
label={t('dataflow.fields')}
label={t('flow.fields')}
>
{(field) => (
<MultiSelect

@ -17,7 +17,7 @@ export function ImageFormFields({ prefix }: CommonProps) {
const options = buildOptions(
ImageParseMethod,
t,
'dataflow.imageParseMethodOptions',
'flow.imageParseMethodOptions',
);
const parseMethodName = buildFieldNameWithPrefix('parse_method', prefix);

@ -50,9 +50,9 @@ export function ImageFormFields({ prefix }: CommonProps) {
{languageShown && (
<RAGFlowFormItem
name={buildFieldNameWithPrefix('system_prompt', prefix)}
label={t('dataflow.systemPrompt')}
label={t('flow.systemPrompt')}
>
<Textarea placeholder={t('dataflow.systemPromptPlaceholder')} />
<Textarea placeholder={t('flow.systemPromptPlaceholder')} />
</RAGFlowFormItem>
)}
</>

@ -133,7 +133,7 @@ function ParserItem({
</div>
<RAGFlowFormItem
name={buildFieldNameWithPrefix(`fileFormat`, prefix)}
label={t('dataflow.fileFormats')}
label={t('flow.fileFormats')}
>
{(field) => (
<SelectWithSearch
@ -165,7 +165,7 @@ const ParserForm = ({ node }: INextOperatorForm) => {
const FileFormatOptions = buildOptions(
FileType,
t,
'dataflow.fileFormatOptions',
'flow.fileFormatOptions',
).filter(
(x) => x.value !== FileType.Video, // Temporarily hide the video option
);
@ -212,7 +212,7 @@ const ParserForm = ({ node }: INextOperatorForm) => {
})}
{fields.length < FileFormatOptions.length && (
<BlockButton onClick={add} type="button" className="mt-2.5">
{t('dataflow.addParser')}
{t('flow.addParser')}
</BlockButton>
)}
</form>

@ -2,6 +2,10 @@ import { Collapse } from '@/components/collapse';
import { CrossLanguageFormField } from '@/components/cross-language-form-field';
import { FormContainer } from '@/components/form-container';
import { KnowledgeBaseFormField } from '@/components/knowledge-base-item';
import {
MetadataFilter,
MetadataFilterSchema,
} from '@/components/metadata-filter';
import { RAGFlowFormItem } from '@/components/ragflow-form';
import { RerankFormFields } from '@/components/rerank';
import { SimilaritySliderFormField } from '@/components/similarity-slider';
@ -41,6 +45,7 @@ export const RetrievalPartialSchema = {
cross_languages: z.array(z.string()),
use_kg: z.boolean(),
toc_enhance: z.boolean(),
...MetadataFilterSchema,
};

export const FormSchema = z.object({
@ -118,6 +123,7 @@ function RetrievalForm({ node }: INextOperatorForm) {
></SimilaritySliderFormField>
<TopNFormField></TopNFormField>
<RerankFormFields></RerankFormFields>
<MetadataFilter></MetadataFilter>
<EmptyResponseField></EmptyResponseField>
<CrossLanguageFormField name="cross_languages"></CrossLanguageFormField>
<UseKnowledgeGraphFormField name="use_kg"></UseKnowledgeGraphFormField>

@ -60,7 +60,7 @@ const SplitterForm = ({ node }: INextOperatorForm) => {
name="overlapped_percent"
max={30}
min={0}
label={t('dataflow.overlappedPercent')}
label={t('flow.overlappedPercent')}
></SliderInputFormField>
<section>
<span className="mb-2 inline-block">{t('flow.delimiters')}</span>

@ -38,12 +38,12 @@ const TokenizerForm = ({ node }: INextOperatorForm) => {
const SearchMethodOptions = buildOptions(
TokenizerSearchMethod,
t,
`dataflow.tokenizerSearchMethodOptions`,
`flow.tokenizerSearchMethodOptions`,
);
const FieldsOptions = buildOptions(
TokenizerFields,
t,
'dataflow.tokenizerFieldsOptions',
'flow.tokenizerFieldsOptions',
);

const form = useForm<TokenizerFormSchemaType>({
@ -59,8 +59,8 @@ const TokenizerForm = ({ node }: INextOperatorForm) => {
<FormWrapper>
<RAGFlowFormItem
name="search_method"
label={t('dataflow.searchMethod')}
tooltip={t('dataflow.searchMethodTip')}
label={t('flow.searchMethod')}
tooltip={t('flow.searchMethodTip')}
>
{(field) => (
<MultiSelect
@ -73,11 +73,11 @@ const TokenizerForm = ({ node }: INextOperatorForm) => {
</RAGFlowFormItem>
<SliderInputFormField
name="filename_embd_weight"
label={t('dataflow.filenameEmbeddingWeight')}
label={t('flow.filenameEmbeddingWeight')}
max={0.5}
step={0.01}
></SliderInputFormField>
<RAGFlowFormItem name="fields" label={t('dataflow.fields')}>
<RAGFlowFormItem name="fields" label={t('flow.fields')}>
{(field) => <SelectWithSearch options={FieldsOptions} {...field} />}
</RAGFlowFormItem>
</FormWrapper>

@ -2,6 +2,7 @@ import { Collapse } from '@/components/collapse';
import { CrossLanguageFormField } from '@/components/cross-language-form-field';
import { FormContainer } from '@/components/form-container';
import { KnowledgeBaseFormField } from '@/components/knowledge-base-item';
import { MetadataFilter } from '@/components/metadata-filter';
import { RerankFormFields } from '@/components/rerank';
import { SimilaritySliderFormField } from '@/components/similarity-slider';
import { TOCEnhanceFormField } from '@/components/toc-enhance-form-field';
@ -51,6 +52,7 @@ const RetrievalForm = () => {
></SimilaritySliderFormField>
<TopNFormField></TopNFormField>
<RerankFormFields></RerankFormFields>
<MetadataFilter></MetadataFilter>
<EmptyResponseField></EmptyResponseField>
<CrossLanguageFormField name="cross_languages"></CrossLanguageFormField>
<UseKnowledgeGraphFormField name="use_kg"></UseKnowledgeGraphFormField>

@ -128,8 +128,8 @@ export const useInitializeOperatorParams = () => {
[Operator.Extractor]: {
...initialExtractorValues,
llm_id: llmId,
sys_prompt: t('dataflow.prompts.system.summary'),
prompts: t('dataflow.prompts.user.summary'),
sys_prompt: t('flow.prompts.system.summary'),
prompts: t('flow.prompts.user.summary'),
},
};
}, [llmId]);

55 web/src/pages/agent/hooks/use-run-dataflow.ts Normal file
@ -0,0 +1,55 @@
import message from '@/components/ui/message';
import { useSendMessageBySSE } from '@/hooks/use-send-message';
import api from '@/utils/api';
import { get } from 'lodash';
import { useCallback, useState } from 'react';
import { useParams } from 'umi';
import { UseFetchLogReturnType } from './use-fetch-pipeline-log';
import { useSaveGraph } from './use-save-graph';

export function useRunDataflow({
showLogSheet,
setMessageId,
}: {
showLogSheet: () => void;
} & Pick<UseFetchLogReturnType, 'setMessageId'>) {
const { send } = useSendMessageBySSE(api.runCanvas);
const { id } = useParams();
const { saveGraph, loading } = useSaveGraph();
const [uploadedFileData, setUploadedFileData] =
useState<Record<string, any>>();

const run = useCallback(
async (fileResponseData: Record<string, any>) => {
const saveRet = await saveGraph();
const success = saveRet?.code === 0;
if (!success) return;

showLogSheet();
const res = await send({
id,
query: '',
session_id: null,
files: [fileResponseData.file],
});

if (res && res?.response.status === 200 && get(res, 'data.code') === 0) {
// fetch canvas
setUploadedFileData(fileResponseData.file);
const msgId = get(res, 'data.data.message_id');
if (msgId) {
setMessageId(msgId);
}

return msgId;
} else {
message.error(get(res, 'data.message', ''));
}
},
[id, saveGraph, send, setMessageId, setUploadedFileData, showLogSheet],
);

return { run, loading: loading, uploadedFileData };
}

export type RunDataflowType = ReturnType<typeof useRunDataflow>;
@ -61,7 +61,7 @@ export const useShowSingleDebugDrawer = () => {
};
};

const ExcludedNodes = [Operator.Note, Operator.Placeholder];
const ExcludedNodes = [Operator.Note, Operator.Placeholder, Operator.File];

export function useShowDrawer({
drawerVisible,

@ -32,25 +32,26 @@ import {
Settings,
Upload,
} from 'lucide-react';
import { ComponentPropsWithoutRef, useCallback, useState } from 'react';
import { ComponentPropsWithoutRef, useCallback } from 'react';
import { useTranslation } from 'react-i18next';
import { useParams } from 'umi';
import AgentCanvas from './canvas';
import { DropdownProvider } from './canvas/context';
import { Operator } from './constant';
import { PipelineLogContext } from './context';
import { useCancelCurrentDataflow } from './hooks/use-cancel-dataflow';
import { useHandleExportJsonFile } from './hooks/use-export-json';
import { useFetchDataOnMount } from './hooks/use-fetch-data';
import { useFetchPipelineLog } from './hooks/use-fetch-pipeline-log';
import { useGetBeginNodeDataInputs } from './hooks/use-get-begin-query';
import { useIsPipeline } from './hooks/use-is-pipeline';
import { useRunDataflow } from './hooks/use-run-dataflow';
import {
useSaveGraph,
useSaveGraphBeforeOpeningDebugDrawer,
useWatchAgentChange,
} from './hooks/use-save-graph';
import { PipelineLogSheet } from './pipeline-log-sheet';
import PipelineRunSheet from './pipeline-run-sheet';
import { SettingDialog } from './setting-dialog';
import useGraphStore from './store';
import { useAgentHistoryManager } from './use-agent-history-manager';
@ -110,6 +111,12 @@ export default function Agent() {

// pipeline

const {
visible: pipelineRunSheetVisible,
hideModal: hidePipelineRunSheet,
showModal: showPipelineRunSheet,
} = useSetModalState();

const {
visible: pipelineLogSheetVisible,
showModal: showPipelineLogSheet,
@ -126,13 +133,11 @@ export default function Agent() {
isLogEmpty,
} = useFetchPipelineLog(pipelineLogSheetVisible);

const [uploadedFileData, setUploadedFileData] =
useState<Record<string, any>>();
const findNodeByName = useGraphStore((state) => state.findNodeByName);

const handleRunPipeline = useCallback(() => {
if (!findNodeByName(Operator.Tokenizer)) {
message.warning(t('dataflow.tokenizerRequired'));
message.warning(t('flow.tokenizerRequired'));
return;
}

@ -141,14 +146,15 @@ export default function Agent() {
showPipelineLogSheet();
} else {
hidePipelineLogSheet();
handleRun();
// handleRun();
showPipelineRunSheet();
}
}, [
findNodeByName,
handleRun,
hidePipelineLogSheet,
isParsing,
showPipelineLogSheet,
showPipelineRunSheet,
t,
]);

@ -157,7 +163,7 @@ export default function Agent() {
stopFetchTrace,
});

const run = useCallback(() => {
const handleButtonRunClick = useCallback(() => {
if (isPipeline) {
handleRunPipeline();
} else {
@ -165,6 +171,12 @@ export default function Agent() {
}
}, [handleRunAgent, handleRunPipeline, isPipeline]);

const {
run: runPipeline,
loading: pipelineRunning,
uploadedFileData,
} = useRunDataflow({ showLogSheet: showPipelineLogSheet, setMessageId });

return (
<section className="h-full">
<PageHeader>
@ -194,7 +206,7 @@ export default function Agent() {
>
<LaptopMinimalCheck /> {t('flow.save')}
</ButtonLoading>
<Button variant={'secondary'} onClick={run}>
<Button variant={'secondary'} onClick={handleButtonRunClick}>
<CirclePlay />
{t('flow.run')}
</Button>
@ -241,18 +253,14 @@ export default function Agent() {
</DropdownMenu>
</div>
</PageHeader>
<PipelineLogContext.Provider
value={{ messageId, setMessageId, setUploadedFileData }}
>
<ReactFlowProvider>
<DropdownProvider>
<AgentCanvas
drawerVisible={chatDrawerVisible}
hideDrawer={hideChatDrawer}
></AgentCanvas>
</DropdownProvider>
</ReactFlowProvider>
</PipelineLogContext.Provider>
<ReactFlowProvider>
<DropdownProvider>
<AgentCanvas
drawerVisible={chatDrawerVisible}
hideDrawer={hideChatDrawer}
></AgentCanvas>
</DropdownProvider>
</ReactFlowProvider>
{embedVisible && (
<EmbedDialog
visible={embedVisible}
@ -284,6 +292,13 @@ export default function Agent() {
uploadedFileData={uploadedFileData}
></PipelineLogSheet>
)}
{pipelineRunSheetVisible && (
<PipelineRunSheet
hideModal={hidePipelineRunSheet}
run={runPipeline}
loading={pipelineRunning}
></PipelineRunSheet>
)}
</section>
);
}

@ -77,7 +77,7 @@ export function PipelineLogSheet({
uploadedFileData?.extension,
})}
>
{t('dataflow.viewResult')} <ArrowUpRight />
{t('flow.viewResult')} <ArrowUpRight />
</Button>
)}
</SheetTitle>
@ -95,7 +95,7 @@ export function PipelineLogSheet({
className="w-full mt-8 bg-state-error/10 text-state-error hover:bg-state-error hover:text-bg-base"
onClick={handleCancel}
>
<CirclePause /> {t('dataflow.cancel')}
<CirclePause /> {t('flow.cancel')}
</Button>
) : (
<Button
@ -104,7 +104,7 @@ export function PipelineLogSheet({
className="w-full mt-8 bg-accent-primary-5 text-text-secondary hover:bg-accent-primary-5 hover:text-accent-primary hover:border-accent-primary hover:border"
>
<SquareArrowOutUpRight />
{t('dataflow.exportJson')}
{t('flow.exportJson')}
</Button>
)}
</div>

31 web/src/pages/agent/pipeline-run-sheet/index.tsx Normal file
@ -0,0 +1,31 @@
import {
Sheet,
SheetContent,
SheetHeader,
SheetTitle,
} from '@/components/ui/sheet';
import { IModalProps } from '@/interfaces/common';
import { cn } from '@/lib/utils';
import { useTranslation } from 'react-i18next';
import { RunDataflowType } from '../hooks/use-run-dataflow';
import { UploaderForm } from './uploader';

type RunSheetProps = IModalProps<any> &
Pick<RunDataflowType, 'run' | 'loading'>;

const PipelineRunSheet = ({ hideModal, run, loading }: RunSheetProps) => {
const { t } = useTranslation();

return (
<Sheet onOpenChange={hideModal} open modal={false}>
<SheetContent className={cn('top-20 p-2')}>
<SheetHeader>
<SheetTitle>{t('flow.testRun')}</SheetTitle>
<UploaderForm ok={run} loading={loading}></UploaderForm>
</SheetHeader>
</SheetContent>
</Sheet>
);
};

export default PipelineRunSheet;
57 web/src/pages/agent/pipeline-run-sheet/uploader.tsx Normal file
@ -0,0 +1,57 @@
'use client';

import { z } from 'zod';

import { RAGFlowFormItem } from '@/components/ragflow-form';
import { ButtonLoading } from '@/components/ui/button';
import { Form } from '@/components/ui/form';
import { FileUploadDirectUpload } from '@/pages/agent/debug-content/uploader';
import { zodResolver } from '@hookform/resolvers/zod';
import { useForm } from 'react-hook-form';
import { useTranslation } from 'react-i18next';

const formSchema = z.object({
file: z.record(z.any()),
});

export type FormSchemaType = z.infer<typeof formSchema>;

type UploaderFormProps = {
ok: (values: FormSchemaType) => void;
loading: boolean;
};

export function UploaderForm({ ok, loading }: UploaderFormProps) {
const { t } = useTranslation();
const form = useForm<FormSchemaType>({
resolver: zodResolver(formSchema),
defaultValues: {},
});

return (
<Form {...form}>
<form onSubmit={form.handleSubmit(ok)} className="space-y-8">
<RAGFlowFormItem name="file">
{(field) => {
return (
<FileUploadDirectUpload
value={field.value}
onChange={field.onChange}
></FileUploadDirectUpload>
);
}}
</RAGFlowFormItem>

<div>
<ButtonLoading
type="submit"
loading={loading}
className="w-full mt-1"
>
{t('flow.run')}
</ButtonLoading>
</div>
</form>
</Form>
);
}
@ -89,6 +89,7 @@ export type RFState = {
) => void; // Deleting a condition of a classification operator will delete the related edge
findAgentToolNodeById: (id: string | null) => string | undefined;
selectNodeIds: (nodeIds: string[]) => void;
hasChildNode: (nodeId: string) => boolean;
};

// this is our useStore hook that we can use in our components to get parts of the store and call actions
@ -527,6 +528,10 @@ const useGraphStore = create<RFState>()(
})),
);
},
hasChildNode: (nodeId) => {
const { edges } = get();
return edges.some((edge) => edge.source === nodeId);
},
})),
{ name: 'graph', trace: true },
),

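For reference, a minimal sketch of what this new store action answers; the edge objects follow the @xyflow/react shape, and the sample ids are invented:

// A node "has a child" when at least one edge starts at it.
const edges = [{ id: 'e1', source: 'Parser:1', target: 'Splitter:1' }];
const hasChildNode = (nodeId: string) =>
  edges.some((edge) => edge.source === nodeId);

hasChildNode('Parser:1'); // true, one outgoing edge exists
hasChildNode('Splitter:1'); // false, only an incoming edge
// CommonHandle uses this to allow at most one outgoing connection per node in pipeline mode.
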
@ -9,16 +9,30 @@ import { removeUselessFieldsFromValues } from '@/utils/form';
import { Edge, Node, XYPosition } from '@xyflow/react';
import { FormInstance, FormListFieldData } from 'antd';
import { humanId } from 'human-id';
import { curry, get, intersectionWith, isEqual, omit, sample } from 'lodash';
import {
curry,
get,
intersectionWith,
isEmpty,
isEqual,
omit,
sample,
} from 'lodash';
import pipe from 'lodash/fp/pipe';
import isObject from 'lodash/isObject';
import {
CategorizeAnchorPointPositions,
FileType,
FileTypeSuffixMap,
NoCopyOperatorsList,
NoDebugOperatorsList,
NodeHandleId,
Operator,
} from './constant';
import { ExtractorFormSchemaType } from './form/extractor-form';
import { HierarchicalMergerFormSchemaType } from './form/hierarchical-merger-form';
import { ParserFormSchemaType } from './form/parser-form';
import { SplitterFormSchemaType } from './form/splitter-form';
import { BeginQuery, IPosition } from './interface';

function buildAgentExceptionGoto(edges: Edge[], nodeId: string) {
@ -170,6 +184,92 @@ export function hasSubAgent(edges: Edge[], nodeId?: string) {
return !!edge;
}

// Because the array of react-hook-form must be object data,
// it needs to be converted into a simple data type array required by the backend
function transformObjectArrayToPureArray(
list: Array<Record<string, any>>,
field: string,
) {
return Array.isArray(list)
? list.filter((x) => !isEmpty(x[field])).map((y) => y[field])
: [];
}

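To make the comment concrete, a small before/after for this helper (sample values invented):

// react-hook-form field arrays hold one object per row:
const delimiters = [{ value: '\n' }, { value: '' }, { value: '。' }];
transformObjectArrayToPureArray(delimiters, 'value');
// => ['\n', '。']
// Empty rows are dropped and the backend receives a plain string array.
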
function transformParserParams(params: ParserFormSchemaType) {
const setups = params.setups.reduce<
Record<string, ParserFormSchemaType['setups'][0]>
>((pre, cur) => {
if (cur.fileFormat) {
let filteredSetup: Partial<
ParserFormSchemaType['setups'][0] & { suffix: string[] }
> = {
output_format: cur.output_format,
suffix: FileTypeSuffixMap[cur.fileFormat as FileType],
};

switch (cur.fileFormat) {
case FileType.PDF:
filteredSetup = {
...filteredSetup,
parse_method: cur.parse_method,
lang: cur.lang,
};
break;
case FileType.Image:
filteredSetup = {
...filteredSetup,
parse_method: cur.parse_method,
lang: cur.lang,
system_prompt: cur.system_prompt,
};
break;
case FileType.Email:
filteredSetup = {
...filteredSetup,
fields: cur.fields,
};
break;
case FileType.Video:
case FileType.Audio:
filteredSetup = {
...filteredSetup,
llm_id: cur.llm_id,
};
break;
default:
break;
}

pre[cur.fileFormat] = filteredSetup;
}
return pre;
}, {});

return { ...params, setups };
}

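Roughly, the reducer above turns the form's setups array into a record keyed by file format, keeping only the fields relevant to that format. A sketch with invented values; the exact FileType enum values and FileTypeSuffixMap entries are assumptions:

// Input (form state):
//   setups: [{ fileFormat: 'pdf', output_format: 'markdown',
//              parse_method: 'deepdoc', lang: 'Chinese', fields: ['subject'] }]
// Output (DSL params):
//   setups: {
//     pdf: {
//       output_format: 'markdown',
//       suffix: ['pdf'],          // from FileTypeSuffixMap, exact list assumed
//       parse_method: 'deepdoc',  // kept because the format is PDF
//       lang: 'Chinese',
//     },                          // `fields` is dropped, it only applies to email
//   }
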
function transformSplitterParams(params: SplitterFormSchemaType) {
return {
...params,
overlapped_percent: Number(params.overlapped_percent) / 100,
delimiters: transformObjectArrayToPureArray(params.delimiters, 'value'),
};
}

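A quick before/after for the splitter transform (the field values below are invented):

transformSplitterParams({
  overlapped_percent: '10', // the slider edits a 0-30 percentage
  delimiters: [{ value: '\n' }, { value: '' }],
} as SplitterFormSchemaType);
// => { overlapped_percent: 0.1, delimiters: ['\n'] }
// The backend expects a 0-1 fraction, hence the division by 100.
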
function transformHierarchicalMergerParams(
params: HierarchicalMergerFormSchemaType,
) {
const levels = params.levels.map((x) =>
transformObjectArrayToPureArray(x.expressions, 'expression'),
);

return { ...params, hierarchy: Number(params.hierarchy), levels };
}

function transformExtractorParams(params: ExtractorFormSchemaType) {
return { ...params, prompts: [{ content: params.prompts, role: 'user' }] };
}

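And the same idea for the last two transforms (sample values invented):

transformHierarchicalMergerParams({
  hierarchy: '2', // select values arrive as strings
  levels: [
    { expressions: [{ expression: '^#\\s' }] },
    { expressions: [{ expression: '^##\\s' }, { expression: '' }] },
  ],
} as HierarchicalMergerFormSchemaType);
// => { hierarchy: 2, levels: [['^#\\s'], ['^##\\s']] }

transformExtractorParams({
  prompts: 'Summarize the chunk.',
} as ExtractorFormSchemaType);
// => { prompts: [{ content: 'Summarize the chunk.', role: 'user' }] }
// The single textarea string becomes the one-element message list the backend expects.
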
// construct a dsl based on the node information of the graph
export const buildDslComponentsByGraph = (
nodes: RAGFlowNodeType[],
@ -202,6 +302,21 @@ export const buildDslComponentsByGraph = (
params = buildCategorize(edges, nodes, id);
break;

case Operator.Parser:
params = transformParserParams(params);
break;

case Operator.Splitter:
params = transformSplitterParams(params);
break;

case Operator.HierarchicalMerger:
params = transformHierarchicalMergerParams(params);
break;
case Operator.Extractor:
params = transformExtractorParams(params);
break;

default:
break;
}

@ -148,6 +148,6 @@ export interface NavigateToDataflowResultProps {
[PipelineResultSearchParams.AgentTitle]?: string;
[PipelineResultSearchParams.IsReadOnly]?: string;
[PipelineResultSearchParams.Type]: string;
[PipelineResultSearchParams.CreatedBy]: string;
[PipelineResultSearchParams.DocumentExtension]: string;
[PipelineResultSearchParams.CreatedBy]?: string;
[PipelineResultSearchParams.DocumentExtension]?: string;
}

@ -311,7 +311,6 @@ const FileLogsTable: FC<FileLogsTableProps> = ({
data,
pagination,
setPagination,
loading,
active = LogTabs.FILE_LOGS,
}) => {
const [sorting, setSorting] = useState<SortingState>([]);
@ -328,13 +327,13 @@ const FileLogsTable: FC<FileLogsTableProps> = ({
fileName: row.original.document_name,
source: row.original.source_from,
task: row.original?.task_type,
status: row.original.statusName,
status: row.original.status as RunningStatus,
startDate: formatDate(row.original.process_begin_at),
duration: formatSecondsToHumanReadable(
row.original.process_duration || 0,
),
details: row.original.progress_msg,
};
} as unknown as IFileLogItem;
console.log('logDetail', logDetail);
setLogInfo(logDetail);
setIsModalVisible(true);
