Compare commits

...

7 Commits

Author SHA1 Message Date
ad56137a59 Feat: OpenSearch's support for new embedding models (#10494)
### What problem does this PR solve?

fix issues:https://github.com/infiniflow/ragflow/issues/10402

Newly released embedding models support vector dimensions up to 4096,
while the current OpenSearch support tops out at 1536 dimensions.
In my testing, a 4096-dimension vector is treated as a float type,
which is unacceptable in OpenSearch.

Besides, OpenSearch supports up to 16000 dimensions by default with the
vector engine (Faiss), according to:
https://docs.opensearch.org/2.19/field-types/supported-field-types/knn-methods-engines/

I added support for up to 10240 dimensions for OpenSearch, which I think
will be sufficient in the future.

As I tested, it worked well on my own server (treated as knn_vector) by
using qwen3-embedding:8b as the embedding model:
<img width="1338" height="790" alt="image"
src="https://github.com/user-attachments/assets/a9b2d284-fcf6-4cea-859a-6aadccf36ace"
/>


### Type of change

- [x] New Feature (non-breaking change which adds functionality)


By the way, I will still focus on the stuff about
Elasticsearch/Opensearch as search engines and vector databases.

Co-authored-by: 张雨豪 <zhangyh80@chinatelecom.cn>
2025-10-11 19:58:12 +08:00
2828e321bc Fix: remove lang for audio. (#10496)
### What problem does this PR solve?


### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2025-10-11 19:38:07 +08:00
932781ea4e Fix: incorrect agent template #10393 (#10491)
### What problem does this PR solve?

Fix: incorrect agent template #10493

### Type of change

- [X] Bug Fix (non-breaking change which fixes an issue)
2025-10-11 19:37:42 +08:00
5200711441 Feat: add support for multi-column PDF parsing (#10475)
### What problem does this PR solve?

Add support for multi-column PDF parsing. #9878, #9919.

Two-column sample:
<img width="1885" height="1020" alt="image"
src="https://github.com/user-attachments/assets/0270c028-2db8-4ca6-a4b7-cd5830882d28"
/>

Three-column sample: 
<img width="1881" height="992" alt="image"
src="https://github.com/user-attachments/assets/9ee88844-d5b1-4927-9e4e-3bd810d6e03a"
/>

Single-column sample:
<img width="1883" height="1042" alt="image"
src="https://github.com/user-attachments/assets/e93d3d18-43c3-4067-b5fa-e454ed0ab093"
/>



### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
- [x] New Feature (non-breaking change which adds functionality)
2025-10-11 18:46:09 +08:00
c21cea2038 Fix: Added table of contents extraction functionality and optimized form item layout #9869 (#10492)
### What problem does this PR solve?

Fix: Added table of contents extraction functionality and optimized form
item layout #9869

- Added `EnableTocToggle` component to toggle table of contents
extraction on and off
- Added multiple parser configuration components (such as naive, book,
laws, etc.), displaying different parser components based on built-in
slicing methods

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2025-10-11 18:45:55 +08:00
6a0f448419 Feat: Modify the default style of the agent node anchor #9869 (#10489)
### What problem does this PR solve?

Feat: Modify the default style of the agent node anchor #9869

### Type of change


- [x] New Feature (non-breaking change which adds functionality)
2025-10-11 18:45:38 +08:00
7d2f65671f Feat: debugging toc part. (#10486)
### What problem does this PR solve?

#10436

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
2025-10-11 18:45:21 +08:00
70 changed files with 1320 additions and 753 deletions

File diff suppressed because one or more lines are too long

View File

@ -2816,6 +2816,13 @@
"tags": "LLM,TEXT EMBEDDING,TEXT RE-RANK,IMAGE2TEXT", "tags": "LLM,TEXT EMBEDDING,TEXT RE-RANK,IMAGE2TEXT",
"status": "1", "status": "1",
"llm": [ "llm": [
{
"llm_name":"THUDM/GLM-4.1V-9B-Thinking",
"tags":"LLM,CHAT,IMAGE2TEXT, 64k",
"max_tokens":64000,
"model_type":"chat",
"is_tools": false
},
{ {
"llm_name": "Qwen/Qwen3-Embedding-8B", "llm_name": "Qwen/Qwen3-Embedding-8B",
"tags": "TEXT EMBEDDING,TEXT RE-RANK,32k", "tags": "TEXT EMBEDDING,TEXT RE-RANK,32k",
@ -3145,13 +3152,6 @@
"model_type": "chat", "model_type": "chat",
"is_tools": true "is_tools": true
}, },
{
"llm_name": "Qwen/Qwen2-1.5B-Instruct",
"tags": "LLM,CHAT,32k",
"max_tokens": 32000,
"model_type": "chat",
"is_tools": true
},
{ {
"llm_name": "Pro/Qwen/Qwen2.5-Coder-7B-Instruct", "llm_name": "Pro/Qwen/Qwen2.5-Coder-7B-Instruct",
"tags": "LLM,CHAT,32k", "tags": "LLM,CHAT,32k",
@ -3159,13 +3159,6 @@
"model_type": "chat", "model_type": "chat",
"is_tools": false "is_tools": false
}, },
{
"llm_name": "Pro/Qwen/Qwen2-VL-7B-Instruct",
"tags": "LLM,CHAT,IMAGE2TEXT,32k",
"max_tokens": 32000,
"model_type": "image2text",
"is_tools": false
},
{ {
"llm_name": "Pro/Qwen/Qwen2.5-7B-Instruct", "llm_name": "Pro/Qwen/Qwen2.5-7B-Instruct",
"tags": "LLM,CHAT,32k", "tags": "LLM,CHAT,32k",

View File

@ -200,6 +200,61 @@
} }
} }
}, },
{
"knn_vector": {
"match": "*_2048_vec",
"mapping": {
"type": "knn_vector",
"index": true,
"space_type": "cosinesimil",
"dimension": 2048
}
}
},
{
"knn_vector": {
"match": "*_4096_vec",
"mapping": {
"type": "knn_vector",
"index": true,
"space_type": "cosinesimil",
"dimension": 4096
}
}
},
{
"knn_vector": {
"match": "*_6144_vec",
"mapping": {
"type": "knn_vector",
"index": true,
"space_type": "cosinesimil",
"dimension": 6144
}
}
},
{
"knn_vector": {
"match": "*_8192_vec",
"mapping": {
"type": "knn_vector",
"index": true,
"space_type": "cosinesimil",
"dimension": 8192
}
}
},
{
"knn_vector": {
"match": "*_10240_vec",
"mapping": {
"type": "knn_vector",
"index": true,
"space_type": "cosinesimil",
"dimension": 10240
}
}
},
{ {
"binary": { "binary": {
"match": "*_bin", "match": "*_bin",

View File

@ -17,7 +17,6 @@
import re import re
import mistune
from markdown import markdown from markdown import markdown
@ -117,8 +116,6 @@ class MarkdownElementExtractor:
def __init__(self, markdown_content): def __init__(self, markdown_content):
self.markdown_content = markdown_content self.markdown_content = markdown_content
self.lines = markdown_content.split("\n") self.lines = markdown_content.split("\n")
self.ast_parser = mistune.create_markdown(renderer="ast")
self.ast_nodes = self.ast_parser(markdown_content)
def extract_elements(self): def extract_elements(self):
"""Extract individual elements (headers, code blocks, lists, etc.)""" """Extract individual elements (headers, code blocks, lists, etc.)"""

View File

@ -15,11 +15,13 @@
# #
import logging import logging
import math
import os import os
import random import random
import re import re
import sys import sys
import threading import threading
from collections import Counter, defaultdict
from copy import deepcopy from copy import deepcopy
from io import BytesIO from io import BytesIO
from timeit import default_timer as timer from timeit import default_timer as timer
@ -349,9 +351,78 @@ class RAGFlowPdfParser:
self.boxes[i]["top"] += self.page_cum_height[self.boxes[i]["page_number"] - 1] self.boxes[i]["top"] += self.page_cum_height[self.boxes[i]["page_number"] - 1]
self.boxes[i]["bottom"] += self.page_cum_height[self.boxes[i]["page_number"] - 1] self.boxes[i]["bottom"] += self.page_cum_height[self.boxes[i]["page_number"] - 1]
def _text_merge(self): def _assign_column(self, boxes, zoomin=3):
if not boxes:
return boxes
if all("col_id" in b for b in boxes):
return boxes
by_page = defaultdict(list)
for b in boxes:
by_page[b["page_number"]].append(b)
page_info = {} # pg -> dict(page_w, left_edge, cand_cols)
counter = Counter()
for pg, bxs in by_page.items():
if not bxs:
page_info[pg] = {"page_w": 1.0, "left_edge": 0.0, "cand": 1}
counter[1] += 1
continue
if hasattr(self, "page_images") and self.page_images and len(self.page_images) >= pg:
page_w = self.page_images[pg - 1].size[0] / max(1, zoomin)
left_edge = 0.0
else:
xs0 = [box["x0"] for box in bxs]
xs1 = [box["x1"] for box in bxs]
left_edge = float(min(xs0))
page_w = max(1.0, float(max(xs1) - left_edge))
widths = [max(1.0, (box["x1"] - box["x0"])) for box in bxs]
median_w = float(np.median(widths)) if widths else 1.0
raw_cols = int(page_w / max(1.0, median_w))
# cand = raw_cols if (raw_cols >= 2 and median_w < page_w / raw_cols * 0.8) else 1
cand = raw_cols
page_info[pg] = {"page_w": page_w, "left_edge": left_edge, "cand": cand}
counter[cand] += 1
logging.info(f"[Page {pg}] median_w={median_w:.2f}, page_w={page_w:.2f}, raw_cols={raw_cols}, cand={cand}")
global_cols = counter.most_common(1)[0][0]
logging.info(f"Global column_num decided by majority: {global_cols}")
for pg, bxs in by_page.items():
if not bxs:
continue
page_w = page_info[pg]["page_w"]
left_edge = page_info[pg]["left_edge"]
if global_cols == 1:
for box in bxs:
box["col_id"] = 0
continue
for box in bxs:
w = box["x1"] - box["x0"]
if w >= 0.8 * page_w:
box["col_id"] = 0
continue
cx = 0.5 * (box["x0"] + box["x1"])
norm_cx = (cx - left_edge) / page_w
norm_cx = max(0.0, min(norm_cx, 0.999999))
box["col_id"] = int(min(global_cols - 1, norm_cx * global_cols))
return boxes
def _text_merge(self, zoomin=3):
# merge adjusted boxes # merge adjusted boxes
bxs = self.boxes bxs = self._assign_column(self.boxes, zoomin)
def end_with(b, txt): def end_with(b, txt):
txt = txt.strip() txt = txt.strip()
@ -367,9 +438,15 @@ class RAGFlowPdfParser:
while i < len(bxs) - 1: while i < len(bxs) - 1:
b = bxs[i] b = bxs[i]
b_ = bxs[i + 1] b_ = bxs[i + 1]
if b["page_number"] != b_["page_number"] or b.get("col_id") != b_.get("col_id"):
i += 1
continue
if b.get("layoutno", "0") != b_.get("layoutno", "1") or b.get("layout_type", "") in ["table", "figure", "equation"]: if b.get("layoutno", "0") != b_.get("layoutno", "1") or b.get("layout_type", "") in ["table", "figure", "equation"]:
i += 1 i += 1
continue continue
if abs(self._y_dis(b, b_)) < self.mean_height[bxs[i]["page_number"] - 1] / 3: if abs(self._y_dis(b, b_)) < self.mean_height[bxs[i]["page_number"] - 1] / 3:
# merge # merge
bxs[i]["x1"] = b_["x1"] bxs[i]["x1"] = b_["x1"]
@ -379,50 +456,49 @@ class RAGFlowPdfParser:
bxs.pop(i + 1) bxs.pop(i + 1)
continue continue
i += 1 i += 1
continue
dis_thr = 1
dis = b["x1"] - b_["x0"]
if b.get("layout_type", "") != "text" or b_.get("layout_type", "") != "text":
if end_with(b, "") or start_with(b_, ""):
dis_thr = -8
else:
i += 1
continue
if abs(self._y_dis(b, b_)) < self.mean_height[bxs[i]["page_number"] - 1] / 5 and dis >= dis_thr and b["x1"] < b_["x1"]:
# merge
bxs[i]["x1"] = b_["x1"]
bxs[i]["top"] = (b["top"] + b_["top"]) / 2
bxs[i]["bottom"] = (b["bottom"] + b_["bottom"]) / 2
bxs[i]["text"] += b_["text"]
bxs.pop(i + 1)
continue
i += 1
self.boxes = bxs self.boxes = bxs
def _naive_vertical_merge(self, zoomin=3): def _naive_vertical_merge(self, zoomin=3):
import math bxs = self._assign_column(self.boxes, zoomin)
bxs = Recognizer.sort_Y_firstly(self.boxes, np.median(self.mean_height) / 3)
column_width = np.median([b["x1"] - b["x0"] for b in self.boxes]) grouped = defaultdict(list)
if not column_width or math.isnan(column_width): for b in bxs:
column_width = self.mean_width[0] grouped[(b["page_number"], b.get("col_id", 0))].append(b)
self.column_num = int(self.page_images[0].size[0] / zoomin / column_width)
if column_width < self.page_images[0].size[0] / zoomin / self.column_num: merged_boxes = []
logging.info("Multi-column................... {} {}".format(column_width, self.page_images[0].size[0] / zoomin / self.column_num)) for (pg, col), bxs in grouped.items():
self.boxes = self.sort_X_by_page(self.boxes, column_width / self.column_num) bxs = sorted(bxs, key=lambda x: (x["top"], x["x0"]))
if not bxs:
continue
mh = self.mean_height[pg - 1] if self.mean_height else np.median([b["bottom"] - b["top"] for b in bxs]) or 10
i = 0 i = 0
while i + 1 < len(bxs): while i + 1 < len(bxs):
b = bxs[i] b = bxs[i]
b_ = bxs[i + 1] b_ = bxs[i + 1]
if b["page_number"] < b_["page_number"] and re.match(r"[0-9 •一—-]+$", b["text"]): if b["page_number"] < b_["page_number"] and re.match(r"[0-9 •一—-]+$", b["text"]):
bxs.pop(i) bxs.pop(i)
continue continue
if not b["text"].strip(): if not b["text"].strip():
bxs.pop(i) bxs.pop(i)
continue continue
if not b["text"].strip() or b.get("layoutno") != b_.get("layoutno"):
i += 1
continue
if b_["top"] - b["bottom"] > mh * 1.5:
i += 1
continue
overlap = max(0, min(b["x1"], b_["x1"]) - max(b["x0"], b_["x0"]))
if overlap / max(1, min(b["x1"] - b["x0"], b_["x1"] - b_["x0"])) < 0.3:
i += 1
continue
concatting_feats = [ concatting_feats = [
b["text"].strip()[-1] in ",;:'\",、‘“;:-", b["text"].strip()[-1] in ",;:'\",、‘“;:-",
len(b["text"].strip()) > 1 and b["text"].strip()[-2] in ",;:'\",‘“、;:", len(b["text"].strip()) > 1 and b["text"].strip()[-2] in ",;:'\",‘“、;:",
@ -449,13 +525,39 @@ class RAGFlowPdfParser:
) )
i += 1 i += 1
continue continue
# merge up and down
b["text"] = (b["text"].rstrip() + " " + b_["text"].lstrip()).strip()
b["bottom"] = b_["bottom"] b["bottom"] = b_["bottom"]
b["text"] += b_["text"]
b["x0"] = min(b["x0"], b_["x0"]) b["x0"] = min(b["x0"], b_["x0"])
b["x1"] = max(b["x1"], b_["x1"]) b["x1"] = max(b["x1"], b_["x1"])
bxs.pop(i + 1) bxs.pop(i + 1)
self.boxes = bxs
merged_boxes.extend(bxs)
self.boxes = sorted(merged_boxes, key=lambda x: (x["page_number"], x.get("col_id", 0), x["top"]))
def _final_reading_order_merge(self, zoomin=3):
if not self.boxes:
return
self.boxes = self._assign_column(self.boxes, zoomin=zoomin)
pages = defaultdict(lambda: defaultdict(list))
for b in self.boxes:
pg = b["page_number"]
col = b.get("col_id", 0)
pages[pg][col].append(b)
for pg in pages:
for col in pages[pg]:
pages[pg][col].sort(key=lambda x: (x["top"], x["x0"]))
new_boxes = []
for pg in sorted(pages.keys()):
for col in sorted(pages[pg].keys()):
new_boxes.extend(pages[pg][col])
self.boxes = new_boxes
def _concat_downward(self, concat_between_pages=True): def _concat_downward(self, concat_between_pages=True):
self.boxes = Recognizer.sort_Y_firstly(self.boxes, 0) self.boxes = Recognizer.sort_Y_firstly(self.boxes, 0)
@ -997,7 +1099,7 @@ class RAGFlowPdfParser:
self.__ocr(i + 1, img, chars, zoomin, id) self.__ocr(i + 1, img, chars, zoomin, id)
if callback and i % 6 == 5: if callback and i % 6 == 5:
callback((i + 1) * 0.6 / len(self.page_images), msg="") callback((i + 1) * 0.6 / len(self.page_images))
async def __img_ocr_launcher(): async def __img_ocr_launcher():
def __ocr_preprocess(): def __ocr_preprocess():
@ -1074,7 +1176,6 @@ class RAGFlowPdfParser:
def insert_table_figures(tbls_or_figs, layout_type): def insert_table_figures(tbls_or_figs, layout_type):
def min_rectangle_distance(rect1, rect2): def min_rectangle_distance(rect1, rect2):
import math
pn1, left1, right1, top1, bottom1 = rect1 pn1, left1, right1, top1, bottom1 = rect1
pn2, left2, right2, top2, bottom2 = rect2 pn2, left2, right2, top2, bottom2 = rect2
if right1 >= left2 and right2 >= left1 and bottom1 >= top2 and bottom2 >= top1: if right1 >= left2 and right2 >= left1 and bottom1 >= top2 and bottom2 >= top1:
@ -1095,7 +1196,9 @@ class RAGFlowPdfParser:
for (img, txt), poss in tbls_or_figs: for (img, txt), poss in tbls_or_figs:
bboxes = [(i, (b["page_number"], b["x0"], b["x1"], b["top"], b["bottom"])) for i, b in enumerate(self.boxes)] bboxes = [(i, (b["page_number"], b["x0"], b["x1"], b["top"], b["bottom"])) for i, b in enumerate(self.boxes)]
dists = [(min_rectangle_distance((pn, left, right, top+self.page_cum_height[pn], bott+self.page_cum_height[pn]), rect),i) for i, rect in bboxes for pn, left, right, top, bott in poss] dists = [
(min_rectangle_distance((pn, left, right, top + self.page_cum_height[pn], bott + self.page_cum_height[pn]), rect), i) for i, rect in bboxes for pn, left, right, top, bott in poss
]
min_i = np.argmin(dists, axis=0)[0] min_i = np.argmin(dists, axis=0)[0]
min_i, rect = bboxes[dists[min_i][-1]] min_i, rect = bboxes[dists[min_i][-1]]
if isinstance(txt, list): if isinstance(txt, list):
@ -1103,10 +1206,20 @@ class RAGFlowPdfParser:
pn, left, right, top, bott = poss[0] pn, left, right, top, bott = poss[0]
if self.boxes[min_i]["bottom"] < top + self.page_cum_height[pn]: if self.boxes[min_i]["bottom"] < top + self.page_cum_height[pn]:
min_i += 1 min_i += 1
self.boxes.insert(min_i, { self.boxes.insert(
"page_number": pn+1, "x0": left, "x1": right, "top": top+self.page_cum_height[pn], "bottom": bott+self.page_cum_height[pn], "layout_type": layout_type, "text": txt, "image": img, min_i,
"positions": [[pn+1, int(left), int(right), int(top), int(bott)]] {
}) "page_number": pn + 1,
"x0": left,
"x1": right,
"top": top + self.page_cum_height[pn],
"bottom": bott + self.page_cum_height[pn],
"layout_type": layout_type,
"text": txt,
"image": img,
"positions": [[pn + 1, int(left), int(right), int(top), int(bott)]],
},
)
for b in self.boxes: for b in self.boxes:
b["position_tag"] = self._line_tag(b, zoomin) b["position_tag"] = self._line_tag(b, zoomin)

View File

@ -328,7 +328,7 @@ class Pdf(PdfParser):
callback(0.65, "Table analysis ({:.2f}s)".format(timer() - start)) callback(0.65, "Table analysis ({:.2f}s)".format(timer() - start))
start = timer() start = timer()
self._text_merge() self._text_merge(zoomin=zoomin)
callback(0.67, "Text merged ({:.2f}s)".format(timer() - start)) callback(0.67, "Text merged ({:.2f}s)".format(timer() - start))
if separate_tables_figures: if separate_tables_figures:
@ -340,6 +340,7 @@ class Pdf(PdfParser):
tbls = self._extract_table_figure(True, zoomin, True, True) tbls = self._extract_table_figure(True, zoomin, True, True)
self._naive_vertical_merge() self._naive_vertical_merge()
self._concat_downward() self._concat_downward()
self._final_reading_order_merge()
# self._filter_forpages() # self._filter_forpages()
logging.info("layouts cost: {}s".format(timer() - first_start)) logging.info("layouts cost: {}s".format(timer() - first_start))
return [(b["text"], self._line_tag(b, zoomin)) for b in self.boxes], tbls return [(b["text"], self._line_tag(b, zoomin)) for b in self.boxes], tbls

View File

@ -184,8 +184,6 @@ class ParserParam(ProcessParamBase):
audio_config = self.setups.get("audio", "") audio_config = self.setups.get("audio", "")
if audio_config: if audio_config:
self.check_empty(audio_config.get("llm_id"), "Audio VLM") self.check_empty(audio_config.get("llm_id"), "Audio VLM")
audio_language = audio_config.get("lang", "")
self.check_empty(audio_language, "Language")
email_config = self.setups.get("email", "") email_config = self.setups.get("email", "")
if email_config: if email_config:
@ -348,15 +346,13 @@ class Parser(ProcessBase):
conf = self._param.setups["audio"] conf = self._param.setups["audio"]
self.set_output("output_format", conf["output_format"]) self.set_output("output_format", conf["output_format"])
lang = conf["lang"]
_, ext = os.path.splitext(name) _, ext = os.path.splitext(name)
with tempfile.NamedTemporaryFile(suffix=ext) as tmpf: with tempfile.NamedTemporaryFile(suffix=ext) as tmpf:
tmpf.write(blob) tmpf.write(blob)
tmpf.flush() tmpf.flush()
tmp_path = os.path.abspath(tmpf.name) tmp_path = os.path.abspath(tmpf.name)
seq2txt_mdl = LLMBundle(self._canvas.get_tenant_id(), LLMType.SPEECH2TEXT, lang=lang) seq2txt_mdl = LLMBundle(self._canvas.get_tenant_id(), LLMType.SPEECH2TEXT)
txt = seq2txt_mdl.transcription(tmp_path) txt = seq2txt_mdl.transcription(tmp_path)
self.set_output("text", txt) self.set_output("text", txt)

View File

@ -25,7 +25,7 @@ class SplitterFromUpstream(BaseModel):
file: dict | None = Field(default=None) file: dict | None = Field(default=None)
chunks: list[dict[str, Any]] | None = Field(default=None) chunks: list[dict[str, Any]] | None = Field(default=None)
output_format: Literal["json", "markdown", "text", "html"] | None = Field(default=None) output_format: Literal["json", "markdown", "text", "html", "chunks"] | None = Field(default=None)
json_result: list[dict[str, Any]] | None = Field(default=None, alias="json") json_result: list[dict[str, Any]] | None = Field(default=None, alias="json")
markdown_result: str | None = Field(default=None, alias="markdown") markdown_result: str | None = Field(default=None, alias="markdown")

View File

@ -126,7 +126,7 @@ class Tokenizer(ProcessBase):
if ck.get("summary"): if ck.get("summary"):
ck["content_ltks"] = rag_tokenizer.tokenize(str(ck["summary"])) ck["content_ltks"] = rag_tokenizer.tokenize(str(ck["summary"]))
ck["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(ck["content_ltks"]) ck["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(ck["content_ltks"])
else: elif ck.get("text"):
ck["content_ltks"] = rag_tokenizer.tokenize(ck["text"]) ck["content_ltks"] = rag_tokenizer.tokenize(ck["text"])
ck["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(ck["content_ltks"]) ck["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(ck["content_ltks"])
if i % 100 == 99: if i % 100 == 99:
@ -155,6 +155,8 @@ class Tokenizer(ProcessBase):
for i, ck in enumerate(chunks): for i, ck in enumerate(chunks):
ck["title_tks"] = rag_tokenizer.tokenize(re.sub(r"\.[a-zA-Z]+$", "", from_upstream.name)) ck["title_tks"] = rag_tokenizer.tokenize(re.sub(r"\.[a-zA-Z]+$", "", from_upstream.name))
ck["title_sm_tks"] = rag_tokenizer.fine_grained_tokenize(ck["title_tks"]) ck["title_sm_tks"] = rag_tokenizer.fine_grained_tokenize(ck["title_tks"])
if not ck.get("text"):
continue
ck["content_ltks"] = rag_tokenizer.tokenize(ck["text"]) ck["content_ltks"] = rag_tokenizer.tokenize(ck["text"])
ck["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(ck["content_ltks"]) ck["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(ck["content_ltks"])
if i % 100 == 99: if i % 100 == 99:

View File

@ -613,13 +613,13 @@ def naive_merge(sections: str | list, chunk_token_num=128, delimiter="\n。
dels = get_delimiters(delimiter) dels = get_delimiters(delimiter)
for sec, pos in sections: for sec, pos in sections:
if num_tokens_from_string(sec) < chunk_token_num: if num_tokens_from_string(sec) < chunk_token_num:
add_chunk(sec, pos) add_chunk("\n"+sec, pos)
continue continue
split_sec = re.split(r"(%s)" % dels, sec, flags=re.DOTALL) split_sec = re.split(r"(%s)" % dels, sec, flags=re.DOTALL)
for sub_sec in split_sec: for sub_sec in split_sec:
if re.match(f"^{dels}$", sub_sec): if re.match(f"^{dels}$", sub_sec):
continue continue
add_chunk(sub_sec, pos) add_chunk("\n"+sub_sec, pos)
return cks return cks
@ -669,13 +669,13 @@ def naive_merge_with_images(texts, images, chunk_token_num=128, delimiter="\n。
for sub_sec in split_sec: for sub_sec in split_sec:
if re.match(f"^{dels}$", sub_sec): if re.match(f"^{dels}$", sub_sec):
continue continue
add_chunk(sub_sec, image, text_pos) add_chunk("\n"+sub_sec, image, text_pos)
else: else:
split_sec = re.split(r"(%s)" % dels, text) split_sec = re.split(r"(%s)" % dels, text)
for sub_sec in split_sec: for sub_sec in split_sec:
if re.match(f"^{dels}$", sub_sec): if re.match(f"^{dels}$", sub_sec):
continue continue
add_chunk(sub_sec, image) add_chunk("\n"+sub_sec, image)
return cks, result_images return cks, result_images
@ -757,7 +757,7 @@ def naive_merge_docx(sections, chunk_token_num=128, delimiter="\n。"):
for sub_sec in split_sec: for sub_sec in split_sec:
if re.match(f"^{dels}$", sub_sec): if re.match(f"^{dels}$", sub_sec):
continue continue
add_chunk(sub_sec, image,"") add_chunk("\n"+sub_sec, image,"")
line = "" line = ""
if line: if line:
@ -765,7 +765,7 @@ def naive_merge_docx(sections, chunk_token_num=128, delimiter="\n。"):
for sub_sec in split_sec: for sub_sec in split_sec:
if re.match(f"^{dels}$", sub_sec): if re.match(f"^{dels}$", sub_sec):
continue continue
add_chunk(sub_sec, image,"") add_chunk("\n"+sub_sec, image,"")
return cks, images return cks, images

View File

@ -23,7 +23,7 @@ import jinja2
import json_repair import json_repair
import trio import trio
from api.utils import hash_str2int from api.utils import hash_str2int
from rag.nlp import is_chinese from rag.nlp import rag_tokenizer
from rag.prompts.template import load_prompt from rag.prompts.template import load_prompt
from rag.settings import TAG_FLD from rag.settings import TAG_FLD
from rag.utils import encoder, num_tokens_from_string from rag.utils import encoder, num_tokens_from_string
@ -672,7 +672,7 @@ def assign_toc_levels(toc_secs, chat_mdl, gen_conf = {"temperature": 0.2}):
TOC_FROM_TEXT_SYSTEM = load_prompt("toc_from_text_system") TOC_FROM_TEXT_SYSTEM = load_prompt("toc_from_text_system")
TOC_FROM_TEXT_USER = load_prompt("toc_from_text_user") TOC_FROM_TEXT_USER = load_prompt("toc_from_text_user")
# Generate TOC from text chunks with text llms # Generate TOC from text chunks with text llms
async def gen_toc_from_text(txt_info: dict, chat_mdl): async def gen_toc_from_text(txt_info: dict, chat_mdl, callback=None):
try: try:
ans = gen_json( ans = gen_json(
PROMPT_JINJA_ENV.from_string(TOC_FROM_TEXT_SYSTEM).render(), PROMPT_JINJA_ENV.from_string(TOC_FROM_TEXT_SYSTEM).render(),
@ -682,6 +682,8 @@ async def gen_toc_from_text(txt_info: dict, chat_mdl):
) )
print(ans, "::::::::::::::::::::::::::::::::::::", flush=True) print(ans, "::::::::::::::::::::::::::::::::::::", flush=True)
txt_info["toc"] = ans if ans else [] txt_info["toc"] = ans if ans else []
if callback:
callback(msg="")
except Exception as e: except Exception as e:
logging.exception(e) logging.exception(e)
@ -707,14 +709,14 @@ def split_chunks(chunks, max_length: int):
return result return result
async def run_toc_from_text(chunks, chat_mdl): async def run_toc_from_text(chunks, chat_mdl, callback=None):
input_budget = int(chat_mdl.max_length * INPUT_UTILIZATION) - num_tokens_from_string( input_budget = int(chat_mdl.max_length * INPUT_UTILIZATION) - num_tokens_from_string(
TOC_FROM_TEXT_USER + TOC_FROM_TEXT_SYSTEM TOC_FROM_TEXT_USER + TOC_FROM_TEXT_SYSTEM
) )
input_budget = 1024 if input_budget > 1024 else input_budget input_budget = 1024 if input_budget > 1024 else input_budget
chunk_sections = split_chunks(chunks, input_budget) chunk_sections = split_chunks(chunks, input_budget)
res = [] titles = []
chunks_res = [] chunks_res = []
async with trio.open_nursery() as nursery: async with trio.open_nursery() as nursery:
@ -722,21 +724,21 @@ async def run_toc_from_text(chunks, chat_mdl):
if not chunk: if not chunk:
continue continue
chunks_res.append({"chunks": chunk}) chunks_res.append({"chunks": chunk})
nursery.start_soon(gen_toc_from_text, chunks_res[-1], chat_mdl) nursery.start_soon(gen_toc_from_text, chunks_res[-1], chat_mdl, callback)
for chunk in chunks_res: for chunk in chunks_res:
res.extend(chunk.get("toc", [])) titles.extend(chunk.get("toc", []))
print(res, ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>") print(titles, ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>")
# Filter out entries with title == -1 # Filter out entries with title == -1
prune = len(titles) > 512
max_len = 12 if prune else 22
filtered = [] filtered = []
for x in res: for x in titles:
if not x.get("title") or x["title"] == "-1": if not x.get("title") or x["title"] == "-1":
continue continue
if is_chinese(x["title"]) and len(x["title"]) > 12: if len(rag_tokenizer.tokenize(x["title"]).split(" ")) > max_len:
continue
if len(x["title"].split(" ")) > 12:
continue continue
if re.match(r"[0-9,.()/ -]+$", x["title"]): if re.match(r"[0-9,.()/ -]+$", x["title"]):
continue continue
@ -751,8 +753,12 @@ async def run_toc_from_text(chunks, chat_mdl):
toc_with_levels = assign_toc_levels(raw_structure, chat_mdl, {"temperature": 0.0, "top_p": 0.9}) toc_with_levels = assign_toc_levels(raw_structure, chat_mdl, {"temperature": 0.0, "top_p": 0.9})
# Merge structure and content (by index) # Merge structure and content (by index)
prune = len(toc_with_levels) > 512
max_lvl = sorted([t.get("level", "0") for t in toc_with_levels])[-1]
merged = [] merged = []
for _ , (toc_item, src_item) in enumerate(zip(toc_with_levels, filtered)): for _ , (toc_item, src_item) in enumerate(zip(toc_with_levels, filtered)):
if prune and toc_item.get("level", "0") >= max_lvl:
continue
merged.append({ merged.append({
"level": toc_item.get("level", "0"), "level": toc_item.get("level", "0"),
"title": toc_item.get("title", ""), "title": toc_item.get("title", ""),
@ -776,7 +782,7 @@ def relevant_chunks_with_toc(query: str, toc:list[dict], chat_mdl, topn: int=6):
print(ans, "::::::::::::::::::::::::::::::::::::", flush=True) print(ans, "::::::::::::::::::::::::::::::::::::", flush=True)
id2score = {} id2score = {}
for ti, sc in zip(toc, ans): for ti, sc in zip(toc, ans):
if sc.get("score", -1) < 1: if not isinstance(sc, dict) or sc.get("score", -1) < 1:
continue continue
for id in ti.get("ids", []): for id in ti.get("ids", []):
if id not in id2score: if id not in id2score:

View File

@ -370,14 +370,14 @@ async def build_chunks(task, progress_callback):
nursery.start_soon(doc_question_proposal, chat_mdl, d, task["parser_config"]["auto_questions"]) nursery.start_soon(doc_question_proposal, chat_mdl, d, task["parser_config"]["auto_questions"])
progress_callback(msg="Question generation {} chunks completed in {:.2f}s".format(len(docs), timer() - st)) progress_callback(msg="Question generation {} chunks completed in {:.2f}s".format(len(docs), timer() - st))
if task["parser_config"].get("toc_extraction", True): if task["parser_id"].lower() == "naive" and task["parser_config"].get("toc_extraction", False):
progress_callback(msg="Start to generate table of content ...") progress_callback(msg="Start to generate table of content ...")
chat_mdl = LLMBundle(task["tenant_id"], LLMType.CHAT, llm_name=task["llm_id"], lang=task["language"]) chat_mdl = LLMBundle(task["tenant_id"], LLMType.CHAT, llm_name=task["llm_id"], lang=task["language"])
docs = sorted(docs, key=lambda d:( docs = sorted(docs, key=lambda d:(
d.get("page_num_int", 0)[0] if isinstance(d.get("page_num_int", 0), list) else d.get("page_num_int", 0), d.get("page_num_int", 0)[0] if isinstance(d.get("page_num_int", 0), list) else d.get("page_num_int", 0),
d.get("top_int", 0)[0] if isinstance(d.get("top_int", 0), list) else d.get("top_int", 0) d.get("top_int", 0)[0] if isinstance(d.get("top_int", 0), list) else d.get("top_int", 0)
)) ))
toc: list[dict] = await run_toc_from_text([d["content_with_weight"] for d in docs], chat_mdl) toc: list[dict] = await run_toc_from_text([d["content_with_weight"] for d in docs], chat_mdl, progress_callback)
logging.info("------------ T O C -------------\n"+json.dumps(toc, ensure_ascii=False, indent=' ')) logging.info("------------ T O C -------------\n"+json.dumps(toc, ensure_ascii=False, indent=' '))
ii = 0 ii = 0
while ii < len(toc): while ii < len(toc):
@ -387,7 +387,7 @@ async def build_chunks(task, progress_callback):
toc[ii]["ids"] = [docs[idx]["id"]] toc[ii]["ids"] = [docs[idx]["id"]]
if ii == len(toc) -1: if ii == len(toc) -1:
break break
for jj in range(idx+1, int(toc[ii+1]["chunk_id"])): for jj in range(idx+1, int(toc[ii+1]["chunk_id"])+1):
toc[ii]["ids"].append(docs[jj]["id"]) toc[ii]["ids"].append(docs[jj]["id"])
except Exception as e: except Exception as e:
logging.exception(e) logging.exception(e)

View File

@ -20,6 +20,7 @@ import { IParserConfig } from '@/interfaces/database/document';
import { IChangeParserConfigRequestBody } from '@/interfaces/request/document'; import { IChangeParserConfigRequestBody } from '@/interfaces/request/document';
import { import {
ChunkMethodItem, ChunkMethodItem,
EnableTocToggle,
ParseTypeItem, ParseTypeItem,
} from '@/pages/dataset/dataset-setting/configuration/common-item'; } from '@/pages/dataset/dataset-setting/configuration/common-item';
import { zodResolver } from '@hookform/resolvers/zod'; import { zodResolver } from '@hookform/resolvers/zod';
@ -113,6 +114,7 @@ export function ChunkMethodDialog({
auto_keywords: z.coerce.number().optional(), auto_keywords: z.coerce.number().optional(),
auto_questions: z.coerce.number().optional(), auto_questions: z.coerce.number().optional(),
html4excel: z.boolean().optional(), html4excel: z.boolean().optional(),
toc_extraction: z.boolean().optional(),
// raptor: z // raptor: z
// .object({ // .object({
// use_raptor: z.boolean().optional(), // use_raptor: z.boolean().optional(),
@ -247,7 +249,7 @@ export function ChunkMethodDialog({
}, [parseType, form]); }, [parseType, form]);
return ( return (
<Dialog open onOpenChange={hideModal}> <Dialog open onOpenChange={hideModal}>
<DialogContent className="max-w-[50vw]"> <DialogContent className="max-w-[50vw] text-text-primary">
<DialogHeader> <DialogHeader>
<DialogTitle>{t('knowledgeDetails.chunkMethod')}</DialogTitle> <DialogTitle>{t('knowledgeDetails.chunkMethod')}</DialogTitle>
</DialogHeader> </DialogHeader>
@ -338,6 +340,7 @@ export function ChunkMethodDialog({
show={showAutoKeywords(selectedTag) || showExcelToHtml} show={showAutoKeywords(selectedTag) || showExcelToHtml}
className="space-y-3" className="space-y-3"
> >
<EnableTocToggle />
{showAutoKeywords(selectedTag) && ( {showAutoKeywords(selectedTag) && (
<> <>
<AutoKeywordsFormField></AutoKeywordsFormField> <AutoKeywordsFormField></AutoKeywordsFormField>

View File

@ -15,6 +15,7 @@ export function useDefaultParserValues() {
auto_keywords: 0, auto_keywords: 0,
auto_questions: 0, auto_questions: 0,
html4excel: false, html4excel: false,
toc_extraction: false,
// raptor: { // raptor: {
// use_raptor: false, // use_raptor: false,
// prompt: t('knowledgeConfiguration.promptText'), // prompt: t('knowledgeConfiguration.promptText'),

View File

@ -1,5 +1,7 @@
import { AgentCategory } from '@/constants/agent'; import { AgentCategory } from '@/constants/agent';
import { FormLayout } from '@/constants/form';
import { useTranslate } from '@/hooks/common-hooks'; import { useTranslate } from '@/hooks/common-hooks';
import { useNavigatePage } from '@/hooks/logic-hooks/navigate-hooks';
import { useFetchAgentList } from '@/hooks/use-agent-request'; import { useFetchAgentList } from '@/hooks/use-agent-request';
import { buildSelectOptions } from '@/utils/component-util'; import { buildSelectOptions } from '@/utils/component-util';
import { ArrowUpRight } from 'lucide-react'; import { ArrowUpRight } from 'lucide-react';
@ -21,18 +23,27 @@ export interface IDataPipelineSelectNode {
} }
interface IProps { interface IProps {
toDataPipeline?: () => void; showToDataPipeline?: boolean;
formFieldName: string; formFieldName: string;
isMult?: boolean; isMult?: boolean;
setDataList?: (data: IDataPipelineSelectNode[]) => void; setDataList?: (data: IDataPipelineSelectNode[]) => void;
layout?: FormLayout;
} }
export function DataFlowSelect(props: IProps) { export function DataFlowSelect(props: IProps) {
const { toDataPipeline, formFieldName, isMult = false, setDataList } = props; const {
showToDataPipeline,
formFieldName,
isMult = false,
setDataList,
layout = FormLayout.Vertical,
} = props;
const { t } = useTranslate('knowledgeConfiguration'); const { t } = useTranslate('knowledgeConfiguration');
const form = useFormContext(); const form = useFormContext();
const { navigateToAgents } = useNavigatePage();
const toDataPipLine = () => { const toDataPipLine = () => {
toDataPipeline?.(); navigateToAgents();
}; };
const { data: dataPipelineOptions } = useFetchAgentList({ const { data: dataPipelineOptions } = useFetchAgentList({
canvas_category: AgentCategory.DataflowCanvas, canvas_category: AgentCategory.DataflowCanvas,
@ -69,15 +80,16 @@ export function DataFlowSelect(props: IProps) {
name={formFieldName} name={formFieldName}
render={({ field }) => ( render={({ field }) => (
<FormItem className=" items-center space-y-0 "> <FormItem className=" items-center space-y-0 ">
{layout === FormLayout.Vertical && (
<div className="flex flex-col gap-1"> <div className="flex flex-col gap-1">
<div className="flex gap-2 justify-between "> <div className="flex gap-2 justify-between ">
<FormLabel <FormLabel
tooltip={t('dataFlowTip')} // tooltip={t('dataFlowTip')}
className="text-sm text-text-primary whitespace-wrap " className="text-sm text-text-primary whitespace-wrap "
> >
{t('dataPipeline')} {t('manualSetup')}
</FormLabel> </FormLabel>
{toDataPipeline && ( {showToDataPipeline && (
<div <div
className="text-sm flex text-text-primary cursor-pointer" className="text-sm flex text-text-primary cursor-pointer"
onClick={toDataPipLine} onClick={toDataPipLine}
@ -110,6 +122,50 @@ export function DataFlowSelect(props: IProps) {
</FormControl> </FormControl>
</div> </div>
</div> </div>
)}
{layout === FormLayout.Horizontal && (
<div className="flex gap-1 items-center">
<div className="flex gap-2 justify-between w-1/4">
<FormLabel
// tooltip={t('dataFlowTip')}
className="text-sm text-text-secondary whitespace-wrap "
>
{t('manualSetup')}
</FormLabel>
</div>
<div className="text-muted-foreground w-3/4 flex flex-col items-end">
{showToDataPipeline && (
<div
className="text-sm flex text-text-primary cursor-pointer"
onClick={toDataPipLine}
>
{t('buildItFromScratch')}
<ArrowUpRight size={14} />
</div>
)}
<FormControl>
<>
{!isMult && (
<SelectWithSearch
{...field}
placeholder={t('dataFlowPlaceholder')}
options={options}
/>
)}
{isMult && (
<MultiSelect
{...field}
onValueChange={field.onChange}
placeholder={t('dataFlowPlaceholder')}
options={options}
/>
)}
</>
</FormControl>
</div>
</div>
)}
<div className="flex pt-1"> <div className="flex pt-1">
<FormMessage /> <FormMessage />
</div> </div>

View File

@ -61,7 +61,7 @@ export function DelimiterFormField() {
<FormLabel <FormLabel
required required
tooltip={t('knowledgeDetails.delimiterTip')} tooltip={t('knowledgeDetails.delimiterTip')}
className="text-sm text-muted-foreground whitespace-break-spaces w-1/4" className="text-sm text-text-secondary whitespace-break-spaces w-1/4"
> >
{t('knowledgeDetails.delimiter')} {t('knowledgeDetails.delimiter')}
</FormLabel> </FormLabel>

View File

@ -28,7 +28,7 @@ export function ExcelToHtmlFormField() {
<div className="flex items-center gap-1"> <div className="flex items-center gap-1">
<FormLabel <FormLabel
tooltip={t('html4excelTip')} tooltip={t('html4excelTip')}
className="text-sm text-muted-foreground whitespace-break-spaces w-1/4" className="text-sm text-text-secondary whitespace-break-spaces w-1/4"
> >
{t('html4excel')} {t('html4excel')}
</FormLabel> </FormLabel>

View File

@ -79,7 +79,7 @@ export function LayoutRecognizeFormField({
> >
<FormLabel <FormLabel
tooltip={t('layoutRecognizeTip')} tooltip={t('layoutRecognizeTip')}
className={cn('text-sm text-muted-foreground whitespace-wrap', { className={cn('text-sm text-text-secondary whitespace-wrap', {
['w-1/4']: horizontal, ['w-1/4']: horizontal,
})} })}
> >

View File

@ -17,7 +17,7 @@ export function MaxTokenNumberFormField({ max = 2048, initialValue }: IProps) {
tooltip={t('chunkTokenNumberTip')} tooltip={t('chunkTokenNumberTip')}
max={max} max={max}
defaultValue={initialValue ?? 0} defaultValue={initialValue ?? 0}
layout={FormLayout.Vertical} layout={FormLayout.Horizontal}
></SliderInputFormField> ></SliderInputFormField>
); );
} }

View File

@ -36,7 +36,7 @@ export function SliderInputFormField({
tooltip, tooltip,
defaultValue, defaultValue,
className, className,
layout = FormLayout.Vertical, layout = FormLayout.Horizontal,
}: SliderInputFormFieldProps) { }: SliderInputFormFieldProps) {
const form = useFormContext(); const form = useFormContext();

View File

@ -1,5 +1,4 @@
import { cn } from '@/lib/utils'; import { cn } from '@/lib/utils';
import { Radio as LucideRadio } from 'lucide-react';
import React, { useContext, useState } from 'react'; import React, { useContext, useState } from 'react';
const RadioGroupContext = React.createContext<{ const RadioGroupContext = React.createContext<{
@ -57,7 +56,7 @@ function Radio({ value, checked, disabled, onChange, children }: RadioProps) {
onClick={handleClick} onClick={handleClick}
> >
{isChecked && ( {isChecked && (
<LucideRadio className="h-3 w-3 fill-primary text-primary" /> <div className="h-3 w-3 fill-primary text-primary bg-text-primary rounded-full" />
)} )}
</span> </span>
{children && <span className="text-foreground">{children}</span>} {children && <span className="text-foreground">{children}</span>}

View File

@ -19,7 +19,8 @@ export const useNavigatePage = () => {
const navigateToDataset = useCallback( const navigateToDataset = useCallback(
(id: string) => () => { (id: string) => () => {
navigate(`${Routes.DatasetBase}${Routes.DataSetOverview}/${id}`); // navigate(`${Routes.DatasetBase}${Routes.DataSetOverview}/${id}`);
navigate(`${Routes.Dataset}/${id}`);
}, },
[navigate], [navigate],
); );

View File

@ -116,7 +116,7 @@ export default {
generate: 'Generate', generate: 'Generate',
raptor: 'Raptor', raptor: 'Raptor',
processingType: 'Processing Type', processingType: 'Processing Type',
dataPipeline: 'Data Pipeline', dataPipeline: 'Ingestion pipeline',
operations: 'Operations', operations: 'Operations',
taskId: 'Task ID', taskId: 'Task ID',
duration: 'Duration', duration: 'Duration',
@ -126,8 +126,8 @@ export default {
startDate: 'Start Date', startDate: 'Start Date',
source: 'Source', source: 'Source',
fileName: 'File Name', fileName: 'File Name',
datasetLogs: 'Dataset Logs', datasetLogs: 'Dataset',
fileLogs: 'File Logs', fileLogs: 'File',
overview: 'Overview', overview: 'Overview',
success: 'Success', success: 'Success',
failed: 'Failed', failed: 'Failed',
@ -270,6 +270,9 @@ export default {
reRankModelWaring: 'Re-rank model is very time consuming.', reRankModelWaring: 'Re-rank model is very time consuming.',
}, },
knowledgeConfiguration: { knowledgeConfiguration: {
tocExtraction: 'toc toggle',
tocExtractionTip:
" For existing chunks, generate a hierarchical table of contents (one directory per file). During queries, when Directory Enhancement is activated, the system will use a large model to determine which directory items are relevant to the user's question, thereby identifying the relevant chunks.",
deleteGenerateModalContent: ` deleteGenerateModalContent: `
<p>Deleting the generated <strong class='text-text-primary'>{{type}}</strong> results <p>Deleting the generated <strong class='text-text-primary'>{{type}}</strong> results
will remove all derived entities and relationships from this dataset. will remove all derived entities and relationships from this dataset.
@ -284,11 +287,11 @@ export default {
fileFilter: 'File Filter', fileFilter: 'File Filter',
setDefaultTip: '', setDefaultTip: '',
setDefault: 'Set as Default', setDefault: 'Set as Default',
eidtLinkDataPipeline: 'Edit Data Pipeline', eidtLinkDataPipeline: 'Edit Ingestion pipeline',
linkPipelineSetTip: 'Manage data pipeline linkage with this dataset', linkPipelineSetTip: 'Manage Ingestion pipeline linkage with this dataset',
default: 'Default', default: 'Default',
dataPipeline: 'Data Pipeline', dataPipeline: 'Ingestion pipeline',
linkDataPipeline: 'Link Data Pipeline', linkDataPipeline: 'Link Ingestion pipeline',
enableAutoGenerate: 'Enable Auto Generate', enableAutoGenerate: 'Enable Auto Generate',
teamPlaceholder: 'Please select a team.', teamPlaceholder: 'Please select a team.',
dataFlowPlaceholder: 'Please select a pipeline.', dataFlowPlaceholder: 'Please select a pipeline.',
@ -1595,7 +1598,7 @@ This delimiter is used to split the input text into several text pieces echo of
createFromTemplate: 'Create from template', createFromTemplate: 'Create from template',
importJsonFile: 'Import JSON file', importJsonFile: 'Import JSON file',
ceateAgent: 'Agent flow', ceateAgent: 'Agent flow',
createPipeline: 'Data pipeline', createPipeline: 'Ingestion pipeline',
chooseAgentType: 'Choose Agent Type', chooseAgentType: 'Choose Agent Type',
}, },
llmTools: { llmTools: {
@ -1688,9 +1691,9 @@ This delimiter is used to split the input text into several text pieces echo of
<p>To keep them, please click Rerun to re-run the current stage.</p> `, <p>To keep them, please click Rerun to re-run the current stage.</p> `,
changeStepModalConfirmText: 'Switch Anyway', changeStepModalConfirmText: 'Switch Anyway',
changeStepModalCancelText: 'Cancel', changeStepModalCancelText: 'Cancel',
unlinkPipelineModalTitle: 'Unlink data pipeline', unlinkPipelineModalTitle: 'Unlink Ingestion pipeline',
unlinkPipelineModalContent: ` unlinkPipelineModalContent: `
<p>Once unlinked, this Dataset will no longer be connected to the current Data Pipeline.</p> <p>Once unlinked, this Dataset will no longer be connected to the current Ingestion pipeline.</p>
<p>Files that are already being parsed will continue until completion</p> <p>Files that are already being parsed will continue until completion</p>
<p>Files that are not yet parsed will no longer be processed</p> <br/> <p>Files that are not yet parsed will no longer be processed</p> <br/>
<p>Are you sure you want to proceed?</p> `, <p>Are you sure you want to proceed?</p> `,

View File

@ -114,8 +114,8 @@ export default {
startDate: '开始时间', startDate: '开始时间',
source: '来源', source: '来源',
fileName: '文件名', fileName: '文件名',
datasetLogs: '数据集日志', datasetLogs: '数据集',
fileLogs: '文件日志', fileLogs: '文件',
overview: '概览', overview: '概览',
success: '成功', success: '成功',
failed: '失败', failed: '失败',
@ -255,6 +255,9 @@ export default {
theDocumentBeingParsedCannotBeDeleted: '正在解析的文档不能被删除', theDocumentBeingParsedCannotBeDeleted: '正在解析的文档不能被删除',
}, },
knowledgeConfiguration: { knowledgeConfiguration: {
tocExtraction: '目录提取',
tocExtractionTip:
'对于已有的chunk生成层级结构的目录信息每个文件一个目录。在查询时激活`目录增强`后系统会用大模型去判断用户问题和哪些目录项相关从而找到相关的chunk。',
deleteGenerateModalContent: ` deleteGenerateModalContent: `
<p>删除生成的 <strong class='text-text-primary'>{{type}}</strong> 结果 <p>删除生成的 <strong class='text-text-primary'>{{type}}</strong> 结果
将从此数据集中移除所有派生实体和关系。 将从此数据集中移除所有派生实体和关系。

View File

@ -102,7 +102,7 @@ function InnerButtonEdge({
...showHighlight, ...showHighlight,
...placeholderHighlightStyle, ...placeholderHighlightStyle,
}} }}
className={cn('text-text-secondary')} className={cn('text-text-disabled')}
/> />
<EdgeLabelRenderer> <EdgeLabelRenderer>

View File

@ -1,4 +1,4 @@
import { useIsDarkTheme, useTheme } from '@/components/theme-provider'; import { useTheme } from '@/components/theme-provider';
import { import {
Tooltip, Tooltip,
TooltipContent, TooltipContent,
@ -19,7 +19,6 @@ import { NotebookPen } from 'lucide-react';
import { useCallback, useEffect, useState } from 'react'; import { useCallback, useEffect, useState } from 'react';
import { useTranslation } from 'react-i18next'; import { useTranslation } from 'react-i18next';
import { ChatSheet } from '../chat/chat-sheet'; import { ChatSheet } from '../chat/chat-sheet';
import { AgentBackground } from '../components/background';
import { import {
AgentChatContext, AgentChatContext,
AgentChatLogContext, AgentChatLogContext,
@ -42,6 +41,7 @@ import { useMoveNote } from '../hooks/use-move-note';
import { usePlaceholderManager } from '../hooks/use-placeholder-manager'; import { usePlaceholderManager } from '../hooks/use-placeholder-manager';
import { useDropdownManager } from './context'; import { useDropdownManager } from './context';
import { AgentBackground } from '@/components/canvas/background';
import Spotlight from '@/components/spotlight'; import Spotlight from '@/components/spotlight';
import { import {
useHideFormSheetOnNodeDeletion, useHideFormSheetOnNodeDeletion,
@ -61,7 +61,6 @@ import { GenerateNode } from './node/generate-node';
import { InvokeNode } from './node/invoke-node'; import { InvokeNode } from './node/invoke-node';
import { IterationNode, IterationStartNode } from './node/iteration-node'; import { IterationNode, IterationStartNode } from './node/iteration-node';
import { KeywordNode } from './node/keyword-node'; import { KeywordNode } from './node/keyword-node';
import { LogicNode } from './node/logic-node';
import { MessageNode } from './node/message-node'; import { MessageNode } from './node/message-node';
import NoteNode from './node/note-node'; import NoteNode from './node/note-node';
import { PlaceholderNode } from './node/placeholder-node'; import { PlaceholderNode } from './node/placeholder-node';
@ -78,7 +77,6 @@ export const nodeTypes: NodeTypes = {
beginNode: BeginNode, beginNode: BeginNode,
placeholderNode: PlaceholderNode, placeholderNode: PlaceholderNode,
relevantNode: RelevantNode, relevantNode: RelevantNode,
logicNode: LogicNode,
noteNode: NoteNode, noteNode: NoteNode,
switchNode: SwitchNode, switchNode: SwitchNode,
generateNode: GenerateNode, generateNode: GenerateNode,
@ -173,8 +171,6 @@ function AgentCanvas({ drawerVisible, hideDrawer }: IProps) {
} }
}; };
const isDarkTheme = useIsDarkTheme();
useHideFormSheetOnNodeDeletion({ hideFormDrawer }); useHideFormSheetOnNodeDeletion({ hideFormDrawer });
const { visible, hideModal, showModal } = useSetModalState(); const { visible, hideModal, showModal } = useSetModalState();
@ -243,7 +239,7 @@ function AgentCanvas({ drawerVisible, hideDrawer }: IProps) {
> >
<defs> <defs>
<marker <marker
fill="rgb(157 149 225)" fill="var(--text-disabled)"
id="logo" id="logo"
viewBox="0 0 40 40" viewBox="0 0 40 40"
refX="8" refX="8"
@ -286,12 +282,6 @@ function AgentCanvas({ drawerVisible, hideDrawer }: IProps) {
defaultEdgeOptions={{ defaultEdgeOptions={{
type: 'buttonEdge', type: 'buttonEdge',
markerEnd: 'logo', markerEnd: 'logo',
style: {
strokeWidth: 1,
stroke: isDarkTheme
? 'rgba(91, 93, 106, 1)'
: 'rgba(151, 154, 171, 1)',
},
zIndex: 1001, // https://github.com/xyflow/xyflow/discussions/3498 zIndex: 1001, // https://github.com/xyflow/xyflow/discussions/3498
}} }}
deleteKeyCode={['Delete', 'Backspace']} deleteKeyCode={['Delete', 'Backspace']}

View File

@ -7,9 +7,8 @@ import { useTranslation } from 'react-i18next';
import { AgentExceptionMethod, NodeHandleId } from '../../constant'; import { AgentExceptionMethod, NodeHandleId } from '../../constant';
import useGraphStore from '../../store'; import useGraphStore from '../../store';
import { isBottomSubAgent } from '../../utils'; import { isBottomSubAgent } from '../../utils';
import { CommonHandle } from './handle'; import { CommonHandle, LeftEndHandle } from './handle';
import { LeftHandleStyle, RightHandleStyle } from './handle-icon'; import { RightHandleStyle } from './handle-icon';
import styles from './index.less';
import NodeHeader from './node-header'; import NodeHeader from './node-header';
import { NodeWrapper } from './node-wrapper'; import { NodeWrapper } from './node-wrapper';
import { ToolBar } from './toolbar'; import { ToolBar } from './toolbar';
@ -40,19 +39,11 @@ function InnerAgentNode({
<NodeWrapper selected={selected}> <NodeWrapper selected={selected}>
{isHeadAgent && ( {isHeadAgent && (
<> <>
<CommonHandle <LeftEndHandle></LeftEndHandle>
type="target"
position={Position.Left}
isConnectable={isConnectable}
style={LeftHandleStyle}
nodeId={id}
id={NodeHandleId.End}
></CommonHandle>
<CommonHandle <CommonHandle
type="source" type="source"
position={Position.Right} position={Position.Right}
isConnectable={isConnectable} isConnectable={isConnectable}
className={styles.handle}
style={RightHandleStyle} style={RightHandleStyle}
nodeId={id} nodeId={id}
id={NodeHandleId.Start} id={NodeHandleId.Start}
@ -61,18 +52,22 @@ function InnerAgentNode({
</> </>
)} )}
{isHeadAgent || (
<Handle <Handle
type="target" type="target"
position={Position.Top} position={Position.Top}
isConnectable={false} isConnectable={false}
id={NodeHandleId.AgentTop} id={NodeHandleId.AgentTop}
className="!bg-accent-primary !size-2"
></Handle> ></Handle>
)}
<Handle <Handle
type="source" type="source"
position={Position.Bottom} position={Position.Bottom}
isConnectable={false} isConnectable={false}
id={NodeHandleId.AgentBottom} id={NodeHandleId.AgentBottom}
style={{ left: 180 }} style={{ left: 180 }}
className="!bg-accent-primary !size-2"
></Handle> ></Handle>
<Handle <Handle
type="source" type="source"
@ -80,6 +75,7 @@ function InnerAgentNode({
isConnectable={false} isConnectable={false}
id={NodeHandleId.Tool} id={NodeHandleId.Tool}
style={{ left: 20 }} style={{ left: 20 }}
className="!bg-accent-primary !size-2"
></Handle> ></Handle>
<NodeHeader id={id} name={data.name} label={data.label}></NodeHeader> <NodeHeader id={id} name={data.name} label={data.label}></NodeHeader>
<section className="flex flex-col gap-2"> <section className="flex flex-col gap-2">

View File

@ -3,8 +3,7 @@ import { ICategorizeNode } from '@/interfaces/database/flow';
import { NodeProps, Position } from '@xyflow/react'; import { NodeProps, Position } from '@xyflow/react';
import { get } from 'lodash'; import { get } from 'lodash';
import { memo } from 'react'; import { memo } from 'react';
import { NodeHandleId } from '../../constant'; import { CommonHandle, LeftEndHandle } from './handle';
import { CommonHandle } from './handle';
import { RightHandleStyle } from './handle-icon'; import { RightHandleStyle } from './handle-icon';
import NodeHeader from './node-header'; import NodeHeader from './node-header';
import { NodeWrapper } from './node-wrapper'; import { NodeWrapper } from './node-wrapper';
@ -20,13 +19,7 @@ export function InnerCategorizeNode({
return ( return (
<ToolBar selected={selected} id={id} label={data.label}> <ToolBar selected={selected} id={id} label={data.label}>
<NodeWrapper selected={selected}> <NodeWrapper selected={selected}>
<CommonHandle <LeftEndHandle></LeftEndHandle>
type="target"
position={Position.Left}
isConnectable
id={NodeHandleId.End}
nodeId={id}
></CommonHandle>
<NodeHeader id={id} name={data.name} label={data.label}></NodeHeader> <NodeHeader id={id} name={data.name} label={data.label}></NodeHeader>
@ -41,7 +34,6 @@ export function InnerCategorizeNode({
{position.name} {position.name}
</div> </div>
<CommonHandle <CommonHandle
// key={position.text}
id={position.uuid} id={position.uuid}
type="source" type="source"
position={Position.Right} position={Position.Right}

View File

@ -1,8 +1,9 @@
import { useSetModalState } from '@/hooks/common-hooks'; import { useSetModalState } from '@/hooks/common-hooks';
import { cn } from '@/lib/utils'; import { cn } from '@/lib/utils';
import { Handle, HandleProps } from '@xyflow/react'; import { Handle, HandleProps, Position } from '@xyflow/react';
import { Plus } from 'lucide-react'; import { Plus } from 'lucide-react';
import { useMemo } from 'react'; import { useMemo } from 'react';
import { NodeHandleId } from '../../constant';
import { HandleContext } from '../../context'; import { HandleContext } from '../../context';
import { useDropdownManager } from '../context'; import { useDropdownManager } from '../context';
import { InnerNextStepDropdown } from './dropdown/next-step-dropdown'; import { InnerNextStepDropdown } from './dropdown/next-step-dropdown';
@ -33,7 +34,7 @@ export function CommonHandle({
<Handle <Handle
{...props} {...props}
className={cn( className={cn(
'inline-flex justify-center items-center !bg-accent-primary !size-4 !rounded-sm !border-none ', 'inline-flex justify-center items-center !bg-accent-primary !border-none group-hover:!size-4 group-hover:!rounded-sm',
className, className,
)} )}
onClick={(e) => { onClick={(e) => {
@ -47,7 +48,7 @@ export function CommonHandle({
showModal(); showModal();
}} }}
> >
<Plus className="size-3 pointer-events-none text-text-title-invert" /> <Plus className="size-3 pointer-events-none text-white hidden group-hover:inline-block" />
{visible && ( {visible && (
<InnerNextStepDropdown <InnerNextStepDropdown
hideModal={() => { hideModal={() => {
@ -62,3 +63,19 @@ export function CommonHandle({
</HandleContext.Provider> </HandleContext.Provider>
); );
} }
/**
 * Small target handle pinned to the left side of a node.
 *
 * `type` is fixed to `"target"` and `position` to `Position.Left`; both are
 * excluded from the accepted props. The handle id defaults to
 * `NodeHandleId.End`, but because the remaining props are spread last a caller
 * may still override it (or any other forwarded attribute).
 *
 * @param isConnectable - Forwarded unchanged to the underlying `Handle`.
 * @param className - Extra classes merged on top of the base handle styling.
 * @param props - Any other `Handle` props, forwarded as-is.
 */
export function LeftEndHandle({
  isConnectable,
  className,
  ...props
}: Omit<HandleProps, 'type' | 'position'>) {
  return (
    <Handle
      isConnectable={isConnectable}
      // Merge rather than replace, mirroring CommonHandle: previously a
      // caller-supplied className arrived through the spread below and
      // discarded the base colour/size classes entirely.
      className={cn('!bg-accent-primary !size-2', className)}
      id={NodeHandleId.End}
      type="target"
      position={Position.Left}
      {...props}
    />
  );
}

View File

@ -3,8 +3,8 @@ import { NodeProps, Position } from '@xyflow/react';
import { memo } from 'react'; import { memo } from 'react';
import { NodeHandleId } from '../../constant'; import { NodeHandleId } from '../../constant';
import { needsSingleStepDebugging } from '../../utils'; import { needsSingleStepDebugging } from '../../utils';
import { CommonHandle } from './handle'; import { CommonHandle, LeftEndHandle } from './handle';
import { LeftHandleStyle, RightHandleStyle } from './handle-icon'; import { RightHandleStyle } from './handle-icon';
import NodeHeader from './node-header'; import NodeHeader from './node-header';
import { NodeWrapper } from './node-wrapper'; import { NodeWrapper } from './node-wrapper';
import { ToolBar } from './toolbar'; import { ToolBar } from './toolbar';
@ -23,14 +23,7 @@ function InnerRagNode({
showRun={needsSingleStepDebugging(data.label)} showRun={needsSingleStepDebugging(data.label)}
> >
<NodeWrapper selected={selected}> <NodeWrapper selected={selected}>
<CommonHandle <LeftEndHandle></LeftEndHandle>
id={NodeHandleId.End}
type="target"
position={Position.Left}
isConnectable={isConnectable}
style={LeftHandleStyle}
nodeId={id}
></CommonHandle>
<CommonHandle <CommonHandle
type="source" type="source"
position={Position.Right} position={Position.Right}

View File

@ -7,8 +7,7 @@ import { NodeProps, NodeResizeControl, Position } from '@xyflow/react';
import { memo } from 'react'; import { memo } from 'react';
import { NodeHandleId, Operator } from '../../constant'; import { NodeHandleId, Operator } from '../../constant';
import OperatorIcon from '../../operator-icon'; import OperatorIcon from '../../operator-icon';
import { CommonHandle } from './handle'; import { CommonHandle, LeftEndHandle } from './handle';
import { RightHandleStyle } from './handle-icon';
import styles from './index.less'; import styles from './index.less';
import NodeHeader from './node-header'; import NodeHeader from './node-header';
import { NodeWrapper } from './node-wrapper'; import { NodeWrapper } from './node-wrapper';
@ -24,30 +23,21 @@ export function InnerIterationNode({
return ( return (
<ToolBar selected={selected} id={id} label={data.label} showRun={false}> <ToolBar selected={selected} id={id} label={data.label} showRun={false}>
<section <section
className={cn('h-full bg-transparent rounded-b-md ', { className={cn('h-full bg-transparent rounded-b-md group', {
[styles.selectedHeader]: selected, [styles.selectedHeader]: selected,
})} })}
> >
<NodeResizeControl style={controlStyle} minWidth={100} minHeight={50}> <NodeResizeControl style={controlStyle} minWidth={100} minHeight={50}>
<ResizeIcon /> <ResizeIcon />
</NodeResizeControl> </NodeResizeControl>
<CommonHandle <LeftEndHandle></LeftEndHandle>
id={NodeHandleId.End}
type="target"
position={Position.Left}
isConnectable={isConnectable}
className={styles.handle}
nodeId={id}
></CommonHandle>
<CommonHandle <CommonHandle
id={NodeHandleId.Start} id={NodeHandleId.Start}
type="source" type="source"
position={Position.Right} position={Position.Right}
isConnectable={isConnectable} isConnectable={isConnectable}
className={styles.handle}
nodeId={id} nodeId={id}
></CommonHandle> ></CommonHandle>
<NodeHeader <NodeHeader
id={id} id={id}
name={data.name} name={data.name}
@ -75,8 +65,6 @@ function InnerIterationStartNode({
type="source" type="source"
position={Position.Right} position={Position.Right}
isConnectable={isConnectable} isConnectable={isConnectable}
className={styles.handle}
style={RightHandleStyle}
isConnectableEnd={false} isConnectableEnd={false}
id={NodeHandleId.Start} id={NodeHandleId.Start}
nodeId={id} nodeId={id}

View File

@ -1,41 +0,0 @@
import { ILogicNode } from '@/interfaces/database/flow';
import { NodeProps, Position } from '@xyflow/react';
import { memo } from 'react';
import { CommonHandle } from './handle';
import { LeftHandleStyle, RightHandleStyle } from './handle-icon';
import NodeHeader from './node-header';
import { NodeWrapper } from './node-wrapper';
import { ToolBar } from './toolbar';
/**
 * Canvas node for a logic operator.
 *
 * Renders the node header inside the shared `ToolBar` / `NodeWrapper` chrome
 * and exposes two `source` handles: one on the left edge (id `"c"`) and one
 * on the right edge (id `"b"`).
 *
 * @param props - React Flow node props; `isConnectable` falls back to `true`
 * when it is not supplied.
 */
export function InnerLogicNode(props: NodeProps<ILogicNode>) {
  const { id, data, selected, isConnectable = true } = props;

  // Left-edge handle (declared as a source, same as the right-edge one).
  const leftHandle = (
    <CommonHandle
      id="c"
      type="source"
      position={Position.Left}
      isConnectable={isConnectable}
      style={LeftHandleStyle}
      nodeId={id}
    />
  );

  // Right-edge handle.
  const rightHandle = (
    <CommonHandle
      type="source"
      position={Position.Right}
      isConnectable={isConnectable}
      style={RightHandleStyle}
      id="b"
      nodeId={id}
    />
  );

  return (
    <ToolBar selected={selected} id={id} label={data.label}>
      <NodeWrapper selected={selected}>
        {leftHandle}
        {rightHandle}
        <NodeHeader id={id} name={data.name} label={data.label} />
      </NodeWrapper>
    </ToolBar>
  );
}

/** Memoized export of the logic node component. */
export const LogicNode = memo(InnerLogicNode);

View File

@ -1,35 +1,21 @@
import { IMessageNode } from '@/interfaces/database/flow'; import { IMessageNode } from '@/interfaces/database/flow';
import { NodeProps, Position } from '@xyflow/react'; import { NodeProps } from '@xyflow/react';
import { Flex } from 'antd'; import { Flex } from 'antd';
import classNames from 'classnames'; import classNames from 'classnames';
import { get } from 'lodash'; import { get } from 'lodash';
import { memo } from 'react'; import { memo } from 'react';
import { NodeHandleId } from '../../constant'; import { LeftEndHandle } from './handle';
import { CommonHandle } from './handle';
import { LeftHandleStyle } from './handle-icon';
import styles from './index.less'; import styles from './index.less';
import NodeHeader from './node-header'; import NodeHeader from './node-header';
import { NodeWrapper } from './node-wrapper'; import { NodeWrapper } from './node-wrapper';
import { ToolBar } from './toolbar'; import { ToolBar } from './toolbar';
function InnerMessageNode({ function InnerMessageNode({ id, data, selected }: NodeProps<IMessageNode>) {
id,
data,
isConnectable = true,
selected,
}: NodeProps<IMessageNode>) {
const messages: string[] = get(data, 'form.messages', []); const messages: string[] = get(data, 'form.messages', []);
return ( return (
<ToolBar selected={selected} id={id} label={data.label}> <ToolBar selected={selected} id={id} label={data.label}>
<NodeWrapper selected={selected}> <NodeWrapper selected={selected}>
<CommonHandle <LeftEndHandle></LeftEndHandle>
type="target"
position={Position.Left}
isConnectable={isConnectable}
style={LeftHandleStyle}
nodeId={id}
id={NodeHandleId.End}
></CommonHandle>
{/* <CommonHandle {/* <CommonHandle
type="source" type="source"
position={Position.Right} position={Position.Right}
@ -47,7 +33,6 @@ function InnerMessageNode({
[styles.nodeHeader]: messages.length > 0, [styles.nodeHeader]: messages.length > 0,
})} })}
></NodeHeader> ></NodeHeader>
<Flex vertical gap={8} className={styles.messageNodeContainer}> <Flex vertical gap={8} className={styles.messageNodeContainer}>
{messages.map((message, idx) => { {messages.map((message, idx) => {
return ( return (

View File

@ -7,7 +7,7 @@ export function NodeWrapper({ children, className, selected }: IProps) {
return ( return (
<section <section
className={cn( className={cn(
'bg-text-title-invert p-2.5 rounded-sm w-[200px] text-xs', 'bg-text-title-invert p-2.5 rounded-sm w-[200px] text-xs group',
{ 'border border-accent-primary': selected }, { 'border border-accent-primary': selected },
className, className,
)} )}

View File

@ -7,8 +7,7 @@ import { get } from 'lodash';
import { memo } from 'react'; import { memo } from 'react';
import { NodeHandleId } from '../../constant'; import { NodeHandleId } from '../../constant';
import { useGetVariableLabelByValue } from '../../hooks/use-get-begin-query'; import { useGetVariableLabelByValue } from '../../hooks/use-get-begin-query';
import { CommonHandle } from './handle'; import { CommonHandle, LeftEndHandle } from './handle';
import { LeftHandleStyle, RightHandleStyle } from './handle-icon';
import styles from './index.less'; import styles from './index.less';
import NodeHeader from './node-header'; import NodeHeader from './node-header';
import { NodeWrapper } from './node-wrapper'; import { NodeWrapper } from './node-wrapper';
@ -28,22 +27,12 @@ function InnerRetrievalNode({
return ( return (
<ToolBar selected={selected} id={id} label={data.label}> <ToolBar selected={selected} id={id} label={data.label}>
<NodeWrapper selected={selected}> <NodeWrapper selected={selected}>
<CommonHandle <LeftEndHandle></LeftEndHandle>
id={NodeHandleId.End}
type="target"
position={Position.Left}
isConnectable={isConnectable}
className={styles.handle}
style={LeftHandleStyle}
nodeId={id}
></CommonHandle>
<CommonHandle <CommonHandle
id={NodeHandleId.Start} id={NodeHandleId.Start}
type="source" type="source"
position={Position.Right} position={Position.Right}
isConnectable={isConnectable} isConnectable={isConnectable}
className={styles.handle}
style={RightHandleStyle}
nodeId={id} nodeId={id}
isConnectableEnd={false} isConnectableEnd={false}
></CommonHandle> ></CommonHandle>

View File

@ -2,10 +2,10 @@ import { Card, CardContent } from '@/components/ui/card';
import { ISwitchCondition, ISwitchNode } from '@/interfaces/database/flow'; import { ISwitchCondition, ISwitchNode } from '@/interfaces/database/flow';
import { NodeProps, Position } from '@xyflow/react'; import { NodeProps, Position } from '@xyflow/react';
import { memo, useCallback } from 'react'; import { memo, useCallback } from 'react';
import { NodeHandleId, SwitchOperatorOptions } from '../../constant'; import { SwitchOperatorOptions } from '../../constant';
import { LogicalOperatorIcon } from '../../form/switch-form'; import { LogicalOperatorIcon } from '../../form/switch-form';
import { useGetVariableLabelByValue } from '../../hooks/use-get-begin-query'; import { useGetVariableLabelByValue } from '../../hooks/use-get-begin-query';
import { CommonHandle } from './handle'; import { CommonHandle, LeftEndHandle } from './handle';
import { RightHandleStyle } from './handle-icon'; import { RightHandleStyle } from './handle-icon';
import NodeHeader from './node-header'; import NodeHeader from './node-header';
import { NodeWrapper } from './node-wrapper'; import { NodeWrapper } from './node-wrapper';
@ -66,13 +66,7 @@ function InnerSwitchNode({ id, data, selected }: NodeProps<ISwitchNode>) {
return ( return (
<ToolBar selected={selected} id={id} label={data.label} showRun={false}> <ToolBar selected={selected} id={id} label={data.label} showRun={false}>
<NodeWrapper selected={selected}> <NodeWrapper selected={selected}>
<CommonHandle <LeftEndHandle></LeftEndHandle>
type="target"
position={Position.Left}
isConnectable
nodeId={id}
id={NodeHandleId.End}
></CommonHandle>
<NodeHeader id={id} name={data.name} label={data.label}></NodeHeader> <NodeHeader id={id} name={data.name} label={data.label}></NodeHeader>
<section className="gap-2.5 flex flex-col"> <section className="gap-2.5 flex flex-col">
{positions.map((position, idx) => { {positions.map((position, idx) => {

View File

@ -49,6 +49,7 @@ function InnerToolNode({
type="target" type="target"
position={Position.Top} position={Position.Top}
isConnectable={isConnectable} isConnectable={isConnectable}
className="!bg-accent-primary !size-2"
></Handle> ></Handle>
<ul className="space-y-2"> <ul className="space-y-2">
{tools.map((x) => ( {tools.map((x) => (

View File

@ -66,7 +66,7 @@ export function ToolBar({
return ( return (
<TooltipNode selected={selected}> <TooltipNode selected={selected}>
<TooltipTrigger>{children}</TooltipTrigger> <TooltipTrigger className="h-full">{children}</TooltipTrigger>
<TooltipContent position={Position.Top}> <TooltipContent position={Position.Top}>
<section className="flex gap-2 items-center"> <section className="flex gap-2 items-center">

View File

@ -69,7 +69,6 @@ export enum Operator {
AkShare = 'AkShare', AkShare = 'AkShare',
YahooFinance = 'YahooFinance', YahooFinance = 'YahooFinance',
Jin10 = 'Jin10', Jin10 = 'Jin10',
Concentrator = 'Concentrator',
TuShare = 'TuShare', TuShare = 'TuShare',
Note = 'Note', Note = 'Note',
Crawler = 'Crawler', Crawler = 'Crawler',
@ -102,7 +101,6 @@ export const AgentOperatorList = [
Operator.RewriteQuestion, Operator.RewriteQuestion,
Operator.KeywordExtract, Operator.KeywordExtract,
Operator.Switch, Operator.Switch,
Operator.Concentrator,
Operator.Iteration, Operator.Iteration,
Operator.WaitingDialogue, Operator.WaitingDialogue,
Operator.Note, Operator.Note,
@ -129,9 +127,6 @@ export const componentMenuList = [
{ {
name: Operator.Switch, name: Operator.Switch,
}, },
{
name: Operator.Concentrator,
},
{ {
name: Operator.Iteration, name: Operator.Iteration,
}, },
@ -544,8 +539,6 @@ export const initialJin10Values = {
...initialQueryBaseValues, ...initialQueryBaseValues,
}; };
export const initialConcentratorValues = {};
export const initialTuShareValues = { export const initialTuShareValues = {
token: 'xxx', token: 'xxx',
src: 'eastmoney', src: 'eastmoney',
@ -824,7 +817,6 @@ export const RestrictedUpstreamMap = {
[Operator.AkShare]: [Operator.Begin], [Operator.AkShare]: [Operator.Begin],
[Operator.YahooFinance]: [Operator.Begin], [Operator.YahooFinance]: [Operator.Begin],
[Operator.Jin10]: [Operator.Begin], [Operator.Jin10]: [Operator.Begin],
[Operator.Concentrator]: [Operator.Begin],
[Operator.TuShare]: [Operator.Begin], [Operator.TuShare]: [Operator.Begin],
[Operator.Crawler]: [Operator.Begin], [Operator.Crawler]: [Operator.Begin],
[Operator.Note]: [], [Operator.Note]: [],
@ -840,6 +832,7 @@ export const RestrictedUpstreamMap = {
[Operator.StringTransform]: [Operator.Begin], [Operator.StringTransform]: [Operator.Begin],
[Operator.UserFillUp]: [Operator.Begin], [Operator.UserFillUp]: [Operator.Begin],
[Operator.Tool]: [Operator.Begin], [Operator.Tool]: [Operator.Begin],
[Operator.Placeholder]: [Operator.Begin],
}; };
export const NodeMap = { export const NodeMap = {
@ -865,7 +858,6 @@ export const NodeMap = {
[Operator.SearXNG]: 'ragNode', [Operator.SearXNG]: 'ragNode',
[Operator.ExeSQL]: 'ragNode', [Operator.ExeSQL]: 'ragNode',
[Operator.Switch]: 'switchNode', [Operator.Switch]: 'switchNode',
[Operator.Concentrator]: 'logicNode',
[Operator.WenCai]: 'ragNode', [Operator.WenCai]: 'ragNode',
[Operator.AkShare]: 'ragNode', [Operator.AkShare]: 'ragNode',
[Operator.YahooFinance]: 'ragNode', [Operator.YahooFinance]: 'ragNode',
@ -908,7 +900,6 @@ export const BeginQueryTypeIconMap = {
export const NoDebugOperatorsList = [ export const NoDebugOperatorsList = [
Operator.Begin, Operator.Begin,
Operator.Concentrator,
Operator.Message, Operator.Message,
Operator.RewriteQuestion, Operator.RewriteQuestion,
Operator.Switch, Operator.Switch,

View File

@ -136,9 +136,6 @@ export const FormConfigMap = {
[Operator.SearXNG]: { [Operator.SearXNG]: {
component: SearXNGForm, component: SearXNGForm,
}, },
[Operator.Concentrator]: {
component: () => <></>,
},
[Operator.Note]: { [Operator.Note]: {
component: () => <></>, component: () => <></>,
}, },

View File

@ -25,7 +25,6 @@ import {
initialBingValues, initialBingValues,
initialCategorizeValues, initialCategorizeValues,
initialCodeValues, initialCodeValues,
initialConcentratorValues,
initialCrawlerValues, initialCrawlerValues,
initialDeepLValues, initialDeepLValues,
initialDuckValues, initialDuckValues,
@ -124,7 +123,6 @@ export const useInitializeOperatorParams = () => {
[Operator.AkShare]: initialAkShareValues, [Operator.AkShare]: initialAkShareValues,
[Operator.YahooFinance]: initialYahooFinanceValues, [Operator.YahooFinance]: initialYahooFinanceValues,
[Operator.Jin10]: initialJin10Values, [Operator.Jin10]: initialJin10Values,
[Operator.Concentrator]: initialConcentratorValues,
[Operator.TuShare]: initialTuShareValues, [Operator.TuShare]: initialTuShareValues,
[Operator.Note]: initialNoteValues, [Operator.Note]: initialNoteValues,
[Operator.Crawler]: initialCrawlerValues, [Operator.Crawler]: initialCrawlerValues,
@ -140,6 +138,7 @@ export const useInitializeOperatorParams = () => {
[Operator.Tool]: {}, [Operator.Tool]: {},
[Operator.UserFillUp]: initialUserFillUpValues, [Operator.UserFillUp]: initialUserFillUpValues,
[Operator.StringTransform]: initialStringTransformValues, [Operator.StringTransform]: initialStringTransformValues,
[Operator.Placeholder]: {},
}; };
}, [llmId]); }, [llmId]);

View File

@ -18,7 +18,6 @@ import {
initialBingValues, initialBingValues,
initialCategorizeValues, initialCategorizeValues,
initialCodeValues, initialCodeValues,
initialConcentratorValues,
initialCrawlerValues, initialCrawlerValues,
initialDeepLValues, initialDeepLValues,
initialDuckValues, initialDuckValues,
@ -100,7 +99,6 @@ export const useInitializeOperatorParams = () => {
[Operator.AkShare]: initialAkShareValues, [Operator.AkShare]: initialAkShareValues,
[Operator.YahooFinance]: initialYahooFinanceValues, [Operator.YahooFinance]: initialYahooFinanceValues,
[Operator.Jin10]: initialJin10Values, [Operator.Jin10]: initialJin10Values,
[Operator.Concentrator]: initialConcentratorValues,
[Operator.TuShare]: initialTuShareValues, [Operator.TuShare]: initialTuShareValues,
[Operator.Note]: initialNoteValues, [Operator.Note]: initialNoteValues,
[Operator.Crawler]: initialCrawlerValues, [Operator.Crawler]: initialCrawlerValues,

View File

@ -1,3 +1,5 @@
import { AgentBackground } from '@/components/canvas/background';
import Spotlight from '@/components/spotlight';
import { Button } from '@/components/ui/button'; import { Button } from '@/components/ui/button';
import { Card, CardContent } from '@/components/ui/card'; import { Card, CardContent } from '@/components/ui/card';
import { import {
@ -22,7 +24,6 @@ import { ArrowDownToLine } from 'lucide-react';
import { ReactNode, useCallback, useEffect, useState } from 'react'; import { ReactNode, useCallback, useEffect, useState } from 'react';
import { useTranslation } from 'react-i18next'; import { useTranslation } from 'react-i18next';
import { nodeTypes } from '../canvas'; import { nodeTypes } from '../canvas';
import { AgentBackground } from '../components/background';
export function VersionDialog({ export function VersionDialog({
hideModal, hideModal,
@ -121,6 +122,7 @@ export function VersionDialog({
minZoom={0.1} minZoom={0.1}
> >
<AgentBackground></AgentBackground> <AgentBackground></AgentBackground>
<Spotlight className="z-0" opcity={0.7} coverage={70} />
</ReactFlow> </ReactFlow>
</ReactFlowProvider> </ReactFlowProvider>
</section> </section>

View File

@ -21,7 +21,6 @@ import '@xyflow/react/dist/style.css';
import { NotebookPen } from 'lucide-react'; import { NotebookPen } from 'lucide-react';
import { memo, useCallback, useRef, useState } from 'react'; import { memo, useCallback, useRef, useState } from 'react';
import { useTranslation } from 'react-i18next'; import { useTranslation } from 'react-i18next';
import { AgentBackground } from '../components/background';
import { AgentInstanceContext, HandleContext } from '../context'; import { AgentInstanceContext, HandleContext } from '../context';
import FormSheet from '../form-sheet/next'; import FormSheet from '../form-sheet/next';
@ -31,6 +30,8 @@ import { useBeforeDelete } from '../hooks/use-before-delete';
import { useMoveNote } from '../hooks/use-move-note'; import { useMoveNote } from '../hooks/use-move-note';
import { useDropdownManager } from './context'; import { useDropdownManager } from './context';
import { AgentBackground } from '@/components/canvas/background';
import Spotlight from '@/components/spotlight';
import { useRunDataflow } from '../hooks/use-run-dataflow'; import { useRunDataflow } from '../hooks/use-run-dataflow';
import { import {
useHideFormSheetOnNodeDeletion, useHideFormSheetOnNodeDeletion,
@ -263,6 +264,7 @@ function DataFlowCanvas({ drawerVisible, hideDrawer, showLogSheet }: IProps) {
onBeforeDelete={handleBeforeDelete} onBeforeDelete={handleBeforeDelete}
> >
<AgentBackground></AgentBackground> <AgentBackground></AgentBackground>
<Spotlight className="z-0" opcity={0.7} coverage={70} />
<Controls position={'bottom-center'} orientation="horizontal"> <Controls position={'bottom-center'} orientation="horizontal">
<ControlButton> <ControlButton>
<Tooltip> <Tooltip>

View File

@ -1,13 +0,0 @@
import { useIsDarkTheme } from '@/components/theme-provider';
import { Background } from '@xyflow/react';
/**
 * Flow-canvas background whose `color` and `bgColor` follow the active theme.
 */
export function AgentBackground() {
  const dark = useIsDarkTheme();
  const color = dark ? 'rgba(255,255,255,0.15)' : '#A8A9B3';
  const bgColor = dark ? 'rgba(11, 11, 12, 1)' : 'rgba(0, 0, 0, 0.05)';

  return <Background color={color} bgColor={bgColor} />;
}

View File

@ -1,3 +1,5 @@
import { AgentBackground } from '@/components/canvas/background';
import Spotlight from '@/components/spotlight';
import { Button } from '@/components/ui/button'; import { Button } from '@/components/ui/button';
import { Card, CardContent } from '@/components/ui/card'; import { Card, CardContent } from '@/components/ui/card';
import { import {
@ -22,7 +24,6 @@ import { ArrowDownToLine } from 'lucide-react';
import { ReactNode, useCallback, useEffect, useState } from 'react'; import { ReactNode, useCallback, useEffect, useState } from 'react';
import { useTranslation } from 'react-i18next'; import { useTranslation } from 'react-i18next';
import { nodeTypes } from '../canvas'; import { nodeTypes } from '../canvas';
import { AgentBackground } from '../components/background';
export function VersionDialog({ export function VersionDialog({
hideModal, hideModal,
@ -121,6 +122,7 @@ export function VersionDialog({
minZoom={0.1} minZoom={0.1}
> >
<AgentBackground></AgentBackground> <AgentBackground></AgentBackground>
<Spotlight className="z-0" opcity={0.7} coverage={70} />
</ReactFlow> </ReactFlow>
</ReactFlowProvider> </ReactFlowProvider>
</section> </section>

View File

@ -0,0 +1,64 @@
import { useFormContext, useWatch } from 'react-hook-form';
import { DocumentParserType } from '@/constants/knowledge';
import { useMemo } from 'react';
import { AudioConfiguration } from './configuration/audio';
import { BookConfiguration } from './configuration/book';
import { EmailConfiguration } from './configuration/email';
import { KnowledgeGraphConfiguration } from './configuration/knowledge-graph';
import { LawsConfiguration } from './configuration/laws';
import { ManualConfiguration } from './configuration/manual';
import { NaiveConfiguration } from './configuration/naive';
import { OneConfiguration } from './configuration/one';
import { PaperConfiguration } from './configuration/paper';
import { PictureConfiguration } from './configuration/picture';
import { PresentationConfiguration } from './configuration/presentation';
import { QAConfiguration } from './configuration/qa';
import { ResumeConfiguration } from './configuration/resume';
import { TableConfiguration } from './configuration/table';
import { TagConfiguration } from './configuration/tag';
// Lookup table from a document parser type to the configuration panel rendered
// for it. Entries are kept in alphabetical order, mirroring the imports above.
const ConfigurationComponentMap = {
  [DocumentParserType.Audio]: AudioConfiguration,
  [DocumentParserType.Book]: BookConfiguration,
  [DocumentParserType.Email]: EmailConfiguration,
  [DocumentParserType.KnowledgeGraph]: KnowledgeGraphConfiguration,
  [DocumentParserType.Laws]: LawsConfiguration,
  [DocumentParserType.Manual]: ManualConfiguration,
  [DocumentParserType.Naive]: NaiveConfiguration,
  [DocumentParserType.One]: OneConfiguration,
  [DocumentParserType.Paper]: PaperConfiguration,
  [DocumentParserType.Picture]: PictureConfiguration,
  [DocumentParserType.Presentation]: PresentationConfiguration,
  [DocumentParserType.Qa]: QAConfiguration,
  [DocumentParserType.Resume]: ResumeConfiguration,
  [DocumentParserType.Table]: TableConfiguration,
  [DocumentParserType.Tag]: TagConfiguration,
};
/** Placeholder panel: renders a single empty `div`. */
function EmptyComponent() {
  return <div />;
}
/**
 * Renders the configuration panel for the chunk method currently selected in
 * the enclosing form.
 *
 * Watches the `parser_id` field of the surrounding react-hook-form context and
 * looks it up in `ConfigurationComponentMap`. `EmptyComponent` is rendered when
 * no parser is selected or when the id has no registered panel.
 */
export function ChunkMethodForm() {
  const form = useFormContext();

  const finalParserId: DocumentParserType = useWatch({
    control: form.control,
    name: 'parser_id',
  });

  const ConfigurationComponent = useMemo(() => {
    if (!finalParserId) {
      return EmptyComponent;
    }
    // An id missing from the map would yield `undefined`, which React cannot
    // render as an element type; fall back to the empty placeholder instead.
    return ConfigurationComponentMap[finalParserId] ?? EmptyComponent;
  }, [finalParserId]);

  return (
    <section className="h-full flex flex-col">
      <div className="overflow-auto flex-1 min-h-0">
        <ConfigurationComponent></ConfigurationComponent>
      </div>
    </section>
  );
}

View File

@ -8,8 +8,9 @@ import {
FormMessage, FormMessage,
} from '@/components/ui/form'; } from '@/components/ui/form';
import { MultiSelect } from '@/components/ui/multi-select'; import { MultiSelect } from '@/components/ui/multi-select';
import { FormLayout } from '@/constants/form';
import { useFetchKnowledgeList } from '@/hooks/knowledge-hooks'; import { useFetchKnowledgeList } from '@/hooks/knowledge-hooks';
import { Flex, Form, InputNumber, Select, Slider, Space } from 'antd'; import { Form, Select, Space } from 'antd';
import DOMPurify from 'dompurify'; import DOMPurify from 'dompurify';
import { useFormContext, useWatch } from 'react-hook-form'; import { useFormContext, useWatch } from 'react-hook-form';
import { useTranslation } from 'react-i18next'; import { useTranslation } from 'react-i18next';
@ -44,7 +45,7 @@ export const TagSetItem = () => {
<FormItem className=" items-center space-y-0 "> <FormItem className=" items-center space-y-0 ">
<div className="flex items-center"> <div className="flex items-center">
<FormLabel <FormLabel
className="text-sm text-muted-foreground whitespace-nowrap w-1/4" className="text-sm text-text-secondary whitespace-nowrap w-1/4"
tooltip={ tooltip={
<div <div
dangerouslySetInnerHTML={{ dangerouslySetInnerHTML={{
@ -116,27 +117,9 @@ export const TopNTagsItem = () => {
max={10} max={10}
min={1} min={1}
defaultValue={3} defaultValue={3}
layout={FormLayout.Horizontal}
></SliderInputFormField> ></SliderInputFormField>
); );
return (
<Form.Item label={t('knowledgeConfiguration.topnTags')}>
<Flex gap={20} align="center">
<Flex flex={1}>
<Form.Item
name={['parser_config', 'topn_tags']}
noStyle
initialValue={3}
>
<Slider max={10} min={1} style={{ width: '100%' }} />
</Form.Item>
</Flex>
<Form.Item name={['parser_config', 'topn_tags']} noStyle>
<InputNumber max={10} min={1} />
</Form.Item>
</Flex>
</Form.Item>
);
}; };
export function TagItems() { export function TagItems() {

View File

@ -0,0 +1,20 @@
import {
AutoKeywordsFormField,
AutoQuestionsFormField,
} from '@/components/auto-keywords-form-field';
import { ConfigurationFormContainer } from '../configuration-form-container';
import { TagItems } from '../components/tag-item';
/**
 * Configuration panel for the audio parser: auto-keywords and auto-questions
 * fields followed by the tag items, all inside one form container.
 */
export function AudioConfiguration() {
  return (
    <ConfigurationFormContainer>
      <>
        <AutoKeywordsFormField />
        <AutoQuestionsFormField />
      </>
      <TagItems />
    </ConfigurationFormContainer>
  );
}

View File

@ -0,0 +1,28 @@
import {
AutoKeywordsFormField,
AutoQuestionsFormField,
} from '@/components/auto-keywords-form-field';
import { LayoutRecognizeFormField } from '@/components/layout-recognize-form-field';
import { TagItems } from '../components/tag-item';
import {
ConfigurationFormContainer,
MainContainer,
} from '../configuration-form-container';
/**
 * Configuration panel for the book parser. Three form containers, in order:
 * layout recognition, auto keywords/questions, and tag items.
 */
export function BookConfiguration() {
  return (
    <MainContainer>
      <ConfigurationFormContainer>
        <LayoutRecognizeFormField />
      </ConfigurationFormContainer>
      <ConfigurationFormContainer>
        <AutoKeywordsFormField />
        <AutoQuestionsFormField />
      </ConfigurationFormContainer>
      <ConfigurationFormContainer>
        <TagItems />
      </ConfigurationFormContainer>
    </MainContainer>
  );
}

View File

@ -42,7 +42,7 @@ export function ChunkMethodItem(props: IProps) {
'w-1/4 whitespace-pre-wrap': line === 1, 'w-1/4 whitespace-pre-wrap': line === 1,
})} })}
> >
{t('dataPipeline')} {t('builtIn')}
</FormLabel> </FormLabel>
<div className={line === 1 ? 'w-3/4 ' : 'w-full'}> <div className={line === 1 ? 'w-3/4 ' : 'w-full'}>
<FormControl> <FormControl>
@ -115,7 +115,7 @@ export function EmbeddingModelItem({ line = 1, isEdit = true }: IProps) {
); );
} }
export function ParseTypeItem() { export function ParseTypeItem({ line = 2 }: { line?: number }) {
const { t } = useTranslate('knowledgeConfiguration'); const { t } = useTranslate('knowledgeConfiguration');
const form = useFormContext(); const form = useFormContext();
@ -125,17 +125,26 @@ export function ParseTypeItem() {
name={'parseType'} name={'parseType'}
render={({ field }) => ( render={({ field }) => (
<FormItem className=" items-center space-y-0 "> <FormItem className=" items-center space-y-0 ">
<div className=""> <div
className={cn('flex', {
' items-center': line === 1,
'flex-col gap-1': line === 2,
})}
>
<FormLabel <FormLabel
tooltip={t('parseTypeTip')} // tooltip={t('parseTypeTip')}
className="text-sm whitespace-wrap " className={cn('text-sm whitespace-wrap ', {
'w-1/4': line === 1,
})}
> >
{t('parseType')} {t('parseType')}
</FormLabel> </FormLabel>
<div className="text-muted-foreground"> <div
className={cn('text-muted-foreground', { 'w-3/4': line === 1 })}
>
<FormControl> <FormControl>
<Radio.Group {...field}> <Radio.Group {...field}>
<div className="w-3/4 flex gap-2 justify-between text-muted-foreground"> <div className="w-1/2 flex gap-2 justify-between text-muted-foreground">
<Radio value={1}>{t('builtIn')}</Radio> <Radio value={1}>{t('builtIn')}</Radio>
<Radio value={2}>{t('manualSetup')}</Radio> <Radio value={2}>{t('manualSetup')}</Radio>
</div> </div>
@ -144,7 +153,7 @@ export function ParseTypeItem() {
</div> </div>
</div> </div>
<div className="flex pt-1"> <div className="flex pt-1">
<div className="w-1/4"></div> <div className={line === 1 ? 'w-1/4' : ''}></div>
<FormMessage /> <FormMessage />
</div> </div>
</FormItem> </FormItem>
@ -188,3 +197,39 @@ export function EnableAutoGenerateItem() {
/> />
); );
} }
/**
 * Switch bound to the `parser_config.toc_extraction` field of the enclosing
 * form. The label and its tooltip come from the `knowledgeConfiguration`
 * translation namespace; validation messages are shown beneath the control.
 */
export function EnableTocToggle() {
  const { t } = useTranslate('knowledgeConfiguration');
  const form = useFormContext();

  return (
    <FormField
      control={form.control}
      name={'parser_config.toc_extraction'}
      render={({ field }) => {
        const { value, onChange } = field;

        return (
          <FormItem className=" items-center space-y-0 ">
            {/* Label takes a quarter of the row, the switch the remainder. */}
            <div className="flex items-center">
              <FormLabel
                tooltip={t('tocExtractionTip')}
                className="text-sm whitespace-wrap w-1/4"
              >
                {t('tocExtraction')}
              </FormLabel>
              <div className="text-muted-foreground w-3/4">
                <FormControl>
                  <Switch checked={value} onCheckedChange={onChange} />
                </FormControl>
              </div>
            </div>
            {/* Empty quarter-width spacer lines the message up with the switch. */}
            <div className="flex pt-1">
              <div className="w-1/4" />
              <FormMessage />
            </div>
          </FormItem>
        );
      }}
    />
  );
}

View File

@ -0,0 +1,18 @@
import {
AutoKeywordsFormField,
AutoQuestionsFormField,
} from '@/components/auto-keywords-form-field';
import { TagItems } from '../components/tag-item';
import { ConfigurationFormContainer } from '../configuration-form-container';
/**
 * Configuration panel for the email parser: auto-keywords and auto-questions
 * fields followed by the tag items, all inside one form container.
 */
export function EmailConfiguration() {
  return (
    <ConfigurationFormContainer>
      <>
        <AutoKeywordsFormField />
        <AutoQuestionsFormField />
      </>
      <TagItems />
    </ConfigurationFormContainer>
  );
}

View File

@ -0,0 +1,15 @@
import { DelimiterFormField } from '@/components/delimiter-form-field';
import { EntityTypesFormField } from '@/components/entity-types-form-field';
import { MaxTokenNumberFormField } from '@/components/max-token-number-from-field';
/**
 * Configuration panel for the knowledge-graph parser: entity types, a max
 * token number field capped at 8192 * 2, and the delimiter field.
 */
export function KnowledgeGraphConfiguration() {
  return (
    <>
      <EntityTypesFormField />
      <MaxTokenNumberFormField max={8192 * 2} />
      <DelimiterFormField />
    </>
  );
}

View File

@ -0,0 +1,29 @@
import {
AutoKeywordsFormField,
AutoQuestionsFormField,
} from '@/components/auto-keywords-form-field';
import { LayoutRecognizeFormField } from '@/components/layout-recognize-form-field';
import { TagItems } from '../components/tag-item';
import {
ConfigurationFormContainer,
MainContainer,
} from '../configuration-form-container';
/**
 * Configuration panel for the laws parser. Three form containers, in order:
 * layout recognition, auto keywords/questions, and tag items.
 */
export function LawsConfiguration() {
  return (
    <MainContainer>
      <ConfigurationFormContainer>
        <LayoutRecognizeFormField />
      </ConfigurationFormContainer>
      <ConfigurationFormContainer>
        <AutoKeywordsFormField />
        <AutoQuestionsFormField />
      </ConfigurationFormContainer>
      <ConfigurationFormContainer>
        <TagItems />
      </ConfigurationFormContainer>
    </MainContainer>
  );
}

View File

@ -0,0 +1,27 @@
import {
AutoKeywordsFormField,
AutoQuestionsFormField,
} from '@/components/auto-keywords-form-field';
import { LayoutRecognizeFormField } from '@/components/layout-recognize-form-field';
import { TagItems } from '../components/tag-item';
import {
ConfigurationFormContainer,
MainContainer,
} from '../configuration-form-container';
/**
 * Configuration panel for the manual parser. Three form containers, in order:
 * layout recognition, auto keywords/questions, and tag items.
 */
export function ManualConfiguration() {
  return (
    <MainContainer>
      <ConfigurationFormContainer>
        <LayoutRecognizeFormField></LayoutRecognizeFormField>
      </ConfigurationFormContainer>
      <ConfigurationFormContainer>
        <AutoKeywordsFormField></AutoKeywordsFormField>
        <AutoQuestionsFormField></AutoQuestionsFormField>
      </ConfigurationFormContainer>
      {/* Wrapped like the sibling Book/Laws/Paper/Presentation panels so the
          tag section is laid out the same way. */}
      <ConfigurationFormContainer>
        <TagItems></TagItems>
      </ConfigurationFormContainer>
    </MainContainer>
  );
}

View File

@ -0,0 +1,33 @@
import {
AutoKeywordsFormField,
AutoQuestionsFormField,
} from '@/components/auto-keywords-form-field';
import { DelimiterFormField } from '@/components/delimiter-form-field';
import { ExcelToHtmlFormField } from '@/components/excel-to-html-form-field';
import { LayoutRecognizeFormField } from '@/components/layout-recognize-form-field';
import { MaxTokenNumberFormField } from '@/components/max-token-number-from-field';
import { TagItems } from '../components/tag-item';
import {
ConfigurationFormContainer,
MainContainer,
} from '../configuration-form-container';
import { EnableTocToggle } from './common-item';
/**
 * Configuration panel for the naive (general) parser.
 *
 * First container: layout recognition, max token number (initial value 512),
 * delimiter, and the TOC-extraction toggle. Second container: auto
 * keywords/questions, the Excel-to-HTML field, and tag items.
 */
export function NaiveConfiguration() {
  return (
    <MainContainer>
      <ConfigurationFormContainer>
        <LayoutRecognizeFormField />
        <MaxTokenNumberFormField initialValue={512} />
        <DelimiterFormField />
        <EnableTocToggle />
      </ConfigurationFormContainer>
      <ConfigurationFormContainer>
        <AutoKeywordsFormField />
        <AutoQuestionsFormField />
        <ExcelToHtmlFormField />
        <TagItems />
      </ConfigurationFormContainer>
    </MainContainer>
  );
}

View File

@ -0,0 +1,21 @@
import {
AutoKeywordsFormField,
AutoQuestionsFormField,
} from '@/components/auto-keywords-form-field';
import { LayoutRecognizeFormField } from '@/components/layout-recognize-form-field';
import { TagItems } from '../components/tag-item';
import { ConfigurationFormContainer } from '../configuration-form-container';
/**
 * Configuration panel for the "one" parser: layout recognition, auto
 * keywords/questions, and tag items inside a single form container.
 */
export function OneConfiguration() {
  return (
    <ConfigurationFormContainer>
      <LayoutRecognizeFormField />
      <>
        <AutoKeywordsFormField />
        <AutoQuestionsFormField />
      </>
      <TagItems />
    </ConfigurationFormContainer>
  );
}

View File

@ -0,0 +1,28 @@
import {
AutoKeywordsFormField,
AutoQuestionsFormField,
} from '@/components/auto-keywords-form-field';
import { LayoutRecognizeFormField } from '@/components/layout-recognize-form-field';
import { TagItems } from '../components/tag-item';
import {
ConfigurationFormContainer,
MainContainer,
} from '../configuration-form-container';
/**
 * Configuration panel for the paper parser. Three form containers, in order:
 * layout recognition, auto keywords/questions, and tag items.
 */
export function PaperConfiguration() {
  return (
    <MainContainer>
      <ConfigurationFormContainer>
        <LayoutRecognizeFormField />
      </ConfigurationFormContainer>
      <ConfigurationFormContainer>
        <AutoKeywordsFormField />
        <AutoQuestionsFormField />
      </ConfigurationFormContainer>
      <ConfigurationFormContainer>
        <TagItems />
      </ConfigurationFormContainer>
    </MainContainer>
  );
}

View File

@ -0,0 +1,18 @@
import {
AutoKeywordsFormField,
AutoQuestionsFormField,
} from '@/components/auto-keywords-form-field';
import { TagItems } from '../components/tag-item';
import { ConfigurationFormContainer } from '../configuration-form-container';
/**
 * Configuration panel for the picture parser: auto-keywords and auto-questions
 * fields followed by the tag items, all inside one form container.
 */
export function PictureConfiguration() {
  return (
    <ConfigurationFormContainer>
      <>
        <AutoKeywordsFormField />
        <AutoQuestionsFormField />
      </>
      <TagItems />
    </ConfigurationFormContainer>
  );
}

View File

@ -0,0 +1,29 @@
import {
AutoKeywordsFormField,
AutoQuestionsFormField,
} from '@/components/auto-keywords-form-field';
import { LayoutRecognizeFormField } from '@/components/layout-recognize-form-field';
import { TagItems } from '../components/tag-item';
import {
ConfigurationFormContainer,
MainContainer,
} from '../configuration-form-container';
/**
 * Configuration panel for the presentation parser. Three form containers, in
 * order: layout recognition, auto keywords/questions, and tag items.
 */
export function PresentationConfiguration() {
  return (
    <MainContainer>
      <ConfigurationFormContainer>
        <LayoutRecognizeFormField />
      </ConfigurationFormContainer>
      <ConfigurationFormContainer>
        <AutoKeywordsFormField />
        <AutoQuestionsFormField />
      </ConfigurationFormContainer>
      <ConfigurationFormContainer>
        <TagItems />
      </ConfigurationFormContainer>
    </MainContainer>
  );
}

View File

@ -0,0 +1,10 @@
import { TagItems } from '../components/tag-item';
import { ConfigurationFormContainer } from '../configuration-form-container';
/** Configuration panel for the Q&A parser: only the tag items. */
export function QAConfiguration() {
  return (
    <ConfigurationFormContainer>
      <TagItems />
    </ConfigurationFormContainer>
  );
}

View File

@ -0,0 +1,10 @@
import { TagItems } from '../components/tag-item';
import { ConfigurationFormContainer } from '../configuration-form-container';
/** Configuration panel for the resume parser: only the tag items. */
export function ResumeConfiguration() {
  return (
    <ConfigurationFormContainer>
      <TagItems />
    </ConfigurationFormContainer>
  );
}

View File

@ -0,0 +1,12 @@
import { ConfigurationFormContainer } from '../configuration-form-container';
/**
 * Configuration panel for the table parser. Currently renders an empty form
 * container; the fields below are commented out.
 */
export function TableConfiguration() {
  return (
    <ConfigurationFormContainer>
      {/* <ChunkMethodItem></ChunkMethodItem>
      <EmbeddingModelItem></EmbeddingModelItem>
      <PageRankFormField></PageRankFormField> */}
    </ConfigurationFormContainer>
  );
}

View File

@ -0,0 +1,5 @@
import { ConfigurationFormContainer } from '../configuration-form-container';
/** Configuration panel for the tag parser: an empty form container. */
export function TagConfiguration() {
  return <ConfigurationFormContainer />;
}

View File

@ -1,6 +1,9 @@
import { t } from 'i18next';
import { z } from 'zod'; import { z } from 'zod';
export const formSchema = z.object({ export const formSchema = z
.object({
parseType: z.number(),
name: z.string().min(1, { name: z.string().min(1, {
message: 'Username must be at least 2 characters.', message: 'Username must be at least 2 characters.',
}), }),
@ -25,6 +28,7 @@ export const formSchema = z.object({
html4excel: z.boolean(), html4excel: z.boolean(),
tag_kb_ids: z.array(z.string()).nullish(), tag_kb_ids: z.array(z.string()).nullish(),
topn_tags: z.number().optional(), topn_tags: z.number().optional(),
toc_extraction: z.boolean().optional(),
raptor: z raptor: z
.object({ .object({
use_raptor: z.boolean().optional(), use_raptor: z.boolean().optional(),
@ -73,6 +77,15 @@ export const formSchema = z.object({
.optional(), .optional(),
pagerank: z.number(), pagerank: z.number(),
// icon: z.array(z.instanceof(File)), // icon: z.array(z.instanceof(File)),
})
.superRefine((data, ctx) => {
if (data.parseType === 2 && !data.pipeline_id) {
ctx.addIssue({
path: ['pipeline_id'],
message: t('common.pleaseSelect'),
code: 'custom',
});
}
}); });
export const pipelineFormSchema = z.object({ export const pipelineFormSchema = z.object({

View File

@ -1,14 +1,18 @@
import { IDataPipelineSelectNode } from '@/components/data-pipeline-select'; import {
DataFlowSelect,
IDataPipelineSelectNode,
} from '@/components/data-pipeline-select';
import GraphRagItems from '@/components/parse-configuration/graph-rag-form-fields'; import GraphRagItems from '@/components/parse-configuration/graph-rag-form-fields';
import RaptorFormFields from '@/components/parse-configuration/raptor-form-fields'; import RaptorFormFields from '@/components/parse-configuration/raptor-form-fields';
import { Button } from '@/components/ui/button'; import { Button } from '@/components/ui/button';
import Divider from '@/components/ui/divider'; import Divider from '@/components/ui/divider';
import { Form } from '@/components/ui/form'; import { Form } from '@/components/ui/form';
import { FormLayout } from '@/constants/form';
import { DocumentParserType } from '@/constants/knowledge'; import { DocumentParserType } from '@/constants/knowledge';
import { PermissionRole } from '@/constants/permission'; import { PermissionRole } from '@/constants/permission';
import { zodResolver } from '@hookform/resolvers/zod'; import { zodResolver } from '@hookform/resolvers/zod';
import { useEffect, useState } from 'react'; import { useEffect, useState } from 'react';
import { useForm } from 'react-hook-form'; import { useForm, useWatch } from 'react-hook-form';
import { useTranslation } from 'react-i18next'; import { useTranslation } from 'react-i18next';
import { z } from 'zod'; import { z } from 'zod';
import { TopTitle } from '../dataset-title'; import { TopTitle } from '../dataset-title';
@ -16,10 +20,10 @@ import {
GenerateType, GenerateType,
IGenerateLogButtonProps, IGenerateLogButtonProps,
} from '../dataset/generate-button/generate'; } from '../dataset/generate-button/generate';
import LinkDataPipeline, { import { ChunkMethodForm } from './chunk-method-form';
IDataPipelineNodeProps, import { IDataPipelineNodeProps } from './components/link-data-pipeline';
} from './components/link-data-pipeline';
import { MainContainer } from './configuration-form-container'; import { MainContainer } from './configuration-form-container';
import { ChunkMethodItem, ParseTypeItem } from './configuration/common-item';
import { formSchema } from './form-schema'; import { formSchema } from './form-schema';
import { GeneralForm } from './general-form'; import { GeneralForm } from './general-form';
import { useFetchKnowledgeConfigurationOnMount } from './hooks'; import { useFetchKnowledgeConfigurationOnMount } from './hooks';
@ -44,6 +48,7 @@ const enum MethodValue {
export default function DatasetSettings() { export default function DatasetSettings() {
const { t } = useTranslation(); const { t } = useTranslation();
const form = useForm<z.infer<typeof formSchema>>({ const form = useForm<z.infer<typeof formSchema>>({
resolver: zodResolver(formSchema), resolver: zodResolver(formSchema),
defaultValues: { defaultValues: {
@ -58,6 +63,7 @@ export default function DatasetSettings() {
auto_questions: 0, auto_questions: 0,
html4excel: false, html4excel: false,
topn_tags: 3, topn_tags: 3,
toc_extraction: false,
raptor: { raptor: {
use_raptor: true, use_raptor: true,
max_token: 256, max_token: 256,
@ -73,17 +79,17 @@ export default function DatasetSettings() {
}, },
}, },
pipeline_id: '', pipeline_id: '',
parseType: 1,
pagerank: 0, pagerank: 0,
}, },
}); });
const knowledgeDetails = useFetchKnowledgeConfigurationOnMount(form); const knowledgeDetails = useFetchKnowledgeConfigurationOnMount(form);
const [pipelineData, setPipelineData] = useState<IDataPipelineNodeProps>(); const [pipelineData, setPipelineData] = useState<IDataPipelineNodeProps>();
const [graphRagGenerateData, setGraphRagGenerateData] = const [graphRagGenerateData, setGraphRagGenerateData] =
useState<IGenerateLogButtonProps>(); useState<IGenerateLogButtonProps>();
const [raptorGenerateData, setRaptorGenerateData] = const [raptorGenerateData, setRaptorGenerateData] =
useState<IGenerateLogButtonProps>(); useState<IGenerateLogButtonProps>();
useEffect(() => { useEffect(() => {
console.log('🚀 ~ DatasetSettings ~ knowledgeDetails:', knowledgeDetails); console.log('🚀 ~ DatasetSettings ~ knowledgeDetails:', knowledgeDetails);
if (knowledgeDetails) { if (knowledgeDetails) {
@ -102,8 +108,10 @@ export default function DatasetSettings() {
finish_at: knowledgeDetails.raptor_task_finish_at, finish_at: knowledgeDetails.raptor_task_finish_at,
task_id: knowledgeDetails.raptor_task_id, task_id: knowledgeDetails.raptor_task_id,
} as IGenerateLogButtonProps); } as IGenerateLogButtonProps);
form.setValue('parseType', knowledgeDetails.pipeline_id ? 2 : 1);
form.setValue('pipeline_id', knowledgeDetails.pipeline_id || '');
} }
}, [knowledgeDetails]); }, [knowledgeDetails, form]);
async function onSubmit(data: z.infer<typeof formSchema>) { async function onSubmit(data: z.infer<typeof formSchema>) {
try { try {
@ -137,6 +145,22 @@ export default function DatasetSettings() {
} as IGenerateLogButtonProps); } as IGenerateLogButtonProps);
} }
}; };
const parseType = useWatch({
control: form.control,
name: 'parseType',
defaultValue: knowledgeDetails.pipeline_id ? 2 : 1,
});
const selectedTag = useWatch({
name: 'parser_id',
control: form.control,
});
useEffect(() => {
if (parseType === 1) {
form.setValue('pipeline_id', '');
}
console.log('parseType', parseType);
}, [parseType, form]);
return ( return (
<section className="p-5 h-full flex flex-col"> <section className="p-5 h-full flex flex-col">
<TopTitle <TopTitle
@ -167,10 +191,30 @@ export default function DatasetSettings() {
onDelete={() => handleDeletePipelineTask(GenerateType.Raptor)} onDelete={() => handleDeletePipelineTask(GenerateType.Raptor)}
></RaptorFormFields> ></RaptorFormFields>
<Divider /> <Divider />
<LinkDataPipeline <ParseTypeItem line={1} />
{parseType === 1 && (
<ChunkMethodItem line={1}></ChunkMethodItem>
)}
{parseType === 2 && (
<DataFlowSelect
isMult={false}
showToDataPipeline={true}
formFieldName="pipeline_id"
layout={FormLayout.Horizontal}
/>
)}
<Divider />
{parseType === 1 && (
<ChunkMethodForm
selectedTag={selectedTag as DocumentParserType}
/>
)}
{/* <LinkDataPipeline
data={pipelineData} data={pipelineData}
handleLinkOrEditSubmit={handleLinkOrEditSubmit} handleLinkOrEditSubmit={handleLinkOrEditSubmit}
/> /> */}
</MainContainer> </MainContainer>
</div> </div>
<div className="text-right items-center flex justify-end gap-3 w-[768px]"> <div className="text-right items-center flex justify-end gap-3 w-[768px]">

View File

@ -62,6 +62,7 @@ export function SavingButton() {
if (beValid) { if (beValid) {
form.handleSubmit(async (values) => { form.handleSubmit(async (values) => {
console.log('saveKnowledgeConfiguration: ', values); console.log('saveKnowledgeConfiguration: ', values);
delete values['parseType'];
// delete values['avatar']; // delete values['avatar'];
await saveKnowledgeConfiguration({ await saveKnowledgeConfiguration({
kb_id, kb_id,

View File

@ -29,11 +29,6 @@ export function SideBar({ refreshCount }: PropType) {
const items = useMemo(() => { const items = useMemo(() => {
const list = [ const list = [
{
icon: <DatabaseZap className="size-4" />,
label: t(`knowledgeDetails.overview`),
key: Routes.DataSetOverview,
},
{ {
icon: <FolderOpen className="size-4" />, icon: <FolderOpen className="size-4" />,
label: t(`knowledgeDetails.subbarFiles`), label: t(`knowledgeDetails.subbarFiles`),
@ -44,6 +39,11 @@ export function SideBar({ refreshCount }: PropType) {
label: t(`knowledgeDetails.testing`), label: t(`knowledgeDetails.testing`),
key: Routes.DatasetTesting, key: Routes.DatasetTesting,
}, },
{
icon: <DatabaseZap className="size-4" />,
label: t(`knowledgeDetails.overview`),
key: Routes.DataSetOverview,
},
{ {
icon: <Banknote className="size-4" />, icon: <Banknote className="size-4" />,
label: t(`knowledgeDetails.configuration`), label: t(`knowledgeDetails.configuration`),

View File

@ -16,6 +16,7 @@ import {
FormMessage, FormMessage,
} from '@/components/ui/form'; } from '@/components/ui/form';
import { Input } from '@/components/ui/input'; import { Input } from '@/components/ui/input';
import { FormLayout } from '@/constants/form';
import { useNavigatePage } from '@/hooks/logic-hooks/navigate-hooks'; import { useNavigatePage } from '@/hooks/logic-hooks/navigate-hooks';
import { IModalProps } from '@/interfaces/common'; import { IModalProps } from '@/interfaces/common';
import { zodResolver } from '@hookform/resolvers/zod'; import { zodResolver } from '@hookform/resolvers/zod';
@ -137,8 +138,9 @@ export function InputForm({ onOk }: IModalProps<any>) {
{parseType === 2 && ( {parseType === 2 && (
<DataFlowSelect <DataFlowSelect
isMult={false} isMult={false}
toDataPipeline={navigateToAgents} showToDataPipeline={true}
formFieldName="pipeline_id" formFieldName="pipeline_id"
layout={FormLayout.Vertical}
/> />
)} )}
</form> </form>