Compare commits

...

7 Commits

Author SHA1 Message Date
ad56137a59 Feat: ​​OpenSearch's support for newly embedding models​​ (#10494)
### What problem does this PR solve?

fix issues:https://github.com/infiniflow/ragflow/issues/10402

As the newly distributed embedding models support vector dimensions max
to 4096, while current OpenSearch's max dimension support is 1536.
As I tested, the 4096-dimensions vector will be treated as a float type
which is unacceptable in OpenSearch.

Besides, OpenSearch supports max to 16000 dimensions by defalut with the
vector engine(Faiss). According to:
https://docs.opensearch.org/2.19/field-types/supported-field-types/knn-methods-engines/

I added max to 10240 dimensions support for OpenSearch, as I think will
be sufficient in the future.

As I tested , it worked well on my own server (treated as knn_vector)by
using qwen3-embedding:8b as the embedding model:
<img width="1338" height="790" alt="image"
src="https://github.com/user-attachments/assets/a9b2d284-fcf6-4cea-859a-6aadccf36ace"
/>


### Type of change

- [x] New Feature (non-breaking change which adds functionality)


By the way, I will still focus on the stuff about
Elasticsearch/Opensearch as search engines and vector databases.

Co-authored-by: 张雨豪 <zhangyh80@chinatelecom.cn>
2025-10-11 19:58:12 +08:00
2828e321bc Fix: remove lang for autio. (#10496)
### What problem does this PR solve?


### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2025-10-11 19:38:07 +08:00
932781ea4e Fix: incorrect agent template #10393 (#10491)
### What problem does this PR solve?

Fix: incorrect agent template #10493

### Type of change

- [X] Bug Fix (non-breaking change which fixes an issue)
2025-10-11 19:37:42 +08:00
5200711441 Feat: add support for multi-column PDF parsing (#10475)
### What problem does this PR solve?

Add support for multi-columns PDF parsing. #9878, #9919.

Two-column sample:
<img width="1885" height="1020" alt="image"
src="https://github.com/user-attachments/assets/0270c028-2db8-4ca6-a4b7-cd5830882d28"
/>

Three-column sample: 
<img width="1881" height="992" alt="image"
src="https://github.com/user-attachments/assets/9ee88844-d5b1-4927-9e4e-3bd810d6e03a"
/>

Single-column sample:
<img width="1883" height="1042" alt="image"
src="https://github.com/user-attachments/assets/e93d3d18-43c3-4067-b5fa-e454ed0ab093"
/>



### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
- [x] New Feature (non-breaking change which adds functionality)
2025-10-11 18:46:09 +08:00
c21cea2038 Fix: Added table of contents extraction functionality and optimized form item layout #9869 (#10492)
### What problem does this PR solve?

Fix: Added table of contents extraction functionality and optimized form
item layout #9869

- Added `EnableTocToggle` component to toggle table of contents
extraction on and off
- Added multiple parser configuration components (such as naive, book,
laws, etc.), displaying different parser components based on built-in
slicing methods

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2025-10-11 18:45:55 +08:00
6a0f448419 Feat: Modify the default style of the agent node anchor #9869 (#10489)
### What problem does this PR solve?

Feat: Modify the default style of the agent node anchor #9869

### Type of change


- [x] New Feature (non-breaking change which adds functionality)
2025-10-11 18:45:38 +08:00
7d2f65671f Feat: debugging toc part. (#10486)
### What problem does this PR solve?

#10436

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
2025-10-11 18:45:21 +08:00
70 changed files with 1320 additions and 753 deletions

File diff suppressed because one or more lines are too long

View File

@ -2816,6 +2816,13 @@
"tags": "LLM,TEXT EMBEDDING,TEXT RE-RANK,IMAGE2TEXT",
"status": "1",
"llm": [
{
"llm_name":"THUDM/GLM-4.1V-9B-Thinking",
"tags":"LLM,CHAT,IMAGE2TEXT, 64k",
"max_tokens":64000,
"model_type":"chat",
"is_tools": false
},
{
"llm_name": "Qwen/Qwen3-Embedding-8B",
"tags": "TEXT EMBEDDING,TEXT RE-RANK,32k",
@ -3145,13 +3152,6 @@
"model_type": "chat",
"is_tools": true
},
{
"llm_name": "Qwen/Qwen2-1.5B-Instruct",
"tags": "LLM,CHAT,32k",
"max_tokens": 32000,
"model_type": "chat",
"is_tools": true
},
{
"llm_name": "Pro/Qwen/Qwen2.5-Coder-7B-Instruct",
"tags": "LLM,CHAT,32k",
@ -3159,13 +3159,6 @@
"model_type": "chat",
"is_tools": false
},
{
"llm_name": "Pro/Qwen/Qwen2-VL-7B-Instruct",
"tags": "LLM,CHAT,IMAGE2TEXT,32k",
"max_tokens": 32000,
"model_type": "image2text",
"is_tools": false
},
{
"llm_name": "Pro/Qwen/Qwen2.5-7B-Instruct",
"tags": "LLM,CHAT,32k",

View File

@ -200,6 +200,61 @@
}
}
},
{
"knn_vector": {
"match": "*_2048_vec",
"mapping": {
"type": "knn_vector",
"index": true,
"space_type": "cosinesimil",
"dimension": 2048
}
}
},
{
"knn_vector": {
"match": "*_4096_vec",
"mapping": {
"type": "knn_vector",
"index": true,
"space_type": "cosinesimil",
"dimension": 4096
}
}
},
{
"knn_vector": {
"match": "*_6144_vec",
"mapping": {
"type": "knn_vector",
"index": true,
"space_type": "cosinesimil",
"dimension": 6144
}
}
},
{
"knn_vector": {
"match": "*_8192_vec",
"mapping": {
"type": "knn_vector",
"index": true,
"space_type": "cosinesimil",
"dimension": 8192
}
}
},
{
"knn_vector": {
"match": "*_10240_vec",
"mapping": {
"type": "knn_vector",
"index": true,
"space_type": "cosinesimil",
"dimension": 10240
}
}
},
{
"binary": {
"match": "*_bin",

View File

@ -17,7 +17,6 @@
import re
import mistune
from markdown import markdown
@ -117,8 +116,6 @@ class MarkdownElementExtractor:
def __init__(self, markdown_content):
self.markdown_content = markdown_content
self.lines = markdown_content.split("\n")
self.ast_parser = mistune.create_markdown(renderer="ast")
self.ast_nodes = self.ast_parser(markdown_content)
def extract_elements(self):
"""Extract individual elements (headers, code blocks, lists, etc.)"""

View File

@ -15,11 +15,13 @@
#
import logging
import math
import os
import random
import re
import sys
import threading
from collections import Counter, defaultdict
from copy import deepcopy
from io import BytesIO
from timeit import default_timer as timer
@ -349,9 +351,78 @@ class RAGFlowPdfParser:
self.boxes[i]["top"] += self.page_cum_height[self.boxes[i]["page_number"] - 1]
self.boxes[i]["bottom"] += self.page_cum_height[self.boxes[i]["page_number"] - 1]
def _text_merge(self):
def _assign_column(self, boxes, zoomin=3):
if not boxes:
return boxes
if all("col_id" in b for b in boxes):
return boxes
by_page = defaultdict(list)
for b in boxes:
by_page[b["page_number"]].append(b)
page_info = {} # pg -> dict(page_w, left_edge, cand_cols)
counter = Counter()
for pg, bxs in by_page.items():
if not bxs:
page_info[pg] = {"page_w": 1.0, "left_edge": 0.0, "cand": 1}
counter[1] += 1
continue
if hasattr(self, "page_images") and self.page_images and len(self.page_images) >= pg:
page_w = self.page_images[pg - 1].size[0] / max(1, zoomin)
left_edge = 0.0
else:
xs0 = [box["x0"] for box in bxs]
xs1 = [box["x1"] for box in bxs]
left_edge = float(min(xs0))
page_w = max(1.0, float(max(xs1) - left_edge))
widths = [max(1.0, (box["x1"] - box["x0"])) for box in bxs]
median_w = float(np.median(widths)) if widths else 1.0
raw_cols = int(page_w / max(1.0, median_w))
# cand = raw_cols if (raw_cols >= 2 and median_w < page_w / raw_cols * 0.8) else 1
cand = raw_cols
page_info[pg] = {"page_w": page_w, "left_edge": left_edge, "cand": cand}
counter[cand] += 1
logging.info(f"[Page {pg}] median_w={median_w:.2f}, page_w={page_w:.2f}, raw_cols={raw_cols}, cand={cand}")
global_cols = counter.most_common(1)[0][0]
logging.info(f"Global column_num decided by majority: {global_cols}")
for pg, bxs in by_page.items():
if not bxs:
continue
page_w = page_info[pg]["page_w"]
left_edge = page_info[pg]["left_edge"]
if global_cols == 1:
for box in bxs:
box["col_id"] = 0
continue
for box in bxs:
w = box["x1"] - box["x0"]
if w >= 0.8 * page_w:
box["col_id"] = 0
continue
cx = 0.5 * (box["x0"] + box["x1"])
norm_cx = (cx - left_edge) / page_w
norm_cx = max(0.0, min(norm_cx, 0.999999))
box["col_id"] = int(min(global_cols - 1, norm_cx * global_cols))
return boxes
def _text_merge(self, zoomin=3):
# merge adjusted boxes
bxs = self.boxes
bxs = self._assign_column(self.boxes, zoomin)
def end_with(b, txt):
txt = txt.strip()
@ -367,9 +438,15 @@ class RAGFlowPdfParser:
while i < len(bxs) - 1:
b = bxs[i]
b_ = bxs[i + 1]
if b["page_number"] != b_["page_number"] or b.get("col_id") != b_.get("col_id"):
i += 1
continue
if b.get("layoutno", "0") != b_.get("layoutno", "1") or b.get("layout_type", "") in ["table", "figure", "equation"]:
i += 1
continue
if abs(self._y_dis(b, b_)) < self.mean_height[bxs[i]["page_number"] - 1] / 3:
# merge
bxs[i]["x1"] = b_["x1"]
@ -379,83 +456,108 @@ class RAGFlowPdfParser:
bxs.pop(i + 1)
continue
i += 1
continue
dis_thr = 1
dis = b["x1"] - b_["x0"]
if b.get("layout_type", "") != "text" or b_.get("layout_type", "") != "text":
if end_with(b, "") or start_with(b_, ""):
dis_thr = -8
else:
i += 1
continue
if abs(self._y_dis(b, b_)) < self.mean_height[bxs[i]["page_number"] - 1] / 5 and dis >= dis_thr and b["x1"] < b_["x1"]:
# merge
bxs[i]["x1"] = b_["x1"]
bxs[i]["top"] = (b["top"] + b_["top"]) / 2
bxs[i]["bottom"] = (b["bottom"] + b_["bottom"]) / 2
bxs[i]["text"] += b_["text"]
bxs.pop(i + 1)
continue
i += 1
self.boxes = bxs
def _naive_vertical_merge(self, zoomin=3):
import math
bxs = Recognizer.sort_Y_firstly(self.boxes, np.median(self.mean_height) / 3)
bxs = self._assign_column(self.boxes, zoomin)
column_width = np.median([b["x1"] - b["x0"] for b in self.boxes])
if not column_width or math.isnan(column_width):
column_width = self.mean_width[0]
self.column_num = int(self.page_images[0].size[0] / zoomin / column_width)
if column_width < self.page_images[0].size[0] / zoomin / self.column_num:
logging.info("Multi-column................... {} {}".format(column_width, self.page_images[0].size[0] / zoomin / self.column_num))
self.boxes = self.sort_X_by_page(self.boxes, column_width / self.column_num)
grouped = defaultdict(list)
for b in bxs:
grouped[(b["page_number"], b.get("col_id", 0))].append(b)
i = 0
while i + 1 < len(bxs):
b = bxs[i]
b_ = bxs[i + 1]
if b["page_number"] < b_["page_number"] and re.match(r"[0-9 •一—-]+$", b["text"]):
bxs.pop(i)
merged_boxes = []
for (pg, col), bxs in grouped.items():
bxs = sorted(bxs, key=lambda x: (x["top"], x["x0"]))
if not bxs:
continue
if not b["text"].strip():
bxs.pop(i)
continue
concatting_feats = [
b["text"].strip()[-1] in ",;:'\",、‘“;:-",
len(b["text"].strip()) > 1 and b["text"].strip()[-2] in ",;:'\",‘“、;:",
b_["text"].strip() and b_["text"].strip()[0] in "。;?!?”)),,、:",
]
# features for not concating
feats = [
b.get("layoutno", 0) != b_.get("layoutno", 0),
b["text"].strip()[-1] in "。?!?",
self.is_english and b["text"].strip()[-1] in ".!?",
b["page_number"] == b_["page_number"] and b_["top"] - b["bottom"] > self.mean_height[b["page_number"] - 1] * 1.5,
b["page_number"] < b_["page_number"] and abs(b["x0"] - b_["x0"]) > self.mean_width[b["page_number"] - 1] * 4,
]
# split features
detach_feats = [b["x1"] < b_["x0"], b["x0"] > b_["x1"]]
if (any(feats) and not any(concatting_feats)) or any(detach_feats):
logging.debug(
"{} {} {} {}".format(
b["text"],
b_["text"],
any(feats),
any(concatting_feats),
mh = self.mean_height[pg - 1] if self.mean_height else np.median([b["bottom"] - b["top"] for b in bxs]) or 10
i = 0
while i + 1 < len(bxs):
b = bxs[i]
b_ = bxs[i + 1]
if b["page_number"] < b_["page_number"] and re.match(r"[0-9 •一—-]+$", b["text"]):
bxs.pop(i)
continue
if not b["text"].strip():
bxs.pop(i)
continue
if not b["text"].strip() or b.get("layoutno") != b_.get("layoutno"):
i += 1
continue
if b_["top"] - b["bottom"] > mh * 1.5:
i += 1
continue
overlap = max(0, min(b["x1"], b_["x1"]) - max(b["x0"], b_["x0"]))
if overlap / max(1, min(b["x1"] - b["x0"], b_["x1"] - b_["x0"])) < 0.3:
i += 1
continue
concatting_feats = [
b["text"].strip()[-1] in ",;:'\",、‘“;:-",
len(b["text"].strip()) > 1 and b["text"].strip()[-2] in ",;:'\",‘“、;:",
b_["text"].strip() and b_["text"].strip()[0] in "。;?!?”)),,、:",
]
# features for not concating
feats = [
b.get("layoutno", 0) != b_.get("layoutno", 0),
b["text"].strip()[-1] in "。?!?",
self.is_english and b["text"].strip()[-1] in ".!?",
b["page_number"] == b_["page_number"] and b_["top"] - b["bottom"] > self.mean_height[b["page_number"] - 1] * 1.5,
b["page_number"] < b_["page_number"] and abs(b["x0"] - b_["x0"]) > self.mean_width[b["page_number"] - 1] * 4,
]
# split features
detach_feats = [b["x1"] < b_["x0"], b["x0"] > b_["x1"]]
if (any(feats) and not any(concatting_feats)) or any(detach_feats):
logging.debug(
"{} {} {} {}".format(
b["text"],
b_["text"],
any(feats),
any(concatting_feats),
)
)
)
i += 1
continue
# merge up and down
b["bottom"] = b_["bottom"]
b["text"] += b_["text"]
b["x0"] = min(b["x0"], b_["x0"])
b["x1"] = max(b["x1"], b_["x1"])
bxs.pop(i + 1)
self.boxes = bxs
i += 1
continue
b["text"] = (b["text"].rstrip() + " " + b_["text"].lstrip()).strip()
b["bottom"] = b_["bottom"]
b["x0"] = min(b["x0"], b_["x0"])
b["x1"] = max(b["x1"], b_["x1"])
bxs.pop(i + 1)
merged_boxes.extend(bxs)
self.boxes = sorted(merged_boxes, key=lambda x: (x["page_number"], x.get("col_id", 0), x["top"]))
def _final_reading_order_merge(self, zoomin=3):
if not self.boxes:
return
self.boxes = self._assign_column(self.boxes, zoomin=zoomin)
pages = defaultdict(lambda: defaultdict(list))
for b in self.boxes:
pg = b["page_number"]
col = b.get("col_id", 0)
pages[pg][col].append(b)
for pg in pages:
for col in pages[pg]:
pages[pg][col].sort(key=lambda x: (x["top"], x["x0"]))
new_boxes = []
for pg in sorted(pages.keys()):
for col in sorted(pages[pg].keys()):
new_boxes.extend(pages[pg][col])
self.boxes = new_boxes
def _concat_downward(self, concat_between_pages=True):
self.boxes = Recognizer.sort_Y_firstly(self.boxes, 0)
@ -997,7 +1099,7 @@ class RAGFlowPdfParser:
self.__ocr(i + 1, img, chars, zoomin, id)
if callback and i % 6 == 5:
callback((i + 1) * 0.6 / len(self.page_images), msg="")
callback((i + 1) * 0.6 / len(self.page_images))
async def __img_ocr_launcher():
def __ocr_preprocess():
@ -1074,7 +1176,6 @@ class RAGFlowPdfParser:
def insert_table_figures(tbls_or_figs, layout_type):
def min_rectangle_distance(rect1, rect2):
import math
pn1, left1, right1, top1, bottom1 = rect1
pn2, left2, right2, top2, bottom2 = rect2
if right1 >= left2 and right2 >= left1 and bottom1 >= top2 and bottom2 >= top1:
@ -1091,27 +1192,39 @@ class RAGFlowPdfParser:
dy = top1 - bottom2
else:
dy = 0
return math.sqrt(dx*dx + dy*dy)# + (pn2-pn1)*10000
return math.sqrt(dx * dx + dy * dy) # + (pn2-pn1)*10000
for (img, txt), poss in tbls_or_figs:
bboxes = [(i, (b["page_number"], b["x0"], b["x1"], b["top"], b["bottom"])) for i, b in enumerate(self.boxes)]
dists = [(min_rectangle_distance((pn, left, right, top+self.page_cum_height[pn], bott+self.page_cum_height[pn]), rect),i) for i, rect in bboxes for pn, left, right, top, bott in poss]
dists = [
(min_rectangle_distance((pn, left, right, top + self.page_cum_height[pn], bott + self.page_cum_height[pn]), rect), i) for i, rect in bboxes for pn, left, right, top, bott in poss
]
min_i = np.argmin(dists, axis=0)[0]
min_i, rect = bboxes[dists[min_i][-1]]
if isinstance(txt, list):
txt = "\n".join(txt)
pn, left, right, top, bott = poss[0]
if self.boxes[min_i]["bottom"] < top+self.page_cum_height[pn]:
if self.boxes[min_i]["bottom"] < top + self.page_cum_height[pn]:
min_i += 1
self.boxes.insert(min_i, {
"page_number": pn+1, "x0": left, "x1": right, "top": top+self.page_cum_height[pn], "bottom": bott+self.page_cum_height[pn], "layout_type": layout_type, "text": txt, "image": img,
"positions": [[pn+1, int(left), int(right), int(top), int(bott)]]
})
self.boxes.insert(
min_i,
{
"page_number": pn + 1,
"x0": left,
"x1": right,
"top": top + self.page_cum_height[pn],
"bottom": bott + self.page_cum_height[pn],
"layout_type": layout_type,
"text": txt,
"image": img,
"positions": [[pn + 1, int(left), int(right), int(top), int(bott)]],
},
)
for b in self.boxes:
b["position_tag"] = self._line_tag(b, zoomin)
b["image"] = self.crop(b["position_tag"], zoomin)
b["positions"] = [[pos[0][-1]+1, *pos[1:]] for pos in RAGFlowPdfParser.extract_positions(b["position_tag"])]
b["positions"] = [[pos[0][-1] + 1, *pos[1:]] for pos in RAGFlowPdfParser.extract_positions(b["position_tag"])]
insert_table_figures(tbls, "table")
insert_table_figures(figs, "figure")

View File

@ -328,7 +328,7 @@ class Pdf(PdfParser):
callback(0.65, "Table analysis ({:.2f}s)".format(timer() - start))
start = timer()
self._text_merge()
self._text_merge(zoomin=zoomin)
callback(0.67, "Text merged ({:.2f}s)".format(timer() - start))
if separate_tables_figures:
@ -340,6 +340,7 @@ class Pdf(PdfParser):
tbls = self._extract_table_figure(True, zoomin, True, True)
self._naive_vertical_merge()
self._concat_downward()
self._final_reading_order_merge()
# self._filter_forpages()
logging.info("layouts cost: {}s".format(timer() - first_start))
return [(b["text"], self._line_tag(b, zoomin)) for b in self.boxes], tbls

View File

@ -184,8 +184,6 @@ class ParserParam(ProcessParamBase):
audio_config = self.setups.get("audio", "")
if audio_config:
self.check_empty(audio_config.get("llm_id"), "Audio VLM")
audio_language = audio_config.get("lang", "")
self.check_empty(audio_language, "Language")
email_config = self.setups.get("email", "")
if email_config:
@ -348,15 +346,13 @@ class Parser(ProcessBase):
conf = self._param.setups["audio"]
self.set_output("output_format", conf["output_format"])
lang = conf["lang"]
_, ext = os.path.splitext(name)
with tempfile.NamedTemporaryFile(suffix=ext) as tmpf:
tmpf.write(blob)
tmpf.flush()
tmp_path = os.path.abspath(tmpf.name)
seq2txt_mdl = LLMBundle(self._canvas.get_tenant_id(), LLMType.SPEECH2TEXT, lang=lang)
seq2txt_mdl = LLMBundle(self._canvas.get_tenant_id(), LLMType.SPEECH2TEXT)
txt = seq2txt_mdl.transcription(tmp_path)
self.set_output("text", txt)

View File

@ -25,7 +25,7 @@ class SplitterFromUpstream(BaseModel):
file: dict | None = Field(default=None)
chunks: list[dict[str, Any]] | None = Field(default=None)
output_format: Literal["json", "markdown", "text", "html"] | None = Field(default=None)
output_format: Literal["json", "markdown", "text", "html", "chunks"] | None = Field(default=None)
json_result: list[dict[str, Any]] | None = Field(default=None, alias="json")
markdown_result: str | None = Field(default=None, alias="markdown")

View File

@ -126,7 +126,7 @@ class Tokenizer(ProcessBase):
if ck.get("summary"):
ck["content_ltks"] = rag_tokenizer.tokenize(str(ck["summary"]))
ck["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(ck["content_ltks"])
else:
elif ck.get("text"):
ck["content_ltks"] = rag_tokenizer.tokenize(ck["text"])
ck["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(ck["content_ltks"])
if i % 100 == 99:
@ -155,6 +155,8 @@ class Tokenizer(ProcessBase):
for i, ck in enumerate(chunks):
ck["title_tks"] = rag_tokenizer.tokenize(re.sub(r"\.[a-zA-Z]+$", "", from_upstream.name))
ck["title_sm_tks"] = rag_tokenizer.fine_grained_tokenize(ck["title_tks"])
if not ck.get("text"):
continue
ck["content_ltks"] = rag_tokenizer.tokenize(ck["text"])
ck["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(ck["content_ltks"])
if i % 100 == 99:

View File

@ -613,13 +613,13 @@ def naive_merge(sections: str | list, chunk_token_num=128, delimiter="\n。
dels = get_delimiters(delimiter)
for sec, pos in sections:
if num_tokens_from_string(sec) < chunk_token_num:
add_chunk(sec, pos)
add_chunk("\n"+sec, pos)
continue
split_sec = re.split(r"(%s)" % dels, sec, flags=re.DOTALL)
for sub_sec in split_sec:
if re.match(f"^{dels}$", sub_sec):
continue
add_chunk(sub_sec, pos)
add_chunk("\n"+sub_sec, pos)
return cks
@ -669,13 +669,13 @@ def naive_merge_with_images(texts, images, chunk_token_num=128, delimiter="\n。
for sub_sec in split_sec:
if re.match(f"^{dels}$", sub_sec):
continue
add_chunk(sub_sec, image, text_pos)
add_chunk("\n"+sub_sec, image, text_pos)
else:
split_sec = re.split(r"(%s)" % dels, text)
for sub_sec in split_sec:
if re.match(f"^{dels}$", sub_sec):
continue
add_chunk(sub_sec, image)
add_chunk("\n"+sub_sec, image)
return cks, result_images
@ -757,7 +757,7 @@ def naive_merge_docx(sections, chunk_token_num=128, delimiter="\n。"):
for sub_sec in split_sec:
if re.match(f"^{dels}$", sub_sec):
continue
add_chunk(sub_sec, image,"")
add_chunk("\n"+sub_sec, image,"")
line = ""
if line:
@ -765,7 +765,7 @@ def naive_merge_docx(sections, chunk_token_num=128, delimiter="\n。"):
for sub_sec in split_sec:
if re.match(f"^{dels}$", sub_sec):
continue
add_chunk(sub_sec, image,"")
add_chunk("\n"+sub_sec, image,"")
return cks, images

View File

@ -23,7 +23,7 @@ import jinja2
import json_repair
import trio
from api.utils import hash_str2int
from rag.nlp import is_chinese
from rag.nlp import rag_tokenizer
from rag.prompts.template import load_prompt
from rag.settings import TAG_FLD
from rag.utils import encoder, num_tokens_from_string
@ -672,7 +672,7 @@ def assign_toc_levels(toc_secs, chat_mdl, gen_conf = {"temperature": 0.2}):
TOC_FROM_TEXT_SYSTEM = load_prompt("toc_from_text_system")
TOC_FROM_TEXT_USER = load_prompt("toc_from_text_user")
# Generate TOC from text chunks with text llms
async def gen_toc_from_text(txt_info: dict, chat_mdl):
async def gen_toc_from_text(txt_info: dict, chat_mdl, callback=None):
try:
ans = gen_json(
PROMPT_JINJA_ENV.from_string(TOC_FROM_TEXT_SYSTEM).render(),
@ -682,6 +682,8 @@ async def gen_toc_from_text(txt_info: dict, chat_mdl):
)
print(ans, "::::::::::::::::::::::::::::::::::::", flush=True)
txt_info["toc"] = ans if ans else []
if callback:
callback(msg="")
except Exception as e:
logging.exception(e)
@ -707,14 +709,14 @@ def split_chunks(chunks, max_length: int):
return result
async def run_toc_from_text(chunks, chat_mdl):
async def run_toc_from_text(chunks, chat_mdl, callback=None):
input_budget = int(chat_mdl.max_length * INPUT_UTILIZATION) - num_tokens_from_string(
TOC_FROM_TEXT_USER + TOC_FROM_TEXT_SYSTEM
)
input_budget = 1024 if input_budget > 1024 else input_budget
chunk_sections = split_chunks(chunks, input_budget)
res = []
titles = []
chunks_res = []
async with trio.open_nursery() as nursery:
@ -722,21 +724,21 @@ async def run_toc_from_text(chunks, chat_mdl):
if not chunk:
continue
chunks_res.append({"chunks": chunk})
nursery.start_soon(gen_toc_from_text, chunks_res[-1], chat_mdl)
nursery.start_soon(gen_toc_from_text, chunks_res[-1], chat_mdl, callback)
for chunk in chunks_res:
res.extend(chunk.get("toc", []))
titles.extend(chunk.get("toc", []))
print(res, ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>")
print(titles, ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>")
# Filter out entries with title == -1
prune = len(titles) > 512
max_len = 12 if prune else 22
filtered = []
for x in res:
for x in titles:
if not x.get("title") or x["title"] == "-1":
continue
if is_chinese(x["title"]) and len(x["title"]) > 12:
continue
if len(x["title"].split(" ")) > 12:
if len(rag_tokenizer.tokenize(x["title"]).split(" ")) > max_len:
continue
if re.match(r"[0-9,.()/ -]+$", x["title"]):
continue
@ -751,8 +753,12 @@ async def run_toc_from_text(chunks, chat_mdl):
toc_with_levels = assign_toc_levels(raw_structure, chat_mdl, {"temperature": 0.0, "top_p": 0.9})
# Merge structure and content (by index)
prune = len(toc_with_levels) > 512
max_lvl = sorted([t.get("level", "0") for t in toc_with_levels])[-1]
merged = []
for _ , (toc_item, src_item) in enumerate(zip(toc_with_levels, filtered)):
if prune and toc_item.get("level", "0") >= max_lvl:
continue
merged.append({
"level": toc_item.get("level", "0"),
"title": toc_item.get("title", ""),
@ -776,7 +782,7 @@ def relevant_chunks_with_toc(query: str, toc:list[dict], chat_mdl, topn: int=6):
print(ans, "::::::::::::::::::::::::::::::::::::", flush=True)
id2score = {}
for ti, sc in zip(toc, ans):
if sc.get("score", -1) < 1:
if not isinstance(sc, dict) or sc.get("score", -1) < 1:
continue
for id in ti.get("ids", []):
if id not in id2score:

View File

@ -370,14 +370,14 @@ async def build_chunks(task, progress_callback):
nursery.start_soon(doc_question_proposal, chat_mdl, d, task["parser_config"]["auto_questions"])
progress_callback(msg="Question generation {} chunks completed in {:.2f}s".format(len(docs), timer() - st))
if task["parser_config"].get("toc_extraction", True):
if task["parser_id"].lower() == "naive" and task["parser_config"].get("toc_extraction", False):
progress_callback(msg="Start to generate table of content ...")
chat_mdl = LLMBundle(task["tenant_id"], LLMType.CHAT, llm_name=task["llm_id"], lang=task["language"])
docs = sorted(docs, key=lambda d:(
d.get("page_num_int", 0)[0] if isinstance(d.get("page_num_int", 0), list) else d.get("page_num_int", 0),
d.get("top_int", 0)[0] if isinstance(d.get("top_int", 0), list) else d.get("top_int", 0)
))
toc: list[dict] = await run_toc_from_text([d["content_with_weight"] for d in docs], chat_mdl)
toc: list[dict] = await run_toc_from_text([d["content_with_weight"] for d in docs], chat_mdl, progress_callback)
logging.info("------------ T O C -------------\n"+json.dumps(toc, ensure_ascii=False, indent=' '))
ii = 0
while ii < len(toc):
@ -387,7 +387,7 @@ async def build_chunks(task, progress_callback):
toc[ii]["ids"] = [docs[idx]["id"]]
if ii == len(toc) -1:
break
for jj in range(idx+1, int(toc[ii+1]["chunk_id"])):
for jj in range(idx+1, int(toc[ii+1]["chunk_id"])+1):
toc[ii]["ids"].append(docs[jj]["id"])
except Exception as e:
logging.exception(e)

View File

@ -20,6 +20,7 @@ import { IParserConfig } from '@/interfaces/database/document';
import { IChangeParserConfigRequestBody } from '@/interfaces/request/document';
import {
ChunkMethodItem,
EnableTocToggle,
ParseTypeItem,
} from '@/pages/dataset/dataset-setting/configuration/common-item';
import { zodResolver } from '@hookform/resolvers/zod';
@ -113,6 +114,7 @@ export function ChunkMethodDialog({
auto_keywords: z.coerce.number().optional(),
auto_questions: z.coerce.number().optional(),
html4excel: z.boolean().optional(),
toc_extraction: z.boolean().optional(),
// raptor: z
// .object({
// use_raptor: z.boolean().optional(),
@ -247,7 +249,7 @@ export function ChunkMethodDialog({
}, [parseType, form]);
return (
<Dialog open onOpenChange={hideModal}>
<DialogContent className="max-w-[50vw]">
<DialogContent className="max-w-[50vw] text-text-primary">
<DialogHeader>
<DialogTitle>{t('knowledgeDetails.chunkMethod')}</DialogTitle>
</DialogHeader>
@ -338,6 +340,7 @@ export function ChunkMethodDialog({
show={showAutoKeywords(selectedTag) || showExcelToHtml}
className="space-y-3"
>
<EnableTocToggle />
{showAutoKeywords(selectedTag) && (
<>
<AutoKeywordsFormField></AutoKeywordsFormField>

View File

@ -15,6 +15,7 @@ export function useDefaultParserValues() {
auto_keywords: 0,
auto_questions: 0,
html4excel: false,
toc_extraction: false,
// raptor: {
// use_raptor: false,
// prompt: t('knowledgeConfiguration.promptText'),

View File

@ -1,5 +1,7 @@
import { AgentCategory } from '@/constants/agent';
import { FormLayout } from '@/constants/form';
import { useTranslate } from '@/hooks/common-hooks';
import { useNavigatePage } from '@/hooks/logic-hooks/navigate-hooks';
import { useFetchAgentList } from '@/hooks/use-agent-request';
import { buildSelectOptions } from '@/utils/component-util';
import { ArrowUpRight } from 'lucide-react';
@ -21,18 +23,27 @@ export interface IDataPipelineSelectNode {
}
interface IProps {
toDataPipeline?: () => void;
showToDataPipeline?: boolean;
formFieldName: string;
isMult?: boolean;
setDataList?: (data: IDataPipelineSelectNode[]) => void;
layout?: FormLayout;
}
export function DataFlowSelect(props: IProps) {
const { toDataPipeline, formFieldName, isMult = false, setDataList } = props;
const {
showToDataPipeline,
formFieldName,
isMult = false,
setDataList,
layout = FormLayout.Vertical,
} = props;
const { t } = useTranslate('knowledgeConfiguration');
const form = useFormContext();
const { navigateToAgents } = useNavigatePage();
const toDataPipLine = () => {
toDataPipeline?.();
navigateToAgents();
};
const { data: dataPipelineOptions } = useFetchAgentList({
canvas_category: AgentCategory.DataflowCanvas,
@ -69,47 +80,92 @@ export function DataFlowSelect(props: IProps) {
name={formFieldName}
render={({ field }) => (
<FormItem className=" items-center space-y-0 ">
<div className="flex flex-col gap-1">
<div className="flex gap-2 justify-between ">
<FormLabel
tooltip={t('dataFlowTip')}
className="text-sm text-text-primary whitespace-wrap "
>
{t('dataPipeline')}
</FormLabel>
{toDataPipeline && (
<div
className="text-sm flex text-text-primary cursor-pointer"
onClick={toDataPipLine}
{layout === FormLayout.Vertical && (
<div className="flex flex-col gap-1">
<div className="flex gap-2 justify-between ">
<FormLabel
// tooltip={t('dataFlowTip')}
className="text-sm text-text-primary whitespace-wrap "
>
{t('buildItFromScratch')}
<ArrowUpRight size={14} />
</div>
)}
</div>
{t('manualSetup')}
</FormLabel>
{showToDataPipeline && (
<div
className="text-sm flex text-text-primary cursor-pointer"
onClick={toDataPipLine}
>
{t('buildItFromScratch')}
<ArrowUpRight size={14} />
</div>
)}
</div>
<div className="text-muted-foreground">
<FormControl>
<>
{!isMult && (
<SelectWithSearch
{...field}
placeholder={t('dataFlowPlaceholder')}
options={options}
/>
)}
{isMult && (
<MultiSelect
{...field}
onValueChange={field.onChange}
placeholder={t('dataFlowPlaceholder')}
options={options}
/>
)}
</>
</FormControl>
<div className="text-muted-foreground">
<FormControl>
<>
{!isMult && (
<SelectWithSearch
{...field}
placeholder={t('dataFlowPlaceholder')}
options={options}
/>
)}
{isMult && (
<MultiSelect
{...field}
onValueChange={field.onChange}
placeholder={t('dataFlowPlaceholder')}
options={options}
/>
)}
</>
</FormControl>
</div>
</div>
</div>
)}
{layout === FormLayout.Horizontal && (
<div className="flex gap-1 items-center">
<div className="flex gap-2 justify-between w-1/4">
<FormLabel
// tooltip={t('dataFlowTip')}
className="text-sm text-text-secondary whitespace-wrap "
>
{t('manualSetup')}
</FormLabel>
</div>
<div className="text-muted-foreground w-3/4 flex flex-col items-end">
{showToDataPipeline && (
<div
className="text-sm flex text-text-primary cursor-pointer"
onClick={toDataPipLine}
>
{t('buildItFromScratch')}
<ArrowUpRight size={14} />
</div>
)}
<FormControl>
<>
{!isMult && (
<SelectWithSearch
{...field}
placeholder={t('dataFlowPlaceholder')}
options={options}
/>
)}
{isMult && (
<MultiSelect
{...field}
onValueChange={field.onChange}
placeholder={t('dataFlowPlaceholder')}
options={options}
/>
)}
</>
</FormControl>
</div>
</div>
)}
<div className="flex pt-1">
<FormMessage />
</div>

View File

@ -61,7 +61,7 @@ export function DelimiterFormField() {
<FormLabel
required
tooltip={t('knowledgeDetails.delimiterTip')}
className="text-sm text-muted-foreground whitespace-break-spaces w-1/4"
className="text-sm text-text-secondary whitespace-break-spaces w-1/4"
>
{t('knowledgeDetails.delimiter')}
</FormLabel>

View File

@ -28,7 +28,7 @@ export function ExcelToHtmlFormField() {
<div className="flex items-center gap-1">
<FormLabel
tooltip={t('html4excelTip')}
className="text-sm text-muted-foreground whitespace-break-spaces w-1/4"
className="text-sm text-text-secondary whitespace-break-spaces w-1/4"
>
{t('html4excel')}
</FormLabel>

View File

@ -79,7 +79,7 @@ export function LayoutRecognizeFormField({
>
<FormLabel
tooltip={t('layoutRecognizeTip')}
className={cn('text-sm text-muted-foreground whitespace-wrap', {
className={cn('text-sm text-text-secondary whitespace-wrap', {
['w-1/4']: horizontal,
})}
>

View File

@ -17,7 +17,7 @@ export function MaxTokenNumberFormField({ max = 2048, initialValue }: IProps) {
tooltip={t('chunkTokenNumberTip')}
max={max}
defaultValue={initialValue ?? 0}
layout={FormLayout.Vertical}
layout={FormLayout.Horizontal}
></SliderInputFormField>
);
}

View File

@ -36,7 +36,7 @@ export function SliderInputFormField({
tooltip,
defaultValue,
className,
layout = FormLayout.Vertical,
layout = FormLayout.Horizontal,
}: SliderInputFormFieldProps) {
const form = useFormContext();

View File

@ -1,5 +1,4 @@
import { cn } from '@/lib/utils';
import { Radio as LucideRadio } from 'lucide-react';
import React, { useContext, useState } from 'react';
const RadioGroupContext = React.createContext<{
@ -57,7 +56,7 @@ function Radio({ value, checked, disabled, onChange, children }: RadioProps) {
onClick={handleClick}
>
{isChecked && (
<LucideRadio className="h-3 w-3 fill-primary text-primary" />
<div className="h-3 w-3 fill-primary text-primary bg-text-primary rounded-full" />
)}
</span>
{children && <span className="text-foreground">{children}</span>}

View File

@ -19,7 +19,8 @@ export const useNavigatePage = () => {
const navigateToDataset = useCallback(
(id: string) => () => {
navigate(`${Routes.DatasetBase}${Routes.DataSetOverview}/${id}`);
// navigate(`${Routes.DatasetBase}${Routes.DataSetOverview}/${id}`);
navigate(`${Routes.Dataset}/${id}`);
},
[navigate],
);

View File

@ -116,7 +116,7 @@ export default {
generate: 'Generate',
raptor: 'Raptor',
processingType: 'Processing Type',
dataPipeline: 'Data Pipeline',
dataPipeline: 'Ingestion pipeline',
operations: 'Operations',
taskId: 'Task ID',
duration: 'Duration',
@ -126,8 +126,8 @@ export default {
startDate: 'Start Date',
source: 'Source',
fileName: 'File Name',
datasetLogs: 'Dataset Logs',
fileLogs: 'File Logs',
datasetLogs: 'Dataset',
fileLogs: 'File',
overview: 'Overview',
success: 'Success',
failed: 'Failed',
@ -270,6 +270,9 @@ export default {
reRankModelWaring: 'Re-rank model is very time consuming.',
},
knowledgeConfiguration: {
tocExtraction: 'toc toggle',
tocExtractionTip:
" For existing chunks, generate a hierarchical table of contents (one directory per file). During queries, when Directory Enhancement is activated, the system will use a large model to determine which directory items are relevant to the user's question, thereby identifying the relevant chunks.",
deleteGenerateModalContent: `
<p>Deleting the generated <strong class='text-text-primary'>{{type}}</strong> results
will remove all derived entities and relationships from this dataset.
@ -284,11 +287,11 @@ export default {
fileFilter: 'File Filter',
setDefaultTip: '',
setDefault: 'Set as Default',
eidtLinkDataPipeline: 'Edit Data Pipeline',
linkPipelineSetTip: 'Manage data pipeline linkage with this dataset',
eidtLinkDataPipeline: 'Edit Ingestion pipeline',
linkPipelineSetTip: 'Manage Ingestion pipeline linkage with this dataset',
default: 'Default',
dataPipeline: 'Data Pipeline',
linkDataPipeline: 'Link Data Pipeline',
dataPipeline: 'Ingestion pipeline',
linkDataPipeline: 'Link Ingestion pipeline',
enableAutoGenerate: 'Enable Auto Generate',
teamPlaceholder: 'Please select a team.',
dataFlowPlaceholder: 'Please select a pipeline.',
@ -1595,7 +1598,7 @@ This delimiter is used to split the input text into several text pieces echo of
createFromTemplate: 'Create from template',
importJsonFile: 'Import JSON file',
ceateAgent: 'Agent flow',
createPipeline: 'Data pipeline',
createPipeline: 'Ingestion pipeline',
chooseAgentType: 'Choose Agent Type',
},
llmTools: {
@ -1688,9 +1691,9 @@ This delimiter is used to split the input text into several text pieces echo of
<p>To keep them, please click Rerun to re-run the current stage.</p> `,
changeStepModalConfirmText: 'Switch Anyway',
changeStepModalCancelText: 'Cancel',
unlinkPipelineModalTitle: 'Unlink data pipeline',
unlinkPipelineModalTitle: 'Unlink Ingestion pipeline',
unlinkPipelineModalContent: `
<p>Once unlinked, this Dataset will no longer be connected to the current Data Pipeline.</p>
<p>Once unlinked, this Dataset will no longer be connected to the current Ingestion pipeline.</p>
<p>Files that are already being parsed will continue until completion</p>
<p>Files that are not yet parsed will no longer be processed</p> <br/>
<p>Are you sure you want to proceed?</p> `,

View File

@ -114,8 +114,8 @@ export default {
startDate: '开始时间',
source: '来源',
fileName: '文件名',
datasetLogs: '数据集日志',
fileLogs: '文件日志',
datasetLogs: '数据集',
fileLogs: '文件',
overview: '概览',
success: '成功',
failed: '失败',
@ -255,6 +255,9 @@ export default {
theDocumentBeingParsedCannotBeDeleted: '正在解析的文档不能被删除',
},
knowledgeConfiguration: {
tocExtraction: '目录提取',
tocExtractionTip:
'对于已有的chunk生成层级结构的目录信息每个文件一个目录。在查询时激活`目录增强`后系统会用大模型去判断用户问题和哪些目录项相关从而找到相关的chunk。',
deleteGenerateModalContent: `
<p>删除生成的 <strong class='text-text-primary'>{{type}}</strong> 结果
将从此数据集中移除所有派生实体和关系。

View File

@ -102,7 +102,7 @@ function InnerButtonEdge({
...showHighlight,
...placeholderHighlightStyle,
}}
className={cn('text-text-secondary')}
className={cn('text-text-disabled')}
/>
<EdgeLabelRenderer>

View File

@ -1,4 +1,4 @@
import { useIsDarkTheme, useTheme } from '@/components/theme-provider';
import { useTheme } from '@/components/theme-provider';
import {
Tooltip,
TooltipContent,
@ -19,7 +19,6 @@ import { NotebookPen } from 'lucide-react';
import { useCallback, useEffect, useState } from 'react';
import { useTranslation } from 'react-i18next';
import { ChatSheet } from '../chat/chat-sheet';
import { AgentBackground } from '../components/background';
import {
AgentChatContext,
AgentChatLogContext,
@ -42,6 +41,7 @@ import { useMoveNote } from '../hooks/use-move-note';
import { usePlaceholderManager } from '../hooks/use-placeholder-manager';
import { useDropdownManager } from './context';
import { AgentBackground } from '@/components/canvas/background';
import Spotlight from '@/components/spotlight';
import {
useHideFormSheetOnNodeDeletion,
@ -61,7 +61,6 @@ import { GenerateNode } from './node/generate-node';
import { InvokeNode } from './node/invoke-node';
import { IterationNode, IterationStartNode } from './node/iteration-node';
import { KeywordNode } from './node/keyword-node';
import { LogicNode } from './node/logic-node';
import { MessageNode } from './node/message-node';
import NoteNode from './node/note-node';
import { PlaceholderNode } from './node/placeholder-node';
@ -78,7 +77,6 @@ export const nodeTypes: NodeTypes = {
beginNode: BeginNode,
placeholderNode: PlaceholderNode,
relevantNode: RelevantNode,
logicNode: LogicNode,
noteNode: NoteNode,
switchNode: SwitchNode,
generateNode: GenerateNode,
@ -173,8 +171,6 @@ function AgentCanvas({ drawerVisible, hideDrawer }: IProps) {
}
};
const isDarkTheme = useIsDarkTheme();
useHideFormSheetOnNodeDeletion({ hideFormDrawer });
const { visible, hideModal, showModal } = useSetModalState();
@ -243,7 +239,7 @@ function AgentCanvas({ drawerVisible, hideDrawer }: IProps) {
>
<defs>
<marker
fill="rgb(157 149 225)"
fill="var(--text-disabled)"
id="logo"
viewBox="0 0 40 40"
refX="8"
@ -286,12 +282,6 @@ function AgentCanvas({ drawerVisible, hideDrawer }: IProps) {
defaultEdgeOptions={{
type: 'buttonEdge',
markerEnd: 'logo',
style: {
strokeWidth: 1,
stroke: isDarkTheme
? 'rgba(91, 93, 106, 1)'
: 'rgba(151, 154, 171, 1)',
},
zIndex: 1001, // https://github.com/xyflow/xyflow/discussions/3498
}}
deleteKeyCode={['Delete', 'Backspace']}

View File

@ -7,9 +7,8 @@ import { useTranslation } from 'react-i18next';
import { AgentExceptionMethod, NodeHandleId } from '../../constant';
import useGraphStore from '../../store';
import { isBottomSubAgent } from '../../utils';
import { CommonHandle } from './handle';
import { LeftHandleStyle, RightHandleStyle } from './handle-icon';
import styles from './index.less';
import { CommonHandle, LeftEndHandle } from './handle';
import { RightHandleStyle } from './handle-icon';
import NodeHeader from './node-header';
import { NodeWrapper } from './node-wrapper';
import { ToolBar } from './toolbar';
@ -40,19 +39,11 @@ function InnerAgentNode({
<NodeWrapper selected={selected}>
{isHeadAgent && (
<>
<CommonHandle
type="target"
position={Position.Left}
isConnectable={isConnectable}
style={LeftHandleStyle}
nodeId={id}
id={NodeHandleId.End}
></CommonHandle>
<LeftEndHandle></LeftEndHandle>
<CommonHandle
type="source"
position={Position.Right}
isConnectable={isConnectable}
className={styles.handle}
style={RightHandleStyle}
nodeId={id}
id={NodeHandleId.Start}
@ -61,18 +52,22 @@ function InnerAgentNode({
</>
)}
<Handle
type="target"
position={Position.Top}
isConnectable={false}
id={NodeHandleId.AgentTop}
></Handle>
{isHeadAgent || (
<Handle
type="target"
position={Position.Top}
isConnectable={false}
id={NodeHandleId.AgentTop}
className="!bg-accent-primary !size-2"
></Handle>
)}
<Handle
type="source"
position={Position.Bottom}
isConnectable={false}
id={NodeHandleId.AgentBottom}
style={{ left: 180 }}
className="!bg-accent-primary !size-2"
></Handle>
<Handle
type="source"
@ -80,6 +75,7 @@ function InnerAgentNode({
isConnectable={false}
id={NodeHandleId.Tool}
style={{ left: 20 }}
className="!bg-accent-primary !size-2"
></Handle>
<NodeHeader id={id} name={data.name} label={data.label}></NodeHeader>
<section className="flex flex-col gap-2">

View File

@ -3,8 +3,7 @@ import { ICategorizeNode } from '@/interfaces/database/flow';
import { NodeProps, Position } from '@xyflow/react';
import { get } from 'lodash';
import { memo } from 'react';
import { NodeHandleId } from '../../constant';
import { CommonHandle } from './handle';
import { CommonHandle, LeftEndHandle } from './handle';
import { RightHandleStyle } from './handle-icon';
import NodeHeader from './node-header';
import { NodeWrapper } from './node-wrapper';
@ -20,13 +19,7 @@ export function InnerCategorizeNode({
return (
<ToolBar selected={selected} id={id} label={data.label}>
<NodeWrapper selected={selected}>
<CommonHandle
type="target"
position={Position.Left}
isConnectable
id={NodeHandleId.End}
nodeId={id}
></CommonHandle>
<LeftEndHandle></LeftEndHandle>
<NodeHeader id={id} name={data.name} label={data.label}></NodeHeader>
@ -41,7 +34,6 @@ export function InnerCategorizeNode({
{position.name}
</div>
<CommonHandle
// key={position.text}
id={position.uuid}
type="source"
position={Position.Right}

View File

@ -1,8 +1,9 @@
import { useSetModalState } from '@/hooks/common-hooks';
import { cn } from '@/lib/utils';
import { Handle, HandleProps } from '@xyflow/react';
import { Handle, HandleProps, Position } from '@xyflow/react';
import { Plus } from 'lucide-react';
import { useMemo } from 'react';
import { NodeHandleId } from '../../constant';
import { HandleContext } from '../../context';
import { useDropdownManager } from '../context';
import { InnerNextStepDropdown } from './dropdown/next-step-dropdown';
@ -33,7 +34,7 @@ export function CommonHandle({
<Handle
{...props}
className={cn(
'inline-flex justify-center items-center !bg-accent-primary !size-4 !rounded-sm !border-none ',
'inline-flex justify-center items-center !bg-accent-primary !border-none group-hover:!size-4 group-hover:!rounded-sm',
className,
)}
onClick={(e) => {
@ -47,7 +48,7 @@ export function CommonHandle({
showModal();
}}
>
<Plus className="size-3 pointer-events-none text-text-title-invert" />
<Plus className="size-3 pointer-events-none text-white hidden group-hover:inline-block" />
{visible && (
<InnerNextStepDropdown
hideModal={() => {
@ -62,3 +63,19 @@ export function CommonHandle({
</HandleContext.Provider>
);
}
export function LeftEndHandle({
isConnectable,
...props
}: Omit<HandleProps, 'type' | 'position'>) {
return (
<Handle
isConnectable={isConnectable}
className="!bg-accent-primary !size-2"
id={NodeHandleId.End}
type="target"
position={Position.Left}
{...props}
></Handle>
);
}

View File

@ -3,8 +3,8 @@ import { NodeProps, Position } from '@xyflow/react';
import { memo } from 'react';
import { NodeHandleId } from '../../constant';
import { needsSingleStepDebugging } from '../../utils';
import { CommonHandle } from './handle';
import { LeftHandleStyle, RightHandleStyle } from './handle-icon';
import { CommonHandle, LeftEndHandle } from './handle';
import { RightHandleStyle } from './handle-icon';
import NodeHeader from './node-header';
import { NodeWrapper } from './node-wrapper';
import { ToolBar } from './toolbar';
@ -23,14 +23,7 @@ function InnerRagNode({
showRun={needsSingleStepDebugging(data.label)}
>
<NodeWrapper selected={selected}>
<CommonHandle
id={NodeHandleId.End}
type="target"
position={Position.Left}
isConnectable={isConnectable}
style={LeftHandleStyle}
nodeId={id}
></CommonHandle>
<LeftEndHandle></LeftEndHandle>
<CommonHandle
type="source"
position={Position.Right}

View File

@ -7,8 +7,7 @@ import { NodeProps, NodeResizeControl, Position } from '@xyflow/react';
import { memo } from 'react';
import { NodeHandleId, Operator } from '../../constant';
import OperatorIcon from '../../operator-icon';
import { CommonHandle } from './handle';
import { RightHandleStyle } from './handle-icon';
import { CommonHandle, LeftEndHandle } from './handle';
import styles from './index.less';
import NodeHeader from './node-header';
import { NodeWrapper } from './node-wrapper';
@ -24,30 +23,21 @@ export function InnerIterationNode({
return (
<ToolBar selected={selected} id={id} label={data.label} showRun={false}>
<section
className={cn('h-full bg-transparent rounded-b-md ', {
className={cn('h-full bg-transparent rounded-b-md group', {
[styles.selectedHeader]: selected,
})}
>
<NodeResizeControl style={controlStyle} minWidth={100} minHeight={50}>
<ResizeIcon />
</NodeResizeControl>
<CommonHandle
id={NodeHandleId.End}
type="target"
position={Position.Left}
isConnectable={isConnectable}
className={styles.handle}
nodeId={id}
></CommonHandle>
<LeftEndHandle></LeftEndHandle>
<CommonHandle
id={NodeHandleId.Start}
type="source"
position={Position.Right}
isConnectable={isConnectable}
className={styles.handle}
nodeId={id}
></CommonHandle>
<NodeHeader
id={id}
name={data.name}
@ -75,8 +65,6 @@ function InnerIterationStartNode({
type="source"
position={Position.Right}
isConnectable={isConnectable}
className={styles.handle}
style={RightHandleStyle}
isConnectableEnd={false}
id={NodeHandleId.Start}
nodeId={id}

View File

@ -1,41 +0,0 @@
import { ILogicNode } from '@/interfaces/database/flow';
import { NodeProps, Position } from '@xyflow/react';
import { memo } from 'react';
import { CommonHandle } from './handle';
import { LeftHandleStyle, RightHandleStyle } from './handle-icon';
import NodeHeader from './node-header';
import { NodeWrapper } from './node-wrapper';
import { ToolBar } from './toolbar';
export function InnerLogicNode({
id,
data,
isConnectable = true,
selected,
}: NodeProps<ILogicNode>) {
return (
<ToolBar selected={selected} id={id} label={data.label}>
<NodeWrapper selected={selected}>
<CommonHandle
id="c"
type="source"
position={Position.Left}
isConnectable={isConnectable}
style={LeftHandleStyle}
nodeId={id}
></CommonHandle>
<CommonHandle
type="source"
position={Position.Right}
isConnectable={isConnectable}
style={RightHandleStyle}
id="b"
nodeId={id}
></CommonHandle>
<NodeHeader id={id} name={data.name} label={data.label}></NodeHeader>
</NodeWrapper>
</ToolBar>
);
}
export const LogicNode = memo(InnerLogicNode);

View File

@ -1,35 +1,21 @@
import { IMessageNode } from '@/interfaces/database/flow';
import { NodeProps, Position } from '@xyflow/react';
import { NodeProps } from '@xyflow/react';
import { Flex } from 'antd';
import classNames from 'classnames';
import { get } from 'lodash';
import { memo } from 'react';
import { NodeHandleId } from '../../constant';
import { CommonHandle } from './handle';
import { LeftHandleStyle } from './handle-icon';
import { LeftEndHandle } from './handle';
import styles from './index.less';
import NodeHeader from './node-header';
import { NodeWrapper } from './node-wrapper';
import { ToolBar } from './toolbar';
function InnerMessageNode({
id,
data,
isConnectable = true,
selected,
}: NodeProps<IMessageNode>) {
function InnerMessageNode({ id, data, selected }: NodeProps<IMessageNode>) {
const messages: string[] = get(data, 'form.messages', []);
return (
<ToolBar selected={selected} id={id} label={data.label}>
<NodeWrapper selected={selected}>
<CommonHandle
type="target"
position={Position.Left}
isConnectable={isConnectable}
style={LeftHandleStyle}
nodeId={id}
id={NodeHandleId.End}
></CommonHandle>
<LeftEndHandle></LeftEndHandle>
{/* <CommonHandle
type="source"
position={Position.Right}
@ -47,7 +33,6 @@ function InnerMessageNode({
[styles.nodeHeader]: messages.length > 0,
})}
></NodeHeader>
<Flex vertical gap={8} className={styles.messageNodeContainer}>
{messages.map((message, idx) => {
return (

View File

@ -7,7 +7,7 @@ export function NodeWrapper({ children, className, selected }: IProps) {
return (
<section
className={cn(
'bg-text-title-invert p-2.5 rounded-sm w-[200px] text-xs',
'bg-text-title-invert p-2.5 rounded-sm w-[200px] text-xs group',
{ 'border border-accent-primary': selected },
className,
)}

View File

@ -7,8 +7,7 @@ import { get } from 'lodash';
import { memo } from 'react';
import { NodeHandleId } from '../../constant';
import { useGetVariableLabelByValue } from '../../hooks/use-get-begin-query';
import { CommonHandle } from './handle';
import { LeftHandleStyle, RightHandleStyle } from './handle-icon';
import { CommonHandle, LeftEndHandle } from './handle';
import styles from './index.less';
import NodeHeader from './node-header';
import { NodeWrapper } from './node-wrapper';
@ -28,22 +27,12 @@ function InnerRetrievalNode({
return (
<ToolBar selected={selected} id={id} label={data.label}>
<NodeWrapper selected={selected}>
<CommonHandle
id={NodeHandleId.End}
type="target"
position={Position.Left}
isConnectable={isConnectable}
className={styles.handle}
style={LeftHandleStyle}
nodeId={id}
></CommonHandle>
<LeftEndHandle></LeftEndHandle>
<CommonHandle
id={NodeHandleId.Start}
type="source"
position={Position.Right}
isConnectable={isConnectable}
className={styles.handle}
style={RightHandleStyle}
nodeId={id}
isConnectableEnd={false}
></CommonHandle>

View File

@ -2,10 +2,10 @@ import { Card, CardContent } from '@/components/ui/card';
import { ISwitchCondition, ISwitchNode } from '@/interfaces/database/flow';
import { NodeProps, Position } from '@xyflow/react';
import { memo, useCallback } from 'react';
import { NodeHandleId, SwitchOperatorOptions } from '../../constant';
import { SwitchOperatorOptions } from '../../constant';
import { LogicalOperatorIcon } from '../../form/switch-form';
import { useGetVariableLabelByValue } from '../../hooks/use-get-begin-query';
import { CommonHandle } from './handle';
import { CommonHandle, LeftEndHandle } from './handle';
import { RightHandleStyle } from './handle-icon';
import NodeHeader from './node-header';
import { NodeWrapper } from './node-wrapper';
@ -66,13 +66,7 @@ function InnerSwitchNode({ id, data, selected }: NodeProps<ISwitchNode>) {
return (
<ToolBar selected={selected} id={id} label={data.label} showRun={false}>
<NodeWrapper selected={selected}>
<CommonHandle
type="target"
position={Position.Left}
isConnectable
nodeId={id}
id={NodeHandleId.End}
></CommonHandle>
<LeftEndHandle></LeftEndHandle>
<NodeHeader id={id} name={data.name} label={data.label}></NodeHeader>
<section className="gap-2.5 flex flex-col">
{positions.map((position, idx) => {

View File

@ -49,6 +49,7 @@ function InnerToolNode({
type="target"
position={Position.Top}
isConnectable={isConnectable}
className="!bg-accent-primary !size-2"
></Handle>
<ul className="space-y-2">
{tools.map((x) => (

View File

@ -66,7 +66,7 @@ export function ToolBar({
return (
<TooltipNode selected={selected}>
<TooltipTrigger>{children}</TooltipTrigger>
<TooltipTrigger className="h-full">{children}</TooltipTrigger>
<TooltipContent position={Position.Top}>
<section className="flex gap-2 items-center">

View File

@ -69,7 +69,6 @@ export enum Operator {
AkShare = 'AkShare',
YahooFinance = 'YahooFinance',
Jin10 = 'Jin10',
Concentrator = 'Concentrator',
TuShare = 'TuShare',
Note = 'Note',
Crawler = 'Crawler',
@ -102,7 +101,6 @@ export const AgentOperatorList = [
Operator.RewriteQuestion,
Operator.KeywordExtract,
Operator.Switch,
Operator.Concentrator,
Operator.Iteration,
Operator.WaitingDialogue,
Operator.Note,
@ -129,9 +127,6 @@ export const componentMenuList = [
{
name: Operator.Switch,
},
{
name: Operator.Concentrator,
},
{
name: Operator.Iteration,
},
@ -544,8 +539,6 @@ export const initialJin10Values = {
...initialQueryBaseValues,
};
export const initialConcentratorValues = {};
export const initialTuShareValues = {
token: 'xxx',
src: 'eastmoney',
@ -824,7 +817,6 @@ export const RestrictedUpstreamMap = {
[Operator.AkShare]: [Operator.Begin],
[Operator.YahooFinance]: [Operator.Begin],
[Operator.Jin10]: [Operator.Begin],
[Operator.Concentrator]: [Operator.Begin],
[Operator.TuShare]: [Operator.Begin],
[Operator.Crawler]: [Operator.Begin],
[Operator.Note]: [],
@ -840,6 +832,7 @@ export const RestrictedUpstreamMap = {
[Operator.StringTransform]: [Operator.Begin],
[Operator.UserFillUp]: [Operator.Begin],
[Operator.Tool]: [Operator.Begin],
[Operator.Placeholder]: [Operator.Begin],
};
export const NodeMap = {
@ -865,7 +858,6 @@ export const NodeMap = {
[Operator.SearXNG]: 'ragNode',
[Operator.ExeSQL]: 'ragNode',
[Operator.Switch]: 'switchNode',
[Operator.Concentrator]: 'logicNode',
[Operator.WenCai]: 'ragNode',
[Operator.AkShare]: 'ragNode',
[Operator.YahooFinance]: 'ragNode',
@ -908,7 +900,6 @@ export const BeginQueryTypeIconMap = {
export const NoDebugOperatorsList = [
Operator.Begin,
Operator.Concentrator,
Operator.Message,
Operator.RewriteQuestion,
Operator.Switch,

View File

@ -136,9 +136,6 @@ export const FormConfigMap = {
[Operator.SearXNG]: {
component: SearXNGForm,
},
[Operator.Concentrator]: {
component: () => <></>,
},
[Operator.Note]: {
component: () => <></>,
},

View File

@ -25,7 +25,6 @@ import {
initialBingValues,
initialCategorizeValues,
initialCodeValues,
initialConcentratorValues,
initialCrawlerValues,
initialDeepLValues,
initialDuckValues,
@ -124,7 +123,6 @@ export const useInitializeOperatorParams = () => {
[Operator.AkShare]: initialAkShareValues,
[Operator.YahooFinance]: initialYahooFinanceValues,
[Operator.Jin10]: initialJin10Values,
[Operator.Concentrator]: initialConcentratorValues,
[Operator.TuShare]: initialTuShareValues,
[Operator.Note]: initialNoteValues,
[Operator.Crawler]: initialCrawlerValues,
@ -140,6 +138,7 @@ export const useInitializeOperatorParams = () => {
[Operator.Tool]: {},
[Operator.UserFillUp]: initialUserFillUpValues,
[Operator.StringTransform]: initialStringTransformValues,
[Operator.Placeholder]: {},
};
}, [llmId]);

View File

@ -18,7 +18,6 @@ import {
initialBingValues,
initialCategorizeValues,
initialCodeValues,
initialConcentratorValues,
initialCrawlerValues,
initialDeepLValues,
initialDuckValues,
@ -100,7 +99,6 @@ export const useInitializeOperatorParams = () => {
[Operator.AkShare]: initialAkShareValues,
[Operator.YahooFinance]: initialYahooFinanceValues,
[Operator.Jin10]: initialJin10Values,
[Operator.Concentrator]: initialConcentratorValues,
[Operator.TuShare]: initialTuShareValues,
[Operator.Note]: initialNoteValues,
[Operator.Crawler]: initialCrawlerValues,

View File

@ -1,3 +1,5 @@
import { AgentBackground } from '@/components/canvas/background';
import Spotlight from '@/components/spotlight';
import { Button } from '@/components/ui/button';
import { Card, CardContent } from '@/components/ui/card';
import {
@ -22,7 +24,6 @@ import { ArrowDownToLine } from 'lucide-react';
import { ReactNode, useCallback, useEffect, useState } from 'react';
import { useTranslation } from 'react-i18next';
import { nodeTypes } from '../canvas';
import { AgentBackground } from '../components/background';
export function VersionDialog({
hideModal,
@ -121,6 +122,7 @@ export function VersionDialog({
minZoom={0.1}
>
<AgentBackground></AgentBackground>
<Spotlight className="z-0" opcity={0.7} coverage={70} />
</ReactFlow>
</ReactFlowProvider>
</section>

View File

@ -21,7 +21,6 @@ import '@xyflow/react/dist/style.css';
import { NotebookPen } from 'lucide-react';
import { memo, useCallback, useRef, useState } from 'react';
import { useTranslation } from 'react-i18next';
import { AgentBackground } from '../components/background';
import { AgentInstanceContext, HandleContext } from '../context';
import FormSheet from '../form-sheet/next';
@ -31,6 +30,8 @@ import { useBeforeDelete } from '../hooks/use-before-delete';
import { useMoveNote } from '../hooks/use-move-note';
import { useDropdownManager } from './context';
import { AgentBackground } from '@/components/canvas/background';
import Spotlight from '@/components/spotlight';
import { useRunDataflow } from '../hooks/use-run-dataflow';
import {
useHideFormSheetOnNodeDeletion,
@ -263,6 +264,7 @@ function DataFlowCanvas({ drawerVisible, hideDrawer, showLogSheet }: IProps) {
onBeforeDelete={handleBeforeDelete}
>
<AgentBackground></AgentBackground>
<Spotlight className="z-0" opcity={0.7} coverage={70} />
<Controls position={'bottom-center'} orientation="horizontal">
<ControlButton>
<Tooltip>

View File

@ -1,13 +0,0 @@
import { useIsDarkTheme } from '@/components/theme-provider';
import { Background } from '@xyflow/react';
export function AgentBackground() {
const isDarkTheme = useIsDarkTheme();
return (
<Background
color={isDarkTheme ? 'rgba(255,255,255,0.15)' : '#A8A9B3'}
bgColor={isDarkTheme ? 'rgba(11, 11, 12, 1)' : 'rgba(0, 0, 0, 0.05)'}
/>
);
}

View File

@ -1,3 +1,5 @@
import { AgentBackground } from '@/components/canvas/background';
import Spotlight from '@/components/spotlight';
import { Button } from '@/components/ui/button';
import { Card, CardContent } from '@/components/ui/card';
import {
@ -22,7 +24,6 @@ import { ArrowDownToLine } from 'lucide-react';
import { ReactNode, useCallback, useEffect, useState } from 'react';
import { useTranslation } from 'react-i18next';
import { nodeTypes } from '../canvas';
import { AgentBackground } from '../components/background';
export function VersionDialog({
hideModal,
@ -121,6 +122,7 @@ export function VersionDialog({
minZoom={0.1}
>
<AgentBackground></AgentBackground>
<Spotlight className="z-0" opcity={0.7} coverage={70} />
</ReactFlow>
</ReactFlowProvider>
</section>

View File

@ -0,0 +1,64 @@
import { useFormContext, useWatch } from 'react-hook-form';
import { DocumentParserType } from '@/constants/knowledge';
import { useMemo } from 'react';
import { AudioConfiguration } from './configuration/audio';
import { BookConfiguration } from './configuration/book';
import { EmailConfiguration } from './configuration/email';
import { KnowledgeGraphConfiguration } from './configuration/knowledge-graph';
import { LawsConfiguration } from './configuration/laws';
import { ManualConfiguration } from './configuration/manual';
import { NaiveConfiguration } from './configuration/naive';
import { OneConfiguration } from './configuration/one';
import { PaperConfiguration } from './configuration/paper';
import { PictureConfiguration } from './configuration/picture';
import { PresentationConfiguration } from './configuration/presentation';
import { QAConfiguration } from './configuration/qa';
import { ResumeConfiguration } from './configuration/resume';
import { TableConfiguration } from './configuration/table';
import { TagConfiguration } from './configuration/tag';
const ConfigurationComponentMap = {
[DocumentParserType.Naive]: NaiveConfiguration,
[DocumentParserType.Qa]: QAConfiguration,
[DocumentParserType.Resume]: ResumeConfiguration,
[DocumentParserType.Manual]: ManualConfiguration,
[DocumentParserType.Table]: TableConfiguration,
[DocumentParserType.Paper]: PaperConfiguration,
[DocumentParserType.Book]: BookConfiguration,
[DocumentParserType.Laws]: LawsConfiguration,
[DocumentParserType.Presentation]: PresentationConfiguration,
[DocumentParserType.Picture]: PictureConfiguration,
[DocumentParserType.One]: OneConfiguration,
[DocumentParserType.Audio]: AudioConfiguration,
[DocumentParserType.Email]: EmailConfiguration,
[DocumentParserType.Tag]: TagConfiguration,
[DocumentParserType.KnowledgeGraph]: KnowledgeGraphConfiguration,
};
function EmptyComponent() {
return <div></div>;
}
export function ChunkMethodForm() {
const form = useFormContext();
const finalParserId: DocumentParserType = useWatch({
control: form.control,
name: 'parser_id',
});
const ConfigurationComponent = useMemo(() => {
return finalParserId
? ConfigurationComponentMap[finalParserId]
: EmptyComponent;
}, [finalParserId]);
return (
<section className="h-full flex flex-col">
<div className="overflow-auto flex-1 min-h-0">
<ConfigurationComponent></ConfigurationComponent>
</div>
</section>
);
}

View File

@ -8,8 +8,9 @@ import {
FormMessage,
} from '@/components/ui/form';
import { MultiSelect } from '@/components/ui/multi-select';
import { FormLayout } from '@/constants/form';
import { useFetchKnowledgeList } from '@/hooks/knowledge-hooks';
import { Flex, Form, InputNumber, Select, Slider, Space } from 'antd';
import { Form, Select, Space } from 'antd';
import DOMPurify from 'dompurify';
import { useFormContext, useWatch } from 'react-hook-form';
import { useTranslation } from 'react-i18next';
@ -44,7 +45,7 @@ export const TagSetItem = () => {
<FormItem className=" items-center space-y-0 ">
<div className="flex items-center">
<FormLabel
className="text-sm text-muted-foreground whitespace-nowrap w-1/4"
className="text-sm text-text-secondary whitespace-nowrap w-1/4"
tooltip={
<div
dangerouslySetInnerHTML={{
@ -116,27 +117,9 @@ export const TopNTagsItem = () => {
max={10}
min={1}
defaultValue={3}
layout={FormLayout.Horizontal}
></SliderInputFormField>
);
return (
<Form.Item label={t('knowledgeConfiguration.topnTags')}>
<Flex gap={20} align="center">
<Flex flex={1}>
<Form.Item
name={['parser_config', 'topn_tags']}
noStyle
initialValue={3}
>
<Slider max={10} min={1} style={{ width: '100%' }} />
</Form.Item>
</Flex>
<Form.Item name={['parser_config', 'topn_tags']} noStyle>
<InputNumber max={10} min={1} />
</Form.Item>
</Flex>
</Form.Item>
);
};
export function TagItems() {

View File

@ -0,0 +1,20 @@
import {
AutoKeywordsFormField,
AutoQuestionsFormField,
} from '@/components/auto-keywords-form-field';
import { ConfigurationFormContainer } from '../configuration-form-container';
import { TagItems } from '../components/tag-item';
export function AudioConfiguration() {
return (
<ConfigurationFormContainer>
<>
<AutoKeywordsFormField></AutoKeywordsFormField>
<AutoQuestionsFormField></AutoQuestionsFormField>
</>
<TagItems></TagItems>
</ConfigurationFormContainer>
);
}

View File

@ -0,0 +1,28 @@
import {
AutoKeywordsFormField,
AutoQuestionsFormField,
} from '@/components/auto-keywords-form-field';
import { LayoutRecognizeFormField } from '@/components/layout-recognize-form-field';
import { TagItems } from '../components/tag-item';
import {
ConfigurationFormContainer,
MainContainer,
} from '../configuration-form-container';
export function BookConfiguration() {
return (
<MainContainer>
<ConfigurationFormContainer>
<LayoutRecognizeFormField></LayoutRecognizeFormField>
</ConfigurationFormContainer>
<ConfigurationFormContainer>
<AutoKeywordsFormField></AutoKeywordsFormField>
<AutoQuestionsFormField></AutoQuestionsFormField>
</ConfigurationFormContainer>
<ConfigurationFormContainer>
<TagItems></TagItems>
</ConfigurationFormContainer>
</MainContainer>
);
}

View File

@ -42,7 +42,7 @@ export function ChunkMethodItem(props: IProps) {
'w-1/4 whitespace-pre-wrap': line === 1,
})}
>
{t('dataPipeline')}
{t('builtIn')}
</FormLabel>
<div className={line === 1 ? 'w-3/4 ' : 'w-full'}>
<FormControl>
@ -115,7 +115,7 @@ export function EmbeddingModelItem({ line = 1, isEdit = true }: IProps) {
);
}
export function ParseTypeItem() {
export function ParseTypeItem({ line = 2 }: { line?: number }) {
const { t } = useTranslate('knowledgeConfiguration');
const form = useFormContext();
@ -125,17 +125,26 @@ export function ParseTypeItem() {
name={'parseType'}
render={({ field }) => (
<FormItem className=" items-center space-y-0 ">
<div className="">
<div
className={cn('flex', {
' items-center': line === 1,
'flex-col gap-1': line === 2,
})}
>
<FormLabel
tooltip={t('parseTypeTip')}
className="text-sm whitespace-wrap "
// tooltip={t('parseTypeTip')}
className={cn('text-sm whitespace-wrap ', {
'w-1/4': line === 1,
})}
>
{t('parseType')}
</FormLabel>
<div className="text-muted-foreground">
<div
className={cn('text-muted-foreground', { 'w-3/4': line === 1 })}
>
<FormControl>
<Radio.Group {...field}>
<div className="w-3/4 flex gap-2 justify-between text-muted-foreground">
<div className="w-1/2 flex gap-2 justify-between text-muted-foreground">
<Radio value={1}>{t('builtIn')}</Radio>
<Radio value={2}>{t('manualSetup')}</Radio>
</div>
@ -144,7 +153,7 @@ export function ParseTypeItem() {
</div>
</div>
<div className="flex pt-1">
<div className="w-1/4"></div>
<div className={line === 1 ? 'w-1/4' : ''}></div>
<FormMessage />
</div>
</FormItem>
@ -188,3 +197,39 @@ export function EnableAutoGenerateItem() {
/>
);
}
export function EnableTocToggle() {
const { t } = useTranslate('knowledgeConfiguration');
const form = useFormContext();
return (
<FormField
control={form.control}
name={'parser_config.toc_extraction'}
render={({ field }) => (
<FormItem className=" items-center space-y-0 ">
<div className="flex items-center">
<FormLabel
tooltip={t('tocExtractionTip')}
className="text-sm whitespace-wrap w-1/4"
>
{t('tocExtraction')}
</FormLabel>
<div className="text-muted-foreground w-3/4">
<FormControl>
<Switch
checked={field.value}
onCheckedChange={field.onChange}
/>
</FormControl>
</div>
</div>
<div className="flex pt-1">
<div className="w-1/4"></div>
<FormMessage />
</div>
</FormItem>
)}
/>
);
}

View File

@ -0,0 +1,18 @@
import {
AutoKeywordsFormField,
AutoQuestionsFormField,
} from '@/components/auto-keywords-form-field';
import { TagItems } from '../components/tag-item';
import { ConfigurationFormContainer } from '../configuration-form-container';
export function EmailConfiguration() {
return (
<ConfigurationFormContainer>
<>
<AutoKeywordsFormField></AutoKeywordsFormField>
<AutoQuestionsFormField></AutoQuestionsFormField>
</>
<TagItems></TagItems>
</ConfigurationFormContainer>
);
}

View File

@ -0,0 +1,15 @@
import { DelimiterFormField } from '@/components/delimiter-form-field';
import { EntityTypesFormField } from '@/components/entity-types-form-field';
import { MaxTokenNumberFormField } from '@/components/max-token-number-from-field';
export function KnowledgeGraphConfiguration() {
return (
<>
<>
<EntityTypesFormField></EntityTypesFormField>
<MaxTokenNumberFormField max={8192 * 2}></MaxTokenNumberFormField>
<DelimiterFormField></DelimiterFormField>
</>
</>
);
}

View File

@ -0,0 +1,29 @@
import {
AutoKeywordsFormField,
AutoQuestionsFormField,
} from '@/components/auto-keywords-form-field';
import { LayoutRecognizeFormField } from '@/components/layout-recognize-form-field';
import { TagItems } from '../components/tag-item';
import {
ConfigurationFormContainer,
MainContainer,
} from '../configuration-form-container';
export function LawsConfiguration() {
return (
<MainContainer>
<ConfigurationFormContainer>
<LayoutRecognizeFormField></LayoutRecognizeFormField>
</ConfigurationFormContainer>
<ConfigurationFormContainer>
<AutoKeywordsFormField></AutoKeywordsFormField>
<AutoQuestionsFormField></AutoQuestionsFormField>
</ConfigurationFormContainer>
<ConfigurationFormContainer>
<TagItems></TagItems>
</ConfigurationFormContainer>
</MainContainer>
);
}

View File

@ -0,0 +1,27 @@
import {
AutoKeywordsFormField,
AutoQuestionsFormField,
} from '@/components/auto-keywords-form-field';
import { LayoutRecognizeFormField } from '@/components/layout-recognize-form-field';
import { TagItems } from '../components/tag-item';
import {
ConfigurationFormContainer,
MainContainer,
} from '../configuration-form-container';
export function ManualConfiguration() {
return (
<MainContainer>
<ConfigurationFormContainer>
<LayoutRecognizeFormField></LayoutRecognizeFormField>
</ConfigurationFormContainer>
<ConfigurationFormContainer>
<AutoKeywordsFormField></AutoKeywordsFormField>
<AutoQuestionsFormField></AutoQuestionsFormField>
</ConfigurationFormContainer>
<TagItems></TagItems>
</MainContainer>
);
}

View File

@ -0,0 +1,33 @@
import {
AutoKeywordsFormField,
AutoQuestionsFormField,
} from '@/components/auto-keywords-form-field';
import { DelimiterFormField } from '@/components/delimiter-form-field';
import { ExcelToHtmlFormField } from '@/components/excel-to-html-form-field';
import { LayoutRecognizeFormField } from '@/components/layout-recognize-form-field';
import { MaxTokenNumberFormField } from '@/components/max-token-number-from-field';
import { TagItems } from '../components/tag-item';
import {
ConfigurationFormContainer,
MainContainer,
} from '../configuration-form-container';
import { EnableTocToggle } from './common-item';
export function NaiveConfiguration() {
return (
<MainContainer>
<ConfigurationFormContainer>
<LayoutRecognizeFormField></LayoutRecognizeFormField>
<MaxTokenNumberFormField initialValue={512}></MaxTokenNumberFormField>
<DelimiterFormField></DelimiterFormField>
<EnableTocToggle />
</ConfigurationFormContainer>
<ConfigurationFormContainer>
<AutoKeywordsFormField></AutoKeywordsFormField>
<AutoQuestionsFormField></AutoQuestionsFormField>
<ExcelToHtmlFormField></ExcelToHtmlFormField>
<TagItems></TagItems>
</ConfigurationFormContainer>
</MainContainer>
);
}

View File

@ -0,0 +1,21 @@
import {
AutoKeywordsFormField,
AutoQuestionsFormField,
} from '@/components/auto-keywords-form-field';
import { LayoutRecognizeFormField } from '@/components/layout-recognize-form-field';
import { TagItems } from '../components/tag-item';
import { ConfigurationFormContainer } from '../configuration-form-container';
export function OneConfiguration() {
return (
<ConfigurationFormContainer>
<LayoutRecognizeFormField></LayoutRecognizeFormField>
<>
<AutoKeywordsFormField></AutoKeywordsFormField>
<AutoQuestionsFormField></AutoQuestionsFormField>
</>
<TagItems></TagItems>
</ConfigurationFormContainer>
);
}

View File

@ -0,0 +1,28 @@
import {
AutoKeywordsFormField,
AutoQuestionsFormField,
} from '@/components/auto-keywords-form-field';
import { LayoutRecognizeFormField } from '@/components/layout-recognize-form-field';
import { TagItems } from '../components/tag-item';
import {
ConfigurationFormContainer,
MainContainer,
} from '../configuration-form-container';
export function PaperConfiguration() {
return (
<MainContainer>
<ConfigurationFormContainer>
<LayoutRecognizeFormField></LayoutRecognizeFormField>
</ConfigurationFormContainer>
<ConfigurationFormContainer>
<AutoKeywordsFormField></AutoKeywordsFormField>
<AutoQuestionsFormField></AutoQuestionsFormField>
</ConfigurationFormContainer>
<ConfigurationFormContainer>
<TagItems></TagItems>
</ConfigurationFormContainer>
</MainContainer>
);
}

View File

@ -0,0 +1,18 @@
import {
AutoKeywordsFormField,
AutoQuestionsFormField,
} from '@/components/auto-keywords-form-field';
import { TagItems } from '../components/tag-item';
import { ConfigurationFormContainer } from '../configuration-form-container';
export function PictureConfiguration() {
return (
<ConfigurationFormContainer>
<>
<AutoKeywordsFormField></AutoKeywordsFormField>
<AutoQuestionsFormField></AutoQuestionsFormField>
</>
<TagItems></TagItems>
</ConfigurationFormContainer>
);
}

View File

@ -0,0 +1,29 @@
import {
AutoKeywordsFormField,
AutoQuestionsFormField,
} from '@/components/auto-keywords-form-field';
import { LayoutRecognizeFormField } from '@/components/layout-recognize-form-field';
import { TagItems } from '../components/tag-item';
import {
ConfigurationFormContainer,
MainContainer,
} from '../configuration-form-container';
export function PresentationConfiguration() {
return (
<MainContainer>
<ConfigurationFormContainer>
<LayoutRecognizeFormField></LayoutRecognizeFormField>
</ConfigurationFormContainer>
<ConfigurationFormContainer>
<AutoKeywordsFormField></AutoKeywordsFormField>
<AutoQuestionsFormField></AutoQuestionsFormField>
</ConfigurationFormContainer>
<ConfigurationFormContainer>
<TagItems></TagItems>
</ConfigurationFormContainer>
</MainContainer>
);
}

View File

@ -0,0 +1,10 @@
import { TagItems } from '../components/tag-item';
import { ConfigurationFormContainer } from '../configuration-form-container';
export function QAConfiguration() {
return (
<ConfigurationFormContainer>
<TagItems></TagItems>
</ConfigurationFormContainer>
);
}

View File

@ -0,0 +1,10 @@
import { TagItems } from '../components/tag-item';
import { ConfigurationFormContainer } from '../configuration-form-container';
export function ResumeConfiguration() {
return (
<ConfigurationFormContainer>
<TagItems></TagItems>
</ConfigurationFormContainer>
);
}

View File

@ -0,0 +1,12 @@
import { ConfigurationFormContainer } from '../configuration-form-container';
export function TableConfiguration() {
return (
<ConfigurationFormContainer>
{/* <ChunkMethodItem></ChunkMethodItem>
<EmbeddingModelItem></EmbeddingModelItem>
<PageRankFormField></PageRankFormField> */}
</ConfigurationFormContainer>
);
}

View File

@ -0,0 +1,5 @@
import { ConfigurationFormContainer } from '../configuration-form-container';
export function TagConfiguration() {
return <ConfigurationFormContainer></ConfigurationFormContainer>;
}

View File

@ -1,79 +1,92 @@
import { t } from 'i18next';
import { z } from 'zod';
export const formSchema = z.object({
name: z.string().min(1, {
message: 'Username must be at least 2 characters.',
}),
description: z.string().min(2, {
message: 'Username must be at least 2 characters.',
}),
// avatar: z.instanceof(File),
avatar: z.any().nullish(),
permission: z.string().optional(),
parser_id: z.string(),
pipeline_id: z.string().optional(),
pipeline_name: z.string().optional(),
pipeline_avatar: z.string().optional(),
embd_id: z.string(),
parser_config: z
.object({
layout_recognize: z.string(),
chunk_token_num: z.number(),
delimiter: z.string(),
auto_keywords: z.number().optional(),
auto_questions: z.number().optional(),
html4excel: z.boolean(),
tag_kb_ids: z.array(z.string()).nullish(),
topn_tags: z.number().optional(),
raptor: z
.object({
use_raptor: z.boolean().optional(),
prompt: z.string().optional(),
max_token: z.number().optional(),
threshold: z.number().optional(),
max_cluster: z.number().optional(),
random_seed: z.number().optional(),
})
.refine(
(data) => {
if (data.use_raptor && !data.prompt) {
return false;
}
return true;
},
{
message: 'Prompt is required',
path: ['prompt'],
},
),
graphrag: z
.object({
use_graphrag: z.boolean().optional(),
entity_types: z.array(z.string()).optional(),
method: z.string().optional(),
resolution: z.boolean().optional(),
community: z.boolean().optional(),
})
.refine(
(data) => {
if (
data.use_graphrag &&
(!data.entity_types || data.entity_types.length === 0)
) {
return false;
}
return true;
},
{
message: 'Please enter Entity types',
path: ['entity_types'],
},
),
})
.optional(),
pagerank: z.number(),
// icon: z.array(z.instanceof(File)),
});
export const formSchema = z
.object({
parseType: z.number(),
name: z.string().min(1, {
message: 'Username must be at least 2 characters.',
}),
description: z.string().min(2, {
message: 'Username must be at least 2 characters.',
}),
// avatar: z.instanceof(File),
avatar: z.any().nullish(),
permission: z.string().optional(),
parser_id: z.string(),
pipeline_id: z.string().optional(),
pipeline_name: z.string().optional(),
pipeline_avatar: z.string().optional(),
embd_id: z.string(),
parser_config: z
.object({
layout_recognize: z.string(),
chunk_token_num: z.number(),
delimiter: z.string(),
auto_keywords: z.number().optional(),
auto_questions: z.number().optional(),
html4excel: z.boolean(),
tag_kb_ids: z.array(z.string()).nullish(),
topn_tags: z.number().optional(),
toc_extraction: z.boolean().optional(),
raptor: z
.object({
use_raptor: z.boolean().optional(),
prompt: z.string().optional(),
max_token: z.number().optional(),
threshold: z.number().optional(),
max_cluster: z.number().optional(),
random_seed: z.number().optional(),
})
.refine(
(data) => {
if (data.use_raptor && !data.prompt) {
return false;
}
return true;
},
{
message: 'Prompt is required',
path: ['prompt'],
},
),
graphrag: z
.object({
use_graphrag: z.boolean().optional(),
entity_types: z.array(z.string()).optional(),
method: z.string().optional(),
resolution: z.boolean().optional(),
community: z.boolean().optional(),
})
.refine(
(data) => {
if (
data.use_graphrag &&
(!data.entity_types || data.entity_types.length === 0)
) {
return false;
}
return true;
},
{
message: 'Please enter Entity types',
path: ['entity_types'],
},
),
})
.optional(),
pagerank: z.number(),
// icon: z.array(z.instanceof(File)),
})
.superRefine((data, ctx) => {
if (data.parseType === 2 && !data.pipeline_id) {
ctx.addIssue({
path: ['pipeline_id'],
message: t('common.pleaseSelect'),
code: 'custom',
});
}
});
export const pipelineFormSchema = z.object({
pipeline_id: z.string().optional(),

View File

@ -1,14 +1,18 @@
import { IDataPipelineSelectNode } from '@/components/data-pipeline-select';
import {
DataFlowSelect,
IDataPipelineSelectNode,
} from '@/components/data-pipeline-select';
import GraphRagItems from '@/components/parse-configuration/graph-rag-form-fields';
import RaptorFormFields from '@/components/parse-configuration/raptor-form-fields';
import { Button } from '@/components/ui/button';
import Divider from '@/components/ui/divider';
import { Form } from '@/components/ui/form';
import { FormLayout } from '@/constants/form';
import { DocumentParserType } from '@/constants/knowledge';
import { PermissionRole } from '@/constants/permission';
import { zodResolver } from '@hookform/resolvers/zod';
import { useEffect, useState } from 'react';
import { useForm } from 'react-hook-form';
import { useForm, useWatch } from 'react-hook-form';
import { useTranslation } from 'react-i18next';
import { z } from 'zod';
import { TopTitle } from '../dataset-title';
@ -16,10 +20,10 @@ import {
GenerateType,
IGenerateLogButtonProps,
} from '../dataset/generate-button/generate';
import LinkDataPipeline, {
IDataPipelineNodeProps,
} from './components/link-data-pipeline';
import { ChunkMethodForm } from './chunk-method-form';
import { IDataPipelineNodeProps } from './components/link-data-pipeline';
import { MainContainer } from './configuration-form-container';
import { ChunkMethodItem, ParseTypeItem } from './configuration/common-item';
import { formSchema } from './form-schema';
import { GeneralForm } from './general-form';
import { useFetchKnowledgeConfigurationOnMount } from './hooks';
@ -44,6 +48,7 @@ const enum MethodValue {
export default function DatasetSettings() {
const { t } = useTranslation();
const form = useForm<z.infer<typeof formSchema>>({
resolver: zodResolver(formSchema),
defaultValues: {
@ -58,6 +63,7 @@ export default function DatasetSettings() {
auto_questions: 0,
html4excel: false,
topn_tags: 3,
toc_extraction: false,
raptor: {
use_raptor: true,
max_token: 256,
@ -73,17 +79,17 @@ export default function DatasetSettings() {
},
},
pipeline_id: '',
parseType: 1,
pagerank: 0,
},
});
const knowledgeDetails = useFetchKnowledgeConfigurationOnMount(form);
const [pipelineData, setPipelineData] = useState<IDataPipelineNodeProps>();
const [graphRagGenerateData, setGraphRagGenerateData] =
useState<IGenerateLogButtonProps>();
const [raptorGenerateData, setRaptorGenerateData] =
useState<IGenerateLogButtonProps>();
useEffect(() => {
console.log('🚀 ~ DatasetSettings ~ knowledgeDetails:', knowledgeDetails);
if (knowledgeDetails) {
@ -102,8 +108,10 @@ export default function DatasetSettings() {
finish_at: knowledgeDetails.raptor_task_finish_at,
task_id: knowledgeDetails.raptor_task_id,
} as IGenerateLogButtonProps);
form.setValue('parseType', knowledgeDetails.pipeline_id ? 2 : 1);
form.setValue('pipeline_id', knowledgeDetails.pipeline_id || '');
}
}, [knowledgeDetails]);
}, [knowledgeDetails, form]);
async function onSubmit(data: z.infer<typeof formSchema>) {
try {
@ -137,6 +145,22 @@ export default function DatasetSettings() {
} as IGenerateLogButtonProps);
}
};
const parseType = useWatch({
control: form.control,
name: 'parseType',
defaultValue: knowledgeDetails.pipeline_id ? 2 : 1,
});
const selectedTag = useWatch({
name: 'parser_id',
control: form.control,
});
useEffect(() => {
if (parseType === 1) {
form.setValue('pipeline_id', '');
}
console.log('parseType', parseType);
}, [parseType, form]);
return (
<section className="p-5 h-full flex flex-col">
<TopTitle
@ -167,10 +191,30 @@ export default function DatasetSettings() {
onDelete={() => handleDeletePipelineTask(GenerateType.Raptor)}
></RaptorFormFields>
<Divider />
<LinkDataPipeline
<ParseTypeItem line={1} />
{parseType === 1 && (
<ChunkMethodItem line={1}></ChunkMethodItem>
)}
{parseType === 2 && (
<DataFlowSelect
isMult={false}
showToDataPipeline={true}
formFieldName="pipeline_id"
layout={FormLayout.Horizontal}
/>
)}
<Divider />
{parseType === 1 && (
<ChunkMethodForm
selectedTag={selectedTag as DocumentParserType}
/>
)}
{/* <LinkDataPipeline
data={pipelineData}
handleLinkOrEditSubmit={handleLinkOrEditSubmit}
/>
/> */}
</MainContainer>
</div>
<div className="text-right items-center flex justify-end gap-3 w-[768px]">

View File

@ -62,6 +62,7 @@ export function SavingButton() {
if (beValid) {
form.handleSubmit(async (values) => {
console.log('saveKnowledgeConfiguration: ', values);
delete values['parseType'];
// delete values['avatar'];
await saveKnowledgeConfiguration({
kb_id,

View File

@ -29,11 +29,6 @@ export function SideBar({ refreshCount }: PropType) {
const items = useMemo(() => {
const list = [
{
icon: <DatabaseZap className="size-4" />,
label: t(`knowledgeDetails.overview`),
key: Routes.DataSetOverview,
},
{
icon: <FolderOpen className="size-4" />,
label: t(`knowledgeDetails.subbarFiles`),
@ -44,6 +39,11 @@ export function SideBar({ refreshCount }: PropType) {
label: t(`knowledgeDetails.testing`),
key: Routes.DatasetTesting,
},
{
icon: <DatabaseZap className="size-4" />,
label: t(`knowledgeDetails.overview`),
key: Routes.DataSetOverview,
},
{
icon: <Banknote className="size-4" />,
label: t(`knowledgeDetails.configuration`),

View File

@ -16,6 +16,7 @@ import {
FormMessage,
} from '@/components/ui/form';
import { Input } from '@/components/ui/input';
import { FormLayout } from '@/constants/form';
import { useNavigatePage } from '@/hooks/logic-hooks/navigate-hooks';
import { IModalProps } from '@/interfaces/common';
import { zodResolver } from '@hookform/resolvers/zod';
@ -137,8 +138,9 @@ export function InputForm({ onOk }: IModalProps<any>) {
{parseType === 2 && (
<DataFlowSelect
isMult={false}
toDataPipeline={navigateToAgents}
showToDataPipeline={true}
formFieldName="pipeline_id"
layout={FormLayout.Vertical}
/>
)}
</form>