Feat: Merge splitter and hierarchicalMerger into one node #9869 (#10543)

### What problem does this PR solve?

Feat: Merge splitter and hierarchicalMerger into one node #9869

### Type of change


- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
balibabu
2025-10-14 14:55:47 +08:00
committed by GitHub
parent 5b387b68ba
commit d99d1e3518
13 changed files with 81 additions and 20 deletions

View File

@ -1737,7 +1737,7 @@ This delimiter is used to split the input text into several text pieces echo of
addParser: 'Add Parser', addParser: 'Add Parser',
hierarchy: 'Hierarchy', hierarchy: 'Hierarchy',
regularExpressions: 'Regular Expressions', regularExpressions: 'Regular Expressions',
overlappedPercent: 'Overlapped percent', overlappedPercent: 'Overlapped percent (%)',
searchMethod: 'Search method', searchMethod: 'Search method',
searchMethodTip: `Defines how the content can be searched — by full-text, embedding, or both. searchMethodTip: `Defines how the content can be searched — by full-text, embedding, or both.
The Tokenizer will store the content in the corresponding data structures for the selected methods.`, The Tokenizer will store the content in the corresponding data structures for the selected methods.`,
@ -1749,11 +1749,11 @@ The Tokenizer will store the content in the corresponding data structures for th
exportJson: 'Export JSON', exportJson: 'Export JSON',
viewResult: 'View result', viewResult: 'View result',
running: 'Running', running: 'Running',
summary: 'Augmented Context', summary: 'Summary',
keywords: 'Keywords', keywords: 'Keywords',
questions: 'Questions', questions: 'Questions',
metadata: 'Metadata', metadata: 'Metadata',
fieldName: 'Result Destination', fieldName: 'Result destination',
prompts: { prompts: {
system: { system: {
keywords: `Role keywords: `Role
@ -1818,6 +1818,9 @@ Important structured information may include: names, dates, locations, events, k
imageParseMethodOptions: { imageParseMethodOptions: {
ocr: 'OCR', ocr: 'OCR',
}, },
note: 'Note',
noteDescription: 'Note',
notePlaceholder: 'Please enter a note',
}, },
datasetOverview: { datasetOverview: {
downloadTip: 'Files being downloaded from data sources. ', downloadTip: 'Files being downloaded from data sources. ',

View File

@ -1642,7 +1642,7 @@ General实体和关系提取提示来自 GitHub - microsoft/graphrag基于
addParser: '增加解析器', addParser: '增加解析器',
hierarchy: '层次结构', hierarchy: '层次结构',
regularExpressions: '正则表达式', regularExpressions: '正则表达式',
overlappedPercent: '重叠百分比', overlappedPercent: '重叠百分比%',
searchMethod: '搜索方法', searchMethod: '搜索方法',
searchMethodTip: `决定该数据集启用的搜索方式,可选择全文、向量,或两者兼有。 searchMethodTip: `决定该数据集启用的搜索方式,可选择全文、向量,或两者兼有。
Tokenizer 会根据所选方式将内容存储为对应的数据结构。`, Tokenizer 会根据所选方式将内容存储为对应的数据结构。`,
@ -1710,6 +1710,9 @@ Tokenizer 会根据所选方式将内容存储为对应的数据结构。`,
cancel: '取消', cancel: '取消',
filenameEmbeddingWeight: '文件名嵌入权重', filenameEmbeddingWeight: '文件名嵌入权重',
switchPromptMessage: '提示词将发生变化,请确认是否放弃已有提示词?', switchPromptMessage: '提示词将发生变化,请确认是否放弃已有提示词?',
note: '注释',
noteDescription: '注释',
notePlaceholder: '请输入注释',
}, },
datasetOverview: { datasetOverview: {
downloadTip: '正在从数据源下载文件。', downloadTip: '正在从数据源下载文件。',

View File

@ -45,7 +45,6 @@ import { RagNode } from './node';
import { BeginNode } from './node/begin-node'; import { BeginNode } from './node/begin-node';
import { NextStepDropdown } from './node/dropdown/next-step-dropdown'; import { NextStepDropdown } from './node/dropdown/next-step-dropdown';
import { ExtractorNode } from './node/extractor-node'; import { ExtractorNode } from './node/extractor-node';
import { HierarchicalMergerNode } from './node/hierarchical-merger-node';
import NoteNode from './node/note-node'; import NoteNode from './node/note-node';
import ParserNode from './node/parser-node'; import ParserNode from './node/parser-node';
import { SplitterNode } from './node/splitter-node'; import { SplitterNode } from './node/splitter-node';
@ -58,7 +57,6 @@ export const nodeTypes: NodeTypes = {
parserNode: ParserNode, parserNode: ParserNode,
tokenizerNode: TokenizerNode, tokenizerNode: TokenizerNode,
splitterNode: SplitterNode, splitterNode: SplitterNode,
hierarchicalMergerNode: HierarchicalMergerNode,
contextNode: ExtractorNode, contextNode: ExtractorNode,
}; };

View File

@ -1 +0,0 @@
export { RagNode as HierarchicalMergerNode } from './index';

View File

@ -9,6 +9,7 @@ interface IProps {
gap?: number; gap?: number;
className?: string; className?: string;
wrapperClassName?: string; wrapperClassName?: string;
icon?: React.ReactNode;
} }
const InnerNodeHeader = ({ const InnerNodeHeader = ({
@ -16,11 +17,12 @@ const InnerNodeHeader = ({
name, name,
className, className,
wrapperClassName, wrapperClassName,
icon,
}: IProps) => { }: IProps) => {
return ( return (
<section className={cn(wrapperClassName, 'pb-4')}> <section className={cn(wrapperClassName, 'pb-4')}>
<div className={cn(className, 'flex gap-2.5')}> <div className={cn(className, 'flex gap-2.5')}>
<OperatorIcon name={label as Operator}></OperatorIcon> {icon || <OperatorIcon name={label as Operator}></OperatorIcon>}
<span className="truncate text-center font-semibold text-sm"> <span className="truncate text-center font-semibold text-sm">
{name} {name}
</span> </span>

View File

@ -41,7 +41,7 @@ function ParserNode({
{data.form?.setups.map((x, idx) => ( {data.form?.setups.map((x, idx) => (
<LabelCard <LabelCard
key={idx} key={idx}
className="flex justify-between text-text-primary" className="flex justify- flex-col text-text-primary gap-1"
> >
<span className="text-text-secondary">Parser {idx + 1}</span> <span className="text-text-secondary">Parser {idx + 1}</span>
{t(`dataflow.fileFormatOptions.${x.fileFormat}`)} {t(`dataflow.fileFormatOptions.${x.fileFormat}`)}

View File

@ -1 +1,52 @@
export { RagNode as SplitterNode } from './index'; import { IRagNode } from '@/interfaces/database/flow';
import { NodeProps, Position } from '@xyflow/react';
import { PropsWithChildren, memo } from 'react';
import { NodeHandleId, Operator } from '../../constant';
import OperatorIcon from '../../operator-icon';
import { LabelCard } from './card';
import { CommonHandle } from './handle';
import { LeftHandleStyle, RightHandleStyle } from './handle-icon';
import NodeHeader from './node-header';
import { NodeWrapper } from './node-wrapper';
import { ToolBar } from './toolbar';
type RagNodeProps = NodeProps<IRagNode> & PropsWithChildren;
/**
 * Canvas node component exported (memoized) as `SplitterNode`.
 *
 * Render order inside the toolbar/wrapper:
 *   1. a target handle on the left edge (`NodeHandleId.End`),
 *   2. a source handle on the right edge (`NodeHandleId.Start`),
 *   3. a header with the fixed title "Chunker",
 *   4. a label card showing `data.name`.
 *
 * @param props Standard node props; `isConnectable` falls back to `true`
 *   when the caller leaves it undefined.
 */
function InnerSplitterNode(props: RagNodeProps) {
  const { id, data, selected, isConnectable = true } = props;

  // The header icon is pinned to the Splitter operator instead of being
  // derived from `data.label`.
  // NOTE(review): presumably so every operator routed to this node type
  // shares one look — confirm against the node-type mapping.
  const headerIcon = <OperatorIcon name={Operator.Splitter} />;

  return (
    <ToolBar selected={selected} id={id} label={data.label} showCopy={false}>
      <NodeWrapper selected={selected}>
        {/* Incoming edge anchor. */}
        <CommonHandle
          nodeId={id}
          id={NodeHandleId.End}
          type="target"
          position={Position.Left}
          style={LeftHandleStyle}
          isConnectable={isConnectable}
        />
        {/* Outgoing edge anchor. */}
        <CommonHandle
          nodeId={id}
          id={NodeHandleId.Start}
          type="source"
          position={Position.Right}
          style={RightHandleStyle}
          isConnectable={isConnectable}
          isConnectableEnd={false}
        />
        <NodeHeader
          id={id}
          name="Chunker"
          label={data.label}
          icon={headerIcon}
        />
        <LabelCard>{data.name}</LabelCard>
      </NodeWrapper>
    </ToolBar>
  );
}

export const SplitterNode = memo(InnerSplitterNode);

View File

@ -37,11 +37,11 @@ function TokenizerNode({
nodeId={id} nodeId={id}
></CommonHandle> ></CommonHandle>
<NodeHeader id={id} name={data.name} label={data.label}></NodeHeader> <NodeHeader id={id} name={data.name} label={data.label}></NodeHeader>
<LabelCard className="text-text-primary flex justify-between"> <LabelCard className="text-text-primary flex justify-between flex-col gap-1">
<span className="text-text-secondary"> <span className="text-text-secondary">
{t('dataflow.searchMethod')} {t('dataflow.searchMethod')}
</span> </span>
<ul> <ul className="space-y-1">
{data.form?.search_method.map((x) => ( {data.form?.search_method.map((x) => (
<li key={x}>{t(`dataflow.tokenizerSearchMethodOptions.${x}`)}</li> <li key={x}>{t(`dataflow.tokenizerSearchMethodOptions.${x}`)}</li>
))} ))}

View File

@ -337,7 +337,7 @@ export const NodeMap = {
[Operator.Parser]: 'parserNode', [Operator.Parser]: 'parserNode',
[Operator.Tokenizer]: 'tokenizerNode', [Operator.Tokenizer]: 'tokenizerNode',
[Operator.Splitter]: 'splitterNode', [Operator.Splitter]: 'splitterNode',
[Operator.HierarchicalMerger]: 'hierarchicalMergerNode', [Operator.HierarchicalMerger]: 'splitterNode',
[Operator.Extractor]: 'contextNode', [Operator.Extractor]: 'contextNode',
}; };

View File

@ -58,7 +58,13 @@ const FormSheet = ({
<SheetTitle className="hidden"></SheetTitle> <SheetTitle className="hidden"></SheetTitle>
<section className="flex-col border-b py-2 px-5"> <section className="flex-col border-b py-2 px-5">
<div className="flex items-center gap-2 pb-3"> <div className="flex items-center gap-2 pb-3">
<OperatorIcon name={operatorName}></OperatorIcon> <OperatorIcon
name={
operatorName === Operator.HierarchicalMerger
? Operator.Splitter
: operatorName
}
></OperatorIcon>
<div className="flex items-center gap-1 flex-1"> <div className="flex items-center gap-1 flex-1">
<label htmlFor="">{t('flow.title')}</label> <label htmlFor="">{t('flow.title')}</label>
{node?.id === BeginId ? ( {node?.id === BeginId ? (

View File

@ -30,7 +30,6 @@ import { useWatchFormChange } from '../../hooks/use-watch-form-change';
import { INextOperatorForm } from '../../interface'; import { INextOperatorForm } from '../../interface';
import { buildOutputList } from '../../utils/build-output-list'; import { buildOutputList } from '../../utils/build-output-list';
import { Output } from '../components/output'; import { Output } from '../components/output';
import { OutputFormatFormField } from './common-form-fields';
import { EmailFormFields } from './email-form-fields'; import { EmailFormFields } from './email-form-fields';
import { ImageFormFields } from './image-form-fields'; import { ImageFormFields } from './image-form-fields';
import { PdfFormFields } from './pdf-form-fields'; import { PdfFormFields } from './pdf-form-fields';
@ -147,10 +146,10 @@ function ParserItem({
)} )}
</RAGFlowFormItem> </RAGFlowFormItem>
<Widget prefix={prefix} fileType={fileFormat as FileType}></Widget> <Widget prefix={prefix} fileType={fileFormat as FileType}></Widget>
<OutputFormatFormField {/* <OutputFormatFormField
prefix={prefix} prefix={prefix}
fileType={fileFormat as FileType} fileType={fileFormat as FileType}
/> /> */}
{index < fieldLength - 1 && <Separator />} {index < fieldLength - 1 && <Separator />}
</section> </section>
); );

View File

@ -26,7 +26,7 @@ export const FormSchema = z.object({
value: z.string().optional(), value: z.string().optional(),
}), }),
), ),
overlapped_percent: z.number(), // 0.0 - 0.3 overlapped_percent: z.number(), // 0.0 - 0.3 , 0% - 30%
}); });
export type SplitterFormSchemaType = z.infer<typeof FormSchema>; export type SplitterFormSchemaType = z.infer<typeof FormSchema>;
@ -58,9 +58,8 @@ const SplitterForm = ({ node }: INextOperatorForm) => {
></SliderInputFormField> ></SliderInputFormField>
<SliderInputFormField <SliderInputFormField
name="overlapped_percent" name="overlapped_percent"
max={0.3} max={30}
min={0} min={0}
step={0.01}
label={t('dataflow.overlappedPercent')} label={t('dataflow.overlappedPercent')}
></SliderInputFormField> ></SliderInputFormField>
<section> <section>

View File

@ -131,6 +131,7 @@ function transformParserParams(params: ParserFormSchemaType) {
function transformSplitterParams(params: SplitterFormSchemaType) { function transformSplitterParams(params: SplitterFormSchemaType) {
return { return {
...params, ...params,
overlapped_percent: Number(params.overlapped_percent) / 100,
delimiters: transformObjectArrayToPureArray(params.delimiters, 'value'), delimiters: transformObjectArrayToPureArray(params.delimiters, 'value'),
}; };
} }