Feat: Merge title splitter and token splitter into chunker category #9869 (#10517)

### What problem does this PR solve?

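Feat: Merge the title splitter and the token splitter into a single collapsible "Chunker" category in the operator list, and shorten their display labels from "Title Splitter" / "Token Splitter" to "Title" / "Token".
#9869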

### Type of change


- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
balibabu
2025-10-13 15:46:14 +08:00
committed by GitHub
parent 77481ab3ab
commit cf5867b146
2 changed files with 70 additions and 22 deletions

View File

@ -1710,12 +1710,12 @@ This delimiter is used to split the input text into several text pieces echo of
tokenizerRequired: 'Please add the Indexer node first', tokenizerRequired: 'Please add the Indexer node first',
tokenizerDescription: tokenizerDescription:
'Transforms text into the required data structure (e.g., vector embeddings for Embedding Search) depending on the chosen search method.', 'Transforms text into the required data structure (e.g., vector embeddings for Embedding Search) depending on the chosen search method.',
splitter: 'Token Splitter', splitter: 'Token',
splitterDescription: splitterDescription:
'Split text into chunks by token length with optional delimiters and overlap.', 'Split text into chunks by token length with optional delimiters and overlap.',
hierarchicalMergerDescription: hierarchicalMergerDescription:
'Split documents into sections by title hierarchy with regex rules for finer control.', 'Split documents into sections by title hierarchy with regex rules for finer control.',
hierarchicalMerger: 'Title Splitter', hierarchicalMerger: 'Title',
extractor: 'Transformer', extractor: 'Transformer',
extractorDescription: extractorDescription:
'Use an LLM to extract structured insights from document chunks—such as summaries, classifications, etc.', 'Use an LLM to extract structured insights from document chunks—such as summaries, classifications, etc.',

View File

@ -1,3 +1,9 @@
import {
Accordion,
AccordionContent,
AccordionItem,
AccordionTrigger,
} from '@/components/ui/accordion';
import { import {
DropdownMenu, DropdownMenu,
DropdownMenuContent, DropdownMenuContent,
@ -19,12 +25,13 @@ import {
PropsWithChildren, PropsWithChildren,
createContext, createContext,
memo, memo,
useCallback,
useContext, useContext,
useEffect, useEffect,
useMemo, useMemo,
useRef, useRef,
} from 'react'; } from 'react';
import { Operator, SingleOperators } from '../../../constant'; import { Operator } from '../../../constant';
import { AgentInstanceContext, HandleContext } from '../../../context'; import { AgentInstanceContext, HandleContext } from '../../../context';
import OperatorIcon from '../../../operator-icon'; import OperatorIcon from '../../../operator-icon';
@ -116,16 +123,22 @@ function OperatorItemList({
// Limit the number of operators of a certain type on the canvas to only one // Limit the number of operators of a certain type on the canvas to only one
function useRestrictSingleOperatorOnCanvas() { function useRestrictSingleOperatorOnCanvas() {
const list: Operator[] = [];
const { findNodeByName } = useGraphStore((state) => state); const { findNodeByName } = useGraphStore((state) => state);
SingleOperators.forEach((operator) => { const restrictSingleOperatorOnCanvas = useCallback(
(singleOperators: Operator[]) => {
const list: Operator[] = [];
singleOperators.forEach((operator) => {
if (!findNodeByName(operator)) { if (!findNodeByName(operator)) {
list.push(operator); list.push(operator);
} }
}); });
return list; return list;
},
[findNodeByName],
);
return restrictSingleOperatorOnCanvas;
} }
function AccordionOperators({ function AccordionOperators({
@ -137,25 +150,60 @@ function AccordionOperators({
mousePosition?: { x: number; y: number }; mousePosition?: { x: number; y: number };
nodeId?: string; nodeId?: string;
}) { }) {
const singleOperators = useRestrictSingleOperatorOnCanvas(); const restrictSingleOperatorOnCanvas = useRestrictSingleOperatorOnCanvas();
const { getOperatorTypeFromId } = useGraphStore((state) => state); const { getOperatorTypeFromId } = useGraphStore((state) => state);
const operators = useMemo(() => { const operators = useMemo(() => {
let list = [...singleOperators]; let list = [
if (getOperatorTypeFromId(nodeId) === Operator.Extractor) { ...restrictSingleOperatorOnCanvas([Operator.Parser, Operator.Tokenizer]),
const Splitters = [Operator.HierarchicalMerger, Operator.Splitter]; ];
list = list.filter((x) => !Splitters.includes(x)); // The Context Generator node can only be followed by a Tokenizer and a Context Generator.
}
list.push(Operator.Extractor); list.push(Operator.Extractor);
return list; return list;
}, [getOperatorTypeFromId, nodeId, singleOperators]); }, [restrictSingleOperatorOnCanvas]);
const chunkerOperators = useMemo(() => {
return [
...restrictSingleOperatorOnCanvas([
Operator.Splitter,
Operator.HierarchicalMerger,
]),
];
}, [restrictSingleOperatorOnCanvas]);
const showChunker = useMemo(() => {
return (
getOperatorTypeFromId(nodeId) !== Operator.Extractor &&
chunkerOperators.length > 0
);
}, [chunkerOperators.length, getOperatorTypeFromId, nodeId]);
return ( return (
<>
<OperatorItemList <OperatorItemList
operators={operators} operators={operators}
isCustomDropdown={isCustomDropdown} isCustomDropdown={isCustomDropdown}
mousePosition={mousePosition} mousePosition={mousePosition}
></OperatorItemList> ></OperatorItemList>
{showChunker && (
<Accordion
type="single"
collapsible
className="w-full px-4"
defaultValue="item-1"
>
<AccordionItem value="item-1">
<AccordionTrigger>Chunker</AccordionTrigger>
<AccordionContent className="flex flex-col gap-4 text-balance">
<OperatorItemList
operators={chunkerOperators}
isCustomDropdown={isCustomDropdown}
mousePosition={mousePosition}
></OperatorItemList>
</AccordionContent>
</AccordionItem>
</Accordion>
)}
</>
); );
} }