mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
### What problem does this PR solve? Feat: Merge splitter and hierarchicalMerger into one node #9869 ### Type of change - [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
@ -1737,7 +1737,7 @@ This delimiter is used to split the input text into several text pieces echo of
|
||||
addParser: 'Add Parser',
|
||||
hierarchy: 'Hierarchy',
|
||||
regularExpressions: 'Regular Expressions',
|
||||
overlappedPercent: 'Overlapped percent',
|
||||
overlappedPercent: 'Overlapped percent (%)',
|
||||
searchMethod: 'Search method',
|
||||
searchMethodTip: `Defines how the content can be searched — by full-text, embedding, or both.
|
||||
The Tokenizer will store the content in the corresponding data structures for the selected methods.`,
|
||||
@ -1749,11 +1749,11 @@ The Tokenizer will store the content in the corresponding data structures for th
|
||||
exportJson: 'Export JSON',
|
||||
viewResult: 'View result',
|
||||
running: 'Running',
|
||||
summary: 'Augmented Context',
|
||||
summary: 'Summary',
|
||||
keywords: 'Keywords',
|
||||
questions: 'Questions',
|
||||
metadata: 'Metadata',
|
||||
fieldName: 'Result Destination',
|
||||
fieldName: 'Result destination',
|
||||
prompts: {
|
||||
system: {
|
||||
keywords: `Role
|
||||
@ -1818,6 +1818,9 @@ Important structured information may include: names, dates, locations, events, k
|
||||
imageParseMethodOptions: {
|
||||
ocr: 'OCR',
|
||||
},
|
||||
note: 'Note',
|
||||
noteDescription: 'Note',
|
||||
notePlaceholder: 'Please enter a note',
|
||||
},
|
||||
datasetOverview: {
|
||||
downloadTip: 'Files being downloaded from data sources. ',
|
||||
|
||||
@ -1642,7 +1642,7 @@ General:实体和关系提取提示来自 GitHub - microsoft/graphrag:基于
|
||||
addParser: '增加解析器',
|
||||
hierarchy: '层次结构',
|
||||
regularExpressions: '正则表达式',
|
||||
overlappedPercent: '重叠百分比',
|
||||
overlappedPercent: '重叠百分比(%)',
|
||||
searchMethod: '搜索方法',
|
||||
searchMethodTip: `决定该数据集启用的搜索方式,可选择全文、向量,或两者兼有。
|
||||
Tokenizer 会根据所选方式将内容存储为对应的数据结构。`,
|
||||
@ -1710,6 +1710,9 @@ Tokenizer 会根据所选方式将内容存储为对应的数据结构。`,
|
||||
cancel: '取消',
|
||||
filenameEmbeddingWeight: '文件名嵌入权重',
|
||||
switchPromptMessage: '提示词将发生变化,请确认是否放弃已有提示词?',
|
||||
note: '注释',
|
||||
noteDescription: '注释',
|
||||
notePlaceholder: '请输入注释',
|
||||
},
|
||||
datasetOverview: {
|
||||
downloadTip: '正在从数据源下载文件。',
|
||||
|
||||
@ -45,7 +45,6 @@ import { RagNode } from './node';
|
||||
import { BeginNode } from './node/begin-node';
|
||||
import { NextStepDropdown } from './node/dropdown/next-step-dropdown';
|
||||
import { ExtractorNode } from './node/extractor-node';
|
||||
import { HierarchicalMergerNode } from './node/hierarchical-merger-node';
|
||||
import NoteNode from './node/note-node';
|
||||
import ParserNode from './node/parser-node';
|
||||
import { SplitterNode } from './node/splitter-node';
|
||||
@ -58,7 +57,6 @@ export const nodeTypes: NodeTypes = {
|
||||
parserNode: ParserNode,
|
||||
tokenizerNode: TokenizerNode,
|
||||
splitterNode: SplitterNode,
|
||||
hierarchicalMergerNode: HierarchicalMergerNode,
|
||||
contextNode: ExtractorNode,
|
||||
};
|
||||
|
||||
|
||||
@ -1 +0,0 @@
|
||||
export { RagNode as HierarchicalMergerNode } from './index';
|
||||
@ -9,6 +9,7 @@ interface IProps {
|
||||
gap?: number;
|
||||
className?: string;
|
||||
wrapperClassName?: string;
|
||||
icon?: React.ReactNode;
|
||||
}
|
||||
|
||||
const InnerNodeHeader = ({
|
||||
@ -16,11 +17,12 @@ const InnerNodeHeader = ({
|
||||
name,
|
||||
className,
|
||||
wrapperClassName,
|
||||
icon,
|
||||
}: IProps) => {
|
||||
return (
|
||||
<section className={cn(wrapperClassName, 'pb-4')}>
|
||||
<div className={cn(className, 'flex gap-2.5')}>
|
||||
<OperatorIcon name={label as Operator}></OperatorIcon>
|
||||
{icon || <OperatorIcon name={label as Operator}></OperatorIcon>}
|
||||
<span className="truncate text-center font-semibold text-sm">
|
||||
{name}
|
||||
</span>
|
||||
|
||||
@ -41,7 +41,7 @@ function ParserNode({
|
||||
{data.form?.setups.map((x, idx) => (
|
||||
<LabelCard
|
||||
key={idx}
|
||||
className="flex justify-between text-text-primary"
|
||||
className="flex justify- flex-col text-text-primary gap-1"
|
||||
>
|
||||
<span className="text-text-secondary">Parser {idx + 1}</span>
|
||||
{t(`dataflow.fileFormatOptions.${x.fileFormat}`)}
|
||||
|
||||
@ -1 +1,52 @@
|
||||
export { RagNode as SplitterNode } from './index';
|
||||
import { IRagNode } from '@/interfaces/database/flow';
|
||||
import { NodeProps, Position } from '@xyflow/react';
|
||||
import { PropsWithChildren, memo } from 'react';
|
||||
import { NodeHandleId, Operator } from '../../constant';
|
||||
import OperatorIcon from '../../operator-icon';
|
||||
import { LabelCard } from './card';
|
||||
import { CommonHandle } from './handle';
|
||||
import { LeftHandleStyle, RightHandleStyle } from './handle-icon';
|
||||
import NodeHeader from './node-header';
|
||||
import { NodeWrapper } from './node-wrapper';
|
||||
import { ToolBar } from './toolbar';
|
||||
|
||||
type RagNodeProps = NodeProps<IRagNode> & PropsWithChildren;
|
||||
function InnerSplitterNode({
|
||||
id,
|
||||
data,
|
||||
isConnectable = true,
|
||||
selected,
|
||||
}: RagNodeProps) {
|
||||
return (
|
||||
<ToolBar selected={selected} id={id} label={data.label} showCopy={false}>
|
||||
<NodeWrapper selected={selected}>
|
||||
<CommonHandle
|
||||
id={NodeHandleId.End}
|
||||
type="target"
|
||||
position={Position.Left}
|
||||
isConnectable={isConnectable}
|
||||
style={LeftHandleStyle}
|
||||
nodeId={id}
|
||||
></CommonHandle>
|
||||
<CommonHandle
|
||||
type="source"
|
||||
position={Position.Right}
|
||||
isConnectable={isConnectable}
|
||||
id={NodeHandleId.Start}
|
||||
style={RightHandleStyle}
|
||||
nodeId={id}
|
||||
isConnectableEnd={false}
|
||||
></CommonHandle>
|
||||
<NodeHeader
|
||||
id={id}
|
||||
name={'Chunker'}
|
||||
label={data.label}
|
||||
icon={<OperatorIcon name={Operator.Splitter}></OperatorIcon>}
|
||||
></NodeHeader>
|
||||
<LabelCard>{data.name}</LabelCard>
|
||||
</NodeWrapper>
|
||||
</ToolBar>
|
||||
);
|
||||
}
|
||||
|
||||
export const SplitterNode = memo(InnerSplitterNode);
|
||||
|
||||
@ -37,11 +37,11 @@ function TokenizerNode({
|
||||
nodeId={id}
|
||||
></CommonHandle>
|
||||
<NodeHeader id={id} name={data.name} label={data.label}></NodeHeader>
|
||||
<LabelCard className="text-text-primary flex justify-between">
|
||||
<LabelCard className="text-text-primary flex justify-between flex-col gap-1">
|
||||
<span className="text-text-secondary">
|
||||
{t('dataflow.searchMethod')}
|
||||
</span>
|
||||
<ul>
|
||||
<ul className="space-y-1">
|
||||
{data.form?.search_method.map((x) => (
|
||||
<li key={x}>{t(`dataflow.tokenizerSearchMethodOptions.${x}`)}</li>
|
||||
))}
|
||||
|
||||
@ -337,7 +337,7 @@ export const NodeMap = {
|
||||
[Operator.Parser]: 'parserNode',
|
||||
[Operator.Tokenizer]: 'tokenizerNode',
|
||||
[Operator.Splitter]: 'splitterNode',
|
||||
[Operator.HierarchicalMerger]: 'hierarchicalMergerNode',
|
||||
[Operator.HierarchicalMerger]: 'splitterNode',
|
||||
[Operator.Extractor]: 'contextNode',
|
||||
};
|
||||
|
||||
|
||||
@ -58,7 +58,13 @@ const FormSheet = ({
|
||||
<SheetTitle className="hidden"></SheetTitle>
|
||||
<section className="flex-col border-b py-2 px-5">
|
||||
<div className="flex items-center gap-2 pb-3">
|
||||
<OperatorIcon name={operatorName}></OperatorIcon>
|
||||
<OperatorIcon
|
||||
name={
|
||||
operatorName === Operator.HierarchicalMerger
|
||||
? Operator.Splitter
|
||||
: operatorName
|
||||
}
|
||||
></OperatorIcon>
|
||||
<div className="flex items-center gap-1 flex-1">
|
||||
<label htmlFor="">{t('flow.title')}</label>
|
||||
{node?.id === BeginId ? (
|
||||
|
||||
@ -30,7 +30,6 @@ import { useWatchFormChange } from '../../hooks/use-watch-form-change';
|
||||
import { INextOperatorForm } from '../../interface';
|
||||
import { buildOutputList } from '../../utils/build-output-list';
|
||||
import { Output } from '../components/output';
|
||||
import { OutputFormatFormField } from './common-form-fields';
|
||||
import { EmailFormFields } from './email-form-fields';
|
||||
import { ImageFormFields } from './image-form-fields';
|
||||
import { PdfFormFields } from './pdf-form-fields';
|
||||
@ -147,10 +146,10 @@ function ParserItem({
|
||||
)}
|
||||
</RAGFlowFormItem>
|
||||
<Widget prefix={prefix} fileType={fileFormat as FileType}></Widget>
|
||||
<OutputFormatFormField
|
||||
{/* <OutputFormatFormField
|
||||
prefix={prefix}
|
||||
fileType={fileFormat as FileType}
|
||||
/>
|
||||
/> */}
|
||||
{index < fieldLength - 1 && <Separator />}
|
||||
</section>
|
||||
);
|
||||
|
||||
@ -26,7 +26,7 @@ export const FormSchema = z.object({
|
||||
value: z.string().optional(),
|
||||
}),
|
||||
),
|
||||
overlapped_percent: z.number(), // 0.0 - 0.3
|
||||
overlapped_percent: z.number(), // 0.0 - 0.3 , 0% - 30%
|
||||
});
|
||||
|
||||
export type SplitterFormSchemaType = z.infer<typeof FormSchema>;
|
||||
@ -58,9 +58,8 @@ const SplitterForm = ({ node }: INextOperatorForm) => {
|
||||
></SliderInputFormField>
|
||||
<SliderInputFormField
|
||||
name="overlapped_percent"
|
||||
max={0.3}
|
||||
max={30}
|
||||
min={0}
|
||||
step={0.01}
|
||||
label={t('dataflow.overlappedPercent')}
|
||||
></SliderInputFormField>
|
||||
<section>
|
||||
|
||||
@ -131,6 +131,7 @@ function transformParserParams(params: ParserFormSchemaType) {
|
||||
function transformSplitterParams(params: SplitterFormSchemaType) {
|
||||
return {
|
||||
...params,
|
||||
overlapped_percent: Number(params.overlapped_percent) / 100,
|
||||
delimiters: transformObjectArrayToPureArray(params.delimiters, 'value'),
|
||||
};
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user