Feat: Use data pipeline to visualize the parsing configuration of the knowledge base (#10423)

### What problem does this PR solve?

#9869

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

---------

Signed-off-by: dependabot[bot] <support@github.com>
Signed-off-by: jinhai <haijin.chn@gmail.com>
Signed-off-by: Jin Hai <haijin.chn@gmail.com>
Co-authored-by: chanx <1243304602@qq.com>
Co-authored-by: balibabu <cike8899@users.noreply.github.com>
Co-authored-by: Lynn <lynn_inf@hotmail.com>
Co-authored-by: 纷繁下的无奈 <zhileihuang@126.com>
Co-authored-by: huangzl <huangzl@shinemo.com>
Co-authored-by: writinwaters <93570324+writinwaters@users.noreply.github.com>
Co-authored-by: Wilmer <33392318@qq.com>
Co-authored-by: Adrian Weidig <adrianweidig@gmx.net>
Co-authored-by: Zhichang Yu <yuzhichang@gmail.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: Yongteng Lei <yongtengrey@outlook.com>
Co-authored-by: Liu An <asiro@qq.com>
Co-authored-by: buua436 <66937541+buua436@users.noreply.github.com>
Co-authored-by: BadwomanCraZY <511528396@qq.com>
Co-authored-by: cucusenok <31804608+cucusenok@users.noreply.github.com>
Co-authored-by: Russell Valentine <russ@coldstonelabs.org>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Billy Bao <newyorkupperbay@gmail.com>
Co-authored-by: Zhedong Cen <cenzhedong2@126.com>
Co-authored-by: TensorNull <129579691+TensorNull@users.noreply.github.com>
Co-authored-by: TensorNull <tensor.null@gmail.com>
Co-authored-by: TeslaZY <TeslaZY@outlook.com>
Co-authored-by: Ajay <160579663+aybanda@users.noreply.github.com>
Co-authored-by: AB <aj@Ajays-MacBook-Air.local>
Co-authored-by: 天海蒼灆 <huangaoqin@tecpie.com>
Co-authored-by: He Wang <wanghechn@qq.com>
Co-authored-by: Atsushi Hatakeyama <atu729@icloud.com>
Co-authored-by: Jin Hai <haijin.chn@gmail.com>
Co-authored-by: Mohamed Mathari <155896313+melmathari@users.noreply.github.com>
Co-authored-by: Mohamed Mathari <nocodeventure@Mac-mini-van-Mohamed.fritz.box>
Co-authored-by: Stephen Hu <stephenhu@seismic.com>
Co-authored-by: Shaun Zhang <zhangwfjh@users.noreply.github.com>
Co-authored-by: zhimeng123 <60221886+zhimeng123@users.noreply.github.com>
Co-authored-by: mxc <mxc@example.com>
Co-authored-by: Dominik Novotný <50611433+SgtMarmite@users.noreply.github.com>
Co-authored-by: EVGENY M <168018528+rjohny55@users.noreply.github.com>
Co-authored-by: mcoder6425 <mcoder64@gmail.com>
Co-authored-by: lemsn <lemsn@msn.com>
Co-authored-by: lemsn <lemsn@126.com>
Co-authored-by: Adrian Gora <47756404+adagora@users.noreply.github.com>
Co-authored-by: Womsxd <45663319+Womsxd@users.noreply.github.com>
Co-authored-by: FatMii <39074672+FatMii@users.noreply.github.com>
This commit is contained in:
Kevin Hu
2025-10-09 12:36:19 +08:00
committed by GitHub
parent ef0aecea3b
commit cbf04ee470
490 changed files with 10630 additions and 30688 deletions

View File

@ -1,56 +1,36 @@
import {
IAgentForm,
ICategorizeForm,
ICategorizeItem,
ICategorizeItemResult,
} from '@/interfaces/database/agent';
import { IAgentForm } from '@/interfaces/database/agent';
import { DSLComponents, RAGFlowNodeType } from '@/interfaces/database/flow';
import { removeUselessFieldsFromValues } from '@/utils/form';
import { Edge, Node, XYPosition } from '@xyflow/react';
import { Edge, XYPosition } from '@xyflow/react';
import { FormInstance, FormListFieldData } from 'antd';
import { humanId } from 'human-id';
import { curry, get, intersectionWith, isEqual, omit, sample } from 'lodash';
import { curry, get, intersectionWith, isEmpty, isEqual, sample } from 'lodash';
import pipe from 'lodash/fp/pipe';
import isObject from 'lodash/isObject';
import {
CategorizeAnchorPointPositions,
FileType,
FileTypeSuffixMap,
NoDebugOperatorsList,
NodeHandleId,
Operator,
} from './constant';
import { BeginQuery, IPosition } from './interface';
function buildAgentExceptionGoto(edges: Edge[], nodeId: string) {
const exceptionEdges = edges.filter(
(x) =>
x.source === nodeId && x.sourceHandle === NodeHandleId.AgentException,
);
return exceptionEdges.map((x) => x.target);
}
import { ExtractorFormSchemaType } from './form/extractor-form';
import { HierarchicalMergerFormSchemaType } from './form/hierarchical-merger-form';
import { ParserFormSchemaType } from './form/parser-form';
import { SplitterFormSchemaType } from './form/splitter-form';
import { IPosition } from './interface';
const buildComponentDownstreamOrUpstream = (
edges: Edge[],
nodeId: string,
isBuildDownstream = true,
nodes: Node[],
) => {
return edges
.filter((y) => {
const node = nodes.find((x) => x.id === nodeId);
let isNotUpstreamTool = true;
let isNotUpstreamAgent = true;
let isNotExceptionGoto = true;
if (isBuildDownstream && node?.data.label === Operator.Agent) {
isNotExceptionGoto = y.sourceHandle !== NodeHandleId.AgentException;
// Exclude the tool operator downstream of the agent operator
isNotUpstreamTool = !y.target.startsWith(Operator.Tool);
// Exclude the agent operator downstream of the agent operator
isNotUpstreamAgent = !(
y.target.startsWith(Operator.Agent) &&
y.targetHandle === NodeHandleId.AgentTop
);
}
return (
y[isBuildDownstream ? 'source' : 'target'] === nodeId &&
isNotUpstreamTool &&
@ -63,79 +43,12 @@ const buildComponentDownstreamOrUpstream = (
const removeUselessDataInTheOperator = curry(
(operatorName: string, params: Record<string, unknown>) => {
if (
operatorName === Operator.Generate ||
operatorName === Operator.Categorize
) {
return removeUselessFieldsFromValues(params, '');
}
// if (operatorName === Operator.Categorize) {
// return removeUselessFieldsFromValues(params, '');
// }
return params;
},
);
// initialize data for operators without parameters
// const initializeOperatorParams = curry((operatorName: string, values: any) => {
// if (isEmpty(values)) {
// return initialFormValuesMap[operatorName as Operator];
// }
// return values;
// });
function buildAgentTools(edges: Edge[], nodes: Node[], nodeId: string) {
const node = nodes.find((x) => x.id === nodeId);
const params = { ...(node?.data.form ?? {}) };
if (node && node.data.label === Operator.Agent) {
const bottomSubAgentEdges = edges.filter(
(x) => x.source === nodeId && x.sourceHandle === NodeHandleId.AgentBottom,
);
(params as IAgentForm).tools = (params as IAgentForm).tools.concat(
bottomSubAgentEdges.map((x) => {
const {
params: formData,
id,
name,
} = buildAgentTools(edges, nodes, x.target);
return {
component_name: Operator.Agent,
id,
name: name as string, // Cast name to string and provide fallback
params: { ...formData },
};
}),
);
}
return { params, name: node?.data.name, id: node?.id };
}
function filterTargetsBySourceHandleId(edges: Edge[], handleId: string) {
return edges.filter((x) => x.sourceHandle === handleId).map((x) => x.target);
}
function buildCategorize(edges: Edge[], nodes: Node[], nodeId: string) {
const node = nodes.find((x) => x.id === nodeId);
const params = { ...(node?.data.form ?? {}) } as ICategorizeForm;
if (node && node.data.label === Operator.Categorize) {
const subEdges = edges.filter((x) => x.source === nodeId);
const items = params.items || [];
const nextCategoryDescription = items.reduce<
ICategorizeForm['category_description']
>((pre, val) => {
const key = val.name;
pre[key] = {
...omit(val, 'name', 'uuid'),
examples: val.examples?.map((x) => x.value) || [],
to: filterTargetsBySourceHandleId(subEdges, val.uuid),
};
return pre;
}, {});
params.category_description = nextCategoryDescription;
}
return omit(params, 'items');
}
const buildOperatorParams = (operatorName: string) =>
pipe(
@ -143,7 +56,7 @@ const buildOperatorParams = (operatorName: string) =>
// initializeOperatorParams(operatorName), // Final processing, for guarantee
);
const ExcludeOperators = [Operator.Note, Operator.Tool];
const ExcludeOperators = [Operator.Note];
export function isBottomSubAgent(edges: Edge[], nodeId?: string) {
const edge = edges.find(
@ -151,6 +64,90 @@ export function isBottomSubAgent(edges: Edge[], nodeId?: string) {
);
return !!edge;
}
// Because the array of react-hook-form must be object data,
// it needs to be converted into a simple data type array required by the backend
function transformObjectArrayToPureArray(
list: Array<Record<string, any>>,
field: string,
) {
return Array.isArray(list)
? list.filter((x) => !isEmpty(x[field])).map((y) => y[field])
: [];
}
function transformParserParams(params: ParserFormSchemaType) {
const setups = params.setups.reduce<
Record<string, ParserFormSchemaType['setups'][0]>
>((pre, cur) => {
if (cur.fileFormat) {
let filteredSetup: Partial<
ParserFormSchemaType['setups'][0] & { suffix: string[] }
> = {
output_format: cur.output_format,
suffix: FileTypeSuffixMap[cur.fileFormat as FileType],
};
switch (cur.fileFormat) {
case FileType.PDF:
filteredSetup = {
...filteredSetup,
parse_method: cur.parse_method,
lang: cur.lang,
};
break;
case FileType.Image:
filteredSetup = {
...filteredSetup,
parse_method: cur.parse_method,
lang: cur.lang,
system_prompt: cur.system_prompt,
};
break;
case FileType.Email:
filteredSetup = {
...filteredSetup,
fields: cur.fields,
};
break;
case FileType.Video:
case FileType.Audio:
filteredSetup = {
...filteredSetup,
llm_id: cur.llm_id,
};
break;
default:
break;
}
pre[cur.fileFormat] = filteredSetup;
}
return pre;
}, {});
return { ...params, setups };
}
function transformSplitterParams(params: SplitterFormSchemaType) {
return {
...params,
delimiters: transformObjectArrayToPureArray(params.delimiters, 'value'),
};
}
function transformHierarchicalMergerParams(
params: HierarchicalMergerFormSchemaType,
) {
const levels = params.levels.map((x) =>
transformObjectArrayToPureArray(x.expressions, 'expression'),
);
return { ...params, hierarchy: Number(params.hierarchy), levels };
}
function transformExtractorParams(params: ExtractorFormSchemaType) {
return { ...params, prompts: [{ content: params.prompts, role: 'user' }] };
}
// construct a dsl based on the node information of the graph
export const buildDslComponentsByGraph = (
@ -172,16 +169,19 @@ export const buildDslComponentsByGraph = (
let params = x?.data.form ?? {};
switch (operatorName) {
case Operator.Agent: {
const { params: formData } = buildAgentTools(edges, nodes, id);
params = {
...formData,
exception_goto: buildAgentExceptionGoto(edges, id),
};
case Operator.Parser:
params = transformParserParams(params);
break;
}
case Operator.Categorize:
params = buildCategorize(edges, nodes, id);
case Operator.Splitter:
params = transformSplitterParams(params);
break;
case Operator.HierarchicalMerger:
params = transformHierarchicalMergerParams(params);
break;
case Operator.Extractor:
params = transformExtractorParams(params);
break;
default:
@ -194,8 +194,8 @@ export const buildDslComponentsByGraph = (
component_name: operatorName,
params: buildOperatorParams(operatorName)(params) ?? {},
},
downstream: buildComponentDownstreamOrUpstream(edges, id, true, nodes),
upstream: buildComponentDownstreamOrUpstream(edges, id, false, nodes),
downstream: buildComponentDownstreamOrUpstream(edges, id, true),
upstream: buildComponentDownstreamOrUpstream(edges, id, false),
parent_id: x?.parentId,
};
});
@ -294,10 +294,6 @@ export const getOtherFieldValues = (
x !== form.getFieldValue([formListName, field.name, latestField]),
);
export const generateSwitchHandleText = (idx: number) => {
return `Case ${idx + 1}`;
};
export const getNodeDragHandle = (nodeType?: string) => {
return nodeType === Operator.Note ? '.note-drag-handle' : undefined;
};
@ -353,25 +349,6 @@ export const generateNodeNamesWithIncreasingIndex = (
export const duplicateNodeForm = (nodeData?: RAGFlowNodeType['data']) => {
const form: Record<string, any> = { ...(nodeData?.form ?? {}) };
// Delete the downstream node corresponding to the to field of the Categorize operator
if (nodeData?.label === Operator.Categorize) {
form.category_description = Object.keys(form.category_description).reduce<
Record<string, Record<string, any>>
>((pre, cur) => {
pre[cur] = {
...form.category_description[cur],
to: undefined,
};
return pre;
}, {});
}
// Delete the downstream nodes corresponding to the yes and no fields of the Relevant operator
if (nodeData?.label === Operator.Relevant) {
form.yes = undefined;
form.no = undefined;
}
return {
...(nodeData ?? { label: '' }),
form,
@ -386,40 +363,6 @@ export const needsSingleStepDebugging = (label: string) => {
return !NoDebugOperatorsList.some((x) => (label as Operator) === x);
};
// Get the coordinates of the node relative to the Iteration node
export function getRelativePositionToIterationNode(
nodes: RAGFlowNodeType[],
position?: XYPosition, // relative position
) {
if (!position) {
return;
}
const iterationNodes = nodes.filter(
(node) => node.data.label === Operator.Iteration,
);
for (const iterationNode of iterationNodes) {
const {
position: { x, y },
width,
height,
} = iterationNode;
const halfWidth = (width || 0) / 2;
if (
position.x >= x - halfWidth &&
position.x <= x + halfWidth &&
position.y >= y &&
position.y <= y + (height || 0)
) {
return {
parentId: iterationNode.id,
position: { x: position.x - x + halfWidth, y: position.y - y },
};
}
}
}
export const generateDuplicateNode = (
position?: XYPosition,
label?: string,
@ -454,71 +397,11 @@ export function convertToObjectArray(list: Array<string | number | boolean>) {
return list.map((x) => ({ value: x }));
}
/**
* convert the following object into a list
*
* {
"product_related": {
"description": "The question is about product usage, appearance and how it works.",
"examples": "Why it always beaming?\nHow to install it onto the wall?\nIt leaks, what to do?",
"to": "generate:0"
}
}
*/
export const buildCategorizeListFromObject = (
categorizeItem: ICategorizeItemResult,
) => {
// Categorize's to field has two data sources, with edges as the data source.
// Changes in the edge or to field need to be synchronized to the form field.
return Object.keys(categorizeItem)
.reduce<Array<ICategorizeItem>>((pre, cur) => {
// synchronize edge data to the to field
pre.push({
name: cur,
...categorizeItem[cur],
examples: convertToObjectArray(categorizeItem[cur].examples),
});
return pre;
}, [])
.sort((a, b) => a.index - b.index);
};
/**
* Convert the list in the following form into an object
* {
"items": [
{
"name": "Categorize 1",
"description": "111",
"examples": ["ddd"],
"to": "Retrieval:LazyEelsStick"
}
]
}
*/
export const buildCategorizeObjectFromList = (list: Array<ICategorizeItem>) => {
return list.reduce<ICategorizeItemResult>((pre, cur) => {
if (cur?.name) {
pre[cur.name] = {
...omit(cur, 'name', 'examples'),
examples: convertToStringArray(cur.examples) as string[],
};
}
return pre;
}, {});
};
export function getAgentNodeTools(agentNode?: RAGFlowNodeType) {
const tools: IAgentForm['tools'] = get(agentNode, 'data.form.tools', []);
return tools;
}
export function getAgentNodeMCP(agentNode?: RAGFlowNodeType) {
const tools: IAgentForm['mcp'] = get(agentNode, 'data.form.mcp', []);
return tools;
}
export function mapEdgeMouseEvent(
edges: Edge[],
edgeId: string,
@ -538,21 +421,3 @@ export function mapEdgeMouseEvent(
return nextEdges;
}
export function buildBeginQueryWithObject(
inputs: Record<string, BeginQuery>,
values: BeginQuery[],
) {
const nextInputs = Object.keys(inputs).reduce<Record<string, BeginQuery>>(
(pre, key) => {
const item = values.find((x) => x.key === key);
if (item) {
pre[key] = { ...item };
}
return pre;
},
{},
);
return nextInputs;
}