mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-01-04 03:25:30 +08:00
Feat: Use data pipeline to visualize the parsing configuration of the knowledge base (#10423)
### What problem does this PR solve? #9869 ### Type of change - [x] New Feature (non-breaking change which adds functionality) --------- Signed-off-by: dependabot[bot] <support@github.com> Signed-off-by: jinhai <haijin.chn@gmail.com> Signed-off-by: Jin Hai <haijin.chn@gmail.com> Co-authored-by: chanx <1243304602@qq.com> Co-authored-by: balibabu <cike8899@users.noreply.github.com> Co-authored-by: Lynn <lynn_inf@hotmail.com> Co-authored-by: 纷繁下的无奈 <zhileihuang@126.com> Co-authored-by: huangzl <huangzl@shinemo.com> Co-authored-by: writinwaters <93570324+writinwaters@users.noreply.github.com> Co-authored-by: Wilmer <33392318@qq.com> Co-authored-by: Adrian Weidig <adrianweidig@gmx.net> Co-authored-by: Zhichang Yu <yuzhichang@gmail.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: Yongteng Lei <yongtengrey@outlook.com> Co-authored-by: Liu An <asiro@qq.com> Co-authored-by: buua436 <66937541+buua436@users.noreply.github.com> Co-authored-by: BadwomanCraZY <511528396@qq.com> Co-authored-by: cucusenok <31804608+cucusenok@users.noreply.github.com> Co-authored-by: Russell Valentine <russ@coldstonelabs.org> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Billy Bao <newyorkupperbay@gmail.com> Co-authored-by: Zhedong Cen <cenzhedong2@126.com> Co-authored-by: TensorNull <129579691+TensorNull@users.noreply.github.com> Co-authored-by: TensorNull <tensor.null@gmail.com> Co-authored-by: TeslaZY <TeslaZY@outlook.com> Co-authored-by: Ajay <160579663+aybanda@users.noreply.github.com> Co-authored-by: AB <aj@Ajays-MacBook-Air.local> Co-authored-by: 天海蒼灆 <huangaoqin@tecpie.com> Co-authored-by: He Wang <wanghechn@qq.com> Co-authored-by: Atsushi Hatakeyama <atu729@icloud.com> Co-authored-by: Jin Hai <haijin.chn@gmail.com> Co-authored-by: Mohamed Mathari <155896313+melmathari@users.noreply.github.com> Co-authored-by: Mohamed Mathari <nocodeventure@Mac-mini-van-Mohamed.fritz.box> Co-authored-by: Stephen 
Hu <stephenhu@seismic.com> Co-authored-by: Shaun Zhang <zhangwfjh@users.noreply.github.com> Co-authored-by: zhimeng123 <60221886+zhimeng123@users.noreply.github.com> Co-authored-by: mxc <mxc@example.com> Co-authored-by: Dominik Novotný <50611433+SgtMarmite@users.noreply.github.com> Co-authored-by: EVGENY M <168018528+rjohny55@users.noreply.github.com> Co-authored-by: mcoder6425 <mcoder64@gmail.com> Co-authored-by: lemsn <lemsn@msn.com> Co-authored-by: lemsn <lemsn@126.com> Co-authored-by: Adrian Gora <47756404+adagora@users.noreply.github.com> Co-authored-by: Womsxd <45663319+Womsxd@users.noreply.github.com> Co-authored-by: FatMii <39074672+FatMii@users.noreply.github.com>
This commit is contained in:
@ -1,56 +1,36 @@
|
||||
import {
|
||||
IAgentForm,
|
||||
ICategorizeForm,
|
||||
ICategorizeItem,
|
||||
ICategorizeItemResult,
|
||||
} from '@/interfaces/database/agent';
|
||||
import { IAgentForm } from '@/interfaces/database/agent';
|
||||
import { DSLComponents, RAGFlowNodeType } from '@/interfaces/database/flow';
|
||||
import { removeUselessFieldsFromValues } from '@/utils/form';
|
||||
import { Edge, Node, XYPosition } from '@xyflow/react';
|
||||
import { Edge, XYPosition } from '@xyflow/react';
|
||||
import { FormInstance, FormListFieldData } from 'antd';
|
||||
import { humanId } from 'human-id';
|
||||
import { curry, get, intersectionWith, isEqual, omit, sample } from 'lodash';
|
||||
import { curry, get, intersectionWith, isEmpty, isEqual, sample } from 'lodash';
|
||||
import pipe from 'lodash/fp/pipe';
|
||||
import isObject from 'lodash/isObject';
|
||||
import {
|
||||
CategorizeAnchorPointPositions,
|
||||
FileType,
|
||||
FileTypeSuffixMap,
|
||||
NoDebugOperatorsList,
|
||||
NodeHandleId,
|
||||
Operator,
|
||||
} from './constant';
|
||||
import { BeginQuery, IPosition } from './interface';
|
||||
|
||||
/**
 * Collects the targets of every edge that leaves `nodeId` through the
 * agent's exception handle (`NodeHandleId.AgentException`).
 *
 * @param edges - All edges of the graph.
 * @param nodeId - Id of the node whose exception edges are looked up.
 * @returns The `target` ids of the matching edges, in edge order.
 */
function buildAgentExceptionGoto(edges: Edge[], nodeId: string) {
  return edges
    .filter(
      (edge) =>
        edge.source === nodeId &&
        edge.sourceHandle === NodeHandleId.AgentException,
    )
    .map((edge) => edge.target);
}
|
||||
import { ExtractorFormSchemaType } from './form/extractor-form';
|
||||
import { HierarchicalMergerFormSchemaType } from './form/hierarchical-merger-form';
|
||||
import { ParserFormSchemaType } from './form/parser-form';
|
||||
import { SplitterFormSchemaType } from './form/splitter-form';
|
||||
import { IPosition } from './interface';
|
||||
|
||||
const buildComponentDownstreamOrUpstream = (
|
||||
edges: Edge[],
|
||||
nodeId: string,
|
||||
isBuildDownstream = true,
|
||||
nodes: Node[],
|
||||
) => {
|
||||
return edges
|
||||
.filter((y) => {
|
||||
const node = nodes.find((x) => x.id === nodeId);
|
||||
let isNotUpstreamTool = true;
|
||||
let isNotUpstreamAgent = true;
|
||||
let isNotExceptionGoto = true;
|
||||
if (isBuildDownstream && node?.data.label === Operator.Agent) {
|
||||
isNotExceptionGoto = y.sourceHandle !== NodeHandleId.AgentException;
|
||||
// Exclude the tool operator downstream of the agent operator
|
||||
isNotUpstreamTool = !y.target.startsWith(Operator.Tool);
|
||||
// Exclude the agent operator downstream of the agent operator
|
||||
isNotUpstreamAgent = !(
|
||||
y.target.startsWith(Operator.Agent) &&
|
||||
y.targetHandle === NodeHandleId.AgentTop
|
||||
);
|
||||
}
|
||||
|
||||
return (
|
||||
y[isBuildDownstream ? 'source' : 'target'] === nodeId &&
|
||||
isNotUpstreamTool &&
|
||||
@ -63,79 +43,12 @@ const buildComponentDownstreamOrUpstream = (
|
||||
|
||||
/**
 * Curried cleaner applied to an operator's params before they are written
 * into the DSL.
 *
 * For the Generate and Categorize operators the params are passed through
 * `removeUselessFieldsFromValues` with an empty prefix; params of every other
 * operator are returned untouched.
 *
 * @param operatorName - Operator label the params belong to.
 * @param params - Raw form values of the operator.
 * @returns The cleaned (or original) params.
 */
const removeUselessDataInTheOperator = curry(
  (operatorName: string, params: Record<string, unknown>) => {
    const needsCleanup =
      operatorName === Operator.Generate ||
      operatorName === Operator.Categorize;

    return needsCleanup ? removeUselessFieldsFromValues(params, '') : params;
  },
);
|
||||
// initialize data for operators without parameters
|
||||
// const initializeOperatorParams = curry((operatorName: string, values: any) => {
|
||||
// if (isEmpty(values)) {
|
||||
// return initialFormValuesMap[operatorName as Operator];
|
||||
// }
|
||||
// return values;
|
||||
// });
|
||||
|
||||
/**
 * Builds the params of an agent node, appending every sub-agent that hangs
 * off its bottom handle (`NodeHandleId.AgentBottom`) to the `tools` list.
 *
 * Sub-agents are resolved recursively, so a sub-agent's own sub-agents end up
 * nested inside its `params.tools`.
 * NOTE(review): there is no cycle guard; a cyclic chain of bottom-handle edges
 * would recurse without end — confirm the editor prevents such graphs.
 *
 * @param edges - All edges of the graph.
 * @param nodes - All nodes of the graph.
 * @param nodeId - Id of the node to build params for.
 * @returns A shallow copy of the node's form (with `tools` extended for agent
 *          nodes) plus the node's `name` and `id`; `name`/`id` are `undefined`
 *          when no node matches `nodeId`.
 */
function buildAgentTools(edges: Edge[], nodes: Node[], nodeId: string) {
  const node = nodes.find((x) => x.id === nodeId);
  const params = { ...(node?.data.form ?? {}) };

  if (node && node.data.label === Operator.Agent) {
    const agentParams = params as IAgentForm;

    const subAgentTools = edges
      .filter(
        (x) =>
          x.source === nodeId && x.sourceHandle === NodeHandleId.AgentBottom,
      )
      .map((x) => {
        const {
          params: formData,
          id,
          name,
        } = buildAgentTools(edges, nodes, x.target);

        return {
          component_name: Operator.Agent,
          id,
          // The DSL expects a string; the value is only cast, no fallback is
          // substituted when the target node has no name.
          name: name as string,
          params: { ...formData },
        };
      });

    // A form that has never been given a `tools` array must not crash here.
    agentParams.tools = (agentParams.tools ?? []).concat(subAgentTools);
  }

  return { params, name: node?.data.name, id: node?.id };
}
|
||||
|
||||
/**
 * Returns the `target` of every edge whose `sourceHandle` equals `handleId`.
 *
 * @param edges - Edges to search.
 * @param handleId - Source handle id to match.
 * @returns Matching target ids, in edge order (empty when nothing matches).
 */
function filterTargetsBySourceHandleId(edges: Edge[], handleId: string) {
  return edges
    .filter((edge) => edge.sourceHandle === handleId)
    .map((edge) => edge.target);
}
|
||||
|
||||
/**
 * Builds the DSL params of a Categorize node.
 *
 * For a Categorize node, each entry of the form's `items` list becomes one
 * entry of `category_description`, keyed by the item's `name`:
 * - `name` and `uuid` are dropped from the entry,
 * - `examples` is flattened from `{ value }` objects to plain values,
 * - `to` is filled with the targets of the node's outgoing edges whose
 *   source handle equals the item's `uuid`.
 *
 * @param edges - All edges of the graph.
 * @param nodes - All nodes of the graph.
 * @param nodeId - Id of the node to build params for.
 * @returns A copy of the node's form without the `items` field.
 */
function buildCategorize(edges: Edge[], nodes: Node[], nodeId: string) {
  const node = nodes.find((x) => x.id === nodeId);
  const params = { ...(node?.data.form ?? {}) } as ICategorizeForm;

  if (node && node.data.label === Operator.Categorize) {
    const outgoingEdges = edges.filter((x) => x.source === nodeId);
    const items = params.items || [];

    params.category_description = items.reduce<
      ICategorizeForm['category_description']
    >((description, item) => {
      description[item.name] = {
        ...omit(item, 'name', 'uuid'),
        examples: item.examples?.map((x) => x.value) ?? [],
        to: filterTargetsBySourceHandleId(outgoingEdges, item.uuid),
      };
      return description;
    }, {});
  }

  return omit(params, 'items');
}
|
||||
|
||||
const buildOperatorParams = (operatorName: string) =>
|
||||
pipe(
|
||||
@ -143,7 +56,7 @@ const buildOperatorParams = (operatorName: string) =>
|
||||
// initializeOperatorParams(operatorName), // Final processing, for guarantee
|
||||
);
|
||||
|
||||
const ExcludeOperators = [Operator.Note, Operator.Tool];
|
||||
const ExcludeOperators = [Operator.Note];
|
||||
|
||||
export function isBottomSubAgent(edges: Edge[], nodeId?: string) {
|
||||
const edge = edges.find(
|
||||
@ -151,6 +64,90 @@ export function isBottomSubAgent(edges: Edge[], nodeId?: string) {
|
||||
);
|
||||
return !!edge;
|
||||
}
|
||||
/**
 * react-hook-form field arrays must hold objects, while the backend expects a
 * plain array of values. This picks `field` out of every object in `list`.
 *
 * Entries whose `field` value is empty according to lodash `isEmpty` are
 * skipped.
 * NOTE(review): lodash `isEmpty` also reports numbers and booleans as empty,
 * so only string/array/object values survive — confirm no caller passes
 * numeric values.
 *
 * @param list - Array of form objects; anything that is not an array yields `[]`.
 * @param field - Property to extract from each object.
 * @returns The extracted non-empty values, in list order.
 */
function transformObjectArrayToPureArray(
  list: Array<Record<string, any>>,
  field: string,
) {
  if (!Array.isArray(list)) {
    return [];
  }

  return list
    .filter((item) => !isEmpty(item[field]))
    .map((item) => item[field]);
}
|
||||
|
||||
/**
 * Converts the Parser form values into the shape written to the DSL.
 *
 * The form's `setups` list is turned into an object keyed by `fileFormat`.
 * Every entry keeps `output_format` and gets the `suffix` list registered for
 * its file type in `FileTypeSuffixMap`; on top of that only the fields that
 * apply to the file type are copied:
 * - PDF: `parse_method`, `lang`
 * - Image: `parse_method`, `lang`, `system_prompt`
 * - Email: `fields`
 * - Video / Audio: `llm_id`
 *
 * Setups without a `fileFormat` are ignored; when two setups share a
 * `fileFormat` the later one wins.
 *
 * @param params - Parser form values.
 * @returns `params` with `setups` replaced by the keyed object.
 */
function transformParserParams(params: ParserFormSchemaType) {
  const setups = params.setups.reduce<
    Record<string, ParserFormSchemaType['setups'][0]>
  >((pre, cur) => {
    if (!cur.fileFormat) {
      return pre;
    }

    // Fields that only make sense for the current file type.
    let extra: Partial<ParserFormSchemaType['setups'][0]> = {};

    switch (cur.fileFormat) {
      case FileType.PDF:
        extra = { parse_method: cur.parse_method, lang: cur.lang };
        break;
      case FileType.Image:
        extra = {
          parse_method: cur.parse_method,
          lang: cur.lang,
          system_prompt: cur.system_prompt,
        };
        break;
      case FileType.Email:
        extra = { fields: cur.fields };
        break;
      case FileType.Video:
      case FileType.Audio:
        extra = { llm_id: cur.llm_id };
        break;
      default:
        break;
    }

    const filteredSetup: Partial<
      ParserFormSchemaType['setups'][0] & { suffix: string[] }
    > = {
      output_format: cur.output_format,
      suffix: FileTypeSuffixMap[cur.fileFormat as FileType],
      ...extra,
    };

    pre[cur.fileFormat] = filteredSetup;
    return pre;
  }, {});

  return { ...params, setups };
}
|
||||
|
||||
/**
 * Converts the Splitter form values into the shape written to the DSL:
 * `delimiters` is flattened from `{ value }` objects to a plain array of the
 * non-empty values.
 *
 * @param params - Splitter form values.
 * @returns A copy of `params` with the flattened `delimiters`.
 */
function transformSplitterParams(params: SplitterFormSchemaType) {
  const delimiters = transformObjectArrayToPureArray(
    params.delimiters,
    'value',
  );

  return { ...params, delimiters };
}
|
||||
|
||||
/**
 * Converts the HierarchicalMerger form values into the shape written to the
 * DSL:
 * - `hierarchy` is coerced with `Number(...)`,
 * - every level's `expressions` list is flattened from `{ expression }`
 *   objects to a plain array of the non-empty expressions.
 *
 * @param params - HierarchicalMerger form values.
 * @returns A copy of `params` with the converted `hierarchy` and `levels`.
 */
function transformHierarchicalMergerParams(
  params: HierarchicalMergerFormSchemaType,
) {
  const levels = params.levels.map((level) =>
    transformObjectArrayToPureArray(level.expressions, 'expression'),
  );
  const hierarchy = Number(params.hierarchy);

  return { ...params, hierarchy, levels };
}
|
||||
|
||||
/**
 * Converts the Extractor form values into the shape written to the DSL: the
 * form's `prompts` value is wrapped into a one-element message list with the
 * role `'user'`.
 *
 * @param params - Extractor form values.
 * @returns A copy of `params` whose `prompts` is `[{ content, role: 'user' }]`.
 */
function transformExtractorParams(params: ExtractorFormSchemaType) {
  const userPrompt = { content: params.prompts, role: 'user' };

  return { ...params, prompts: [userPrompt] };
}
|
||||
|
||||
// construct a dsl based on the node information of the graph
|
||||
export const buildDslComponentsByGraph = (
|
||||
@ -172,16 +169,19 @@ export const buildDslComponentsByGraph = (
|
||||
let params = x?.data.form ?? {};
|
||||
|
||||
switch (operatorName) {
|
||||
case Operator.Agent: {
|
||||
const { params: formData } = buildAgentTools(edges, nodes, id);
|
||||
params = {
|
||||
...formData,
|
||||
exception_goto: buildAgentExceptionGoto(edges, id),
|
||||
};
|
||||
case Operator.Parser:
|
||||
params = transformParserParams(params);
|
||||
break;
|
||||
}
|
||||
case Operator.Categorize:
|
||||
params = buildCategorize(edges, nodes, id);
|
||||
|
||||
case Operator.Splitter:
|
||||
params = transformSplitterParams(params);
|
||||
break;
|
||||
|
||||
case Operator.HierarchicalMerger:
|
||||
params = transformHierarchicalMergerParams(params);
|
||||
break;
|
||||
case Operator.Extractor:
|
||||
params = transformExtractorParams(params);
|
||||
break;
|
||||
|
||||
default:
|
||||
@ -194,8 +194,8 @@ export const buildDslComponentsByGraph = (
|
||||
component_name: operatorName,
|
||||
params: buildOperatorParams(operatorName)(params) ?? {},
|
||||
},
|
||||
downstream: buildComponentDownstreamOrUpstream(edges, id, true, nodes),
|
||||
upstream: buildComponentDownstreamOrUpstream(edges, id, false, nodes),
|
||||
downstream: buildComponentDownstreamOrUpstream(edges, id, true),
|
||||
upstream: buildComponentDownstreamOrUpstream(edges, id, false),
|
||||
parent_id: x?.parentId,
|
||||
};
|
||||
});
|
||||
@ -294,10 +294,6 @@ export const getOtherFieldValues = (
|
||||
x !== form.getFieldValue([formListName, field.name, latestField]),
|
||||
);
|
||||
|
||||
/**
 * Builds the label of a switch case handle from its zero-based index.
 *
 * @param idx - Zero-based index of the case.
 * @returns `Case N`, where `N` is `idx + 1`.
 */
export const generateSwitchHandleText = (idx: number) => `Case ${idx + 1}`;
|
||||
|
||||
/**
 * Returns the CSS selector of the drag handle for a node type.
 *
 * @param nodeType - Operator label of the node.
 * @returns `'.note-drag-handle'` for Note nodes, `undefined` for every other
 *          type.
 */
export const getNodeDragHandle = (nodeType?: string) => {
  if (nodeType === Operator.Note) {
    return '.note-drag-handle';
  }

  return undefined;
};
|
||||
@ -353,25 +349,6 @@ export const generateNodeNamesWithIncreasingIndex = (
|
||||
export const duplicateNodeForm = (nodeData?: RAGFlowNodeType['data']) => {
|
||||
const form: Record<string, any> = { ...(nodeData?.form ?? {}) };
|
||||
|
||||
// Delete the downstream node corresponding to the to field of the Categorize operator
|
||||
if (nodeData?.label === Operator.Categorize) {
|
||||
form.category_description = Object.keys(form.category_description).reduce<
|
||||
Record<string, Record<string, any>>
|
||||
>((pre, cur) => {
|
||||
pre[cur] = {
|
||||
...form.category_description[cur],
|
||||
to: undefined,
|
||||
};
|
||||
return pre;
|
||||
}, {});
|
||||
}
|
||||
|
||||
// Delete the downstream nodes corresponding to the yes and no fields of the Relevant operator
|
||||
if (nodeData?.label === Operator.Relevant) {
|
||||
form.yes = undefined;
|
||||
form.no = undefined;
|
||||
}
|
||||
|
||||
return {
|
||||
...(nodeData ?? { label: '' }),
|
||||
form,
|
||||
@ -386,40 +363,6 @@ export const needsSingleStepDebugging = (label: string) => {
|
||||
return !NoDebugOperatorsList.some((x) => (label as Operator) === x);
|
||||
};
|
||||
|
||||
/**
 * Finds the first Iteration node that contains `position` and returns the
 * position expressed relative to that node.
 *
 * The hit test treats the node's `position.x` as the horizontal centre
 * (bounds are `x ± width / 2`) and `position.y` as the top edge (bounds are
 * `y .. y + height`); a missing width or height counts as 0.
 *
 * @param nodes - All nodes of the graph.
 * @param position - Position to test.
 * @returns `{ parentId, position }` for the first containing Iteration node,
 *          or `undefined` when `position` is missing or no node contains it.
 */
export function getRelativePositionToIterationNode(
  nodes: RAGFlowNodeType[],
  position?: XYPosition, // relative position
) {
  if (!position) {
    return;
  }

  for (const node of nodes) {
    if (node.data.label !== Operator.Iteration) {
      continue;
    }

    const { x, y } = node.position;
    const halfWidth = (node.width || 0) / 2;

    const insideHorizontally =
      position.x >= x - halfWidth && position.x <= x + halfWidth;
    const insideVertically =
      position.y >= y && position.y <= y + (node.height || 0);

    if (insideHorizontally && insideVertically) {
      return {
        parentId: node.id,
        position: { x: position.x - x + halfWidth, y: position.y - y },
      };
    }
  }
}
|
||||
|
||||
export const generateDuplicateNode = (
|
||||
position?: XYPosition,
|
||||
label?: string,
|
||||
@ -454,71 +397,11 @@ export function convertToObjectArray(list: Array<string | number | boolean>) {
|
||||
return list.map((x) => ({ value: x }));
|
||||
}
|
||||
|
||||
/**
 * Converts a category map into the list used by the Categorize form.
 *
 * Each key of `categorizeItem` becomes the `name` of a list entry, the entry's
 * `examples` are wrapped into `{ value }` objects (a missing `examples` is
 * treated as an empty list), and the resulting list is sorted by `index` in
 * ascending order.
 *
 * Categorize's `to` field has two data sources, with edges as the source of
 * truth; changes in the edge or the `to` field need to be synchronized to the
 * form field by the caller.
 *
 * @example
 * // input
 * // {
 * //   "product_related": {
 * //     "description": "The question is about product usage.",
 * //     "examples": ["Why it always beaming?", "How to install it?"],
 * //     "to": "generate:0"
 * //   }
 * // }
 *
 * @param categorizeItem - Category map keyed by category name.
 * @returns The list of category items ordered by `index`.
 */
export const buildCategorizeListFromObject = (
  categorizeItem: ICategorizeItemResult,
) => {
  return Object.keys(categorizeItem)
    .map<ICategorizeItem>((name) => ({
      name,
      ...categorizeItem[name],
      examples: convertToObjectArray(categorizeItem[name].examples ?? []),
    }))
    .sort((a, b) => a.index - b.index);
};
|
||||
|
||||
/**
 * Converts the Categorize form list into a category map keyed by item name.
 *
 * Items without a `name` are skipped. For every other item the `name` is used
 * as the key, and `examples` is passed through `convertToStringArray`; all
 * remaining fields are copied as they are. When two items share a name the
 * later one wins.
 *
 * @example
 * // input
 * // [
 * //   {
 * //     "name": "Categorize 1",
 * //     "description": "111",
 * //     "examples": [{ "value": "ddd" }],
 * //     "to": "Retrieval:LazyEelsStick"
 * //   }
 * // ]
 *
 * @param list - Category items of the form.
 * @returns The category map.
 */
export const buildCategorizeObjectFromList = (list: Array<ICategorizeItem>) => {
  const result: ICategorizeItemResult = {};

  for (const item of list) {
    if (!item?.name) {
      continue;
    }

    result[item.name] = {
      ...omit(item, 'name', 'examples'),
      examples: convertToStringArray(item.examples) as string[],
    };
  }

  return result;
};
|
||||
|
||||
/**
 * Reads the `tools` list from an agent node's form.
 *
 * @param agentNode - Agent node, may be omitted.
 * @returns `data.form.tools`, or an empty array when that path resolves to
 *          `undefined`.
 */
export function getAgentNodeTools(agentNode?: RAGFlowNodeType) {
  const tools: IAgentForm['tools'] = get(agentNode, 'data.form.tools', []);

  return tools;
}
|
||||
|
||||
/**
 * Reads the `mcp` list from an agent node's form.
 *
 * @param agentNode - Agent node, may be omitted.
 * @returns `data.form.mcp`, or an empty array when that path resolves to
 *          `undefined`.
 */
export function getAgentNodeMCP(agentNode?: RAGFlowNodeType) {
  const mcp: IAgentForm['mcp'] = get(agentNode, 'data.form.mcp', []);

  return mcp;
}
|
||||
|
||||
export function mapEdgeMouseEvent(
|
||||
edges: Edge[],
|
||||
edgeId: string,
|
||||
@ -538,21 +421,3 @@ export function mapEdgeMouseEvent(
|
||||
|
||||
return nextEdges;
|
||||
}
|
||||
|
||||
/**
 * Rebuilds the begin-query map from a list of values.
 *
 * For every key of `inputs`, the first entry of `values` whose `key` equals
 * that key is shallow-copied into the result. Keys of `inputs` without a
 * matching entry are left out, and entries of `values` whose key is not in
 * `inputs` are ignored.
 *
 * @param inputs - Existing query map; only its keys are used.
 * @param values - Candidate entries, looked up by their `key`.
 * @returns A new map holding copies of the matched entries.
 */
export function buildBeginQueryWithObject(
  inputs: Record<string, BeginQuery>,
  values: BeginQuery[],
) {
  // Index the values once; keep the first entry per key so duplicates resolve
  // the same way a linear search from the front would.
  const firstByKey = new Map<unknown, BeginQuery>();
  for (const value of values) {
    if (!firstByKey.has(value.key)) {
      firstByKey.set(value.key, value);
    }
  }

  const nextInputs: Record<string, BeginQuery> = {};
  for (const key of Object.keys(inputs)) {
    const item = firstByKey.get(key);
    if (item) {
      nextInputs[key] = { ...item };
    }
  }

  return nextInputs;
}
|
||||
|
||||
Reference in New Issue
Block a user