Feat: Use data pipeline to visualize the parsing configuration of the knowledge base (#10423)

### What problem does this PR solve?

#9869

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

---------

Signed-off-by: dependabot[bot] <support@github.com>
Signed-off-by: jinhai <haijin.chn@gmail.com>
Signed-off-by: Jin Hai <haijin.chn@gmail.com>
Co-authored-by: chanx <1243304602@qq.com>
Co-authored-by: balibabu <cike8899@users.noreply.github.com>
Co-authored-by: Lynn <lynn_inf@hotmail.com>
Co-authored-by: 纷繁下的无奈 <zhileihuang@126.com>
Co-authored-by: huangzl <huangzl@shinemo.com>
Co-authored-by: writinwaters <93570324+writinwaters@users.noreply.github.com>
Co-authored-by: Wilmer <33392318@qq.com>
Co-authored-by: Adrian Weidig <adrianweidig@gmx.net>
Co-authored-by: Zhichang Yu <yuzhichang@gmail.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: Yongteng Lei <yongtengrey@outlook.com>
Co-authored-by: Liu An <asiro@qq.com>
Co-authored-by: buua436 <66937541+buua436@users.noreply.github.com>
Co-authored-by: BadwomanCraZY <511528396@qq.com>
Co-authored-by: cucusenok <31804608+cucusenok@users.noreply.github.com>
Co-authored-by: Russell Valentine <russ@coldstonelabs.org>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Billy Bao <newyorkupperbay@gmail.com>
Co-authored-by: Zhedong Cen <cenzhedong2@126.com>
Co-authored-by: TensorNull <129579691+TensorNull@users.noreply.github.com>
Co-authored-by: TensorNull <tensor.null@gmail.com>
Co-authored-by: TeslaZY <TeslaZY@outlook.com>
Co-authored-by: Ajay <160579663+aybanda@users.noreply.github.com>
Co-authored-by: AB <aj@Ajays-MacBook-Air.local>
Co-authored-by: 天海蒼灆 <huangaoqin@tecpie.com>
Co-authored-by: He Wang <wanghechn@qq.com>
Co-authored-by: Atsushi Hatakeyama <atu729@icloud.com>
Co-authored-by: Jin Hai <haijin.chn@gmail.com>
Co-authored-by: Mohamed Mathari <155896313+melmathari@users.noreply.github.com>
Co-authored-by: Mohamed Mathari <nocodeventure@Mac-mini-van-Mohamed.fritz.box>
Co-authored-by: Stephen Hu <stephenhu@seismic.com>
Co-authored-by: Shaun Zhang <zhangwfjh@users.noreply.github.com>
Co-authored-by: zhimeng123 <60221886+zhimeng123@users.noreply.github.com>
Co-authored-by: mxc <mxc@example.com>
Co-authored-by: Dominik Novotný <50611433+SgtMarmite@users.noreply.github.com>
Co-authored-by: EVGENY M <168018528+rjohny55@users.noreply.github.com>
Co-authored-by: mcoder6425 <mcoder64@gmail.com>
Co-authored-by: lemsn <lemsn@msn.com>
Co-authored-by: lemsn <lemsn@126.com>
Co-authored-by: Adrian Gora <47756404+adagora@users.noreply.github.com>
Co-authored-by: Womsxd <45663319+Womsxd@users.noreply.github.com>
Co-authored-by: FatMii <39074672+FatMii@users.noreply.github.com>
This commit is contained in:
Kevin Hu
2025-10-09 12:36:19 +08:00
committed by GitHub
parent ef0aecea3b
commit cbf04ee470
490 changed files with 10630 additions and 30688 deletions

View File

@ -1,42 +1,81 @@
import { TimelineNode } from '@/components/originui/timeline';
import message from '@/components/ui/message';
import {
useCreateChunk,
useDeleteChunk,
useSelectChunkList,
} from '@/hooks/chunk-hooks';
import { useCreateChunk, useDeleteChunk } from '@/hooks/chunk-hooks';
import { useSetModalState, useShowDeleteConfirm } from '@/hooks/common-hooks';
import { useGetKnowledgeSearchParams } from '@/hooks/route-hook';
import { useFetchMessageTrace } from '@/hooks/use-agent-request';
import { IChunk } from '@/interfaces/database/knowledge';
import kbService from '@/services/knowledge-service';
import { formatSecondsToHumanReadable } from '@/utils/date';
import { buildChunkHighlights } from '@/utils/document-util';
import { useMutation, useQueryClient } from '@tanstack/react-query';
import { useCallback, useMemo, useState } from 'react';
import { useTranslation } from 'react-i18next';
import { useMutation, useQuery } from '@tanstack/react-query';
import { t } from 'i18next';
import { camelCase, upperFirst } from 'lodash';
import { useCallback, useEffect, useMemo, useState } from 'react';
import { IHighlight } from 'react-pdf-highlighter';
import { ChunkTextMode } from './constant';
import { useParams, useSearchParams } from 'umi';
import { ITimelineNodeObj, TimelineNodeObj } from './components/time-line';
import {
ChunkTextMode,
PipelineResultSearchParams,
TimelineNodeType,
} from './constant';
import { IDslComponent, IPipelineFileLogDetail } from './interface';
export const useFetchPipelineFileLogDetail = ({
isAgent = false,
isEdit = true,
refreshCount,
}: {
isEdit?: boolean;
refreshCount?: number;
isAgent: boolean;
}) => {
const { id } = useParams();
const [searchParams] = useSearchParams();
const logId = searchParams.get('id') || id;
let queryKey: (string | number)[] = [];
if (typeof refreshCount === 'number') {
queryKey = ['fetchLogDetail', refreshCount];
}
const { data, isFetching: loading } = useQuery<IPipelineFileLogDetail>({
queryKey,
initialData: {} as IPipelineFileLogDetail,
gcTime: 0,
enabled: !isAgent,
queryFn: async () => {
if (isEdit) {
const { data } = await kbService.get_pipeline_detail({
log_id: logId,
});
return data?.data ?? {};
} else {
return {};
}
},
});
return { data, loading };
};
export const useHandleChunkCardClick = () => {
const [selectedChunkId, setSelectedChunkId] = useState<string>('');
const [selectedChunk, setSelectedChunk] = useState<IChunk>();
const handleChunkCardClick = useCallback((chunkId: string) => {
setSelectedChunkId(chunkId);
const handleChunkCardClick = useCallback((chunk: IChunk) => {
console.log('click-chunk-->', chunk);
setSelectedChunk(chunk);
}, []);
return { handleChunkCardClick, selectedChunkId };
return { handleChunkCardClick, selectedChunk };
};
export const useGetSelectedChunk = (selectedChunkId: string) => {
const data = useSelectChunkList();
return (
data?.data?.find((x) => x.chunk_id === selectedChunkId) ?? ({} as IChunk)
);
};
export const useGetChunkHighlights = (selectedChunkId: string) => {
export const useGetChunkHighlights = (selectedChunk?: IChunk) => {
const [size, setSize] = useState({ width: 849, height: 1200 });
const selectedChunk: IChunk = useGetSelectedChunk(selectedChunkId);
const highlights: IHighlight[] = useMemo(() => {
return buildChunkHighlights(selectedChunk, size);
return selectedChunk ? buildChunkHighlights(selectedChunk, size) : [];
}, [selectedChunk, size]);
const setWidthAndHeight = useCallback((width: number, height: number) => {
@ -131,55 +170,162 @@ export const useUpdateChunk = () => {
};
};
export const useFetchParserList = () => {
const [loading, setLoading] = useState(false);
return {
loading,
};
};
export const useRerunDataflow = ({
data,
}: {
data: IPipelineFileLogDetail;
}) => {
const [isChange, setIsChange] = useState(false);
export const useRerunDataflow = () => {
const [loading, setLoading] = useState(false);
return {
loading,
};
};
const { mutateAsync: handleReRunFunc, isPending: loading } = useMutation({
mutationKey: ['pipelineRerun', data],
mutationFn: async (newData: { value: IDslComponent; key: string }) => {
const newDsl = {
...data.dsl,
components: {
...data.dsl.components,
[newData.key]: newData.value,
},
};
export const useFetchPaserText = () => {
const initialText =
'第一行文本\n\t第二行缩进文本\n第三行 多个空格 第一行文本\n\t第二行缩进文本\n第三行 ' +
'多个空格第一行文本\n\t第二行缩进文本\n第三行 多个空格第一行文本\n\t第二行缩进文本\n第三行 ' +
'多个空格第一行文本\n\t第二行缩进文本\n第三行 多个空格第一行文本\n\t第二行缩进文本\n第三行 ' +
'多个空格第一行文本\n\t第二行缩进文本\n第三行 多个空格第一行文本\n\t第二行缩进文本\n第三行 ' +
'多个空格第一行文本\n\t第二行缩进文本\n第三行 多个空格第一行文本\n\t第二行缩进文本\n第三行 ' +
'多个空格第一行文本\n\t第二行缩进文本\n第三行 多个空格第一行文本\n\t第二行缩进文本\n第三行 ' +
'多个空格第一行文本\n\t第二行缩进文本\n第三行 多个空格第一行文本\n\t第二行缩进文本\n第三行 多个空格';
const [loading, setLoading] = useState(false);
const [data, setData] = useState<string>(initialText);
const { t } = useTranslation();
const queryClient = useQueryClient();
const {
// data,
// isPending: loading,
mutateAsync,
} = useMutation({
mutationKey: ['createChunk'],
mutationFn: async (payload: any) => {
// let service = kbService.create_chunk;
// if (payload.chunk_id) {
// service = kbService.set_chunk;
// }
// const { data } = await service(payload);
// if (data.code === 0) {
message.success(t('message.created'));
setTimeout(() => {
queryClient.invalidateQueries({ queryKey: ['fetchChunkList'] });
}, 1000); // Delay to ensure the list is updated
// }
// return data?.code;
// this Data provided to the interface
const params = {
id: data.id,
dsl: newDsl,
component_id: newData.key,
};
const { data: result } = await kbService.pipelineRerun(params);
if (result.code === 0) {
message.success(t('message.operated'));
// queryClient.invalidateQueries({
// queryKey: [type],
// });
}
return result;
},
});
return { data, loading, rerun: mutateAsync };
return {
loading,
isChange,
setIsChange,
handleReRunFunc,
};
};
export const useTimelineDataFlow = (data: IPipelineFileLogDetail) => {
const timelineNodes: TimelineNode[] = useMemo(() => {
const nodes: Array<ITimelineNodeObj & { id: number | string }> = [];
console.log('time-->', data);
const times = data?.dsl?.components;
if (times) {
const getNode = (
key: string,
index: number,
type:
| TimelineNodeType.begin
| TimelineNodeType.parser
| TimelineNodeType.tokenizer
| TimelineNodeType.characterSplitter
| TimelineNodeType.titleSplitter,
) => {
const node = times[key].obj;
const name = camelCase(
node.component_name,
) as keyof typeof TimelineNodeObj;
let tempType = type;
if (name === TimelineNodeType.parser) {
tempType = TimelineNodeType.parser;
} else if (name === TimelineNodeType.tokenizer) {
tempType = TimelineNodeType.tokenizer;
} else if (
name === TimelineNodeType.characterSplitter ||
name === TimelineNodeType.titleSplitter
) {
tempType = TimelineNodeType.characterSplitter;
}
const timeNode = {
...TimelineNodeObj[name],
id: index,
className: 'w-32',
completed: false,
date: formatSecondsToHumanReadable(
node.params?.outputs?._elapsed_time?.value || 0,
),
type: tempType,
detail: { value: times[key], key: key },
};
console.log('timeNodetype-->', type);
nodes.push(timeNode);
if (times[key].downstream && times[key].downstream.length > 0) {
const nextKey = times[key].downstream[0];
// nodes.push(timeNode);
getNode(nextKey, index + 1, tempType);
}
};
getNode(upperFirst(TimelineNodeType.begin), 1, TimelineNodeType.begin);
// setTimelineNodeArr(nodes as unknown as ITimelineNodeObj & {id: number | string})
}
return nodes;
}, [data]);
return {
timelineNodes,
};
};
export const useGetPipelineResultSearchParams = () => {
const [currentQueryParameters] = useSearchParams();
const is_read_only = currentQueryParameters.get(
PipelineResultSearchParams.IsReadOnly,
) as 'true' | 'false';
console.log('is_read_only', is_read_only);
return {
type: currentQueryParameters.get(PipelineResultSearchParams.Type) || '',
documentId:
currentQueryParameters.get(PipelineResultSearchParams.DocumentId) || '',
knowledgeId:
currentQueryParameters.get(PipelineResultSearchParams.KnowledgeId) || '',
isReadOnly: is_read_only === 'true',
agentId:
currentQueryParameters.get(PipelineResultSearchParams.AgentId) || '',
agentTitle:
currentQueryParameters.get(PipelineResultSearchParams.AgentTitle) || '',
documentExtension:
currentQueryParameters.get(
PipelineResultSearchParams.DocumentExtension,
) || '',
createdBy:
currentQueryParameters.get(PipelineResultSearchParams.CreatedBy) || '',
};
};
export function useFetchPipelineResult({
agentId,
}: Pick<ReturnType<typeof useGetPipelineResultSearchParams>, 'agentId'>) {
const [searchParams] = useSearchParams();
const messageId = searchParams.get('id');
const { data, setMessageId, setISStopFetchTrace } =
useFetchMessageTrace(agentId);
useEffect(() => {
if (messageId) {
setMessageId(messageId);
setISStopFetchTrace(true);
}
}, [agentId, messageId, setISStopFetchTrace, setMessageId]);
const pipelineResult = useMemo(() => {
if (Array.isArray(data)) {
const latest = data?.at(-1);
if (latest?.component_id === 'END' && Array.isArray(latest.trace)) {
return latest.trace.at(0);
}
}
}, [data]);
return { pipelineResult };
}