Feat: Use data pipeline to visualize the parsing configuration of the knowledge base (#10423)
### What problem does this PR solve?

#9869

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

---------

Signed-off-by: dependabot[bot] <support@github.com>
Signed-off-by: jinhai <haijin.chn@gmail.com>
Signed-off-by: Jin Hai <haijin.chn@gmail.com>
Co-authored-by: chanx <1243304602@qq.com>
Co-authored-by: balibabu <cike8899@users.noreply.github.com>
Co-authored-by: Lynn <lynn_inf@hotmail.com>
Co-authored-by: 纷繁下的无奈 <zhileihuang@126.com>
Co-authored-by: huangzl <huangzl@shinemo.com>
Co-authored-by: writinwaters <93570324+writinwaters@users.noreply.github.com>
Co-authored-by: Wilmer <33392318@qq.com>
Co-authored-by: Adrian Weidig <adrianweidig@gmx.net>
Co-authored-by: Zhichang Yu <yuzhichang@gmail.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: Yongteng Lei <yongtengrey@outlook.com>
Co-authored-by: Liu An <asiro@qq.com>
Co-authored-by: buua436 <66937541+buua436@users.noreply.github.com>
Co-authored-by: BadwomanCraZY <511528396@qq.com>
Co-authored-by: cucusenok <31804608+cucusenok@users.noreply.github.com>
Co-authored-by: Russell Valentine <russ@coldstonelabs.org>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Billy Bao <newyorkupperbay@gmail.com>
Co-authored-by: Zhedong Cen <cenzhedong2@126.com>
Co-authored-by: TensorNull <129579691+TensorNull@users.noreply.github.com>
Co-authored-by: TensorNull <tensor.null@gmail.com>
Co-authored-by: TeslaZY <TeslaZY@outlook.com>
Co-authored-by: Ajay <160579663+aybanda@users.noreply.github.com>
Co-authored-by: AB <aj@Ajays-MacBook-Air.local>
Co-authored-by: 天海蒼灆 <huangaoqin@tecpie.com>
Co-authored-by: He Wang <wanghechn@qq.com>
Co-authored-by: Atsushi Hatakeyama <atu729@icloud.com>
Co-authored-by: Jin Hai <haijin.chn@gmail.com>
Co-authored-by: Mohamed Mathari <155896313+melmathari@users.noreply.github.com>
Co-authored-by: Mohamed Mathari <nocodeventure@Mac-mini-van-Mohamed.fritz.box>
Co-authored-by: Stephen Hu <stephenhu@seismic.com>
Co-authored-by: Shaun Zhang <zhangwfjh@users.noreply.github.com>
Co-authored-by: zhimeng123 <60221886+zhimeng123@users.noreply.github.com>
Co-authored-by: mxc <mxc@example.com>
Co-authored-by: Dominik Novotný <50611433+SgtMarmite@users.noreply.github.com>
Co-authored-by: EVGENY M <168018528+rjohny55@users.noreply.github.com>
Co-authored-by: mcoder6425 <mcoder64@gmail.com>
Co-authored-by: lemsn <lemsn@msn.com>
Co-authored-by: lemsn <lemsn@126.com>
Co-authored-by: Adrian Gora <47756404+adagora@users.noreply.github.com>
Co-authored-by: Womsxd <45663319+Womsxd@users.noreply.github.com>
Co-authored-by: FatMii <39074672+FatMii@users.noreply.github.com>

@@ -1,42 +1,81 @@
import { TimelineNode } from '@/components/originui/timeline';
import message from '@/components/ui/message';
import {
  useCreateChunk,
  useDeleteChunk,
  useSelectChunkList,
} from '@/hooks/chunk-hooks';
import { useCreateChunk, useDeleteChunk } from '@/hooks/chunk-hooks';
import { useSetModalState, useShowDeleteConfirm } from '@/hooks/common-hooks';
import { useGetKnowledgeSearchParams } from '@/hooks/route-hook';
import { useFetchMessageTrace } from '@/hooks/use-agent-request';
import { IChunk } from '@/interfaces/database/knowledge';
import kbService from '@/services/knowledge-service';
import { formatSecondsToHumanReadable } from '@/utils/date';
import { buildChunkHighlights } from '@/utils/document-util';
import { useMutation, useQueryClient } from '@tanstack/react-query';
import { useCallback, useMemo, useState } from 'react';
import { useTranslation } from 'react-i18next';
import { useMutation, useQuery } from '@tanstack/react-query';
import { t } from 'i18next';
import { camelCase, upperFirst } from 'lodash';
import { useCallback, useEffect, useMemo, useState } from 'react';
import { IHighlight } from 'react-pdf-highlighter';
import { ChunkTextMode } from './constant';
import { useParams, useSearchParams } from 'umi';
import { ITimelineNodeObj, TimelineNodeObj } from './components/time-line';
import {
  ChunkTextMode,
  PipelineResultSearchParams,
  TimelineNodeType,
} from './constant';
import { IDslComponent, IPipelineFileLogDetail } from './interface';

export const useFetchPipelineFileLogDetail = ({
  isAgent = false,
  isEdit = true,
  refreshCount,
}: {
  isEdit?: boolean;
  refreshCount?: number;
  isAgent: boolean;
}) => {
  const { id } = useParams();
  const [searchParams] = useSearchParams();
  const logId = searchParams.get('id') || id;

  let queryKey: (string | number)[] = [];
  if (typeof refreshCount === 'number') {
    queryKey = ['fetchLogDetail', refreshCount];
  }

  const { data, isFetching: loading } = useQuery<IPipelineFileLogDetail>({
    queryKey,
    initialData: {} as IPipelineFileLogDetail,
    gcTime: 0,
    enabled: !isAgent,
    queryFn: async () => {
      if (isEdit) {
        const { data } = await kbService.get_pipeline_detail({
          log_id: logId,
        });
        return data?.data ?? {};
      } else {
        return {};
      }
    },
  });

  return { data, loading };
};
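
For orientation, a minimal consumer sketch of the hook above; the component name, its props, and the relative import path are illustrative assumptions rather than part of this change:

// Hypothetical consumer: loads the pipeline log detail unless the page is agent-driven.
import { useFetchPipelineFileLogDetail } from './hooks';

export function PipelineLogPanel({ isAgent }: { isAgent: boolean }) {
  const { data, loading } = useFetchPipelineFileLogDetail({ isAgent });

  if (loading) {
    return <span>Loading…</span>;
  }
  // `data` is the IPipelineFileLogDetail whose DSL drives the timeline hooks below.
  return <pre>{JSON.stringify(data?.dsl?.components ?? {}, null, 2)}</pre>;
}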

export const useHandleChunkCardClick = () => {
  const [selectedChunkId, setSelectedChunkId] = useState<string>('');
  const [selectedChunk, setSelectedChunk] = useState<IChunk>();

  const handleChunkCardClick = useCallback((chunkId: string) => {
    setSelectedChunkId(chunkId);
  const handleChunkCardClick = useCallback((chunk: IChunk) => {
    console.log('click-chunk-->', chunk);
    setSelectedChunk(chunk);
  }, []);

  return { handleChunkCardClick, selectedChunkId };
  return { handleChunkCardClick, selectedChunk };
};

export const useGetSelectedChunk = (selectedChunkId: string) => {
  const data = useSelectChunkList();
  return (
    data?.data?.find((x) => x.chunk_id === selectedChunkId) ?? ({} as IChunk)
  );
};

export const useGetChunkHighlights = (selectedChunkId: string) => {
export const useGetChunkHighlights = (selectedChunk?: IChunk) => {
  const [size, setSize] = useState({ width: 849, height: 1200 });
  const selectedChunk: IChunk = useGetSelectedChunk(selectedChunkId);

  const highlights: IHighlight[] = useMemo(() => {
    return buildChunkHighlights(selectedChunk, size);
    return selectedChunk ? buildChunkHighlights(selectedChunk, size) : [];
  }, [selectedChunk, size]);

  const setWidthAndHeight = useCallback((width: number, height: number) => {

@@ -131,55 +170,162 @@ export const useUpdateChunk = () => {
  };
};

export const useFetchParserList = () => {
  const [loading, setLoading] = useState(false);
  return {
    loading,
  };
};
export const useRerunDataflow = ({
  data,
}: {
  data: IPipelineFileLogDetail;
}) => {
  const [isChange, setIsChange] = useState(false);

export const useRerunDataflow = () => {
  const [loading, setLoading] = useState(false);
  return {
    loading,
  };
};
  const { mutateAsync: handleReRunFunc, isPending: loading } = useMutation({
    mutationKey: ['pipelineRerun', data],
    mutationFn: async (newData: { value: IDslComponent; key: string }) => {
      const newDsl = {
        ...data.dsl,
        components: {
          ...data.dsl.components,
          [newData.key]: newData.value,
        },
      };

export const useFetchPaserText = () => {
  const initialText =
    '第一行文本\n\t第二行缩进文本\n第三行 多个空格 第一行文本\n\t第二行缩进文本\n第三行 ' +
    '多个空格第一行文本\n\t第二行缩进文本\n第三行 多个空格第一行文本\n\t第二行缩进文本\n第三行 ' +
    '多个空格第一行文本\n\t第二行缩进文本\n第三行 多个空格第一行文本\n\t第二行缩进文本\n第三行 ' +
    '多个空格第一行文本\n\t第二行缩进文本\n第三行 多个空格第一行文本\n\t第二行缩进文本\n第三行 ' +
    '多个空格第一行文本\n\t第二行缩进文本\n第三行 多个空格第一行文本\n\t第二行缩进文本\n第三行 ' +
    '多个空格第一行文本\n\t第二行缩进文本\n第三行 多个空格第一行文本\n\t第二行缩进文本\n第三行 ' +
    '多个空格第一行文本\n\t第二行缩进文本\n第三行 多个空格第一行文本\n\t第二行缩进文本\n第三行 多个空格';
  const [loading, setLoading] = useState(false);
  const [data, setData] = useState<string>(initialText);
  const { t } = useTranslation();
  const queryClient = useQueryClient();

  const {
    // data,
    // isPending: loading,
    mutateAsync,
  } = useMutation({
    mutationKey: ['createChunk'],
    mutationFn: async (payload: any) => {
      // let service = kbService.create_chunk;
      // if (payload.chunk_id) {
      //   service = kbService.set_chunk;
      // }
      // const { data } = await service(payload);
      // if (data.code === 0) {
      message.success(t('message.created'));
      setTimeout(() => {
        queryClient.invalidateQueries({ queryKey: ['fetchChunkList'] });
      }, 1000); // Delay to ensure the list is updated
      // }
      // return data?.code;
      // this Data provided to the interface
      const params = {
        id: data.id,
        dsl: newDsl,
        component_id: newData.key,
      };
      const { data: result } = await kbService.pipelineRerun(params);
      if (result.code === 0) {
        message.success(t('message.operated'));
        // queryClient.invalidateQueries({
        //   queryKey: [type],
        // });
      }
      return result;
    },
  });

  return { data, loading, rerun: mutateAsync };
  return {
    loading,
    isChange,
    setIsChange,
    handleReRunFunc,
  };
};

export const useTimelineDataFlow = (data: IPipelineFileLogDetail) => {
  const timelineNodes: TimelineNode[] = useMemo(() => {
    const nodes: Array<ITimelineNodeObj & { id: number | string }> = [];
    console.log('time-->', data);
    const times = data?.dsl?.components;
    if (times) {
      const getNode = (
        key: string,
        index: number,
        type:
          | TimelineNodeType.begin
          | TimelineNodeType.parser
          | TimelineNodeType.tokenizer
          | TimelineNodeType.characterSplitter
          | TimelineNodeType.titleSplitter,
      ) => {
        const node = times[key].obj;
        const name = camelCase(
          node.component_name,
        ) as keyof typeof TimelineNodeObj;

        let tempType = type;
        if (name === TimelineNodeType.parser) {
          tempType = TimelineNodeType.parser;
        } else if (name === TimelineNodeType.tokenizer) {
          tempType = TimelineNodeType.tokenizer;
        } else if (
          name === TimelineNodeType.characterSplitter ||
          name === TimelineNodeType.titleSplitter
        ) {
          tempType = TimelineNodeType.characterSplitter;
        }
        const timeNode = {
          ...TimelineNodeObj[name],
          id: index,
          className: 'w-32',
          completed: false,
          date: formatSecondsToHumanReadable(
            node.params?.outputs?._elapsed_time?.value || 0,
          ),
          type: tempType,
          detail: { value: times[key], key: key },
        };
        console.log('timeNodetype-->', type);
        nodes.push(timeNode);

        if (times[key].downstream && times[key].downstream.length > 0) {
          const nextKey = times[key].downstream[0];

          // nodes.push(timeNode);
          getNode(nextKey, index + 1, tempType);
        }
      };
      getNode(upperFirst(TimelineNodeType.begin), 1, TimelineNodeType.begin);
      // setTimelineNodeArr(nodes as unknown as ITimelineNodeObj & {id: number | string})
    }
    return nodes;
  }, [data]);
  return {
    timelineNodes,
  };
};

export const useGetPipelineResultSearchParams = () => {
  const [currentQueryParameters] = useSearchParams();
  const is_read_only = currentQueryParameters.get(
    PipelineResultSearchParams.IsReadOnly,
  ) as 'true' | 'false';
  console.log('is_read_only', is_read_only);
  return {
    type: currentQueryParameters.get(PipelineResultSearchParams.Type) || '',
    documentId:
      currentQueryParameters.get(PipelineResultSearchParams.DocumentId) || '',
    knowledgeId:
      currentQueryParameters.get(PipelineResultSearchParams.KnowledgeId) || '',
    isReadOnly: is_read_only === 'true',
    agentId:
      currentQueryParameters.get(PipelineResultSearchParams.AgentId) || '',
    agentTitle:
      currentQueryParameters.get(PipelineResultSearchParams.AgentTitle) || '',
    documentExtension:
      currentQueryParameters.get(
        PipelineResultSearchParams.DocumentExtension,
      ) || '',
    createdBy:
      currentQueryParameters.get(PipelineResultSearchParams.CreatedBy) || '',
  };
};

export function useFetchPipelineResult({
  agentId,
}: Pick<ReturnType<typeof useGetPipelineResultSearchParams>, 'agentId'>) {
  const [searchParams] = useSearchParams();
  const messageId = searchParams.get('id');

  const { data, setMessageId, setISStopFetchTrace } =
    useFetchMessageTrace(agentId);

  useEffect(() => {
    if (messageId) {
      setMessageId(messageId);
      setISStopFetchTrace(true);
    }
  }, [agentId, messageId, setISStopFetchTrace, setMessageId]);

  const pipelineResult = useMemo(() => {
    if (Array.isArray(data)) {
      const latest = data?.at(-1);
      if (latest?.component_id === 'END' && Array.isArray(latest.trace)) {
        return latest.trace.at(0);
      }
    }
  }, [data]);

  return { pipelineResult };
}
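
As a rough sketch of how the result-page hooks above could be wired together (the component, its markup, and the node fields rendered here are assumptions; only the hook names and their return values follow the code in this diff):

// Hypothetical result view: reads the URL parameters, resolves the agent trace,
// and feeds the fetched log detail into the timeline hook.
import {
  useFetchPipelineFileLogDetail,
  useFetchPipelineResult,
  useGetPipelineResultSearchParams,
  useTimelineDataFlow,
} from './hooks';

export function PipelineResultView() {
  const { agentId, isReadOnly } = useGetPipelineResultSearchParams();
  const { pipelineResult } = useFetchPipelineResult({ agentId });
  const { data } = useFetchPipelineFileLogDetail({ isAgent: Boolean(agentId) });
  const { timelineNodes } = useTimelineDataFlow(data);

  return (
    <section>
      <h3>{isReadOnly ? 'Read-only run' : 'Editable run'}</h3>
      <ul>
        {timelineNodes.map((node) => (
          <li key={node.id}>{node.date}</li>
        ))}
      </ul>
      <pre>{JSON.stringify(pipelineResult ?? {}, null, 2)}</pre>
    </section>
  );
}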