diff --git a/web/src/components/layout-recognize-form-field.tsx b/web/src/components/layout-recognize-form-field.tsx
index d0991c0db..2a51a1bff 100644
--- a/web/src/components/layout-recognize-form-field.tsx
+++ b/web/src/components/layout-recognize-form-field.tsx
@@ -17,6 +17,7 @@ import {
export const enum ParseDocumentType {
DeepDOC = 'DeepDOC',
PlainText = 'Plain Text',
+ MinerU = 'MinerU',
}
export function LayoutRecognizeFormField({
@@ -38,9 +39,12 @@ export function LayoutRecognizeFormField({
const options = useMemo(() => {
const list = optionsWithoutLLM
? optionsWithoutLLM
- : [ParseDocumentType.DeepDOC, ParseDocumentType.PlainText].map((x) => ({
- label:
- x === ParseDocumentType.PlainText ? t(camelCase(x)) : 'DeepDoc',
+ : [
+ ParseDocumentType.DeepDOC,
+ ParseDocumentType.PlainText,
+ ParseDocumentType.MinerU,
+ ].map((x) => ({
+ label: x === ParseDocumentType.PlainText ? t(camelCase(x)) : x,
value: x,
}));
diff --git a/web/src/components/ui/modal/modal.tsx b/web/src/components/ui/modal/modal.tsx
index 64fb176a1..2e9e004f9 100644
--- a/web/src/components/ui/modal/modal.tsx
+++ b/web/src/components/ui/modal/modal.tsx
@@ -116,7 +116,10 @@ const Modal: ModalType = ({
type="button"
disabled={confirmLoading || disabled}
onClick={() => handleOk()}
- className="px-2 py-1 bg-primary text-primary-foreground rounded-md hover:bg-primary/90"
+ className={cn(
+ 'px-2 py-1 bg-primary text-primary-foreground rounded-md hover:bg-primary/90',
+ { 'cursor-not-allowed': disabled },
+ )}
>
{confirmLoading && (
diff --git a/web/src/pages/dataset/dataset-setting/category-panel.tsx b/web/src/pages/dataset/dataset-setting/category-panel.tsx
new file mode 100644
index 000000000..06b2af743
--- /dev/null
+++ b/web/src/pages/dataset/dataset-setting/category-panel.tsx
@@ -0,0 +1,72 @@
+import SvgIcon from '@/components/svg-icon';
+import { useTranslate } from '@/hooks/common-hooks';
+import { useSelectParserList } from '@/hooks/user-setting-hooks';
+import { Col, Divider, Empty, Row, Typography } from 'antd';
+import DOMPurify from 'dompurify';
+import camelCase from 'lodash/camelCase';
+import { useMemo } from 'react';
+import { TagTabs } from './tag-tabs';
+import { ImageMap } from './utils';
+
+const { Text } = Typography;
+
+const CategoryPanel = ({ chunkMethod }: { chunkMethod: string }) => {
+ const parserList = useSelectParserList();
+ const { t } = useTranslate('knowledgeConfiguration');
+
+ const item = useMemo(() => {
+ const item = parserList.find((x) => x.value === chunkMethod);
+ if (item) {
+ return {
+ title: item.label,
+ description: t(camelCase(item.value)),
+ };
+ }
+ return { title: '', description: '' };
+ }, [parserList, chunkMethod, t]);
+
+ const imageList = useMemo(() => {
+ if (chunkMethod in ImageMap) {
+ return ImageMap[chunkMethod as keyof typeof ImageMap];
+ }
+ return [];
+ }, [chunkMethod]);
+
+ return (
+
+ {imageList.length > 0 ? (
+ <>
+
+ {`"${item.title}" ${t('methodTitle')}`}
+
+
+ {`"${item.title}" ${t('methodExamples')}`}
+ {t('methodExamplesDescription')}
+
+ {imageList.map((x) => (
+
+
+
+ ))}
+
+
+ {item.title} {t('dialogueExamplesTitle')}
+
+
+ >
+ ) : (
+
+ {t('methodEmpty')}
+
+
+ )}
+ {chunkMethod === 'tag' && }
+
+ );
+};
+
+export default CategoryPanel;
diff --git a/web/src/pages/dataset/dataset-setting/chunk-method-learn-more.tsx b/web/src/pages/dataset/dataset-setting/chunk-method-learn-more.tsx
new file mode 100644
index 000000000..6894825d5
--- /dev/null
+++ b/web/src/pages/dataset/dataset-setting/chunk-method-learn-more.tsx
@@ -0,0 +1,39 @@
+import { Button } from '@/components/ui/button';
+import { cn } from '@/lib/utils';
+import { t } from 'i18next';
+import { X } from 'lucide-react';
+import { useState } from 'react';
+import CategoryPanel from './category-panel';
+
+export default ({ parserId }: { parserId: string }) => {
+ const [visible, setVisible] = useState(false);
+
+ return (
+
+
+
+
+
+
+
{
+ setVisible(false);
+ }}
+ >
+
+
+
+
+ );
+};
diff --git a/web/src/pages/dataset/dataset-setting/index.tsx b/web/src/pages/dataset/dataset-setting/index.tsx
index 079b04571..c677204b8 100644
--- a/web/src/pages/dataset/dataset-setting/index.tsx
+++ b/web/src/pages/dataset/dataset-setting/index.tsx
@@ -21,6 +21,7 @@ import {
IGenerateLogButtonProps,
} from '../dataset/generate-button/generate';
import { ChunkMethodForm } from './chunk-method-form';
+import ChunkMethodLearnMore from './chunk-method-learn-more';
import { IDataPipelineNodeProps } from './components/link-data-pipeline';
import { MainContainer } from './configuration-form-container';
import { ChunkMethodItem, ParseTypeItem } from './configuration/common-item';
@@ -169,10 +170,7 @@ export default function DatasetSettings() {
>
+
+ {parseType === 1 && }
+
);
diff --git a/web/src/pages/dataset/dataset-setting/tag-table/index.tsx b/web/src/pages/dataset/dataset-setting/tag-table/index.tsx
new file mode 100644
index 000000000..a2e38018c
--- /dev/null
+++ b/web/src/pages/dataset/dataset-setting/tag-table/index.tsx
@@ -0,0 +1,305 @@
+'use client';
+
+import {
+ ColumnDef,
+ ColumnFiltersState,
+ SortingState,
+ VisibilityState,
+ flexRender,
+ getCoreRowModel,
+ getFilteredRowModel,
+ getPaginationRowModel,
+ getSortedRowModel,
+ useReactTable,
+} from '@tanstack/react-table';
+import { ArrowUpDown, Pencil, Trash2 } from 'lucide-react';
+import * as React from 'react';
+
+import { ConfirmDeleteDialog } from '@/components/confirm-delete-dialog';
+import { Button } from '@/components/ui/button';
+import { Checkbox } from '@/components/ui/checkbox';
+import { Input } from '@/components/ui/input';
+import {
+ Table,
+ TableBody,
+ TableCell,
+ TableHead,
+ TableHeader,
+ TableRow,
+} from '@/components/ui/table';
+import {
+ Tooltip,
+ TooltipContent,
+ TooltipProvider,
+ TooltipTrigger,
+} from '@/components/ui/tooltip';
+import { useDeleteTag, useFetchTagList } from '@/hooks/knowledge-hooks';
+import { useCallback, useEffect, useState } from 'react';
+import { useTranslation } from 'react-i18next';
+import { useRenameKnowledgeTag } from '../hooks';
+import { RenameDialog } from './rename-dialog';
+
+/** One row of the tag table: a tag name and its frequency. */
+export type ITag = {
+ tag: string;
+ frequency: number;
+};
+
+export function TagTable() {
+ const { t } = useTranslation();
+ const { list } = useFetchTagList();
+ const [tagList, setTagList] = useState([]);
+
+ const [sorting, setSorting] = React.useState([]);
+ const [columnFilters, setColumnFilters] = React.useState(
+ [],
+ );
+ const [columnVisibility, setColumnVisibility] =
+ React.useState({});
+ const [rowSelection, setRowSelection] = useState({});
+
+ const { deleteTag } = useDeleteTag();
+
+ useEffect(() => {
+ setTagList(list.map((x) => ({ tag: x[0], frequency: x[1] })));
+ }, [list]);
+
+ const handleDeleteTag = useCallback(
+ (tags: string[]) => () => {
+ deleteTag(tags);
+ },
+ [deleteTag],
+ );
+
+ const {
+ showTagRenameModal,
+ hideTagRenameModal,
+ tagRenameVisible,
+ initialName,
+ } = useRenameKnowledgeTag();
+
+ const columns: ColumnDef[] = [
+ {
+ id: 'select',
+ header: ({ table }) => (
+ table.toggleAllPageRowsSelected(!!value)}
+ aria-label="Select all"
+ />
+ ),
+ cell: ({ row }) => (
+ row.toggleSelected(!!value)}
+ aria-label="Select row"
+ />
+ ),
+ enableSorting: false,
+ enableHiding: false,
+ },
+ {
+ accessorKey: 'tag',
+ header: ({ column }) => {
+ return (
+
+ );
+ },
+ cell: ({ row }) => {
+ const value: string = row.getValue('tag');
+ return {value}
;
+ },
+ },
+ {
+ accessorKey: 'frequency',
+ header: ({ column }) => {
+ return (
+
+ );
+ },
+ cell: ({ row }) => (
+ {row.getValue('frequency')}
+ ),
+ },
+ {
+ id: 'actions',
+ enableHiding: false,
+ header: t('common.action'),
+ cell: ({ row }) => {
+ return (
+
+
+
+
+
+
+
+
+ {t('common.delete')}
+
+
+
+
+
+
+
+ {t('common.rename')}
+
+
+
+ );
+ },
+ },
+ ];
+
+ const table = useReactTable({
+ data: tagList,
+ columns,
+ onSortingChange: setSorting,
+ onColumnFiltersChange: setColumnFilters,
+ getCoreRowModel: getCoreRowModel(),
+ getPaginationRowModel: getPaginationRowModel(),
+ getSortedRowModel: getSortedRowModel(),
+ getFilteredRowModel: getFilteredRowModel(),
+ onColumnVisibilityChange: setColumnVisibility,
+ onRowSelectionChange: setRowSelection,
+ state: {
+ sorting,
+ columnFilters,
+ columnVisibility,
+ rowSelection,
+ },
+ });
+
+ const selectedRowLength = table.getFilteredSelectedRowModel().rows.length;
+
+ return (
+
+
+
+
+ table.getColumn('tag')?.setFilterValue(event.target.value)
+ }
+ className="w-1/2"
+ />
+ {selectedRowLength > 0 && (
+ x.original.tag),
+ )}
+ >
+
+
+ )}
+
+
+
+ {table.getHeaderGroups().map((headerGroup) => (
+
+ {headerGroup.headers.map((header) => {
+ return (
+
+ {header.isPlaceholder
+ ? null
+ : flexRender(
+ header.column.columnDef.header,
+ header.getContext(),
+ )}
+
+ );
+ })}
+
+ ))}
+
+
+ {table.getRowModel().rows?.length ? (
+ table.getRowModel().rows.map((row) => (
+
+ {row.getVisibleCells().map((cell) => (
+
+ {flexRender(
+ cell.column.columnDef.cell,
+ cell.getContext(),
+ )}
+
+ ))}
+
+ ))
+ ) : (
+
+
+ No results.
+
+
+ )}
+
+
+
+
+
+ {selectedRowLength} of {table.getFilteredRowModel().rows.length}{' '}
+ row(s) selected.
+
+
+
+
+
+
+ {tagRenameVisible && (
+
+ )}
+
+ );
+}
diff --git a/web/src/pages/dataset/dataset-setting/tag-table/rename-dialog/index.tsx b/web/src/pages/dataset/dataset-setting/tag-table/rename-dialog/index.tsx
new file mode 100644
index 000000000..b95907f92
--- /dev/null
+++ b/web/src/pages/dataset/dataset-setting/tag-table/rename-dialog/index.tsx
@@ -0,0 +1,40 @@
+import {
+ Dialog,
+ DialogContent,
+ DialogFooter,
+ DialogHeader,
+ DialogTitle,
+} from '@/components/ui/dialog';
+import { LoadingButton } from '@/components/ui/loading-button';
+import { useTagIsRenaming } from '@/hooks/knowledge-hooks';
+import { IModalProps } from '@/interfaces/common';
+import { TagRenameId } from '@/pages/add-knowledge/constant';
+import { useTranslation } from 'react-i18next';
+import { RenameForm } from './rename-form';
+
+export function RenameDialog({
+ hideModal,
+ initialName,
+}: IModalProps & { initialName: string }) {
+ const { t } = useTranslation();
+ const loading = useTagIsRenaming();
+
+ return (
+
+ );
+}
diff --git a/web/src/pages/dataset/dataset-setting/tag-table/rename-dialog/rename-form.tsx b/web/src/pages/dataset/dataset-setting/tag-table/rename-dialog/rename-form.tsx
new file mode 100644
index 000000000..9c8f1cf7e
--- /dev/null
+++ b/web/src/pages/dataset/dataset-setting/tag-table/rename-dialog/rename-form.tsx
@@ -0,0 +1,83 @@
+'use client';
+
+import { zodResolver } from '@hookform/resolvers/zod';
+import { useForm } from 'react-hook-form';
+import { z } from 'zod';
+
+import {
+ Form,
+ FormControl,
+ FormField,
+ FormItem,
+ FormLabel,
+ FormMessage,
+} from '@/components/ui/form';
+import { Input } from '@/components/ui/input';
+import { useRenameTag } from '@/hooks/knowledge-hooks';
+import { IModalProps } from '@/interfaces/common';
+import { TagRenameId } from '@/pages/add-knowledge/constant';
+import { useEffect } from 'react';
+import { useTranslation } from 'react-i18next';
+
+/**
+ * Form used to rename a tag. The field starts from `initialName`; on submit
+ * it calls `renameTag` with the old and new names and, when that call returns
+ * a truthy result, closes the modal through `hideModal`.
+ */
+export function RenameForm({
+  initialName,
+  hideModal,
+}: IModalProps & { initialName: string }) {
+  const { t } = useTranslation();
+  const FormSchema = z.object({
+    name: z
+      .string()
+      // Trim before the length check: zod applies string checks in call
+      // order, so `.min(1).trim()` accepted a whitespace-only name and then
+      // trimmed it down to an empty string.
+      .trim()
+      .min(1, {
+        message: t('common.namePlaceholder'),
+      }),
+  });
+
+  const form = useForm>({
+    resolver: zodResolver(FormSchema),
+    defaultValues: {
+      name: '',
+    },
+  });
+
+  const { renameTag } = useRenameTag();
+
+  // Only close the dialog when the rename call reports success.
+  async function onSubmit(data: z.infer) {
+    const ret = await renameTag({ fromTag: initialName, toTag: data.name });
+    if (ret) {
+      hideModal?.();
+    }
+  }
+
+  // Keep the field in sync with the tag currently being renamed.
+  useEffect(() => {
+    form.setValue('name', initialName);
+  }, [form, initialName]);
+
+  return (
+
+
+  );
+}
diff --git a/web/src/pages/dataset/dataset-setting/tag-tabs.tsx b/web/src/pages/dataset/dataset-setting/tag-tabs.tsx
new file mode 100644
index 000000000..abcd3f673
--- /dev/null
+++ b/web/src/pages/dataset/dataset-setting/tag-tabs.tsx
@@ -0,0 +1,40 @@
+import { Segmented } from 'antd';
+import { SegmentedLabeledOption } from 'antd/es/segmented';
+import { upperFirst } from 'lodash';
+import { useState } from 'react';
+import { useTranslation } from 'react-i18next';
+import { TagTable } from './tag-table';
+import { TagWordCloud } from './tag-word-cloud';
+
+/** The two tag views offered by the tabs: word cloud or table. */
+enum TagType {
+ Cloud = 'cloud',
+ Table = 'table',
+}
+
+const TagContentMap = {
+ [TagType.Cloud]: ,
+ [TagType.Table]: ,
+};
+
+export function TagTabs() {
+ const [value, setValue] = useState(TagType.Cloud);
+ const { t } = useTranslation();
+
+ const options: SegmentedLabeledOption[] = [TagType.Cloud, TagType.Table].map(
+ (x) => ({
+ label: t(`knowledgeConfiguration.tag${upperFirst(x)}`),
+ value: x,
+ }),
+ );
+
+ return (
+
+ setValue(val as TagType)}
+ />
+ {TagContentMap[value]}
+
+ );
+}
diff --git a/web/src/pages/dataset/dataset-setting/tag-word-cloud.tsx b/web/src/pages/dataset/dataset-setting/tag-word-cloud.tsx
new file mode 100644
index 000000000..b71ed69af
--- /dev/null
+++ b/web/src/pages/dataset/dataset-setting/tag-word-cloud.tsx
@@ -0,0 +1,62 @@
+import { useFetchTagList } from '@/hooks/knowledge-hooks';
+import { Chart } from '@antv/g2';
+import { sumBy } from 'lodash';
+import { useCallback, useEffect, useMemo, useRef } from 'react';
+
+/**
+ * Word-cloud view of the tags returned by `useFetchTagList`, drawn with an
+ * `@antv/g2` chart. Only the 256 highest-frequency tags are rendered.
+ */
+export function TagWordCloud() {
+  const domRef = useRef(null);
+  // The ref object itself is never reassigned, so `const` is sufficient.
+  const chartRef = useRef();
+  const { list } = useFetchTagList();
+
+  const { list: tagList } = useMemo(() => {
+    // Copy before sorting: `sort` reorders in place, and `list` belongs to
+    // the hook, so sorting it directly would mutate that data during render.
+    const nextList = [...list].sort((a, b) => b[1] - a[1]).slice(0, 256);
+
+    return {
+      list: nextList.map((x) => ({ text: x[0], value: x[1], name: x[0] })),
+      // sumValue/length are only referenced by the commented-out fontSize
+      // callback below.
+      sumValue: sumBy(nextList, (x: [string, number]) => x[1]),
+      length: nextList.length,
+    };
+  }, [list]);
+
+  // Creates a fresh chart in the container and renders the current tag list.
+  const renderWordCloud = useCallback(() => {
+    if (domRef.current) {
+      chartRef.current = new Chart({ container: domRef.current });
+
+      chartRef.current.options({
+        type: 'wordCloud',
+        autoFit: true,
+        layout: {
+          fontSize: [10, 50],
+          // fontSize: (d: any) => {
+          //   if (d.value) {
+          //     return (d.value / sumValue) * 100 * (length / 10);
+          //   }
+          //   return 0;
+          // },
+        },
+        data: {
+          type: 'inline',
+          value: tagList,
+        },
+        encode: { color: 'text' },
+        legend: false,
+        tooltip: {
+          title: 'name', // title
+          items: ['value'], // data item
+        },
+      });
+
+      chartRef.current.render();
+    }
+  }, [tagList]);
+
+  useEffect(() => {
+    renderWordCloud();
+
+    // Tear the chart down before the effect re-runs or the component unmounts.
+    return () => {
+      chartRef.current?.destroy();
+    };
+  }, [renderWordCloud]);
+
+  return ;
+}
diff --git a/web/src/pages/dataset/dataset-setting/utils.ts b/web/src/pages/dataset/dataset-setting/utils.ts
new file mode 100644
index 000000000..4c5666467
--- /dev/null
+++ b/web/src/pages/dataset/dataset-setting/utils.ts
@@ -0,0 +1,20 @@
+/**
+ * Builds the example-image names for one chunk method.
+ *
+ * @param prefix - Asset name prefix, e.g. `book`.
+ * @param length - Number of images; a negative or NaN count yields `[]`.
+ * @returns Names of the form `chunk-method/<prefix>-NN`, numbered from 01.
+ */
+const getImageName = (prefix: string, length: number): string[] =>
+  Array.from(
+    { length },
+    // Pad to two digits rather than hard-coding a leading "0", so the tenth
+    // image is `-10`, not `-010`.
+    // NOTE(review): assumes assets keep two-digit numbering — confirm.
+    (_, idx) => `chunk-method/${prefix}-${String(idx + 1).padStart(2, '0')}`,
+  );
+
+/**
+ * Example image names per chunk method, each list built with `getImageName`.
+ * Keys are matched against the selected chunk method; a method with no entry
+ * here has no example images.
+ */
+export const ImageMap = {
+ book: getImageName('book', 4),
+ // Key and asset prefix differ: `laws` uses the `law` prefix.
+ laws: getImageName('law', 2),
+ manual: getImageName('manual', 4),
+ // Key and asset prefix differ: `picture` uses the `media` prefix.
+ picture: getImageName('media', 2),
+ naive: getImageName('naive', 2),
+ paper: getImageName('paper', 2),
+ presentation: getImageName('presentation', 2),
+ qa: getImageName('qa', 2),
+ resume: getImageName('resume', 2),
+ table: getImageName('table', 2),
+ one: getImageName('one', 2),
+ // Snake-case key, kebab-case asset prefix.
+ knowledge_graph: getImageName('knowledge-graph', 2),
+ tag: getImageName('tag', 2),
+};