diff --git a/web/package.json b/web/package.json index f6810c800..3bfe0e6c7 100644 --- a/web/package.json +++ b/web/package.json @@ -81,6 +81,7 @@ "mammoth": "^1.7.2", "next-themes": "^0.4.6", "openai-speech-stream-player": "^1.0.8", + "pptx-preview": "^1.0.5", "rc-tween-one": "^3.0.6", "react-copy-to-clipboard": "^5.1.0", "react-dropzone": "^14.3.5", diff --git a/web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/components/document-preview/csv-preview.tsx b/web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/components/document-preview/csv-preview.tsx new file mode 100644 index 000000000..d31b02975 --- /dev/null +++ b/web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/components/document-preview/csv-preview.tsx @@ -0,0 +1,114 @@ +import message from '@/components/ui/message'; +import { Spin } from '@/components/ui/spin'; +import request from '@/utils/request'; +import classNames from 'classnames'; +import React, { useEffect, useRef, useState } from 'react'; +import { useGetDocumentUrl } from './hooks'; + +interface CSVData { + rows: string[][]; + headers: string[]; +} + +interface FileViewerProps { + className?: string; +} + +const CSVFileViewer: React.FC = () => { + const [data, setData] = useState(null); + const [isLoading, setIsLoading] = useState(true); + const containerRef = useRef(null); + const url = useGetDocumentUrl(); + const parseCSV = (csvText: string): CSVData => { + console.log('Parsing CSV data:', csvText); + const lines = csvText.split('\n'); + const headers = lines[0].split(',').map((header) => header.trim()); + const rows = lines + .slice(1) + .map((line) => line.split(',').map((cell) => cell.trim())); + + return { headers, rows }; + }; + + useEffect(() => { + const loadCSV = async () => { + try { + const res = await request(url, { + method: 'GET', + responseType: 'blob', + onError: (err) => { + message.error('file load failed'); + setIsLoading(false); + }, + }); + + // parse CSV file + const reader = new FileReader(); + reader.readAsText(res.data); + reader.onload = () => { + const parsedData = parseCSV(reader.result as string); + console.log('file loaded successfully', reader.result); + setData(parsedData); + }; + } catch (error) { + message.error('CSV file parse failed'); + console.error('Error loading CSV file:', error); + } finally { + setIsLoading(false); + } + }; + + loadCSV(); + + return () => { + setData(null); + }; + }, [url]); + + return ( +
+ {isLoading ? ( +
+ +
+ ) : data ? ( + + + + {data.headers.map((header, index) => ( + + ))} + + + + {data.rows.map((row, rowIndex) => ( + + {row.map((cell, cellIndex) => ( + + ))} + + ))} + +
+ {header} +
+ {cell || '-'} +
+ ) : null} +
+ ); +}; + +export default CSVFileViewer; diff --git a/web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/components/document-preview/doc-preview.tsx b/web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/components/document-preview/doc-preview.tsx new file mode 100644 index 000000000..7e4e3a8c7 --- /dev/null +++ b/web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/components/document-preview/doc-preview.tsx @@ -0,0 +1,67 @@ +import message from '@/components/ui/message'; +import { Spin } from '@/components/ui/spin'; +import request from '@/utils/request'; +import classNames from 'classnames'; +import mammoth from 'mammoth'; +import { useEffect, useState } from 'react'; +import { useGetDocumentUrl } from './hooks'; + +interface DocPreviewerProps { + className?: string; +} + +export const DocPreviewer: React.FC = ({ className }) => { + const url = useGetDocumentUrl(); + const [htmlContent, setHtmlContent] = useState(''); + const [loading, setLoading] = useState(false); + const fetchDocument = async () => { + setLoading(true); + const res = await request(url, { + method: 'GET', + responseType: 'blob', + onError: () => { + message.error('Document parsing failed'); + console.error('Error loading document:', url); + }, + }); + try { + const arrayBuffer = await res.data.arrayBuffer(); + const result = await mammoth.convertToHtml( + { arrayBuffer }, + { includeDefaultStyleMap: true }, + ); + + const styledContent = result.value + .replace(/

/g, '

') + .replace(//g, ''); + + setHtmlContent(styledContent); + } catch (err) { + message.error('Document parsing failed'); + console.error('Error parsing document:', err); + } + setLoading(false); + }; + + useEffect(() => { + if (url) { + fetchDocument(); + } + }, [url]); + return ( +

+ {loading && ( +
+ +
+ )} + + {!loading &&
} +
+ ); +}; diff --git a/web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/components/document-preview/excel-preview.tsx b/web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/components/document-preview/excel-preview.tsx new file mode 100644 index 000000000..a6bcfcc73 --- /dev/null +++ b/web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/components/document-preview/excel-preview.tsx @@ -0,0 +1,24 @@ +import { useFetchExcel } from '@/pages/document-viewer/hooks'; +import classNames from 'classnames'; +import { useGetDocumentUrl } from './hooks'; + +interface ExcelCsvPreviewerProps { + className?: string; +} + +export const ExcelCsvPreviewer: React.FC = ({ + className, +}) => { + const url = useGetDocumentUrl(); + const { containerRef } = useFetchExcel(url); + + return ( +
+ ); +}; diff --git a/web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/components/document-preview/image-preview.tsx b/web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/components/document-preview/image-preview.tsx new file mode 100644 index 000000000..449cf3e7e --- /dev/null +++ b/web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/components/document-preview/image-preview.tsx @@ -0,0 +1,72 @@ +import message from '@/components/ui/message'; +import { Spin } from '@/components/ui/spin'; +import request from '@/utils/request'; +import classNames from 'classnames'; +import { useEffect, useState } from 'react'; +import { useGetDocumentUrl } from './hooks'; + +interface ImagePreviewerProps { + className?: string; +} + +export const ImagePreviewer: React.FC = ({ + className, +}) => { + const url = useGetDocumentUrl(); + const [imageSrc, setImageSrc] = useState(null); + const [isLoading, setIsLoading] = useState(true); + + const fetchImage = async () => { + setIsLoading(true); + const res = await request(url, { + method: 'GET', + responseType: 'blob', + onError: () => { + message.error('Failed to load image'); + setIsLoading(false); + }, + }); + const objectUrl = URL.createObjectURL(res.data); + setImageSrc(objectUrl); + setIsLoading(false); + }; + useEffect(() => { + if (url) { + fetchImage(); + } + }, [url]); + + useEffect(() => { + return () => { + if (imageSrc) { + URL.revokeObjectURL(imageSrc); + } + }; + }, [imageSrc]); + + return ( +
+ {isLoading && ( +
+ +
+ )} + + {!isLoading && imageSrc && ( +
+ {'image'} URL.revokeObjectURL(imageSrc!)} + /> +
+ )} +
+ ); +}; diff --git a/web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/components/document-preview/index.tsx b/web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/components/document-preview/index.tsx new file mode 100644 index 000000000..89dc50f38 --- /dev/null +++ b/web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/components/document-preview/index.tsx @@ -0,0 +1,65 @@ +import { memo } from 'react'; + +import CSVFileViewer from './csv-preview'; +import { DocPreviewer } from './doc-preview'; +import { ExcelCsvPreviewer } from './excel-preview'; +import { ImagePreviewer } from './image-preview'; +import styles from './index.less'; +import PdfPreviewer, { IProps } from './pdf-preview'; +import { PptPreviewer } from './ppt-preview'; +import { TxtPreviewer } from './txt-preview'; + +type PreviewProps = { + fileType: string; + className?: string; +}; +const Preview = ({ + fileType, + className, + highlights, + setWidthAndHeight, +}: PreviewProps & Partial) => { + return ( + <> + {fileType === 'pdf' && highlights && setWidthAndHeight && ( +
+ +
+ )} + {['doc', 'docx'].indexOf(fileType) > -1 && ( +
+ +
+ )} + {['txt', 'md'].indexOf(fileType) > -1 && ( +
+ +
+ )} + {['visual'].indexOf(fileType) > -1 && ( +
+ +
+ )} + {['pptx'].indexOf(fileType) > -1 && ( +
+ +
+ )} + {['xlsx'].indexOf(fileType) > -1 && ( +
+ +
+ )} + {['csv'].indexOf(fileType) > -1 && ( +
+ +
+ )} + + ); +}; +export default memo(Preview); diff --git a/web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/components/document-preview/preview.tsx b/web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/components/document-preview/pdf-preview.tsx similarity index 96% rename from web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/components/document-preview/preview.tsx rename to web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/components/document-preview/pdf-preview.tsx index d07346a94..51d457300 100644 --- a/web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/components/document-preview/preview.tsx +++ b/web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/components/document-preview/pdf-preview.tsx @@ -14,7 +14,7 @@ import { useCatchDocumentError } from '@/components/pdf-previewer/hooks'; import FileError from '@/pages/document-viewer/file-error'; import styles from './index.less'; -interface IProps { +export interface IProps { highlights: IHighlight[]; setWidthAndHeight: (width: number, height: number) => void; } @@ -30,7 +30,7 @@ const HighlightPopup = ({ ) : null; // TODO: merge with DocumentPreviewer -const Preview = ({ highlights: state, setWidthAndHeight }: IProps) => { +const PdfPreview = ({ highlights: state, setWidthAndHeight }: IProps) => { const url = useGetDocumentUrl(); const ref = useRef<(highlight: IHighlight) => void>(() => {}); @@ -120,4 +120,4 @@ const Preview = ({ highlights: state, setWidthAndHeight }: IProps) => { ); }; -export default memo(Preview); +export default memo(PdfPreview); diff --git a/web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/components/document-preview/ppt-preview.tsx b/web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/components/document-preview/ppt-preview.tsx new file mode 100644 index 000000000..f464ae47c --- /dev/null +++ b/web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/components/document-preview/ppt-preview.tsx @@ -0,0 +1,67 @@ +import message from '@/components/ui/message'; +import request from '@/utils/request'; +import classNames from 'classnames'; +import { init } from 'pptx-preview'; +import { useEffect, useRef } from 'react'; +import { useGetDocumentUrl } from './hooks'; +interface PptPreviewerProps { + className?: string; +} + +export const PptPreviewer: React.FC = ({ className }) => { + const url = useGetDocumentUrl(); + const wrapper = useRef(null); + const containerRef = useRef(null); + const fetchDocument = async () => { + const res = await request(url, { + method: 'GET', + responseType: 'blob', + onError: () => { + message.error('Document parsing failed'); + console.error('Error loading document:', url); + }, + }); + console.log(res); + try { + const arrayBuffer = await res.data.arrayBuffer(); + + if (containerRef.current) { + let width = 500; + let height = 900; + if (containerRef.current) { + width = containerRef.current.clientWidth - 50; + height = containerRef.current.clientHeight - 50; + } + let pptxPrviewer = init(containerRef.current, { + width: width, + height: height, + }); + pptxPrviewer.preview(arrayBuffer); + } + } catch (err) { + message.error('ppt parse failed'); + } + }; + + useEffect(() => { + if (url) { + fetchDocument(); + } + }, [url]); + + return ( +
+
+
+
+
+
+
+ ); +}; diff --git a/web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/components/document-preview/txt-preview.tsx b/web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/components/document-preview/txt-preview.tsx new file mode 100644 index 000000000..fad0f6515 --- /dev/null +++ b/web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/components/document-preview/txt-preview.tsx @@ -0,0 +1,57 @@ +import message from '@/components/ui/message'; +import request from '@/utils/request'; +import { Spin } from 'antd'; +import classNames from 'classnames'; +import { useEffect, useState } from 'react'; +import { useGetDocumentUrl } from './hooks'; + +type TxtPreviewerProps = { className?: string }; +export const TxtPreviewer = ({ className }: TxtPreviewerProps) => { + const url = useGetDocumentUrl(); + const [loading, setLoading] = useState(false); + const [data, setData] = useState(''); + const fetchTxt = async () => { + setLoading(true); + const res = await request(url, { + method: 'GET', + responseType: 'blob', + onError: (err: any) => { + message.error('Failed to load file'); + console.error('Error loading file:', err); + }, + }); + // blob to string + const reader = new FileReader(); + reader.readAsText(res.data); + reader.onload = () => { + setData(reader.result as string); + setLoading(false); + console.log('file loaded successfully', reader.result); + }; + console.log('file data:', res); + }; + useEffect(() => { + if (url) { + fetchTxt(); + } else { + setLoading(false); + setData(''); + } + }, [url]); + return ( +
+ {loading && ( +
+ +
+ )} + + {!loading &&
{data}
} +
+ ); +}; diff --git a/web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/index.less b/web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/index.less index 6d1370305..1427ad74c 100644 --- a/web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/index.less +++ b/web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/index.less @@ -35,7 +35,8 @@ .documentPreview { // width: 40%; - height: 100%; + height: calc(100vh - 130px); + overflow: auto; } .chunkContainer { diff --git a/web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/index.tsx b/web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/index.tsx index 692d430a1..5f4a69c85 100644 --- a/web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/index.tsx +++ b/web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/index.tsx @@ -3,11 +3,11 @@ import { useSwitchChunk, } from '@/hooks/use-chunk-request'; import classNames from 'classnames'; -import { useCallback, useEffect, useState } from 'react'; +import { useCallback, useEffect, useMemo, useState } from 'react'; import { useTranslation } from 'react-i18next'; import ChunkCard from './components/chunk-card'; import CreatingModal from './components/chunk-creating-modal'; -import DocumentPreview from './components/document-preview/preview'; +import DocumentPreview from './components/document-preview'; import { useChangeChunkTextMode, useDeleteChunkByIds, @@ -143,6 +143,20 @@ const Chunk = () => { const { highlights, setWidthAndHeight } = useGetChunkHighlights(selectedChunkId); + const fileType = useMemo(() => { + switch (documentInfo?.type) { + case 'doc': + return documentInfo?.name.split('.').pop() || 'doc'; + case 'visual': + case 'docx': + case 'txt': + case 'md': + case 'pdf': + return documentInfo?.type; + } + return 'unknown'; + }, [documentInfo]); + return ( <>
@@ -151,14 +165,14 @@ const Chunk = () => {
- {isPdf && ( -
- -
- )} +
+ +