diff --git a/api/utils/file_utils.py b/api/utils/file_utils.py index e67ddd82d..4cad64c35 100644 --- a/api/utils/file_utils.py +++ b/api/utils/file_utils.py @@ -42,7 +42,7 @@ def filename_type(filename): if re.match(r".*\.pdf$", filename): return FileType.PDF.value - if re.match(r".*\.(msg|eml|doc|docx|ppt|pptx|yml|xml|htm|json|jsonl|ldjson|csv|txt|ini|xls|xlsx|wps|rtf|hlp|pages|numbers|key|md|py|js|java|c|cpp|h|php|go|ts|sh|cs|kt|html|sql)$", filename): + if re.match(r".*\.(msg|eml|doc|docx|ppt|pptx|yml|xml|htm|json|jsonl|ldjson|csv|txt|ini|xls|xlsx|wps|rtf|hlp|pages|numbers|key|md|mdx|py|js|java|c|cpp|h|php|go|ts|sh|cs|kt|html|sql)$", filename): return FileType.DOC.value if re.match(r".*\.(wav|flac|ape|alac|wavpack|wv|mp3|aac|ogg|vorbis|opus)$", filename): diff --git a/api/utils/web_utils.py b/api/utils/web_utils.py index 2866e5f28..11e8428b7 100644 --- a/api/utils/web_utils.py +++ b/api/utils/web_utils.py @@ -69,6 +69,7 @@ CONTENT_TYPE_MAP = { # Web "md": "text/markdown", "markdown": "text/markdown", + "mdx": "text/markdown", "htm": "text/html", "html": "text/html", "json": "application/json", diff --git a/common/data_source/file_types.py b/common/data_source/file_types.py index bf7eafaaa..be4d56d7b 100644 --- a/common/data_source/file_types.py +++ b/common/data_source/file_types.py @@ -18,6 +18,7 @@ class UploadMimeTypes: "text/plain", "text/markdown", "text/x-markdown", + "text/mdx", "text/x-config", "text/tab-separated-values", "application/json", diff --git a/rag/app/naive.py b/rag/app/naive.py index 5f269d1c5..8811c6b70 100644 --- a/rag/app/naive.py +++ b/rag/app/naive.py @@ -823,7 +823,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, lang="Chinese", ca parser_config.get("delimiter", "\n!?;。;!?")) callback(0.8, "Finish parsing.") - elif re.search(r"\.(md|markdown)$", filename, re.IGNORECASE): + elif re.search(r"\.(md|markdown|mdx)$", filename, re.IGNORECASE): callback(0.1, "Start to parse.") markdown_parser = Markdown(int(parser_config.get("chunk_token_num", 128))) sections, tables, section_images = markdown_parser( diff --git a/rag/app/one.py b/rag/app/one.py index fe3a25430..bb9f09f1a 100644 --- a/rag/app/one.py +++ b/rag/app/one.py @@ -128,7 +128,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, excel_parser = ExcelParser() sections = excel_parser.html(binary, 1000000000) - elif re.search(r"\.(txt|md|markdown)$", filename, re.IGNORECASE): + elif re.search(r"\.(txt|md|markdown|mdx)$", filename, re.IGNORECASE): callback(0.1, "Start to parse.") txt = get_text(filename, binary) sections = txt.split("\n") diff --git a/rag/app/qa.py b/rag/app/qa.py index a31240bd3..95678faaa 100644 --- a/rag/app/qa.py +++ b/rag/app/qa.py @@ -421,7 +421,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, lang="Chinese", ca res.append(beAdocPdf(deepcopy(doc), q, a, eng, image, poss)) return res - elif re.search(r"\.(md|markdown)$", filename, re.IGNORECASE): + elif re.search(r"\.(md|markdown|mdx)$", filename, re.IGNORECASE): callback(0.1, "Start to parse.") txt = get_text(filename, binary) lines = txt.split("\n") diff --git a/web/src/assets/svg/file-icon/mdx.svg b/web/src/assets/svg/file-icon/mdx.svg new file mode 100644 index 000000000..a6fb749df --- /dev/null +++ b/web/src/assets/svg/file-icon/mdx.svg @@ -0,0 +1,10 @@ + + + + + + \ No newline at end of file diff --git a/web/src/components/chunk-method-dialog/hooks.ts b/web/src/components/chunk-method-dialog/hooks.ts index f6f4f80c7..0dd5a7fee 100644 --- a/web/src/components/chunk-method-dialog/hooks.ts +++ b/web/src/components/chunk-method-dialog/hooks.ts @@ -65,7 +65,10 @@ const ParserListMap = new Map([ 'knowledge_graph', ], ], - [['md'], ['naive', 'qa', 'knowledge_graph']], + [ + ['md', 'mdx'], + ['naive', 'qa', 'knowledge_graph'], + ], [['json'], ['naive', 'knowledge_graph']], [['eml'], ['email']], ]); diff --git a/web/src/components/document-preview/index.tsx b/web/src/components/document-preview/index.tsx index 7937fcd31..968dba3c8 100644 --- a/web/src/components/document-preview/index.tsx +++ b/web/src/components/document-preview/index.tsx @@ -82,7 +82,7 @@ const Preview = ({ )} - {['md'].indexOf(fileType) > -1 && ( + {['md', 'mdx'].indexOf(fileType) > -1 && (
diff --git a/web/src/components/file-upload.tsx b/web/src/components/file-upload.tsx index b6941f8e9..78957b49d 100644 --- a/web/src/components/file-upload.tsx +++ b/web/src/components/file-upload.tsx @@ -1028,7 +1028,7 @@ function getFileIcon(file: File) { if ( type.startsWith('text/') || - ['txt', 'md', 'rtf', 'pdf'].includes(extension) + ['txt', 'md', 'mdx', 'rtf', 'pdf'].includes(extension) ) { return ; } diff --git a/web/src/constants/common.ts b/web/src/constants/common.ts index 205c28f4c..2d68ac0af 100644 --- a/web/src/constants/common.ts +++ b/web/src/constants/common.ts @@ -18,6 +18,8 @@ export const fileIconMap = { jpg: 'jpg.svg', js: 'js.svg', json: 'json.svg', + md: 'md.svg', + mdx: 'mdx.svg', mkv: 'mkv.svg', mp3: 'mp3.svg', mp4: 'mp4.svg', @@ -142,6 +144,8 @@ export enum FileMimeType { Xlsx = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', Mp4 = 'video/mp4', Json = 'application/json', + Md = 'text/markdown', + Mdx = 'text/markdown', } export const Domain = 'demo.ragflow.io'; @@ -161,7 +165,15 @@ export const Images = [ ]; // Without FileViewer -export const ExceptiveType = ['xlsx', 'xls', 'pdf', 'docx', 'md', ...Images]; +export const ExceptiveType = [ + 'xlsx', + 'xls', + 'pdf', + 'docx', + 'md', + 'mdx', + ...Images, +]; export const SupportedPreviewDocumentTypes = [...ExceptiveType]; //#endregion diff --git a/web/src/constants/file.ts b/web/src/constants/file.ts index 4dea95952..8d488c971 100644 --- a/web/src/constants/file.ts +++ b/web/src/constants/file.ts @@ -12,6 +12,7 @@ export const FileIconMap = { txt: 'text', csv: 'pdf', md: 'md', + mdx: 'md', mp4: 'mp4', avi: 'avi', mkv: 'mkv', diff --git a/web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/index.tsx b/web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/index.tsx index eb0e3fd50..a73960d14 100644 --- a/web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/index.tsx +++ b/web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/index.tsx @@ -172,6 +172,7 @@ const Chunk = () => { case 'docx': case 'txt': case 'md': + case 'mdx': case 'pdf': return documentInfo?.type; } diff --git a/web/src/pages/dataflow-result/index.tsx b/web/src/pages/dataflow-result/index.tsx index ae43ad852..8a2780bd8 100644 --- a/web/src/pages/dataflow-result/index.tsx +++ b/web/src/pages/dataflow-result/index.tsx @@ -87,6 +87,7 @@ const Chunk = () => { case 'docx': case 'txt': case 'md': + case 'mdx': case 'pdf': return documentInfo?.type; } diff --git a/web/src/pages/document-viewer/index.tsx b/web/src/pages/document-viewer/index.tsx index 2e6fbccfd..ab611929f 100644 --- a/web/src/pages/document-viewer/index.tsx +++ b/web/src/pages/document-viewer/index.tsx @@ -40,7 +40,9 @@ const DocumentViewer = () => { )} - {ext === 'md' && } + {(ext === 'md' || ext === 'mdx') && ( + + )} {ext === 'txt' && } {ext === 'pdf' && (