diff --git a/api/utils/file_utils.py b/api/utils/file_utils.py
index e67ddd82d..4cad64c35 100644
--- a/api/utils/file_utils.py
+++ b/api/utils/file_utils.py
@@ -42,7 +42,7 @@ def filename_type(filename):
if re.match(r".*\.pdf$", filename):
return FileType.PDF.value
- if re.match(r".*\.(msg|eml|doc|docx|ppt|pptx|yml|xml|htm|json|jsonl|ldjson|csv|txt|ini|xls|xlsx|wps|rtf|hlp|pages|numbers|key|md|py|js|java|c|cpp|h|php|go|ts|sh|cs|kt|html|sql)$", filename):
+ if re.match(r".*\.(msg|eml|doc|docx|ppt|pptx|yml|xml|htm|json|jsonl|ldjson|csv|txt|ini|xls|xlsx|wps|rtf|hlp|pages|numbers|key|md|mdx|py|js|java|c|cpp|h|php|go|ts|sh|cs|kt|html|sql)$", filename):
return FileType.DOC.value
if re.match(r".*\.(wav|flac|ape|alac|wavpack|wv|mp3|aac|ogg|vorbis|opus)$", filename):
diff --git a/api/utils/web_utils.py b/api/utils/web_utils.py
index 2866e5f28..11e8428b7 100644
--- a/api/utils/web_utils.py
+++ b/api/utils/web_utils.py
@@ -69,6 +69,7 @@ CONTENT_TYPE_MAP = {
# Web
"md": "text/markdown",
"markdown": "text/markdown",
+ "mdx": "text/markdown",
"htm": "text/html",
"html": "text/html",
"json": "application/json",
diff --git a/common/data_source/file_types.py b/common/data_source/file_types.py
index bf7eafaaa..be4d56d7b 100644
--- a/common/data_source/file_types.py
+++ b/common/data_source/file_types.py
@@ -18,6 +18,7 @@ class UploadMimeTypes:
"text/plain",
"text/markdown",
"text/x-markdown",
+ "text/mdx",
"text/x-config",
"text/tab-separated-values",
"application/json",
diff --git a/rag/app/naive.py b/rag/app/naive.py
index 5f269d1c5..8811c6b70 100644
--- a/rag/app/naive.py
+++ b/rag/app/naive.py
@@ -823,7 +823,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, lang="Chinese", ca
parser_config.get("delimiter", "\n!?;。;!?"))
callback(0.8, "Finish parsing.")
- elif re.search(r"\.(md|markdown)$", filename, re.IGNORECASE):
+ elif re.search(r"\.(md|markdown|mdx)$", filename, re.IGNORECASE):
callback(0.1, "Start to parse.")
markdown_parser = Markdown(int(parser_config.get("chunk_token_num", 128)))
sections, tables, section_images = markdown_parser(
diff --git a/rag/app/one.py b/rag/app/one.py
index fe3a25430..bb9f09f1a 100644
--- a/rag/app/one.py
+++ b/rag/app/one.py
@@ -128,7 +128,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
excel_parser = ExcelParser()
sections = excel_parser.html(binary, 1000000000)
- elif re.search(r"\.(txt|md|markdown)$", filename, re.IGNORECASE):
+ elif re.search(r"\.(txt|md|markdown|mdx)$", filename, re.IGNORECASE):
callback(0.1, "Start to parse.")
txt = get_text(filename, binary)
sections = txt.split("\n")
diff --git a/rag/app/qa.py b/rag/app/qa.py
index a31240bd3..95678faaa 100644
--- a/rag/app/qa.py
+++ b/rag/app/qa.py
@@ -421,7 +421,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, lang="Chinese", ca
res.append(beAdocPdf(deepcopy(doc), q, a, eng, image, poss))
return res
- elif re.search(r"\.(md|markdown)$", filename, re.IGNORECASE):
+ elif re.search(r"\.(md|markdown|mdx)$", filename, re.IGNORECASE):
callback(0.1, "Start to parse.")
txt = get_text(filename, binary)
lines = txt.split("\n")
diff --git a/web/src/assets/svg/file-icon/mdx.svg b/web/src/assets/svg/file-icon/mdx.svg
new file mode 100644
index 000000000..a6fb749df
--- /dev/null
+++ b/web/src/assets/svg/file-icon/mdx.svg
@@ -0,0 +1,10 @@
+
\ No newline at end of file
diff --git a/web/src/components/chunk-method-dialog/hooks.ts b/web/src/components/chunk-method-dialog/hooks.ts
index f6f4f80c7..0dd5a7fee 100644
--- a/web/src/components/chunk-method-dialog/hooks.ts
+++ b/web/src/components/chunk-method-dialog/hooks.ts
@@ -65,7 +65,10 @@ const ParserListMap = new Map([
'knowledge_graph',
],
],
- [['md'], ['naive', 'qa', 'knowledge_graph']],
+ [
+ ['md', 'mdx'],
+ ['naive', 'qa', 'knowledge_graph'],
+ ],
[['json'], ['naive', 'knowledge_graph']],
[['eml'], ['email']],
]);
diff --git a/web/src/components/document-preview/index.tsx b/web/src/components/document-preview/index.tsx
index 7937fcd31..968dba3c8 100644
--- a/web/src/components/document-preview/index.tsx
+++ b/web/src/components/document-preview/index.tsx
@@ -82,7 +82,7 @@ const Preview = ({
)}
- {['md'].indexOf(fileType) > -1 && (
+ {['md', 'mdx'].indexOf(fileType) > -1 && (
diff --git a/web/src/components/file-upload.tsx b/web/src/components/file-upload.tsx
index b6941f8e9..78957b49d 100644
--- a/web/src/components/file-upload.tsx
+++ b/web/src/components/file-upload.tsx
@@ -1028,7 +1028,7 @@ function getFileIcon(file: File) {
if (
type.startsWith('text/') ||
- ['txt', 'md', 'rtf', 'pdf'].includes(extension)
+ ['txt', 'md', 'mdx', 'rtf', 'pdf'].includes(extension)
) {
return ;
}
diff --git a/web/src/constants/common.ts b/web/src/constants/common.ts
index 205c28f4c..2d68ac0af 100644
--- a/web/src/constants/common.ts
+++ b/web/src/constants/common.ts
@@ -18,6 +18,8 @@ export const fileIconMap = {
jpg: 'jpg.svg',
js: 'js.svg',
json: 'json.svg',
+ md: 'md.svg',
+ mdx: 'mdx.svg',
mkv: 'mkv.svg',
mp3: 'mp3.svg',
mp4: 'mp4.svg',
@@ -142,6 +144,8 @@ export enum FileMimeType {
Xlsx = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
Mp4 = 'video/mp4',
Json = 'application/json',
+ Md = 'text/markdown',
+ Mdx = 'text/markdown',
}
export const Domain = 'demo.ragflow.io';
@@ -161,7 +165,15 @@ export const Images = [
];
// Without FileViewer
-export const ExceptiveType = ['xlsx', 'xls', 'pdf', 'docx', 'md', ...Images];
+export const ExceptiveType = [
+ 'xlsx',
+ 'xls',
+ 'pdf',
+ 'docx',
+ 'md',
+ 'mdx',
+ ...Images,
+];
export const SupportedPreviewDocumentTypes = [...ExceptiveType];
//#endregion
diff --git a/web/src/constants/file.ts b/web/src/constants/file.ts
index 4dea95952..8d488c971 100644
--- a/web/src/constants/file.ts
+++ b/web/src/constants/file.ts
@@ -12,6 +12,7 @@ export const FileIconMap = {
txt: 'text',
csv: 'pdf',
md: 'md',
+ mdx: 'md',
mp4: 'mp4',
avi: 'avi',
mkv: 'mkv',
diff --git a/web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/index.tsx b/web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/index.tsx
index eb0e3fd50..a73960d14 100644
--- a/web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/index.tsx
+++ b/web/src/pages/chunk/parsed-result/add-knowledge/components/knowledge-chunk/index.tsx
@@ -172,6 +172,7 @@ const Chunk = () => {
case 'docx':
case 'txt':
case 'md':
+ case 'mdx':
case 'pdf':
return documentInfo?.type;
}
diff --git a/web/src/pages/dataflow-result/index.tsx b/web/src/pages/dataflow-result/index.tsx
index ae43ad852..8a2780bd8 100644
--- a/web/src/pages/dataflow-result/index.tsx
+++ b/web/src/pages/dataflow-result/index.tsx
@@ -87,6 +87,7 @@ const Chunk = () => {
case 'docx':
case 'txt':
case 'md':
+ case 'mdx':
case 'pdf':
return documentInfo?.type;
}
diff --git a/web/src/pages/document-viewer/index.tsx b/web/src/pages/document-viewer/index.tsx
index 2e6fbccfd..ab611929f 100644
--- a/web/src/pages/document-viewer/index.tsx
+++ b/web/src/pages/document-viewer/index.tsx
@@ -40,7 +40,9 @@ const DocumentViewer = () => {
)}
- {ext === 'md' && }
+ {(ext === 'md' || ext === 'mdx') && (
+
+ )}
{ext === 'txt' && }
{ext === 'pdf' && (