feat: add MDX file support (#12261)

Feat: add MDX file support #12057

### What problem does this PR solve?

<img width="1055" height="270" alt="image"
src="https://github.com/user-attachments/assets/a0ab49f9-7806-41cd-8a96-f593591ab36b"
/>

The page states that MDX files are supported, but uploading an `.mdx` file
fails with the error: "x.mdx: This type of file has not been supported yet!"
<img width="381" height="110" alt="image"
src="https://github.com/user-attachments/assets/4bbb7d08-cb47-416a-95fc-bc90b90fcc39"
/>


### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
lys1313013
2025-12-29 12:54:31 +08:00
committed by GitHub
parent 8d3f9d61da
commit 37e4485415
15 changed files with 41 additions and 9 deletions

View File

@ -42,7 +42,7 @@ def filename_type(filename):
if re.match(r".*\.pdf$", filename):
return FileType.PDF.value
if re.match(r".*\.(msg|eml|doc|docx|ppt|pptx|yml|xml|htm|json|jsonl|ldjson|csv|txt|ini|xls|xlsx|wps|rtf|hlp|pages|numbers|key|md|py|js|java|c|cpp|h|php|go|ts|sh|cs|kt|html|sql)$", filename):
if re.match(r".*\.(msg|eml|doc|docx|ppt|pptx|yml|xml|htm|json|jsonl|ldjson|csv|txt|ini|xls|xlsx|wps|rtf|hlp|pages|numbers|key|md|mdx|py|js|java|c|cpp|h|php|go|ts|sh|cs|kt|html|sql)$", filename):
return FileType.DOC.value
if re.match(r".*\.(wav|flac|ape|alac|wavpack|wv|mp3|aac|ogg|vorbis|opus)$", filename):

View File

@ -69,6 +69,7 @@ CONTENT_TYPE_MAP = {
# Web
"md": "text/markdown",
"markdown": "text/markdown",
"mdx": "text/markdown",
"htm": "text/html",
"html": "text/html",
"json": "application/json",

View File

@ -18,6 +18,7 @@ class UploadMimeTypes:
"text/plain",
"text/markdown",
"text/x-markdown",
"text/mdx",
"text/x-config",
"text/tab-separated-values",
"application/json",

View File

@ -823,7 +823,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, lang="Chinese", ca
parser_config.get("delimiter", "\n!?;。;!?"))
callback(0.8, "Finish parsing.")
elif re.search(r"\.(md|markdown)$", filename, re.IGNORECASE):
elif re.search(r"\.(md|markdown|mdx)$", filename, re.IGNORECASE):
callback(0.1, "Start to parse.")
markdown_parser = Markdown(int(parser_config.get("chunk_token_num", 128)))
sections, tables, section_images = markdown_parser(

View File

@ -128,7 +128,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
excel_parser = ExcelParser()
sections = excel_parser.html(binary, 1000000000)
elif re.search(r"\.(txt|md|markdown)$", filename, re.IGNORECASE):
elif re.search(r"\.(txt|md|markdown|mdx)$", filename, re.IGNORECASE):
callback(0.1, "Start to parse.")
txt = get_text(filename, binary)
sections = txt.split("\n")

View File

@ -421,7 +421,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, lang="Chinese", ca
res.append(beAdocPdf(deepcopy(doc), q, a, eng, image, poss))
return res
elif re.search(r"\.(md|markdown)$", filename, re.IGNORECASE):
elif re.search(r"\.(md|markdown|mdx)$", filename, re.IGNORECASE):
callback(0.1, "Start to parse.")
txt = get_text(filename, binary)
lines = txt.split("\n")

View File

@ -0,0 +1,10 @@
<svg width="40" height="40" viewBox="0 0 40 40" fill="none" xmlns="http://www.w3.org/2000/svg">
<!-- 40x40 file-type icon: an outlined page with a cut top-right corner and a filled label badge. -->
<!-- Page outline: rounded rectangle whose top-right corner is replaced by a diagonal edge. -->
<path
d="M35 39.25H11C9.20507 39.25 7.75 37.7949 7.75 36V4C7.75 2.20508 9.20508 0.75 11 0.75H27C27.1212 0.75 27.2375 0.798159 27.3232 0.883883L38.1161 11.6768C38.2018 11.7625 38.25 11.8788 38.25 12V36C38.25 37.7949 36.7949 39.25 35 39.25Z"
stroke="#D0D5DD" stroke-width="1.5" />
<!-- Inner edge of the corner, stroked in the same color and width as the page outline. -->
<path d="M27 0.5V8C27 10.2091 28.7909 12 31 12H38.5" stroke="#D0D5DD" stroke-width="1.5" />
<!-- Filled rounded badge; it starts at x=1.7, left of the page outline (x=7.75), so it overhangs the page edge. -->
<rect x="1.7" y="18" width="31" height="16" rx="2" fill="#444CE7" />
<!-- White label glyphs drawn as filled outlines on top of the badge. -->
<!-- NOTE(review): the glyphs presumably spell "MDX"; confirm visually. -->
<path
d="M5.91921 22.7273H7.81552L9.81836 27.6136H9.90359L11.9064 22.7273H13.8027V30H12.3113V25.2663H12.2509L10.3688 29.9645H9.35316L7.47106 25.2486H7.41069V30H5.91921V22.7273ZM17.6477 30H15.0696V22.7273H17.669C18.4006 22.7273 19.0303 22.8729 19.5582 23.1641C20.0862 23.4529 20.4922 23.8684 20.7763 24.4105C21.0627 24.9527 21.206 25.6013 21.206 26.3565C21.206 27.1141 21.0627 27.7652 20.7763 28.3097C20.4922 28.8542 20.0838 29.272 19.5511 29.5632C19.0208 29.8544 18.3864 30 17.6477 30ZM16.6072 28.6825H17.5838C18.0384 28.6825 18.4207 28.602 18.7308 28.4411C19.0433 28.2777 19.2777 28.0256 19.4339 27.6847C19.5926 27.3414 19.6719 26.8987 19.6719 26.3565C19.6719 25.8191 19.5926 25.38 19.4339 25.0391C19.2777 24.6982 19.0445 24.4472 18.7344 24.2862C18.4242 24.1252 18.0419 24.0447 17.5874 24.0447H16.6072V28.6825Z M21.5 22.7273H23.1L27.5 30H25.9L21.5 22.7273Z M25.9 22.7273H27.5L23.1 30H21.5L25.9 22.7273Z"
fill="white" />
</svg>

After

Width:  |  Height:  |  Size: 1.5 KiB

View File

@ -65,7 +65,10 @@ const ParserListMap = new Map([
'knowledge_graph',
],
],
[['md'], ['naive', 'qa', 'knowledge_graph']],
[
['md', 'mdx'],
['naive', 'qa', 'knowledge_graph'],
],
[['json'], ['naive', 'knowledge_graph']],
[['eml'], ['email']],
]);

View File

@ -82,7 +82,7 @@ const Preview = ({
<CSVFileViewer className={className} url={url} />
</section>
)}
{['md'].indexOf(fileType) > -1 && (
{['md', 'mdx'].indexOf(fileType) > -1 && (
<section>
<Md className={className} url={url} />
</section>

View File

@ -1028,7 +1028,7 @@ function getFileIcon(file: File) {
if (
type.startsWith('text/') ||
['txt', 'md', 'rtf', 'pdf'].includes(extension)
['txt', 'md', 'mdx', 'rtf', 'pdf'].includes(extension)
) {
return <FileTextIcon />;
}

View File

@ -18,6 +18,8 @@ export const fileIconMap = {
jpg: 'jpg.svg',
js: 'js.svg',
json: 'json.svg',
md: 'md.svg',
mdx: 'mdx.svg',
mkv: 'mkv.svg',
mp3: 'mp3.svg',
mp4: 'mp4.svg',
@ -142,6 +144,8 @@ export enum FileMimeType {
Xlsx = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
Mp4 = 'video/mp4',
Json = 'application/json',
Md = 'text/markdown',
Mdx = 'text/markdown',
}
export const Domain = 'demo.ragflow.io';
@ -161,7 +165,15 @@ export const Images = [
];
// Without FileViewer
export const ExceptiveType = ['xlsx', 'xls', 'pdf', 'docx', 'md', ...Images];
export const ExceptiveType = [
'xlsx',
'xls',
'pdf',
'docx',
'md',
'mdx',
...Images,
];
export const SupportedPreviewDocumentTypes = [...ExceptiveType];
//#endregion

View File

@ -12,6 +12,7 @@ export const FileIconMap = {
txt: 'text',
csv: 'pdf',
md: 'md',
mdx: 'md',
mp4: 'mp4',
avi: 'avi',
mkv: 'mkv',

View File

@ -172,6 +172,7 @@ const Chunk = () => {
case 'docx':
case 'txt':
case 'md':
case 'mdx':
case 'pdf':
return documentInfo?.type;
}

View File

@ -87,6 +87,7 @@ const Chunk = () => {
case 'docx':
case 'txt':
case 'md':
case 'mdx':
case 'pdf':
return documentInfo?.type;
}

View File

@ -40,7 +40,9 @@ const DocumentViewer = () => {
<ImagePreviewer className="w-full !h-dvh p-5" url={api} />
</div>
)}
{ext === 'md' && <Md url={api} className="!h-dvh p-5"></Md>}
{(ext === 'md' || ext === 'mdx') && (
<Md url={api} className="!h-dvh p-5"></Md>
)}
{ext === 'txt' && <TxtPreviewer url={api}></TxtPreviewer>}
{ext === 'pdf' && (