mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-29 16:05:35 +08:00
feat: add MDX file support (#12261)
Feat: add MDX file support #12057

### What problem does this PR solve?

<img width="1055" height="270" alt="image" src="https://github.com/user-attachments/assets/a0ab49f9-7806-41cd-8a96-f593591ab36b" />

The page states that MDX files are supported, but uploading one fails with the error: "x.mdx: This type of file has not been supported yet!"

<img width="381" height="110" alt="image" src="https://github.com/user-attachments/assets/4bbb7d08-cb47-416a-95fc-bc90b90fcc39" />

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
@ -42,7 +42,7 @@ def filename_type(filename):
|
||||
if re.match(r".*\.pdf$", filename):
|
||||
return FileType.PDF.value
|
||||
|
||||
if re.match(r".*\.(msg|eml|doc|docx|ppt|pptx|yml|xml|htm|json|jsonl|ldjson|csv|txt|ini|xls|xlsx|wps|rtf|hlp|pages|numbers|key|md|py|js|java|c|cpp|h|php|go|ts|sh|cs|kt|html|sql)$", filename):
|
||||
if re.match(r".*\.(msg|eml|doc|docx|ppt|pptx|yml|xml|htm|json|jsonl|ldjson|csv|txt|ini|xls|xlsx|wps|rtf|hlp|pages|numbers|key|md|mdx|py|js|java|c|cpp|h|php|go|ts|sh|cs|kt|html|sql)$", filename):
|
||||
return FileType.DOC.value
|
||||
|
||||
if re.match(r".*\.(wav|flac|ape|alac|wavpack|wv|mp3|aac|ogg|vorbis|opus)$", filename):
|
||||
|
||||
@ -69,6 +69,7 @@ CONTENT_TYPE_MAP = {
|
||||
# Web
|
||||
"md": "text/markdown",
|
||||
"markdown": "text/markdown",
|
||||
"mdx": "text/markdown",
|
||||
"htm": "text/html",
|
||||
"html": "text/html",
|
||||
"json": "application/json",
|
||||
|
||||
@ -18,6 +18,7 @@ class UploadMimeTypes:
|
||||
"text/plain",
|
||||
"text/markdown",
|
||||
"text/x-markdown",
|
||||
"text/mdx",
|
||||
"text/x-config",
|
||||
"text/tab-separated-values",
|
||||
"application/json",
|
||||
|
||||
@ -823,7 +823,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, lang="Chinese", ca
|
||||
parser_config.get("delimiter", "\n!?;。;!?"))
|
||||
callback(0.8, "Finish parsing.")
|
||||
|
||||
elif re.search(r"\.(md|markdown)$", filename, re.IGNORECASE):
|
||||
elif re.search(r"\.(md|markdown|mdx)$", filename, re.IGNORECASE):
|
||||
callback(0.1, "Start to parse.")
|
||||
markdown_parser = Markdown(int(parser_config.get("chunk_token_num", 128)))
|
||||
sections, tables, section_images = markdown_parser(
|
||||
|
||||
@ -128,7 +128,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
||||
excel_parser = ExcelParser()
|
||||
sections = excel_parser.html(binary, 1000000000)
|
||||
|
||||
elif re.search(r"\.(txt|md|markdown)$", filename, re.IGNORECASE):
|
||||
elif re.search(r"\.(txt|md|markdown|mdx)$", filename, re.IGNORECASE):
|
||||
callback(0.1, "Start to parse.")
|
||||
txt = get_text(filename, binary)
|
||||
sections = txt.split("\n")
|
||||
|
||||
@ -421,7 +421,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, lang="Chinese", ca
|
||||
res.append(beAdocPdf(deepcopy(doc), q, a, eng, image, poss))
|
||||
return res
|
||||
|
||||
elif re.search(r"\.(md|markdown)$", filename, re.IGNORECASE):
|
||||
elif re.search(r"\.(md|markdown|mdx)$", filename, re.IGNORECASE):
|
||||
callback(0.1, "Start to parse.")
|
||||
txt = get_text(filename, binary)
|
||||
lines = txt.split("\n")
|
||||
|
||||
10
web/src/assets/svg/file-icon/mdx.svg
Normal file
10
web/src/assets/svg/file-icon/mdx.svg
Normal file
@ -0,0 +1,10 @@
|
||||
<svg width="40" height="40" viewBox="0 0 40 40" fill="none" xmlns="http://www.w3.org/2000/svg">
|
||||
<path
|
||||
d="M35 39.25H11C9.20507 39.25 7.75 37.7949 7.75 36V4C7.75 2.20508 9.20508 0.75 11 0.75H27C27.1212 0.75 27.2375 0.798159 27.3232 0.883883L38.1161 11.6768C38.2018 11.7625 38.25 11.8788 38.25 12V36C38.25 37.7949 36.7949 39.25 35 39.25Z"
|
||||
stroke="#D0D5DD" stroke-width="1.5" />
|
||||
<path d="M27 0.5V8C27 10.2091 28.7909 12 31 12H38.5" stroke="#D0D5DD" stroke-width="1.5" />
|
||||
<rect x="1.7" y="18" width="31" height="16" rx="2" fill="#444CE7" />
|
||||
<path
|
||||
d="M5.91921 22.7273H7.81552L9.81836 27.6136H9.90359L11.9064 22.7273H13.8027V30H12.3113V25.2663H12.2509L10.3688 29.9645H9.35316L7.47106 25.2486H7.41069V30H5.91921V22.7273ZM17.6477 30H15.0696V22.7273H17.669C18.4006 22.7273 19.0303 22.8729 19.5582 23.1641C20.0862 23.4529 20.4922 23.8684 20.7763 24.4105C21.0627 24.9527 21.206 25.6013 21.206 26.3565C21.206 27.1141 21.0627 27.7652 20.7763 28.3097C20.4922 28.8542 20.0838 29.272 19.5511 29.5632C19.0208 29.8544 18.3864 30 17.6477 30ZM16.6072 28.6825H17.5838C18.0384 28.6825 18.4207 28.602 18.7308 28.4411C19.0433 28.2777 19.2777 28.0256 19.4339 27.6847C19.5926 27.3414 19.6719 26.8987 19.6719 26.3565C19.6719 25.8191 19.5926 25.38 19.4339 25.0391C19.2777 24.6982 19.0445 24.4472 18.7344 24.2862C18.4242 24.1252 18.0419 24.0447 17.5874 24.0447H16.6072V28.6825Z M21.5 22.7273H23.1L27.5 30H25.9L21.5 22.7273Z M25.9 22.7273H27.5L23.1 30H21.5L25.9 22.7273Z"
|
||||
fill="white" />
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 1.5 KiB |
@ -65,7 +65,10 @@ const ParserListMap = new Map([
|
||||
'knowledge_graph',
|
||||
],
|
||||
],
|
||||
[['md'], ['naive', 'qa', 'knowledge_graph']],
|
||||
[
|
||||
['md', 'mdx'],
|
||||
['naive', 'qa', 'knowledge_graph'],
|
||||
],
|
||||
[['json'], ['naive', 'knowledge_graph']],
|
||||
[['eml'], ['email']],
|
||||
]);
|
||||
|
||||
@ -82,7 +82,7 @@ const Preview = ({
|
||||
<CSVFileViewer className={className} url={url} />
|
||||
</section>
|
||||
)}
|
||||
{['md'].indexOf(fileType) > -1 && (
|
||||
{['md', 'mdx'].indexOf(fileType) > -1 && (
|
||||
<section>
|
||||
<Md className={className} url={url} />
|
||||
</section>
|
||||
|
||||
@ -1028,7 +1028,7 @@ function getFileIcon(file: File) {
|
||||
|
||||
if (
|
||||
type.startsWith('text/') ||
|
||||
['txt', 'md', 'rtf', 'pdf'].includes(extension)
|
||||
['txt', 'md', 'mdx', 'rtf', 'pdf'].includes(extension)
|
||||
) {
|
||||
return <FileTextIcon />;
|
||||
}
|
||||
|
||||
@ -18,6 +18,8 @@ export const fileIconMap = {
|
||||
jpg: 'jpg.svg',
|
||||
js: 'js.svg',
|
||||
json: 'json.svg',
|
||||
md: 'md.svg',
|
||||
mdx: 'mdx.svg',
|
||||
mkv: 'mkv.svg',
|
||||
mp3: 'mp3.svg',
|
||||
mp4: 'mp4.svg',
|
||||
@ -142,6 +144,8 @@ export enum FileMimeType {
|
||||
Xlsx = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
|
||||
Mp4 = 'video/mp4',
|
||||
Json = 'application/json',
|
||||
Md = 'text/markdown',
|
||||
Mdx = 'text/markdown',
|
||||
}
|
||||
|
||||
export const Domain = 'demo.ragflow.io';
|
||||
@ -161,7 +165,15 @@ export const Images = [
|
||||
];
|
||||
|
||||
// Without FileViewer
|
||||
export const ExceptiveType = ['xlsx', 'xls', 'pdf', 'docx', 'md', ...Images];
|
||||
export const ExceptiveType = [
|
||||
'xlsx',
|
||||
'xls',
|
||||
'pdf',
|
||||
'docx',
|
||||
'md',
|
||||
'mdx',
|
||||
...Images,
|
||||
];
|
||||
|
||||
export const SupportedPreviewDocumentTypes = [...ExceptiveType];
|
||||
//#endregion
|
||||
|
||||
@ -12,6 +12,7 @@ export const FileIconMap = {
|
||||
txt: 'text',
|
||||
csv: 'pdf',
|
||||
md: 'md',
|
||||
mdx: 'md',
|
||||
mp4: 'mp4',
|
||||
avi: 'avi',
|
||||
mkv: 'mkv',
|
||||
|
||||
@ -172,6 +172,7 @@ const Chunk = () => {
|
||||
case 'docx':
|
||||
case 'txt':
|
||||
case 'md':
|
||||
case 'mdx':
|
||||
case 'pdf':
|
||||
return documentInfo?.type;
|
||||
}
|
||||
|
||||
@ -87,6 +87,7 @@ const Chunk = () => {
|
||||
case 'docx':
|
||||
case 'txt':
|
||||
case 'md':
|
||||
case 'mdx':
|
||||
case 'pdf':
|
||||
return documentInfo?.type;
|
||||
}
|
||||
|
||||
@ -40,7 +40,9 @@ const DocumentViewer = () => {
|
||||
<ImagePreviewer className="w-full !h-dvh p-5" url={api} />
|
||||
</div>
|
||||
)}
|
||||
{ext === 'md' && <Md url={api} className="!h-dvh p-5"></Md>}
|
||||
{(ext === 'md' || ext === 'mdx') && (
|
||||
<Md url={api} className="!h-dvh p-5"></Md>
|
||||
)}
|
||||
{ext === 'txt' && <TxtPreviewer url={api}></TxtPreviewer>}
|
||||
|
||||
{ext === 'pdf' && (
|
||||
|
||||
Reference in New Issue
Block a user