Refa: better MIME content type (#8801)

### What problem does this PR solve?

Better uniform MIME content type.

### Type of change

- [x] Refactoring
This commit is contained in:
Yongteng Lei
2025-07-11 18:47:19 +08:00
committed by GitHub
parent f569401398
commit 72c19b44c3
3 changed files with 58 additions and 11 deletions

View File

@ -42,7 +42,7 @@ from api.utils.api_utils import (
validate_request,
)
from api.utils.file_utils import filename_type, get_project_base_directory, thumbnail
from api.utils.web_utils import html2pdf, is_valid_url
from api.utils.web_utils import CONTENT_TYPE_MAP, html2pdf, is_valid_url
from deepdoc.parser.html_parser import RAGFlowHtmlParser
from rag.nlp import search
from rag.utils.storage_factory import STORAGE_IMPL
@ -505,12 +505,14 @@ def get(doc_id):
b, n = File2DocumentService.get_storage_address(doc_id=doc_id)
response = flask.make_response(STORAGE_IMPL.get(b, n))
ext = re.search(r"\.([^.]+)$", doc.name)
ext = re.search(r"\.([^.]+)$", doc.name.lower())
ext = ext.group(1) if ext else None
if ext:
if doc.type == FileType.VISUAL.value:
response.headers.set("Content-Type", "image/%s" % ext.group(1))
content_type = CONTENT_TYPE_MAP.get(ext, f"image/{ext}")
else:
response.headers.set("Content-Type", "application/%s" % ext.group(1))
content_type = CONTENT_TYPE_MAP.get(ext, f"application/{ext}")
response.headers.set("Content-Type", content_type)
return response
except Exception as e:
return server_error_response(e)

View File

@ -31,6 +31,7 @@ from api.db.services.file_service import FileService
from api import settings
from api.utils.api_utils import get_json_result
from api.utils.file_utils import filename_type
from api.utils.web_utils import CONTENT_TYPE_MAP
from rag.utils.storage_factory import STORAGE_IMPL
@ -334,15 +335,14 @@ def get(file_id):
blob = STORAGE_IMPL.get(b, n)
response = flask.make_response(blob)
ext = re.search(r"\.([^.]+)$", file.name)
ext = re.search(r"\.([^.]+)$", file.name.lower())
ext = ext.group(1) if ext else None
if ext:
if file.type == FileType.VISUAL.value:
response.headers.set('Content-Type', 'image/%s' % ext.group(1))
content_type = CONTENT_TYPE_MAP.get(ext, f"image/{ext}")
else:
response.headers.set(
'Content-Type',
'application/%s' %
ext.group(1))
content_type = CONTENT_TYPE_MAP.get(ext, f"application/{ext}")
response.headers.set("Content-Type", content_type)
return response
except Exception as e:
return server_error_response(e)
@ -373,4 +373,4 @@ def move():
FileService.move_file(file_ids, parent_id)
return get_json_result(data=True)
except Exception as e:
return server_error_response(e)
return server_error_response(e)

View File

@ -31,6 +31,51 @@ from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
CONTENT_TYPE_MAP = {
# Office
"docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
"doc": "application/msword",
"pdf": "application/pdf",
"csv": "text/csv",
"xls": "application/vnd.ms-excel",
"xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
# Text/code
"txt": "text/plain",
"py": "text/plain",
"js": "text/plain",
"java": "text/plain",
"c": "text/plain",
"cpp": "text/plain",
"h": "text/plain",
"php": "text/plain",
"go": "text/plain",
"ts": "text/plain",
"sh": "text/plain",
"cs": "text/plain",
"kt": "text/plain",
"sql": "text/plain",
# Web
"md": "text/markdown",
"markdown": "text/markdown",
"htm": "text/html",
"html": "text/html",
"json": "application/json",
# Image formats
"png": "image/png",
"jpg": "image/jpeg",
"jpeg": "image/jpeg",
"gif": "image/gif",
"bmp": "image/bmp",
"tiff": "image/tiff",
"tif": "image/tiff",
"webp": "image/webp",
"svg": "image/svg+xml",
"ico": "image/x-icon",
"avif": "image/avif",
"heic": "image/heic",
}
def html2pdf(
source: str,
timeout: int = 2,