mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Refa: better MIME content type (#8801)
### What problem does this PR solve?

Better, more uniform MIME content types.

### Type of change

- [x] Refactoring
This commit is contained in:
@ -42,7 +42,7 @@ from api.utils.api_utils import (
|
|||||||
validate_request,
|
validate_request,
|
||||||
)
|
)
|
||||||
from api.utils.file_utils import filename_type, get_project_base_directory, thumbnail
|
from api.utils.file_utils import filename_type, get_project_base_directory, thumbnail
|
||||||
from api.utils.web_utils import html2pdf, is_valid_url
|
from api.utils.web_utils import CONTENT_TYPE_MAP, html2pdf, is_valid_url
|
||||||
from deepdoc.parser.html_parser import RAGFlowHtmlParser
|
from deepdoc.parser.html_parser import RAGFlowHtmlParser
|
||||||
from rag.nlp import search
|
from rag.nlp import search
|
||||||
from rag.utils.storage_factory import STORAGE_IMPL
|
from rag.utils.storage_factory import STORAGE_IMPL
|
||||||
@ -505,12 +505,14 @@ def get(doc_id):
|
|||||||
b, n = File2DocumentService.get_storage_address(doc_id=doc_id)
|
b, n = File2DocumentService.get_storage_address(doc_id=doc_id)
|
||||||
response = flask.make_response(STORAGE_IMPL.get(b, n))
|
response = flask.make_response(STORAGE_IMPL.get(b, n))
|
||||||
|
|
||||||
ext = re.search(r"\.([^.]+)$", doc.name)
|
ext = re.search(r"\.([^.]+)$", doc.name.lower())
|
||||||
|
ext = ext.group(1) if ext else None
|
||||||
if ext:
|
if ext:
|
||||||
if doc.type == FileType.VISUAL.value:
|
if doc.type == FileType.VISUAL.value:
|
||||||
response.headers.set("Content-Type", "image/%s" % ext.group(1))
|
content_type = CONTENT_TYPE_MAP.get(ext, f"image/{ext}")
|
||||||
else:
|
else:
|
||||||
response.headers.set("Content-Type", "application/%s" % ext.group(1))
|
content_type = CONTENT_TYPE_MAP.get(ext, f"application/{ext}")
|
||||||
|
response.headers.set("Content-Type", content_type)
|
||||||
return response
|
return response
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return server_error_response(e)
|
return server_error_response(e)
|
||||||
|
|||||||
@ -31,6 +31,7 @@ from api.db.services.file_service import FileService
|
|||||||
from api import settings
|
from api import settings
|
||||||
from api.utils.api_utils import get_json_result
|
from api.utils.api_utils import get_json_result
|
||||||
from api.utils.file_utils import filename_type
|
from api.utils.file_utils import filename_type
|
||||||
|
from api.utils.web_utils import CONTENT_TYPE_MAP
|
||||||
from rag.utils.storage_factory import STORAGE_IMPL
|
from rag.utils.storage_factory import STORAGE_IMPL
|
||||||
|
|
||||||
|
|
||||||
@ -334,15 +335,14 @@ def get(file_id):
|
|||||||
blob = STORAGE_IMPL.get(b, n)
|
blob = STORAGE_IMPL.get(b, n)
|
||||||
|
|
||||||
response = flask.make_response(blob)
|
response = flask.make_response(blob)
|
||||||
ext = re.search(r"\.([^.]+)$", file.name)
|
ext = re.search(r"\.([^.]+)$", file.name.lower())
|
||||||
|
ext = ext.group(1) if ext else None
|
||||||
if ext:
|
if ext:
|
||||||
if file.type == FileType.VISUAL.value:
|
if file.type == FileType.VISUAL.value:
|
||||||
response.headers.set('Content-Type', 'image/%s' % ext.group(1))
|
content_type = CONTENT_TYPE_MAP.get(ext, f"image/{ext}")
|
||||||
else:
|
else:
|
||||||
response.headers.set(
|
content_type = CONTENT_TYPE_MAP.get(ext, f"application/{ext}")
|
||||||
'Content-Type',
|
response.headers.set("Content-Type", content_type)
|
||||||
'application/%s' %
|
|
||||||
ext.group(1))
|
|
||||||
return response
|
return response
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return server_error_response(e)
|
return server_error_response(e)
|
||||||
|
|||||||
@ -31,6 +31,51 @@ from selenium.webdriver.support.ui import WebDriverWait
|
|||||||
from webdriver_manager.chrome import ChromeDriverManager
|
from webdriver_manager.chrome import ChromeDriverManager
|
||||||
|
|
||||||
|
|
||||||
|
CONTENT_TYPE_MAP = {
|
||||||
|
# Office
|
||||||
|
"docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||||
|
"doc": "application/msword",
|
||||||
|
"pdf": "application/pdf",
|
||||||
|
"csv": "text/csv",
|
||||||
|
"xls": "application/vnd.ms-excel",
|
||||||
|
"xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
||||||
|
# Text/code
|
||||||
|
"txt": "text/plain",
|
||||||
|
"py": "text/plain",
|
||||||
|
"js": "text/plain",
|
||||||
|
"java": "text/plain",
|
||||||
|
"c": "text/plain",
|
||||||
|
"cpp": "text/plain",
|
||||||
|
"h": "text/plain",
|
||||||
|
"php": "text/plain",
|
||||||
|
"go": "text/plain",
|
||||||
|
"ts": "text/plain",
|
||||||
|
"sh": "text/plain",
|
||||||
|
"cs": "text/plain",
|
||||||
|
"kt": "text/plain",
|
||||||
|
"sql": "text/plain",
|
||||||
|
# Web
|
||||||
|
"md": "text/markdown",
|
||||||
|
"markdown": "text/markdown",
|
||||||
|
"htm": "text/html",
|
||||||
|
"html": "text/html",
|
||||||
|
"json": "application/json",
|
||||||
|
# Image formats
|
||||||
|
"png": "image/png",
|
||||||
|
"jpg": "image/jpeg",
|
||||||
|
"jpeg": "image/jpeg",
|
||||||
|
"gif": "image/gif",
|
||||||
|
"bmp": "image/bmp",
|
||||||
|
"tiff": "image/tiff",
|
||||||
|
"tif": "image/tiff",
|
||||||
|
"webp": "image/webp",
|
||||||
|
"svg": "image/svg+xml",
|
||||||
|
"ico": "image/x-icon",
|
||||||
|
"avif": "image/avif",
|
||||||
|
"heic": "image/heic",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
def html2pdf(
|
def html2pdf(
|
||||||
source: str,
|
source: str,
|
||||||
timeout: int = 2,
|
timeout: int = 2,
|
||||||
|
|||||||
Reference in New Issue
Block a user