mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Refa: Implement centralized file name length limit using FILE_NAME_LEN_LIMIT constant (#8318)
### What problem does this PR solve?

- Replace hardcoded 255-byte file name length checks with FILE_NAME_LEN_LIMIT constant
- Update error messages to show the actual limit value
- #8290

### Type of change

- [x] Refactoring

Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>
This commit is contained in:
@ -23,7 +23,7 @@ from flask import request
|
|||||||
from flask_login import current_user, login_required
|
from flask_login import current_user, login_required
|
||||||
|
|
||||||
from api import settings
|
from api import settings
|
||||||
from api.constants import IMG_BASE64_PREFIX
|
from api.constants import FILE_NAME_LEN_LIMIT, IMG_BASE64_PREFIX
|
||||||
from api.db import VALID_FILE_TYPES, VALID_TASK_STATUS, FileSource, FileType, ParserType, TaskStatus
|
from api.db import VALID_FILE_TYPES, VALID_TASK_STATUS, FileSource, FileType, ParserType, TaskStatus
|
||||||
from api.db.db_models import File, Task
|
from api.db.db_models import File, Task
|
||||||
from api.db.services import duplicate_name
|
from api.db.services import duplicate_name
|
||||||
@ -61,8 +61,8 @@ def upload():
|
|||||||
for file_obj in file_objs:
|
for file_obj in file_objs:
|
||||||
if file_obj.filename == "":
|
if file_obj.filename == "":
|
||||||
return get_json_result(data=False, message="No file selected!", code=settings.RetCode.ARGUMENT_ERROR)
|
return get_json_result(data=False, message="No file selected!", code=settings.RetCode.ARGUMENT_ERROR)
|
||||||
if len(file_obj.filename.encode("utf-8")) > 255:
|
if len(file_obj.filename.encode("utf-8")) > FILE_NAME_LEN_LIMIT:
|
||||||
return get_json_result(data=False, message="File name must be 255 bytes or less.", code=settings.RetCode.ARGUMENT_ERROR)
|
return get_json_result(data=False, message=f"File name must be {FILE_NAME_LEN_LIMIT} bytes or less.", code=settings.RetCode.ARGUMENT_ERROR)
|
||||||
|
|
||||||
e, kb = KnowledgebaseService.get_by_id(kb_id)
|
e, kb = KnowledgebaseService.get_by_id(kb_id)
|
||||||
if not e:
|
if not e:
|
||||||
@ -149,8 +149,9 @@ def create():
|
|||||||
kb_id = req["kb_id"]
|
kb_id = req["kb_id"]
|
||||||
if not kb_id:
|
if not kb_id:
|
||||||
return get_json_result(data=False, message='Lack of "KB ID"', code=settings.RetCode.ARGUMENT_ERROR)
|
return get_json_result(data=False, message='Lack of "KB ID"', code=settings.RetCode.ARGUMENT_ERROR)
|
||||||
if len(req["name"].encode("utf-8")) > 255:
|
if len(req["name"].encode("utf-8")) > FILE_NAME_LEN_LIMIT:
|
||||||
return get_json_result(data=False, message="File name must be 255 bytes or less.", code=settings.RetCode.ARGUMENT_ERROR)
|
return get_json_result(data=False, message=f"File name must be {FILE_NAME_LEN_LIMIT} bytes or less.", code=settings.RetCode.ARGUMENT_ERROR)
|
||||||
|
|
||||||
if req["name"].strip() == "":
|
if req["name"].strip() == "":
|
||||||
return get_json_result(data=False, message="File name can't be empty.", code=settings.RetCode.ARGUMENT_ERROR)
|
return get_json_result(data=False, message="File name can't be empty.", code=settings.RetCode.ARGUMENT_ERROR)
|
||||||
req["name"] = req["name"].strip()
|
req["name"] = req["name"].strip()
|
||||||
@ -409,8 +410,8 @@ def rename():
|
|||||||
return get_data_error_result(message="Document not found!")
|
return get_data_error_result(message="Document not found!")
|
||||||
if pathlib.Path(req["name"].lower()).suffix != pathlib.Path(doc.name.lower()).suffix:
|
if pathlib.Path(req["name"].lower()).suffix != pathlib.Path(doc.name.lower()).suffix:
|
||||||
return get_json_result(data=False, message="The extension of file can't be changed", code=settings.RetCode.ARGUMENT_ERROR)
|
return get_json_result(data=False, message="The extension of file can't be changed", code=settings.RetCode.ARGUMENT_ERROR)
|
||||||
if len(req["name"].encode("utf-8")) > 255:
|
if len(req["name"].encode("utf-8")) > FILE_NAME_LEN_LIMIT:
|
||||||
return get_json_result(data=False, message="File name must be 255 bytes or less.", code=settings.RetCode.ARGUMENT_ERROR)
|
return get_json_result(data=False, message=f"File name must be {FILE_NAME_LEN_LIMIT} bytes or less.", code=settings.RetCode.ARGUMENT_ERROR)
|
||||||
|
|
||||||
for d in DocumentService.query(name=req["name"], kb_id=doc.kb_id):
|
for d in DocumentService.query(name=req["name"], kb_id=doc.kb_id):
|
||||||
if d.name == req["name"]:
|
if d.name == req["name"]:
|
||||||
|
|||||||
@ -25,6 +25,7 @@ from peewee import OperationalError
|
|||||||
from pydantic import BaseModel, Field, validator
|
from pydantic import BaseModel, Field, validator
|
||||||
|
|
||||||
from api import settings
|
from api import settings
|
||||||
|
from api.constants import FILE_NAME_LEN_LIMIT
|
||||||
from api.db import FileSource, FileType, LLMType, ParserType, TaskStatus
|
from api.db import FileSource, FileType, LLMType, ParserType, TaskStatus
|
||||||
from api.db.db_models import File, Task
|
from api.db.db_models import File, Task
|
||||||
from api.db.services.document_service import DocumentService
|
from api.db.services.document_service import DocumentService
|
||||||
@ -129,8 +130,8 @@ def upload(dataset_id, tenant_id):
|
|||||||
for file_obj in file_objs:
|
for file_obj in file_objs:
|
||||||
if file_obj.filename == "":
|
if file_obj.filename == "":
|
||||||
return get_result(message="No file selected!", code=settings.RetCode.ARGUMENT_ERROR)
|
return get_result(message="No file selected!", code=settings.RetCode.ARGUMENT_ERROR)
|
||||||
if len(file_obj.filename.encode("utf-8")) > 255:
|
if len(file_obj.filename.encode("utf-8")) > FILE_NAME_LEN_LIMIT:
|
||||||
return get_result(message="File name must be 255 bytes or less.", code=settings.RetCode.ARGUMENT_ERROR)
|
return get_result(message=f"File name must be {FILE_NAME_LEN_LIMIT} bytes or less.", code=settings.RetCode.ARGUMENT_ERROR)
|
||||||
"""
|
"""
|
||||||
# total size
|
# total size
|
||||||
total_size = 0
|
total_size = 0
|
||||||
@ -247,9 +248,9 @@ def update_doc(tenant_id, dataset_id, document_id):
|
|||||||
DocumentService.update_meta_fields(document_id, req["meta_fields"])
|
DocumentService.update_meta_fields(document_id, req["meta_fields"])
|
||||||
|
|
||||||
if "name" in req and req["name"] != doc.name:
|
if "name" in req and req["name"] != doc.name:
|
||||||
if len(req["name"].encode("utf-8")) > 255:
|
if len(req["name"].encode("utf-8")) > FILE_NAME_LEN_LIMIT:
|
||||||
return get_result(
|
return get_result(
|
||||||
message="File name must be 255 bytes or less.",
|
message=f"File name must be {FILE_NAME_LEN_LIMIT} bytes or less.",
|
||||||
code=settings.RetCode.ARGUMENT_ERROR,
|
code=settings.RetCode.ARGUMENT_ERROR,
|
||||||
)
|
)
|
||||||
if pathlib.Path(req["name"].lower()).suffix != pathlib.Path(doc.name.lower()).suffix:
|
if pathlib.Path(req["name"].lower()).suffix != pathlib.Path(doc.name.lower()).suffix:
|
||||||
|
|||||||
@ -15,7 +15,7 @@
|
|||||||
|
|
||||||
NAME_LENGTH_LIMIT = 2**10
|
NAME_LENGTH_LIMIT = 2**10
|
||||||
|
|
||||||
IMG_BASE64_PREFIX = 'data:image/png;base64,'
|
IMG_BASE64_PREFIX = "data:image/png;base64,"
|
||||||
|
|
||||||
SERVICE_CONF = "service_conf.yaml"
|
SERVICE_CONF = "service_conf.yaml"
|
||||||
|
|
||||||
@ -25,4 +25,4 @@ REQUEST_WAIT_SEC = 2
|
|||||||
REQUEST_MAX_WAIT_SEC = 300
|
REQUEST_MAX_WAIT_SEC = 300
|
||||||
|
|
||||||
DATASET_NAME_LIMIT = 128
|
DATASET_NAME_LIMIT = 128
|
||||||
FILE_NAME_LEN_LIMIT = 256
|
FILE_NAME_LEN_LIMIT = 255
|
||||||
|
|||||||
@ -21,6 +21,7 @@ from concurrent.futures import ThreadPoolExecutor
|
|||||||
from flask_login import current_user
|
from flask_login import current_user
|
||||||
from peewee import fn
|
from peewee import fn
|
||||||
|
|
||||||
|
from api.constants import FILE_NAME_LEN_LIMIT
|
||||||
from api.db import KNOWLEDGEBASE_FOLDER_NAME, FileSource, FileType, ParserType
|
from api.db import KNOWLEDGEBASE_FOLDER_NAME, FileSource, FileType, ParserType
|
||||||
from api.db.db_models import DB, Document, File, File2Document, Knowledgebase
|
from api.db.db_models import DB, Document, File, File2Document, Knowledgebase
|
||||||
from api.db.services import duplicate_name
|
from api.db.services import duplicate_name
|
||||||
@ -30,7 +31,7 @@ from api.db.services.file2document_service import File2DocumentService
|
|||||||
from api.utils import get_uuid
|
from api.utils import get_uuid
|
||||||
from api.utils.file_utils import filename_type, read_potential_broken_pdf, thumbnail_img
|
from api.utils.file_utils import filename_type, read_potential_broken_pdf, thumbnail_img
|
||||||
from rag.utils.storage_factory import STORAGE_IMPL
|
from rag.utils.storage_factory import STORAGE_IMPL
|
||||||
from api.constants import FILE_NAME_LEN_LIMIT
|
|
||||||
|
|
||||||
class FileService(CommonService):
|
class FileService(CommonService):
|
||||||
# Service class for managing file operations and storage
|
# Service class for managing file operations and storage
|
||||||
@ -412,8 +413,8 @@ class FileService(CommonService):
|
|||||||
MAX_FILE_NUM_PER_USER = int(os.environ.get("MAX_FILE_NUM_PER_USER", 0))
|
MAX_FILE_NUM_PER_USER = int(os.environ.get("MAX_FILE_NUM_PER_USER", 0))
|
||||||
if MAX_FILE_NUM_PER_USER > 0 and DocumentService.get_doc_count(kb.tenant_id) >= MAX_FILE_NUM_PER_USER:
|
if MAX_FILE_NUM_PER_USER > 0 and DocumentService.get_doc_count(kb.tenant_id) >= MAX_FILE_NUM_PER_USER:
|
||||||
raise RuntimeError("Exceed the maximum file number of a free user!")
|
raise RuntimeError("Exceed the maximum file number of a free user!")
|
||||||
if len(file.filename.encode("utf-8")) >= FILE_NAME_LEN_LIMIT:
|
if len(file.filename.encode("utf-8")) > FILE_NAME_LEN_LIMIT:
|
||||||
raise RuntimeError("Exceed the maximum length of file name!")
|
raise RuntimeError(f"File name must be {FILE_NAME_LEN_LIMIT} bytes or less.")
|
||||||
|
|
||||||
filename = duplicate_name(DocumentService.query, name=file.filename, kb_id=kb.id)
|
filename = duplicate_name(DocumentService.query, name=file.filename, kb_id=kb.id)
|
||||||
filetype = filename_type(filename)
|
filetype = filename_type(filename)
|
||||||
@ -492,4 +493,3 @@ class FileService(CommonService):
|
|||||||
if re.search(r"\.(eml)$", filename):
|
if re.search(r"\.(eml)$", filename):
|
||||||
return ParserType.EMAIL.value
|
return ParserType.EMAIL.value
|
||||||
return default
|
return default
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user