diff --git a/api/apps/document_app.py b/api/apps/document_app.py index ccfd2c1d2..ee8929eeb 100644 --- a/api/apps/document_app.py +++ b/api/apps/document_app.py @@ -23,7 +23,7 @@ from flask import request from flask_login import current_user, login_required from api import settings -from api.constants import IMG_BASE64_PREFIX +from api.constants import FILE_NAME_LEN_LIMIT, IMG_BASE64_PREFIX from api.db import VALID_FILE_TYPES, VALID_TASK_STATUS, FileSource, FileType, ParserType, TaskStatus from api.db.db_models import File, Task from api.db.services import duplicate_name @@ -61,8 +61,8 @@ def upload(): for file_obj in file_objs: if file_obj.filename == "": return get_json_result(data=False, message="No file selected!", code=settings.RetCode.ARGUMENT_ERROR) - if len(file_obj.filename.encode("utf-8")) > 255: - return get_json_result(data=False, message="File name must be 255 bytes or less.", code=settings.RetCode.ARGUMENT_ERROR) + if len(file_obj.filename.encode("utf-8")) > FILE_NAME_LEN_LIMIT: + return get_json_result(data=False, message=f"File name must be {FILE_NAME_LEN_LIMIT} bytes or less.", code=settings.RetCode.ARGUMENT_ERROR) e, kb = KnowledgebaseService.get_by_id(kb_id) if not e: @@ -149,8 +149,9 @@ def create(): kb_id = req["kb_id"] if not kb_id: return get_json_result(data=False, message='Lack of "KB ID"', code=settings.RetCode.ARGUMENT_ERROR) - if len(req["name"].encode("utf-8")) > 255: - return get_json_result(data=False, message="File name must be 255 bytes or less.", code=settings.RetCode.ARGUMENT_ERROR) + if len(req["name"].encode("utf-8")) > FILE_NAME_LEN_LIMIT: + return get_json_result(data=False, message=f"File name must be {FILE_NAME_LEN_LIMIT} bytes or less.", code=settings.RetCode.ARGUMENT_ERROR) + if req["name"].strip() == "": return get_json_result(data=False, message="File name can't be empty.", code=settings.RetCode.ARGUMENT_ERROR) req["name"] = req["name"].strip() @@ -409,8 +410,8 @@ def rename(): return get_data_error_result(message="Document not found!") if pathlib.Path(req["name"].lower()).suffix != pathlib.Path(doc.name.lower()).suffix: return get_json_result(data=False, message="The extension of file can't be changed", code=settings.RetCode.ARGUMENT_ERROR) - if len(req["name"].encode("utf-8")) > 255: - return get_json_result(data=False, message="File name must be 255 bytes or less.", code=settings.RetCode.ARGUMENT_ERROR) + if len(req["name"].encode("utf-8")) > FILE_NAME_LEN_LIMIT: + return get_json_result(data=False, message=f"File name must be {FILE_NAME_LEN_LIMIT} bytes or less.", code=settings.RetCode.ARGUMENT_ERROR) for d in DocumentService.query(name=req["name"], kb_id=doc.kb_id): if d.name == req["name"]: diff --git a/api/apps/sdk/doc.py b/api/apps/sdk/doc.py index bffe5e7e8..e0f77c985 100644 --- a/api/apps/sdk/doc.py +++ b/api/apps/sdk/doc.py @@ -25,6 +25,7 @@ from peewee import OperationalError from pydantic import BaseModel, Field, validator from api import settings +from api.constants import FILE_NAME_LEN_LIMIT from api.db import FileSource, FileType, LLMType, ParserType, TaskStatus from api.db.db_models import File, Task from api.db.services.document_service import DocumentService @@ -129,8 +130,8 @@ def upload(dataset_id, tenant_id): for file_obj in file_objs: if file_obj.filename == "": return get_result(message="No file selected!", code=settings.RetCode.ARGUMENT_ERROR) - if len(file_obj.filename.encode("utf-8")) > 255: - return get_result(message="File name must be 255 bytes or less.", code=settings.RetCode.ARGUMENT_ERROR) + if len(file_obj.filename.encode("utf-8")) > FILE_NAME_LEN_LIMIT: + return get_result(message=f"File name must be {FILE_NAME_LEN_LIMIT} bytes or less.", code=settings.RetCode.ARGUMENT_ERROR) """ # total size total_size = 0 @@ -247,9 +248,9 @@ def update_doc(tenant_id, dataset_id, document_id): DocumentService.update_meta_fields(document_id, req["meta_fields"]) if "name" in req and req["name"] != doc.name: - if len(req["name"].encode("utf-8")) > 255: + if len(req["name"].encode("utf-8")) > FILE_NAME_LEN_LIMIT: return get_result( - message="File name must be 255 bytes or less.", + message=f"File name must be {FILE_NAME_LEN_LIMIT} bytes or less.", code=settings.RetCode.ARGUMENT_ERROR, ) if pathlib.Path(req["name"].lower()).suffix != pathlib.Path(doc.name.lower()).suffix: diff --git a/api/constants.py b/api/constants.py index f5de6d98b..ce5cdeb3a 100644 --- a/api/constants.py +++ b/api/constants.py @@ -13,9 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -NAME_LENGTH_LIMIT = 2 ** 10 +NAME_LENGTH_LIMIT = 2**10 -IMG_BASE64_PREFIX = 'data:image/png;base64,' +IMG_BASE64_PREFIX = "data:image/png;base64," SERVICE_CONF = "service_conf.yaml" @@ -25,4 +25,4 @@ REQUEST_WAIT_SEC = 2 REQUEST_MAX_WAIT_SEC = 300 DATASET_NAME_LIMIT = 128 -FILE_NAME_LEN_LIMIT = 256 +FILE_NAME_LEN_LIMIT = 255 diff --git a/api/db/services/file_service.py b/api/db/services/file_service.py index 664cd1bdb..25c856531 100644 --- a/api/db/services/file_service.py +++ b/api/db/services/file_service.py @@ -21,6 +21,7 @@ from concurrent.futures import ThreadPoolExecutor from flask_login import current_user from peewee import fn +from api.constants import FILE_NAME_LEN_LIMIT from api.db import KNOWLEDGEBASE_FOLDER_NAME, FileSource, FileType, ParserType from api.db.db_models import DB, Document, File, File2Document, Knowledgebase from api.db.services import duplicate_name @@ -30,7 +31,7 @@ from api.db.services.file2document_service import File2DocumentService from api.utils import get_uuid from api.utils.file_utils import filename_type, read_potential_broken_pdf, thumbnail_img from rag.utils.storage_factory import STORAGE_IMPL -from api.constants import FILE_NAME_LEN_LIMIT + class FileService(CommonService): # Service class for managing file operations and storage @@ -412,8 +413,8 @@ class FileService(CommonService): MAX_FILE_NUM_PER_USER = int(os.environ.get("MAX_FILE_NUM_PER_USER", 0)) if MAX_FILE_NUM_PER_USER > 0 and DocumentService.get_doc_count(kb.tenant_id) >= MAX_FILE_NUM_PER_USER: raise RuntimeError("Exceed the maximum file number of a free user!") - if len(file.filename.encode("utf-8")) >= FILE_NAME_LEN_LIMIT: - raise RuntimeError("Exceed the maximum length of file name!") + if len(file.filename.encode("utf-8")) > FILE_NAME_LEN_LIMIT: + raise RuntimeError(f"File name must be {FILE_NAME_LEN_LIMIT} bytes or less.") filename = duplicate_name(DocumentService.query, name=file.filename, kb_id=kb.id) filetype = filename_type(filename) @@ -492,4 +493,3 @@ class FileService(CommonService): if re.search(r"\.(eml)$", filename): return ParserType.EMAIL.value return default -