Refactor: Implement a centralized file name length limit using the FILE_NAME_LEN_LIMIT constant (#8318)

### What problem does this PR solve?

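- Replace hardcoded 255-byte file name length checks with the
  `FILE_NAME_LEN_LIMIT` constant.
- Update error messages to show the actual limit value.
- Accept file names of exactly `FILE_NAME_LEN_LIMIT` bytes: the length check
  changes from `>=` to `>`, matching the new "bytes or less" error message.
- Related issue: #8290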

### Type of change

- [x] Refactoring

Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>
This commit is contained in:
Liu An
2025-06-17 18:01:30 +08:00
committed by GitHub
parent 64e281b398
commit 0a13d79b94
4 changed files with 20 additions and 18 deletions

View File

@ -21,6 +21,7 @@ from concurrent.futures import ThreadPoolExecutor
from flask_login import current_user
from peewee import fn
from api.constants import FILE_NAME_LEN_LIMIT
from api.db import KNOWLEDGEBASE_FOLDER_NAME, FileSource, FileType, ParserType
from api.db.db_models import DB, Document, File, File2Document, Knowledgebase
from api.db.services import duplicate_name
@ -30,7 +31,7 @@ from api.db.services.file2document_service import File2DocumentService
from api.utils import get_uuid
from api.utils.file_utils import filename_type, read_potential_broken_pdf, thumbnail_img
from rag.utils.storage_factory import STORAGE_IMPL
from api.constants import FILE_NAME_LEN_LIMIT
class FileService(CommonService):
# Service class for managing file operations and storage
@ -412,8 +413,8 @@ class FileService(CommonService):
MAX_FILE_NUM_PER_USER = int(os.environ.get("MAX_FILE_NUM_PER_USER", 0))
if MAX_FILE_NUM_PER_USER > 0 and DocumentService.get_doc_count(kb.tenant_id) >= MAX_FILE_NUM_PER_USER:
raise RuntimeError("Exceed the maximum file number of a free user!")
if len(file.filename.encode("utf-8")) >= FILE_NAME_LEN_LIMIT:
raise RuntimeError("Exceed the maximum length of file name!")
if len(file.filename.encode("utf-8")) > FILE_NAME_LEN_LIMIT:
raise RuntimeError(f"File name must be {FILE_NAME_LEN_LIMIT} bytes or less.")
filename = duplicate_name(DocumentService.query, name=file.filename, kb_id=kb.id)
filetype = filename_type(filename)
@ -492,4 +493,3 @@ class FileService(CommonService):
if re.search(r"\.(eml)$", filename):
return ParserType.EMAIL.value
return default