Fix: Enforce 255-byte filename limit (#8290)

### What problem does this PR solve?

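- Add filename length validation (<= 255 bytes, measured on the UTF-8
encoding) to document upload/create/rename in the HTTP API, and raise the
existing SDK API limit for upload/rename from 128 to 255 bytes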
- Update error messages for consistency
- Fix comparison operator in SDK from '>=' to '>' for filename length
check, so that a name of exactly 255 bytes is accepted

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
Liu An
2025-06-16 16:39:41 +08:00
committed by GitHub
parent bde76d2f55
commit a3bebeb599
7 changed files with 26 additions and 20 deletions

View File

@ -61,6 +61,8 @@ def upload():
for file_obj in file_objs: for file_obj in file_objs:
if file_obj.filename == "": if file_obj.filename == "":
return get_json_result(data=False, message="No file selected!", code=settings.RetCode.ARGUMENT_ERROR) return get_json_result(data=False, message="No file selected!", code=settings.RetCode.ARGUMENT_ERROR)
if len(file_obj.filename.encode("utf-8")) > 255:
return get_json_result(data=False, message="File name must be 255 bytes or less.", code=settings.RetCode.ARGUMENT_ERROR)
e, kb = KnowledgebaseService.get_by_id(kb_id) e, kb = KnowledgebaseService.get_by_id(kb_id)
if not e: if not e:
@ -147,6 +149,8 @@ def create():
kb_id = req["kb_id"] kb_id = req["kb_id"]
if not kb_id: if not kb_id:
return get_json_result(data=False, message='Lack of "KB ID"', code=settings.RetCode.ARGUMENT_ERROR) return get_json_result(data=False, message='Lack of "KB ID"', code=settings.RetCode.ARGUMENT_ERROR)
if len(req["name"].encode("utf-8")) > 255:
return get_json_result(data=False, message="File name must be 255 bytes or less.", code=settings.RetCode.ARGUMENT_ERROR)
try: try:
e, kb = KnowledgebaseService.get_by_id(kb_id) e, kb = KnowledgebaseService.get_by_id(kb_id)
@ -402,6 +406,9 @@ def rename():
return get_data_error_result(message="Document not found!") return get_data_error_result(message="Document not found!")
if pathlib.Path(req["name"].lower()).suffix != pathlib.Path(doc.name.lower()).suffix: if pathlib.Path(req["name"].lower()).suffix != pathlib.Path(doc.name.lower()).suffix:
return get_json_result(data=False, message="The extension of file can't be changed", code=settings.RetCode.ARGUMENT_ERROR) return get_json_result(data=False, message="The extension of file can't be changed", code=settings.RetCode.ARGUMENT_ERROR)
if len(req["name"].encode("utf-8")) > 255:
return get_json_result(data=False, message="File name must be 255 bytes or less.", code=settings.RetCode.ARGUMENT_ERROR)
for d in DocumentService.query(name=req["name"], kb_id=doc.kb_id): for d in DocumentService.query(name=req["name"], kb_id=doc.kb_id):
if d.name == req["name"]: if d.name == req["name"]:
return get_data_error_result(message="Duplicated document name in the same knowledgebase.") return get_data_error_result(message="Duplicated document name in the same knowledgebase.")

View File

@ -129,8 +129,8 @@ def upload(dataset_id, tenant_id):
for file_obj in file_objs: for file_obj in file_objs:
if file_obj.filename == "": if file_obj.filename == "":
return get_result(message="No file selected!", code=settings.RetCode.ARGUMENT_ERROR) return get_result(message="No file selected!", code=settings.RetCode.ARGUMENT_ERROR)
if len(file_obj.filename.encode("utf-8")) >= 128: if len(file_obj.filename.encode("utf-8")) > 255:
return get_result(message="File name should be less than 128 bytes.", code=settings.RetCode.ARGUMENT_ERROR) return get_result(message="File name must be 255 bytes or less.", code=settings.RetCode.ARGUMENT_ERROR)
""" """
# total size # total size
total_size = 0 total_size = 0
@ -247,9 +247,9 @@ def update_doc(tenant_id, dataset_id, document_id):
DocumentService.update_meta_fields(document_id, req["meta_fields"]) DocumentService.update_meta_fields(document_id, req["meta_fields"])
if "name" in req and req["name"] != doc.name: if "name" in req and req["name"] != doc.name:
if len(req["name"].encode("utf-8")) >= 128: if len(req["name"].encode("utf-8")) > 255:
return get_result( return get_result(
message="The name should be less than 128 bytes.", message="File name must be 255 bytes or less.",
code=settings.RetCode.ARGUMENT_ERROR, code=settings.RetCode.ARGUMENT_ERROR,
) )
if pathlib.Path(req["name"].lower()).suffix != pathlib.Path(doc.name.lower()).suffix: if pathlib.Path(req["name"].lower()).suffix != pathlib.Path(doc.name.lower()).suffix:

View File

@ -31,6 +31,6 @@ X8f7fp9c7vUsfOCkM+gHY3PadG+QHa7KI7mzTKgUTZImK6BZtfRBATDTthEUbbaTewY4H0MnWiCeeDhc
INVALID_API_TOKEN = "invalid_key_123" INVALID_API_TOKEN = "invalid_key_123"
DATASET_NAME_LIMIT = 128 DATASET_NAME_LIMIT = 128
DOCUMENT_NAME_LIMIT = 128 DOCUMENT_NAME_LIMIT = 255
CHAT_ASSISTANT_NAME_LIMIT = 255 CHAT_ASSISTANT_NAME_LIMIT = 255
SESSION_WITH_CHAT_NAME_LIMIT = 255 SESSION_WITH_CHAT_NAME_LIMIT = 255

View File

@ -47,9 +47,9 @@ class TestDocumentsUpdated:
[ [
("new_name.txt", 0, ""), ("new_name.txt", 0, ""),
( (
f"{'a' * (DOCUMENT_NAME_LIMIT - 3)}.txt", f"{'a' * (DOCUMENT_NAME_LIMIT - 4)}.txt",
101, 0,
"The name should be less than 128 bytes.", "",
), ),
( (
0, 0,

View File

@ -13,7 +13,6 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import string import string
from concurrent.futures import ThreadPoolExecutor, as_completed from concurrent.futures import ThreadPoolExecutor, as_completed
@ -128,13 +127,12 @@ class TestDocumentsUpload:
assert res.json()["message"] == "No file selected!" assert res.json()["message"] == "No file selected!"
@pytest.mark.p2 @pytest.mark.p2
def test_filename_exceeds_max_length(self, HttpApiAuth, add_dataset_func, tmp_path): def test_filename_max_length(self, HttpApiAuth, add_dataset_func, tmp_path):
dataset_id = add_dataset_func dataset_id = add_dataset_func
# filename_length = 129 fp = create_txt_file(tmp_path / f"{'a' * (DOCUMENT_NAME_LIMIT - 4)}.txt")
fp = create_txt_file(tmp_path / f"{'a' * (DOCUMENT_NAME_LIMIT - 3)}.txt")
res = upload_documents(HttpApiAuth, dataset_id, [fp]) res = upload_documents(HttpApiAuth, dataset_id, [fp])
assert res["code"] == 101 assert res["code"] == 0
assert res["message"] == "File name should be less than 128 bytes." assert res["data"][0]["name"] == fp.name
@pytest.mark.p2 @pytest.mark.p2
def test_invalid_dataset_id(self, HttpApiAuth, tmp_path): def test_invalid_dataset_id(self, HttpApiAuth, tmp_path):

View File

@ -25,7 +25,7 @@ class TestDocumentsUpdated:
"name, expected_message", "name, expected_message",
[ [
("new_name.txt", ""), ("new_name.txt", ""),
(f"{'a' * (DOCUMENT_NAME_LIMIT - 3)}.txt", "The name should be less than 128 bytes"), (f"{'a' * (DOCUMENT_NAME_LIMIT - 4)}.txt", ""),
(0, "AttributeError"), (0, "AttributeError"),
(None, "AttributeError"), (None, "AttributeError"),
("", "The extension of file can't be changed"), ("", "The extension of file can't be changed"),

View File

@ -113,16 +113,17 @@ class TestDocumentsUpload:
assert str(excinfo.value) == "No file selected!", str(excinfo.value) assert str(excinfo.value) == "No file selected!", str(excinfo.value)
@pytest.mark.p2 @pytest.mark.p2
def test_filename_exceeds_max_length(self, add_dataset_func, tmp_path): def test_filename_max_length(self, add_dataset_func, tmp_path):
dataset = add_dataset_func dataset = add_dataset_func
fp = create_txt_file(tmp_path / f"{'a' * (DOCUMENT_NAME_LIMIT - 3)}.txt") fp = create_txt_file(tmp_path / f"{'a' * (DOCUMENT_NAME_LIMIT - 4)}.txt")
with fp.open("rb") as f: with fp.open("rb") as f:
blob = f.read() blob = f.read()
with pytest.raises(Exception) as excinfo: documents = dataset.upload_documents([{"display_name": fp.name, "blob": blob}])
dataset.upload_documents([{"display_name": fp.name, "blob": blob}]) for document in documents:
assert str(excinfo.value) == "File name should be less than 128 bytes.", str(excinfo.value) assert document.dataset_id == dataset.id, str(document)
assert document.name == fp.name, str(document)
@pytest.mark.p2 @pytest.mark.p2
def test_duplicate_files(self, add_dataset_func, tmp_path): def test_duplicate_files(self, add_dataset_func, tmp_path):