diff --git a/api/apps/document_app.py b/api/apps/document_app.py index d6be6d91b..69a4edc21 100644 --- a/api/apps/document_app.py +++ b/api/apps/document_app.py @@ -61,6 +61,8 @@ def upload(): for file_obj in file_objs: if file_obj.filename == "": return get_json_result(data=False, message="No file selected!", code=settings.RetCode.ARGUMENT_ERROR) + if len(file_obj.filename.encode("utf-8")) > 255: + return get_json_result(data=False, message="File name must be 255 bytes or less.", code=settings.RetCode.ARGUMENT_ERROR) e, kb = KnowledgebaseService.get_by_id(kb_id) if not e: @@ -147,6 +149,8 @@ def create(): kb_id = req["kb_id"] if not kb_id: return get_json_result(data=False, message='Lack of "KB ID"', code=settings.RetCode.ARGUMENT_ERROR) + if len(req["name"].encode("utf-8")) > 255: + return get_json_result(data=False, message="File name must be 255 bytes or less.", code=settings.RetCode.ARGUMENT_ERROR) try: e, kb = KnowledgebaseService.get_by_id(kb_id) @@ -402,6 +406,9 @@ def rename(): return get_data_error_result(message="Document not found!") if pathlib.Path(req["name"].lower()).suffix != pathlib.Path(doc.name.lower()).suffix: return get_json_result(data=False, message="The extension of file can't be changed", code=settings.RetCode.ARGUMENT_ERROR) + if len(req["name"].encode("utf-8")) > 255: + return get_json_result(data=False, message="File name must be 255 bytes or less.", code=settings.RetCode.ARGUMENT_ERROR) + for d in DocumentService.query(name=req["name"], kb_id=doc.kb_id): if d.name == req["name"]: return get_data_error_result(message="Duplicated document name in the same knowledgebase.") diff --git a/api/apps/sdk/doc.py b/api/apps/sdk/doc.py index cc5e040de..bffe5e7e8 100644 --- a/api/apps/sdk/doc.py +++ b/api/apps/sdk/doc.py @@ -129,8 +129,8 @@ def upload(dataset_id, tenant_id): for file_obj in file_objs: if file_obj.filename == "": return get_result(message="No file selected!", code=settings.RetCode.ARGUMENT_ERROR) - if len(file_obj.filename.encode("utf-8")) >= 128: - return get_result(message="File name should be less than 128 bytes.", code=settings.RetCode.ARGUMENT_ERROR) + if len(file_obj.filename.encode("utf-8")) > 255: + return get_result(message="File name must be 255 bytes or less.", code=settings.RetCode.ARGUMENT_ERROR) """ # total size total_size = 0 @@ -247,9 +247,9 @@ def update_doc(tenant_id, dataset_id, document_id): DocumentService.update_meta_fields(document_id, req["meta_fields"]) if "name" in req and req["name"] != doc.name: - if len(req["name"].encode("utf-8")) >= 128: + if len(req["name"].encode("utf-8")) > 255: return get_result( - message="The name should be less than 128 bytes.", + message="File name must be 255 bytes or less.", code=settings.RetCode.ARGUMENT_ERROR, ) if pathlib.Path(req["name"].lower()).suffix != pathlib.Path(doc.name.lower()).suffix: diff --git a/test/configs.py b/test/configs.py index a5f316934..7381567b5 100644 --- a/test/configs.py +++ b/test/configs.py @@ -31,6 +31,6 @@ X8f7fp9c7vUsfOCkM+gHY3PadG+QHa7KI7mzTKgUTZImK6BZtfRBATDTthEUbbaTewY4H0MnWiCeeDhc INVALID_API_TOKEN = "invalid_key_123" DATASET_NAME_LIMIT = 128 -DOCUMENT_NAME_LIMIT = 128 +DOCUMENT_NAME_LIMIT = 255 CHAT_ASSISTANT_NAME_LIMIT = 255 SESSION_WITH_CHAT_NAME_LIMIT = 255 diff --git a/test/testcases/test_http_api/test_file_management_within_dataset/test_update_document.py b/test/testcases/test_http_api/test_file_management_within_dataset/test_update_document.py index 3e21f518a..b27c7dacc 100644 --- a/test/testcases/test_http_api/test_file_management_within_dataset/test_update_document.py +++ b/test/testcases/test_http_api/test_file_management_within_dataset/test_update_document.py @@ -47,9 +47,9 @@ class TestDocumentsUpdated: [ ("new_name.txt", 0, ""), ( - f"{'a' * (DOCUMENT_NAME_LIMIT - 3)}.txt", - 101, - "The name should be less than 128 bytes.", + f"{'a' * (DOCUMENT_NAME_LIMIT - 4)}.txt", + 0, + "", ), ( 0, diff --git a/test/testcases/test_http_api/test_file_management_within_dataset/test_upload_documents.py b/test/testcases/test_http_api/test_file_management_within_dataset/test_upload_documents.py index cbb6285f9..f8f238641 100644 --- a/test/testcases/test_http_api/test_file_management_within_dataset/test_upload_documents.py +++ b/test/testcases/test_http_api/test_file_management_within_dataset/test_upload_documents.py @@ -13,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. # - import string from concurrent.futures import ThreadPoolExecutor, as_completed @@ -128,13 +127,12 @@ class TestDocumentsUpload: assert res.json()["message"] == "No file selected!" @pytest.mark.p2 - def test_filename_exceeds_max_length(self, HttpApiAuth, add_dataset_func, tmp_path): + def test_filename_max_length(self, HttpApiAuth, add_dataset_func, tmp_path): dataset_id = add_dataset_func - # filename_length = 129 - fp = create_txt_file(tmp_path / f"{'a' * (DOCUMENT_NAME_LIMIT - 3)}.txt") + fp = create_txt_file(tmp_path / f"{'a' * (DOCUMENT_NAME_LIMIT - 4)}.txt") res = upload_documents(HttpApiAuth, dataset_id, [fp]) - assert res["code"] == 101 - assert res["message"] == "File name should be less than 128 bytes." + assert res["code"] == 0 + assert res["data"][0]["name"] == fp.name @pytest.mark.p2 def test_invalid_dataset_id(self, HttpApiAuth, tmp_path): diff --git a/test/testcases/test_sdk_api/test_file_management_within_dataset/test_update_document.py b/test/testcases/test_sdk_api/test_file_management_within_dataset/test_update_document.py index 83615dcdb..f1d3dadb8 100644 --- a/test/testcases/test_sdk_api/test_file_management_within_dataset/test_update_document.py +++ b/test/testcases/test_sdk_api/test_file_management_within_dataset/test_update_document.py @@ -25,7 +25,7 @@ class TestDocumentsUpdated: "name, expected_message", [ ("new_name.txt", ""), - (f"{'a' * (DOCUMENT_NAME_LIMIT - 3)}.txt", "The name should be less than 128 bytes"), + (f"{'a' * (DOCUMENT_NAME_LIMIT - 4)}.txt", ""), (0, "AttributeError"), (None, "AttributeError"), ("", "The extension of file can't be changed"), diff --git a/test/testcases/test_sdk_api/test_file_management_within_dataset/test_upload_documents.py b/test/testcases/test_sdk_api/test_file_management_within_dataset/test_upload_documents.py index aed84e3fe..72034e27d 100644 --- a/test/testcases/test_sdk_api/test_file_management_within_dataset/test_upload_documents.py +++ b/test/testcases/test_sdk_api/test_file_management_within_dataset/test_upload_documents.py @@ -113,16 +113,17 @@ class TestDocumentsUpload: assert str(excinfo.value) == "No file selected!", str(excinfo.value) @pytest.mark.p2 - def test_filename_exceeds_max_length(self, add_dataset_func, tmp_path): + def test_filename_max_length(self, add_dataset_func, tmp_path): dataset = add_dataset_func - fp = create_txt_file(tmp_path / f"{'a' * (DOCUMENT_NAME_LIMIT - 3)}.txt") + fp = create_txt_file(tmp_path / f"{'a' * (DOCUMENT_NAME_LIMIT - 4)}.txt") with fp.open("rb") as f: blob = f.read() - with pytest.raises(Exception) as excinfo: - dataset.upload_documents([{"display_name": fp.name, "blob": blob}]) - assert str(excinfo.value) == "File name should be less than 128 bytes.", str(excinfo.value) + documents = dataset.upload_documents([{"display_name": fp.name, "blob": blob}]) + for document in documents: + assert document.dataset_id == dataset.id, str(document) + assert document.name == fp.name, str(document) @pytest.mark.p2 def test_duplicate_files(self, add_dataset_func, tmp_path):