mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Fix: Enforce 255-byte filename limit (#8290)
### What problem does this PR solve? - Add filename length validation (<=255 bytes) for document upload/rename in both HTTP and SDK APIs - Update error messages for consistency - Fix comparison operator in SDK from '>=' to '>' for filename length check ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
@ -61,6 +61,8 @@ def upload():
|
|||||||
for file_obj in file_objs:
|
for file_obj in file_objs:
|
||||||
if file_obj.filename == "":
|
if file_obj.filename == "":
|
||||||
return get_json_result(data=False, message="No file selected!", code=settings.RetCode.ARGUMENT_ERROR)
|
return get_json_result(data=False, message="No file selected!", code=settings.RetCode.ARGUMENT_ERROR)
|
||||||
|
if len(file_obj.filename.encode("utf-8")) > 255:
|
||||||
|
return get_json_result(data=False, message="File name must be 255 bytes or less.", code=settings.RetCode.ARGUMENT_ERROR)
|
||||||
|
|
||||||
e, kb = KnowledgebaseService.get_by_id(kb_id)
|
e, kb = KnowledgebaseService.get_by_id(kb_id)
|
||||||
if not e:
|
if not e:
|
||||||
@ -147,6 +149,8 @@ def create():
|
|||||||
kb_id = req["kb_id"]
|
kb_id = req["kb_id"]
|
||||||
if not kb_id:
|
if not kb_id:
|
||||||
return get_json_result(data=False, message='Lack of "KB ID"', code=settings.RetCode.ARGUMENT_ERROR)
|
return get_json_result(data=False, message='Lack of "KB ID"', code=settings.RetCode.ARGUMENT_ERROR)
|
||||||
|
if len(req["name"].encode("utf-8")) > 255:
|
||||||
|
return get_json_result(data=False, message="File name must be 255 bytes or less.", code=settings.RetCode.ARGUMENT_ERROR)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
e, kb = KnowledgebaseService.get_by_id(kb_id)
|
e, kb = KnowledgebaseService.get_by_id(kb_id)
|
||||||
@ -402,6 +406,9 @@ def rename():
|
|||||||
return get_data_error_result(message="Document not found!")
|
return get_data_error_result(message="Document not found!")
|
||||||
if pathlib.Path(req["name"].lower()).suffix != pathlib.Path(doc.name.lower()).suffix:
|
if pathlib.Path(req["name"].lower()).suffix != pathlib.Path(doc.name.lower()).suffix:
|
||||||
return get_json_result(data=False, message="The extension of file can't be changed", code=settings.RetCode.ARGUMENT_ERROR)
|
return get_json_result(data=False, message="The extension of file can't be changed", code=settings.RetCode.ARGUMENT_ERROR)
|
||||||
|
if len(req["name"].encode("utf-8")) > 255:
|
||||||
|
return get_json_result(data=False, message="File name must be 255 bytes or less.", code=settings.RetCode.ARGUMENT_ERROR)
|
||||||
|
|
||||||
for d in DocumentService.query(name=req["name"], kb_id=doc.kb_id):
|
for d in DocumentService.query(name=req["name"], kb_id=doc.kb_id):
|
||||||
if d.name == req["name"]:
|
if d.name == req["name"]:
|
||||||
return get_data_error_result(message="Duplicated document name in the same knowledgebase.")
|
return get_data_error_result(message="Duplicated document name in the same knowledgebase.")
|
||||||
|
|||||||
@ -129,8 +129,8 @@ def upload(dataset_id, tenant_id):
|
|||||||
for file_obj in file_objs:
|
for file_obj in file_objs:
|
||||||
if file_obj.filename == "":
|
if file_obj.filename == "":
|
||||||
return get_result(message="No file selected!", code=settings.RetCode.ARGUMENT_ERROR)
|
return get_result(message="No file selected!", code=settings.RetCode.ARGUMENT_ERROR)
|
||||||
if len(file_obj.filename.encode("utf-8")) >= 128:
|
if len(file_obj.filename.encode("utf-8")) > 255:
|
||||||
return get_result(message="File name should be less than 128 bytes.", code=settings.RetCode.ARGUMENT_ERROR)
|
return get_result(message="File name must be 255 bytes or less.", code=settings.RetCode.ARGUMENT_ERROR)
|
||||||
"""
|
"""
|
||||||
# total size
|
# total size
|
||||||
total_size = 0
|
total_size = 0
|
||||||
@ -247,9 +247,9 @@ def update_doc(tenant_id, dataset_id, document_id):
|
|||||||
DocumentService.update_meta_fields(document_id, req["meta_fields"])
|
DocumentService.update_meta_fields(document_id, req["meta_fields"])
|
||||||
|
|
||||||
if "name" in req and req["name"] != doc.name:
|
if "name" in req and req["name"] != doc.name:
|
||||||
if len(req["name"].encode("utf-8")) >= 128:
|
if len(req["name"].encode("utf-8")) > 255:
|
||||||
return get_result(
|
return get_result(
|
||||||
message="The name should be less than 128 bytes.",
|
message="File name must be 255 bytes or less.",
|
||||||
code=settings.RetCode.ARGUMENT_ERROR,
|
code=settings.RetCode.ARGUMENT_ERROR,
|
||||||
)
|
)
|
||||||
if pathlib.Path(req["name"].lower()).suffix != pathlib.Path(doc.name.lower()).suffix:
|
if pathlib.Path(req["name"].lower()).suffix != pathlib.Path(doc.name.lower()).suffix:
|
||||||
|
|||||||
@ -31,6 +31,6 @@ X8f7fp9c7vUsfOCkM+gHY3PadG+QHa7KI7mzTKgUTZImK6BZtfRBATDTthEUbbaTewY4H0MnWiCeeDhc
|
|||||||
|
|
||||||
INVALID_API_TOKEN = "invalid_key_123"
|
INVALID_API_TOKEN = "invalid_key_123"
|
||||||
DATASET_NAME_LIMIT = 128
|
DATASET_NAME_LIMIT = 128
|
||||||
DOCUMENT_NAME_LIMIT = 128
|
DOCUMENT_NAME_LIMIT = 255
|
||||||
CHAT_ASSISTANT_NAME_LIMIT = 255
|
CHAT_ASSISTANT_NAME_LIMIT = 255
|
||||||
SESSION_WITH_CHAT_NAME_LIMIT = 255
|
SESSION_WITH_CHAT_NAME_LIMIT = 255
|
||||||
|
|||||||
@ -47,9 +47,9 @@ class TestDocumentsUpdated:
|
|||||||
[
|
[
|
||||||
("new_name.txt", 0, ""),
|
("new_name.txt", 0, ""),
|
||||||
(
|
(
|
||||||
f"{'a' * (DOCUMENT_NAME_LIMIT - 3)}.txt",
|
f"{'a' * (DOCUMENT_NAME_LIMIT - 4)}.txt",
|
||||||
101,
|
0,
|
||||||
"The name should be less than 128 bytes.",
|
"",
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
0,
|
0,
|
||||||
|
|||||||
@ -13,7 +13,6 @@
|
|||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
#
|
#
|
||||||
|
|
||||||
import string
|
import string
|
||||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||||
|
|
||||||
@ -128,13 +127,12 @@ class TestDocumentsUpload:
|
|||||||
assert res.json()["message"] == "No file selected!"
|
assert res.json()["message"] == "No file selected!"
|
||||||
|
|
||||||
@pytest.mark.p2
|
@pytest.mark.p2
|
||||||
def test_filename_exceeds_max_length(self, HttpApiAuth, add_dataset_func, tmp_path):
|
def test_filename_max_length(self, HttpApiAuth, add_dataset_func, tmp_path):
|
||||||
dataset_id = add_dataset_func
|
dataset_id = add_dataset_func
|
||||||
# filename_length = 129
|
fp = create_txt_file(tmp_path / f"{'a' * (DOCUMENT_NAME_LIMIT - 4)}.txt")
|
||||||
fp = create_txt_file(tmp_path / f"{'a' * (DOCUMENT_NAME_LIMIT - 3)}.txt")
|
|
||||||
res = upload_documents(HttpApiAuth, dataset_id, [fp])
|
res = upload_documents(HttpApiAuth, dataset_id, [fp])
|
||||||
assert res["code"] == 101
|
assert res["code"] == 0
|
||||||
assert res["message"] == "File name should be less than 128 bytes."
|
assert res["data"][0]["name"] == fp.name
|
||||||
|
|
||||||
@pytest.mark.p2
|
@pytest.mark.p2
|
||||||
def test_invalid_dataset_id(self, HttpApiAuth, tmp_path):
|
def test_invalid_dataset_id(self, HttpApiAuth, tmp_path):
|
||||||
|
|||||||
@ -25,7 +25,7 @@ class TestDocumentsUpdated:
|
|||||||
"name, expected_message",
|
"name, expected_message",
|
||||||
[
|
[
|
||||||
("new_name.txt", ""),
|
("new_name.txt", ""),
|
||||||
(f"{'a' * (DOCUMENT_NAME_LIMIT - 3)}.txt", "The name should be less than 128 bytes"),
|
(f"{'a' * (DOCUMENT_NAME_LIMIT - 4)}.txt", ""),
|
||||||
(0, "AttributeError"),
|
(0, "AttributeError"),
|
||||||
(None, "AttributeError"),
|
(None, "AttributeError"),
|
||||||
("", "The extension of file can't be changed"),
|
("", "The extension of file can't be changed"),
|
||||||
|
|||||||
@ -113,16 +113,17 @@ class TestDocumentsUpload:
|
|||||||
assert str(excinfo.value) == "No file selected!", str(excinfo.value)
|
assert str(excinfo.value) == "No file selected!", str(excinfo.value)
|
||||||
|
|
||||||
@pytest.mark.p2
|
@pytest.mark.p2
|
||||||
def test_filename_exceeds_max_length(self, add_dataset_func, tmp_path):
|
def test_filename_max_length(self, add_dataset_func, tmp_path):
|
||||||
dataset = add_dataset_func
|
dataset = add_dataset_func
|
||||||
fp = create_txt_file(tmp_path / f"{'a' * (DOCUMENT_NAME_LIMIT - 3)}.txt")
|
fp = create_txt_file(tmp_path / f"{'a' * (DOCUMENT_NAME_LIMIT - 4)}.txt")
|
||||||
|
|
||||||
with fp.open("rb") as f:
|
with fp.open("rb") as f:
|
||||||
blob = f.read()
|
blob = f.read()
|
||||||
|
|
||||||
with pytest.raises(Exception) as excinfo:
|
documents = dataset.upload_documents([{"display_name": fp.name, "blob": blob}])
|
||||||
dataset.upload_documents([{"display_name": fp.name, "blob": blob}])
|
for document in documents:
|
||||||
assert str(excinfo.value) == "File name should be less than 128 bytes.", str(excinfo.value)
|
assert document.dataset_id == dataset.id, str(document)
|
||||||
|
assert document.name == fp.name, str(document)
|
||||||
|
|
||||||
@pytest.mark.p2
|
@pytest.mark.p2
|
||||||
def test_duplicate_files(self, add_dataset_func, tmp_path):
|
def test_duplicate_files(self, add_dataset_func, tmp_path):
|
||||||
|
|||||||
Reference in New Issue
Block a user