tusd: Remove non-ASCII characters from path-ids.

This commit is contained in:
Alex Vandiver
2024-09-25 19:02:46 +00:00
committed by Tim Abbott
parent 84280ed7c2
commit 287850d08d
3 changed files with 14 additions and 7 deletions

View File

@@ -114,18 +114,25 @@ def get_public_upload_root_url() -> str:
return upload_backend.get_public_upload_root_url() return upload_backend.get_public_upload_root_url()
def sanitize_name(value: str) -> str: def sanitize_name(value: str, *, strict: bool = False) -> str:
""" """Sanitizes a value to be safe to store in a Linux filesystem, in
Sanitizes a value to be safe to store in a Linux filesystem, in
S3, and in a URL. So Unicode is allowed, but not special S3, and in a URL. So Unicode is allowed, but not special
characters other than ".", "-", and "_". characters other than ".", "-", and "_".
In "strict" mode, it does not allow Unicode, allowing only ASCII
[A-Za-z0-9_] as word characters. This is for the benefit of tusd,
which is not Unicode-aware.
This implementation is based on django.utils.text.slugify; it is This implementation is based on django.utils.text.slugify; it is
modified by: modified by:
* adding '.' to the list of allowed characters. * adding '.' to the list of allowed characters.
* preserving the case of the value. * preserving the case of the value.
* not stripping trailing dashes and underscores. * not stripping trailing dashes and underscores.
""" """
if strict:
value = re.sub(r"[^A-Za-z0-9_ .-]", "", value).strip()
else:
value = unicodedata.normalize("NFKC", value) value = unicodedata.normalize("NFKC", value)
value = re.sub(r"[^\w\s.-]", "", value).strip() value = re.sub(r"[^\w\s.-]", "", value).strip()
value = re.sub(r"[-\s]+", "-", value) value = re.sub(r"[-\s]+", "-", value)

View File

@@ -182,7 +182,7 @@ class TusdPreCreateTest(ZulipTestCase):
def test_sanitize_filename(self) -> None: def test_sanitize_filename(self) -> None:
self.login("hamlet") self.login("hamlet")
request = self.request() request = self.request()
request.event.upload.meta_data["filename"] = "some thing! ... like this?" request.event.upload.meta_data["filename"] = "some thing! ... like this?"
result = self.client_post( result = self.client_post(
"/api/internal/tusd", "/api/internal/tusd",
request.model_dump(), request.model_dump(),

View File

@@ -111,7 +111,7 @@ def handle_upload_pre_create_hook(
return reject_upload(str(e), 413) return reject_upload(str(e), 413)
# Determine the path_id to store it at # Determine the path_id to store it at
file_name = sanitize_name(data.meta_data.get("filename", "")) file_name = sanitize_name(data.meta_data.get("filename", ""), strict=True)
path_id = upload_backend.generate_message_upload_path(str(user_profile.realm_id), file_name) path_id = upload_backend.generate_message_upload_path(str(user_profile.realm_id), file_name)
return tusd_json_response({"ChangeFileInfo": {"ID": path_id}}) return tusd_json_response({"ChangeFileInfo": {"ID": path_id}})