tusd: Remove non-ASCII characters from path-ids.

This commit is contained in:
Alex Vandiver
2024-09-25 19:02:46 +00:00
committed by Tim Abbott
parent 84280ed7c2
commit 287850d08d
3 changed files with 14 additions and 7 deletions

View File

@@ -114,18 +114,25 @@ def get_public_upload_root_url() -> str:
return upload_backend.get_public_upload_root_url()
def sanitize_name(value: str) -> str:
"""
Sanitizes a value to be safe to store in a Linux filesystem, in
def sanitize_name(value: str, *, strict: bool = False) -> str:
"""Sanitizes a value to be safe to store in a Linux filesystem, in
S3, and in a URL. So Unicode is allowed, but not special
characters other than ".", "-", and "_".
In "strict" mode, Unicode is not allowed: only ASCII [A-Za-z0-9_]
counts as word characters (spaces, ".", and "-" are still permitted).
This is for the benefit of tusd, which is not Unicode-aware.
This implementation is based on django.utils.text.slugify; it is
modified by:
* adding '.' to the list of allowed characters.
* preserving the case of the value.
* not stripping trailing dashes and underscores.
"""
if strict:
value = re.sub(r"[^A-Za-z0-9_ .-]", "", value).strip()
else:
value = unicodedata.normalize("NFKC", value)
value = re.sub(r"[^\w\s.-]", "", value).strip()
value = re.sub(r"[-\s]+", "-", value)

View File

@@ -182,7 +182,7 @@ class TusdPreCreateTest(ZulipTestCase):
def test_sanitize_filename(self) -> None:
self.login("hamlet")
request = self.request()
request.event.upload.meta_data["filename"] = "some thing! ... like this?"
request.event.upload.meta_data["filename"] = "some thing! ... like this?"
result = self.client_post(
"/api/internal/tusd",
request.model_dump(),

View File

@@ -111,7 +111,7 @@ def handle_upload_pre_create_hook(
return reject_upload(str(e), 413)
# Determine the path_id to store it at
file_name = sanitize_name(data.meta_data.get("filename", ""))
file_name = sanitize_name(data.meta_data.get("filename", ""), strict=True)
path_id = upload_backend.generate_message_upload_path(str(user_profile.realm_id), file_name)
return tusd_json_response({"ChangeFileInfo": {"ID": path_id}})