Rewrite sanitize_name to better preserve filenames.

The previous version of sanitize_name dropped all unicode characters
and mangled filenames with multiple `.`s in the extension, leading to
confusing URLs for files uploaded to Zulip.

Fixes #321.

[tweaked significantly by tabbott]
This commit is contained in:
Varshit
2016-03-13 14:59:33 +05:30
committed by Tim Abbott
parent 4a50336476
commit e0ef1a991e
2 changed files with 36 additions and 8 deletions

View File

@@ -2,6 +2,9 @@ from __future__ import absolute_import
from django.conf import settings
from django.template.defaultfilters import slugify
from django.utils.encoding import force_text
from django.utils.safestring import mark_safe
import unicodedata
from zerver.lib.avatar import user_avatar_hash
@@ -13,6 +16,7 @@ from zerver.models import get_user_profile_by_id
import base64
import os
import re
from PIL import Image, ImageOps
from six.moves import cStringIO as StringIO
import random
@@ -28,15 +32,26 @@ import random
# This is great, because passing the pseudofile object that Django gives
# you to boto would be a pain.
# To come up with a s3 key we randomly generate a "directory". The "file
# name" is the original filename provided by the user run through Django's
# slugify.
# To come up with an S3 key, we randomly generate a "directory". The
# "file name" is the original filename provided by the user, run
# through a sanitization function.
def sanitize_name(name):
    """
    Slugify an uploaded file name, treating the text after the last
    "." as the extension.

    The part before the last "." and the part after it are each run
    through Django's slugify and re-joined with a single ".".  A name
    containing no "." is treated as all extension, so the result
    starts with ".".
    """
    # rpartition yields ('', '', name) when there is no "." at all,
    # which matches splitting on "." and taking the last piece as the
    # extension.
    stem, _dot, suffix = name.rpartition('.')
    return "%s.%s" % (slugify(stem), slugify(suffix))
def sanitize_name(value):
    """
    Sanitizes a value to be safe to store in a Linux filesystem, in
    S3, and in a URL.  So unicode is allowed, but not special
    characters other than ".", "-", and "_".

    This implementation is based on django.utils.text.slugify; it is
    modified by:
    * hardcoding allow_unicode=True.
    * adding '.' and '_' to the list of allowed characters.
    * preserving the case of the value.

    If nothing usable is left after sanitizing (the result would be
    "", "." or ".."), the placeholder "uploaded-file" is returned
    instead, so the result is never empty and never a relative
    directory reference.

    The result is passed through mark_safe before being returned.
    """
    value = force_text(value)
    # Apply Unicode compatibility normalization (NFKC) before filtering.
    value = unicodedata.normalize('NFKC', value)
    # Drop everything except word characters, whitespace, ".", "_" and
    # "-", then trim surrounding whitespace.  Raw strings keep the
    # regex escapes from being read as (invalid) string escapes.
    value = re.sub(r'[^\w\s._-]', '', value, flags=re.U).strip()
    # Collapse each run of dashes and/or whitespace into a single "-".
    value = re.sub(r'[-\s]+', '-', value, flags=re.U)
    # An empty name, "." or ".." is not a usable file name in a path,
    # a storage key, or a URL; substitute a fixed placeholder.
    if value in ('', '.', '..'):
        value = 'uploaded-file'
    return mark_safe(value)
def random_name(bytes=60):
    """
    Return a random, URL-safe text token.

    `bytes` is the number of random bytes read from os.urandom; the
    token is their URL-safe base64 encoding, so it is longer than
    `bytes` characters (4 characters per 3 bytes, padded with "=").

    The encoded value is decoded to text so that it can be combined
    with other text (e.g. a sanitized file name) on Python 3, where
    base64 returns a bytes object.
    """
    entropy = os.urandom(bytes)
    # URL-safe base64 output is pure ASCII, so this decode cannot fail.
    return base64.urlsafe_b64encode(entropy).decode('ascii')