diff --git a/zerver/lib/upload.py b/zerver/lib/upload.py index e3a9f44820..e732c0e54e 100644 --- a/zerver/lib/upload.py +++ b/zerver/lib/upload.py @@ -10,7 +10,7 @@ import shutil import unicodedata import urllib from datetime import timedelta -from mimetypes import guess_extension, guess_type +from mimetypes import guess_type from typing import IO, Any, Callable, Optional, Tuple from urllib.parse import urljoin @@ -21,7 +21,6 @@ from botocore.client import Config from django.conf import settings from django.core.files.uploadedfile import UploadedFile from django.core.signing import BadSignature, TimestampSigner -from django.http import HttpRequest from django.urls import reverse from django.utils.translation import gettext as _ from markupsafe import Markup as mark_safe @@ -374,19 +373,22 @@ def check_upload_within_quota(realm: Realm, uploaded_file_size: int) -> None: raise RealmUploadQuotaError(_("Upload would exceed your organization's upload quota.")) -def get_file_info(request: HttpRequest, user_file: UploadedFile) -> Tuple[str, Optional[str]]: +def get_file_info(user_file: UploadedFile) -> Tuple[str, str]: uploaded_file_name = user_file.name assert uploaded_file_name is not None - content_type = request.GET.get("mimetype") - if content_type is None: + + content_type = user_file.content_type + # It appears Django's UploadedFile.content_type defaults to an empty string, + # even though the value is documented as `str | None`. So we check for both. + if content_type is None or content_type == "": guessed_type = guess_type(uploaded_file_name)[0] if guessed_type is not None: content_type = guessed_type - else: - extension = guess_extension(content_type) - if extension is not None: - uploaded_file_name = uploaded_file_name + extension + else: + # Fallback to application/octet-stream if unable to determine a + # different content-type from the filename. + content_type = "application/octet-stream" uploaded_file_name = urllib.parse.unquote(uploaded_file_name) @@ -1150,9 +1152,9 @@ def create_attachment( def upload_message_image_from_request( - request: HttpRequest, user_file: UploadedFile, user_profile: UserProfile, user_file_size: int + user_file: UploadedFile, user_profile: UserProfile, user_file_size: int ) -> str: - uploaded_file_name, content_type = get_file_info(request, user_file) + uploaded_file_name, content_type = get_file_info(user_file) return upload_message_file( uploaded_file_name, user_file_size, content_type, user_file.read(), user_profile ) diff --git a/zerver/tests/test_upload.py b/zerver/tests/test_upload.py index c36c3b9915..90151fa611 100644 --- a/zerver/tests/test_upload.py +++ b/zerver/tests/test_upload.py @@ -17,6 +17,7 @@ from django.test import override_settings from django.utils.timezone import now as timezone_now from django_sendfile.utils import _get_sendfile from PIL import Image +from urllib3 import encode_multipart_formdata import zerver.lib.upload from zerver.actions.create_realm import do_create_realm @@ -142,20 +143,6 @@ class FileUploadTest(UploadSerializeMixin, ZulipTestCase): self.assertEqual(response.status_code, 200) self.assert_streaming_content(response, b"zulip!") - def test_upload_file_with_supplied_mimetype(self) -> None: - """ - When files are copied into the system clipboard and pasted for upload - the filename may not be supplied so the extension is determined from a - query string parameter. - """ - fp = StringIO("zulip!") - fp.name = "pasted_file" - result = self.api_post( - self.example_user("hamlet"), "/api/v1/user_uploads?mimetype=image/png", {"file": fp} - ) - uri = self.assert_json_success(result)["uri"] - self.assertTrue(uri.endswith("pasted_file.png")) - def test_file_too_big_failure(self) -> None: """ Attempting to upload big files should fail. @@ -192,6 +179,23 @@ class FileUploadTest(UploadSerializeMixin, ZulipTestCase): result = self.client_post("/json/user_uploads") self.assert_json_error(result, "You must specify a file to upload") + def test_guess_content_type_from_filename(self) -> None: + """ + Test coverage for files without content-type in the metadata; + in which case we try to guess the content-type from the filename. + """ + data, content_type = encode_multipart_formdata({"file": ("somefile", b"zulip!", None)}) + result = self.api_post( + self.example_user("hamlet"), "/api/v1/user_uploads", data, content_type=content_type + ) + self.assert_json_success(result) + + data, content_type = encode_multipart_formdata({"file": ("somefile.txt", b"zulip!", None)}) + result = self.api_post( + self.example_user("hamlet"), "/api/v1/user_uploads", data, content_type=content_type + ) + self.assert_json_success(result) + # This test will go through the code path for uploading files onto LOCAL storage # when Zulip is in DEVELOPMENT mode. def test_file_upload_authed(self) -> None: diff --git a/zerver/views/upload.py b/zerver/views/upload.py index d737bbdc17..24bfb6e429 100644 --- a/zerver/views/upload.py +++ b/zerver/views/upload.py @@ -149,5 +149,5 @@ def upload_file_backend(request: HttpRequest, user_profile: UserProfile) -> Http ) check_upload_within_quota(user_profile.realm, file_size) - uri = upload_message_image_from_request(request, user_file, user_profile, file_size) + uri = upload_message_image_from_request(user_file, user_profile, file_size) return json_success(request, data={"uri": uri})