From 2f6c5a883e106aa82a570d3d1f243993284b70f3 Mon Sep 17 00:00:00 2001
From: Alex Vandiver
Date: Wed, 11 Jan 2023 16:36:41 +0000
Subject: [PATCH] CVE-2023-22735: Provide the Content-Disposition header from
 S3.

The Content-Type of user-provided uploads was provided by the browser
at initial upload time, and stored in S3; however, 04cf68b45ebb
switched to determining the Content-Disposition merely from the
filename. This makes uploads vulnerable to a stored XSS, wherein a
file uploaded with a content-type of `text/html` and an extension of
`.png` would be served to browsers as `Content-Disposition: inline`,
which is unsafe.

The `Content-Security-Policy` headers in the previous commit mitigate
this, but only for browsers which support them.

Revert parts of 04cf68b45ebb, specifically by allowing S3 to provide
the Content-Disposition header, and using the
`ResponseContentDisposition` argument when necessary to override it to
`attachment`. Because we expect S3 responses to vary based on this
argument, we include it in the cache key; since the query parameter
has dashes in it, we can't use the helper `$arg_` variables, and must
parse it from the query parameters manually.

Adding the disposition may decrease the cache hit rate somewhat, but
downloads are infrequent enough that it is unlikely to have a
noticeable effect. We take care to not adjust the cache key for
requests which do not specify the disposition.
---
 .../uploads-internal.conf                 | 18 +++++----
 .../templates/nginx/s3-cache.template.erb | 13 ++++++
 zerver/lib/upload/s3.py                   | 13 +++---
 zerver/views/upload.py                    | 40 +++++++++++--------
 4 files changed, 55 insertions(+), 29 deletions(-)
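
Illustration (not part of this patch): a minimal standalone boto3 sketch of
how the `ResponseContentDisposition` override surfaces in the signed URL's
query string, which is why the nginx cache key below must account for the
`response-content-disposition` query parameter. The bucket, key, and
credentials are invented.

    import boto3

    client = boto3.client(
        "s3",
        aws_access_key_id="dummy",
        aws_secret_access_key="dummy",
        region_name="us-east-1",
    )

    # No override: the default, inline-capable response.
    inline_url = client.generate_presigned_url(
        ClientMethod="get_object",
        Params={"Bucket": "example-auth-uploads", "Key": "2/abc/file.png"},
        ExpiresIn=60,
    )

    # Override, as get_signed_upload_url(..., force_download=True) requests.
    attachment_url = client.generate_presigned_url(
        ClientMethod="get_object",
        Params={
            "Bucket": "example-auth-uploads",
            "Key": "2/abc/file.png",
            "ResponseContentDisposition": "attachment",
        },
        ExpiresIn=60,
    )

    # Only the second URL carries the parameter the nginx map below keys on.
    assert "response-content-disposition=attachment" in attachment_url
    assert "response-content-disposition" not in inline_url
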
diff --git a/puppet/zulip/files/nginx/zulip-include-frontend/uploads-internal.conf b/puppet/zulip/files/nginx/zulip-include-frontend/uploads-internal.conf
index b7d20c10c1..426f5d6757 100644
--- a/puppet/zulip/files/nginx/zulip-include-frontend/uploads-internal.conf
+++ b/puppet/zulip/files/nginx/zulip-include-frontend/uploads-internal.conf
@@ -14,14 +14,14 @@ location ~ ^/internal/s3/(?<s3_hostname>[^/]+)/(?<s3_path>.*) {

     # Ensure that we only get _one_ of these headers: the one that
     # Django added, not the one from S3.
-    proxy_hide_header Content-Disposition;
     proxy_hide_header Cache-Control;
     proxy_hide_header Expires;
     proxy_hide_header Set-Cookie;
-    # We are _leaving_ S3 to provide Content-Type and Accept-Ranges
-    # headers, which are the two remaining headers which nginx would
-    # also pass through from the first response.  Django explicitly
-    # unsets the former, and does not set the latter.
+    # We are _leaving_ S3 to provide Content-Type,
+    # Content-Disposition, and Accept-Ranges headers, which are the
+    # three remaining headers which nginx would also pass through from
+    # the first response.  Django explicitly unsets the first, and
+    # does not set the latter two.

     # nginx does its own DNS resolution, which is necessary here to
     # resolve the IP of the S3 server.  Point it at the local caching
@@ -38,9 +38,11 @@ location ~ ^/internal/s3/(?<s3_hostname>[^/]+)/(?<s3_path>.*) {
     # `s3_disk_cache_size` and read frequency, set via
     # `s3_cache_inactive_time`.
     proxy_cache_valid 200 1y;
-    # Don't include query parameters in the cache key, since those
-    # include a time-based auth token
-    proxy_cache_key $download_url;
+
+    # We only include the requested content-disposition in the cache
+    # key, so that we cache "Content-Disposition: attachment"
+    # separately from the inline version.
+    proxy_cache_key $download_url$s3_disposition_cache_key;
 }

 # Internal file-serving
diff --git a/puppet/zulip/templates/nginx/s3-cache.template.erb b/puppet/zulip/templates/nginx/s3-cache.template.erb
index cf3ab7b971..23772e8432 100644
--- a/puppet/zulip/templates/nginx/s3-cache.template.erb
+++ b/puppet/zulip/templates/nginx/s3-cache.template.erb
@@ -4,3 +4,16 @@ proxy_cache_path /srv/zulip-uploaded-files-cache
                  keys_zone=uploads:<%= @s3_memory_cache_size %>
                  inactive=<%= @s3_cache_inactive_time %>
                  max_size=<%= @s3_disk_cache_size %>;
+
+# This is used when proxying requests to S3; we wish to know if the
+# proxied request is asking to override the Content-Disposition in its
+# response, so we can adjust our cache key.  Unfortunately, $arg_foo
+# style variables pre-parsed from query parameters don't work with
+# query parameters with dashes, so we parse it out by hand.  Despite
+# needing to be declared at the 'http' level, nginx applies maps like
+# this lazily, so this only affects internal S3 proxied requests.
+
+map $args $s3_disposition_cache_key {
+    default "";
+    "~(^|&)(?<param>response-content-disposition=[^&]+)" "?$param";
+}
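
Illustration (not part of this patch): approximating in Python what the map
above yields for $s3_disposition_cache_key, given a request's query string;
the sample query strings are invented.

    import re

    PATTERN = re.compile(r"(^|&)(?P<param>response-content-disposition=[^&]+)")

    def disposition_cache_key(args: str) -> str:
        match = PATTERN.search(args)
        return "?" + match.group("param") if match else ""

    # No override requested: the suffix stays empty, so the cache key (and
    # any existing cache entries) are unchanged.
    assert disposition_cache_key("X-Amz-Signature=abc123") == ""
    # Override requested: the suffix separates the "attachment" copy.
    assert (
        disposition_cache_key("response-content-disposition=attachment&X-Amz-Signature=abc123")
        == "?response-content-disposition=attachment"
    )
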
diff --git a/zerver/lib/upload/s3.py b/zerver/lib/upload/s3.py
index 0299324155..19b9d90cfd 100644
--- a/zerver/lib/upload/s3.py
+++ b/zerver/lib/upload/s3.py
@@ -91,7 +91,7 @@ def upload_image_to_s3(
     )


-def get_signed_upload_url(path: str) -> str:
+def get_signed_upload_url(path: str, force_download: bool = False) -> str:
     client = boto3.client(
         "s3",
         aws_access_key_id=settings.S3_KEY,
@@ -99,13 +99,16 @@ def get_signed_upload_url(path: str) -> str:
         region_name=settings.S3_REGION,
         endpoint_url=settings.S3_ENDPOINT_URL,
     )

+    params = {
+        "Bucket": settings.S3_AUTH_UPLOADS_BUCKET,
+        "Key": path,
+    }
+    if force_download:
+        params["ResponseContentDisposition"] = "attachment"
     return client.generate_presigned_url(
         ClientMethod="get_object",
-        Params={
-            "Bucket": settings.S3_AUTH_UPLOADS_BUCKET,
-            "Key": path,
-        },
+        Params=params,
         ExpiresIn=SIGNED_UPLOAD_URL_DURATION,
         HttpMethod="GET",
     )
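
Illustration (not part of this patch): how a signed URL from
get_signed_upload_url() is turned into the /internal/s3/<hostname>/<path>
path handled by the nginx location above; the hostname, key, and query
values are invented, and this only mirrors the serve_s3() logic below.

    from urllib.parse import quote, urlparse

    signed_url = (
        "https://example-auth-uploads.s3.amazonaws.com/2/abc/file.png"
        "?response-content-disposition=attachment&X-Amz-Signature=abc123"
    )
    parsed = urlparse(signed_url)
    assert parsed.hostname is not None
    internal_path = (
        "/internal/s3/" + parsed.hostname + quote(parsed.path) + "?" + parsed.query
    )
    # Django hands this back to nginx (via X-Accel-Redirect), which proxies it
    # to S3; the response-content-disposition parameter then feeds the
    # $s3_disposition_cache_key map above.
    print(internal_path)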

diff --git a/zerver/views/upload.py b/zerver/views/upload.py
index 423b9c85b5..25856ee97b 100644
--- a/zerver/views/upload.py
+++ b/zerver/views/upload.py
@@ -88,8 +88,8 @@ def internal_nginx_redirect(internal_path: str) -> HttpResponse:
     return response


-def serve_s3(request: HttpRequest, path_id: str, download: bool = False) -> HttpResponse:
-    url = get_signed_upload_url(path_id)
+def serve_s3(request: HttpRequest, path_id: str, force_download: bool = False) -> HttpResponse:
+    url = get_signed_upload_url(path_id, force_download=force_download)
     assert url.startswith("https://")

     if settings.DEVELOPMENT:
@@ -107,18 +107,30 @@ def serve_s3(request: HttpRequest, path_id: str, download: bool = False) -> Http
     assert parsed_url.query is not None
     escaped_path_parts = parsed_url.hostname + quote(parsed_url.path) + "?" + parsed_url.query
     response = internal_nginx_redirect("/internal/s3/" + escaped_path_parts)
-    patch_disposition_header(response, path_id, download)
+
+    # It is important that S3 generate both the Content-Type and
+    # Content-Disposition headers; when the file was uploaded, we
+    # stored the browser-provided value for the former, and set
+    # Content-Disposition according to if that was safe.  As such,
+    # only S3 knows if a given attachment is safe to inline; we only
+    # override Content-Disposition to "attachment", and do so by
+    # telling S3 that is what we want in the signed URL.
     patch_cache_control(response, private=True, immutable=True)
     return response


-def serve_local(request: HttpRequest, path_id: str, download: bool = False) -> HttpResponseBase:
+def serve_local(
+    request: HttpRequest, path_id: str, force_download: bool = False
+) -> HttpResponseBase:
     assert settings.LOCAL_FILES_DIR is not None
     local_path = os.path.join(settings.LOCAL_FILES_DIR, path_id)
     assert_is_local_storage_path("files", local_path)
     if not os.path.isfile(local_path):
         return HttpResponseNotFound("<p>File not found</p>")

+    mimetype, encoding = guess_type(path_id)
+    download = force_download or mimetype not in INLINE_MIME_TYPES
+
     if settings.DEVELOPMENT:
         # In development, we do not have the nginx server to offload
         # the response to; serve it directly ourselves.
@@ -138,7 +150,9 @@ def serve_local(request: HttpRequest, path_id: str, download: bool = False) -> H
 def serve_file_download_backend(
     request: HttpRequest, user_profile: UserProfile, realm_id_str: str, filename: str
 ) -> HttpResponseBase:
-    return serve_file(request, user_profile, realm_id_str, filename, url_only=False, download=True)
+    return serve_file(
+        request, user_profile, realm_id_str, filename, url_only=False, force_download=True
+    )


 def serve_file_backend(
@@ -167,7 +181,7 @@ def serve_file(
     realm_id_str: str,
     filename: str,
     url_only: bool = False,
-    download: bool = False,
+    force_download: bool = False,
 ) -> HttpResponseBase:
     path_id = f"{realm_id_str}/{filename}"
     realm = get_valid_realm_from_request(request)
@@ -181,13 +195,10 @@ def serve_file(
         url = generate_unauthed_file_access_url(path_id)
         return json_success(request, data=dict(url=url))

-    mimetype, encoding = guess_type(path_id)
-    download = download or mimetype not in INLINE_MIME_TYPES
-
     if settings.LOCAL_UPLOADS_DIR is not None:
-        return serve_local(request, path_id, download=download)
+        return serve_local(request, path_id, force_download=force_download)
     else:
-        return serve_s3(request, path_id, download=download)
+        return serve_s3(request, path_id, force_download=force_download)


 USER_UPLOADS_ACCESS_TOKEN_SALT = "user_uploads_"
@@ -221,13 +232,10 @@ def serve_file_unauthed_from_token(
     if path_id.split("/")[-1] != filename:
         raise JsonableError(_("Invalid filename"))

-    mimetype, encoding = guess_type(path_id)
-    download = mimetype not in INLINE_MIME_TYPES
-
     if settings.LOCAL_UPLOADS_DIR is not None:
-        return serve_local(request, path_id, download=download)
+        return serve_local(request, path_id)
     else:
-        return serve_s3(request, path_id, download=download)
+        return serve_s3(request, path_id)


 def serve_local_avatar_unauthed(request: HttpRequest, path: str) -> HttpResponseBase: