mirror of
				https://github.com/zulip/zulip.git
				synced 2025-11-04 05:53:43 +00:00 
			
		
		
		
	tusd: Set metadata correctly in S3.
The Content-Type, Content-Disposition, StorageClass, and general metadata are not set according to our patterns by tusd; copy the file to itself to update those properties.
This commit is contained in:
		
				
					committed by
					
						
						Tim Abbott
					
				
			
			
				
	
			
			
			
						parent
						
							287850d08d
						
					
				
				
					commit
					638c579c56
				
			@@ -605,6 +605,7 @@ def use_s3_backend(method: Callable[P, None]) -> Callable[P, None]:
 | 
			
		||||
        with (
 | 
			
		||||
            mock.patch("zerver.lib.upload.upload_backend", backend),
 | 
			
		||||
            mock.patch("zerver.worker.thumbnail.upload_backend", backend),
 | 
			
		||||
            mock.patch("zerver.views.tusd.upload_backend", backend),
 | 
			
		||||
        ):
 | 
			
		||||
            return method(*args, **kwargs)
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -87,6 +87,10 @@ def upload_content_to_s3(
 | 
			
		||||
    extra_metadata: dict[str, str] | None = None,
 | 
			
		||||
    filename: str | None = None,
 | 
			
		||||
) -> None:
 | 
			
		||||
    # Note that these steps are also replicated in
 | 
			
		||||
    # handle_upload_pre_finish_hook in zerver.views.tus, to update
 | 
			
		||||
    # properties for files uploaded via TUS.
 | 
			
		||||
 | 
			
		||||
    key = bucket.Object(path)
 | 
			
		||||
    metadata: dict[str, str] = {}
 | 
			
		||||
    if user_profile:
 | 
			
		||||
 
 | 
			
		||||
@@ -1,12 +1,15 @@
 | 
			
		||||
import os
 | 
			
		||||
 | 
			
		||||
import botocore
 | 
			
		||||
import orjson
 | 
			
		||||
from django.conf import settings
 | 
			
		||||
from django.test import override_settings
 | 
			
		||||
 | 
			
		||||
from zerver.lib.cache import cache_delete, get_realm_used_upload_space_cache_key
 | 
			
		||||
from zerver.lib.test_classes import ZulipTestCase
 | 
			
		||||
from zerver.lib.test_helpers import create_s3_buckets, use_s3_backend
 | 
			
		||||
from zerver.lib.upload import sanitize_name, upload_backend, upload_message_attachment
 | 
			
		||||
from zerver.lib.upload.s3 import S3UploadBackend
 | 
			
		||||
from zerver.lib.utils import assert_is_not_none
 | 
			
		||||
from zerver.models import Attachment
 | 
			
		||||
from zerver.views.tusd import TusEvent, TusHook, TusHTTPRequest, TusUpload
 | 
			
		||||
@@ -392,3 +395,77 @@ class TusdPreFinishTest(ZulipTestCase):
 | 
			
		||||
        assert settings.LOCAL_FILES_DIR is not None
 | 
			
		||||
        self.assertTrue(os.path.exists(os.path.join(settings.LOCAL_FILES_DIR, path_id)))
 | 
			
		||||
        self.assertFalse(os.path.exists(os.path.join(settings.LOCAL_FILES_DIR, f"{path_id}.info")))
 | 
			
		||||
 | 
			
		||||
    @use_s3_backend
 | 
			
		||||
    @override_settings(S3_UPLOADS_STORAGE_CLASS="STANDARD_IA")
 | 
			
		||||
    def test_s3_upload(self) -> None:
 | 
			
		||||
        hamlet = self.example_user("hamlet")
 | 
			
		||||
        bucket = create_s3_buckets(settings.S3_AUTH_UPLOADS_BUCKET)[0]
 | 
			
		||||
 | 
			
		||||
        upload_backend = S3UploadBackend()
 | 
			
		||||
        filename = "some 例 example.png"
 | 
			
		||||
        path_id = upload_backend.generate_message_upload_path(
 | 
			
		||||
            str(hamlet.realm.id), sanitize_name(filename, strict=True)
 | 
			
		||||
        )
 | 
			
		||||
        self.assertTrue(path_id.endswith("/some-example.png"))
 | 
			
		||||
        info = TusUpload(
 | 
			
		||||
            id=path_id,
 | 
			
		||||
            size=len("zulip!"),
 | 
			
		||||
            offset=0,
 | 
			
		||||
            size_is_deferred=False,
 | 
			
		||||
            meta_data={
 | 
			
		||||
                "filename": filename,
 | 
			
		||||
                "filetype": "image/png",
 | 
			
		||||
                "name": filename,
 | 
			
		||||
                "type": "image/png",
 | 
			
		||||
            },
 | 
			
		||||
            is_final=False,
 | 
			
		||||
            is_partial=False,
 | 
			
		||||
            partial_uploads=None,
 | 
			
		||||
            storage=None,
 | 
			
		||||
        )
 | 
			
		||||
        bucket.Object(path_id).put(
 | 
			
		||||
            Body=b"zulip!",
 | 
			
		||||
            ContentType="application/octet-stream",
 | 
			
		||||
            Metadata={k: v.encode("ascii", "replace").decode() for k, v in info.meta_data.items()},
 | 
			
		||||
        )
 | 
			
		||||
        bucket.Object(f"{path_id}.info").put(
 | 
			
		||||
            Body=info.model_dump_json().encode(),
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
        # Post the hook saying the file is in place
 | 
			
		||||
        self.login("hamlet")
 | 
			
		||||
        result = self.client_post(
 | 
			
		||||
            "/api/internal/tusd",
 | 
			
		||||
            self.request(info).model_dump(),
 | 
			
		||||
            content_type="application/json",
 | 
			
		||||
        )
 | 
			
		||||
        self.assertEqual(result.status_code, 200)
 | 
			
		||||
        result_json = result.json()
 | 
			
		||||
        self.assertEqual(result_json["HttpResponse"]["StatusCode"], 200)
 | 
			
		||||
        self.assertEqual(
 | 
			
		||||
            orjson.loads(result_json["HttpResponse"]["Body"]),
 | 
			
		||||
            {"url": f"/user_uploads/{path_id}", "filename": filename},
 | 
			
		||||
        )
 | 
			
		||||
        self.assertEqual(
 | 
			
		||||
            result_json["HttpResponse"]["Header"], {"Content-Type": "application/json"}
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
        attachment = Attachment.objects.get(path_id=path_id)
 | 
			
		||||
        self.assertEqual(attachment.size, len("zulip!"))
 | 
			
		||||
        self.assertEqual(attachment.content_type, "image/png")
 | 
			
		||||
 | 
			
		||||
        assert settings.LOCAL_FILES_DIR is None
 | 
			
		||||
        response = bucket.Object(path_id).get()
 | 
			
		||||
        self.assertEqual(response["ContentType"], "image/png")
 | 
			
		||||
        self.assertEqual(
 | 
			
		||||
            response["ContentDisposition"],
 | 
			
		||||
            "inline; filename*=utf-8''some%20%E4%BE%8B%20example.png",
 | 
			
		||||
        )
 | 
			
		||||
        self.assertEqual(response["StorageClass"], "STANDARD_IA")
 | 
			
		||||
        self.assertEqual(
 | 
			
		||||
            response["Metadata"],
 | 
			
		||||
            {"realm_id": str(hamlet.realm_id), "user_profile_id": str(hamlet.id)},
 | 
			
		||||
        )
 | 
			
		||||
        with self.assertRaises(botocore.exceptions.ClientError):
 | 
			
		||||
            bucket.Object(f"{path_id}.info").get()
 | 
			
		||||
 
 | 
			
		||||
@@ -5,6 +5,7 @@ from django.conf import settings
 | 
			
		||||
from django.contrib.auth.models import AnonymousUser
 | 
			
		||||
from django.db import transaction
 | 
			
		||||
from django.http import HttpRequest, HttpResponse, HttpResponseNotFound
 | 
			
		||||
from django.utils.http import content_disposition_header
 | 
			
		||||
from django.utils.translation import gettext as _
 | 
			
		||||
from django.views.decorators.csrf import csrf_exempt
 | 
			
		||||
from pydantic import BaseModel, ConfigDict, Field
 | 
			
		||||
@@ -24,6 +25,7 @@ from zerver.lib.upload import (
 | 
			
		||||
    sanitize_name,
 | 
			
		||||
    upload_backend,
 | 
			
		||||
)
 | 
			
		||||
from zerver.lib.upload.base import INLINE_MIME_TYPES
 | 
			
		||||
from zerver.models import UserProfile
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -119,25 +121,52 @@ def handle_upload_pre_create_hook(
 | 
			
		||||
def handle_upload_pre_finish_hook(
 | 
			
		||||
    request: HttpRequest, user_profile: UserProfile, data: TusUpload
 | 
			
		||||
) -> HttpResponse:
 | 
			
		||||
    metadata = data.meta_data
 | 
			
		||||
    filename = metadata.get("filename", "")
 | 
			
		||||
    # We want to store as the filename a version that clients are
 | 
			
		||||
    # likely to be able to accept via "Save as..."
 | 
			
		||||
    if filename in {"", ".", ".."}:
 | 
			
		||||
        filename = "uploaded-file"
 | 
			
		||||
 | 
			
		||||
    content_type = metadata.get("filetype")
 | 
			
		||||
    if not content_type:
 | 
			
		||||
        content_type = guess_type(filename)[0]
 | 
			
		||||
        if content_type is None:
 | 
			
		||||
            content_type = "application/octet-stream"
 | 
			
		||||
 | 
			
		||||
    # With an S3 backend, the filename we passed in pre_create's
 | 
			
		||||
    # data.id has a randomly-generated "mutlipart-id" appended with a
 | 
			
		||||
    # `+`.  Our path_ids cannot contain `+`, so we strip any suffix
 | 
			
		||||
    # starting with `+`.
 | 
			
		||||
    path_id = data.id.partition("+")[0]
 | 
			
		||||
 | 
			
		||||
    tus_metadata = data.meta_data
 | 
			
		||||
    filename = tus_metadata.get("filename", "")
 | 
			
		||||
 | 
			
		||||
    # We want to store as the filename a version that clients are
 | 
			
		||||
    # likely to be able to accept via "Save as..."
 | 
			
		||||
    if filename in {"", ".", ".."}:
 | 
			
		||||
        filename = "uploaded-file"
 | 
			
		||||
 | 
			
		||||
    content_type = tus_metadata.get("filetype")
 | 
			
		||||
    if not content_type:
 | 
			
		||||
        content_type = guess_type(filename)[0]
 | 
			
		||||
        if content_type is None:
 | 
			
		||||
            content_type = "application/octet-stream"
 | 
			
		||||
 | 
			
		||||
    if settings.LOCAL_UPLOADS_DIR is None:
 | 
			
		||||
        # We "copy" the file to itself to update the Content-Type,
 | 
			
		||||
        # Content-Disposition, and storage class of the data.  This
 | 
			
		||||
        # parallels the work from upload_content_to_s3 in
 | 
			
		||||
        # zerver.lib.uploads.s3
 | 
			
		||||
        s3_metadata = {
 | 
			
		||||
            "user_profile_id": str(user_profile.id),
 | 
			
		||||
            "realm_id": str(user_profile.realm_id),
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        is_attachment = content_type not in INLINE_MIME_TYPES
 | 
			
		||||
        content_disposition = content_disposition_header(is_attachment, filename) or "inline"
 | 
			
		||||
 | 
			
		||||
        from zerver.lib.upload.s3 import S3UploadBackend
 | 
			
		||||
 | 
			
		||||
        assert isinstance(upload_backend, S3UploadBackend)
 | 
			
		||||
        key = upload_backend.uploads_bucket.Object(path_id)
 | 
			
		||||
        key.copy_from(
 | 
			
		||||
            ContentType=content_type,
 | 
			
		||||
            ContentDisposition=content_disposition,
 | 
			
		||||
            CopySource={"Bucket": settings.S3_AUTH_UPLOADS_BUCKET, "Key": path_id},
 | 
			
		||||
            Metadata=s3_metadata,
 | 
			
		||||
            MetadataDirective="REPLACE",
 | 
			
		||||
            StorageClass=settings.S3_UPLOADS_STORAGE_CLASS,
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
    # https://tus.github.io/tusd/storage-backends/overview/#storage-format
 | 
			
		||||
    # tusd creates a .info file next to the upload, which we do not
 | 
			
		||||
    # need to keep.  Clean it up.
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user