upload: Use tusd for resumable, larger uploads.

Currently, it handles two hook types: 'pre-create' (to verify that the
user is authenticated and the file size is within the limit) and
'pre-finish' (which creates an attachment row).

No secret is shared between Django and tusd for authentication of the
hooks endpoints, because none is necessary -- tusd forwards the
end-user's credentials, and the hook checks them like it would any
end-user request.  An end-user gaining access to the endpoint would be
able to do no more harm than via tusd or the normal file upload API.

Regardless, the previous commit has restricted access to the endpoint
at the nginx layer.

Co-authored-by: Brijmohan Siyag <brijsiyag@gmail.com>
This commit is contained in:
Alex Vandiver
2024-09-05 21:04:56 +00:00
committed by Tim Abbott
parent 02d3fb7666
commit 818c30372f
17 changed files with 817 additions and 15 deletions

View File

@@ -76,6 +76,18 @@ location ~ ^/internal/tornado/(\d+)(/.*)$ {
include /etc/nginx/zulip-include/proxy_longpolling;
}
location /api/v1/tus {
include /etc/nginx/zulip-include/api_headers;
include /etc/nginx/zulip-include/proxy;
# https://github.com/tus/tusd/blob/main/examples/nginx.conf
# Disable request body size limits, and stream the request and
# response from tusd directly.
client_max_body_size 0;
proxy_request_buffering off;
proxy_buffering off;
proxy_pass http://tusd;
}
# Send everything else to Django via uWSGI
location / {
include uwsgi_params;

View File

@@ -11,3 +11,7 @@ upstream localhost_sso {
upstream camo {
server 127.0.0.1:9292;
}
upstream tusd {
server 127.0.0.1:9900;
}

View File

@@ -2,6 +2,7 @@
# Default nginx configuration is included in extension app_frontend.pp.
class zulip::app_frontend_base {
include zulip::nginx
include zulip::tusd
include zulip::sasl_modules
include zulip::supervisor
include zulip::tornado_sharding

View File

@@ -74,6 +74,15 @@ class zulip::common {
'sha256' => 'f8cc99cb708cbd549e06099628ef286a5fdda73bb327d8c140d3014441bfefc2',
},
# https://github.com/tus/tusd/releases
'tusd' => {
'version' => '2.5.0',
'sha256' => {
'amd64' => 'f4cbdb8d228b28f46c3e7b9e29e5db262e7416f7ca1033c6c5e8186cf6c7381c',
'aarch64' => 'b2101951789857765d64c33d672a38b5825946163aa058b208fc862867cdc405',
},
},
# https://github.com/wal-g/wal-g/releases
'wal-g' => {
'version' => '3.0.0',

View File

@@ -0,0 +1,21 @@
# @summary Provide the tusd service binary
#
class zulip::tusd {
$version = $zulip::common::versions['tusd']['version']
$bin = "/srv/zulip-tusd-${version}/tusd"
# This tarball contains only a single file, which is extracted as $bin
zulip::external_dep { 'tusd':
version => $version,
url => "https://github.com/tus/tusd/releases/download/v${version}/tusd_linux_${zulip::common::goarch}.tar.gz",
tarball_prefix => "tusd_linux_${zulip::common::goarch}",
bin => [$bin],
cleanup_after => [Service[supervisor]],
}
file { '/usr/local/bin/tusd':
ensure => link,
target => $bin,
require => File[$bin],
before => Exec['Cleanup tusd'],
}
}

View File

@@ -24,6 +24,23 @@ stopasgroup=true ; Without this, we leak processes every restart
killasgroup=true ; Without this, we leak processes every restart
directory=/home/zulip/deployments/current/
[program:zulip-tus]
command=nice -n5 /home/zulip/deployments/current/manage.py runtusd 9900
environment=HTTP_proxy="<%= @proxy %>",HTTPS_proxy="<%= @proxy %>"
priority=100 ; the relative start priority (default 999)
autostart=true ; start at supervisord start (default: true)
autorestart=true ; whether/when to restart (default: unexpected)
stopsignal=INT ; signal used to kill process (default TERM)
stopwaitsecs=30 ; max num secs to wait b4 SIGKILL (default 10)
user=zulip ; setuid to this UNIX account to run the program
redirect_stderr=true ; redirect proc stderr to stdout (default false)
stdout_logfile=/var/log/zulip/tusd.log ; stdout log path, NONE for none; default AUTO
stdout_logfile_maxbytes=100MB ; max # logfile bytes b4 rotation (default 50MB)
stdout_logfile_backups=10 ; # of stdout logfile backups (default 10)
stopasgroup=true ; Without this, we leak processes every restart
killasgroup=true ; Without this, we leak processes every restart
directory=/home/zulip/deployments/current/
<% if @tornado_ports.length > 1 -%>
[program:zulip-tornado]
command=/home/zulip/deployments/current/manage.py runtornado 127.0.0.1:98%(process_num)02d --skip-checks

View File

@@ -105,6 +105,11 @@ if has_application_server():
# This is an optional service, so may or may not exist
workers.extend(list_supervisor_processes(["zulip-katex"]))
# This does have some Python code, which reads from settings.py,
# so we need to restart it on every deploy. A short outage during
# the restart is fine, as clients will transparently retry.
workers.append("zulip-tus")
if has_process_fts_updates():
workers.append("process-fts-updates")

View File

@@ -49,6 +49,7 @@ if has_application_server():
services.append("zulip-django")
services.append("zulip-tornado:*")
services.append("zulip-workers:*")
services.append("zulip-tus")
services.append("zulip-katex")
if has_application_server(once=True):
# These used to be included in "zulip-workers:*"; we may be

View File

@@ -421,6 +421,9 @@ def main(options: argparse.Namespace) -> NoReturn:
# Install transifex-cli.
run_as_root([*proxy_env, "tools/setup/install-transifex-cli"])
# Install tusd
run_as_root([*proxy_env, "tools/setup/install-tusd"])
setup_venvs.main()
run_as_root(["cp", REPO_STOPWORDS_PATH, TSEARCH_STOPWORDS_PATH])

View File

@@ -123,6 +123,7 @@ django_port = base_port + 1
tornado_port = base_port + 2
webpack_port = base_port + 3
help_center_port = base_port + 4
tusd_port = base_port + 5
os.chdir(os.path.join(os.path.dirname(__file__), ".."))
@@ -182,6 +183,12 @@ def server_processes() -> list[list[str]]:
"--quiet",
],
["./manage.py", "deliver_scheduled_messages"],
[
"./manage.py",
"runtusd",
f"{tusd_port}",
f"http://localhost:{django_port}/api/internal/tusd",
],
]
# NORMAL (but slower) operation:
@@ -333,6 +340,7 @@ build step and rerun `run-dev`.""")
hdrs.METH_ANY, r"/{path:json/events|api/v1/events}", partial(forward, tornado_port)
),
web.route(hdrs.METH_ANY, r"/{path:webpack/.*}", partial(forward, webpack_port)),
web.route(hdrs.METH_ANY, r"/{path:api/v1/tus/.*}", partial(forward, tusd_port)),
web.route(hdrs.METH_ANY, r"/{path:.*}", partial(forward, django_port)),
]
)
@@ -366,6 +374,9 @@ def print_listeners() -> None:
if help_center_dev_server_enabled:
ports.append((help_center_port, "Help center - Astro dev server"))
if not options.streamlined:
ports.append((tusd_port, "tusd"))
for port, label in ports:
print(f" {port}: {label}")
print()

34
tools/setup/install-tusd Executable file
View File

@@ -0,0 +1,34 @@
#!/usr/bin/env bash
set -eu
version=2.5.0
arch="$(uname -m)"
case $arch in
x86_64)
tarball="tusd_linux_amd64"
sha256=f4cbdb8d228b28f46c3e7b9e29e5db262e7416f7ca1033c6c5e8186cf6c7381c
;;
aarch64)
tarball="tusd_linux_arm64"
sha256=b2101951789857765d64c33d672a38b5825946163aa058b208fc862867cdc405
;;
esac
check_version() {
out="$(tusd --version)" && [[ "$out" = *"Version: v$version"* ]]
}
if ! check_version 2>/dev/null; then
set -x
tmpdir="$(mktemp -d)"
trap 'rm -r "$tmpdir"' EXIT
cd "$tmpdir"
curl_opts=(-fLO --retry 3)
curl "${curl_opts[@]}" "https://github.com/tus/tusd/releases/download/v${version}/${tarball}.tar.gz"
sha256sum -c <<<"${sha256} ${tarball}.tar.gz"
tar -xzf "${tarball}.tar.gz" --no-same-owner "${tarball}/tusd"
install -Dm755 "${tarball}/tusd" /usr/local/bin/tusd
check_version
fi

View File

@@ -49,4 +49,4 @@ API_FEATURE_LEVEL = 294 # Last bumped for `include_daectivated_groups` client c
# historical commits sharing the same major version, in which case a
# minor version bump suffices.
PROVISION_VERSION = (292, 3) # bumped 2024-08-25 to upgrade Transifex CLI
PROVISION_VERSION = (292, 4) # added tusd

View File

@@ -256,20 +256,31 @@ def validate_account_and_subdomain(request: HttpRequest, user_profile: UserProfi
if not user_profile.is_active:
raise UserDeactivatedError
# Either the subdomain matches, or we're accessing Tornado from
# and to localhost (aka spoofing a request as the user).
if not user_matches_subdomain(get_subdomain(request), user_profile) and not (
remote_addr = request.META.get("REMOTE_ADDR", None)
server_name = request.META.get("SERVER_NAME", None)
if (
settings.RUNNING_INSIDE_TORNADO
and request.META["SERVER_NAME"] == "127.0.0.1"
and request.META["REMOTE_ADDR"] == "127.0.0.1"
):
logging.warning(
"User %s (%s) attempted to access API on wrong subdomain (%s)",
user_profile.delivery_email,
user_profile.realm.subdomain,
get_subdomain(request),
)
raise JsonableError(_("Account is not associated with this subdomain"))
and remote_addr == "127.0.0.1"
and server_name == "127.0.0.1"
): # nocoverage
# We're accessing Tornado from and to localhost (aka spoofing
# a request as the user)
return
if remote_addr == "127.0.0.1" and server_name == "localhost": # nocoverage
# For tusd hook requests.
return
if user_matches_subdomain(get_subdomain(request), user_profile):
return
logging.warning(
"User %s (%s) attempted to access API on wrong subdomain (%s)",
user_profile.delivery_email,
user_profile.realm.subdomain,
get_subdomain(request),
)
raise JsonableError(_("Account is not associated with this subdomain"))
def access_user_by_api_key(

View File

@@ -0,0 +1,67 @@
import os
from typing import Any
from urllib.parse import SplitResult
from django.conf import settings
from django.core.management.base import BaseCommand, CommandError, CommandParser
from typing_extensions import override
class Command(BaseCommand):
help = """Starts the tusd server"""
@override
def add_arguments(self, parser: CommandParser) -> None:
parser.add_argument(
"listen", help="[Port, or address:port, to bind HTTP server to]", type=str
)
parser.add_argument(
"hooks_http",
help="[An HTTP endpoint to which hook events will be sent to]",
default="http://127.0.0.1/api/internal/tusd",
nargs="?",
)
@override
def handle(self, *args: Any, **options: Any) -> None:
listen = options["listen"]
if listen.isdigit():
addr, port = "127.0.0.1", int(listen)
else:
r = SplitResult("", listen, "", "", "")
if r.port is None:
raise CommandError(f"{listen!r} does not have a valid port number.")
addr, port = r.hostname or "127.0.0.1", r.port
hooks_http = options["hooks_http"]
# https://tus.github.io/tusd/getting-started/configuration/
# We do not set a maximum upload size, as the pre-create hooks
# will set the max size that they want, based on the intended
# use of the uploaded file.
tusd_args = [
"tusd",
"-base-path=/api/v1/tus/",
f"-port={port}",
f"-host={addr}",
"-behind-proxy",
f"-hooks-http={hooks_http}",
"-hooks-http-forward-headers=Cookie,Authorization",
"--hooks-enabled-events=pre-create,pre-finish",
"-disable-download",
]
env_vars = os.environ.copy()
if settings.LOCAL_UPLOADS_DIR is not None:
assert settings.LOCAL_FILES_DIR is not None
tusd_args.append(f"-upload-dir={settings.LOCAL_FILES_DIR}")
else:
tusd_args.append(f"-s3-bucket={settings.S3_AUTH_UPLOADS_BUCKET}")
if settings.S3_ENDPOINT_URL is not None:
tusd_args.append(f"-s3-endpoint={settings.S3_ENDPOINT_URL}")
assert settings.S3_KEY is not None
assert settings.S3_SECRET_KEY is not None
assert settings.S3_REGION is not None
env_vars["AWS_ACCESS_KEY_ID"] = settings.S3_KEY
env_vars["AWS_SECRET_ACCESS_KEY"] = settings.S3_SECRET_KEY
env_vars["AWS_REGION"] = settings.S3_REGION
os.execvpe("tusd", tusd_args, env_vars)

394
zerver/tests/test_tusd.py Normal file
View File

@@ -0,0 +1,394 @@
import os
import orjson
from django.conf import settings
from django.test import override_settings
from zerver.lib.cache import cache_delete, get_realm_used_upload_space_cache_key
from zerver.lib.test_classes import ZulipTestCase
from zerver.lib.upload import sanitize_name, upload_backend, upload_message_attachment
from zerver.lib.utils import assert_is_not_none
from zerver.models import Attachment
from zerver.views.tusd import TusEvent, TusHook, TusHTTPRequest, TusUpload
class TusdHooksTest(ZulipTestCase):
def test_non_localhost(self) -> None:
request = TusHook(
type="pre-create",
event=TusEvent(
http_request=TusHTTPRequest(
method="PATCH", uri="/api/v1/tus/thing", remote_addr="12.34.56.78", header={}
),
upload=TusUpload(
id="",
is_final=False,
is_partial=False,
meta_data={
"filename": "zulip.txt",
"filetype": "text/plain",
"name": "zulip.txt",
"type": "text/plain",
},
offset=0,
partial_uploads=None,
size=1234,
size_is_deferred=False,
storage=None,
),
),
)
result = self.client_post(
"/api/internal/tusd",
request.model_dump(),
content_type="application/json",
REMOTE_ADDR="1.2.3.4",
)
self.assertEqual(result.status_code, 403)
result = self.client_post(
"/api/internal/tusd",
request.model_dump(),
content_type="application/json",
REMOTE_ADDR="127.0.0.1",
)
self.assertEqual(result.status_code, 200)
def test_invalid_hook(self) -> None:
self.login("hamlet")
request = TusHook(
type="bogus",
event=TusEvent(
http_request=TusHTTPRequest(
method="PATCH", uri="/api/v1/tus/thing", remote_addr="12.34.56.78", header={}
),
upload=TusUpload(
id="",
is_final=False,
is_partial=False,
meta_data={
"filename": "zulip.txt",
"filetype": "text/plain",
"name": "zulip.txt",
"type": "text/plain",
},
offset=0,
partial_uploads=None,
size=1234,
size_is_deferred=False,
storage=None,
),
),
)
result = self.client_post(
"/api/internal/tusd", request.model_dump(), content_type="application/json"
)
self.assertEqual(result.status_code, 404)
def test_invalid_payload(self) -> None:
result = self.client_post(
"/api/internal/tusd",
{"type": "pre-create", "event": "moose"},
content_type="application/json",
)
self.assertEqual(result.status_code, 400)
class TusdPreCreateTest(ZulipTestCase):
def request(self) -> TusHook:
return TusHook(
type="pre-create",
event=TusEvent(
http_request=TusHTTPRequest(
method="PATCH", uri="/api/v1/tus/thing", remote_addr="12.34.56.78", header={}
),
upload=TusUpload(
id="",
is_final=False,
is_partial=False,
meta_data={
"filename": "zulip.txt",
"filetype": "text/plain",
"name": "zulip.txt",
"type": "text/plain",
},
offset=0,
partial_uploads=None,
size=1234,
size_is_deferred=False,
storage=None,
),
),
)
def test_tusd_pre_create_hook(self) -> None:
self.login("hamlet")
result = self.client_post(
"/api/internal/tusd",
self.request().model_dump(),
content_type="application/json",
)
self.assertEqual(result.status_code, 200)
result_json = result.json()
self.assertEqual(result_json.get("HttpResponse", None), None)
self.assertEqual(result_json.get("RejectUpload", False), False)
self.assertEqual(list(result_json["ChangeFileInfo"].keys()), ["ID"])
self.assertTrue(result_json["ChangeFileInfo"]["ID"].endswith("/zulip.txt"))
def test_unauthed_rejected(self) -> None:
result = self.client_post(
"/api/internal/tusd",
self.request().model_dump(),
content_type="application/json",
)
self.assertEqual(result.status_code, 200)
result_json = result.json()
self.assertEqual(result_json["HttpResponse"]["StatusCode"], 401)
self.assertEqual(
orjson.loads(result_json["HttpResponse"]["Body"]), {"message": "Unauthenticated upload"}
)
self.assertEqual(result_json["RejectUpload"], True)
def test_api_key_auth(self) -> None:
user_profile = self.example_user("hamlet")
result = self.client_post(
"/api/internal/tusd",
self.request().model_dump(),
content_type="application/json",
HTTP_AUTHORIZATION=self.encode_user(user_profile),
)
self.assertEqual(result.status_code, 200)
result_json = result.json()
self.assertEqual(result_json.get("HttpResponse", None), None)
self.assertEqual(result_json.get("RejectUpload", False), False)
self.assertEqual(list(result_json["ChangeFileInfo"].keys()), ["ID"])
self.assertTrue(result_json["ChangeFileInfo"]["ID"].endswith("/zulip.txt"))
def test_api_key_bad_auth(self) -> None:
result = self.client_post(
"/api/internal/tusd",
self.request().model_dump(),
content_type="application/json",
HTTP_AUTHORIZATION="Digest moose",
)
self.assertEqual(result.status_code, 200)
result_json = result.json()
self.assertEqual(result_json["HttpResponse"]["StatusCode"], 401)
self.assertEqual(
orjson.loads(result_json["HttpResponse"]["Body"]), {"message": "Unauthenticated upload"}
)
self.assertEqual(result_json["RejectUpload"], True)
def test_sanitize_filename(self) -> None:
self.login("hamlet")
request = self.request()
request.event.upload.meta_data["filename"] = "some thing! ... like this?"
result = self.client_post(
"/api/internal/tusd",
request.model_dump(),
content_type="application/json",
)
self.assertEqual(result.status_code, 200)
result_json = result.json()
self.assertTrue(result_json["ChangeFileInfo"]["ID"].endswith("/some-thing-...-like-this"))
@override_settings(MAX_FILE_UPLOAD_SIZE=1) # In MB
def test_file_too_big_failure(self) -> None:
self.login("hamlet")
request = self.request()
request.event.upload.size = 1024 * 1024 * 5 # 5MB
result = self.client_post(
"/api/internal/tusd",
request.model_dump(),
content_type="application/json",
)
self.assertEqual(result.status_code, 200)
result_json = result.json()
self.assertEqual(result_json["HttpResponse"]["StatusCode"], 413)
self.assertEqual(
orjson.loads(result_json["HttpResponse"]["Body"]),
{"message": "Uploaded file is larger than the allowed limit of 1 MiB"},
)
self.assertEqual(result_json["RejectUpload"], True)
def test_deferred_size(self) -> None:
self.login("hamlet")
request = self.request()
request.event.upload.size = None
request.event.upload.size_is_deferred = True
result = self.client_post(
"/api/internal/tusd",
request.model_dump(),
content_type="application/json",
)
self.assertEqual(result.status_code, 200)
result_json = result.json()
self.assertEqual(result_json["HttpResponse"]["StatusCode"], 411)
self.assertEqual(
orjson.loads(result_json["HttpResponse"]["Body"]),
{"message": "SizeIsDeferred is not supported"},
)
self.assertEqual(result_json["RejectUpload"], True)
def test_quota_exceeded(self) -> None:
hamlet = self.example_user("hamlet")
self.login("hamlet")
# We fake being almost at quota
realm = hamlet.realm
realm.custom_upload_quota_gb = 1
realm.save(update_fields=["custom_upload_quota_gb"])
path_id = upload_message_attachment("zulip.txt", "text/plain", b"zulip!", hamlet)[
0
].removeprefix("/user_uploads/")
attachment = Attachment.objects.get(path_id=path_id)
attachment.size = assert_is_not_none(realm.upload_quota_bytes()) - 10
attachment.save(update_fields=["size"])
cache_delete(get_realm_used_upload_space_cache_key(realm.id))
result = self.client_post(
"/api/internal/tusd",
self.request().model_dump(),
content_type="application/json",
)
self.assertEqual(result.status_code, 200)
result_json = result.json()
self.assertEqual(result_json["HttpResponse"]["StatusCode"], 413)
self.assertEqual(
orjson.loads(result_json["HttpResponse"]["Body"]),
{"message": "Upload would exceed your organization's upload quota."},
)
self.assertEqual(result_json["RejectUpload"], True)
class TusdPreFinishTest(ZulipTestCase):
def request(self, info: TusUpload) -> TusHook:
return TusHook(
type="pre-finish",
event=TusEvent(
upload=info,
http_request=TusHTTPRequest(
method="PATCH",
uri=f"/api/v1/tus/{info.id}",
remote_addr="12.34.56.78",
header={},
),
),
)
def test_tusd_pre_finish_hook(self) -> None:
self.login("hamlet")
hamlet = self.example_user("hamlet")
# Act like tusd does -- put the file and its .info in place
path_id = upload_backend.generate_message_upload_path(
str(hamlet.realm.id), sanitize_name("zulip.txt")
)
upload_backend.upload_message_attachment(
path_id, "zulip.txt", "text/plain", b"zulip!", hamlet
)
info = TusUpload(
id=path_id,
size=len("zulip!"),
offset=0,
size_is_deferred=False,
meta_data={
"filename": "zulip.txt",
"filetype": "text/plain",
"name": "zulip.txt",
"type": "text/plain",
},
is_final=False,
is_partial=False,
partial_uploads=None,
storage=None,
)
upload_backend.upload_message_attachment(
f"{path_id}.info",
"zulip.txt.info",
"application/octet-stream",
info.model_dump_json().encode(),
hamlet,
)
# Post the hook saying the file is in place
result = self.client_post(
"/api/internal/tusd",
self.request(info).model_dump(),
content_type="application/json",
)
self.assertEqual(result.status_code, 200)
result_json = result.json()
self.assertEqual(result_json["HttpResponse"]["StatusCode"], 200)
self.assertEqual(
orjson.loads(result_json["HttpResponse"]["Body"]),
{"url": f"/user_uploads/{path_id}", "filename": "zulip.txt"},
)
self.assertEqual(
result_json["HttpResponse"]["Header"], {"Content-Type": "application/json"}
)
attachment = Attachment.objects.get(path_id=path_id)
self.assertEqual(attachment.size, len("zulip!"))
self.assertEqual(attachment.content_type, "text/plain")
assert settings.LOCAL_FILES_DIR is not None
self.assertTrue(os.path.exists(os.path.join(settings.LOCAL_FILES_DIR, path_id)))
self.assertFalse(os.path.exists(os.path.join(settings.LOCAL_FILES_DIR, f"{path_id}.info")))
def test_no_metadata(self) -> None:
self.login("hamlet")
hamlet = self.example_user("hamlet")
# Act like tusd does -- put the file and its .info in place
path_id = upload_backend.generate_message_upload_path(
str(hamlet.realm.id), sanitize_name("")
)
upload_backend.upload_message_attachment(path_id, "", "ignored", b"zulip!", hamlet)
info = TusUpload(
id=path_id,
size=len("zulip!"),
offset=0,
size_is_deferred=False,
meta_data={},
is_final=False,
is_partial=False,
partial_uploads=None,
storage=None,
)
upload_backend.upload_message_attachment(
f"{path_id}.info",
".info",
"ignored",
info.model_dump_json().encode(),
hamlet,
)
# Post the hook saying the file is in place
result = self.client_post(
"/api/internal/tusd",
self.request(info).model_dump(),
content_type="application/json",
)
self.assertEqual(result.status_code, 200)
result_json = result.json()
self.assertEqual(result_json["HttpResponse"]["StatusCode"], 200)
self.assertEqual(
orjson.loads(result_json["HttpResponse"]["Body"]),
{"url": f"/user_uploads/{path_id}", "filename": "uploaded-file"},
)
self.assertEqual(
result_json["HttpResponse"]["Header"], {"Content-Type": "application/json"}
)
attachment = Attachment.objects.get(path_id=path_id)
self.assertEqual(attachment.size, len("zulip!"))
self.assertEqual(attachment.content_type, "application/octet-stream")
assert settings.LOCAL_FILES_DIR is not None
self.assertTrue(os.path.exists(os.path.join(settings.LOCAL_FILES_DIR, path_id)))
self.assertFalse(os.path.exists(os.path.join(settings.LOCAL_FILES_DIR, f"{path_id}.info")))

210
zerver/views/tusd.py Normal file
View File

@@ -0,0 +1,210 @@
from typing import Annotated, Any
import orjson
from django.conf import settings
from django.contrib.auth.models import AnonymousUser
from django.db import transaction
from django.http import HttpRequest, HttpResponse, HttpResponseNotFound
from django.utils.translation import gettext as _
from django.views.decorators.csrf import csrf_exempt
from pydantic import BaseModel, ConfigDict, Field
from pydantic.alias_generators import to_pascal
from zerver.decorator import get_basic_credentials, validate_api_key
from zerver.lib.exceptions import AccessDeniedError, JsonableError
from zerver.lib.mime_types import guess_type
from zerver.lib.rate_limiter import is_local_addr
from zerver.lib.typed_endpoint import JsonBodyPayload, typed_endpoint
from zerver.lib.upload import (
RealmUploadQuotaError,
attachment_vips_source,
check_upload_within_quota,
create_attachment,
delete_message_attachment,
sanitize_name,
upload_backend,
)
from zerver.models import UserProfile
# See https://tus.github.io/tusd/advanced-topics/hooks/ for the spec
# for these.
class TusUpload(BaseModel):
model_config = ConfigDict(populate_by_name=True, alias_generator=to_pascal)
id: Annotated[str, Field(alias="ID")]
size: int | None
size_is_deferred: bool
offset: int
meta_data: dict[str, str]
is_partial: bool
is_final: bool
partial_uploads: list[str] | None
storage: dict[str, str] | None
class TusHTTPRequest(BaseModel):
model_config = ConfigDict(populate_by_name=True, alias_generator=to_pascal)
method: str
uri: Annotated[str, Field(alias="URI")]
remote_addr: str
header: dict[str, list[str]]
class TusEvent(BaseModel):
model_config = ConfigDict(populate_by_name=True, alias_generator=to_pascal)
upload: TusUpload
http_request: Annotated[TusHTTPRequest, Field(alias="HTTPRequest")]
class TusHook(BaseModel):
model_config = ConfigDict(populate_by_name=True, alias_generator=to_pascal)
type: str
event: TusEvent
# Note that we do not raise JsonableError in these views
# because our client is not a consumer of the Zulip API -- it's tusd,
# which has its own ideas of what error responses look like.
def tusd_json_response(data: dict[str, Any]) -> HttpResponse:
return HttpResponse(
content=orjson.dumps(data, option=orjson.OPT_APPEND_NEWLINE),
content_type="application/json",
status=200,
)
def reject_upload(message: str, status_code: int) -> HttpResponse:
# Due to https://github.com/transloadit/uppy/issues/5460, uppy
# will retry responses with a statuscode of exactly 400, so we
# return 4xx status codes which are more specific to trigger an
# immediate rejection.
return tusd_json_response(
{
"HttpResponse": {
"StatusCode": status_code,
"Body": orjson.dumps({"message": message}).decode(),
"Header": {
"Content-Type": "application/json",
},
},
"RejectUpload": True,
}
)
def handle_upload_pre_create_hook(
request: HttpRequest, user_profile: UserProfile, data: TusUpload
) -> HttpResponse:
if data.size_is_deferred or data.size is None:
return reject_upload("SizeIsDeferred is not supported", 411)
if data.size > settings.MAX_FILE_UPLOAD_SIZE * 1024 * 1024:
return reject_upload(
_("Uploaded file is larger than the allowed limit of {max_file_size} MiB").format(
max_file_size=settings.MAX_FILE_UPLOAD_SIZE
),
413,
)
try:
check_upload_within_quota(user_profile.realm, data.size)
except RealmUploadQuotaError as e:
return reject_upload(str(e), 413)
# Determine the path_id to store it at
file_name = sanitize_name(data.meta_data.get("filename", ""))
path_id = upload_backend.generate_message_upload_path(str(user_profile.realm_id), file_name)
return tusd_json_response({"ChangeFileInfo": {"ID": path_id}})
def handle_upload_pre_finish_hook(
request: HttpRequest, user_profile: UserProfile, data: TusUpload
) -> HttpResponse:
metadata = data.meta_data
filename = metadata.get("filename", "")
# We want to store as the filename a version that clients are
# likely to be able to accept via "Save as..."
if filename in {"", ".", ".."}:
filename = "uploaded-file"
content_type = metadata.get("filetype")
if not content_type:
content_type = guess_type(filename)[0]
if content_type is None:
content_type = "application/octet-stream"
# With an S3 backend, the filename we passed in pre_create's
# data.id has a randomly-generated "mutlipart-id" appended with a
# `+`. Our path_ids cannot contain `+`, so we strip any suffix
# starting with `+`.
path_id = data.id.partition("+")[0]
# https://tus.github.io/tusd/storage-backends/overview/#storage-format
# tusd creates a .info file next to the upload, which we do not
# need to keep. Clean it up.
delete_message_attachment(f"{path_id}.info")
with transaction.atomic():
create_attachment(
filename,
path_id,
content_type,
attachment_vips_source(path_id),
user_profile,
user_profile.realm,
)
path = "/user_uploads/" + path_id
return tusd_json_response(
{
"HttpResponse": {
"StatusCode": 200,
"Body": orjson.dumps({"url": path, "filename": filename}).decode(),
"Header": {
"Content-Type": "application/json",
},
},
}
)
def authenticate_user(request: HttpRequest) -> UserProfile | AnonymousUser:
# This acts like the authenticated_rest_api_view wrapper, while
# allowing fallback to session-based request.user
if "Authorization" in request.headers:
try:
role, api_key = get_basic_credentials(request)
return validate_api_key(
request,
role,
api_key,
)
except JsonableError:
pass
# If that failed, fall back to session auth
return request.user
@csrf_exempt
@typed_endpoint
def handle_tusd_hook(
request: HttpRequest,
*,
payload: JsonBodyPayload[TusHook],
) -> HttpResponse:
# Make sure this came from localhost
if not is_local_addr(request.META["REMOTE_ADDR"]):
raise AccessDeniedError
maybe_user = authenticate_user(request)
if isinstance(maybe_user, AnonymousUser):
return reject_upload("Unauthenticated upload", 401)
hook_name = payload.type
if hook_name == "pre-create":
return handle_upload_pre_create_hook(request, maybe_user, payload.event.upload)
elif hook_name == "pre-finish":
return handle_upload_pre_finish_hook(request, maybe_user, payload.event.upload)
else:
return HttpResponseNotFound()

View File

@@ -176,6 +176,7 @@ from zerver.views.streams import (
)
from zerver.views.submessage import process_submessage
from zerver.views.thumbnail import backend_serve_thumbnail
from zerver.views.tusd import handle_tusd_hook
from zerver.views.typing import send_notification_backend
from zerver.views.unsubscribe import email_unsubscribe
from zerver.views.upload import (
@@ -732,11 +733,12 @@ for app_name in settings.EXTRA_INSTALLED_APPS:
urls += [path("", include(f"{app_name}.urls"))]
i18n_urls += import_string(f"{app_name}.urls.i18n_urlpatterns")
# Used internally for communication between command-line, Django,
# Used internally for communication between command-line, tusd, Django,
# and Tornado processes
urls += [
path("api/internal/email_mirror_message", email_mirror_message),
path("api/internal/notify_tornado", notify),
path("api/internal/tusd", handle_tusd_hook),
path("api/internal/web_reload_clients", web_reload_clients),
path("api/v1/events/internal", get_events_internal),
]