Add Attachment model to keep track of uploads.
This commit adds the capability to keep track of and remove uploaded files. Unclaimed attachments are files that have been uploaded to the server but are not referenced by any message. A management command that removes unclaimed files older than one week is also included, along with tests for retrieving the files referenced by messages.
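At a glance, the lifecycle the new Attachment model supports. This is a minimal sketch using only helpers added in this commit; user_profile and message stand for existing UserProfile and Message objects, and the file name and path_id are illustrative:

    # Sketch only: how the pieces added in this commit fit together.
    from zerver.lib.upload import create_attachment, claim_attachment
    from zerver.lib.actions import do_delete_old_unclaimed_attachments

    # 1. Uploading a file records an (unclaimed) Attachment row.
    create_attachment('zulip.txt', '1/31/4CBjtTLYZhk66pZrF8hnYGwc/zulip.txt', user_profile)

    # 2. Sending a message whose body links to /user_uploads/... claims it,
    #    linking the Attachment to that Message via the new many-to-many field.
    claim_attachment('1/31/4CBjtTLYZhk66pZrF8hnYGwc/zulip.txt', message)

    # 3. Attachments never claimed by any message are eventually cleaned up.
    do_delete_old_unclaimed_attachments(weeks_ago=1)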
@@ -8,7 +8,7 @@ from django.contrib.sessions.models import Session
from zerver.lib.cache import flush_user_profile
from zerver.lib.context_managers import lockfile
from zerver.models import Realm, RealmEmoji, Stream, UserProfile, UserActivity, \
    Subscription, Recipient, Message, UserMessage, valid_stream_name, \
    Subscription, Recipient, Message, Attachment, UserMessage, valid_stream_name, \
    DefaultStream, UserPresence, Referral, PushDeviceToken, MAX_SUBJECT_LENGTH, \
    MAX_MESSAGE_LENGTH, get_client, get_stream, get_recipient, get_huddle, \
    get_user_profile_by_id, PreregistrationUser, get_display_recipient, \
@@ -17,7 +17,8 @@ from zerver.models import Realm, RealmEmoji, Stream, UserProfile, UserActivity,
    get_user_profile_by_email, get_stream_cache_key, to_dict_cache_key_id, \
    UserActivityInterval, get_active_user_dicts_in_realm, get_active_streams, \
    realm_filters_for_domain, RealmFilter, receives_offline_notifications, \
    ScheduledJob, realm_filters_for_domain, get_active_bot_dicts_in_realm
    ScheduledJob, realm_filters_for_domain, get_active_bot_dicts_in_realm, \
    get_old_unclaimed_attachments

from zerver.lib.avatar import get_avatar_url, avatar_url

@@ -57,6 +58,7 @@ from zerver.lib.push_notifications import num_push_devices_for_user, \
from zerver.lib.notifications import clear_followup_emails_queue
from zerver.lib.narrow import check_supported_events_narrow_filter
from zerver.lib.session_user import get_session_user
from zerver.lib.upload import claim_attachment, delete_message_image

import DNS
import ujson
@@ -609,6 +611,11 @@ def do_send_messages(messages):
    ums.extend(ums_to_create)
    UserMessage.objects.bulk_create(ums)

    # Claim attachments in message
    for message in messages:
        if Message.content_has_attachment(message['message'].content):
            do_claim_attachments(message)

    for message in messages:
        cache_save_message(message['message'])
        # Render Markdown etc. here and store (automatically) in
@@ -2998,3 +3005,26 @@ def do_get_streams(user_profile, include_public=True, include_subscribed=True,
    streams.sort(key=lambda elt: elt["name"])

    return streams

def do_claim_attachments(message):
    attachment_url_re = re.compile('[/\-]user[\-_]uploads[/\.-].*?(?=[ )]|\Z)')
    attachment_url_list = attachment_url_re.findall(message['message'].content)

    results = []
    for url in attachment_url_list:
        path_id = re.sub('[/\-]user[\-_]uploads[/\.-]', '', url)
        # Remove any extra '.' after the file extension. These are probably added by the user.
        path_id = re.sub('[.]+$', '', path_id, flags=re.M)

        if path_id is not None:
            is_claimed = claim_attachment(path_id, message['message'])
            results.append((path_id, is_claimed))

    return results

def do_delete_old_unclaimed_attachments(weeks_ago):
    old_unclaimed_attachments = get_old_unclaimed_attachments(weeks_ago)

    for attachment in old_unclaimed_attachments:
        delete_message_image(attachment.path_id)
        attachment.delete()

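As a quick, standalone illustration of the path extraction above (not part of the commit; the example URL is borrowed from the tests further down):

    import re

    content = "See [zulip.txt](http://localhost:9991/user_uploads/1/31/4CBjtTLYZhk66pZrF8hnYGwc/zulip.txt)"
    url = re.compile('[/\-]user[\-_]uploads[/\.-].*?(?=[ )]|\Z)').findall(content)[0]
    path_id = re.sub('[/\-]user[\-_]uploads[/\.-]', '', url)
    # path_id is now '1/31/4CBjtTLYZhk66pZrF8hnYGwc/zulip.txt', the key stored on Attachment.
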
@@ -14,6 +14,7 @@ from boto.s3.connection import S3Connection
from mimetypes import guess_type, guess_extension

from zerver.models import get_user_profile_by_id
from zerver.models import Attachment

import base64
import os
@@ -21,6 +22,7 @@ import re
from PIL import Image, ImageOps
from six.moves import cStringIO as StringIO
import random
import logging

# Performance Note:
#
@@ -133,6 +135,8 @@ def upload_message_image_s3(uploaded_file_name, content_type, file_data, user_pr
        user_profile,
        file_data
    )

    create_attachment(uploaded_file_name, s3_file_name, user_profile)
    return url

def get_signed_upload_url(path):
@@ -147,6 +151,20 @@ def get_realm_for_filename(path):
        return None
    return get_user_profile_by_id(key.metadata["user_profile_id"]).realm.id

def delete_message_image_s3(path_id):
    conn = S3Connection(settings.S3_KEY, settings.S3_SECRET_KEY)
    bucket = get_bucket(conn, settings.S3_AUTH_UPLOADS_BUCKET)

    # check if file exists
    key = bucket.get_key(path_id)
    if key is not None:
        bucket.delete_key(key)
        return True

    file_name = path_id.split("/")[-1]
    logging.warning("%s does not exist. Its entry in the database will be removed." % (file_name,))
    return False

def upload_avatar_image_s3(user_file, user_profile, email):
    content_type = guess_type(user_file.name)[0]
    bucket_name = settings.S3_AVATAR_BUCKET
@@ -195,9 +213,20 @@ def upload_message_image_local(uploaded_file_name, content_type, file_data, user
    ])

    write_local_file('files', path, file_data)

    create_attachment(uploaded_file_name, path, user_profile)
    return '/user_uploads/' + path

def delete_message_image_local(path_id):
    file_path = os.path.join(settings.LOCAL_UPLOADS_DIR, 'files', path_id)
    if os.path.isfile(file_path):
        # This removes the file but the empty folders still remain.
        os.remove(file_path)
        return True

    file_name = path_id.split("/")[-1]
    logging.warning("%s does not exist. Its entry in the database will be removed." % (file_name,))
    return False

def upload_avatar_image_local(user_file, user_profile, email):
    email_hash = user_avatar_hash(email)

@@ -212,9 +241,25 @@ def upload_avatar_image_local(user_file, user_profile, email):
if settings.LOCAL_UPLOADS_DIR is not None:
    upload_message_image = upload_message_image_local
    upload_avatar_image = upload_avatar_image_local
    delete_message_image = delete_message_image_local
else:
    upload_message_image = upload_message_image_s3
    upload_avatar_image = upload_avatar_image_s3
    delete_message_image = delete_message_image_s3

def claim_attachment(path_id, message):
    try:
        attachment = Attachment.objects.get(path_id=path_id)
        attachment.messages.add(message)
        attachment.save()
        return True
    except Attachment.DoesNotExist:
        raise JsonableError("The upload was not successful. Please re-upload the file in a new message.")

def create_attachment(file_name, path_id, user_profile):
    Attachment.objects.create(file_name=file_name, path_id=path_id, owner=user_profile)
    return True

def upload_message_image_through_web_client(request, user_file, user_profile):
    uploaded_file_name, content_type = get_file_info(request, user_file)

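The module-level dispatch above selects the storage backend once at import time, so callers such as do_delete_old_unclaimed_attachments never need to know whether local disk or S3 is in use. A small illustration of that design (settings values are whatever the deployment configures):

    # Illustration only: which delete function runs depends on settings.
    from django.conf import settings
    from zerver.lib import upload

    if settings.LOCAL_UPLOADS_DIR is not None:
        assert upload.delete_message_image is upload.delete_message_image_local
    else:
        assert upload.delete_message_image is upload.delete_message_image_s3
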
@@ -0,0 +1,44 @@
from __future__ import absolute_import
from __future__ import print_function

from django.core.management.base import BaseCommand

from zerver.lib.actions import do_delete_old_unclaimed_attachments
from zerver.models import Attachment, get_old_unclaimed_attachments

class Command(BaseCommand):
    help = """Remove unclaimed attachments from storage that are older than the
    given number of weeks. The default is one week."""

    def add_arguments(self, parser):
        parser.add_argument('-w', '--weeks',
                            dest='delta_weeks',
                            type=int,
                            default=1,
                            help="Age threshold in weeks; attachments older than this are removed.")

        parser.add_argument('-f', '--for-real',
                            dest='for_real',
                            action='store_true',
                            default=False,
                            help="Actually remove the files from storage.")

    def handle(self, *args, **options):

        delta_weeks = options['delta_weeks']
        print("Deleting unclaimed attachments older than %s weeks" % (delta_weeks,))
        print("")

        # Print the list of files that are going to be removed.
        old_attachments = get_old_unclaimed_attachments(delta_weeks)
        for old_attachment in old_attachments:
            print("%s created at %s" % (old_attachment.file_name, old_attachment.create_time))

        print("")
        if not options["for_real"]:
            print("This was a dry run. Pass -f to actually delete.")
            exit(1)

        do_delete_old_unclaimed_attachments(delta_weeks)
        print("")
        print("Unclaimed files deleted.")

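A hedged usage sketch for the command above. The command's file name is not shown in this diff, so remove_old_unclaimed_attachments below is an assumed name; adjust it to the actual module name:

    # Typical invocations (assumed command name):
    #   ./manage.py remove_old_unclaimed_attachments           # dry run, default one week
    #   ./manage.py remove_old_unclaimed_attachments -w 2 -f   # delete files older than two weeks
    #
    # The same from Python; note the dry-run branch calls exit(1).
    from django.core.management import call_command
    call_command('remove_old_unclaimed_attachments', '--weeks=2', '--for-real')
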
zerver/migrations/0015_attachment.py (new file, 27 lines)
@@ -0,0 +1,27 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

from django.db import models, migrations
import django.utils.timezone
from django.conf import settings


class Migration(migrations.Migration):

    dependencies = [
        ('zerver', '0014_realm_emoji_url_length'),
    ]

    operations = [
        migrations.CreateModel(
            name='Attachment',
            fields=[
                ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)),
                ('file_name', models.CharField(max_length=100, db_index=True)),
                ('path_id', models.TextField(db_index=True)),
                ('create_time', models.DateTimeField(default=django.utils.timezone.now, db_index=True)),
                ('messages', models.ManyToManyField(to='zerver.Message')),
                ('owner', models.ForeignKey(to=settings.AUTH_USER_MODEL)),
            ],
        ),
    ]

@@ -30,6 +30,8 @@ import pylibmc
import re
import ujson
import logging
import time
import datetime

bugdown = None # type: Any

@@ -1066,6 +1068,39 @@ def parse_usermessage_flags(val):
        mask <<= 1
    return flags

class Attachment(models.Model):
    MAX_FILENAME_LENGTH = 100
    file_name = models.CharField(max_length=MAX_FILENAME_LENGTH, db_index=True)
    # path_id is a storage-location-agnostic representation of the path of the file.
    # If the path of a file is http://localhost:9991/user_uploads/a/b/abc/temp_file.py
    # then its path_id will be a/b/abc/temp_file.py.
    path_id = models.TextField(db_index=True)
    owner = models.ForeignKey(UserProfile)
    messages = models.ManyToManyField(Message)
    create_time = models.DateTimeField(default=timezone.now, db_index=True)

    def __repr__(self):
        return (u"<Attachment: %s>" % (self.file_name,))

    def is_claimed(self):
        return self.messages.count() > 0

    def get_url(self):
        return "/user_uploads/%s" % (self.path_id,)

def get_attachments_by_owner_id(uid):
    return Attachment.objects.filter(owner=uid).select_related('owner')

def get_owners_from_file_name(file_name):
    # The returned value will be a list of owners, since different users can
    # upload the same file with the same filename.
    return Attachment.objects.filter(file_name=file_name).select_related('owner')

def get_old_unclaimed_attachments(weeks_ago):
    delta_weeks_ago = timezone.now() - datetime.timedelta(weeks=weeks_ago)
    old_attachments = Attachment.objects.filter(messages=None, create_time__lt=delta_weeks_ago)
    return old_attachments

class Subscription(models.Model):
    user_profile = models.ForeignKey(UserProfile)
    recipient = models.ForeignKey(Recipient)

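For orientation, a small sketch (not part of the commit) of how the model helpers above compose; the path_id value is illustrative and borrowed from the tests below:

    from zerver.models import Attachment, get_old_unclaimed_attachments

    attachment = Attachment.objects.get(path_id='1/31/4CBjtTLYZhk66pZrF8hnYGwc/zulip.txt')
    attachment.get_url()      # '/user_uploads/1/31/4CBjtTLYZhk66pZrF8hnYGwc/zulip.txt'
    attachment.is_claimed()   # True once a sent message references the file
    stale = get_old_unclaimed_attachments(1)  # unclaimed attachments older than one week
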
@@ -26,7 +26,7 @@ from zerver.lib.test_helpers import (

from zerver.models import (
    MAX_MESSAGE_LENGTH, MAX_SUBJECT_LENGTH,
    Client, Message, Realm, Recipient, Stream, Subscription, UserMessage, UserProfile,
    Client, Message, Realm, Recipient, Stream, Subscription, UserMessage, UserProfile, Attachment,
    get_display_recipient, get_recipient, get_realm, get_stream, get_user_profile_by_email,
)

@@ -36,6 +36,8 @@ from zerver.lib.actions import (
    do_add_subscription, do_create_user,
)

from zerver.lib.upload import create_attachment

import datetime
import time
import re
@@ -1407,7 +1409,7 @@ class StarTests(AuthedTestCase):
        self.assertEqual(sent_message.message.content, content)
        self.assertFalse(sent_message.flags.starred)

class AttachmentTest(TestCase):
class AttachmentTest(AuthedTestCase):
    def test_basics(self):
        self.assertFalse(Message.content_has_attachment('whatever'))
        self.assertFalse(Message.content_has_attachment('yo http://foo.com'))
@@ -1432,6 +1434,32 @@ class AttachmentTest(TestCase):
        self.assertTrue(Message.content_has_link('https://humbug-user-uploads.s3.amazonaws.com/sX_TIQx/screen-shot.jpg'))
        self.assertTrue(Message.content_has_link('https://humbug-user-uploads-test.s3.amazonaws.com/sX_TIQx/screen-shot.jpg'))

    def test_claim_attachment(self):
        # Create dummy attachment rows in the database.
        sender_email = "hamlet@zulip.com"
        user_profile = get_user_profile_by_email(sender_email)
        dummy_files = [
            ('zulip.txt', '1/31/4CBjtTLYZhk66pZrF8hnYGwc/zulip.txt'),
            ('temp_file.py', '1/31/4CBjtTLYZhk66pZrF8hnYGwc/temp_file.py'),
            ('abc.py', '1/31/4CBjtTLYZhk66pZrF8hnYGwc/abc.py')
        ]

        for file_name, path_id in dummy_files:
            create_attachment(file_name, path_id, user_profile)

        # Send a message referencing the attachments.
        self.subscribe_to_stream(sender_email, "Denmark")

        body = "Some files here ...[zulip.txt](http://localhost:9991/user_uploads/1/31/4CBjtTLYZhk66pZrF8hnYGwc/zulip.txt)" + \
            "http://localhost:9991/user_uploads/1/31/4CBjtTLYZhk66pZrF8hnYGwc/temp_file.py.... Some more...." + \
            "http://localhost:9991/user_uploads/1/31/4CBjtTLYZhk66pZrF8hnYGwc/abc.py"

        self.send_message(sender_email, "Denmark", Recipient.STREAM, body, "test")

        for file_name, path_id in dummy_files:
            attachment = Attachment.objects.get(path_id=path_id)
            self.assertTrue(attachment.is_claimed())

class CheckMessageTest(AuthedTestCase):
    def test_basic_check_message_call(self):
        sender = get_user_profile_by_email('othello@zulip.com')

@@ -6,7 +6,11 @@ from unittest import skip

from zerver.lib.test_helpers import AuthedTestCase
from zerver.lib.test_runner import slow
from zerver.lib.upload import sanitize_name
from zerver.lib.upload import sanitize_name, delete_message_image, delete_message_image_local, \
    delete_message_image_s3, upload_message_image_s3, upload_message_image_local
from zerver.models import Attachment, Recipient, get_user_profile_by_email, \
    get_old_unclaimed_attachments
from zerver.lib.actions import do_delete_old_unclaimed_attachments

import ujson
from six.moves import urllib
@@ -16,6 +20,12 @@ from boto.s3.key import Key
from six.moves import StringIO
import os
import shutil
import re
import datetime
from datetime import timedelta
from django.utils import timezone

from moto import mock_s3

TEST_AVATAR_DIR = os.path.join(os.path.dirname(__file__), 'images')

@@ -46,9 +56,13 @@ class FileUploadTest(AuthedTestCase):
        result = self.client.post("/json/upload_file")
        self.assert_json_error(result, "You must specify a file to upload")

    # This test goes through the code path for uploading files to LOCAL storage
    # when Zulip is in DEVELOPMENT mode.
    def test_file_upload_authed(self):
        """
        A call to /json/upload_file should return a uri and actually create an object.
        A call to /json/upload_file should return a uri and actually create an
        entry in the database. This entry will be marked unclaimed until a message
        references it.
        """
        self.login("hamlet@zulip.com")
        fp = StringIO("zulip!")
@@ -62,10 +76,72 @@ class FileUploadTest(AuthedTestCase):
        base = '/user_uploads/'
        self.assertEquals(base, uri[:len(base)])

        # In the future, local file requests will follow the same style as S3
        # requests; they will first be authenticated and redirected.
        response = self.client.get(uri)
        data = "".join(response.streaming_content)
        self.assertEquals("zulip!", data)

        # Check that the database has the attachment marked as unclaimed.
        entry = Attachment.objects.get(file_name='zulip.txt')
        self.assertEquals(entry.is_claimed(), False)

    def test_delete_old_unclaimed_attachments(self):
        # Upload some files and make them older than a week.
        self.login("hamlet@zulip.com")
        d1 = StringIO("zulip!")
        d1.name = "dummy_1.txt"
        result = self.client.post("/json/upload_file", {'file': d1})
        json = ujson.loads(result.content)
        uri = json["uri"]
        d1_path_id = re.sub('/user_uploads/', '', uri)

        d2 = StringIO("zulip!")
        d2.name = "dummy_2.txt"
        result = self.client.post("/json/upload_file", {'file': d2})
        json = ujson.loads(result.content)
        uri = json["uri"]
        d2_path_id = re.sub('/user_uploads/', '', uri)

        two_weeks_ago = timezone.now() - datetime.timedelta(weeks=2)
        d1_attachment = Attachment.objects.get(path_id=d1_path_id)
        d1_attachment.create_time = two_weeks_ago
        d1_attachment.save()
        d2_attachment = Attachment.objects.get(path_id=d2_path_id)
        d2_attachment.create_time = two_weeks_ago
        d2_attachment.save()

        # Send a message referencing only dummy_1.
        self.subscribe_to_stream("hamlet@zulip.com", "Denmark")
        body = "Some files here ...[zulip.txt](http://localhost:9991/user_uploads/" + d1_path_id + ")"
        self.send_message("hamlet@zulip.com", "Denmark", Recipient.STREAM, body, "test")

        # dummy_2 should no longer exist in the database or the uploads folder.
        do_delete_old_unclaimed_attachments(2)
        self.assertTrue(not Attachment.objects.filter(path_id=d2_path_id).exists())
        self.assertTrue(not delete_message_image(d2_path_id))

    def test_multiple_claim_attachments(self):
        """
        This test claims the same attachment from two different messages. The
        messages field of the Attachment row should end up containing both messages.
        """
        self.login("hamlet@zulip.com")
        d1 = StringIO("zulip!")
        d1.name = "dummy_1.txt"
        result = self.client.post("/json/upload_file", {'file': d1})
        json = ujson.loads(result.content)
        uri = json["uri"]
        d1_path_id = re.sub('/user_uploads/', '', uri)

        self.subscribe_to_stream("hamlet@zulip.com", "Denmark")
        body = "First message ...[zulip.txt](http://localhost:9991/user_uploads/" + d1_path_id + ")"
        self.send_message("hamlet@zulip.com", "Denmark", Recipient.STREAM, body, "test")
        body = "Second message ...[zulip.txt](http://localhost:9991/user_uploads/" + d1_path_id + ")"
        self.send_message("hamlet@zulip.com", "Denmark", Recipient.STREAM, body, "test")

        self.assertEquals(Attachment.objects.get(path_id=d1_path_id).messages.count(), 2)

    def tearDown(self):
        destroy_uploads()

@@ -136,6 +212,8 @@ class SetAvatarTest(AuthedTestCase):
    def tearDown(self):
        destroy_uploads()


class S3Test(AuthedTestCase):
    # full URIs in public bucket
    test_uris = [] # type: List[str]
