Extract zerver/lib/message.py.

This pulls message-related code from models.py into a new
module called message.py, and it starts to break some bugdown
dependencies.  All the methods here are basically related to
serializing Message objects as dictionaries for caches and
events.

    extract_message_dict
    stringify_message_dict
    message_to_dict
    message_to_dict_json
    MessageDict.to_dict_uncached
    MessageDict.to_dict_uncached_helper
    MessageDict.build_dict_from_raw_db_row
    MessageDict.build_message_dict

This fix also removes a circular dependency related
to get_avatar_url.

Also, there was kind of a latent bug in Message.need_to_render_content
where it was depending on other calls to Message to import bugdown
and set it globally in the namespace.  We really need to just
eliminate the function, since it's so small and used by code that
may be doing very sketchy things, but for now I just fix it.  (The
bug would possibly be exposed by moving build_message_dict out to the
library.)
This commit is contained in:
Steve Howell
2016-10-04 06:52:26 -07:00
parent ac994fdd51
commit 583a6bbadd
6 changed files with 250 additions and 215 deletions

View File

@@ -11,11 +11,14 @@ from django.core import validators
from django.contrib.sessions.models import Session
from zerver.lib.bugdown import BugdownRenderingException
from zerver.lib.cache import (
flush_user_profile,
to_dict_cache_key,
to_dict_cache_key_id,
)
from zerver.lib.context_managers import lockfile
from zerver.lib.message import (
MessageDict,
message_to_dict,
)
from zerver.models import Realm, RealmEmoji, Stream, UserProfile, UserActivity, \
Subscription, Recipient, Message, Attachment, UserMessage, valid_stream_name, \
Client, DefaultStream, UserPresence, Referral, PushDeviceToken, MAX_SUBJECT_LENGTH, \
@@ -768,8 +771,8 @@ def do_send_messages(messages):
event = dict(
type = 'message',
message = message['message'].id,
message_dict_markdown = message['message'].to_dict(apply_markdown=True),
message_dict_no_markdown = message['message'].to_dict(apply_markdown=False),
message_dict_markdown = message_to_dict(message['message'], apply_markdown=True),
message_dict_no_markdown = message_to_dict(message['message'], apply_markdown=False),
presences = presences)
users = [{'id': user.id,
'flags': user_flags.get(user.id, []),
@@ -2562,9 +2565,9 @@ def do_update_message(user_profile, message, subject, propagate_mode, content, r
for changed_message in changed_messages:
event['message_ids'].append(changed_message.id)
items_for_remote_cache[to_dict_cache_key(changed_message, True)] = \
(changed_message.to_dict_uncached(apply_markdown=True),)
(MessageDict.to_dict_uncached(changed_message, apply_markdown=True),)
items_for_remote_cache[to_dict_cache_key(changed_message, False)] = \
(changed_message.to_dict_uncached(apply_markdown=False),)
(MessageDict.to_dict_uncached(changed_message, apply_markdown=False),)
cache_set_many(items_for_remote_cache)
def user_info(um):

213
zerver/lib/message.py Normal file
View File

@@ -0,0 +1,213 @@
from __future__ import absolute_import
import datetime
import ujson
import zlib
from six import binary_type, text_type
from zerver.lib.avatar import get_avatar_url
from zerver.lib.avatar_hash import gravatar_hash
import zerver.lib.bugdown as bugdown
from zerver.lib.cache import cache_with_key, to_dict_cache_key
from zerver.lib.str_utils import force_bytes, dict_with_str_keys
from zerver.lib.timestamp import datetime_to_timestamp
from zerver.models import (
get_display_recipient_by_id,
Message,
Recipient,
)
from typing import Any, Dict, Optional
def extract_message_dict(message_bytes):
# type: (binary_type) -> Dict[str, Any]
return dict_with_str_keys(ujson.loads(zlib.decompress(message_bytes).decode("utf-8")))
def stringify_message_dict(message_dict):
# type: (Dict[str, Any]) -> binary_type
return zlib.compress(force_bytes(ujson.dumps(message_dict)))
def message_to_dict(message, apply_markdown):
# type: (Message, bool) -> Dict[str, Any]
json = message_to_dict_json(message, apply_markdown)
return extract_message_dict(json)
@cache_with_key(to_dict_cache_key, timeout=3600*24)
def message_to_dict_json(message, apply_markdown):
# type: (Message, bool) -> binary_type
return MessageDict.to_dict_uncached(message, apply_markdown)
class MessageDict(object):
@staticmethod
def to_dict_uncached(message, apply_markdown):
# type: (Message, bool) -> binary_type
dct = MessageDict.to_dict_uncached_helper(message, apply_markdown)
return stringify_message_dict(dct)
@staticmethod
def to_dict_uncached_helper(message, apply_markdown):
# type: (Message, bool) -> Dict[str, Any]
return MessageDict.build_message_dict(
apply_markdown = apply_markdown,
message = message,
message_id = message.id,
last_edit_time = message.last_edit_time,
edit_history = message.edit_history,
content = message.content,
subject = message.subject,
pub_date = message.pub_date,
rendered_content = message.rendered_content,
rendered_content_version = message.rendered_content_version,
sender_id = message.sender.id,
sender_email = message.sender.email,
sender_realm_domain = message.sender.realm.domain,
sender_full_name = message.sender.full_name,
sender_short_name = message.sender.short_name,
sender_avatar_source = message.sender.avatar_source,
sender_is_mirror_dummy = message.sender.is_mirror_dummy,
sending_client_name = message.sending_client.name,
recipient_id = message.recipient.id,
recipient_type = message.recipient.type,
recipient_type_id = message.recipient.type_id,
)
@staticmethod
def build_dict_from_raw_db_row(row, apply_markdown):
# type: (Dict[str, Any], bool) -> Dict[str, Any]
'''
row is a row from a .values() call, and it needs to have
all the relevant fields populated
'''
return MessageDict.build_message_dict(
apply_markdown = apply_markdown,
message = None,
message_id = row['id'],
last_edit_time = row['last_edit_time'],
edit_history = row['edit_history'],
content = row['content'],
subject = row['subject'],
pub_date = row['pub_date'],
rendered_content = row['rendered_content'],
rendered_content_version = row['rendered_content_version'],
sender_id = row['sender_id'],
sender_email = row['sender__email'],
sender_realm_domain = row['sender__realm__domain'],
sender_full_name = row['sender__full_name'],
sender_short_name = row['sender__short_name'],
sender_avatar_source = row['sender__avatar_source'],
sender_is_mirror_dummy = row['sender__is_mirror_dummy'],
sending_client_name = row['sending_client__name'],
recipient_id = row['recipient_id'],
recipient_type = row['recipient__type'],
recipient_type_id = row['recipient__type_id'],
)
@staticmethod
def build_message_dict(
apply_markdown,
message,
message_id,
last_edit_time,
edit_history,
content,
subject,
pub_date,
rendered_content,
rendered_content_version,
sender_id,
sender_email,
sender_realm_domain,
sender_full_name,
sender_short_name,
sender_avatar_source,
sender_is_mirror_dummy,
sending_client_name,
recipient_id,
recipient_type,
recipient_type_id,
):
# type: (bool, Message, int, datetime.datetime, text_type, text_type, text_type, datetime.datetime, text_type, Optional[int], int, text_type, text_type, text_type, text_type, text_type, bool, text_type, int, int, int) -> Dict[str, Any]
avatar_url = get_avatar_url(sender_avatar_source, sender_email)
display_recipient = get_display_recipient_by_id(
recipient_id,
recipient_type,
recipient_type_id
)
if recipient_type == Recipient.STREAM:
display_type = "stream"
elif recipient_type in (Recipient.HUDDLE, Recipient.PERSONAL):
assert not isinstance(display_recipient, text_type)
display_type = "private"
if len(display_recipient) == 1:
# add the sender in if this isn't a message between
# someone and his self, preserving ordering
recip = {'email': sender_email,
'domain': sender_realm_domain,
'full_name': sender_full_name,
'short_name': sender_short_name,
'id': sender_id,
'is_mirror_dummy': sender_is_mirror_dummy}
if recip['email'] < display_recipient[0]['email']:
display_recipient = [recip, display_recipient[0]]
elif recip['email'] > display_recipient[0]['email']:
display_recipient = [display_recipient[0], recip]
obj = dict(
id = message_id,
sender_email = sender_email,
sender_full_name = sender_full_name,
sender_short_name = sender_short_name,
sender_domain = sender_realm_domain,
sender_id = sender_id,
type = display_type,
display_recipient = display_recipient,
recipient_id = recipient_id,
subject = subject,
timestamp = datetime_to_timestamp(pub_date),
gravatar_hash = gravatar_hash(sender_email), # Deprecated June 2013
avatar_url = avatar_url,
client = sending_client_name)
obj['subject_links'] = bugdown.subject_links(sender_realm_domain.lower(), subject)
if last_edit_time != None:
obj['last_edit_timestamp'] = datetime_to_timestamp(last_edit_time)
obj['edit_history'] = ujson.loads(edit_history)
if apply_markdown:
if Message.need_to_render_content(rendered_content, rendered_content_version, bugdown.version):
if message is None:
# We really shouldn't be rendering objects in this method, but there is
# a scenario where we upgrade the version of bugdown and fail to run
# management commands to re-render historical messages, and then we
# need to have side effects. This method is optimized to not need full
# blown ORM objects, but the bugdown renderer is unfortunately highly
# coupled to Message, and we also need to persist the new rendered content.
# If we don't have a message object passed in, we get one here. The cost
# of going to the DB here should be overshadowed by the cost of rendering
# and updating the row.
# TODO: see #1379 to eliminate bugdown dependencies
message = Message.objects.select_related().get(id=message_id)
# It's unfortunate that we need to have side effects on the message
# in some cases.
rendered_content = message.render_markdown(content, sender_realm_domain)
message.set_rendered_content(rendered_content, True)
if rendered_content is not None:
obj['content'] = rendered_content
else:
obj['content'] = u'<p>[Zulip note: Sorry, we could not understand the formatting of your message]</p>'
obj['content_type'] = 'text/html'
else:
obj['content'] = content
obj['content_type'] = 'text/x-markdown'
return obj

View File

@@ -17,11 +17,10 @@ from zerver.lib.cache import cache_with_key, flush_user_profile, flush_realm, \
display_recipient_cache_key, cache_delete, \
get_stream_cache_key, active_user_dicts_in_realm_cache_key, \
active_bot_dicts_in_realm_cache_key, active_user_dict_fields, \
active_bot_dict_fields, flush_message, to_dict_cache_key
active_bot_dict_fields, flush_message
from zerver.lib.utils import make_safe_digest, generate_random_token
from zerver.lib.str_utils import force_bytes, ModelReprMixin, dict_with_str_keys
from zerver.lib.str_utils import ModelReprMixin
from django.db import transaction
from zerver.lib.avatar_hash import gravatar_hash
from zerver.lib.camo import get_camo_url
from django.utils import timezone
from django.contrib.sessions.models import Session
@@ -29,7 +28,6 @@ from zerver.lib.timestamp import datetime_to_timestamp
from django.db.models.signals import pre_save, post_save, post_delete
from django.core.validators import MinLengthValidator, RegexValidator
from django.utils.translation import ugettext_lazy as _
import zlib
from bitfield import BitField
from bitfield.types import BitHandler
@@ -37,9 +35,8 @@ from collections import defaultdict
from datetime import timedelta
import pylibmc
import re
import ujson
import logging
from six import binary_type, text_type
from six import text_type
import time
import datetime
@@ -774,14 +771,6 @@ def bulk_get_recipients(type, type_ids):
return generic_bulk_cached_fetch(cache_key_function, query_function, type_ids,
id_fetcher=lambda recipient: recipient.type_id)
def extract_message_dict(message_bytes):
# type: (binary_type) -> Dict[str, Any]
return dict_with_str_keys(ujson.loads(zlib.decompress(message_bytes).decode("utf-8")))
def stringify_message_dict(message_dict):
# type: (Dict[str, Any]) -> binary_type
return zlib.compress(force_bytes(ujson.dumps(message_dict)))
class Message(ModelReprMixin, models.Model):
sender = models.ForeignKey(UserProfile) # type: UserProfile
recipient = models.ForeignKey(Recipient) # type: Recipient
@@ -898,199 +887,17 @@ class Message(ModelReprMixin, models.Model):
import zerver.lib.bugdown as bugdown
# 'from zerver.lib import bugdown' gives mypy error in python 3 mode.
if Message.need_to_render_content(self.rendered_content, self.rendered_content_version):
if Message.need_to_render_content(self.rendered_content,
self.rendered_content_version,
bugdown.version):
return self.set_rendered_content(self.render_markdown(self.content, domain), save)
else:
return True
@staticmethod
def need_to_render_content(rendered_content, rendered_content_version):
# type: (Optional[text_type], int) -> bool
return rendered_content is None or rendered_content_version < bugdown.version
def to_dict(self, apply_markdown):
# type: (bool) -> Dict[str, Any]
return extract_message_dict(self.to_dict_json(apply_markdown))
@cache_with_key(to_dict_cache_key, timeout=3600*24)
def to_dict_json(self, apply_markdown):
# type: (bool) -> binary_type
return self.to_dict_uncached(apply_markdown)
def to_dict_uncached(self, apply_markdown):
# type: (bool) -> binary_type
return stringify_message_dict(self.to_dict_uncached_helper(apply_markdown))
def to_dict_uncached_helper(self, apply_markdown):
# type: (bool) -> Dict[str, Any]
return Message.build_message_dict(
apply_markdown = apply_markdown,
message = self,
message_id = self.id,
last_edit_time = self.last_edit_time,
edit_history = self.edit_history,
content = self.content,
subject = self.subject,
pub_date = self.pub_date,
rendered_content = self.rendered_content,
rendered_content_version = self.rendered_content_version,
sender_id = self.sender.id,
sender_email = self.sender.email,
sender_realm_domain = self.sender.realm.domain,
sender_full_name = self.sender.full_name,
sender_short_name = self.sender.short_name,
sender_avatar_source = self.sender.avatar_source,
sender_is_mirror_dummy = self.sender.is_mirror_dummy,
sending_client_name = self.sending_client.name,
recipient_id = self.recipient.id,
recipient_type = self.recipient.type,
recipient_type_id = self.recipient.type_id,
)
@staticmethod
def build_dict_from_raw_db_row(row, apply_markdown):
# type: (Dict[str, Any], bool) -> Dict[str, Any]
'''
row is a row from a .values() call, and it needs to have
all the relevant fields populated
'''
return Message.build_message_dict(
apply_markdown = apply_markdown,
message = None,
message_id = row['id'],
last_edit_time = row['last_edit_time'],
edit_history = row['edit_history'],
content = row['content'],
subject = row['subject'],
pub_date = row['pub_date'],
rendered_content = row['rendered_content'],
rendered_content_version = row['rendered_content_version'],
sender_id = row['sender_id'],
sender_email = row['sender__email'],
sender_realm_domain = row['sender__realm__domain'],
sender_full_name = row['sender__full_name'],
sender_short_name = row['sender__short_name'],
sender_avatar_source = row['sender__avatar_source'],
sender_is_mirror_dummy = row['sender__is_mirror_dummy'],
sending_client_name = row['sending_client__name'],
recipient_id = row['recipient_id'],
recipient_type = row['recipient__type'],
recipient_type_id = row['recipient__type_id'],
)
@staticmethod
def build_message_dict(
apply_markdown,
message,
message_id,
last_edit_time,
edit_history,
content,
subject,
pub_date,
rendered_content,
rendered_content_version,
sender_id,
sender_email,
sender_realm_domain,
sender_full_name,
sender_short_name,
sender_avatar_source,
sender_is_mirror_dummy,
sending_client_name,
recipient_id,
recipient_type,
recipient_type_id,
):
# type: (bool, Message, int, datetime.datetime, text_type, text_type, text_type, datetime.datetime, text_type, Optional[int], int, text_type, text_type, text_type, text_type, text_type, bool, text_type, int, int, int) -> Dict[str, Any]
# TODO: see #1379 to eliminate bugdown dependencies
global bugdown
if bugdown is None:
import zerver.lib.bugdown as bugdown
# 'from zerver.lib import bugdown' gives mypy error in python 3 mode.
# TODO: Remove this import cycle
from zerver.lib.avatar import get_avatar_url
avatar_url = get_avatar_url(sender_avatar_source, sender_email)
display_recipient = get_display_recipient_by_id(
recipient_id,
recipient_type,
recipient_type_id
)
if recipient_type == Recipient.STREAM:
display_type = "stream"
elif recipient_type in (Recipient.HUDDLE, Recipient.PERSONAL):
assert not isinstance(display_recipient, text_type)
display_type = "private"
if len(display_recipient) == 1:
# add the sender in if this isn't a message between
# someone and his self, preserving ordering
recip = {'email': sender_email,
'domain': sender_realm_domain,
'full_name': sender_full_name,
'short_name': sender_short_name,
'id': sender_id,
'is_mirror_dummy': sender_is_mirror_dummy}
if recip['email'] < display_recipient[0]['email']:
display_recipient = [recip, display_recipient[0]]
elif recip['email'] > display_recipient[0]['email']:
display_recipient = [display_recipient[0], recip]
obj = dict(
id = message_id,
sender_email = sender_email,
sender_full_name = sender_full_name,
sender_short_name = sender_short_name,
sender_domain = sender_realm_domain,
sender_id = sender_id,
type = display_type,
display_recipient = display_recipient,
recipient_id = recipient_id,
subject = subject,
timestamp = datetime_to_timestamp(pub_date),
gravatar_hash = gravatar_hash(sender_email), # Deprecated June 2013
avatar_url = avatar_url,
client = sending_client_name)
obj['subject_links'] = bugdown.subject_links(sender_realm_domain.lower(), subject)
if last_edit_time != None:
obj['last_edit_timestamp'] = datetime_to_timestamp(last_edit_time)
obj['edit_history'] = ujson.loads(edit_history)
if apply_markdown:
if Message.need_to_render_content(rendered_content, rendered_content_version):
if message is None:
# We really shouldn't be rendering objects in this method, but there is
# a scenario where we upgrade the version of bugdown and fail to run
# management commands to re-render historical messages, and then we
# need to have side effects. This method is optimized to not need full
# blown ORM objects, but the bugdown renderer is unfortunately highly
# coupled to Message, and we also need to persist the new rendered content.
# If we don't have a message object passed in, we get one here. The cost
# of going to the DB here should be overshadowed by the cost of rendering
# and updating the row.
# TODO: see #1379 to eliminate bugdown dependencies
message = Message.objects.select_related().get(id=message_id)
# It's unfortunate that we need to have side effects on the message
# in some cases.
rendered_content = message.render_markdown(content, sender_realm_domain)
message.set_rendered_content(rendered_content, True)
if rendered_content is not None:
obj['content'] = rendered_content
else:
obj['content'] = u'<p>[Zulip note: Sorry, we could not understand the formatting of your message]</p>'
obj['content_type'] = 'text/html'
else:
obj['content'] = content
obj['content_type'] = 'text/x-markdown'
return obj
def need_to_render_content(rendered_content, rendered_content_version, bugdown_version):
# type: (Optional[text_type], int, int) -> bool
return rendered_content is None or rendered_content_version < bugdown_version
def to_log_dict(self):
# type: () -> Dict[str, Any]

View File

@@ -9,6 +9,11 @@ from zerver.decorator import JsonableError
from zerver.lib.test_runner import slow
from zilencer.models import Deployment
from zerver.lib.message import (
MessageDict,
message_to_dict,
)
from zerver.lib.test_helpers import (
ZulipTestCase,
get_user_messages,
@@ -464,7 +469,7 @@ class MessageDictTest(ZulipTestCase):
rows = list(Message.get_raw_db_rows(ids))
for row in rows:
Message.build_dict_from_raw_db_row(row, False)
MessageDict.build_dict_from_raw_db_row(row, False)
delay = time.time() - t
# Make sure we don't take longer than 1ms per message to extract messages.
@@ -493,7 +498,7 @@ class MessageDictTest(ZulipTestCase):
# An important part of this test is to get the message through this exact code path,
# because there is an ugly hack we need to cover. So don't just say "row = message".
row = Message.get_raw_db_rows([message.id])[0]
dct = Message.build_dict_from_raw_db_row(row, apply_markdown=True)
dct = MessageDict.build_dict_from_raw_db_row(row, apply_markdown=True)
expected_content = '<p>hello <strong>world</strong></p>'
self.assertEqual(dct['content'], expected_content)
message = Message.objects.get(id=message.id)
@@ -789,8 +794,8 @@ class EditMessageTest(ZulipTestCase):
def check_message(self, msg_id, subject=None, content=None):
# type: (int, Optional[text_type], Optional[text_type]) -> Message
msg = Message.objects.get(id=msg_id)
cached = msg.to_dict(False)
uncached = msg.to_dict_uncached_helper(False)
cached = message_to_dict(msg, False)
uncached = MessageDict.to_dict_uncached_helper(msg, False)
self.assertEqual(cached, uncached)
if subject:
self.assertEqual(msg.topic_name(), subject)

View File

@@ -13,6 +13,9 @@ from zerver.models import (
get_display_recipient, get_recipient, get_realm, get_stream, get_user_profile_by_email,
)
from zerver.lib.actions import create_stream_if_needed, do_add_subscription
from zerver.lib.message import (
MessageDict,
)
from zerver.lib.narrow import (
build_narrow_filter,
)
@@ -816,7 +819,7 @@ class GetOldMessagesTest(ZulipTestCase):
m.rendered_content = m.rendered_content_version = None
m.content = 'test content'
# Use to_dict_uncached_helper directly to avoid having to deal with remote cache
d = m.to_dict_uncached_helper(True)
d = MessageDict.to_dict_uncached_helper(m, True)
self.assertEqual(d['content'], '<p>test content</p>')
def common_check_get_old_messages_query(self, query_params, expected):

View File

@@ -25,6 +25,11 @@ from zerver.lib.cache import (
generic_bulk_cached_fetch,
to_dict_cache_key_id,
)
from zerver.lib.message import (
MessageDict,
extract_message_dict,
stringify_message_dict,
)
from zerver.lib.response import json_success, json_error
from zerver.lib.sqlalchemy_utils import get_sqlalchemy_connection
from zerver.lib.utils import statsd
@@ -33,8 +38,7 @@ from zerver.lib.validator import \
from zerver.models import Message, UserProfile, Stream, Subscription, \
Realm, Recipient, UserMessage, bulk_get_recipients, get_recipient, \
get_user_profile_by_email, get_stream, \
parse_usermessage_flags, extract_message_dict, \
stringify_message_dict, \
parse_usermessage_flags, \
resolve_email_to_domain, get_realm, get_active_streams, \
bulk_get_streams, get_user_profile_by_id
@@ -636,7 +640,7 @@ def get_old_messages_backend(request, user_profile,
search_fields[message_id] = get_search_fields(rendered_content, subject,
content_matches, subject_matches)
cache_transformer = lambda row: Message.build_dict_from_raw_db_row(row, apply_markdown)
cache_transformer = lambda row: MessageDict.build_dict_from_raw_db_row(row, apply_markdown)
id_fetcher = lambda row: row['id']
message_dicts = generic_bulk_cached_fetch(lambda message_id: to_dict_cache_key_id(message_id, apply_markdown),