Add possible_mentions() to speed up rendering.

We now triage message content for possible mentions before
going to the cache/DB to get name info.  This will create an
extra data hop for messages with mentions, but it will save
a fairly expensive cache lookup for most messages.  (This will
be especially helpful for large realms.)

[Note that we need a subsequent commit to actually make the speedup
happen here, since avatars also cause us to look up all users in
the realm.]
This commit is contained in:
Steve Howell
2017-09-14 10:47:22 -07:00
committed by Tim Abbott
parent 7a4c3c1a5c
commit 4e7fce60ee
3 changed files with 82 additions and 4 deletions

View File

@@ -3,6 +3,7 @@ import subprocess
# Zulip's main markdown implementation. See docs/markdown.md for # Zulip's main markdown implementation. See docs/markdown.md for
# detailed documentation on our markdown syntax. # detailed documentation on our markdown syntax.
from typing import Any, Callable, Dict, Iterable, List, Optional, Set, Text, Tuple, TypeVar, Union from typing import Any, Callable, Dict, Iterable, List, Optional, Set, Text, Tuple, TypeVar, Union
from mypy_extensions import TypedDict
from typing.re import Match from typing.re import Match
import markdown import markdown
@@ -16,6 +17,7 @@ import html
import twitter import twitter
import platform import platform
import time import time
import functools
import httplib2 import httplib2
import itertools import itertools
import ujson import ujson
@@ -29,11 +31,13 @@ import requests
from django.core import mail from django.core import mail
from django.conf import settings from django.conf import settings
from django.db.models import Q
from markdown.extensions import codehilite from markdown.extensions import codehilite
from zerver.lib.bugdown import fenced_code from zerver.lib.bugdown import fenced_code
from zerver.lib.bugdown.fenced_code import FENCE_RE from zerver.lib.bugdown.fenced_code import FENCE_RE
from zerver.lib.camo import get_camo_url from zerver.lib.camo import get_camo_url
from zerver.lib.mention import possible_mentions
from zerver.lib.timeout import timeout, TimeoutExpired from zerver.lib.timeout import timeout, TimeoutExpired
from zerver.lib.cache import ( from zerver.lib.cache import (
cache_with_key, cache_get_many, cache_set_many, NotFoundInCache) cache_with_key, cache_get_many, cache_set_many, NotFoundInCache)
@@ -56,6 +60,12 @@ from zerver.lib.tex import render_tex
import six import six
from six.moves import range, html_parser from six.moves import range, html_parser
# Shape of the per-user rows that get_full_name_info() returns as dict
# values: the three UserProfile columns it selects ('id', 'email',
# 'full_name').
FullNameInfo = TypedDict('FullNameInfo', {
'id': int,
'email': Text,
'full_name': Text,
})
# Format version of the bugdown rendering; stored along with rendered # Format version of the bugdown rendering; stored along with rendered
# messages so that we can efficiently determine what needs to be re-rendered # messages so that we can efficiently determine what needs to be re-rendered
version = 1 version = 1
@@ -1118,7 +1128,7 @@ class UserMentionPattern(markdown.inlinepatterns.Pattern):
name = match name = match
wildcard = mention.user_mention_matches_wildcard(name) wildcard = mention.user_mention_matches_wildcard(name)
user = db_data['full_names'].get(name.lower(), None) user = db_data['full_name_info'].get(name.lower(), None)
if wildcard: if wildcard:
current_message.mentions_wildcard = True current_message.mentions_wildcard = True
@@ -1465,6 +1475,32 @@ def log_bugdown_error(msg):
could cause an infinite exception loop.""" could cause an infinite exception loop."""
logging.getLogger('').error(msg) logging.getLogger('').error(msg)
def get_full_name_info(realm_id, full_names):
    # type: (int, Set[Text]) -> Dict[Text, FullNameInfo]
    """Fetch id/full_name/email rows for users whose full name matches.

    Matching is case-insensitive (``full_name__iexact``) and restricted
    to the realm with the given id.  The result maps the lowercased full
    name of each matching row to that row.  An empty ``full_names``
    short-circuits to an empty dict before any query is constructed.
    """
    if not full_names:
        return dict()

    # One case-insensitive filter per candidate name, OR-ed together.
    name_filters = {
        Q(full_name__iexact=candidate)
        for candidate in full_names
    }
    any_name = functools.reduce(lambda left, right: left | right, name_filters)

    matching_users = UserProfile.objects.filter(
        realm_id=realm_id
    ).filter(
        any_name,
    ).values(
        'id',
        'full_name',
        'email',
    )

    info_by_name = {}  # type: Dict[Text, FullNameInfo]
    for user_row in matching_users:
        # Keyed by lowercased name; a later row with the same key
        # replaces an earlier one.
        info_by_name[user_row['full_name'].lower()] = user_row
    return info_by_name
def do_convert(content, message=None, message_realm=None, possible_words=None, sent_by_bot=False): def do_convert(content, message=None, message_realm=None, possible_words=None, sent_by_bot=False):
# type: (Text, Optional[Message], Optional[Realm], Optional[Set[Text]], Optional[bool]) -> Text # type: (Text, Optional[Message], Optional[Realm], Optional[Set[Text]], Optional[bool]) -> Text
"""Convert Markdown to HTML, with Zulip-specific settings and hacks.""" """Convert Markdown to HTML, with Zulip-specific settings and hacks."""
@@ -1511,9 +1547,12 @@ def do_convert(content, message=None, message_realm=None, possible_words=None, s
if possible_words is None: if possible_words is None:
possible_words = set() # Set[Text] possible_words = set() # Set[Text]
full_names = possible_mentions(content)
full_name_info = get_full_name_info(message_realm.id, full_names)
db_data = {'possible_words': possible_words, db_data = {'possible_words': possible_words,
'full_names': dict((user['full_name'].lower(), user) for user in realm_users),
'by_email': dict((user['email'].lower(), user) for user in realm_users), 'by_email': dict((user['email'].lower(), user) for user in realm_users),
'full_name_info': full_name_info,
'emoji': message_realm.get_emoji(), 'emoji': message_realm.get_emoji(),
'sent_by_bot': sent_by_bot, 'sent_by_bot': sent_by_bot,
'stream_names': dict((stream['name'], stream) for stream in realm_streams)} 'stream_names': dict((stream['name'], stream) for stream in realm_streams)}

View File

@@ -1,6 +1,9 @@
from __future__ import absolute_import from __future__ import absolute_import
from typing import Text from typing import Optional, Set, Text
import re
# Match multi-word string between @** ** or match any one-word # Match multi-word string between @** ** or match any one-word
# sequences after @ # sequences after @
find_mentions = r'(?<![^\s\'\"\(,:<])@(\*\*[^\*]+\*\*|all|everyone)' find_mentions = r'(?<![^\s\'\"\(,:<])@(\*\*[^\*]+\*\*|all|everyone)'
@@ -10,3 +13,21 @@ wildcards = ['all', 'everyone']
def user_mention_matches_wildcard(mention): def user_mention_matches_wildcard(mention):
# type: (Text) -> bool # type: (Text) -> bool
return mention in wildcards return mention in wildcards
def extract_name(s):
    # type: (Text) -> Optional[Text]
    """Return the name inside a ``**name**`` token, or None.

    None is returned when ``s`` is not wrapped in ``**`` on both sides,
    and also when the wrapped text is one of the ``wildcards``.
    """
    is_starred = s.startswith("**") and s.endswith("**")
    if not is_starred:
        # We don't care about @all or @everyone
        return None

    inner = s[2:-2]
    return None if inner in wildcards else inner
def possible_mentions(content):
    # type: (Text) -> Set[Text]
    """Collect the distinct names that ``content`` may be mentioning.

    Every ``find_mentions`` match in ``content`` is passed through
    extract_name(); results that are None or empty are dropped.
    """
    candidates = (
        extract_name(token)
        for token in re.findall(find_mentions, content)
    )
    return {name for name in candidates if name}

View File

@@ -13,6 +13,7 @@ from zerver.lib.actions import (
from zerver.lib.alert_words import alert_words_in_realm from zerver.lib.alert_words import alert_words_in_realm
from zerver.lib.camo import get_camo_url from zerver.lib.camo import get_camo_url
from zerver.lib.emoji import get_emoji_url from zerver.lib.emoji import get_emoji_url
from zerver.lib.mention import possible_mentions
from zerver.lib.message import render_markdown from zerver.lib.message import render_markdown
from zerver.lib.request import ( from zerver.lib.request import (
JsonableError, JsonableError,
@@ -44,7 +45,7 @@ import six
from six.moves import urllib from six.moves import urllib
from zerver.lib.str_utils import NonBinaryStr from zerver.lib.str_utils import NonBinaryStr
from typing import Any, AnyStr, Dict, List, Optional, Tuple, Text from typing import Any, AnyStr, Dict, List, Optional, Set, Tuple, Text
class FencedBlockPreprocessorTest(TestCase): class FencedBlockPreprocessorTest(TestCase):
def test_simple_quoting(self): def test_simple_quoting(self):
@@ -740,6 +741,22 @@ class BugdownTest(ZulipTestCase):
'@King Hamlet</span></p>' % (self.example_email("hamlet"), user_id)) '@King Hamlet</span></p>' % (self.example_email("hamlet"), user_id))
self.assertEqual(msg.mentions_user_ids, set([user_profile.id])) self.assertEqual(msg.mentions_user_ids, set([user_profile.id]))
def test_possible_mentions(self):
    # type: () -> None
    # Each pair is (message content, names possible_mentions should report).
    # Bare wildcards, starred wildcards and mentions glued to preceding
    # text are all expected to be left out.
    cases = [
        ('', set()),
        ('boring', set()),
        ('@all', set()),
        ('smush@**steve**smush', set()),
        (
            'Hello @**King Hamlet** and @**Cordelia Lear**\n@**Foo van Barson** @**all**',
            {'King Hamlet', 'Cordelia Lear', 'Foo van Barson'},
        ),
    ]  # type: List[Tuple[Text, Set[Text]]]

    for content, names in cases:
        self.assertEqual(possible_mentions(content), names)
def test_mention_multiple(self): def test_mention_multiple(self):
# type: () -> None # type: () -> None
sender_user_profile = self.example_user('othello') sender_user_profile = self.example_user('othello')
@@ -748,6 +765,7 @@ class BugdownTest(ZulipTestCase):
msg = Message(sender=sender_user_profile, sending_client=get_client("test")) msg = Message(sender=sender_user_profile, sending_client=get_client("test"))
content = "@**King Hamlet** and @**Cordelia Lear**, check this out" content = "@**King Hamlet** and @**Cordelia Lear**, check this out"
self.assertEqual(render_markdown(msg, content), self.assertEqual(render_markdown(msg, content),
'<p>' '<p>'
'<span class="user-mention" ' '<span class="user-mention" '