From 4e7fce60eefe448768c041c45664668df6c2b78f Mon Sep 17 00:00:00 2001
From: Steve Howell 
Date: Thu, 14 Sep 2017 10:47:22 -0700
Subject: [PATCH] Add possible_mentions() to speed up rendering.

We now triage message content for possible mentions before
going to the cache/DB to get name info.  This adds an extra
database query for messages that contain mentions, but it saves
a fairly expensive cache lookup for most messages.  (This will
be especially helpful for large realms.)

[Note that we need a subsequent commit to actually make the speedup
happen here, since avatars also cause us to look up all users in
the realm.]
---
 zerver/lib/bugdown/__init__.py | 43 ++++++++++++++++++++++++++++++++--
 zerver/lib/mention.py          | 23 +++++++++++++++++-
 zerver/tests/test_bugdown.py   | 20 +++++++++++++++-
 3 files changed, 82 insertions(+), 4 deletions(-)
diff --git a/zerver/lib/bugdown/__init__.py b/zerver/lib/bugdown/__init__.py
index 32c754b52d..a4a0d0f5a4 100644
--- a/zerver/lib/bugdown/__init__.py
+++ b/zerver/lib/bugdown/__init__.py
@@ -3,6 +3,7 @@ import subprocess
 # Zulip's main markdown implementation.  See docs/markdown.md for
 # detailed documentation on our markdown syntax.
 from typing import Any, Callable, Dict, Iterable, List, Optional, Set, Text, Tuple, TypeVar, Union
+from mypy_extensions import TypedDict
 from typing.re import Match
 
 import markdown
@@ -16,6 +17,7 @@ import html
 import twitter
 import platform
 import time
+import functools
 import httplib2
 import itertools
 import ujson
@@ -29,11 +31,13 @@ import requests
 
 from django.core import mail
 from django.conf import settings
+from django.db.models import Q
 
 from markdown.extensions import codehilite
 from zerver.lib.bugdown import fenced_code
 from zerver.lib.bugdown.fenced_code import FENCE_RE
 from zerver.lib.camo import get_camo_url
+from zerver.lib.mention import possible_mentions
 from zerver.lib.timeout import timeout, TimeoutExpired
 from zerver.lib.cache import (
     cache_with_key, cache_get_many, cache_set_many, NotFoundInCache)
@@ -56,6 +60,12 @@ from zerver.lib.tex import render_tex
 import six
 from six.moves import range, html_parser
 
+FullNameInfo = TypedDict('FullNameInfo', {
+    'id': int,
+    'email': Text,
+    'full_name': Text,
+})
+
 # Format version of the bugdown rendering; stored along with rendered
 # messages so that we can efficiently determine what needs to be re-rendered
 version = 1
@@ -1118,7 +1128,7 @@ class UserMentionPattern(markdown.inlinepatterns.Pattern):
                 name = match
 
             wildcard = mention.user_mention_matches_wildcard(name)
-            user = db_data['full_names'].get(name.lower(), None)
+            user = db_data['full_name_info'].get(name.lower(), None)
 
             if wildcard:
                 current_message.mentions_wildcard = True
@@ -1465,6 +1475,32 @@ def log_bugdown_error(msg):
     could cause an infinite exception loop."""
     logging.getLogger('').error(msg)
 
+def get_full_name_info(realm_id, full_names):
+    # type: (int, Set[Text]) -> Dict[Text, FullNameInfo]
+    """Map lower-cased full names to id/full_name/email rows for a realm.
+
+    Only users whose full name case-insensitively matches one of
+    `full_names` are fetched; an empty set skips the query entirely.
+    """
+    if not full_names:
+        return dict()
+
+    # OR together one case-insensitive exact-match clause per name.
+    name_clauses = {
+        Q(full_name__iexact=candidate)
+        for candidate in full_names
+    }
+    any_name_matches = functools.reduce(lambda a, b: a | b, name_clauses)
+
+    user_rows = UserProfile.objects.filter(
+        realm_id=realm_id
+    ).filter(
+        any_name_matches
+    ).values('id', 'full_name', 'email')
+
+    # Keys are lower-cased full names.
+    return {row['full_name'].lower(): row for row in user_rows}
+
 def do_convert(content, message=None, message_realm=None, possible_words=None, sent_by_bot=False):
     # type: (Text, Optional[Message], Optional[Realm], Optional[Set[Text]], Optional[bool]) -> Text
     """Convert Markdown to HTML, with Zulip-specific settings and hacks."""
@@ -1511,9 +1547,12 @@ def do_convert(content, message=None, message_realm=None, possible_words=None, s
         if possible_words is None:
             possible_words = set()  # Set[Text]
 
+        full_names = possible_mentions(content)
+        full_name_info = get_full_name_info(message_realm.id, full_names)
+
         db_data = {'possible_words': possible_words,
-                   'full_names': dict((user['full_name'].lower(), user) for user in realm_users),
                    'by_email': dict((user['email'].lower(), user) for user in realm_users),
+                   'full_name_info': full_name_info,
                    'emoji': message_realm.get_emoji(),
                    'sent_by_bot': sent_by_bot,
                    'stream_names': dict((stream['name'], stream) for stream in realm_streams)}
diff --git a/zerver/lib/mention.py b/zerver/lib/mention.py
index 80e19e4e1f..ec74df33f7 100644
--- a/zerver/lib/mention.py
+++ b/zerver/lib/mention.py
@@ -1,6 +1,9 @@
 from __future__ import absolute_import
 
-from typing import Text
+from typing import Optional, Set, Text
+
+import re
+
 # Match multi-word string between @** ** or match any one-word
 # sequences after @
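 find_mentions = r'(?<![^\s\'\"\(,:<])@(\*\*[^\*]+\*\*|all|everyone)'
@@ -10,3 +13,21 @@ wildcards = ['all', 'everyone']
 def user_mention_matches_wildcard(mention):
     # type: (Text) -> bool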
     return mention in wildcards
+
+def extract_name(s):
+    # type: (Text) -> Optional[Text]
+    """Return the text between the ** markers of a match; None for wildcards and bare matches."""
+    # Bare matches (@all, @everyone) carry no user name we care about.
+    if not (s.startswith("**") and s.endswith("**")):
+        return None
+
+    inner = s[2:-2]
+    # A starred wildcard such as **all** is not a real user name either.
+    return None if inner in wildcards else inner
+
+def possible_mentions(content):
+    # type: (Text) -> Set[Text]
+    """Collect the distinct full names that `content` may be @-mentioning."""
+    candidates = (extract_name(hit) for hit in re.findall(find_mentions, content))
+    # extract_name yields None (or '') for matches that name no user.
+    return {candidate for candidate in candidates if candidate}
diff --git a/zerver/tests/test_bugdown.py b/zerver/tests/test_bugdown.py
index 9ffc6b8aad..76ce6bc1a6 100644
--- a/zerver/tests/test_bugdown.py
+++ b/zerver/tests/test_bugdown.py
@@ -13,6 +13,7 @@ from zerver.lib.actions import (
 from zerver.lib.alert_words import alert_words_in_realm
 from zerver.lib.camo import get_camo_url
 from zerver.lib.emoji import get_emoji_url
+from zerver.lib.mention import possible_mentions
 from zerver.lib.message import render_markdown
 from zerver.lib.request import (
     JsonableError,
@@ -44,7 +45,7 @@ import six
 
 from six.moves import urllib
 from zerver.lib.str_utils import NonBinaryStr
-from typing import Any, AnyStr, Dict, List, Optional, Tuple, Text
+from typing import Any, AnyStr, Dict, List, Optional, Set, Tuple, Text
 
 class FencedBlockPreprocessorTest(TestCase):
     def test_simple_quoting(self):
@@ -740,6 +741,22 @@ class BugdownTest(ZulipTestCase):
                          '@King Hamlet
' % (self.example_email("hamlet"), user_id))
         self.assertEqual(msg.mentions_user_ids, set([user_profile.id]))
 
+    def test_possible_mentions(self):
+        # type: () -> None
+        """possible_mentions() finds only non-wildcard @**name** mentions."""
+        cases = [
+            ('', set()),
+            ('boring', set()),
+            ('@all', set()),
+            ('smush@**steve**smush', set()),
+            (
+                'Hello @**King Hamlet** and @**Cordelia Lear**\n@**Foo van Barson** @**all**',
+                {'King Hamlet', 'Cordelia Lear', 'Foo van Barson'},
+            ),
+        ]  # type: List[Tuple[Text, Set[Text]]]
+        for content, expected_names in cases:
+            self.assertEqual(possible_mentions(content), expected_names)
+
     def test_mention_multiple(self):
         # type: () -> None
         sender_user_profile = self.example_user('othello')
@@ -748,6 +765,7 @@ class BugdownTest(ZulipTestCase):
         msg = Message(sender=sender_user_profile, sending_client=get_client("test"))
 
         content = "@**King Hamlet** and @**Cordelia Lear**, check this out"
+
         self.assertEqual(render_markdown(msg, content),
                          ''
                          '