alert words: Refactor alert word detection.

The bugdown parser no longer has a concept of which users need which alert words, since it can't really do anything actionable with that info from a rendering standpoint. Instead, our calling code passes in a set of search words (possible_words) to the parser. The parser records the words it finds in the message in the set message.alert_words. Then the model method (Message.render_markdown) builds up the set of user ids that should be flagged as having alert words in the message. This refactoring is a little more involved than I'd like, but there are still some circular dependency issues with rendering code, so I need to pass in the rather complicated realm_alert_words data structure (a dict mapping each user id to that user's list of alert words) all the way from the action through the model to the renderer. This commit shouldn't alter the overall behavior of the system, except that it does remove some duplicate regex checks that were occurring when multiple users had the same alert word.
2025-11-16 20:02:15 +00:00 · 2016-09-14 09:02:24 -07:00
parent cb0d75b23b
commit 40b18094ec
4 changed files with 60 additions and 26 deletions
--- a/zerver/lib/actions.py
+++ b/zerver/lib/actions.py
@@ -25,6 +25,7 @@ from zerver.models import Realm, RealmEmoji, Stream, UserProfile, UserActivity,
    ScheduledJob, realm_filters_for_domain, get_owned_bot_dicts, \
    get_old_unclaimed_attachments, get_cross_realm_users

+from zerver.lib.alert_words import alert_words_in_realm
 from zerver.lib.avatar import get_avatar_url, avatar_url

 from django.db import transaction, IntegrityError
@@ -629,8 +630,12 @@ def do_send_message(message, rendered_content = None, no_log = False, stream = N

 def render_incoming_message(message, content):
    # type: (Message, text_type) -> text_type
+    realm_alert_words = alert_words_in_realm(message.get_realm())
    try:
-        rendered_content = message.render_markdown(content)
+        rendered_content = message.render_markdown(
+            content=content,
+            realm_alert_words=realm_alert_words,
+        )
    except BugdownRenderingException:
        raise JsonableError(_('Unable to render message'))
    return rendered_content
--- a/zerver/lib/bugdown/__init__.py
+++ b/zerver/lib/bugdown/__init__.py
@@ -887,23 +887,28 @@ class AlertWordsNotificationProcessor(markdown.preprocessors.Preprocessor):
    def run(self, lines):
        # type: (Iterable[text_type]) -> Iterable[text_type]
        if current_message and db_data is not None:
-            # We check for a user's custom notifications here, as we want
-            # to check for plaintext words that depend on the recipient.
-            realm_words = db_data['realm_alert_words']
+            # We check for custom alert words here, the set of which are
+            # dependent on which users may see this message.
+            #
+            # Our caller passes in the list of possible_words.  We
+            # don't do any special rendering; we just append the alert words
+            # we find to the set current_message.alert_words.
+
+            realm_words = db_data['possible_words']
+
            content = '\n'.join(lines).lower()

            allowed_before_punctuation = "|".join([r'\s', '^', r'[\(\".,\';\[\*`>]'])
            allowed_after_punctuation = "|".join([r'\s', '$', r'[\)\"\?:.,\';\]!\*`]'])

-            for user_id, words in six.iteritems(realm_words):
-                for word in words:
+            for word in realm_words:
                escaped = re.escape(word.lower())
                match_re = re.compile(u'(?:%s)%s(?:%s)' %
                                        (allowed_before_punctuation,
                                         escaped,
                                         allowed_after_punctuation))
                if re.search(match_re, content):
-                        current_message.user_ids_with_alert_words.add(user_id)
+                    current_message.alert_words.add(word)

        return lines

@@ -1132,8 +1137,8 @@ def log_bugdown_error(msg):
    could cause an infinite exception loop."""
    logging.getLogger('').error(msg)

-def do_convert(md, realm_domain=None, message=None):
-    # type: (markdown.Markdown, Optional[text_type], Optional[Message]) -> Optional[text_type]
+def do_convert(md, realm_domain=None, message=None, possible_words=None):
+    # type: (markdown.Markdown, Optional[text_type], Optional[Message], Optional[Set[text_type]]) -> Optional[text_type]
    """Convert Markdown to HTML, with Zulip-specific settings and hacks."""
    from zerver.models import get_active_user_dicts_in_realm, UserProfile

@@ -1155,7 +1160,10 @@ def do_convert(md, realm_domain=None, message=None):
    if message:
        realm_users = get_active_user_dicts_in_realm(message.get_realm())

-        db_data = {'realm_alert_words': alert_words.alert_words_in_realm(message.get_realm()),
+        if possible_words is None:
+            possible_words = set() # Set[text_type]
+
+        db_data = {'possible_words':    possible_words,
                   'full_names':        dict((user['full_name'].lower(), user) for user in realm_users),
                   'short_names':       dict((user['short_name'].lower(), user) for user in realm_users),
                   'emoji':             message.get_realm().get_emoji()}
@@ -1210,9 +1218,9 @@ def bugdown_stats_finish():
    bugdown_total_requests += 1
    bugdown_total_time += (time.time() - bugdown_time_start)

-def convert(md, realm_domain=None, message=None):
-    # type: (markdown.Markdown, Optional[text_type], Optional[Message]) -> Optional[text_type]
+def convert(md, realm_domain=None, message=None, possible_words=None):
+    # type: (markdown.Markdown, Optional[text_type], Optional[Message], Optional[Set[text_type]]) -> Optional[text_type]
    bugdown_stats_start()
-    ret = do_convert(md, realm_domain, message)
+    ret = do_convert(md, realm_domain, message, possible_words)
    bugdown_stats_finish()
    return ret
--- a/zerver/models.py
+++ b/zerver/models.py
@@ -1,5 +1,5 @@
 from __future__ import absolute_import
-from typing import Any, List, Set, Tuple, TypeVar, \
+from typing import Any, Dict, List, Set, Tuple, TypeVar, \
    Union, Optional, Sequence, AbstractSet
 from typing.re import Match
 from zerver.lib.str_utils import NonBinaryStr
@@ -46,6 +46,8 @@ import datetime
 # TODO: see #1379 to eliminate bugdown dependencies
 bugdown = None # type: Any

+RealmAlertWords = Dict[int, List[text_type]]
+
 MAX_SUBJECT_LENGTH = 60
 MAX_MESSAGE_LENGTH = 10000
 MAX_LANGUAGE_ID_LENGTH = 50 # type: int
@@ -807,8 +809,8 @@ class Message(ModelReprMixin, models.Model):
        # type: () -> Realm
        return self.sender.realm

-    def render_markdown(self, content, domain=None):
-        # type: (text_type, Optional[text_type]) -> text_type
+    def render_markdown(self, content, domain=None, realm_alert_words=None):
+        # type: (text_type, Optional[text_type], Optional[RealmAlertWords]) -> text_type
        """Return HTML for given markdown. Bugdown may add properties to the
        message object such as `mentions_user_ids` and `mentions_wildcard`.
        These are only on this Django object and are not saved in the
@@ -823,7 +825,7 @@ class Message(ModelReprMixin, models.Model):
        self.mentions_wildcard = False
        self.is_me_message = False
        self.mentions_user_ids = set() # type: Set[int]
-        self.user_ids_with_alert_words = set() # type: Set[int]
+        self.alert_words = set() # type: Set[text_type]

        if not domain:
            domain = self.sender.realm.domain
@@ -831,7 +833,21 @@ class Message(ModelReprMixin, models.Model):
            # Use slightly customized Markdown processor for content
            # delivered via zephyr_mirror
            domain = u"zephyr_mirror"
-        rendered_content = bugdown.convert(content, domain, self)
+
+        possible_words = set() # type: Set[text_type]
+        if realm_alert_words is not None:
+            for words in realm_alert_words.values():
+                possible_words.update(set(words))
+
+        # DO MAIN WORK HERE -- call bugdown to convert
+        rendered_content = bugdown.convert(content, domain, self, possible_words)
+
+        self.user_ids_with_alert_words = set() # type: Set[int]
+
+        if realm_alert_words is not None:
+            for user_id, words in realm_alert_words.items():
+                if set(words).intersection(self.alert_words):
+                    self.user_ids_with_alert_words.add(user_id)

        self.is_me_message = Message.is_status_message(content, rendered_content)

--- a/zerver/tests/test_bugdown.py
+++ b/zerver/tests/test_bugdown.py
@@ -11,6 +11,7 @@ from zerver.lib.actions import (
    do_set_alert_words,
    get_realm,
 )
+from zerver.lib.alert_words import alert_words_in_realm
 from zerver.lib.camo import get_camo_url
 from zerver.lib.request import (
    JsonableError,
@@ -417,14 +418,18 @@ class BugdownTest(TestCase):
        user_profile = get_user_profile_by_email("othello@zulip.com")
        do_set_alert_words(user_profile, ["ALERTWORD", "scaryword"])
        msg = Message(sender=user_profile, sending_client=get_client("test"))
+        realm_alert_words = alert_words_in_realm(user_profile.realm)
+
+        def render(msg, content):
+            return msg.render_markdown(content, realm_alert_words=realm_alert_words)

        content = "We have an ALERTWORD day today!"
-        self.assertEqual(msg.render_markdown(content), "<p>We have an ALERTWORD day today!</p>")
+        self.assertEqual(render(msg, content), "<p>We have an ALERTWORD day today!</p>")
        self.assertEqual(msg.user_ids_with_alert_words, set([user_profile.id]))

        msg = Message(sender=user_profile, sending_client=get_client("test"))
        content = "We have a NOTHINGWORD day today!"
-        self.assertEqual(msg.render_markdown(content), "<p>We have a NOTHINGWORD day today!</p>")
+        self.assertEqual(render(msg, content), "<p>We have a NOTHINGWORD day today!</p>")
        self.assertEqual(msg.user_ids_with_alert_words, set())

    def test_mention_wildcard(self):