alert_words: Fix case-sensitivity of alert words.

Previously, alert words were case-insensitive in practice, by which I mean the Markdown logic had always been case-insensitive; but the data model was not, so you could create "duplicate" alert words with the same words in different cases. We fix this inconsistency by making the database model case-insensitive. I'd prefer to be using the Postgres `citext` extension to have postgres take care of case-insensitive logic for us, but that requires installing a postgres extension as root on the postgres server, which is a pain and perhaps not worth the effort to arrange given that we can achieve our goals with transaction when adding alert words. We take advantage of the migrate_alert_words migration we're already doing for all users to effect this transition. Fixes #12563.
2025-11-03 05:23:35 +00:00 · 2020-04-27 11:04:38 -07:00
parent 052368bd3e
commit 8e5b0351b3
4 changed files with 43 additions and 16 deletions
--- a/zerver/lib/alert_words.py
+++ b/zerver/lib/alert_words.py
@@ -1,3 +1,5 @@
 from django.db import transaction
 from zerver.models import UserProfile, Realm, AlertWord
 from zerver.lib.cache import cache_with_key, realm_alert_words_cache_key, \
    realm_alert_words_automaton_cache_key
@@ -38,22 +40,29 @@ def get_alert_word_automaton(realm: Realm) -> ahocorasick.Automaton:
 def user_alert_words(user_profile: UserProfile) -> List[str]:
    return list(AlertWord.objects.filter(user_profile=user_profile).values_list("word", flat=True))
-def add_user_alert_words(user_profile: UserProfile, alert_words: Iterable[str]) -> List[str]:
+@transaction.atomic
-    words = user_alert_words(user_profile)
+def add_user_alert_words(user_profile: UserProfile, new_words: Iterable[str]) -> List[str]:
    existing_words_lower = {word.lower() for word in user_alert_words(user_profile)}
-    new_words = [w for w in alert_words if w not in words]
+    # Keeping the case, use a dictionary to get the set of
-
+    # case-insensitive distinct, new alert words
-    # to avoid duplication of words
+    word_dict: Dict[str, str] = {}
-    new_words = list(set(new_words))
+    for word in new_words:
        if word.lower() in existing_words_lower:
            continue
        word_dict[word.lower()] = word
    AlertWord.objects.bulk_create(
-        AlertWord(user_profile=user_profile, word=word, realm=user_profile.realm) for word in new_words)
+        AlertWord(user_profile=user_profile, word=word, realm=user_profile.realm)
-
+        for word in word_dict.values()
-    return words+new_words
+    )
 def remove_user_alert_words(user_profile: UserProfile, alert_words: Iterable[str]) -> List[str]:
    words = user_alert_words(user_profile)
    delete_words = [w for w in alert_words if w in words]
    AlertWord.objects.filter(user_profile=user_profile, word__in=delete_words).delete()
    return user_alert_words(user_profile)
@transaction.atomic
 def remove_user_alert_words(user_profile: UserProfile, delete_words: Iterable[str]) -> List[str]:
    # TODO: Ideally, this would be a bulk query, but Django doesn't have a `__iexact`.
    # We can clean this up if/when Postgres has more native support for case-insensitive fields.
    for delete_word in delete_words:
        AlertWord.objects.filter(user_profile=user_profile, word__iexact=delete_word).delete()
    return user_alert_words(user_profile)
--- a/zerver/migrations/0277_migrate_alert_word.py
+++ b/zerver/migrations/0277_migrate_alert_word.py
@@ -11,10 +11,17 @@ def move_to_seperate_table(apps: StateApps, schema_editor: DatabaseSchemaEditor)
    AlertWord = apps.get_model('zerver', 'AlertWord')
    for user_profile in UserProfile.objects.all():
        list_of_words = ujson.loads(user_profile.alert_words)
        # Remove duplicates with our case-insensitive model.
        word_dict: Dict[str, str] = {}
        for word in list_of_words:
            word_dict[word.lower()] = word
        AlertWord.objects.bulk_create(
-            AlertWord(user_profile=user_profile, word=word, realm=user_profile.realm) for word in list_of_words)
+            AlertWord(user_profile=user_profile, word=word, realm=user_profile.realm)
            for word in word_dict.values()
        )
 def move_back_to_user_profile(apps: StateApps, schema_editor: DatabaseSchemaEditor) -> None:
    AlertWord = apps.get_model('zerver', 'AlertWord')
--- a/zerver/models.py
+++ b/zerver/models.py
@@ -2894,6 +2894,7 @@ class AlertWord(models.Model):
    # all the alert words in a realm.
    realm = models.ForeignKey(Realm, db_index=True, on_delete=CASCADE)  # type: Realm
    user_profile = models.ForeignKey(UserProfile, on_delete=CASCADE)  # type: UserProfile
    # Case-insensitive name for the alert word.
    word = models.TextField()   # type: str
    class Meta:
--- a/zerver/tests/test_alert_words.py
+++ b/zerver/tests/test_alert_words.py
@@ -56,6 +56,16 @@ class AlertWordTests(ZulipTestCase):
        words = user_alert_words(user)
        self.assertEqual(set(words), set(self.interesting_alert_word_list))
        # Test the case-insensitivity of adding words
        add_user_alert_words(user, set(["ALert", "ALERT"]))
        words = user_alert_words(user)
        self.assertEqual(set(words), set(self.interesting_alert_word_list))
        # Test the case-insensitivity of removing words
        remove_user_alert_words(user, set(["ALert"]))
        words = user_alert_words(user)
        self.assertEqual(set(words), set(self.interesting_alert_word_list) - {'alert'})
    def test_remove_word(self) -> None:
        """
        Removing alert words works via remove_user_alert_words, even