From 8e5b0351b3f5fe4c246620e697cac1eb3e4c7fde Mon Sep 17 00:00:00 2001 From: Tim Abbott Date: Mon, 27 Apr 2020 11:04:38 -0700 Subject: [PATCH] alert_words: Fix case-sensitivity of alert words. Previously, alert words were case-insensitive in practice, by which I mean the Markdown logic had always been case-insensitive; but the data model was not, so you could create "duplicate" alert words with the same words in different cases. We fix this inconsistency by making the database model case-insensitive. I'd prefer to be using the Postgres `citext` extension to have postgres take care of case-insensitive logic for us, but that requires installing a postgres extension as root on the postgres server, which is a pain and perhaps not worth the effort to arrange given that we can achieve our goals with transaction when adding alert words. We take advantage of the migrate_alert_words migration we're already doing for all users to effect this transition. Fixes #12563. --- zerver/lib/alert_words.py | 37 ++++++++++++-------- zerver/migrations/0277_migrate_alert_word.py | 11 ++++-- zerver/models.py | 1 + zerver/tests/test_alert_words.py | 10 ++++++ 4 files changed, 43 insertions(+), 16 deletions(-) diff --git a/zerver/lib/alert_words.py b/zerver/lib/alert_words.py index 0bed6b6b27..f61e56915f 100644 --- a/zerver/lib/alert_words.py +++ b/zerver/lib/alert_words.py @@ -1,3 +1,5 @@ +from django.db import transaction + from zerver.models import UserProfile, Realm, AlertWord from zerver.lib.cache import cache_with_key, realm_alert_words_cache_key, \ realm_alert_words_automaton_cache_key @@ -38,22 +40,29 @@ def get_alert_word_automaton(realm: Realm) -> ahocorasick.Automaton: def user_alert_words(user_profile: UserProfile) -> List[str]: return list(AlertWord.objects.filter(user_profile=user_profile).values_list("word", flat=True)) -def add_user_alert_words(user_profile: UserProfile, alert_words: Iterable[str]) -> List[str]: - words = user_alert_words(user_profile) +@transaction.atomic +def add_user_alert_words(user_profile: UserProfile, new_words: Iterable[str]) -> List[str]: + existing_words_lower = {word.lower() for word in user_alert_words(user_profile)} - new_words = [w for w in alert_words if w not in words] - - # to avoid duplication of words - new_words = list(set(new_words)) + # Keeping the case, use a dictionary to get the set of + # case-insensitive distinct, new alert words + word_dict: Dict[str, str] = {} + for word in new_words: + if word.lower() in existing_words_lower: + continue + word_dict[word.lower()] = word AlertWord.objects.bulk_create( - AlertWord(user_profile=user_profile, word=word, realm=user_profile.realm) for word in new_words) - - return words+new_words - -def remove_user_alert_words(user_profile: UserProfile, alert_words: Iterable[str]) -> List[str]: - words = user_alert_words(user_profile) - delete_words = [w for w in alert_words if w in words] - AlertWord.objects.filter(user_profile=user_profile, word__in=delete_words).delete() + AlertWord(user_profile=user_profile, word=word, realm=user_profile.realm) + for word in word_dict.values() + ) return user_alert_words(user_profile) + +@transaction.atomic +def remove_user_alert_words(user_profile: UserProfile, delete_words: Iterable[str]) -> List[str]: + # TODO: Ideally, this would be a bulk query, but Django doesn't have a `__iexact`. + # We can clean this up if/when Postgres has more native support for case-insensitive fields. + for delete_word in delete_words: + AlertWord.objects.filter(user_profile=user_profile, word__iexact=delete_word).delete() + return user_alert_words(user_profile) diff --git a/zerver/migrations/0277_migrate_alert_word.py b/zerver/migrations/0277_migrate_alert_word.py index 9bb45138af..f2d80cf41c 100644 --- a/zerver/migrations/0277_migrate_alert_word.py +++ b/zerver/migrations/0277_migrate_alert_word.py @@ -11,10 +11,17 @@ def move_to_seperate_table(apps: StateApps, schema_editor: DatabaseSchemaEditor) AlertWord = apps.get_model('zerver', 'AlertWord') for user_profile in UserProfile.objects.all(): - list_of_words = ujson.loads(user_profile.alert_words) + + # Remove duplicates with our case-insensitive model. + word_dict: Dict[str, str] = {} + for word in list_of_words: + word_dict[word.lower()] = word + AlertWord.objects.bulk_create( - AlertWord(user_profile=user_profile, word=word, realm=user_profile.realm) for word in list_of_words) + AlertWord(user_profile=user_profile, word=word, realm=user_profile.realm) + for word in word_dict.values() + ) def move_back_to_user_profile(apps: StateApps, schema_editor: DatabaseSchemaEditor) -> None: AlertWord = apps.get_model('zerver', 'AlertWord') diff --git a/zerver/models.py b/zerver/models.py index 89c7060dc9..cf33c17033 100644 --- a/zerver/models.py +++ b/zerver/models.py @@ -2894,6 +2894,7 @@ class AlertWord(models.Model): # all the alert words in a realm. realm = models.ForeignKey(Realm, db_index=True, on_delete=CASCADE) # type: Realm user_profile = models.ForeignKey(UserProfile, on_delete=CASCADE) # type: UserProfile + # Case-insensitive name for the alert word. word = models.TextField() # type: str class Meta: diff --git a/zerver/tests/test_alert_words.py b/zerver/tests/test_alert_words.py index 5e85413a00..9390cfc6b7 100644 --- a/zerver/tests/test_alert_words.py +++ b/zerver/tests/test_alert_words.py @@ -56,6 +56,16 @@ class AlertWordTests(ZulipTestCase): words = user_alert_words(user) self.assertEqual(set(words), set(self.interesting_alert_word_list)) + # Test the case-insensitivity of adding words + add_user_alert_words(user, set(["ALert", "ALERT"])) + words = user_alert_words(user) + self.assertEqual(set(words), set(self.interesting_alert_word_list)) + + # Test the case-insensitivity of removing words + remove_user_alert_words(user, set(["ALert"])) + words = user_alert_words(user) + self.assertEqual(set(words), set(self.interesting_alert_word_list) - {'alert'}) + def test_remove_word(self) -> None: """ Removing alert words works via remove_user_alert_words, even