alert_words: Fix case-sensitivity of alert words.

Previously, alert words were case-insensitive in practice, by which I
mean the Markdown logic had always been case-insensitive; but the data
model was not, so you could create "duplicate" alert words with the
same words in different cases.  We fix this inconsistency by making
the database model case-insensitive.

I'd prefer to be using the Postgres `citext` extension to have
postgres take care of case-insensitive logic for us, but that requires
installing a postgres extension as root on the postgres server, which
is a pain and perhaps not worth the effort to arrange given that we
can achieve our goals with transaction when adding alert words.

We take advantage of the migrate_alert_words migration we're already
doing for all users to effect this transition.

Fixes #12563.
This commit is contained in:
Tim Abbott
2020-04-27 11:04:38 -07:00
parent 052368bd3e
commit 8e5b0351b3
4 changed files with 43 additions and 16 deletions

View File

@@ -1,3 +1,5 @@
from django.db import transaction
from zerver.models import UserProfile, Realm, AlertWord from zerver.models import UserProfile, Realm, AlertWord
from zerver.lib.cache import cache_with_key, realm_alert_words_cache_key, \ from zerver.lib.cache import cache_with_key, realm_alert_words_cache_key, \
realm_alert_words_automaton_cache_key realm_alert_words_automaton_cache_key
@@ -38,22 +40,29 @@ def get_alert_word_automaton(realm: Realm) -> ahocorasick.Automaton:
def user_alert_words(user_profile: UserProfile) -> List[str]: def user_alert_words(user_profile: UserProfile) -> List[str]:
return list(AlertWord.objects.filter(user_profile=user_profile).values_list("word", flat=True)) return list(AlertWord.objects.filter(user_profile=user_profile).values_list("word", flat=True))
def add_user_alert_words(user_profile: UserProfile, alert_words: Iterable[str]) -> List[str]: @transaction.atomic
words = user_alert_words(user_profile) def add_user_alert_words(user_profile: UserProfile, new_words: Iterable[str]) -> List[str]:
existing_words_lower = {word.lower() for word in user_alert_words(user_profile)}
new_words = [w for w in alert_words if w not in words] # Keeping the case, use a dictionary to get the set of
# case-insensitive distinct, new alert words
# to avoid duplication of words word_dict: Dict[str, str] = {}
new_words = list(set(new_words)) for word in new_words:
if word.lower() in existing_words_lower:
continue
word_dict[word.lower()] = word
AlertWord.objects.bulk_create( AlertWord.objects.bulk_create(
AlertWord(user_profile=user_profile, word=word, realm=user_profile.realm) for word in new_words) AlertWord(user_profile=user_profile, word=word, realm=user_profile.realm)
for word in word_dict.values()
return words+new_words )
def remove_user_alert_words(user_profile: UserProfile, alert_words: Iterable[str]) -> List[str]:
words = user_alert_words(user_profile)
delete_words = [w for w in alert_words if w in words]
AlertWord.objects.filter(user_profile=user_profile, word__in=delete_words).delete()
return user_alert_words(user_profile) return user_alert_words(user_profile)
@transaction.atomic
def remove_user_alert_words(user_profile: UserProfile, delete_words: Iterable[str]) -> List[str]:
# TODO: Ideally, this would be a bulk query, but Django doesn't have a `__iexact`.
# We can clean this up if/when Postgres has more native support for case-insensitive fields.
for delete_word in delete_words:
AlertWord.objects.filter(user_profile=user_profile, word__iexact=delete_word).delete()
return user_alert_words(user_profile)

View File

@@ -11,10 +11,17 @@ def move_to_seperate_table(apps: StateApps, schema_editor: DatabaseSchemaEditor)
AlertWord = apps.get_model('zerver', 'AlertWord') AlertWord = apps.get_model('zerver', 'AlertWord')
for user_profile in UserProfile.objects.all(): for user_profile in UserProfile.objects.all():
list_of_words = ujson.loads(user_profile.alert_words) list_of_words = ujson.loads(user_profile.alert_words)
# Remove duplicates with our case-insensitive model.
word_dict: Dict[str, str] = {}
for word in list_of_words:
word_dict[word.lower()] = word
AlertWord.objects.bulk_create( AlertWord.objects.bulk_create(
AlertWord(user_profile=user_profile, word=word, realm=user_profile.realm) for word in list_of_words) AlertWord(user_profile=user_profile, word=word, realm=user_profile.realm)
for word in word_dict.values()
)
def move_back_to_user_profile(apps: StateApps, schema_editor: DatabaseSchemaEditor) -> None: def move_back_to_user_profile(apps: StateApps, schema_editor: DatabaseSchemaEditor) -> None:
AlertWord = apps.get_model('zerver', 'AlertWord') AlertWord = apps.get_model('zerver', 'AlertWord')

View File

@@ -2894,6 +2894,7 @@ class AlertWord(models.Model):
# all the alert words in a realm. # all the alert words in a realm.
realm = models.ForeignKey(Realm, db_index=True, on_delete=CASCADE) # type: Realm realm = models.ForeignKey(Realm, db_index=True, on_delete=CASCADE) # type: Realm
user_profile = models.ForeignKey(UserProfile, on_delete=CASCADE) # type: UserProfile user_profile = models.ForeignKey(UserProfile, on_delete=CASCADE) # type: UserProfile
# Case-insensitive name for the alert word.
word = models.TextField() # type: str word = models.TextField() # type: str
class Meta: class Meta:

View File

@@ -56,6 +56,16 @@ class AlertWordTests(ZulipTestCase):
words = user_alert_words(user) words = user_alert_words(user)
self.assertEqual(set(words), set(self.interesting_alert_word_list)) self.assertEqual(set(words), set(self.interesting_alert_word_list))
# Test the case-insensitivity of adding words
add_user_alert_words(user, set(["ALert", "ALERT"]))
words = user_alert_words(user)
self.assertEqual(set(words), set(self.interesting_alert_word_list))
# Test the case-insensitivity of removing words
remove_user_alert_words(user, set(["ALert"]))
words = user_alert_words(user)
self.assertEqual(set(words), set(self.interesting_alert_word_list) - {'alert'})
def test_remove_word(self) -> None: def test_remove_word(self) -> None:
""" """
Removing alert words works via remove_user_alert_words, even Removing alert words works via remove_user_alert_words, even