Files
zulip/zerver/lib/alert_words.py
Tim Abbott 03fecba917 alert_words: Fix cache flushing behavior and add tests.
The post_init cache-flushing behavior in the original alert words
migration was subtly wrong; while it may have passed tests, it didn't
have the right ordering for unlikely races.

We use post_save rather than post_init hooks precisely because they
ensure that we flush the cache after we know the database has been
updated and any future reads from the database will have the latest
state.
2020-04-27 11:48:32 -07:00

72 lines
3.6 KiB
Python

from typing import Dict, Iterable, List, Optional

import ahocorasick
from django.db import transaction

from zerver.lib.cache import cache_with_key, realm_alert_words_cache_key, \
    realm_alert_words_automaton_cache_key
from zerver.models import UserProfile, Realm, AlertWord, flush_realm_alert_words
@cache_with_key(realm_alert_words_cache_key, timeout=3600*24)
def alert_words_in_realm(realm: Realm) -> Dict[int, List[str]]:
    """Group the alert words of a realm's active users by user id.

    Only users that own at least one alert word appear as keys.  The
    decorator caches the result under ``realm_alert_words_cache_key``
    with a timeout of ``3600*24`` (presumably seconds, i.e. one day).
    """
    rows = AlertWord.objects.filter(
        realm=realm, user_profile__is_active=True,
    ).values("user_profile_id", "word")

    words_by_user_id: Dict[int, List[str]] = {}
    for row in rows:
        # setdefault creates the per-user list on first sight of the user.
        words_by_user_id.setdefault(row["user_profile_id"], []).append(row["word"])
    return words_by_user_id
@cache_with_key(realm_alert_words_automaton_cache_key, timeout=3600*24)
def get_alert_word_automaton(realm: Realm) -> Optional[ahocorasick.Automaton]:
    """Build an Aho-Corasick automaton over the alert words of ``realm``.

    Every alert word is lowercased and stored as a key whose value is the
    tuple ``(lowercased_word, user_ids)``, where ``user_ids`` is the set of
    users that have that word (in any casing) as an alert word.

    Returns:
        The automaton after ``make_automaton()`` has been called, or
        ``None`` when the realm has no alert words at all, in which case
        the automaton never reaches the ``AHOCORASICK`` kind.
    """
    user_id_with_words = alert_words_in_realm(realm)
    alert_word_automaton = ahocorasick.Automaton()
    for (user_id, alert_words) in user_id_with_words.items():
        for alert_word in alert_words:
            alert_word_lower = alert_word.lower()
            if alert_word_automaton.exists(alert_word_lower):
                # The word is already a key: add this user to the set that
                # is stored alongside it (the set is mutated in place).
                (_, user_ids_for_alert_word) = alert_word_automaton.get(alert_word_lower)
                user_ids_for_alert_word.add(user_id)
            else:
                alert_word_automaton.add_word(alert_word_lower, (alert_word_lower, {user_id}))
    alert_word_automaton.make_automaton()
    # If the kind is not AHOCORASICK after calling make_automaton, it means there is no key present
    # and hence we cannot call items on the automaton yet. To avoid it we return None for such cases
    # where there is no alert-words in the realm.
    # https://pyahocorasick.readthedocs.io/en/latest/index.html?highlight=Automaton.kind#module-constants
    if alert_word_automaton.kind != ahocorasick.AHOCORASICK:
        return None
    return alert_word_automaton
def user_alert_words(user_profile: UserProfile) -> List[str]:
    """Return the alert words stored for ``user_profile`` as a plain list."""
    stored_words = AlertWord.objects.filter(user_profile=user_profile).values_list(
        "word", flat=True,
    )
    return list(stored_words)
@transaction.atomic
def add_user_alert_words(user_profile: UserProfile, new_words: Iterable[str]) -> List[str]:
    """Add ``new_words`` to the user's alert words, ignoring case-insensitive duplicates.

    Words that already exist for the user (compared in lowercase) are
    skipped.  Returns the user's full list of alert words after the insert.
    """
    already_present = {existing.lower() for existing in user_alert_words(user_profile)}

    # Keyed by lowercase form so that several casings of one word collapse
    # into a single entry; the casing seen last in ``new_words`` is the one
    # that gets stored.
    to_create: Dict[str, str] = {
        candidate.lower(): candidate
        for candidate in new_words
        if candidate.lower() not in already_present
    }

    AlertWord.objects.bulk_create(
        AlertWord(user_profile=user_profile, word=spelling, realm=user_profile.realm)
        for spelling in to_create.values()
    )
    # bulk_create bypasses the per-object hooks that would normally flush
    # the realm's alert-word caches, so flush them explicitly.
    flush_realm_alert_words(user_profile.realm)

    return user_alert_words(user_profile)
@transaction.atomic
def remove_user_alert_words(user_profile: UserProfile, delete_words: Iterable[str]) -> List[str]:
    """Delete the given alert words (matched case-insensitively) for the user.

    Returns the user's remaining alert words.
    """
    # TODO: Ideally this would be one bulk query, but Django has no single
    # lookup that matches case-insensitively against a list of values, so we
    # issue one `__iexact` delete per word.  We can clean this up if/when
    # Postgres has more native support for case-insensitive fields.  If this
    # becomes a bulk operation, flush_realm_alert_words() must be called here.
    words_of_user = AlertWord.objects.filter(user_profile=user_profile)
    for target in delete_words:
        words_of_user.filter(word__iexact=target).delete()
    return user_alert_words(user_profile)