Files
zulip/zerver/lib/alert_words.py
Anders Kaseorg 365fe0b3d5 python: Sort imports with isort.
Fixes #2665.

Regenerated by tabbott with `lint --fix` after a rebase and change in
parameters.

Note from tabbott: In a few cases, this converts technical debt in the
form of unsorted imports into different technical debt in the form of
our largest files having very long, ugly import sequences at the
start.  I expect this change will increase pressure for us to split
those files, which isn't a bad thing.

Signed-off-by: Anders Kaseorg <anders@zulip.com>
2020-06-11 16:45:32 -07:00

77 lines
3.6 KiB
Python

from typing import Dict, Iterable, List, Optional

import ahocorasick
from django.db import transaction

from zerver.lib.cache import (
    cache_with_key,
    realm_alert_words_automaton_cache_key,
    realm_alert_words_cache_key,
)
from zerver.models import AlertWord, Realm, UserProfile, flush_realm_alert_words
@cache_with_key(realm_alert_words_cache_key, timeout=3600*24)
def alert_words_in_realm(realm: Realm) -> Dict[int, List[str]]:
    """Map each active user's id to the list of alert words they have in `realm`.

    Only AlertWord rows whose user_profile is active are considered; users
    without any alert words do not appear in the result.  The result is
    cached through cache_with_key under realm_alert_words_cache_key.
    """
    rows = AlertWord.objects.filter(
        realm=realm, user_profile__is_active=True
    ).values("user_profile_id", "word")

    words_by_user_id: Dict[int, List[str]] = {}
    for row in rows:
        # Create the per-user list on first sight, then append to it.
        words_by_user_id.setdefault(row["user_profile_id"], []).append(row["word"])
    return words_by_user_id
@cache_with_key(realm_alert_words_automaton_cache_key, timeout=3600*24)
def get_alert_word_automaton(realm: Realm) -> Optional[ahocorasick.Automaton]:
    """Build an Aho-Corasick automaton over all alert words in `realm`.

    Every alert word is lowercased before insertion.  The value stored for
    each key is a tuple `(lowercased_word, user_ids)`, where `user_ids` is
    the set of ids of users who have that word (in any casing).

    Returns None when the automaton could not be finalized because the
    realm has no alert words; callers must handle that case.  The result
    is cached through cache_with_key under
    realm_alert_words_automaton_cache_key.
    """
    user_id_with_words = alert_words_in_realm(realm)
    alert_word_automaton = ahocorasick.Automaton()
    for user_id, alert_words in user_id_with_words.items():
        for alert_word in alert_words:
            alert_word_lower = alert_word.lower()
            if alert_word_automaton.exists(alert_word_lower):
                # The word is already registered (by another user, or by the
                # same user in a different casing): extend the stored set of
                # user ids in place rather than re-adding the key.
                _, user_ids_for_alert_word = alert_word_automaton.get(alert_word_lower)
                user_ids_for_alert_word.add(user_id)
            else:
                alert_word_automaton.add_word(alert_word_lower, (alert_word_lower, {user_id}))
    alert_word_automaton.make_automaton()
    # If the kind is still not AHOCORASICK after make_automaton(), no key was
    # ever added, and the automaton cannot be searched or iterated yet.
    # Return None in that case, i.e. when the realm has no alert words.
    # https://pyahocorasick.readthedocs.io/en/latest/index.html?highlight=Automaton.kind#module-constants
    if alert_word_automaton.kind != ahocorasick.AHOCORASICK:
        return None
    return alert_word_automaton
def user_alert_words(user_profile: UserProfile) -> List[str]:
    """Return the alert words stored for `user_profile` as a list of strings."""
    rows_for_user = AlertWord.objects.filter(user_profile=user_profile)
    return list(rows_for_user.values_list("word", flat=True))
@transaction.atomic
def add_user_alert_words(user_profile: UserProfile, new_words: Iterable[str]) -> List[str]:
    """Add `new_words` to the user's alert words and return the updated list.

    Words that match an existing alert word case-insensitively are skipped.
    Among the new words themselves, case-insensitive duplicates collapse to
    a single entry that keeps the casing of the last occurrence.
    """
    already_present = {existing.lower() for existing in user_alert_words(user_profile)}

    # Keyed by lowercase form so that we end up with the case-insensitively
    # distinct new words, while the values retain the caller's casing.
    distinct_new_words: Dict[str, str] = {}
    for candidate in new_words:
        lowered = candidate.lower()
        if lowered not in already_present:
            distinct_new_words[lowered] = candidate

    AlertWord.objects.bulk_create(
        AlertWord(user_profile=user_profile, word=new_word, realm=user_profile.realm)
        for new_word in distinct_new_words.values()
    )
    # Django bulk_create operations don't flush caches, so we need to do this ourselves.
    flush_realm_alert_words(user_profile.realm)

    return user_alert_words(user_profile)
@transaction.atomic
def remove_user_alert_words(user_profile: UserProfile, delete_words: Iterable[str]) -> List[str]:
    """Delete the given alert words for the user and return the remaining list.

    Each entry of `delete_words` is matched case-insensitively against the
    user's stored alert words.
    """
    # TODO: Ideally, this would be a single bulk query, but Django has no
    # case-insensitive counterpart of `__in` (only the per-value `__iexact`).
    # We can clean this up if/when Postgres has more native support for
    # case-insensitive fields.  If we turn this into a bulk operation, we
    # will need to call flush_realm_alert_words() here.
    for word_to_remove in delete_words:
        matching_rows = AlertWord.objects.filter(
            user_profile=user_profile, word__iexact=word_to_remove
        )
        matching_rows.delete()
    return user_alert_words(user_profile)