alert_words: Move alert_words from UserProfile to separate model.

Previously, alert words were a JSON list of strings stored in a
TextField on user_profile.  That hacky model reflected the fact that
they were an early prototype feature.

This commit migrates from that to a separate table, 'AlertWord'.  The
new AlertWord has user_profile, word, id and realm(denormalization so
we can provide a nice index for fetching all the alert words in a
realm).

This transition requires moving the logic for flushing the Alert Words
caches to their own independent feature.

Note that this commit should not be cherry-picked without the
following commit, which fixes case-sensitivity issues with Alert Words.
This commit is contained in:
Abhishek-Balaji
2020-04-15 16:04:26 +05:30
committed by Tim Abbott
parent 818776faae
commit 052368bd3e
10 changed files with 135 additions and 36 deletions

View File

@@ -1,17 +1,17 @@
from django.db.models import Q
from zerver.models import UserProfile, Realm
from zerver.models import UserProfile, Realm, AlertWord
from zerver.lib.cache import cache_with_key, realm_alert_words_cache_key, \
realm_alert_words_automaton_cache_key
import ujson
import ahocorasick
from typing import Dict, Iterable, List
@cache_with_key(realm_alert_words_cache_key, timeout=3600*24)
def alert_words_in_realm(realm: Realm) -> Dict[int, List[str]]:
users_query = UserProfile.objects.filter(realm=realm, is_active=True)
alert_word_data = users_query.filter(~Q(alert_words=ujson.dumps([]))).values('id', 'alert_words')
all_user_words = {elt['id']: ujson.loads(elt['alert_words']) for elt in alert_word_data}
user_ids_with_words = {user_id: w for (user_id, w) in all_user_words.items() if len(w)}
user_ids_and_words = AlertWord.objects.filter(
realm=realm, user_profile__is_active=True).values("user_profile_id", "word")
user_ids_with_words = dict() # type: Dict[int, List[str]]
for id_and_word in user_ids_and_words:
user_ids_with_words.setdefault(id_and_word["user_profile_id"], [])
user_ids_with_words[id_and_word["user_profile_id"]].append(id_and_word["word"])
return user_ids_with_words
@cache_with_key(realm_alert_words_automaton_cache_key, timeout=3600*24)
@@ -36,26 +36,24 @@ def get_alert_word_automaton(realm: Realm) -> ahocorasick.Automaton:
return alert_word_automaton
def user_alert_words(user_profile: UserProfile) -> List[str]:
return ujson.loads(user_profile.alert_words)
return list(AlertWord.objects.filter(user_profile=user_profile).values_list("word", flat=True))
def add_user_alert_words(user_profile: UserProfile, alert_words: Iterable[str]) -> List[str]:
words = user_alert_words(user_profile)
new_words = [w for w in alert_words if w not in words]
words.extend(new_words)
set_user_alert_words(user_profile, words)
# to avoid duplication of words
new_words = list(set(new_words))
return words
AlertWord.objects.bulk_create(
AlertWord(user_profile=user_profile, word=word, realm=user_profile.realm) for word in new_words)
return words+new_words
def remove_user_alert_words(user_profile: UserProfile, alert_words: Iterable[str]) -> List[str]:
words = user_alert_words(user_profile)
words = [w for w in words if w not in alert_words]
delete_words = [w for w in alert_words if w in words]
AlertWord.objects.filter(user_profile=user_profile, word__in=delete_words).delete()
set_user_alert_words(user_profile, words)
return words
def set_user_alert_words(user_profile: UserProfile, alert_words: List[str]) -> None:
user_profile.alert_words = ujson.dumps(alert_words)
user_profile.save(update_fields=['alert_words'])
return user_alert_words(user_profile)

View File

@@ -540,12 +540,6 @@ def flush_user_profile(sender: Any, **kwargs: Any) -> None:
if user_profile.is_bot and changed(kwargs, bot_dict_fields):
cache_delete(bot_dicts_in_realm_cache_key(user_profile.realm))
# Invalidate realm-wide alert words cache if any user in the realm has changed
# alert words
if changed(kwargs, ['alert_words']):
cache_delete(realm_alert_words_cache_key(user_profile.realm))
cache_delete(realm_alert_words_automaton_cache_key(user_profile.realm))
# Called by models.py to flush various caches whenever we save
# a Realm object. The main tricky thing here is that Realm info is
# generally cached indirectly through user_profile objects.

View File

@@ -77,6 +77,7 @@ ALL_ZULIP_TABLES = {
'social_auth_partial',
'social_auth_usersocialauth',
'two_factor_phonedevice',
'zerver_alertword',
'zerver_archivedattachment',
'zerver_archivedattachment_messages',
'zerver_archivedmessage',

View File

@@ -58,7 +58,7 @@ from zerver.models import (
from typing import Any, Dict, List, Optional, Set, Tuple, Sequence
from typing_extensions import TypedDict
RealmAlertWords = Dict[int, List[str]]
RealmAlertWord = Dict[int, List[str]]
RawUnreadMessagesResult = TypedDict('RawUnreadMessagesResult', {
'pm_dict': Dict[int, Any],

View File

@@ -0,0 +1,27 @@
# -*- coding: utf-8 -*-
from django.conf import settings
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
dependencies = [
('zerver', '0275_remove_userprofile_last_pointer_updater'),
]
operations = [
migrations.CreateModel(
name='AlertWord',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('word', models.TextField()),
('realm', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='zerver.Realm')),
('user_profile', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)),
],
options={
'unique_together': {('user_profile', 'word')},
},
),
]

View File

@@ -0,0 +1,43 @@
# -*- coding: utf-8 -*-
from django.db import migrations
from django.db.backends.postgresql_psycopg2.schema import DatabaseSchemaEditor
from django.db.migrations.state import StateApps
import ujson
from typing import Dict, List
def move_to_seperate_table(apps: StateApps, schema_editor: DatabaseSchemaEditor) -> None:
UserProfile = apps.get_model('zerver', 'UserProfile')
AlertWord = apps.get_model('zerver', 'AlertWord')
for user_profile in UserProfile.objects.all():
list_of_words = ujson.loads(user_profile.alert_words)
AlertWord.objects.bulk_create(
AlertWord(user_profile=user_profile, word=word, realm=user_profile.realm) for word in list_of_words)
def move_back_to_user_profile(apps: StateApps, schema_editor: DatabaseSchemaEditor) -> None:
AlertWord = apps.get_model('zerver', 'AlertWord')
UserProfile = apps.get_model('zerver', 'UserProfile')
user_ids_and_words = AlertWord.objects.all().values("user_profile_id", "word")
user_ids_with_words = dict() # type: Dict[int, List[str]]
for id_and_word in user_ids_and_words:
user_ids_with_words.setdefault(id_and_word["user_profile_id"], [])
user_ids_with_words[id_and_word["user_profile_id"]].append(id_and_word["word"])
for (user_id, words) in user_ids_with_words.items():
user_profile = UserProfile.objects.get(id=user_id)
user_profile.alert_words = ujson.dumps(words)
user_profile.save(update_fields=['alert_words'])
class Migration(migrations.Migration):
dependencies = [
('zerver', '0276_alertword'),
]
operations = [
migrations.RunPython(move_to_seperate_table, move_back_to_user_profile)
]

View File

@@ -0,0 +1,17 @@
# Generated by Django 2.2.10 on 2020-03-23 20:08
from django.db import migrations
class Migration(migrations.Migration):
dependencies = [
('zerver', '0277_migrate_alert_word'),
]
operations = [
migrations.RemoveField(
model_name='userprofile',
name='alert_words',
),
]

View File

@@ -19,12 +19,13 @@ from zerver.lib.cache import cache_with_key, flush_user_profile, flush_realm, \
get_stream_cache_key, realm_user_dicts_cache_key, \
bot_dicts_in_realm_cache_key, realm_user_dict_fields, \
bot_dict_fields, flush_message, flush_submessage, bot_profile_cache_key, \
flush_used_upload_space_cache, get_realm_used_upload_space_cache_key
flush_used_upload_space_cache, get_realm_used_upload_space_cache_key, \
realm_alert_words_cache_key, realm_alert_words_automaton_cache_key
from zerver.lib.utils import make_safe_digest, generate_random_token
from django.db import transaction
from django.utils.timezone import now as timezone_now
from zerver.lib.timestamp import datetime_to_timestamp
from django.db.models.signals import post_save, post_delete
from django.db.models.signals import post_save, post_delete, post_init
from django.utils.translation import ugettext_lazy as _
from zerver.lib import cache
from zerver.lib.validator import check_int, \
@@ -916,9 +917,6 @@ class UserProfile(AbstractBaseUser, PermissionsMixin):
enable_login_emails: bool = models.BooleanField(default=True)
realm_name_in_notifications: bool = models.BooleanField(default=False)
# Words that trigger a mention for this user, formatted as a json-serialized list of strings
alert_words: str = models.TextField(default='[]')
# Used for rate-limiting certain automated messages generated by bots
last_reminder: Optional[datetime.datetime] = models.DateTimeField(default=None, null=True)
@@ -2888,3 +2886,24 @@ def get_fake_email_domain() -> str:
raise InvalidFakeEmailDomain(settings.FAKE_EMAIL_DOMAIN + ' is not a valid domain.')
return settings.FAKE_EMAIL_DOMAIN
class AlertWord(models.Model):
# Realm isn't necessary, but it's a nice denormalization. Users
# never move to another realm, so it's static, and having Realm
# here optimizes the main query on this table, which is fetching
# all the alert words in a realm.
realm = models.ForeignKey(Realm, db_index=True, on_delete=CASCADE) # type: Realm
user_profile = models.ForeignKey(UserProfile, on_delete=CASCADE) # type: UserProfile
word = models.TextField() # type: str
class Meta:
unique_together = ("user_profile", "word")
def flush_alert_word(sender: Any, **kwargs: Any) -> None:
realm = kwargs['instance'].realm
cache_delete(realm_alert_words_cache_key(realm))
cache_delete(realm_alert_words_automaton_cache_key(realm))
post_init.connect(flush_alert_word, sender=AlertWord)
post_delete.connect(flush_alert_word, sender=AlertWord)

View File

@@ -3632,10 +3632,10 @@ class FetchQueriesTest(ZulipTestCase):
client_gravatar=False,
)
self.assert_length(queries, 31)
self.assert_length(queries, 32)
expected_counts = dict(
alert_words=0,
alert_words=1,
custom_profile_fields=1,
default_streams=1,
default_stream_groups=1,

View File

@@ -249,7 +249,7 @@ class HomeTest(ZulipTestCase):
self.assertEqual(set(result["Cache-Control"].split(", ")),
{"must-revalidate", "no-store", "no-cache"})
self.assert_length(queries, 42)
self.assert_length(queries, 43)
self.assert_length(cache_mock.call_args_list, 5)
html = result.content.decode('utf-8')
@@ -315,7 +315,7 @@ class HomeTest(ZulipTestCase):
result = self._get_home_page()
self.assertEqual(result.status_code, 200)
self.assert_length(cache_mock.call_args_list, 6)
self.assert_length(queries, 40)
self.assert_length(queries, 41)
@slow("Creates and subscribes 10 users in a loop. Should use bulk queries.")
def test_num_queries_with_streams(self) -> None:
@@ -347,7 +347,7 @@ class HomeTest(ZulipTestCase):
with queries_captured() as queries2:
result = self._get_home_page()
self.assert_length(queries2, 37)
self.assert_length(queries2, 38)
# Do a sanity check that our new streams were in the payload.
html = result.content.decode('utf-8')