Files
zulip/analytics/management/commands/populate_analytics_db.py
Mateusz Mandera 8acfa17fe6 models: Add recipient foreign key in UserProfile and Stream.
This is adds foreign keys to the corresponding Recipient object in the
UserProfile on Stream tables, a denormalization intended to improve
performance as this is a common query.

In the migration for setting the field correctly for existing users,
we do a direct SQL query (because Django 1.11 doesn't provide any good
method for doing it properly in bulk using the ORM.).

A consequence of this change to the model is that a bit of code needs
to be added to the functions responsible for creating new users (to
set the field after the Recipient object gets created).  Fortunately,
there's only a few code paths for doing that.

Also an adjustment is needed in the import system - this introduces a
circular relation between Recipient and UserProfile. The field cannot be
set until the Recipient objects have been created, but UserProfiles need
to be created before their corresponding Recipients. We deal with this
by first importing UserProfiles same way as before, but we leave the
personal_recipient field uninitialized. After creating the Recipient
objects, we call a function to set the field for all the imported users
in bulk.

A similar change is made for managing Stream objects.
2019-12-09 15:14:41 -08:00

217 lines
12 KiB
Python

from datetime import timedelta
from typing import Any, Dict, List, Mapping, Optional, Type
import mock
from django.core.management.base import BaseCommand
from django.utils.timezone import now as timezone_now
from analytics.lib.counts import COUNT_STATS, \
CountStat, do_drop_all_analytics_tables
from analytics.lib.fixtures import generate_time_series_data
from analytics.lib.time_utils import time_range
from analytics.models import BaseCount, FillState, RealmCount, UserCount, \
StreamCount, InstallationCount
from zerver.lib.actions import do_change_is_admin, STREAM_ASSIGNMENT_COLORS
from zerver.lib.create_user import create_user
from zerver.lib.timestamp import floor_to_day
from zerver.models import Realm, Stream, Client, \
Recipient, Subscription
class Command(BaseCommand):
help = """Populates analytics tables with randomly generated data."""
DAYS_OF_DATA = 100
random_seed = 26
def generate_fixture_data(self, stat: CountStat, business_hours_base: float,
non_business_hours_base: float, growth: float,
autocorrelation: float, spikiness: float,
holiday_rate: float=0, partial_sum: bool=False) -> List[int]:
self.random_seed += 1
return generate_time_series_data(
days=self.DAYS_OF_DATA, business_hours_base=business_hours_base,
non_business_hours_base=non_business_hours_base, growth=growth,
autocorrelation=autocorrelation, spikiness=spikiness, holiday_rate=holiday_rate,
frequency=stat.frequency, partial_sum=partial_sum, random_seed=self.random_seed)
def handle(self, *args: Any, **options: Any) -> None:
# TODO: This should arguably only delete the objects
# associated with the "analytics" realm.
do_drop_all_analytics_tables()
# This also deletes any objects with this realm as a foreign key
Realm.objects.filter(string_id='analytics').delete()
# Because we just deleted a bunch of objects in the database
# directly (rather than deleting individual objects in Django,
# in which case our post_save hooks would have flushed the
# individual objects from memcached for us), we need to flush
# memcached in order to ensure deleted objects aren't still
# present in the memcached cache.
from zerver.apps import flush_cache
flush_cache(None)
installation_time = timezone_now() - timedelta(days=self.DAYS_OF_DATA)
last_end_time = floor_to_day(timezone_now())
realm = Realm.objects.create(
string_id='analytics', name='Analytics', date_created=installation_time)
with mock.patch("zerver.lib.create_user.timezone_now", return_value=installation_time):
shylock = create_user('shylock@analytics.ds', 'Shylock', realm,
full_name='Shylock', short_name='shylock',
is_realm_admin=True)
do_change_is_admin(shylock, True)
stream = Stream.objects.create(
name='all', realm=realm, date_created=installation_time)
recipient = Recipient.objects.create(type_id=stream.id, type=Recipient.STREAM)
stream.recipient = recipient
stream.save(update_fields=["recipient"])
# Subscribe shylock to the stream to avoid invariant failures.
# TODO: This should use subscribe_users_to_streams from populate_db.
subs = [
Subscription(recipient=recipient,
user_profile=shylock,
color=STREAM_ASSIGNMENT_COLORS[0]),
]
Subscription.objects.bulk_create(subs)
def insert_fixture_data(stat: CountStat,
fixture_data: Mapping[Optional[str], List[int]],
table: Type[BaseCount]) -> None:
end_times = time_range(last_end_time, last_end_time, stat.frequency,
len(list(fixture_data.values())[0]))
if table == InstallationCount:
id_args = {} # type: Dict[str, Any]
if table == RealmCount:
id_args = {'realm': realm}
if table == UserCount:
id_args = {'realm': realm, 'user': shylock}
if table == StreamCount:
id_args = {'stream': stream, 'realm': realm}
for subgroup, values in fixture_data.items():
table.objects.bulk_create([
table(property=stat.property, subgroup=subgroup, end_time=end_time,
value=value, **id_args)
for end_time, value in zip(end_times, values) if value != 0])
stat = COUNT_STATS['1day_actives::day']
realm_data = {
None: self.generate_fixture_data(stat, .08, .02, 3, .3, 6, partial_sum=True),
} # type: Mapping[Optional[str], List[int]]
insert_fixture_data(stat, realm_data, RealmCount)
installation_data = {
None: self.generate_fixture_data(stat, .8, .2, 4, .3, 6, partial_sum=True),
} # type: Mapping[Optional[str], List[int]]
insert_fixture_data(stat, installation_data, InstallationCount)
FillState.objects.create(property=stat.property, end_time=last_end_time,
state=FillState.DONE)
stat = COUNT_STATS['realm_active_humans::day']
realm_data = {
None: self.generate_fixture_data(stat, .1, .03, 3, .5, 3, partial_sum=True),
}
insert_fixture_data(stat, realm_data, RealmCount)
installation_data = {
None: self.generate_fixture_data(stat, 1, .3, 4, .5, 3, partial_sum=True),
}
insert_fixture_data(stat, installation_data, InstallationCount)
FillState.objects.create(property=stat.property, end_time=last_end_time,
state=FillState.DONE)
stat = COUNT_STATS['active_users_audit:is_bot:day']
realm_data = {
'false': self.generate_fixture_data(stat, .1, .03, 3.5, .8, 2, partial_sum=True),
}
insert_fixture_data(stat, realm_data, RealmCount)
installation_data = {
'false': self.generate_fixture_data(stat, 1, .3, 6, .8, 2, partial_sum=True),
}
insert_fixture_data(stat, installation_data, InstallationCount)
FillState.objects.create(property=stat.property, end_time=last_end_time,
state=FillState.DONE)
stat = COUNT_STATS['messages_sent:is_bot:hour']
user_data = {'false': self.generate_fixture_data(
stat, 2, 1, 1.5, .6, 8, holiday_rate=.1)} # type: Mapping[Optional[str], List[int]]
insert_fixture_data(stat, user_data, UserCount)
realm_data = {'false': self.generate_fixture_data(stat, 35, 15, 6, .6, 4),
'true': self.generate_fixture_data(stat, 15, 15, 3, .4, 2)}
insert_fixture_data(stat, realm_data, RealmCount)
installation_data = {'false': self.generate_fixture_data(stat, 350, 150, 6, .6, 4),
'true': self.generate_fixture_data(stat, 150, 150, 3, .4, 2)}
insert_fixture_data(stat, installation_data, InstallationCount)
FillState.objects.create(property=stat.property, end_time=last_end_time,
state=FillState.DONE)
stat = COUNT_STATS['messages_sent:message_type:day']
user_data = {
'public_stream': self.generate_fixture_data(stat, 1.5, 1, 3, .6, 8),
'private_message': self.generate_fixture_data(stat, .5, .3, 1, .6, 8),
'huddle_message': self.generate_fixture_data(stat, .2, .2, 2, .6, 8)}
insert_fixture_data(stat, user_data, UserCount)
realm_data = {
'public_stream': self.generate_fixture_data(stat, 30, 8, 5, .6, 4),
'private_stream': self.generate_fixture_data(stat, 7, 7, 5, .6, 4),
'private_message': self.generate_fixture_data(stat, 13, 5, 5, .6, 4),
'huddle_message': self.generate_fixture_data(stat, 6, 3, 3, .6, 4)}
insert_fixture_data(stat, realm_data, RealmCount)
installation_data = {
'public_stream': self.generate_fixture_data(stat, 300, 80, 5, .6, 4),
'private_stream': self.generate_fixture_data(stat, 70, 70, 5, .6, 4),
'private_message': self.generate_fixture_data(stat, 130, 50, 5, .6, 4),
'huddle_message': self.generate_fixture_data(stat, 60, 30, 3, .6, 4)}
insert_fixture_data(stat, installation_data, InstallationCount)
FillState.objects.create(property=stat.property, end_time=last_end_time,
state=FillState.DONE)
website, created = Client.objects.get_or_create(name='website')
old_desktop, created = Client.objects.get_or_create(name='desktop app Linux 0.3.7')
android, created = Client.objects.get_or_create(name='ZulipAndroid')
iOS, created = Client.objects.get_or_create(name='ZulipiOS')
react_native, created = Client.objects.get_or_create(name='ZulipMobile')
API, created = Client.objects.get_or_create(name='API: Python')
zephyr_mirror, created = Client.objects.get_or_create(name='zephyr_mirror')
unused, created = Client.objects.get_or_create(name='unused')
long_webhook, created = Client.objects.get_or_create(name='ZulipLooooooooooongNameWebhook')
stat = COUNT_STATS['messages_sent:client:day']
user_data = {
website.id: self.generate_fixture_data(stat, 2, 1, 1.5, .6, 8),
zephyr_mirror.id: self.generate_fixture_data(stat, 0, .3, 1.5, .6, 8)}
insert_fixture_data(stat, user_data, UserCount)
realm_data = {
website.id: self.generate_fixture_data(stat, 30, 20, 5, .6, 3),
old_desktop.id: self.generate_fixture_data(stat, 5, 3, 8, .6, 3),
android.id: self.generate_fixture_data(stat, 5, 5, 2, .6, 3),
iOS.id: self.generate_fixture_data(stat, 5, 5, 2, .6, 3),
react_native.id: self.generate_fixture_data(stat, 5, 5, 10, .6, 3),
API.id: self.generate_fixture_data(stat, 5, 5, 5, .6, 3),
zephyr_mirror.id: self.generate_fixture_data(stat, 1, 1, 3, .6, 3),
unused.id: self.generate_fixture_data(stat, 0, 0, 0, 0, 0),
long_webhook.id: self.generate_fixture_data(stat, 5, 5, 2, .6, 3)}
insert_fixture_data(stat, realm_data, RealmCount)
installation_data = {
website.id: self.generate_fixture_data(stat, 300, 200, 5, .6, 3),
old_desktop.id: self.generate_fixture_data(stat, 50, 30, 8, .6, 3),
android.id: self.generate_fixture_data(stat, 50, 50, 2, .6, 3),
iOS.id: self.generate_fixture_data(stat, 50, 50, 2, .6, 3),
react_native.id: self.generate_fixture_data(stat, 5, 5, 10, .6, 3),
API.id: self.generate_fixture_data(stat, 50, 50, 5, .6, 3),
zephyr_mirror.id: self.generate_fixture_data(stat, 10, 10, 3, .6, 3),
unused.id: self.generate_fixture_data(stat, 0, 0, 0, 0, 0),
long_webhook.id: self.generate_fixture_data(stat, 50, 50, 2, .6, 3)}
insert_fixture_data(stat, installation_data, InstallationCount)
FillState.objects.create(property=stat.property, end_time=last_end_time,
state=FillState.DONE)
stat = COUNT_STATS['messages_in_stream:is_bot:day']
realm_data = {'false': self.generate_fixture_data(stat, 30, 5, 6, .6, 4),
'true': self.generate_fixture_data(stat, 20, 2, 3, .2, 3)}
insert_fixture_data(stat, realm_data, RealmCount)
stream_data = {'false': self.generate_fixture_data(stat, 10, 7, 5, .6, 4),
'true': self.generate_fixture_data(stat, 5, 3, 2, .4, 2)} # type: Mapping[Optional[str], List[int]]
insert_fixture_data(stat, stream_data, StreamCount)
FillState.objects.create(property=stat.property, end_time=last_end_time,
state=FillState.DONE)