analytics: Add active_users_audit:is_bot:day.

This commit is contained in:
Rishi Gupta
2017-03-31 18:26:35 -07:00
committed by Tim Abbott
parent cc75d83b74
commit 6bb97db136
2 changed files with 207 additions and 2 deletions

View File

@@ -6,7 +6,7 @@ from django.utils import timezone
from analytics.models import InstallationCount, RealmCount, \ from analytics.models import InstallationCount, RealmCount, \
UserCount, StreamCount, BaseCount, FillState, Anomaly, installation_epoch UserCount, StreamCount, BaseCount, FillState, Anomaly, installation_epoch
from zerver.models import Realm, UserProfile, Message, Stream, \ from zerver.models import Realm, UserProfile, Message, Stream, \
UserActivityInterval, models UserActivityInterval, RealmAuditLog, models
from zerver.lib.timestamp import floor_to_day, floor_to_hour, ceiling_to_day, \ from zerver.lib.timestamp import floor_to_day, floor_to_hour, ceiling_to_day, \
ceiling_to_hour ceiling_to_hour
@@ -393,6 +393,36 @@ check_useractivityinterval_by_user_query = """
zerver_check_useractivityinterval_by_user = ZerverCountQuery( zerver_check_useractivityinterval_by_user = ZerverCountQuery(
UserActivityInterval, UserCount, check_useractivityinterval_by_user_query) UserActivityInterval, UserCount, check_useractivityinterval_by_user_query)
# Currently hardcodes the query needed for active_users_audit:is_bot:day.
# Assumes that a user cannot have two RealmAuditLog entries with the same event_time and
# event_type in ['user_created', 'user_deactivated', etc].
# In particular, it's important to ensure that migrations don't cause that to happen.
check_realmauditlog_by_user_query = """
INSERT INTO analytics_usercount
(user_id, realm_id, value, property, subgroup, end_time)
SELECT
ral1.modified_user_id, ral1.realm_id, 1, '%(property)s', %(subgroup)s, %%(time_end)s
FROM zerver_realmauditlog ral1
JOIN (
SELECT modified_user_id, max(event_time) AS max_event_time
FROM zerver_realmauditlog
WHERE
event_type in ('user_created', 'user_deactivated', 'user_activated', 'user_reactivated') AND
event_time < %%(time_end)s
GROUP BY modified_user_id
) ral2
ON
ral1.event_time = max_event_time AND
ral1.modified_user_id = ral2.modified_user_id
JOIN zerver_userprofile
ON
ral1.modified_user_id = zerver_userprofile.id
WHERE
ral1.event_type in ('user_created', 'user_activated', 'user_reactivated')
"""
zerver_check_realmauditlog_by_user = ZerverCountQuery(
RealmAuditLog, UserCount, check_realmauditlog_by_user_query)
def do_pull_minutes_active(stat, start_time, end_time): def do_pull_minutes_active(stat, start_time, end_time):
# type: (CountStat, datetime, datetime) -> None # type: (CountStat, datetime, datetime) -> None
timer_start = time.time() timer_start = time.time()
@@ -427,8 +457,18 @@ count_stats_ = [
CountStat('messages_in_stream:is_bot:day', zerver_count_message_by_stream, {}, CountStat('messages_in_stream:is_bot:day', zerver_count_message_by_stream, {},
(UserProfile, 'is_bot'), CountStat.DAY), (UserProfile, 'is_bot'), CountStat.DAY),
# Sanity check on the bottom two stats. Is only an approximation,
# e.g. if a user is deactivated between the end of the day and when this
# stat is run, they won't be counted.
CountStat('active_users:is_bot:day', zerver_count_user_by_realm, {'is_active': True}, CountStat('active_users:is_bot:day', zerver_count_user_by_realm, {'is_active': True},
(UserProfile, 'is_bot'), CountStat.DAY, interval=TIMEDELTA_MAX), (UserProfile, 'is_bot'), CountStat.DAY, interval=TIMEDELTA_MAX),
# In RealmCount, 'active_humans_audit::day' should be the partial sum sequence
# of 'active_users_log:is_bot:day', for any realm that started after the
# latter stat was introduced.
# 'active_users_audit:is_bot:day' is the canonical record of which users were
# active on which days (in the UserProfile.is_active sense).
CountStat('active_users_audit:is_bot:day', zerver_check_realmauditlog_by_user, {},
(UserProfile, 'is_bot'), CountStat.DAY),
LoggingCountStat('active_users_log:is_bot:day', RealmCount, CountStat.DAY), LoggingCountStat('active_users_log:is_bot:day', RealmCount, CountStat.DAY),
# The minutes=15 part is due to the 15 minutes added in # The minutes=15 part is due to the 15 minutes added in

View File

@@ -16,8 +16,10 @@ from analytics.models import BaseCount, InstallationCount, RealmCount, \
UserCount, StreamCount, FillState, Anomaly, installation_epoch UserCount, StreamCount, FillState, Anomaly, installation_epoch
from zerver.lib.actions import do_create_user, do_deactivate_user, \ from zerver.lib.actions import do_create_user, do_deactivate_user, \
do_activate_user, do_reactivate_user do_activate_user, do_reactivate_user
from zerver.lib.timestamp import floor_to_day
from zerver.models import Realm, UserProfile, Message, Stream, Recipient, \ from zerver.models import Realm, UserProfile, Message, Stream, Recipient, \
Huddle, Client, UserActivityInterval, get_user_profile_by_email, get_client Huddle, Client, UserActivityInterval, RealmAuditLog, \
get_user_profile_by_email, get_client
from datetime import datetime, timedelta from datetime import datetime, timedelta
@@ -729,3 +731,166 @@ class TestDeleteStats(AnalyticsTestCase):
do_drop_all_analytics_tables() do_drop_all_analytics_tables()
for table in list(analytics.models.values()): for table in list(analytics.models.values()):
self.assertFalse(table.objects.exists()) self.assertFalse(table.objects.exists())
class TestActiveUsersAudit(AnalyticsTestCase):
def setUp(self):
# type: () -> None
super(TestActiveUsersAudit, self).setUp()
self.user = self.create_user()
self.stat = COUNT_STATS['active_users_audit:is_bot:day']
self.current_property = self.stat.property
def add_event(self, event_type, days_offset, user=None):
# type: (str, float, Optional[UserProfile]) -> None
hours_offset = int(24*days_offset)
if user is None:
user = self.user
RealmAuditLog.objects.create(
realm=user.realm, modified_user=user, event_type=event_type,
event_time=self.TIME_ZERO - hours_offset*self.HOUR)
def test_user_deactivated_in_future(self):
# type: () -> None
self.add_event('user_created', 1)
self.add_event('user_deactivated', 0)
do_fill_count_stat_at_hour(self.stat, self.TIME_ZERO)
self.assertTableState(UserCount, ['subgroup'], [['false']])
def test_user_reactivated_in_future(self):
# type: () -> None
self.add_event('user_deactivated', 1)
self.add_event('user_reactivated', 0)
do_fill_count_stat_at_hour(self.stat, self.TIME_ZERO)
self.assertTableState(UserCount, [], [])
def test_user_active_then_deactivated_same_day(self):
# type: () -> None
self.add_event('user_created', 1)
self.add_event('user_deactivated', .5)
do_fill_count_stat_at_hour(self.stat, self.TIME_ZERO)
self.assertTableState(UserCount, [], [])
def test_user_unactive_then_activated_same_day(self):
# type: () -> None
self.add_event('user_deactivated', 1)
self.add_event('user_reactivated', .5)
do_fill_count_stat_at_hour(self.stat, self.TIME_ZERO)
self.assertTableState(UserCount, ['subgroup'], [['false']])
# Arguably these next two tests are duplicates of the _in_future tests, but are
# a guard against future refactorings where they may no longer be duplicates
def test_user_active_then_deactivated_with_day_gap(self):
# type: () -> None
self.add_event('user_created', 2)
self.add_event('user_deactivated', 1)
process_count_stat(self.stat, self.TIME_ZERO)
self.assertTableState(UserCount, ['subgroup', 'end_time'],
[['false', self.TIME_ZERO - self.DAY]])
def test_user_deactivated_then_reactivated_with_day_gap(self):
# type: () -> None
self.add_event('user_deactivated', 2)
self.add_event('user_reactivated', 1)
process_count_stat(self.stat, self.TIME_ZERO)
self.assertTableState(UserCount, ['subgroup'], [['false']])
def test_event_types(self):
# type: () -> None
self.add_event('user_created', 4)
self.add_event('user_deactivated', 3)
self.add_event('user_activated', 2)
self.add_event('user_reactivated', 1)
for i in range(4):
do_fill_count_stat_at_hour(self.stat, self.TIME_ZERO - i*self.DAY)
self.assertTableState(UserCount, ['subgroup', 'end_time'],
[['false', self.TIME_ZERO - i*self.DAY] for i in [3, 1, 0]])
# Also tests that aggregation to RealmCount and InstallationCount is
# being done, and that we're storing the user correctly in UserCount
def test_multiple_users_realms_and_bots(self):
# type: () -> None
user1 = self.create_user()
user2 = self.create_user()
second_realm = Realm.objects.create(string_id='moo', name='moo')
user3 = self.create_user(realm=second_realm)
user4 = self.create_user(realm=second_realm, is_bot=True)
for user in [user1, user2, user3, user4]:
self.add_event('user_created', 1, user=user)
do_fill_count_stat_at_hour(self.stat, self.TIME_ZERO)
self.assertTableState(UserCount, ['subgroup', 'user'],
[['false', user1], ['false', user2], ['false', user3], ['true', user4]])
self.assertTableState(RealmCount, ['value', 'subgroup', 'realm'],
[[2, 'false', self.default_realm], [1, 'false', second_realm],
[1, 'true', second_realm]])
self.assertTableState(InstallationCount, ['value', 'subgroup'], [[3, 'false'], [1, 'true']])
self.assertTableState(StreamCount, [], [])
# Not that interesting a test if you look at the SQL query at hand, but
# almost all other CountStats have a start_date, so guarding against a
# refactoring that adds that in.
# Also tests the slightly more end-to-end process_count_stat rather than
# do_fill_count_stat_at_hour. E.g. if one changes self.stat.frequency to
# CountStat.HOUR from CountStat.DAY, this will fail, while many of the
# tests above will not.
def test_update_from_two_days_ago(self):
# type: () -> None
self.add_event('user_created', 2)
process_count_stat(self.stat, self.TIME_ZERO)
self.assertTableState(UserCount, ['subgroup', 'end_time'],
[['false', self.TIME_ZERO], ['false', self.TIME_ZERO-self.DAY]])
# User with no relevant activity could happen e.g. for a system bot that
# doesn't go through do_create_user. Mainly just want to make sure that
# that situation doesn't throw an error.
def test_empty_realm_or_user_with_no_relevant_activity(self):
# type: () -> None
self.add_event('unrelated', 1)
self.create_user() # also test a user with no RealmAuditLog entries
Realm.objects.create(string_id='moo', name='moo')
do_fill_count_stat_at_hour(self.stat, self.TIME_ZERO)
self.assertTableState(UserCount, [], [])
def test_max_audit_entry_is_unrelated(self):
# type: () -> None
self.add_event('user_created', 1)
self.add_event('unrelated', .5)
do_fill_count_stat_at_hour(self.stat, self.TIME_ZERO)
self.assertTableState(UserCount, ['subgroup'], [['false']])
# Simultaneous related audit entries should not be allowed, and so not testing for that.
def test_simultaneous_unrelated_audit_entry(self):
# type: () -> None
self.add_event('user_created', 1)
self.add_event('unrelated', 1)
do_fill_count_stat_at_hour(self.stat, self.TIME_ZERO)
self.assertTableState(UserCount, ['subgroup'], [['false']])
def test_simultaneous_max_audit_entries_of_different_users(self):
# type: () -> None
user1 = self.create_user()
user2 = self.create_user()
user3 = self.create_user()
self.add_event('user_created', .5, user=user1)
self.add_event('user_created', .5, user=user2)
self.add_event('user_created', 1, user=user3)
self.add_event('user_deactivated', .5, user=user3)
do_fill_count_stat_at_hour(self.stat, self.TIME_ZERO)
self.assertTableState(UserCount, ['user', 'subgroup'],
[[user1, 'false'], [user2, 'false']])
def test_end_to_end_with_actions_dot_py(self):
# type: () -> None
user1 = do_create_user('email1', 'password', self.default_realm, 'full_name', 'short_name')
user2 = do_create_user('email2', 'password', self.default_realm, 'full_name', 'short_name')
user3 = do_create_user('email3', 'password', self.default_realm, 'full_name', 'short_name')
user4 = do_create_user('email4', 'password', self.default_realm, 'full_name', 'short_name')
do_deactivate_user(user2)
do_activate_user(user3)
do_reactivate_user(user4)
end_time = floor_to_day(timezone.now()) + self.DAY
do_fill_count_stat_at_hour(self.stat, end_time)
for user in [user1, user3, user4]:
self.assertTrue(UserCount.objects.filter(
user=user, property=self.current_property, subgroup='false',
end_time=end_time, value=1).exists())
self.assertFalse(UserCount.objects.filter(user=user2).exists())