mirror of
https://github.com/zulip/zulip.git
synced 2025-11-08 16:01:58 +00:00
analytics: Add active_users_audit:is_bot:day.
This commit is contained in:
@@ -6,7 +6,7 @@ from django.utils import timezone
|
|||||||
from analytics.models import InstallationCount, RealmCount, \
|
from analytics.models import InstallationCount, RealmCount, \
|
||||||
UserCount, StreamCount, BaseCount, FillState, Anomaly, installation_epoch
|
UserCount, StreamCount, BaseCount, FillState, Anomaly, installation_epoch
|
||||||
from zerver.models import Realm, UserProfile, Message, Stream, \
|
from zerver.models import Realm, UserProfile, Message, Stream, \
|
||||||
UserActivityInterval, models
|
UserActivityInterval, RealmAuditLog, models
|
||||||
from zerver.lib.timestamp import floor_to_day, floor_to_hour, ceiling_to_day, \
|
from zerver.lib.timestamp import floor_to_day, floor_to_hour, ceiling_to_day, \
|
||||||
ceiling_to_hour
|
ceiling_to_hour
|
||||||
|
|
||||||
@@ -393,6 +393,36 @@ check_useractivityinterval_by_user_query = """
|
|||||||
zerver_check_useractivityinterval_by_user = ZerverCountQuery(
|
zerver_check_useractivityinterval_by_user = ZerverCountQuery(
|
||||||
UserActivityInterval, UserCount, check_useractivityinterval_by_user_query)
|
UserActivityInterval, UserCount, check_useractivityinterval_by_user_query)
|
||||||
|
|
||||||
|
# Currently hardcodes the query needed for active_users_audit:is_bot:day.
|
||||||
|
# Assumes that a user cannot have two RealmAuditLog entries with the same event_time and
|
||||||
|
# event_type in ['user_created', 'user_deactivated', etc].
|
||||||
|
# In particular, it's important to ensure that migrations don't cause that to happen.
|
||||||
|
check_realmauditlog_by_user_query = """
|
||||||
|
INSERT INTO analytics_usercount
|
||||||
|
(user_id, realm_id, value, property, subgroup, end_time)
|
||||||
|
SELECT
|
||||||
|
ral1.modified_user_id, ral1.realm_id, 1, '%(property)s', %(subgroup)s, %%(time_end)s
|
||||||
|
FROM zerver_realmauditlog ral1
|
||||||
|
JOIN (
|
||||||
|
SELECT modified_user_id, max(event_time) AS max_event_time
|
||||||
|
FROM zerver_realmauditlog
|
||||||
|
WHERE
|
||||||
|
event_type in ('user_created', 'user_deactivated', 'user_activated', 'user_reactivated') AND
|
||||||
|
event_time < %%(time_end)s
|
||||||
|
GROUP BY modified_user_id
|
||||||
|
) ral2
|
||||||
|
ON
|
||||||
|
ral1.event_time = max_event_time AND
|
||||||
|
ral1.modified_user_id = ral2.modified_user_id
|
||||||
|
JOIN zerver_userprofile
|
||||||
|
ON
|
||||||
|
ral1.modified_user_id = zerver_userprofile.id
|
||||||
|
WHERE
|
||||||
|
ral1.event_type in ('user_created', 'user_activated', 'user_reactivated')
|
||||||
|
"""
|
||||||
|
zerver_check_realmauditlog_by_user = ZerverCountQuery(
|
||||||
|
RealmAuditLog, UserCount, check_realmauditlog_by_user_query)
|
||||||
|
|
||||||
def do_pull_minutes_active(stat, start_time, end_time):
|
def do_pull_minutes_active(stat, start_time, end_time):
|
||||||
# type: (CountStat, datetime, datetime) -> None
|
# type: (CountStat, datetime, datetime) -> None
|
||||||
timer_start = time.time()
|
timer_start = time.time()
|
||||||
@@ -427,8 +457,18 @@ count_stats_ = [
|
|||||||
CountStat('messages_in_stream:is_bot:day', zerver_count_message_by_stream, {},
|
CountStat('messages_in_stream:is_bot:day', zerver_count_message_by_stream, {},
|
||||||
(UserProfile, 'is_bot'), CountStat.DAY),
|
(UserProfile, 'is_bot'), CountStat.DAY),
|
||||||
|
|
||||||
|
# Sanity check on the bottom two stats. Is only an approximation,
|
||||||
|
# e.g. if a user is deactivated between the end of the day and when this
|
||||||
|
# stat is run, they won't be counted.
|
||||||
CountStat('active_users:is_bot:day', zerver_count_user_by_realm, {'is_active': True},
|
CountStat('active_users:is_bot:day', zerver_count_user_by_realm, {'is_active': True},
|
||||||
(UserProfile, 'is_bot'), CountStat.DAY, interval=TIMEDELTA_MAX),
|
(UserProfile, 'is_bot'), CountStat.DAY, interval=TIMEDELTA_MAX),
|
||||||
|
# In RealmCount, 'active_humans_audit::day' should be the partial sum sequence
|
||||||
|
# of 'active_users_log:is_bot:day', for any realm that started after the
|
||||||
|
# latter stat was introduced.
|
||||||
|
# 'active_users_audit:is_bot:day' is the canonical record of which users were
|
||||||
|
# active on which days (in the UserProfile.is_active sense).
|
||||||
|
CountStat('active_users_audit:is_bot:day', zerver_check_realmauditlog_by_user, {},
|
||||||
|
(UserProfile, 'is_bot'), CountStat.DAY),
|
||||||
LoggingCountStat('active_users_log:is_bot:day', RealmCount, CountStat.DAY),
|
LoggingCountStat('active_users_log:is_bot:day', RealmCount, CountStat.DAY),
|
||||||
|
|
||||||
# The minutes=15 part is due to the 15 minutes added in
|
# The minutes=15 part is due to the 15 minutes added in
|
||||||
|
|||||||
@@ -16,8 +16,10 @@ from analytics.models import BaseCount, InstallationCount, RealmCount, \
|
|||||||
UserCount, StreamCount, FillState, Anomaly, installation_epoch
|
UserCount, StreamCount, FillState, Anomaly, installation_epoch
|
||||||
from zerver.lib.actions import do_create_user, do_deactivate_user, \
|
from zerver.lib.actions import do_create_user, do_deactivate_user, \
|
||||||
do_activate_user, do_reactivate_user
|
do_activate_user, do_reactivate_user
|
||||||
|
from zerver.lib.timestamp import floor_to_day
|
||||||
from zerver.models import Realm, UserProfile, Message, Stream, Recipient, \
|
from zerver.models import Realm, UserProfile, Message, Stream, Recipient, \
|
||||||
Huddle, Client, UserActivityInterval, get_user_profile_by_email, get_client
|
Huddle, Client, UserActivityInterval, RealmAuditLog, \
|
||||||
|
get_user_profile_by_email, get_client
|
||||||
|
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
|
|
||||||
@@ -729,3 +731,166 @@ class TestDeleteStats(AnalyticsTestCase):
|
|||||||
do_drop_all_analytics_tables()
|
do_drop_all_analytics_tables()
|
||||||
for table in list(analytics.models.values()):
|
for table in list(analytics.models.values()):
|
||||||
self.assertFalse(table.objects.exists())
|
self.assertFalse(table.objects.exists())
|
||||||
|
|
||||||
|
class TestActiveUsersAudit(AnalyticsTestCase):
|
||||||
|
def setUp(self):
|
||||||
|
# type: () -> None
|
||||||
|
super(TestActiveUsersAudit, self).setUp()
|
||||||
|
self.user = self.create_user()
|
||||||
|
self.stat = COUNT_STATS['active_users_audit:is_bot:day']
|
||||||
|
self.current_property = self.stat.property
|
||||||
|
|
||||||
|
def add_event(self, event_type, days_offset, user=None):
|
||||||
|
# type: (str, float, Optional[UserProfile]) -> None
|
||||||
|
hours_offset = int(24*days_offset)
|
||||||
|
if user is None:
|
||||||
|
user = self.user
|
||||||
|
RealmAuditLog.objects.create(
|
||||||
|
realm=user.realm, modified_user=user, event_type=event_type,
|
||||||
|
event_time=self.TIME_ZERO - hours_offset*self.HOUR)
|
||||||
|
|
||||||
|
def test_user_deactivated_in_future(self):
|
||||||
|
# type: () -> None
|
||||||
|
self.add_event('user_created', 1)
|
||||||
|
self.add_event('user_deactivated', 0)
|
||||||
|
do_fill_count_stat_at_hour(self.stat, self.TIME_ZERO)
|
||||||
|
self.assertTableState(UserCount, ['subgroup'], [['false']])
|
||||||
|
|
||||||
|
def test_user_reactivated_in_future(self):
|
||||||
|
# type: () -> None
|
||||||
|
self.add_event('user_deactivated', 1)
|
||||||
|
self.add_event('user_reactivated', 0)
|
||||||
|
do_fill_count_stat_at_hour(self.stat, self.TIME_ZERO)
|
||||||
|
self.assertTableState(UserCount, [], [])
|
||||||
|
|
||||||
|
def test_user_active_then_deactivated_same_day(self):
|
||||||
|
# type: () -> None
|
||||||
|
self.add_event('user_created', 1)
|
||||||
|
self.add_event('user_deactivated', .5)
|
||||||
|
do_fill_count_stat_at_hour(self.stat, self.TIME_ZERO)
|
||||||
|
self.assertTableState(UserCount, [], [])
|
||||||
|
|
||||||
|
def test_user_unactive_then_activated_same_day(self):
|
||||||
|
# type: () -> None
|
||||||
|
self.add_event('user_deactivated', 1)
|
||||||
|
self.add_event('user_reactivated', .5)
|
||||||
|
do_fill_count_stat_at_hour(self.stat, self.TIME_ZERO)
|
||||||
|
self.assertTableState(UserCount, ['subgroup'], [['false']])
|
||||||
|
|
||||||
|
# Arguably these next two tests are duplicates of the _in_future tests, but are
|
||||||
|
# a guard against future refactorings where they may no longer be duplicates
|
||||||
|
def test_user_active_then_deactivated_with_day_gap(self):
|
||||||
|
# type: () -> None
|
||||||
|
self.add_event('user_created', 2)
|
||||||
|
self.add_event('user_deactivated', 1)
|
||||||
|
process_count_stat(self.stat, self.TIME_ZERO)
|
||||||
|
self.assertTableState(UserCount, ['subgroup', 'end_time'],
|
||||||
|
[['false', self.TIME_ZERO - self.DAY]])
|
||||||
|
|
||||||
|
def test_user_deactivated_then_reactivated_with_day_gap(self):
|
||||||
|
# type: () -> None
|
||||||
|
self.add_event('user_deactivated', 2)
|
||||||
|
self.add_event('user_reactivated', 1)
|
||||||
|
process_count_stat(self.stat, self.TIME_ZERO)
|
||||||
|
self.assertTableState(UserCount, ['subgroup'], [['false']])
|
||||||
|
|
||||||
|
def test_event_types(self):
|
||||||
|
# type: () -> None
|
||||||
|
self.add_event('user_created', 4)
|
||||||
|
self.add_event('user_deactivated', 3)
|
||||||
|
self.add_event('user_activated', 2)
|
||||||
|
self.add_event('user_reactivated', 1)
|
||||||
|
for i in range(4):
|
||||||
|
do_fill_count_stat_at_hour(self.stat, self.TIME_ZERO - i*self.DAY)
|
||||||
|
self.assertTableState(UserCount, ['subgroup', 'end_time'],
|
||||||
|
[['false', self.TIME_ZERO - i*self.DAY] for i in [3, 1, 0]])
|
||||||
|
|
||||||
|
# Also tests that aggregation to RealmCount and InstallationCount is
|
||||||
|
# being done, and that we're storing the user correctly in UserCount
|
||||||
|
def test_multiple_users_realms_and_bots(self):
|
||||||
|
# type: () -> None
|
||||||
|
user1 = self.create_user()
|
||||||
|
user2 = self.create_user()
|
||||||
|
second_realm = Realm.objects.create(string_id='moo', name='moo')
|
||||||
|
user3 = self.create_user(realm=second_realm)
|
||||||
|
user4 = self.create_user(realm=second_realm, is_bot=True)
|
||||||
|
for user in [user1, user2, user3, user4]:
|
||||||
|
self.add_event('user_created', 1, user=user)
|
||||||
|
do_fill_count_stat_at_hour(self.stat, self.TIME_ZERO)
|
||||||
|
self.assertTableState(UserCount, ['subgroup', 'user'],
|
||||||
|
[['false', user1], ['false', user2], ['false', user3], ['true', user4]])
|
||||||
|
self.assertTableState(RealmCount, ['value', 'subgroup', 'realm'],
|
||||||
|
[[2, 'false', self.default_realm], [1, 'false', second_realm],
|
||||||
|
[1, 'true', second_realm]])
|
||||||
|
self.assertTableState(InstallationCount, ['value', 'subgroup'], [[3, 'false'], [1, 'true']])
|
||||||
|
self.assertTableState(StreamCount, [], [])
|
||||||
|
|
||||||
|
# Not that interesting a test if you look at the SQL query at hand, but
|
||||||
|
# almost all other CountStats have a start_date, so guarding against a
|
||||||
|
# refactoring that adds that in.
|
||||||
|
# Also tests the slightly more end-to-end process_count_stat rather than
|
||||||
|
# do_fill_count_stat_at_hour. E.g. if one changes self.stat.frequency to
|
||||||
|
# CountStat.HOUR from CountStat.DAY, this will fail, while many of the
|
||||||
|
# tests above will not.
|
||||||
|
def test_update_from_two_days_ago(self):
|
||||||
|
# type: () -> None
|
||||||
|
self.add_event('user_created', 2)
|
||||||
|
process_count_stat(self.stat, self.TIME_ZERO)
|
||||||
|
self.assertTableState(UserCount, ['subgroup', 'end_time'],
|
||||||
|
[['false', self.TIME_ZERO], ['false', self.TIME_ZERO-self.DAY]])
|
||||||
|
|
||||||
|
# User with no relevant activity could happen e.g. for a system bot that
|
||||||
|
# doesn't go through do_create_user. Mainly just want to make sure that
|
||||||
|
# that situation doesn't throw an error.
|
||||||
|
def test_empty_realm_or_user_with_no_relevant_activity(self):
|
||||||
|
# type: () -> None
|
||||||
|
self.add_event('unrelated', 1)
|
||||||
|
self.create_user() # also test a user with no RealmAuditLog entries
|
||||||
|
Realm.objects.create(string_id='moo', name='moo')
|
||||||
|
do_fill_count_stat_at_hour(self.stat, self.TIME_ZERO)
|
||||||
|
self.assertTableState(UserCount, [], [])
|
||||||
|
|
||||||
|
def test_max_audit_entry_is_unrelated(self):
|
||||||
|
# type: () -> None
|
||||||
|
self.add_event('user_created', 1)
|
||||||
|
self.add_event('unrelated', .5)
|
||||||
|
do_fill_count_stat_at_hour(self.stat, self.TIME_ZERO)
|
||||||
|
self.assertTableState(UserCount, ['subgroup'], [['false']])
|
||||||
|
|
||||||
|
# Simultaneous related audit entries should not be allowed, and so not testing for that.
|
||||||
|
def test_simultaneous_unrelated_audit_entry(self):
|
||||||
|
# type: () -> None
|
||||||
|
self.add_event('user_created', 1)
|
||||||
|
self.add_event('unrelated', 1)
|
||||||
|
do_fill_count_stat_at_hour(self.stat, self.TIME_ZERO)
|
||||||
|
self.assertTableState(UserCount, ['subgroup'], [['false']])
|
||||||
|
|
||||||
|
def test_simultaneous_max_audit_entries_of_different_users(self):
|
||||||
|
# type: () -> None
|
||||||
|
user1 = self.create_user()
|
||||||
|
user2 = self.create_user()
|
||||||
|
user3 = self.create_user()
|
||||||
|
self.add_event('user_created', .5, user=user1)
|
||||||
|
self.add_event('user_created', .5, user=user2)
|
||||||
|
self.add_event('user_created', 1, user=user3)
|
||||||
|
self.add_event('user_deactivated', .5, user=user3)
|
||||||
|
do_fill_count_stat_at_hour(self.stat, self.TIME_ZERO)
|
||||||
|
self.assertTableState(UserCount, ['user', 'subgroup'],
|
||||||
|
[[user1, 'false'], [user2, 'false']])
|
||||||
|
|
||||||
|
def test_end_to_end_with_actions_dot_py(self):
|
||||||
|
# type: () -> None
|
||||||
|
user1 = do_create_user('email1', 'password', self.default_realm, 'full_name', 'short_name')
|
||||||
|
user2 = do_create_user('email2', 'password', self.default_realm, 'full_name', 'short_name')
|
||||||
|
user3 = do_create_user('email3', 'password', self.default_realm, 'full_name', 'short_name')
|
||||||
|
user4 = do_create_user('email4', 'password', self.default_realm, 'full_name', 'short_name')
|
||||||
|
do_deactivate_user(user2)
|
||||||
|
do_activate_user(user3)
|
||||||
|
do_reactivate_user(user4)
|
||||||
|
end_time = floor_to_day(timezone.now()) + self.DAY
|
||||||
|
do_fill_count_stat_at_hour(self.stat, end_time)
|
||||||
|
for user in [user1, user3, user4]:
|
||||||
|
self.assertTrue(UserCount.objects.filter(
|
||||||
|
user=user, property=self.current_property, subgroup='false',
|
||||||
|
end_time=end_time, value=1).exists())
|
||||||
|
self.assertFalse(UserCount.objects.filter(user=user2).exists())
|
||||||
|
|||||||
Reference in New Issue
Block a user