analytics: Add LoggingCountStat for number of users.

This commit is contained in:
Rishi Gupta
2017-02-15 08:26:22 -08:00
committed by Tim Abbott
parent 51b7677db7
commit 8feea6c598
3 changed files with 230 additions and 12 deletions

View File

@@ -1,13 +1,15 @@
from django.conf import settings from django.conf import settings
from django.db import connection, models from django.db import connection, models
from django.db.models import F
from django.utils import timezone from django.utils import timezone
from analytics.models import InstallationCount, RealmCount, \ from analytics.models import InstallationCount, RealmCount, \
UserCount, StreamCount, BaseCount, FillState, installation_epoch UserCount, StreamCount, BaseCount, FillState, installation_epoch
from zerver.models import Realm, UserProfile, Message, Stream, models from zerver.models import Realm, UserProfile, Message, Stream, models
from zerver.lib.timestamp import floor_to_day from zerver.lib.timestamp import floor_to_day, floor_to_hour, ceiling_to_day, \
ceiling_to_hour
from typing import Any, Dict, Optional, Type, Tuple, Text from typing import Any, Dict, Optional, Text, Tuple, Type, Union
from datetime import timedelta, datetime from datetime import timedelta, datetime
import logging import logging
@@ -46,11 +48,19 @@ class CountStat(object):
raise ValueError("Unknown frequency: %s" % (frequency,)) raise ValueError("Unknown frequency: %s" % (frequency,))
self.frequency = frequency self.frequency = frequency
self.interval = self.GAUGE if is_gauge else frequency self.interval = self.GAUGE if is_gauge else frequency
self.is_logging = False
def __unicode__(self): def __unicode__(self):
# type: () -> Text # type: () -> Text
return u"<CountStat: %s>" % (self.property,) return u"<CountStat: %s>" % (self.property,)
class LoggingCountStat(CountStat):
def __init__(self, property, analytics_table, frequency):
# type: (str, Type[BaseCount], str) -> None
CountStat.__init__(self, property, ZerverCountQuery(None, analytics_table, None), {}, None,
frequency, False)
self.is_logging = True
class ZerverCountQuery(object): class ZerverCountQuery(object):
def __init__(self, zerver_table, analytics_table, query): def __init__(self, zerver_table, analytics_table, query):
# type: (Type[models.Model], Type[BaseCount], Text) -> None # type: (Type[models.Model], Type[BaseCount], Text) -> None
@@ -109,15 +119,21 @@ def do_fill_count_stat_at_hour(stat, end_time):
else: # stat.interval == CountStat.GAUGE else: # stat.interval == CountStat.GAUGE
start_time = MIN_TIME start_time = MIN_TIME
do_pull_from_zerver(stat, start_time, end_time) if not stat.is_logging:
do_pull_from_zerver(stat, start_time, end_time)
do_aggregate_to_summary_table(stat, end_time) do_aggregate_to_summary_table(stat, end_time)
def do_delete_counts_at_hour(stat, end_time): def do_delete_counts_at_hour(stat, end_time):
# type: (CountStat, datetime) -> None # type: (CountStat, datetime) -> None
UserCount.objects.filter(property = stat.property, end_time = end_time).delete() if stat.is_logging:
StreamCount.objects.filter(property = stat.property, end_time = end_time).delete() InstallationCount.objects.filter(property=stat.property, end_time=end_time).delete()
RealmCount.objects.filter(property = stat.property, end_time = end_time).delete() if stat.zerver_count_query.analytics_table in [UserCount, StreamCount]:
InstallationCount.objects.filter(property = stat.property, end_time = end_time).delete() RealmCount.objects.filter(property=stat.property, end_time=end_time).delete()
else:
UserCount.objects.filter(property=stat.property, end_time=end_time).delete()
StreamCount.objects.filter(property=stat.property, end_time=end_time).delete()
RealmCount.objects.filter(property=stat.property, end_time=end_time).delete()
InstallationCount.objects.filter(property=stat.property, end_time=end_time).delete()
def do_delete_count_stat(property): def do_delete_count_stat(property):
# type: (str) -> None # type: (str) -> None
@@ -211,6 +227,29 @@ def do_pull_from_zerver(stat, start_time, end_time):
logger.info("%s do_pull_from_zerver (%dms/%sr)" % (stat.property, (end-start)*1000, cursor.rowcount)) logger.info("%s do_pull_from_zerver (%dms/%sr)" % (stat.property, (end-start)*1000, cursor.rowcount))
cursor.close() cursor.close()
# called from zerver/lib/actions.py; should not throw any errors
def do_increment_logging_stat(zerver_object, stat, subgroup, event_time, increment=1):
# type: (Union[Realm, UserProfile, Stream], CountStat, Optional[Union[str, int, bool]], datetime, int) -> None
table = stat.zerver_count_query.analytics_table
if table == RealmCount:
id_args = {'realm': zerver_object}
elif table == UserCount:
id_args = {'realm': zerver_object.realm, 'user': zerver_object}
else: # StreamCount
id_args = {'realm': zerver_object.realm, 'stream': zerver_object}
if stat.frequency == CountStat.DAY:
end_time = ceiling_to_day(event_time)
else: # CountStat.HOUR:
end_time = ceiling_to_hour(event_time)
row, created = table.objects.get_or_create(
property=stat.property, subgroup=subgroup, end_time=end_time,
defaults={'value': increment}, **id_args)
if not created:
row.value = F('value') + increment
row.save(update_fields=['value'])
count_user_by_realm_query = """ count_user_by_realm_query = """
INSERT INTO analytics_realmcount INSERT INTO analytics_realmcount
(realm_id, value, property, subgroup, end_time) (realm_id, value, property, subgroup, end_time)
@@ -352,7 +391,8 @@ count_stats_ = [
CountStat('messages_sent:client:day', zerver_count_message_by_user, {}, CountStat('messages_sent:client:day', zerver_count_message_by_user, {},
(Message, 'sending_client_id'), CountStat.DAY, False), (Message, 'sending_client_id'), CountStat.DAY, False),
CountStat('messages_in_stream:is_bot:day', zerver_count_message_by_stream, {}, CountStat('messages_in_stream:is_bot:day', zerver_count_message_by_stream, {},
(UserProfile, 'is_bot'), CountStat.DAY, False) (UserProfile, 'is_bot'), CountStat.DAY, False),
LoggingCountStat('active_users_log:is_bot:day', RealmCount, CountStat.DAY),
] ]
COUNT_STATS = {stat.property: stat for stat in count_stats_} COUNT_STATS = {stat.property: stat for stat in count_stats_}

View File

@@ -1,15 +1,19 @@
from __future__ import absolute_import from __future__ import absolute_import
from django.db import models from django.db import models
from django.db.models import Sum
from django.test import TestCase from django.test import TestCase
from django.utils import timezone from django.utils import timezone
from analytics.lib.counts import CountStat, COUNT_STATS, process_count_stat, \ from analytics.lib.counts import CountStat, COUNT_STATS, process_count_stat, \
zerver_count_user_by_realm, zerver_count_message_by_user, \ zerver_count_user_by_realm, zerver_count_message_by_user, \
zerver_count_message_by_stream, zerver_count_stream_by_realm, \ zerver_count_message_by_stream, zerver_count_stream_by_realm, \
do_fill_count_stat_at_hour, ZerverCountQuery do_fill_count_stat_at_hour, do_increment_logging_stat, ZerverCountQuery, \
LoggingCountStat
from analytics.models import BaseCount, InstallationCount, RealmCount, \ from analytics.models import BaseCount, InstallationCount, RealmCount, \
UserCount, StreamCount, FillState, installation_epoch UserCount, StreamCount, FillState, installation_epoch
from zerver.lib.actions import do_create_user, do_deactivate_user, \
do_activate_user, do_reactivate_user
from zerver.models import Realm, UserProfile, Message, Stream, Recipient, \ from zerver.models import Realm, UserProfile, Message, Stream, Recipient, \
Huddle, Client, get_user_profile_by_email, get_client Huddle, Client, get_user_profile_by_email, get_client
@@ -103,7 +107,7 @@ class AnalyticsTestCase(TestCase):
self.assertEqual(queryset.values_list('value', flat=True)[0], value) self.assertEqual(queryset.values_list('value', flat=True)[0], value)
def assertTableState(self, table, arg_keys, arg_values): def assertTableState(self, table, arg_keys, arg_values):
# type: (Type[BaseCount], List[str], List[List[Union[int, str, Realm, UserProfile, Stream]]]) -> None # type: (Type[BaseCount], List[str], List[List[Union[int, str, bool, datetime, Realm, UserProfile, Stream]]]) -> None
"""Assert that the state of a *Count table is what it should be. """Assert that the state of a *Count table is what it should be.
Example usage: Example usage:
@@ -192,6 +196,52 @@ class TestProcessCountStat(AnalyticsTestCase):
self.assertFillStateEquals(current_time) self.assertFillStateEquals(current_time)
self.assertEqual(InstallationCount.objects.filter(property=property).count(), 2) self.assertEqual(InstallationCount.objects.filter(property=property).count(), 2)
# This tests the is_logging branch of the code in do_delete_counts_at_hour.
# It is important that do_delete_counts_at_hour not delete any of the collected
# logging data!
def test_process_logging_stat(self):
# type: () -> None
end_time = self.TIME_ZERO
user_stat = LoggingCountStat('user stat', UserCount, CountStat.DAY)
stream_stat = LoggingCountStat('stream stat', StreamCount, CountStat.DAY)
realm_stat = LoggingCountStat('realm stat', RealmCount, CountStat.DAY)
user = self.create_user()
stream = self.create_stream_with_recipient()[0]
realm = self.default_realm
UserCount.objects.create(
user=user, realm=realm, property=user_stat.property, end_time=end_time, value=5)
StreamCount.objects.create(
stream=stream, realm=realm, property=stream_stat.property, end_time=end_time, value=5)
RealmCount.objects.create(
realm=realm, property=realm_stat.property, end_time=end_time, value=5)
# Normal run of process_count_stat
for stat in [user_stat, stream_stat, realm_stat]:
process_count_stat(stat, end_time)
self.assertTableState(UserCount, ['property', 'value'], [[user_stat.property, 5]])
self.assertTableState(StreamCount, ['property', 'value'], [[stream_stat.property, 5]])
self.assertTableState(RealmCount, ['property', 'value'],
[[user_stat.property, 5], [stream_stat.property, 5], [realm_stat.property, 5]])
self.assertTableState(InstallationCount, ['property', 'value'],
[[user_stat.property, 5], [stream_stat.property, 5], [realm_stat.property, 5]])
# Change the logged data and mark FillState as dirty
UserCount.objects.update(value=6)
StreamCount.objects.update(value=6)
RealmCount.objects.filter(property=realm_stat.property).update(value=6)
FillState.objects.update(state=FillState.STARTED)
# Check that the change propagated (and the collected data wasn't deleted)
for stat in [user_stat, stream_stat, realm_stat]:
process_count_stat(stat, end_time)
self.assertTableState(UserCount, ['property', 'value'], [[user_stat.property, 6]])
self.assertTableState(StreamCount, ['property', 'value'], [[stream_stat.property, 6]])
self.assertTableState(RealmCount, ['property', 'value'],
[[user_stat.property, 6], [stream_stat.property, 6], [realm_stat.property, 6]])
self.assertTableState(InstallationCount, ['property', 'value'],
[[user_stat.property, 6], [stream_stat.property, 6], [realm_stat.property, 6]])
class TestCountStats(AnalyticsTestCase): class TestCountStats(AnalyticsTestCase):
def setUp(self): def setUp(self):
# type: () -> None # type: () -> None
@@ -427,3 +477,121 @@ class TestCountStats(AnalyticsTestCase):
[[3, 'false'], [2, 'true'], [2, 'false', self.second_realm]]) [[3, 'false'], [2, 'true'], [2, 'false', self.second_realm]])
self.assertTableState(InstallationCount, ['value', 'subgroup'], [[5, 'false'], [2, 'true']]) self.assertTableState(InstallationCount, ['value', 'subgroup'], [[5, 'false'], [2, 'true']])
self.assertTableState(UserCount, [], []) self.assertTableState(UserCount, [], [])
class TestDoIncrementLoggingStat(AnalyticsTestCase):
def test_table_and_id_args(self):
# type: () -> None
# For realms, streams, and users, tests that the new rows are going to
# the appropriate *Count table, and that using a different zerver_object
# results in a new row being created
self.current_property = 'test'
second_realm = Realm.objects.create(string_id='moo', name='moo', domain='moo')
stat = LoggingCountStat('test', RealmCount, CountStat.DAY)
do_increment_logging_stat(self.default_realm, stat, None, self.TIME_ZERO)
do_increment_logging_stat(second_realm, stat, None, self.TIME_ZERO)
self.assertTableState(RealmCount, ['realm'], [[self.default_realm], [second_realm]])
user1 = self.create_user()
user2 = self.create_user()
stat = LoggingCountStat('test', UserCount, CountStat.DAY)
do_increment_logging_stat(user1, stat, None, self.TIME_ZERO)
do_increment_logging_stat(user2, stat, None, self.TIME_ZERO)
self.assertTableState(UserCount, ['user'], [[user1], [user2]])
stream1 = self.create_stream_with_recipient()[0]
stream2 = self.create_stream_with_recipient()[0]
stat = LoggingCountStat('test', StreamCount, CountStat.DAY)
do_increment_logging_stat(stream1, stat, None, self.TIME_ZERO)
do_increment_logging_stat(stream2, stat, None, self.TIME_ZERO)
self.assertTableState(StreamCount, ['stream'], [[stream1], [stream2]])
def test_frequency(self):
# type: () -> None
times = [self.TIME_ZERO - self.MINUTE*i for i in [0, 1, 61, 24*60+1]]
stat = LoggingCountStat('day test', RealmCount, CountStat.DAY)
for time_ in times:
do_increment_logging_stat(self.default_realm, stat, None, time_)
stat = LoggingCountStat('hour test', RealmCount, CountStat.HOUR)
for time_ in times:
do_increment_logging_stat(self.default_realm, stat, None, time_)
self.assertTableState(RealmCount, ['value', 'property', 'end_time'],
[[3, 'day test', self.TIME_ZERO],
[1, 'day test', self.TIME_ZERO - self.DAY],
[2, 'hour test', self.TIME_ZERO],
[1, 'hour test', self.TIME_LAST_HOUR],
[1, 'hour test', self.TIME_ZERO - self.DAY]])
def test_get_or_create(self):
# type: () -> None
stat = LoggingCountStat('test', RealmCount, CountStat.HOUR)
# All these should trigger the create part of get_or_create.
# property is tested in test_frequency, and id_args are tested in test_id_args,
# so this only tests a new subgroup and end_time
do_increment_logging_stat(self.default_realm, stat, 'subgroup1', self.TIME_ZERO)
do_increment_logging_stat(self.default_realm, stat, 'subgroup2', self.TIME_ZERO)
do_increment_logging_stat(self.default_realm, stat, 'subgroup1', self.TIME_LAST_HOUR)
self.current_property = 'test'
self.assertTableState(RealmCount, ['value', 'subgroup', 'end_time'],
[[1, 'subgroup1', self.TIME_ZERO], [1, 'subgroup2', self.TIME_ZERO],
[1, 'subgroup1', self.TIME_LAST_HOUR]])
# This should trigger the get part of get_or_create
do_increment_logging_stat(self.default_realm, stat, 'subgroup1', self.TIME_ZERO)
self.assertTableState(RealmCount, ['value', 'subgroup', 'end_time'],
[[2, 'subgroup1', self.TIME_ZERO], [1, 'subgroup2', self.TIME_ZERO],
[1, 'subgroup1', self.TIME_LAST_HOUR]])
def test_increment(self):
# type: () -> None
stat = LoggingCountStat('test', RealmCount, CountStat.DAY)
self.current_property = 'test'
do_increment_logging_stat(self.default_realm, stat, None, self.TIME_ZERO, increment=-1)
self.assertTableState(RealmCount, ['value'], [[-1]])
do_increment_logging_stat(self.default_realm, stat, None, self.TIME_ZERO, increment=3)
self.assertTableState(RealmCount, ['value'], [[2]])
do_increment_logging_stat(self.default_realm, stat, None, self.TIME_ZERO)
self.assertTableState(RealmCount, ['value'], [[3]])
class TestLoggingCountStats(AnalyticsTestCase):
def test_aggregation(self):
# type: () -> None
stat = LoggingCountStat('realm test', RealmCount, CountStat.DAY)
do_increment_logging_stat(self.default_realm, stat, None, self.TIME_ZERO)
process_count_stat(stat, self.TIME_ZERO)
user = self.create_user()
stat = LoggingCountStat('user test', UserCount, CountStat.DAY)
do_increment_logging_stat(user, stat, None, self.TIME_ZERO)
process_count_stat(stat, self.TIME_ZERO)
stream = self.create_stream_with_recipient()[0]
stat = LoggingCountStat('stream test', StreamCount, CountStat.DAY)
do_increment_logging_stat(stream, stat, None, self.TIME_ZERO)
process_count_stat(stat, self.TIME_ZERO)
self.assertTableState(InstallationCount, ['property', 'value'],
[['realm test', 1], ['user test', 1], ['stream test', 1]])
self.assertTableState(RealmCount, ['property', 'value'],
[['realm test', 1], ['user test', 1], ['stream test', 1]])
self.assertTableState(UserCount, ['property', 'value'], [['user test', 1]])
self.assertTableState(StreamCount, ['property', 'value'], [['stream test', 1]])
def test_active_users_log_by_is_bot(self):
# type: () -> None
property = 'active_users_log:is_bot:day'
user = do_create_user('email', 'password', self.default_realm, 'full_name', 'short_name')
self.assertEqual(1, RealmCount.objects.filter(property=property, subgroup=False)
.aggregate(Sum('value'))['value__sum'])
do_deactivate_user(user)
self.assertEqual(0, RealmCount.objects.filter(property=property, subgroup=False)
.aggregate(Sum('value'))['value__sum'])
do_activate_user(user)
self.assertEqual(1, RealmCount.objects.filter(property=property, subgroup=False)
.aggregate(Sum('value'))['value__sum'])
do_deactivate_user(user)
self.assertEqual(0, RealmCount.objects.filter(property=property, subgroup=False)
.aggregate(Sum('value'))['value__sum'])
do_reactivate_user(user)
self.assertEqual(1, RealmCount.objects.filter(property=property, subgroup=False)
.aggregate(Sum('value'))['value__sum'])

View File

@@ -10,6 +10,7 @@ from django.utils.translation import ugettext as _
from django.conf import settings from django.conf import settings
from django.core import validators from django.core import validators
from django.contrib.sessions.models import Session from django.contrib.sessions.models import Session
from analytics.lib.counts import COUNT_STATS, do_increment_logging_stat
from zerver.lib.bugdown import ( from zerver.lib.bugdown import (
BugdownRenderingException, BugdownRenderingException,
version as bugdown_version version as bugdown_version
@@ -407,11 +408,14 @@ def do_create_user(email, password, realm, full_name, short_name,
default_events_register_stream=default_events_register_stream, default_events_register_stream=default_events_register_stream,
default_all_public_streams=default_all_public_streams) default_all_public_streams=default_all_public_streams)
event_time = user_profile.date_joined
RealmAuditLog.objects.create(realm=user_profile.realm, modified_user=user_profile, RealmAuditLog.objects.create(realm=user_profile.realm, modified_user=user_profile,
event_type='user_created', event_time=user_profile.date_joined) event_type='user_created', event_time=event_time)
do_increment_logging_stat(user_profile.realm, COUNT_STATS['active_users_log:is_bot:day'],
user_profile.is_bot, event_time)
event = {'type': 'user_created', event = {'type': 'user_created',
'timestamp': datetime_to_timestamp(user_profile.date_joined), 'timestamp': datetime_to_timestamp(event_time),
'full_name': full_name, 'full_name': full_name,
'short_name': short_name, 'short_name': short_name,
'user': email, 'user': email,
@@ -635,6 +639,8 @@ def do_deactivate_user(user_profile, log=True, _cascade=True):
event_time = timezone.now() event_time = timezone.now()
RealmAuditLog.objects.create(realm=user_profile.realm, modified_user=user_profile, RealmAuditLog.objects.create(realm=user_profile.realm, modified_user=user_profile,
event_type='user_deactivated', event_time=event_time) event_type='user_deactivated', event_time=event_time)
do_increment_logging_stat(user_profile.realm, COUNT_STATS['active_users_log:is_bot:day'],
user_profile.is_bot, event_time, increment=-1)
if log: if log:
log_event({'type': 'user_deactivated', log_event({'type': 'user_deactivated',
@@ -1856,6 +1862,8 @@ def do_activate_user(user_profile, log=True, join_date=timezone.now()):
event_time = timezone.now() event_time = timezone.now()
RealmAuditLog.objects.create(realm=user_profile.realm, modified_user=user_profile, RealmAuditLog.objects.create(realm=user_profile.realm, modified_user=user_profile,
event_type='user_activated', event_time=event_time) event_type='user_activated', event_time=event_time)
do_increment_logging_stat(user_profile.realm, COUNT_STATS['active_users_log:is_bot:day'],
user_profile.is_bot, event_time)
if log: if log:
domain = user_profile.realm.domain domain = user_profile.realm.domain
@@ -1876,6 +1884,8 @@ def do_reactivate_user(user_profile):
event_time = timezone.now() event_time = timezone.now()
RealmAuditLog.objects.create(realm=user_profile.realm, modified_user=user_profile, RealmAuditLog.objects.create(realm=user_profile.realm, modified_user=user_profile,
event_type='user_reactivated', event_time=event_time) event_type='user_reactivated', event_time=event_time)
do_increment_logging_stat(user_profile.realm, COUNT_STATS['active_users_log:is_bot:day'],
user_profile.is_bot, event_time)
domain = user_profile.realm.domain domain = user_profile.realm.domain
log_event({'type': 'user_reactivated', log_event({'type': 'user_reactivated',