diff --git a/analytics/lib/counts.py b/analytics/lib/counts.py index 6dad7a6230..3153058e9d 100644 --- a/analytics/lib/counts.py +++ b/analytics/lib/counts.py @@ -36,10 +36,11 @@ logger = logging.getLogger('zulip.management') log_to_file(logger, settings.ANALYTICS_LOG_PATH) # You can't subtract timedelta.max from a datetime, so use this instead -TIMEDELTA_MAX = timedelta(days=365*1000) +TIMEDELTA_MAX = timedelta(days=365 * 1000) ## Class definitions ## + class CountStat: HOUR = 'hour' DAY = 'day' @@ -51,8 +52,13 @@ class CountStat: return timedelta(hours=1) return timedelta(days=1) - def __init__(self, property: str, data_collector: 'DataCollector', frequency: str, - interval: Optional[timedelta]=None) -> None: + def __init__( + self, + property: str, + data_collector: 'DataCollector', + frequency: str, + interval: Optional[timedelta] = None, + ) -> None: self.property = property self.data_collector = data_collector # might have to do something different for bitfields @@ -75,26 +81,41 @@ class CountStat: return fillstate.end_time return fillstate.end_time - self.time_increment + class LoggingCountStat(CountStat): def __init__(self, property: str, output_table: Type[BaseCount], frequency: str) -> None: CountStat.__init__(self, property, DataCollector(output_table, None), frequency) + class DependentCountStat(CountStat): - def __init__(self, property: str, data_collector: 'DataCollector', frequency: str, - interval: Optional[timedelta] = None, dependencies: Sequence[str] = []) -> None: + def __init__( + self, + property: str, + data_collector: 'DataCollector', + frequency: str, + interval: Optional[timedelta] = None, + dependencies: Sequence[str] = [], + ) -> None: CountStat.__init__(self, property, data_collector, frequency, interval=interval) self.dependencies = dependencies + class DataCollector: - def __init__(self, output_table: Type[BaseCount], - pull_function: Optional[Callable[[str, datetime, datetime, Optional[Realm]], int]]) -> None: + def __init__( + self, + output_table: Type[BaseCount], + pull_function: Optional[Callable[[str, datetime, datetime, Optional[Realm]], int]], + ) -> None: self.output_table = output_table self.pull_function = pull_function + ## CountStat-level operations ## -def process_count_stat(stat: CountStat, fill_to_time: datetime, - realm: Optional[Realm]=None) -> None: + +def process_count_stat( + stat: CountStat, fill_to_time: datetime, realm: Optional[Realm] = None +) -> None: # TODO: The realm argument is not yet supported, in that we don't # have a solution for how to update FillState if it is passed. 
It # exists solely as partial plumbing for when we do fully implement @@ -111,9 +132,9 @@ def process_count_stat(stat: CountStat, fill_to_time: datetime, fill_state = FillState.objects.filter(property=stat.property).first() if fill_state is None: currently_filled = installation_epoch() - fill_state = FillState.objects.create(property=stat.property, - end_time=currently_filled, - state=FillState.DONE) + fill_state = FillState.objects.create( + property=stat.property, end_time=currently_filled, state=FillState.DONE + ) logger.info("INITIALIZED %s %s", stat.property, currently_filled) elif fill_state.state == FillState.STARTED: logger.info("UNDO START %s %s", stat.property, fill_state.end_time) @@ -130,8 +151,9 @@ def process_count_stat(stat: CountStat, fill_to_time: datetime, for dependency in stat.dependencies: dependency_fill_time = COUNT_STATS[dependency].last_successful_fill() if dependency_fill_time is None: - logger.warning("DependentCountStat %s run before dependency %s.", - stat.property, dependency) + logger.warning( + "DependentCountStat %s run before dependency %s.", stat.property, dependency + ) return fill_to_time = min(fill_to_time, dependency_fill_time) @@ -144,25 +166,34 @@ def process_count_stat(stat: CountStat, fill_to_time: datetime, do_update_fill_state(fill_state, currently_filled, FillState.DONE) end = time.time() currently_filled = currently_filled + stat.time_increment - logger.info("DONE %s (%dms)", stat.property, (end-start)*1000) + logger.info("DONE %s (%dms)", stat.property, (end - start) * 1000) + def do_update_fill_state(fill_state: FillState, end_time: datetime, state: int) -> None: fill_state.end_time = end_time fill_state.state = state fill_state.save() + # We assume end_time is valid (e.g. is on a day or hour boundary as appropriate) # and is timezone aware. It is the caller's responsibility to enforce this! 
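Note on the fill protocol above: process_count_stat advances FillState one stat.time_increment at a time, writing FillState.STARTED before each increment is filled and FillState.DONE after, so a run that dies mid-fill leaves a STARTED marker and the next run deletes that partial hour before refilling it. A minimal caller sketch (the stat chosen is a hypothetical example; fill_to_time must be timezone aware and on the stat's frequency boundary, as the comment above requires):

    from django.utils.timezone import now as timezone_now
    from zerver.lib.timestamp import floor_to_hour

    stat = COUNT_STATS['messages_sent:is_bot:hour']  # any registered CountStat
    process_count_stat(stat, floor_to_hour(timezone_now()))  # fill up to the current hour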
-def do_fill_count_stat_at_hour(stat: CountStat, end_time: datetime, realm: Optional[Realm]=None) -> None: +def do_fill_count_stat_at_hour( + stat: CountStat, end_time: datetime, realm: Optional[Realm] = None +) -> None: start_time = end_time - stat.interval if not isinstance(stat, LoggingCountStat): timer = time.time() - assert(stat.data_collector.pull_function is not None) + assert stat.data_collector.pull_function is not None rows_added = stat.data_collector.pull_function(stat.property, start_time, end_time, realm) - logger.info("%s run pull_function (%dms/%sr)", - stat.property, (time.time()-timer)*1000, rows_added) + logger.info( + "%s run pull_function (%dms/%sr)", + stat.property, + (time.time() - timer) * 1000, + rows_added, + ) do_aggregate_to_summary_table(stat, end_time, realm) + def do_delete_counts_at_hour(stat: CountStat, end_time: datetime) -> None: if isinstance(stat, LoggingCountStat): InstallationCount.objects.filter(property=stat.property, end_time=end_time).delete() @@ -174,8 +205,10 @@ def do_delete_counts_at_hour(stat: CountStat, end_time: datetime) -> None: RealmCount.objects.filter(property=stat.property, end_time=end_time).delete() InstallationCount.objects.filter(property=stat.property, end_time=end_time).delete() -def do_aggregate_to_summary_table(stat: CountStat, end_time: datetime, - realm: Optional[Realm]=None) -> None: + +def do_aggregate_to_summary_table( + stat: CountStat, end_time: datetime, realm: Optional[Realm] = None +) -> None: cursor = connection.cursor() # Aggregate into RealmCount @@ -186,7 +219,8 @@ def do_aggregate_to_summary_table(stat: CountStat, end_time: datetime, realm_clause = SQL("") if output_table in (UserCount, StreamCount): - realmcount_query = SQL(""" + realmcount_query = SQL( + """ INSERT INTO analytics_realmcount (realm_id, value, property, subgroup, end_time) SELECT @@ -201,19 +235,25 @@ def do_aggregate_to_summary_table(stat: CountStat, end_time: datetime, {output_table}.end_time = %(end_time)s {realm_clause} GROUP BY zerver_realm.id, {output_table}.subgroup - """).format( + """ + ).format( output_table=Identifier(output_table._meta.db_table), realm_clause=realm_clause, ) start = time.time() - cursor.execute(realmcount_query, { - 'property': stat.property, - 'end_time': end_time, - }) + cursor.execute( + realmcount_query, + { + 'property': stat.property, + 'end_time': end_time, + }, + ) end = time.time() logger.info( "%s RealmCount aggregation (%dms/%sr)", - stat.property, (end - start) * 1000, cursor.rowcount, + stat.property, + (end - start) * 1000, + cursor.rowcount, ) if realm is None: @@ -222,7 +262,8 @@ def do_aggregate_to_summary_table(stat: CountStat, end_time: datetime, # # TODO: Add support for updating installation data after # changing an individual realm's values. 
- installationcount_query = SQL(""" + installationcount_query = SQL( + """ INSERT INTO analytics_installationcount (value, property, subgroup, end_time) SELECT @@ -232,26 +273,37 @@ def do_aggregate_to_summary_table(stat: CountStat, end_time: datetime, property = %(property)s AND end_time = %(end_time)s GROUP BY analytics_realmcount.subgroup - """) + """ + ) start = time.time() - cursor.execute(installationcount_query, { - 'property': stat.property, - 'end_time': end_time, - }) + cursor.execute( + installationcount_query, + { + 'property': stat.property, + 'end_time': end_time, + }, + ) end = time.time() logger.info( "%s InstallationCount aggregation (%dms/%sr)", - stat.property, (end - start) * 1000, cursor.rowcount, + stat.property, + (end - start) * 1000, + cursor.rowcount, ) cursor.close() + ## Utility functions called from outside counts.py ## # called from zerver/lib/actions.py; should not throw any errors -def do_increment_logging_stat(zerver_object: Union[Realm, UserProfile, Stream], stat: CountStat, - subgroup: Optional[Union[str, int, bool]], event_time: datetime, - increment: int=1) -> None: +def do_increment_logging_stat( + zerver_object: Union[Realm, UserProfile, Stream], + stat: CountStat, + subgroup: Optional[Union[str, int, bool]], + event_time: datetime, + increment: int = 1, +) -> None: if not increment: return @@ -269,12 +321,17 @@ def do_increment_logging_stat(zerver_object: Union[Realm, UserProfile, Stream], end_time = ceiling_to_hour(event_time) row, created = table.objects.get_or_create( - property=stat.property, subgroup=subgroup, end_time=end_time, - defaults={'value': increment}, **id_args) + property=stat.property, + subgroup=subgroup, + end_time=end_time, + defaults={'value': increment}, + **id_args, + ) if not created: row.value = F('value') + increment row.save(update_fields=['value']) + def do_drop_all_analytics_tables() -> None: UserCount.objects.all().delete() StreamCount.objects.all().delete() @@ -282,6 +339,7 @@ def do_drop_all_analytics_tables() -> None: InstallationCount.objects.all().delete() FillState.objects.all().delete() + def do_drop_single_stat(property: str) -> None: UserCount.objects.filter(property=property).delete() StreamCount.objects.filter(property=property).delete() @@ -289,10 +347,12 @@ def do_drop_single_stat(property: str) -> None: InstallationCount.objects.filter(property=property).delete() FillState.objects.filter(property=property).delete() + ## DataCollector-level operations ## QueryFn = Callable[[Dict[str, Composable]], Composable] + def do_pull_by_sql_query( property: str, start_time: datetime, @@ -311,27 +371,34 @@ def do_pull_by_sql_query( # group_by_clause given as a param. # We pass in the datetimes as params to cursor.execute so that we don't have to # think about how to convert python datetimes to SQL datetimes. 
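    # (A QueryFn, per the alias above, maps a Dict[str, Composable] holding the
    # 'subgroup' and 'group_by_clause' fragments to the finished INSERT ... SELECT
    # statement; %(property)s, %(time_start)s, and %(time_end)s remain as
    # placeholders for cursor.execute below to bind. A minimal hypothetical
    # QueryFn of that shape, mirroring count_stream_by_realm_query later in
    # this file:
    #     trivial_query: QueryFn = lambda kwargs: SQL(
    #         "SELECT 1 {group_by_clause}"
    #     ).format(**kwargs)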
- query_ = query({ - 'subgroup': subgroup, - 'group_by_clause': group_by_clause, - }) + query_ = query( + { + 'subgroup': subgroup, + 'group_by_clause': group_by_clause, + } + ) cursor = connection.cursor() - cursor.execute(query_, { - 'property': property, - 'time_start': start_time, - 'time_end': end_time, - }) + cursor.execute( + query_, + { + 'property': property, + 'time_start': start_time, + 'time_end': end_time, + }, + ) rowcount = cursor.rowcount cursor.close() return rowcount + def sql_data_collector( output_table: Type[BaseCount], query: QueryFn, group_by: Optional[Tuple[models.Model, str]], ) -> DataCollector: - def pull_function(property: str, start_time: datetime, end_time: datetime, - realm: Optional[Realm] = None) -> int: + def pull_function( + property: str, start_time: datetime, end_time: datetime, realm: Optional[Realm] = None + ) -> int: # The pull function type needs to accept a Realm argument # because the 'minutes_active::day' CountStat uses # DataCollector directly for do_pull_minutes_active, which @@ -339,16 +406,23 @@ def sql_data_collector( # realm should have been already encoded in the `query` we're # passed. return do_pull_by_sql_query(property, start_time, end_time, query, group_by) + return DataCollector(output_table, pull_function) -def do_pull_minutes_active(property: str, start_time: datetime, end_time: datetime, - realm: Optional[Realm] = None) -> int: - user_activity_intervals = UserActivityInterval.objects.filter( - end__gt=start_time, start__lt=end_time, - ).select_related( - 'user_profile', - ).values_list( - 'user_profile_id', 'user_profile__realm_id', 'start', 'end') + +def do_pull_minutes_active( + property: str, start_time: datetime, end_time: datetime, realm: Optional[Realm] = None +) -> int: + user_activity_intervals = ( + UserActivityInterval.objects.filter( + end__gt=start_time, + start__lt=end_time, + ) + .select_related( + 'user_profile', + ) + .values_list('user_profile_id', 'user_profile__realm_id', 'start', 'end') + ) seconds_active: Dict[Tuple[int, int], float] = defaultdict(float) for user_id, realm_id, interval_start, interval_end in user_activity_intervals: @@ -357,18 +431,28 @@ def do_pull_minutes_active(property: str, start_time: datetime, end_time: dateti end = min(end_time, interval_end) seconds_active[(user_id, realm_id)] += (end - start).total_seconds() - rows = [UserCount(user_id=ids[0], realm_id=ids[1], property=property, - end_time=end_time, value=int(seconds // 60)) - for ids, seconds in seconds_active.items() if seconds >= 60] + rows = [ + UserCount( + user_id=ids[0], + realm_id=ids[1], + property=property, + end_time=end_time, + value=int(seconds // 60), + ) + for ids, seconds in seconds_active.items() + if seconds >= 60 + ] UserCount.objects.bulk_create(rows) return len(rows) + def count_message_by_user_query(realm: Optional[Realm]) -> QueryFn: if realm is None: realm_clause = SQL("") else: realm_clause = SQL("zerver_userprofile.realm_id = {} AND").format(Literal(realm.id)) - return lambda kwargs: SQL(""" + return lambda kwargs: SQL( + """ INSERT INTO analytics_usercount (user_id, realm_id, value, property, subgroup, end_time) SELECT @@ -384,7 +468,9 @@ def count_message_by_user_query(realm: Optional[Realm]) -> QueryFn: {realm_clause} zerver_message.date_sent < %(time_end)s GROUP BY zerver_userprofile.id {group_by_clause} -""").format(**kwargs, realm_clause=realm_clause) +""" + ).format(**kwargs, realm_clause=realm_clause) + # Note: ignores the group_by / group_by_clause. 
def count_message_type_by_user_query(realm: Optional[Realm]) -> QueryFn: @@ -392,7 +478,8 @@ def count_message_type_by_user_query(realm: Optional[Realm]) -> QueryFn: realm_clause = SQL("") else: realm_clause = SQL("zerver_userprofile.realm_id = {} AND").format(Literal(realm.id)) - return lambda kwargs: SQL(""" + return lambda kwargs: SQL( + """ INSERT INTO analytics_usercount (realm_id, user_id, value, property, subgroup, end_time) SELECT realm_id, id, SUM(count) AS value, %(property)s, message_type, %(time_end)s @@ -427,7 +514,9 @@ def count_message_type_by_user_query(realm: Optional[Realm]) -> QueryFn: zerver_recipient.type, zerver_stream.invite_only ) AS subquery GROUP BY realm_id, id, message_type -""").format(**kwargs, realm_clause=realm_clause) +""" + ).format(**kwargs, realm_clause=realm_clause) + # This query joins to the UserProfile table since all current queries that # use this also subgroup on UserProfile.is_bot. If in the future there is a @@ -438,7 +527,8 @@ def count_message_by_stream_query(realm: Optional[Realm]) -> QueryFn: realm_clause = SQL("") else: realm_clause = SQL("zerver_stream.realm_id = {} AND").format(Literal(realm.id)) - return lambda kwargs: SQL(""" + return lambda kwargs: SQL( + """ INSERT INTO analytics_streamcount (stream_id, realm_id, value, property, subgroup, end_time) SELECT @@ -460,7 +550,9 @@ def count_message_by_stream_query(realm: Optional[Realm]) -> QueryFn: {realm_clause} zerver_message.date_sent < %(time_end)s GROUP BY zerver_stream.id {group_by_clause} -""").format(**kwargs, realm_clause=realm_clause) +""" + ).format(**kwargs, realm_clause=realm_clause) + # Hardcodes the query needed by active_users:is_bot:day, since that is # currently the only stat that uses this. @@ -469,7 +561,8 @@ def count_user_by_realm_query(realm: Optional[Realm]) -> QueryFn: realm_clause = SQL("") else: realm_clause = SQL("zerver_userprofile.realm_id = {} AND").format(Literal(realm.id)) - return lambda kwargs: SQL(""" + return lambda kwargs: SQL( + """ INSERT INTO analytics_realmcount (realm_id, value, property, subgroup, end_time) SELECT @@ -485,7 +578,9 @@ def count_user_by_realm_query(realm: Optional[Realm]) -> QueryFn: {realm_clause} zerver_userprofile.is_active = TRUE GROUP BY zerver_realm.id {group_by_clause} -""").format(**kwargs, realm_clause=realm_clause) +""" + ).format(**kwargs, realm_clause=realm_clause) + # Currently hardcodes the query needed for active_users_audit:is_bot:day. 
# Assumes that a user cannot have two RealmAuditLog entries with the same event_time and @@ -496,7 +591,8 @@ def check_realmauditlog_by_user_query(realm: Optional[Realm]) -> QueryFn: realm_clause = SQL("") else: realm_clause = SQL("realm_id = {} AND").format(Literal(realm.id)) - return lambda kwargs: SQL(""" + return lambda kwargs: SQL( + """ INSERT INTO analytics_usercount (user_id, realm_id, value, property, subgroup, end_time) SELECT @@ -519,7 +615,8 @@ def check_realmauditlog_by_user_query(realm: Optional[Realm]) -> QueryFn: ral1.modified_user_id = zerver_userprofile.id WHERE ral1.event_type in ({user_created}, {user_activated}, {user_reactivated}) - """).format( + """ + ).format( **kwargs, user_created=Literal(RealmAuditLog.USER_CREATED), user_activated=Literal(RealmAuditLog.USER_ACTIVATED), @@ -528,12 +625,14 @@ def check_realmauditlog_by_user_query(realm: Optional[Realm]) -> QueryFn: realm_clause=realm_clause, ) + def check_useractivityinterval_by_user_query(realm: Optional[Realm]) -> QueryFn: if realm is None: realm_clause = SQL("") else: realm_clause = SQL("zerver_userprofile.realm_id = {} AND").format(Literal(realm.id)) - return lambda kwargs: SQL(""" + return lambda kwargs: SQL( + """ INSERT INTO analytics_usercount (user_id, realm_id, value, property, subgroup, end_time) SELECT @@ -547,14 +646,17 @@ def check_useractivityinterval_by_user_query(realm: Optional[Realm]) -> QueryFn: {realm_clause} zerver_useractivityinterval.start < %(time_end)s GROUP BY zerver_userprofile.id {group_by_clause} -""").format(**kwargs, realm_clause=realm_clause) +""" + ).format(**kwargs, realm_clause=realm_clause) + def count_realm_active_humans_query(realm: Optional[Realm]) -> QueryFn: if realm is None: realm_clause = SQL("") else: realm_clause = SQL("realm_id = {} AND").format(Literal(realm.id)) - return lambda kwargs: SQL(""" + return lambda kwargs: SQL( + """ INSERT INTO analytics_realmcount (realm_id, value, property, subgroup, end_time) SELECT @@ -579,10 +681,13 @@ def count_realm_active_humans_query(realm: Optional[Realm]) -> QueryFn: ON usercount1.user_id = usercount2.user_id GROUP BY usercount1.realm_id -""").format(**kwargs, realm_clause=realm_clause) +""" + ).format(**kwargs, realm_clause=realm_clause) + # Currently unused and untested -count_stream_by_realm_query = lambda kwargs: SQL(""" +count_stream_by_realm_query = lambda kwargs: SQL( + """ INSERT INTO analytics_realmcount (realm_id, value, property, subgroup, end_time) SELECT @@ -596,62 +701,77 @@ count_stream_by_realm_query = lambda kwargs: SQL(""" zerver_stream.date_created >= %(time_start)s AND zerver_stream.date_created < %(time_end)s GROUP BY zerver_realm.id {group_by_clause} -""").format(**kwargs) +""" +).format(**kwargs) -def get_count_stats(realm: Optional[Realm]=None) -> Dict[str, CountStat]: + +def get_count_stats(realm: Optional[Realm] = None) -> Dict[str, CountStat]: ## CountStat declarations ## count_stats_ = [ # Messages sent stats # Stats that count the number of messages sent in various ways. # These are also the set of stats that read from the Message table. 
-
-        CountStat('messages_sent:is_bot:hour',
-                  sql_data_collector(UserCount, count_message_by_user_query(
-                      realm), (UserProfile, 'is_bot')),
-                  CountStat.HOUR),
-        CountStat('messages_sent:message_type:day',
-                  sql_data_collector(
-                      UserCount, count_message_type_by_user_query(realm), None),
-                  CountStat.DAY),
-        CountStat('messages_sent:client:day',
-                  sql_data_collector(UserCount, count_message_by_user_query(realm),
-                                     (Message, 'sending_client_id')), CountStat.DAY),
-        CountStat('messages_in_stream:is_bot:day',
-                  sql_data_collector(StreamCount, count_message_by_stream_query(realm),
-                                     (UserProfile, 'is_bot')), CountStat.DAY),
-
+        CountStat(
+            'messages_sent:is_bot:hour',
+            sql_data_collector(
+                UserCount, count_message_by_user_query(realm), (UserProfile, 'is_bot')
+            ),
+            CountStat.HOUR,
+        ),
+        CountStat(
+            'messages_sent:message_type:day',
+            sql_data_collector(UserCount, count_message_type_by_user_query(realm), None),
+            CountStat.DAY,
+        ),
+        CountStat(
+            'messages_sent:client:day',
+            sql_data_collector(
+                UserCount, count_message_by_user_query(realm), (Message, 'sending_client_id')
+            ),
+            CountStat.DAY,
+        ),
+        CountStat(
+            'messages_in_stream:is_bot:day',
+            sql_data_collector(
+                StreamCount, count_message_by_stream_query(realm), (UserProfile, 'is_bot')
+            ),
+            CountStat.DAY,
+        ),
         # Number of users stats
         # Stats that count the number of active users in the UserProfile.is_active sense.
-
         # 'active_users_audit:is_bot:day' is the canonical record of which users were
         # active on which days (in the UserProfile.is_active sense).
         # Important that this stay a daily stat, so that 'realm_active_humans::day' works as expected.
-        CountStat('active_users_audit:is_bot:day',
-                  sql_data_collector(UserCount, check_realmauditlog_by_user_query(
-                      realm), (UserProfile, 'is_bot')),
-                  CountStat.DAY),
-
+        CountStat(
+            'active_users_audit:is_bot:day',
+            sql_data_collector(
+                UserCount, check_realmauditlog_by_user_query(realm), (UserProfile, 'is_bot')
+            ),
+            CountStat.DAY,
+        ),
         # Important note: LoggingCountStat objects aren't passed the
         # Realm argument, because by nature they have a logging
         # structure, not a pull-from-database structure, so there's no
         # way to compute them for a single realm after the fact (the
         # use case for passing a Realm argument).
-
         # Sanity check on 'active_users_audit:is_bot:day', and an archetype for future LoggingCountStats.
         # In RealmCount, 'active_users_audit:is_bot:day' should be the partial
         # sum sequence of 'active_users_log:is_bot:day', for any realm that
         # started after the latter stat was introduced.
-        LoggingCountStat('active_users_log:is_bot:day',
-                         RealmCount, CountStat.DAY),
+        LoggingCountStat('active_users_log:is_bot:day', RealmCount, CountStat.DAY),
         # Another sanity check on 'active_users_audit:is_bot:day'. Is only an
         # approximation, e.g. if a user is deactivated between the end of the
         # day and when this stat is run, they won't be counted. However, is the
         # simplest of the three to inspect by hand.
-        CountStat('active_users:is_bot:day',
-                  sql_data_collector(RealmCount, count_user_by_realm_query(realm), (UserProfile, 'is_bot')),
-                  CountStat.DAY, interval=TIMEDELTA_MAX),
-
+        CountStat(
+            'active_users:is_bot:day',
+            sql_data_collector(
+                RealmCount, count_user_by_realm_query(realm), (UserProfile, 'is_bot')
+            ),
+            CountStat.DAY,
+            interval=TIMEDELTA_MAX,
+        ),
         # Messages read stats.  messages_read::hour is the total
         # number of messages read, whereas
         # messages_read_interactions::hour tries to count the total
@@ -661,42 +781,45 @@ def get_count_stats(realm: Optional[Realm]=None) -> Dict[str, CountStat]:
         # operation).
         LoggingCountStat('messages_read::hour', UserCount, CountStat.HOUR),
         LoggingCountStat('messages_read_interactions::hour', UserCount, CountStat.HOUR),
-
         # User activity stats
         # Stats that measure user activity in the UserActivityInterval sense.
-
-        CountStat('1day_actives::day',
-                  sql_data_collector(
-                      UserCount, check_useractivityinterval_by_user_query(realm), None),
-                  CountStat.DAY, interval=timedelta(days=1)-UserActivityInterval.MIN_INTERVAL_LENGTH),
-        CountStat('7day_actives::day',
-                  sql_data_collector(
-                      UserCount, check_useractivityinterval_by_user_query(realm), None),
-                  CountStat.DAY, interval=timedelta(days=7)-UserActivityInterval.MIN_INTERVAL_LENGTH),
-        CountStat('15day_actives::day',
-                  sql_data_collector(
-                      UserCount, check_useractivityinterval_by_user_query(realm), None),
-                  CountStat.DAY, interval=timedelta(days=15)-UserActivityInterval.MIN_INTERVAL_LENGTH),
-        CountStat('minutes_active::day', DataCollector(
-            UserCount, do_pull_minutes_active), CountStat.DAY),
-
+        CountStat(
+            '1day_actives::day',
+            sql_data_collector(UserCount, check_useractivityinterval_by_user_query(realm), None),
+            CountStat.DAY,
+            interval=timedelta(days=1) - UserActivityInterval.MIN_INTERVAL_LENGTH,
+        ),
+        CountStat(
+            '7day_actives::day',
+            sql_data_collector(UserCount, check_useractivityinterval_by_user_query(realm), None),
+            CountStat.DAY,
+            interval=timedelta(days=7) - UserActivityInterval.MIN_INTERVAL_LENGTH,
+        ),
+        CountStat(
+            '15day_actives::day',
+            sql_data_collector(UserCount, check_useractivityinterval_by_user_query(realm), None),
+            CountStat.DAY,
+            interval=timedelta(days=15) - UserActivityInterval.MIN_INTERVAL_LENGTH,
+        ),
+        CountStat(
+            'minutes_active::day', DataCollector(UserCount, do_pull_minutes_active), CountStat.DAY
+        ),
        # Rate limiting stats
-
        # Used to limit the number of invitation emails sent by a realm
        LoggingCountStat('invites_sent::day', RealmCount, CountStat.DAY),
-
        # Dependent stats
        # Must come after their dependencies.
-
        # Canonical account of the number of active humans in a realm on each day.
- DependentCountStat('realm_active_humans::day', - sql_data_collector( - RealmCount, count_realm_active_humans_query(realm), None), - CountStat.DAY, - dependencies=['active_users_audit:is_bot:day', '15day_actives::day']), + DependentCountStat( + 'realm_active_humans::day', + sql_data_collector(RealmCount, count_realm_active_humans_query(realm), None), + CountStat.DAY, + dependencies=['active_users_audit:is_bot:day', '15day_actives::day'], + ), ] return OrderedDict((stat.property, stat) for stat in count_stats_) + # To avoid refactoring for now COUNT_STATS can be used as before COUNT_STATS = get_count_stats() diff --git a/analytics/lib/fixtures.py b/analytics/lib/fixtures.py index 41f8c8dfa3..fe554be68c 100644 --- a/analytics/lib/fixtures.py +++ b/analytics/lib/fixtures.py @@ -5,11 +5,18 @@ from typing import List from analytics.lib.counts import CountStat -def generate_time_series_data(days: int=100, business_hours_base: float=10, - non_business_hours_base: float=10, growth: float=1, - autocorrelation: float=0, spikiness: float=1, - holiday_rate: float=0, frequency: str=CountStat.DAY, - partial_sum: bool=False, random_seed: int=26) -> List[int]: +def generate_time_series_data( + days: int = 100, + business_hours_base: float = 10, + non_business_hours_base: float = 10, + growth: float = 1, + autocorrelation: float = 0, + spikiness: float = 1, + holiday_rate: float = 0, + frequency: str = CountStat.DAY, + partial_sum: bool = False, + random_seed: int = 26, +) -> List[int]: """ Generate semi-realistic looking time series data for testing analytics graphs. @@ -30,34 +37,43 @@ def generate_time_series_data(days: int=100, business_hours_base: float=10, random_seed -- Seed for random number generator. """ if frequency == CountStat.HOUR: - length = days*24 + length = days * 24 seasonality = [non_business_hours_base] * 24 * 7 for day in range(5): for hour in range(8): - seasonality[24*day + hour] = business_hours_base - holidays = [] + seasonality[24 * day + hour] = business_hours_base + holidays = [] for i in range(days): holidays.extend([random() < holiday_rate] * 24) elif frequency == CountStat.DAY: length = days - seasonality = [8*business_hours_base + 16*non_business_hours_base] * 5 + \ - [24*non_business_hours_base] * 2 + seasonality = [8 * business_hours_base + 16 * non_business_hours_base] * 5 + [ + 24 * non_business_hours_base + ] * 2 holidays = [random() < holiday_rate for i in range(days)] else: raise AssertionError(f"Unknown frequency: {frequency}") if length < 2: - raise AssertionError(f"Must be generating at least 2 data points. Currently generating {length}") - growth_base = growth ** (1. / (length-1)) - values_no_noise = [seasonality[i % len(seasonality)] * (growth_base**i) for i in range(length)] + raise AssertionError( + f"Must be generating at least 2 data points. 
Currently generating {length}" + ) + growth_base = growth ** (1.0 / (length - 1)) + values_no_noise = [ + seasonality[i % len(seasonality)] * (growth_base ** i) for i in range(length) + ] seed(random_seed) noise_scalars = [gauss(0, 1)] for i in range(1, length): - noise_scalars.append(noise_scalars[-1]*autocorrelation + gauss(0, 1)*(1-autocorrelation)) + noise_scalars.append( + noise_scalars[-1] * autocorrelation + gauss(0, 1) * (1 - autocorrelation) + ) - values = [0 if holiday else int(v + sqrt(v)*noise_scalar*spikiness) - for v, noise_scalar, holiday in zip(values_no_noise, noise_scalars, holidays)] + values = [ + 0 if holiday else int(v + sqrt(v) * noise_scalar * spikiness) + for v, noise_scalar, holiday in zip(values_no_noise, noise_scalars, holidays) + ] if partial_sum: for i in range(1, length): - values[i] = values[i-1] + values[i] + values[i] = values[i - 1] + values[i] return [max(v, 0) for v in values] diff --git a/analytics/lib/time_utils.py b/analytics/lib/time_utils.py index b990ae252d..888baf96d5 100644 --- a/analytics/lib/time_utils.py +++ b/analytics/lib/time_utils.py @@ -9,8 +9,9 @@ from zerver.lib.timestamp import floor_to_day, floor_to_hour, verify_UTC # If min_length is greater than 0, pads the list to the left. # So informally, time_range(Sep 20, Sep 22, day, None) returns [Sep 20, Sep 21, Sep 22], # and time_range(Sep 20, Sep 22, day, 5) returns [Sep 18, Sep 19, Sep 20, Sep 21, Sep 22] -def time_range(start: datetime, end: datetime, frequency: str, - min_length: Optional[int]) -> List[datetime]: +def time_range( + start: datetime, end: datetime, frequency: str, min_length: Optional[int] +) -> List[datetime]: verify_UTC(start) verify_UTC(end) if frequency == CountStat.HOUR: @@ -24,7 +25,7 @@ def time_range(start: datetime, end: datetime, frequency: str, times = [] if min_length is not None: - start = min(start, end - (min_length-1)*step) + start = min(start, end - (min_length - 1) * step) current = end while current >= start: times.append(current) diff --git a/analytics/management/commands/check_analytics_state.py b/analytics/management/commands/check_analytics_state.py index 48576cda19..d80ca29fc8 100644 --- a/analytics/management/commands/check_analytics_state.py +++ b/analytics/management/commands/check_analytics_state.py @@ -18,6 +18,7 @@ states = { 3: "UNKNOWN", } + class Command(BaseCommand): help = """Checks FillState table. 
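The states map above gives the Nagios-style meaning of the numeric 'status' values returned by the checks below, with 2 rendering as CRITICAL. A sketch of the boundary condition the next hunk reformats, assuming floor_to_day from zerver.lib.timestamp as the floor_function of a hypothetical daily stat:

    last_fill = datetime(2020, 6, 1, 12, tzinfo=timezone.utc)  # hypothetical mid-day FillState
    if floor_to_day(last_fill) != last_fill:
        ...  # reported as {'status': 2, 'message': 'FillState not on day boundary for <property>'}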
@@ -60,7 +61,10 @@ class Command(BaseCommand): critical_threshold = timedelta(minutes=150) if floor_function(last_fill) != last_fill: - return {'status': 2, 'message': f'FillState not on {stat.frequency} boundary for {property}'} + return { + 'status': 2, + 'message': f'FillState not on {stat.frequency} boundary for {property}', + } time_to_last_fill = timezone_now() - last_fill if time_to_last_fill > critical_threshold: diff --git a/analytics/management/commands/clear_analytics_tables.py b/analytics/management/commands/clear_analytics_tables.py index e668d156ce..d331954682 100644 --- a/analytics/management/commands/clear_analytics_tables.py +++ b/analytics/management/commands/clear_analytics_tables.py @@ -10,12 +10,12 @@ class Command(BaseCommand): help = """Clear analytics tables.""" def add_arguments(self, parser: ArgumentParser) -> None: - parser.add_argument('--force', - action='store_true', - help="Clear analytics tables.") + parser.add_argument('--force', action='store_true', help="Clear analytics tables.") def handle(self, *args: Any, **options: Any) -> None: if options['force']: do_drop_all_analytics_tables() else: - raise CommandError("Would delete all data from analytics tables (!); use --force to do so.") + raise CommandError( + "Would delete all data from analytics tables (!); use --force to do so." + ) diff --git a/analytics/management/commands/clear_single_stat.py b/analytics/management/commands/clear_single_stat.py index 3b440a0503..f7575ec65b 100644 --- a/analytics/management/commands/clear_single_stat.py +++ b/analytics/management/commands/clear_single_stat.py @@ -10,11 +10,8 @@ class Command(BaseCommand): help = """Clear analytics tables.""" def add_arguments(self, parser: ArgumentParser) -> None: - parser.add_argument('--force', - action='store_true', - help="Actually do it.") - parser.add_argument('--property', - help="The property of the stat to be cleared.") + parser.add_argument('--force', action='store_true', help="Actually do it.") + parser.add_argument('--property', help="The property of the stat to be cleared.") def handle(self, *args: Any, **options: Any) -> None: property = options['property'] diff --git a/analytics/management/commands/populate_analytics_db.py b/analytics/management/commands/populate_analytics_db.py index 740d0e2994..badf0cb6c5 100644 --- a/analytics/management/commands/populate_analytics_db.py +++ b/analytics/management/commands/populate_analytics_db.py @@ -28,16 +28,30 @@ class Command(BaseCommand): DAYS_OF_DATA = 100 random_seed = 26 - def generate_fixture_data(self, stat: CountStat, business_hours_base: float, - non_business_hours_base: float, growth: float, - autocorrelation: float, spikiness: float, - holiday_rate: float=0, partial_sum: bool=False) -> List[int]: + def generate_fixture_data( + self, + stat: CountStat, + business_hours_base: float, + non_business_hours_base: float, + growth: float, + autocorrelation: float, + spikiness: float, + holiday_rate: float = 0, + partial_sum: bool = False, + ) -> List[int]: self.random_seed += 1 return generate_time_series_data( - days=self.DAYS_OF_DATA, business_hours_base=business_hours_base, - non_business_hours_base=non_business_hours_base, growth=growth, - autocorrelation=autocorrelation, spikiness=spikiness, holiday_rate=holiday_rate, - frequency=stat.frequency, partial_sum=partial_sum, random_seed=self.random_seed) + days=self.DAYS_OF_DATA, + business_hours_base=business_hours_base, + non_business_hours_base=non_business_hours_base, + growth=growth, + autocorrelation=autocorrelation, + 
spikiness=spikiness, + holiday_rate=holiday_rate, + frequency=stat.frequency, + partial_sum=partial_sum, + random_seed=self.random_seed, + ) def handle(self, *args: Any, **options: Any) -> None: # TODO: This should arguably only delete the objects @@ -54,23 +68,24 @@ class Command(BaseCommand): # memcached in order to ensure deleted objects aren't still # present in the memcached cache. from zerver.apps import flush_cache + flush_cache(None) installation_time = timezone_now() - timedelta(days=self.DAYS_OF_DATA) last_end_time = floor_to_day(timezone_now()) realm = Realm.objects.create( - string_id='analytics', name='Analytics', date_created=installation_time) + string_id='analytics', name='Analytics', date_created=installation_time + ) with mock.patch("zerver.lib.create_user.timezone_now", return_value=installation_time): shylock = create_user( 'shylock@analytics.ds', 'Shylock', realm, full_name='Shylock', - role=UserProfile.ROLE_REALM_ADMINISTRATOR + role=UserProfile.ROLE_REALM_ADMINISTRATOR, ) do_change_user_role(shylock, UserProfile.ROLE_REALM_ADMINISTRATOR, acting_user=None) - stream = Stream.objects.create( - name='all', realm=realm, date_created=installation_time) + stream = Stream.objects.create(name='all', realm=realm, date_created=installation_time) recipient = Recipient.objects.create(type_id=stream.id, type=Recipient.STREAM) stream.recipient = recipient stream.save(update_fields=["recipient"]) @@ -78,17 +93,18 @@ class Command(BaseCommand): # Subscribe shylock to the stream to avoid invariant failures. # TODO: This should use subscribe_users_to_streams from populate_db. subs = [ - Subscription(recipient=recipient, - user_profile=shylock, - color=STREAM_ASSIGNMENT_COLORS[0]), + Subscription( + recipient=recipient, user_profile=shylock, color=STREAM_ASSIGNMENT_COLORS[0] + ), ] Subscription.objects.bulk_create(subs) - def insert_fixture_data(stat: CountStat, - fixture_data: Mapping[Optional[str], List[int]], - table: Type[BaseCount]) -> None: - end_times = time_range(last_end_time, last_end_time, stat.frequency, - len(list(fixture_data.values())[0])) + def insert_fixture_data( + stat: CountStat, fixture_data: Mapping[Optional[str], List[int]], table: Type[BaseCount] + ) -> None: + end_times = time_range( + last_end_time, last_end_time, stat.frequency, len(list(fixture_data.values())[0]) + ) if table == InstallationCount: id_args: Dict[str, Any] = {} if table == RealmCount: @@ -100,94 +116,114 @@ class Command(BaseCommand): for subgroup, values in fixture_data.items(): table.objects.bulk_create( - table(property=stat.property, subgroup=subgroup, end_time=end_time, - value=value, **id_args) - for end_time, value in zip(end_times, values) if value != 0) + table( + property=stat.property, + subgroup=subgroup, + end_time=end_time, + value=value, + **id_args, + ) + for end_time, value in zip(end_times, values) + if value != 0 + ) stat = COUNT_STATS['1day_actives::day'] realm_data: Mapping[Optional[str], List[int]] = { - None: self.generate_fixture_data(stat, .08, .02, 3, .3, 6, partial_sum=True), + None: self.generate_fixture_data(stat, 0.08, 0.02, 3, 0.3, 6, partial_sum=True), } insert_fixture_data(stat, realm_data, RealmCount) installation_data: Mapping[Optional[str], List[int]] = { - None: self.generate_fixture_data(stat, .8, .2, 4, .3, 6, partial_sum=True), + None: self.generate_fixture_data(stat, 0.8, 0.2, 4, 0.3, 6, partial_sum=True), } insert_fixture_data(stat, installation_data, InstallationCount) - FillState.objects.create(property=stat.property, end_time=last_end_time, - 
state=FillState.DONE) + FillState.objects.create( + property=stat.property, end_time=last_end_time, state=FillState.DONE + ) stat = COUNT_STATS['7day_actives::day'] realm_data = { - None: self.generate_fixture_data(stat, .2, .07, 3, .3, 6, partial_sum=True), + None: self.generate_fixture_data(stat, 0.2, 0.07, 3, 0.3, 6, partial_sum=True), } insert_fixture_data(stat, realm_data, RealmCount) installation_data = { - None: self.generate_fixture_data(stat, 2, .7, 4, .3, 6, partial_sum=True), + None: self.generate_fixture_data(stat, 2, 0.7, 4, 0.3, 6, partial_sum=True), } insert_fixture_data(stat, installation_data, InstallationCount) - FillState.objects.create(property=stat.property, end_time=last_end_time, - state=FillState.DONE) + FillState.objects.create( + property=stat.property, end_time=last_end_time, state=FillState.DONE + ) stat = COUNT_STATS['realm_active_humans::day'] realm_data = { - None: self.generate_fixture_data(stat, .8, .08, 3, .5, 3, partial_sum=True), + None: self.generate_fixture_data(stat, 0.8, 0.08, 3, 0.5, 3, partial_sum=True), } insert_fixture_data(stat, realm_data, RealmCount) installation_data = { - None: self.generate_fixture_data(stat, 1, .3, 4, .5, 3, partial_sum=True), + None: self.generate_fixture_data(stat, 1, 0.3, 4, 0.5, 3, partial_sum=True), } insert_fixture_data(stat, installation_data, InstallationCount) - FillState.objects.create(property=stat.property, end_time=last_end_time, - state=FillState.DONE) + FillState.objects.create( + property=stat.property, end_time=last_end_time, state=FillState.DONE + ) stat = COUNT_STATS['active_users_audit:is_bot:day'] realm_data = { - 'false': self.generate_fixture_data(stat, 1, .2, 3.5, .8, 2, partial_sum=True), - 'true': self.generate_fixture_data(stat, .3, .05, 3, .3, 2, partial_sum=True), + 'false': self.generate_fixture_data(stat, 1, 0.2, 3.5, 0.8, 2, partial_sum=True), + 'true': self.generate_fixture_data(stat, 0.3, 0.05, 3, 0.3, 2, partial_sum=True), } insert_fixture_data(stat, realm_data, RealmCount) installation_data = { - 'false': self.generate_fixture_data(stat, 3, 1, 4, .8, 2, partial_sum=True), - 'true': self.generate_fixture_data(stat, 1, .4, 4, .8, 2, partial_sum=True), + 'false': self.generate_fixture_data(stat, 3, 1, 4, 0.8, 2, partial_sum=True), + 'true': self.generate_fixture_data(stat, 1, 0.4, 4, 0.8, 2, partial_sum=True), } insert_fixture_data(stat, installation_data, InstallationCount) - FillState.objects.create(property=stat.property, end_time=last_end_time, - state=FillState.DONE) + FillState.objects.create( + property=stat.property, end_time=last_end_time, state=FillState.DONE + ) stat = COUNT_STATS['messages_sent:is_bot:hour'] user_data: Mapping[Optional[str], List[int]] = { - 'false': self.generate_fixture_data(stat, 2, 1, 1.5, .6, 8, holiday_rate=.1), + 'false': self.generate_fixture_data(stat, 2, 1, 1.5, 0.6, 8, holiday_rate=0.1), } insert_fixture_data(stat, user_data, UserCount) - realm_data = {'false': self.generate_fixture_data(stat, 35, 15, 6, .6, 4), - 'true': self.generate_fixture_data(stat, 15, 15, 3, .4, 2)} + realm_data = { + 'false': self.generate_fixture_data(stat, 35, 15, 6, 0.6, 4), + 'true': self.generate_fixture_data(stat, 15, 15, 3, 0.4, 2), + } insert_fixture_data(stat, realm_data, RealmCount) - installation_data = {'false': self.generate_fixture_data(stat, 350, 150, 6, .6, 4), - 'true': self.generate_fixture_data(stat, 150, 150, 3, .4, 2)} + installation_data = { + 'false': self.generate_fixture_data(stat, 350, 150, 6, 0.6, 4), + 'true': self.generate_fixture_data(stat, 150, 
150, 3, 0.4, 2), + } insert_fixture_data(stat, installation_data, InstallationCount) - FillState.objects.create(property=stat.property, end_time=last_end_time, - state=FillState.DONE) + FillState.objects.create( + property=stat.property, end_time=last_end_time, state=FillState.DONE + ) stat = COUNT_STATS['messages_sent:message_type:day'] user_data = { - 'public_stream': self.generate_fixture_data(stat, 1.5, 1, 3, .6, 8), - 'private_message': self.generate_fixture_data(stat, .5, .3, 1, .6, 8), - 'huddle_message': self.generate_fixture_data(stat, .2, .2, 2, .6, 8)} + 'public_stream': self.generate_fixture_data(stat, 1.5, 1, 3, 0.6, 8), + 'private_message': self.generate_fixture_data(stat, 0.5, 0.3, 1, 0.6, 8), + 'huddle_message': self.generate_fixture_data(stat, 0.2, 0.2, 2, 0.6, 8), + } insert_fixture_data(stat, user_data, UserCount) realm_data = { - 'public_stream': self.generate_fixture_data(stat, 30, 8, 5, .6, 4), - 'private_stream': self.generate_fixture_data(stat, 7, 7, 5, .6, 4), - 'private_message': self.generate_fixture_data(stat, 13, 5, 5, .6, 4), - 'huddle_message': self.generate_fixture_data(stat, 6, 3, 3, .6, 4)} + 'public_stream': self.generate_fixture_data(stat, 30, 8, 5, 0.6, 4), + 'private_stream': self.generate_fixture_data(stat, 7, 7, 5, 0.6, 4), + 'private_message': self.generate_fixture_data(stat, 13, 5, 5, 0.6, 4), + 'huddle_message': self.generate_fixture_data(stat, 6, 3, 3, 0.6, 4), + } insert_fixture_data(stat, realm_data, RealmCount) installation_data = { - 'public_stream': self.generate_fixture_data(stat, 300, 80, 5, .6, 4), - 'private_stream': self.generate_fixture_data(stat, 70, 70, 5, .6, 4), - 'private_message': self.generate_fixture_data(stat, 130, 50, 5, .6, 4), - 'huddle_message': self.generate_fixture_data(stat, 60, 30, 3, .6, 4)} + 'public_stream': self.generate_fixture_data(stat, 300, 80, 5, 0.6, 4), + 'private_stream': self.generate_fixture_data(stat, 70, 70, 5, 0.6, 4), + 'private_message': self.generate_fixture_data(stat, 130, 50, 5, 0.6, 4), + 'huddle_message': self.generate_fixture_data(stat, 60, 30, 3, 0.6, 4), + } insert_fixture_data(stat, installation_data, InstallationCount) - FillState.objects.create(property=stat.property, end_time=last_end_time, - state=FillState.DONE) + FillState.objects.create( + property=stat.property, end_time=last_end_time, state=FillState.DONE + ) website, created = Client.objects.get_or_create(name='website') old_desktop, created = Client.objects.get_or_create(name='desktop app Linux 0.3.7') @@ -201,54 +237,60 @@ class Command(BaseCommand): stat = COUNT_STATS['messages_sent:client:day'] user_data = { - website.id: self.generate_fixture_data(stat, 2, 1, 1.5, .6, 8), - zephyr_mirror.id: self.generate_fixture_data(stat, 0, .3, 1.5, .6, 8)} + website.id: self.generate_fixture_data(stat, 2, 1, 1.5, 0.6, 8), + zephyr_mirror.id: self.generate_fixture_data(stat, 0, 0.3, 1.5, 0.6, 8), + } insert_fixture_data(stat, user_data, UserCount) realm_data = { - website.id: self.generate_fixture_data(stat, 30, 20, 5, .6, 3), - old_desktop.id: self.generate_fixture_data(stat, 5, 3, 8, .6, 3), - android.id: self.generate_fixture_data(stat, 5, 5, 2, .6, 3), - iOS.id: self.generate_fixture_data(stat, 5, 5, 2, .6, 3), - react_native.id: self.generate_fixture_data(stat, 5, 5, 10, .6, 3), - API.id: self.generate_fixture_data(stat, 5, 5, 5, .6, 3), - zephyr_mirror.id: self.generate_fixture_data(stat, 1, 1, 3, .6, 3), + website.id: self.generate_fixture_data(stat, 30, 20, 5, 0.6, 3), + old_desktop.id: self.generate_fixture_data(stat, 5, 3, 8, 0.6, 
3), + android.id: self.generate_fixture_data(stat, 5, 5, 2, 0.6, 3), + iOS.id: self.generate_fixture_data(stat, 5, 5, 2, 0.6, 3), + react_native.id: self.generate_fixture_data(stat, 5, 5, 10, 0.6, 3), + API.id: self.generate_fixture_data(stat, 5, 5, 5, 0.6, 3), + zephyr_mirror.id: self.generate_fixture_data(stat, 1, 1, 3, 0.6, 3), unused.id: self.generate_fixture_data(stat, 0, 0, 0, 0, 0), - long_webhook.id: self.generate_fixture_data(stat, 5, 5, 2, .6, 3)} + long_webhook.id: self.generate_fixture_data(stat, 5, 5, 2, 0.6, 3), + } insert_fixture_data(stat, realm_data, RealmCount) installation_data = { - website.id: self.generate_fixture_data(stat, 300, 200, 5, .6, 3), - old_desktop.id: self.generate_fixture_data(stat, 50, 30, 8, .6, 3), - android.id: self.generate_fixture_data(stat, 50, 50, 2, .6, 3), - iOS.id: self.generate_fixture_data(stat, 50, 50, 2, .6, 3), - react_native.id: self.generate_fixture_data(stat, 5, 5, 10, .6, 3), - API.id: self.generate_fixture_data(stat, 50, 50, 5, .6, 3), - zephyr_mirror.id: self.generate_fixture_data(stat, 10, 10, 3, .6, 3), + website.id: self.generate_fixture_data(stat, 300, 200, 5, 0.6, 3), + old_desktop.id: self.generate_fixture_data(stat, 50, 30, 8, 0.6, 3), + android.id: self.generate_fixture_data(stat, 50, 50, 2, 0.6, 3), + iOS.id: self.generate_fixture_data(stat, 50, 50, 2, 0.6, 3), + react_native.id: self.generate_fixture_data(stat, 5, 5, 10, 0.6, 3), + API.id: self.generate_fixture_data(stat, 50, 50, 5, 0.6, 3), + zephyr_mirror.id: self.generate_fixture_data(stat, 10, 10, 3, 0.6, 3), unused.id: self.generate_fixture_data(stat, 0, 0, 0, 0, 0), - long_webhook.id: self.generate_fixture_data(stat, 50, 50, 2, .6, 3)} + long_webhook.id: self.generate_fixture_data(stat, 50, 50, 2, 0.6, 3), + } insert_fixture_data(stat, installation_data, InstallationCount) - FillState.objects.create(property=stat.property, end_time=last_end_time, - state=FillState.DONE) + FillState.objects.create( + property=stat.property, end_time=last_end_time, state=FillState.DONE + ) stat = COUNT_STATS['messages_in_stream:is_bot:day'] - realm_data = {'false': self.generate_fixture_data(stat, 30, 5, 6, .6, 4), - 'true': self.generate_fixture_data(stat, 20, 2, 3, .2, 3)} + realm_data = { + 'false': self.generate_fixture_data(stat, 30, 5, 6, 0.6, 4), + 'true': self.generate_fixture_data(stat, 20, 2, 3, 0.2, 3), + } insert_fixture_data(stat, realm_data, RealmCount) stream_data: Mapping[Optional[str], List[int]] = { - 'false': self.generate_fixture_data(stat, 10, 7, 5, .6, 4), - 'true': self.generate_fixture_data(stat, 5, 3, 2, .4, 2), + 'false': self.generate_fixture_data(stat, 10, 7, 5, 0.6, 4), + 'true': self.generate_fixture_data(stat, 5, 3, 2, 0.4, 2), } insert_fixture_data(stat, stream_data, StreamCount) - FillState.objects.create(property=stat.property, end_time=last_end_time, - state=FillState.DONE) + FillState.objects.create( + property=stat.property, end_time=last_end_time, state=FillState.DONE + ) stat = COUNT_STATS['messages_read::hour'] user_data = { - None: self.generate_fixture_data(stat, 7, 3, 2, .6, 8, holiday_rate=.1), + None: self.generate_fixture_data(stat, 7, 3, 2, 0.6, 8, holiday_rate=0.1), } insert_fixture_data(stat, user_data, UserCount) - realm_data = { - None: self.generate_fixture_data(stat, 50, 35, 6, .6, 4) - } + realm_data = {None: self.generate_fixture_data(stat, 50, 35, 6, 0.6, 4)} insert_fixture_data(stat, realm_data, RealmCount) - FillState.objects.create(property=stat.property, end_time=last_end_time, - state=FillState.DONE) + 
FillState.objects.create(
+            property=stat.property, end_time=last_end_time, state=FillState.DONE
+        )
diff --git a/analytics/management/commands/stream_stats.py b/analytics/management/commands/stream_stats.py
index 940604c99d..3a421916e9 100644
--- a/analytics/management/commands/stream_stats.py
+++ b/analytics/management/commands/stream_stats.py
@@ -11,8 +11,9 @@ class Command(BaseCommand):
     help = "Generate statistics on the streams for a realm."
 
     def add_arguments(self, parser: ArgumentParser) -> None:
-        parser.add_argument('realms', metavar='<realm>', nargs='*',
-                            help="realm to generate statistics for")
+        parser.add_argument(
+            'realms', metavar='<realm>', nargs='*', help="realm to generate statistics for"
+        )
 
     def handle(self, *args: Any, **options: str) -> None:
         if options['realms']:
@@ -48,8 +49,12 @@ class Command(BaseCommand):
                 stream_type = 'public'
             print(f"{stream.name:>25}", end=' ')
             recipient = Recipient.objects.filter(type=Recipient.STREAM, type_id=stream.id)
-            print("{:10}".format(len(Subscription.objects.filter(recipient=recipient,
-                                                                  active=True))), end=' ')
+            print(
+                "{:10}".format(
+                    len(Subscription.objects.filter(recipient=recipient, active=True))
+                ),
+                end=' ',
+            )
             num_messages = len(Message.objects.filter(recipient=recipient))
             print(f"{num_messages:12}", end=' ')
             print(f"{stream_type:>15}")
diff --git a/analytics/management/commands/update_analytics_counts.py b/analytics/management/commands/update_analytics_counts.py
index 70b86369e0..edb49f1e99 100644
--- a/analytics/management/commands/update_analytics_counts.py
+++ b/analytics/management/commands/update_analytics_counts.py
@@ -22,25 +22,29 @@ class Command(BaseCommand):
     Run as a cron job that runs every hour."""
 
     def add_arguments(self, parser: ArgumentParser) -> None:
-        parser.add_argument('--time', '-t',
-                            help='Update stat tables from current state to '
-                            '--time. Defaults to the current time.',
-                            default=timezone_now().isoformat())
-        parser.add_argument('--utc',
-                            action='store_true',
-                            help="Interpret --time in UTC.")
-        parser.add_argument('--stat', '-s',
-                            help="CountStat to process. If omitted, all stats are processed.")
-        parser.add_argument('--verbose',
-                            action='store_true',
-                            help="Print timing information to stdout.")
+        parser.add_argument(
+            '--time',
+            '-t',
+            help='Update stat tables from current state to '
+            '--time. Defaults to the current time.',
+            default=timezone_now().isoformat(),
+        )
+        parser.add_argument('--utc', action='store_true', help="Interpret --time in UTC.")
+        parser.add_argument(
+            '--stat', '-s', help="CountStat to process. If omitted, all stats are processed."
+        )
+        parser.add_argument(
+            '--verbose', action='store_true', help="Print timing information to stdout."
+        )
 
     def handle(self, *args: Any, **options: Any) -> None:
         try:
             os.mkdir(settings.ANALYTICS_LOCK_DIR)
         except OSError:
-            print(f"{WARNING}Analytics lock {settings.ANALYTICS_LOCK_DIR} is unavailable;"
-                  f" exiting.{ENDC}")
+            print(
+                f"{WARNING}Analytics lock {settings.ANALYTICS_LOCK_DIR} is unavailable;"
+                f" exiting.{ENDC}"
+            )
             return
 
         try:
@@ -59,7 +63,9 @@ class Command(BaseCommand):
         if options['utc']:
             fill_to_time = fill_to_time.replace(tzinfo=timezone.utc)
         if fill_to_time.tzinfo is None:
-            raise ValueError("--time must be timezone aware. Maybe you meant to use the --utc option?")
+            raise ValueError(
+                "--time must be timezone aware. Maybe you meant to use the --utc option?"
+ ) fill_to_time = floor_to_hour(fill_to_time.astimezone(timezone.utc)) @@ -80,7 +86,9 @@ class Command(BaseCommand): last = time.time() if options['verbose']: - print(f"Finished updating analytics counts through {fill_to_time} in {time.time() - start:.3f}s") + print( + f"Finished updating analytics counts through {fill_to_time} in {time.time() - start:.3f}s" + ) logger.info("Finished updating analytics counts through %s", fill_to_time) if settings.PUSH_NOTIFICATION_BOUNCER_URL and settings.SUBMIT_USAGE_STATISTICS: diff --git a/analytics/migrations/0001_initial.py b/analytics/migrations/0001_initial.py index f9ace30741..c8ef31daf0 100644 --- a/analytics/migrations/0001_initial.py +++ b/analytics/migrations/0001_initial.py @@ -14,7 +14,12 @@ class Migration(migrations.Migration): migrations.CreateModel( name='Anomaly', fields=[ - ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)), + ( + 'id', + models.AutoField( + verbose_name='ID', serialize=False, auto_created=True, primary_key=True + ), + ), ('info', models.CharField(max_length=1000)), ], bases=(models.Model,), @@ -22,68 +27,162 @@ class Migration(migrations.Migration): migrations.CreateModel( name='HuddleCount', fields=[ - ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)), - ('huddle', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='zerver.Recipient')), - ('user', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)), + ( + 'id', + models.AutoField( + verbose_name='ID', serialize=False, auto_created=True, primary_key=True + ), + ), + ( + 'huddle', + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, to='zerver.Recipient' + ), + ), + ( + 'user', + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL + ), + ), ('property', models.CharField(max_length=40)), ('end_time', models.DateTimeField()), ('interval', models.CharField(max_length=20)), ('value', models.BigIntegerField()), - ('anomaly', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='analytics.Anomaly', null=True)), + ( + 'anomaly', + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + to='analytics.Anomaly', + null=True, + ), + ), ], bases=(models.Model,), ), migrations.CreateModel( name='InstallationCount', fields=[ - ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)), + ( + 'id', + models.AutoField( + verbose_name='ID', serialize=False, auto_created=True, primary_key=True + ), + ), ('property', models.CharField(max_length=40)), ('end_time', models.DateTimeField()), ('interval', models.CharField(max_length=20)), ('value', models.BigIntegerField()), - ('anomaly', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='analytics.Anomaly', null=True)), + ( + 'anomaly', + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + to='analytics.Anomaly', + null=True, + ), + ), ], bases=(models.Model,), ), migrations.CreateModel( name='RealmCount', fields=[ - ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)), - ('realm', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='zerver.Realm')), + ( + 'id', + models.AutoField( + verbose_name='ID', serialize=False, auto_created=True, primary_key=True + ), + ), + ( + 'realm', + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, to='zerver.Realm' + ), + ), 
('property', models.CharField(max_length=40)), ('end_time', models.DateTimeField()), ('interval', models.CharField(max_length=20)), ('value', models.BigIntegerField()), - ('anomaly', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='analytics.Anomaly', null=True)), - + ( + 'anomaly', + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + to='analytics.Anomaly', + null=True, + ), + ), ], bases=(models.Model,), ), migrations.CreateModel( name='StreamCount', fields=[ - ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)), - ('realm', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='zerver.Realm')), - ('stream', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='zerver.Stream')), + ( + 'id', + models.AutoField( + verbose_name='ID', serialize=False, auto_created=True, primary_key=True + ), + ), + ( + 'realm', + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, to='zerver.Realm' + ), + ), + ( + 'stream', + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, to='zerver.Stream' + ), + ), ('property', models.CharField(max_length=40)), ('end_time', models.DateTimeField()), ('interval', models.CharField(max_length=20)), ('value', models.BigIntegerField()), - ('anomaly', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='analytics.Anomaly', null=True)), + ( + 'anomaly', + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + to='analytics.Anomaly', + null=True, + ), + ), ], bases=(models.Model,), ), migrations.CreateModel( name='UserCount', fields=[ - ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)), - ('realm', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='zerver.Realm')), - ('user', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)), + ( + 'id', + models.AutoField( + verbose_name='ID', serialize=False, auto_created=True, primary_key=True + ), + ), + ( + 'realm', + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, to='zerver.Realm' + ), + ), + ( + 'user', + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL + ), + ), ('property', models.CharField(max_length=40)), ('end_time', models.DateTimeField()), ('interval', models.CharField(max_length=20)), ('value', models.BigIntegerField()), - ('anomaly', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='analytics.Anomaly', null=True)), + ( + 'anomaly', + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + to='analytics.Anomaly', + null=True, + ), + ), ], bases=(models.Model,), ), diff --git a/analytics/migrations/0003_fillstate.py b/analytics/migrations/0003_fillstate.py index a89132aa40..41ea6ffa91 100644 --- a/analytics/migrations/0003_fillstate.py +++ b/analytics/migrations/0003_fillstate.py @@ -11,7 +11,12 @@ class Migration(migrations.Migration): migrations.CreateModel( name='FillState', fields=[ - ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)), + ( + 'id', + models.AutoField( + verbose_name='ID', serialize=False, auto_created=True, primary_key=True + ), + ), ('property', models.CharField(unique=True, max_length=40)), ('end_time', models.DateTimeField()), ('state', models.PositiveSmallIntegerField()), diff --git a/analytics/migrations/0009_remove_messages_to_stream_stat.py b/analytics/migrations/0009_remove_messages_to_stream_stat.py 
index 2a0c412e11..c5fbff381a 100644 --- a/analytics/migrations/0009_remove_messages_to_stream_stat.py +++ b/analytics/migrations/0009_remove_messages_to_stream_stat.py @@ -3,7 +3,9 @@ from django.db.backends.postgresql.schema import DatabaseSchemaEditor from django.db.migrations.state import StateApps -def delete_messages_sent_to_stream_stat(apps: StateApps, schema_editor: DatabaseSchemaEditor) -> None: +def delete_messages_sent_to_stream_stat( + apps: StateApps, schema_editor: DatabaseSchemaEditor +) -> None: UserCount = apps.get_model('analytics', 'UserCount') StreamCount = apps.get_model('analytics', 'StreamCount') RealmCount = apps.get_model('analytics', 'RealmCount') @@ -17,6 +19,7 @@ def delete_messages_sent_to_stream_stat(apps: StateApps, schema_editor: Database InstallationCount.objects.filter(property=property).delete() FillState.objects.filter(property=property).delete() + class Migration(migrations.Migration): dependencies = [ diff --git a/analytics/migrations/0010_clear_messages_sent_values.py b/analytics/migrations/0010_clear_messages_sent_values.py index b1bd03e543..fdd89a67a6 100644 --- a/analytics/migrations/0010_clear_messages_sent_values.py +++ b/analytics/migrations/0010_clear_messages_sent_values.py @@ -3,7 +3,9 @@ from django.db.backends.postgresql.schema import DatabaseSchemaEditor from django.db.migrations.state import StateApps -def clear_message_sent_by_message_type_values(apps: StateApps, schema_editor: DatabaseSchemaEditor) -> None: +def clear_message_sent_by_message_type_values( + apps: StateApps, schema_editor: DatabaseSchemaEditor +) -> None: UserCount = apps.get_model('analytics', 'UserCount') StreamCount = apps.get_model('analytics', 'StreamCount') RealmCount = apps.get_model('analytics', 'RealmCount') @@ -17,6 +19,7 @@ def clear_message_sent_by_message_type_values(apps: StateApps, schema_editor: Da InstallationCount.objects.filter(property=property).delete() FillState.objects.filter(property=property).delete() + class Migration(migrations.Migration): dependencies = [('analytics', '0009_remove_messages_to_stream_stat')] diff --git a/analytics/migrations/0011_clear_analytics_tables.py b/analytics/migrations/0011_clear_analytics_tables.py index d7b69ce53e..80c522d2ca 100644 --- a/analytics/migrations/0011_clear_analytics_tables.py +++ b/analytics/migrations/0011_clear_analytics_tables.py @@ -16,6 +16,7 @@ def clear_analytics_tables(apps: StateApps, schema_editor: DatabaseSchemaEditor) InstallationCount.objects.all().delete() FillState.objects.all().delete() + class Migration(migrations.Migration): dependencies = [ diff --git a/analytics/migrations/0012_add_on_delete.py b/analytics/migrations/0012_add_on_delete.py index 0d0a5c22b8..bd2942bf8f 100644 --- a/analytics/migrations/0012_add_on_delete.py +++ b/analytics/migrations/0012_add_on_delete.py @@ -14,21 +14,29 @@ class Migration(migrations.Migration): migrations.AlterField( model_name='installationcount', name='anomaly', - field=models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, to='analytics.Anomaly'), + field=models.ForeignKey( + null=True, on_delete=django.db.models.deletion.SET_NULL, to='analytics.Anomaly' + ), ), migrations.AlterField( model_name='realmcount', name='anomaly', - field=models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, to='analytics.Anomaly'), + field=models.ForeignKey( + null=True, on_delete=django.db.models.deletion.SET_NULL, to='analytics.Anomaly' + ), ), migrations.AlterField( model_name='streamcount', name='anomaly', - 
field=models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, to='analytics.Anomaly'), + field=models.ForeignKey( + null=True, on_delete=django.db.models.deletion.SET_NULL, to='analytics.Anomaly' + ), ), migrations.AlterField( model_name='usercount', name='anomaly', - field=models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, to='analytics.Anomaly'), + field=models.ForeignKey( + null=True, on_delete=django.db.models.deletion.SET_NULL, to='analytics.Anomaly' + ), ), ] diff --git a/analytics/migrations/0015_clear_duplicate_counts.py b/analytics/migrations/0015_clear_duplicate_counts.py index a7ecc3dd87..77038ae490 100644 --- a/analytics/migrations/0015_clear_duplicate_counts.py +++ b/analytics/migrations/0015_clear_duplicate_counts.py @@ -20,17 +20,23 @@ def clear_duplicate_counts(apps: StateApps, schema_editor: DatabaseSchemaEditor) this means deleting the extra rows, but for LoggingCountStat objects, we need to additionally combine the sums. """ - count_tables = dict(realm=apps.get_model('analytics', 'RealmCount'), - user=apps.get_model('analytics', 'UserCount'), - stream=apps.get_model('analytics', 'StreamCount'), - installation=apps.get_model('analytics', 'InstallationCount')) + count_tables = dict( + realm=apps.get_model('analytics', 'RealmCount'), + user=apps.get_model('analytics', 'UserCount'), + stream=apps.get_model('analytics', 'StreamCount'), + installation=apps.get_model('analytics', 'InstallationCount'), + ) for name, count_table in count_tables.items(): value = [name, 'property', 'end_time'] if name == 'installation': value = ['property', 'end_time'] - counts = count_table.objects.filter(subgroup=None).values(*value).annotate( - Count('id'), Sum('value')).filter(id__count__gt=1) + counts = ( + count_table.objects.filter(subgroup=None) + .values(*value) + .annotate(Count('id'), Sum('value')) + .filter(id__count__gt=1) + ) for count in counts: count.pop('id__count') @@ -47,6 +53,7 @@ def clear_duplicate_counts(apps: StateApps, schema_editor: DatabaseSchemaEditor) for duplicate_count in to_cleanup: duplicate_count.delete() + class Migration(migrations.Migration): dependencies = [ @@ -54,6 +61,5 @@ class Migration(migrations.Migration): ] operations = [ - migrations.RunPython(clear_duplicate_counts, - reverse_code=migrations.RunPython.noop), + migrations.RunPython(clear_duplicate_counts, reverse_code=migrations.RunPython.noop), ] diff --git a/analytics/migrations/0016_unique_constraint_when_subgroup_null.py b/analytics/migrations/0016_unique_constraint_when_subgroup_null.py index 726a3269b1..6d2aaef375 100644 --- a/analytics/migrations/0016_unique_constraint_when_subgroup_null.py +++ b/analytics/migrations/0016_unique_constraint_when_subgroup_null.py @@ -28,34 +28,66 @@ class Migration(migrations.Migration): ), migrations.AddConstraint( model_name='installationcount', - constraint=models.UniqueConstraint(condition=models.Q(subgroup__isnull=False), fields=('property', 'subgroup', 'end_time'), name='unique_installation_count'), + constraint=models.UniqueConstraint( + condition=models.Q(subgroup__isnull=False), + fields=('property', 'subgroup', 'end_time'), + name='unique_installation_count', + ), ), migrations.AddConstraint( model_name='installationcount', - constraint=models.UniqueConstraint(condition=models.Q(subgroup__isnull=True), fields=('property', 'end_time'), name='unique_installation_count_null_subgroup'), + constraint=models.UniqueConstraint( + condition=models.Q(subgroup__isnull=True), + fields=('property', 'end_time'), + 
name='unique_installation_count_null_subgroup',
+ ),
 ),
 migrations.AddConstraint(
 model_name='realmcount',
- constraint=models.UniqueConstraint(condition=models.Q(subgroup__isnull=False), fields=('realm', 'property', 'subgroup', 'end_time'), name='unique_realm_count'),
+ constraint=models.UniqueConstraint(
+ condition=models.Q(subgroup__isnull=False),
+ fields=('realm', 'property', 'subgroup', 'end_time'),
+ name='unique_realm_count',
+ ),
 ),
 migrations.AddConstraint(
 model_name='realmcount',
- constraint=models.UniqueConstraint(condition=models.Q(subgroup__isnull=True), fields=('realm', 'property', 'end_time'), name='unique_realm_count_null_subgroup'),
+ constraint=models.UniqueConstraint(
+ condition=models.Q(subgroup__isnull=True),
+ fields=('realm', 'property', 'end_time'),
+ name='unique_realm_count_null_subgroup',
+ ),
 ),
 migrations.AddConstraint(
 model_name='streamcount',
- constraint=models.UniqueConstraint(condition=models.Q(subgroup__isnull=False), fields=('stream', 'property', 'subgroup', 'end_time'), name='unique_stream_count'),
+ constraint=models.UniqueConstraint(
+ condition=models.Q(subgroup__isnull=False),
+ fields=('stream', 'property', 'subgroup', 'end_time'),
+ name='unique_stream_count',
+ ),
 ),
 migrations.AddConstraint(
 model_name='streamcount',
- constraint=models.UniqueConstraint(condition=models.Q(subgroup__isnull=True), fields=('stream', 'property', 'end_time'), name='unique_stream_count_null_subgroup'),
+ constraint=models.UniqueConstraint(
+ condition=models.Q(subgroup__isnull=True),
+ fields=('stream', 'property', 'end_time'),
+ name='unique_stream_count_null_subgroup',
+ ),
 ),
 migrations.AddConstraint(
 model_name='usercount',
- constraint=models.UniqueConstraint(condition=models.Q(subgroup__isnull=False), fields=('user', 'property', 'subgroup', 'end_time'), name='unique_user_count'),
+ constraint=models.UniqueConstraint(
+ condition=models.Q(subgroup__isnull=False),
+ fields=('user', 'property', 'subgroup', 'end_time'),
+ name='unique_user_count',
+ ),
 ),
 migrations.AddConstraint(
 model_name='usercount',
- constraint=models.UniqueConstraint(condition=models.Q(subgroup__isnull=True), fields=('user', 'property', 'end_time'), name='unique_user_count_null_subgroup'),
+ constraint=models.UniqueConstraint(
+ condition=models.Q(subgroup__isnull=True),
+ fields=('user', 'property', 'end_time'),
+ name='unique_user_count_null_subgroup',
+ ),
 ),
 ]
diff --git a/analytics/models.py b/analytics/models.py
index be9e87cbad..df436ffd3c 100644
--- a/analytics/models.py
+++ b/analytics/models.py
@@ -20,12 +20,16 @@ class FillState(models.Model):
 def __str__(self) -> str:
 return f"<FillState: {self.property} {self.end_time} {self.state}>"
+
 # The earliest/starting end_time in FillState
 # We assume there is at least one realm
 def installation_epoch() -> datetime.datetime:
- earliest_realm_creation = Realm.objects.aggregate(models.Min('date_created'))['date_created__min']
+ earliest_realm_creation = Realm.objects.aggregate(models.Min('date_created'))[
+ 'date_created__min'
+ ]
 return floor_to_day(earliest_realm_creation)
+
 class BaseCount(models.Model):
 # Note: When inheriting from BaseCount, you may want to rearrange
 # the order of the columns in the migration to make sure they
@@ -38,24 +42,27 @@ class BaseCount(models.Model):
 class Meta:
 abstract = True
-class InstallationCount(BaseCount):
+class InstallationCount(BaseCount):
 class Meta:
 # Handles invalid duplicate InstallationCount data
 constraints = [
 UniqueConstraint(
 fields=["property", "subgroup", "end_time"],
 condition=Q(subgroup__isnull=False),
-
name='unique_installation_count'),
+ name='unique_installation_count',
+ ),
 UniqueConstraint(
 fields=["property", "end_time"],
 condition=Q(subgroup__isnull=True),
- name='unique_installation_count_null_subgroup'),
+ name='unique_installation_count_null_subgroup',
+ ),
 ]
 def __str__(self) -> str:
 return f"<InstallationCount: {self.property} {self.subgroup} {self.value}>"
+
 class RealmCount(BaseCount):
 realm = models.ForeignKey(Realm, on_delete=models.CASCADE)
@@ -65,17 +72,20 @@
 UniqueConstraint(
 fields=["realm", "property", "subgroup", "end_time"],
 condition=Q(subgroup__isnull=False),
- name='unique_realm_count'),
+ name='unique_realm_count',
+ ),
 UniqueConstraint(
 fields=["realm", "property", "end_time"],
 condition=Q(subgroup__isnull=True),
- name='unique_realm_count_null_subgroup'),
+ name='unique_realm_count_null_subgroup',
+ ),
 ]
 index_together = ["property", "end_time"]
 def __str__(self) -> str:
 return f"<RealmCount: {self.realm} {self.property} {self.subgroup} {self.value}>"
+
 class UserCount(BaseCount):
 user = models.ForeignKey(UserProfile, on_delete=models.CASCADE)
 realm = models.ForeignKey(Realm, on_delete=models.CASCADE)
@@ -86,11 +96,13 @@
 UniqueConstraint(
 fields=["user", "property", "subgroup", "end_time"],
 condition=Q(subgroup__isnull=False),
- name='unique_user_count'),
+ name='unique_user_count',
+ ),
 UniqueConstraint(
 fields=["user", "property", "end_time"],
 condition=Q(subgroup__isnull=True),
- name='unique_user_count_null_subgroup'),
+ name='unique_user_count_null_subgroup',
+ ),
 ]
 # This index dramatically improves the performance of
 # aggregating from users to realms
@@ -99,6 +111,7 @@
 def __str__(self) -> str:
 return f"<UserCount: {self.user} {self.property} {self.subgroup} {self.value}>"
+
 class StreamCount(BaseCount):
 stream = models.ForeignKey(Stream, on_delete=models.CASCADE)
 realm = models.ForeignKey(Realm, on_delete=models.CASCADE)
@@ -109,15 +122,19 @@
 UniqueConstraint(
 fields=["stream", "property", "subgroup", "end_time"],
 condition=Q(subgroup__isnull=False),
- name='unique_stream_count'),
+ name='unique_stream_count',
+ ),
 UniqueConstraint(
 fields=["stream", "property", "end_time"],
 condition=Q(subgroup__isnull=True),
- name='unique_stream_count_null_subgroup'),
+ name='unique_stream_count_null_subgroup',
+ ),
 ]
 # This index dramatically improves the performance of
 # aggregating from streams to realms
 index_together = ["property", "realm", "end_time"]
 def __str__(self) -> str:
- return f"<StreamCount: {self.stream} {self.property} {self.subgroup} {self.value} {self.id}>"
+ return (
+ f"<StreamCount: {self.stream} {self.property} {self.subgroup} {self.value} {self.id}>"
+ )
diff --git a/analytics/tests/test_counts.py b/analytics/tests/test_counts.py
index 729a4133fb..c91e181b2f 100644
--- a/analytics/tests/test_counts.py
+++ b/analytics/tests/test_counts.py
@@ -67,7 +67,7 @@ from zerver.models import (
 class AnalyticsTestCase(ZulipTestCase):
- MINUTE = timedelta(seconds = 60)
+ MINUTE = timedelta(seconds=60)
 HOUR = MINUTE * 60
 DAY = HOUR * 24
 TIME_ZERO = datetime(1988, 3, 14, tzinfo=timezone.utc)
@@ -76,7 +76,8 @@ class AnalyticsTestCase(ZulipTestCase):
 def setUp(self) -> None:
 super().setUp()
 self.default_realm = Realm.objects.create(
- string_id='realmtest', name='Realm Test', date_created=self.TIME_ZERO - 2*self.DAY)
+ string_id='realmtest', name='Realm Test', date_created=self.TIME_ZERO - 2 * self.DAY
+ )
 # used to generate unique names in self.create_*
 self.name_counter = 100
 # used as defaults in self.assertCountEquals
@@ -91,7 +92,8 @@ class AnalyticsTestCase(ZulipTestCase):
 'full_name': 'full_name',
 'is_active': True,
 'is_bot': False,
- 'realm': self.default_realm}
+ 'realm': self.default_realm,
+ }
 for key, value in defaults.items():
 kwargs[key] = kwargs.get(key, value)
 kwargs['delivery_email'] =
kwargs['email'] @@ -107,14 +109,16 @@ class AnalyticsTestCase(ZulipTestCase): active=kwargs['is_active'], full_name=kwargs['full_name'], role=UserProfile.ROLE_REALM_ADMINISTRATOR, - **pass_kwargs + **pass_kwargs, ) def create_stream_with_recipient(self, **kwargs: Any) -> Tuple[Stream, Recipient]: self.name_counter += 1 - defaults = {'name': f'stream name {self.name_counter}', - 'realm': self.default_realm, - 'date_created': self.TIME_LAST_HOUR} + defaults = { + 'name': f'stream name {self.name_counter}', + 'realm': self.default_realm, + 'date_created': self.TIME_LAST_HOUR, + } for key, value in defaults.items(): kwargs[key] = kwargs.get(key, value) stream = Stream.objects.create(**kwargs) @@ -141,15 +145,23 @@ class AnalyticsTestCase(ZulipTestCase): DB_TOPIC_NAME: 'subject', 'content': 'hi', 'date_sent': self.TIME_LAST_HOUR, - 'sending_client': get_client("website")} + 'sending_client': get_client("website"), + } for key, value in defaults.items(): kwargs[key] = kwargs.get(key, value) return Message.objects.create(**kwargs) # kwargs should only ever be a UserProfile or Stream. - def assertCountEquals(self, table: Type[BaseCount], value: int, property: Optional[str]=None, - subgroup: Optional[str]=None, end_time: datetime=TIME_ZERO, - realm: Optional[Realm]=None, **kwargs: models.Model) -> None: + def assertCountEquals( + self, + table: Type[BaseCount], + value: int, + property: Optional[str] = None, + subgroup: Optional[str] = None, + end_time: datetime = TIME_ZERO, + realm: Optional[Realm] = None, + **kwargs: models.Model, + ) -> None: if property is None: property = self.current_property queryset = table.objects.filter(property=property, end_time=end_time).filter(**kwargs) @@ -161,8 +173,9 @@ class AnalyticsTestCase(ZulipTestCase): queryset = queryset.filter(subgroup=subgroup) self.assertEqual(queryset.values_list('value', flat=True)[0], value) - def assertTableState(self, table: Type[BaseCount], arg_keys: List[str], - arg_values: List[List[object]]) -> None: + def assertTableState( + self, table: Type[BaseCount], arg_keys: List[str], arg_values: List[List[object]] + ) -> None: """Assert that the state of a *Count table is what it should be. 
Example usage: @@ -188,7 +201,8 @@ class AnalyticsTestCase(ZulipTestCase): 'property': self.current_property, 'subgroup': None, 'end_time': self.TIME_ZERO, - 'value': 1} + 'value': 1, + } for values in arg_values: kwargs: Dict[str, Any] = {} for i in range(len(values)): @@ -206,19 +220,23 @@ class AnalyticsTestCase(ZulipTestCase): self.assertEqual(table.objects.filter(**kwargs).count(), 1) self.assertEqual(table.objects.count(), len(arg_values)) + class TestProcessCountStat(AnalyticsTestCase): def make_dummy_count_stat(self, property: str) -> CountStat: - query = lambda kwargs: SQL(""" + query = lambda kwargs: SQL( + """ INSERT INTO analytics_realmcount (realm_id, value, property, end_time) VALUES ({default_realm_id}, 1, {property}, %(time_end)s) - """).format( + """ + ).format( default_realm_id=Literal(self.default_realm.id), property=Literal(property), ) return CountStat(property, sql_data_collector(RealmCount, query, None), CountStat.HOUR) - def assertFillStateEquals(self, stat: CountStat, end_time: datetime, - state: int=FillState.DONE) -> None: + def assertFillStateEquals( + self, stat: CountStat, end_time: datetime, state: int = FillState.DONE + ) -> None: fill_state = FillState.objects.filter(property=stat.property).first() self.assertEqual(fill_state.end_time, end_time) self.assertEqual(fill_state.state, state) @@ -252,7 +270,7 @@ class TestProcessCountStat(AnalyticsTestCase): def test_bad_fill_to_time(self) -> None: stat = self.make_dummy_count_stat('test stat') with self.assertRaises(ValueError): - process_count_stat(stat, installation_epoch() + 65*self.MINUTE) + process_count_stat(stat, installation_epoch() + 65 * self.MINUTE) with self.assertRaises(TimezoneNotUTCException): process_count_stat(stat, installation_epoch().replace(tzinfo=None)) @@ -269,25 +287,30 @@ class TestProcessCountStat(AnalyticsTestCase): stream = self.create_stream_with_recipient()[0] realm = self.default_realm UserCount.objects.create( - user=user, realm=realm, property=user_stat.property, end_time=end_time, value=5) + user=user, realm=realm, property=user_stat.property, end_time=end_time, value=5 + ) StreamCount.objects.create( - stream=stream, realm=realm, property=stream_stat.property, end_time=end_time, value=5) + stream=stream, realm=realm, property=stream_stat.property, end_time=end_time, value=5 + ) RealmCount.objects.create( - realm=realm, property=realm_stat.property, end_time=end_time, value=5) + realm=realm, property=realm_stat.property, end_time=end_time, value=5 + ) # Normal run of process_count_stat for stat in [user_stat, stream_stat, realm_stat]: process_count_stat(stat, end_time) self.assertTableState(UserCount, ['property', 'value'], [[user_stat.property, 5]]) self.assertTableState(StreamCount, ['property', 'value'], [[stream_stat.property, 5]]) - self.assertTableState(RealmCount, ['property', 'value'], - [[user_stat.property, 5], - [stream_stat.property, 5], - [realm_stat.property, 5]]) - self.assertTableState(InstallationCount, ['property', 'value'], - [[user_stat.property, 5], - [stream_stat.property, 5], - [realm_stat.property, 5]]) + self.assertTableState( + RealmCount, + ['property', 'value'], + [[user_stat.property, 5], [stream_stat.property, 5], [realm_stat.property, 5]], + ) + self.assertTableState( + InstallationCount, + ['property', 'value'], + [[user_stat.property, 5], [stream_stat.property, 5], [realm_stat.property, 5]], + ) # Change the logged data and mark FillState as dirty UserCount.objects.update(value=6) @@ -300,85 +323,117 @@ class 
TestProcessCountStat(AnalyticsTestCase): process_count_stat(stat, end_time) self.assertTableState(UserCount, ['property', 'value'], [[user_stat.property, 6]]) self.assertTableState(StreamCount, ['property', 'value'], [[stream_stat.property, 6]]) - self.assertTableState(RealmCount, ['property', 'value'], - [[user_stat.property, 6], - [stream_stat.property, 6], - [realm_stat.property, 6]]) - self.assertTableState(InstallationCount, ['property', 'value'], - [[user_stat.property, 6], - [stream_stat.property, 6], - [realm_stat.property, 6]]) + self.assertTableState( + RealmCount, + ['property', 'value'], + [[user_stat.property, 6], [stream_stat.property, 6], [realm_stat.property, 6]], + ) + self.assertTableState( + InstallationCount, + ['property', 'value'], + [[user_stat.property, 6], [stream_stat.property, 6], [realm_stat.property, 6]], + ) def test_process_dependent_stat(self) -> None: stat1 = self.make_dummy_count_stat('stat1') stat2 = self.make_dummy_count_stat('stat2') - query = lambda kwargs: SQL(""" + query = lambda kwargs: SQL( + """ INSERT INTO analytics_realmcount (realm_id, value, property, end_time) VALUES ({default_realm_id}, 1, {property}, %(time_end)s) - """).format( + """ + ).format( default_realm_id=Literal(self.default_realm.id), property=Literal('stat3'), ) - stat3 = DependentCountStat('stat3', sql_data_collector(RealmCount, query, None), - CountStat.HOUR, - dependencies=['stat1', 'stat2']) + stat3 = DependentCountStat( + 'stat3', + sql_data_collector(RealmCount, query, None), + CountStat.HOUR, + dependencies=['stat1', 'stat2'], + ) - query = lambda kwargs: SQL(""" + query = lambda kwargs: SQL( + """ INSERT INTO analytics_realmcount (realm_id, value, property, end_time) VALUES ({default_realm_id}, 1, {property}, %(time_end)s) - """).format( + """ + ).format( default_realm_id=Literal(self.default_realm.id), property=Literal('stat4'), ) - stat4 = DependentCountStat('stat4', sql_data_collector(RealmCount, query, None), - CountStat.DAY, - dependencies=['stat1', 'stat2']) + stat4 = DependentCountStat( + 'stat4', + sql_data_collector(RealmCount, query, None), + CountStat.DAY, + dependencies=['stat1', 'stat2'], + ) dummy_count_stats = { "stat1": stat1, "stat2": stat2, "stat3": stat3, "stat4": stat4, - } with mock.patch("analytics.lib.counts.COUNT_STATS", dummy_count_stats): - hour = [installation_epoch() + i*self.HOUR for i in range(5)] + hour = [installation_epoch() + i * self.HOUR for i in range(5)] # test when one dependency has been run, and the other hasn't process_count_stat(stat1, hour[2]) process_count_stat(stat3, hour[1]) - self.assertTableState(InstallationCount, ['property', 'end_time'], - [['stat1', hour[1]], ['stat1', hour[2]]]) + self.assertTableState( + InstallationCount, + ['property', 'end_time'], + [['stat1', hour[1]], ['stat1', hour[2]]], + ) self.assertFillStateEquals(stat3, hour[0]) # test that we don't fill past the fill_to_time argument, even if # dependencies have later last_successful_fill process_count_stat(stat2, hour[3]) process_count_stat(stat3, hour[1]) - self.assertTableState(InstallationCount, ['property', 'end_time'], - [['stat1', hour[1]], ['stat1', hour[2]], - ['stat2', hour[1]], ['stat2', hour[2]], ['stat2', hour[3]], - ['stat3', hour[1]]]) + self.assertTableState( + InstallationCount, + ['property', 'end_time'], + [ + ['stat1', hour[1]], + ['stat1', hour[2]], + ['stat2', hour[1]], + ['stat2', hour[2]], + ['stat2', hour[3]], + ['stat3', hour[1]], + ], + ) self.assertFillStateEquals(stat3, hour[1]) # test that we don't fill past the dependency 
last_successful_fill times, # even if fill_to_time is later process_count_stat(stat3, hour[4]) - self.assertTableState(InstallationCount, ['property', 'end_time'], - [['stat1', hour[1]], ['stat1', hour[2]], - ['stat2', hour[1]], ['stat2', hour[2]], ['stat2', hour[3]], - ['stat3', hour[1]], ['stat3', hour[2]]]) + self.assertTableState( + InstallationCount, + ['property', 'end_time'], + [ + ['stat1', hour[1]], + ['stat1', hour[2]], + ['stat2', hour[1]], + ['stat2', hour[2]], + ['stat2', hour[3]], + ['stat3', hour[1]], + ['stat3', hour[2]], + ], + ) self.assertFillStateEquals(stat3, hour[2]) # test daily dependent stat with hourly dependencies - hour24 = installation_epoch() + 24*self.HOUR - hour25 = installation_epoch() + 25*self.HOUR + hour24 = installation_epoch() + 24 * self.HOUR + hour25 = installation_epoch() + 25 * self.HOUR process_count_stat(stat1, hour25) process_count_stat(stat2, hour25) process_count_stat(stat4, hour25) self.assertEqual(InstallationCount.objects.filter(property='stat4').count(), 1) self.assertFillStateEquals(stat4, hour24) + class TestCountStats(AnalyticsTestCase): def setUp(self) -> None: super().setUp() @@ -386,15 +441,20 @@ class TestCountStats(AnalyticsTestCase): # more than 1 realm, and the time bounds (time_start and time_end in # the queries). self.second_realm = Realm.objects.create( - string_id='second-realm', name='Second Realm', - date_created=self.TIME_ZERO-2*self.DAY) - for minutes_ago in [0, 1, 61, 60*24+1]: - creation_time = self.TIME_ZERO - minutes_ago*self.MINUTE - user = self.create_user(email=f'user-{minutes_ago}@second.analytics', - realm=self.second_realm, date_joined=creation_time) + string_id='second-realm', + name='Second Realm', + date_created=self.TIME_ZERO - 2 * self.DAY, + ) + for minutes_ago in [0, 1, 61, 60 * 24 + 1]: + creation_time = self.TIME_ZERO - minutes_ago * self.MINUTE + user = self.create_user( + email=f'user-{minutes_ago}@second.analytics', + realm=self.second_realm, + date_joined=creation_time, + ) recipient = self.create_stream_with_recipient( - name=f'stream {minutes_ago}', realm=self.second_realm, - date_created=creation_time)[1] + name=f'stream {minutes_ago}', realm=self.second_realm, date_created=creation_time + )[1] self.create_message(user, recipient, date_sent=creation_time) self.hourly_user = get_user('user-1@second.analytics', self.second_realm) self.daily_user = get_user('user-61@second.analytics', self.second_realm) @@ -402,8 +462,10 @@ class TestCountStats(AnalyticsTestCase): # This realm should not show up in the *Count tables for any of the # messages_* CountStats self.no_message_realm = Realm.objects.create( - string_id='no-message-realm', name='No Message Realm', - date_created=self.TIME_ZERO-2*self.DAY) + string_id='no-message-realm', + name='No Message Realm', + date_created=self.TIME_ZERO - 2 * self.DAY, + ) self.create_user(realm=self.no_message_realm) self.create_stream_with_recipient(realm=self.no_message_realm) # This huddle should not show up anywhere @@ -415,7 +477,7 @@ class TestCountStats(AnalyticsTestCase): # To be included self.create_user(is_bot=True) - self.create_user(is_bot=True, date_joined=self.TIME_ZERO-25*self.HOUR) + self.create_user(is_bot=True, date_joined=self.TIME_ZERO - 25 * self.HOUR) self.create_user(is_bot=False) # To be excluded @@ -423,13 +485,17 @@ class TestCountStats(AnalyticsTestCase): do_fill_count_stat_at_hour(stat, self.TIME_ZERO) - self.assertTableState(RealmCount, ['value', 'subgroup', 'realm'], - [[2, 'true'], [1, 'false'], - [3, 'false', self.second_realm], - [1, 
'false', self.no_message_realm]]) - self.assertTableState(InstallationCount, - ['value', 'subgroup'], - [[2, 'true'], [5, 'false']]) + self.assertTableState( + RealmCount, + ['value', 'subgroup', 'realm'], + [ + [2, 'true'], + [1, 'false'], + [3, 'false', self.second_realm], + [1, 'false', self.no_message_realm], + ], + ) + self.assertTableState(InstallationCount, ['value', 'subgroup'], [[2, 'true'], [5, 'false']]) self.assertTableState(UserCount, [], []) self.assertTableState(StreamCount, [], []) @@ -441,16 +507,18 @@ class TestCountStats(AnalyticsTestCase): self.current_property = stat.property # To be included - self.create_user(is_bot=True, date_joined=self.TIME_ZERO-25*self.HOUR) + self.create_user(is_bot=True, date_joined=self.TIME_ZERO - 25 * self.HOUR) self.create_user(is_bot=False) # To be excluded - self.create_user(email='test@second.analytics', - realm=self.second_realm, date_joined=self.TIME_ZERO-2*self.DAY) + self.create_user( + email='test@second.analytics', + realm=self.second_realm, + date_joined=self.TIME_ZERO - 2 * self.DAY, + ) do_fill_count_stat_at_hour(stat, self.TIME_ZERO, self.default_realm) - self.assertTableState(RealmCount, ['value', 'subgroup'], - [[1, 'true'], [1, 'false']]) + self.assertTableState(RealmCount, ['value', 'subgroup'], [[1, 'true'], [1, 'false']]) # No aggregation to InstallationCount with realm constraint self.assertTableState(InstallationCount, ['value', 'subgroup'], []) self.assertTableState(UserCount, [], []) @@ -463,8 +531,7 @@ class TestCountStats(AnalyticsTestCase): bot = self.create_user(is_bot=True) human1 = self.create_user() human2 = self.create_user() - recipient_human1 = Recipient.objects.get(type_id=human1.id, - type=Recipient.PERSONAL) + recipient_human1 = Recipient.objects.get(type_id=human1.id, type=Recipient.PERSONAL) recipient_stream = self.create_stream_with_recipient()[1] recipient_huddle = self.create_huddle_with_recipient()[1] @@ -477,11 +544,21 @@ class TestCountStats(AnalyticsTestCase): do_fill_count_stat_at_hour(stat, self.TIME_ZERO) - self.assertTableState(UserCount, ['value', 'subgroup', 'user'], - [[1, 'false', human1], [1, 'false', human2], [3, 'true', bot], - [1, 'false', self.hourly_user]]) - self.assertTableState(RealmCount, ['value', 'subgroup', 'realm'], - [[2, 'false'], [3, 'true'], [1, 'false', self.second_realm]]) + self.assertTableState( + UserCount, + ['value', 'subgroup', 'user'], + [ + [1, 'false', human1], + [1, 'false', human2], + [3, 'true', bot], + [1, 'false', self.hourly_user], + ], + ) + self.assertTableState( + RealmCount, + ['value', 'subgroup', 'realm'], + [[2, 'false'], [3, 'true'], [1, 'false', self.second_realm]], + ) self.assertTableState(InstallationCount, ['value', 'subgroup'], [[3, 'false'], [3, 'true']]) self.assertTableState(StreamCount, [], []) @@ -495,8 +572,7 @@ class TestCountStats(AnalyticsTestCase): bot = self.create_user(is_bot=True) human1 = self.create_user() human2 = self.create_user() - recipient_human1 = Recipient.objects.get(type_id=human1.id, - type=Recipient.PERSONAL) + recipient_human1 = Recipient.objects.get(type_id=human1.id, type=Recipient.PERSONAL) recipient_stream = self.create_stream_with_recipient()[1] recipient_huddle = self.create_huddle_with_recipient()[1] @@ -515,12 +591,16 @@ class TestCountStats(AnalyticsTestCase): do_fill_count_stat_at_hour(stat, self.TIME_ZERO, self.default_realm) - self.assertTableState(UserCount, ['value', 'subgroup', 'user'], - [[1, 'false', human1], [1, 'false', human2], - [3, 'true', bot]]) - self.assertTableState(RealmCount, ['value', 
'subgroup', 'realm'], - [[2, 'false', self.default_realm], - [3, 'true', self.default_realm]]) + self.assertTableState( + UserCount, + ['value', 'subgroup', 'user'], + [[1, 'false', human1], [1, 'false', human2], [3, 'true', bot]], + ) + self.assertTableState( + RealmCount, + ['value', 'subgroup', 'realm'], + [[2, 'false', self.default_realm], [3, 'true', self.default_realm]], + ) # No aggregation to InstallationCount with realm constraint self.assertTableState(InstallationCount, ['value', 'subgroup'], []) self.assertTableState(StreamCount, [], []) @@ -567,24 +647,44 @@ class TestCountStats(AnalyticsTestCase): do_fill_count_stat_at_hour(stat, self.TIME_ZERO) - self.assertTableState(UserCount, ['value', 'subgroup', 'user'], - [[1, 'private_stream', user1], - [2, 'private_stream', user2], - [2, 'public_stream', user1], - [1, 'public_stream', user2], - [1, 'private_message', user1], - [1, 'private_message', user2], - [1, 'private_message', user3], - [1, 'huddle_message', user1], - [1, 'huddle_message', user2], - [1, 'public_stream', self.hourly_user], - [1, 'public_stream', self.daily_user]]) - self.assertTableState(RealmCount, ['value', 'subgroup', 'realm'], - [[3, 'private_stream'], [3, 'public_stream'], [3, 'private_message'], - [2, 'huddle_message'], [2, 'public_stream', self.second_realm]]) - self.assertTableState(InstallationCount, ['value', 'subgroup'], - [[3, 'private_stream'], [5, 'public_stream'], [3, 'private_message'], - [2, 'huddle_message']]) + self.assertTableState( + UserCount, + ['value', 'subgroup', 'user'], + [ + [1, 'private_stream', user1], + [2, 'private_stream', user2], + [2, 'public_stream', user1], + [1, 'public_stream', user2], + [1, 'private_message', user1], + [1, 'private_message', user2], + [1, 'private_message', user3], + [1, 'huddle_message', user1], + [1, 'huddle_message', user2], + [1, 'public_stream', self.hourly_user], + [1, 'public_stream', self.daily_user], + ], + ) + self.assertTableState( + RealmCount, + ['value', 'subgroup', 'realm'], + [ + [3, 'private_stream'], + [3, 'public_stream'], + [3, 'private_message'], + [2, 'huddle_message'], + [2, 'public_stream', self.second_realm], + ], + ) + self.assertTableState( + InstallationCount, + ['value', 'subgroup'], + [ + [3, 'private_stream'], + [5, 'public_stream'], + [3, 'private_message'], + [2, 'huddle_message'], + ], + ) self.assertTableState(StreamCount, [], []) def test_messages_sent_by_message_type_realm_constraint(self) -> None: @@ -614,12 +714,26 @@ class TestCountStats(AnalyticsTestCase): self.create_message(self.hourly_user, stream_recipient) self.create_message(self.hourly_user, huddle_recipient) - self.assertTableState(UserCount, ['value', 'subgroup', 'user'], - [[1, 'private_message', user], [1, 'private_stream', user], - [1, 'huddle_message', user], [1, 'public_stream', user]]) - self.assertTableState(RealmCount, ['value', 'subgroup'], - [[1, 'private_message'], [1, 'private_stream'], - [1, 'public_stream'], [1, 'huddle_message']]) + self.assertTableState( + UserCount, + ['value', 'subgroup', 'user'], + [ + [1, 'private_message', user], + [1, 'private_stream', user], + [1, 'huddle_message', user], + [1, 'public_stream', user], + ], + ) + self.assertTableState( + RealmCount, + ['value', 'subgroup'], + [ + [1, 'private_message'], + [1, 'private_stream'], + [1, 'public_stream'], + [1, 'huddle_message'], + ], + ) # No aggregation to InstallationCount with realm constraint self.assertTableState(InstallationCount, ['value', 'subgroup'], []) self.assertTableState(StreamCount, [], []) @@ -666,16 
+780,25 @@ class TestCountStats(AnalyticsTestCase): client2_id = str(client2.id) website_client_id = str(get_client('website').id) # default for self.create_message - self.assertTableState(UserCount, ['value', 'subgroup', 'user'], - [[2, website_client_id, user1], - [1, client2_id, user1], [2, client2_id, user2], - [1, website_client_id, self.hourly_user], - [1, website_client_id, self.daily_user]]) - self.assertTableState(RealmCount, ['value', 'subgroup', 'realm'], - [[2, website_client_id], [3, client2_id], - [2, website_client_id, self.second_realm]]) - self.assertTableState(InstallationCount, ['value', 'subgroup'], - [[4, website_client_id], [3, client2_id]]) + self.assertTableState( + UserCount, + ['value', 'subgroup', 'user'], + [ + [2, website_client_id, user1], + [1, client2_id, user1], + [2, client2_id, user2], + [1, website_client_id, self.hourly_user], + [1, website_client_id, self.daily_user], + ], + ) + self.assertTableState( + RealmCount, + ['value', 'subgroup', 'realm'], + [[2, website_client_id], [3, client2_id], [2, website_client_id, self.second_realm]], + ) + self.assertTableState( + InstallationCount, ['value', 'subgroup'], [[4, website_client_id], [3, client2_id]] + ) self.assertTableState(StreamCount, [], []) def test_messages_sent_by_client_realm_constraint(self) -> None: @@ -705,11 +828,14 @@ class TestCountStats(AnalyticsTestCase): client2_id = str(client2.id) website_client_id = str(get_client('website').id) # default for self.create_message - self.assertTableState(UserCount, ['value', 'subgroup', 'user'], - [[1, client2_id, user1], [1, client2_id, user2], - [1, website_client_id, user2]]) - self.assertTableState(RealmCount, ['value', 'subgroup'], - [[1, website_client_id], [2, client2_id]]) + self.assertTableState( + UserCount, + ['value', 'subgroup', 'user'], + [[1, client2_id, user1], [1, client2_id, user2], [1, website_client_id, user2]], + ) + self.assertTableState( + RealmCount, ['value', 'subgroup'], [[1, website_client_id], [2, client2_id]] + ) # No aggregation to InstallationCount with realm constraint self.assertTableState(InstallationCount, ['value', 'subgroup'], []) self.assertTableState(StreamCount, [], []) @@ -741,13 +867,23 @@ class TestCountStats(AnalyticsTestCase): do_fill_count_stat_at_hour(stat, self.TIME_ZERO) - self.assertTableState(StreamCount, ['value', 'subgroup', 'stream'], - [[2, 'false', stream1], [1, 'false', stream2], [2, 'true', stream2], - # "hourly" and "daily" stream, from TestCountStats.setUp - [1, 'false', Stream.objects.get(name='stream 1')], - [1, 'false', Stream.objects.get(name='stream 61')]]) - self.assertTableState(RealmCount, ['value', 'subgroup', 'realm'], - [[3, 'false'], [2, 'true'], [2, 'false', self.second_realm]]) + self.assertTableState( + StreamCount, + ['value', 'subgroup', 'stream'], + [ + [2, 'false', stream1], + [1, 'false', stream2], + [2, 'true', stream2], + # "hourly" and "daily" stream, from TestCountStats.setUp + [1, 'false', Stream.objects.get(name='stream 1')], + [1, 'false', Stream.objects.get(name='stream 61')], + ], + ) + self.assertTableState( + RealmCount, + ['value', 'subgroup', 'realm'], + [[3, 'false'], [2, 'true'], [2, 'false', self.second_realm]], + ) self.assertTableState(InstallationCount, ['value', 'subgroup'], [[5, 'false'], [2, 'true']]) self.assertTableState(UserCount, [], []) @@ -775,26 +911,30 @@ class TestCountStats(AnalyticsTestCase): do_fill_count_stat_at_hour(stat, self.TIME_ZERO, self.default_realm) - self.assertTableState(StreamCount, ['value', 'subgroup', 'stream'], - [[1, 
'false', stream1], - [1, 'true', stream1]]) - self.assertTableState(RealmCount, ['value', 'subgroup', 'realm'], - [[1, 'false'], [1, 'true']]) + self.assertTableState( + StreamCount, + ['value', 'subgroup', 'stream'], + [[1, 'false', stream1], [1, 'true', stream1]], + ) + self.assertTableState( + RealmCount, ['value', 'subgroup', 'realm'], [[1, 'false'], [1, 'true']] + ) # No aggregation to InstallationCount with realm constraint self.assertTableState(InstallationCount, ['value', 'subgroup'], []) self.assertTableState(UserCount, [], []) - def create_interval(self, user: UserProfile, start_offset: timedelta, - end_offset: timedelta) -> None: + def create_interval( + self, user: UserProfile, start_offset: timedelta, end_offset: timedelta + ) -> None: UserActivityInterval.objects.create( - user_profile=user, start=self.TIME_ZERO-start_offset, - end=self.TIME_ZERO-end_offset) + user_profile=user, start=self.TIME_ZERO - start_offset, end=self.TIME_ZERO - end_offset + ) def test_1day_actives(self) -> None: stat = COUNT_STATS['1day_actives::day'] self.current_property = stat.property - _1day = 1*self.DAY - UserActivityInterval.MIN_INTERVAL_LENGTH + _1day = 1 * self.DAY - UserActivityInterval.MIN_INTERVAL_LENGTH # Outside time range, should not appear. Also tests upper boundary. user1 = self.create_user() @@ -807,29 +947,33 @@ class TestCountStats(AnalyticsTestCase): # Multiple intervals, including one outside boundary user3 = self.create_user() - self.create_interval(user3, 2*self.DAY, 1*self.DAY) - self.create_interval(user3, 20*self.HOUR, 19*self.HOUR) - self.create_interval(user3, 20*self.MINUTE, 19*self.MINUTE) + self.create_interval(user3, 2 * self.DAY, 1 * self.DAY) + self.create_interval(user3, 20 * self.HOUR, 19 * self.HOUR) + self.create_interval(user3, 20 * self.MINUTE, 19 * self.MINUTE) # Intervals crossing boundary user4 = self.create_user() - self.create_interval(user4, 1.5*self.DAY, 0.5*self.DAY) + self.create_interval(user4, 1.5 * self.DAY, 0.5 * self.DAY) user5 = self.create_user() self.create_interval(user5, self.MINUTE, -self.MINUTE) # Interval subsuming time range user6 = self.create_user() - self.create_interval(user6, 2*self.DAY, -2*self.DAY) + self.create_interval(user6, 2 * self.DAY, -2 * self.DAY) # Second realm user7 = self.create_user(realm=self.second_realm) - self.create_interval(user7, 20*self.MINUTE, 19*self.MINUTE) + self.create_interval(user7, 20 * self.MINUTE, 19 * self.MINUTE) do_fill_count_stat_at_hour(stat, self.TIME_ZERO) - self.assertTableState(UserCount, ['value', 'user'], - [[1, user2], [1, user3], [1, user4], [1, user5], [1, user6], [1, user7]]) - self.assertTableState(RealmCount, ['value', 'realm'], - [[5, self.default_realm], [1, self.second_realm]]) + self.assertTableState( + UserCount, + ['value', 'user'], + [[1, user2], [1, user3], [1, user4], [1, user5], [1, user6], [1, user7]], + ) + self.assertTableState( + RealmCount, ['value', 'realm'], [[5, self.default_realm], [1, self.second_realm]] + ) self.assertTableState(InstallationCount, ['value'], [[6]]) self.assertTableState(StreamCount, [], []) @@ -840,23 +984,21 @@ class TestCountStats(AnalyticsTestCase): stat = COUNT_STATS['1day_actives::day'] self.current_property = stat.property - _1day = 1*self.DAY - UserActivityInterval.MIN_INTERVAL_LENGTH + _1day = 1 * self.DAY - UserActivityInterval.MIN_INTERVAL_LENGTH user1 = self.create_user() user2 = self.create_user() # To be included - self.create_interval(user1, 20*self.HOUR, 19*self.HOUR) + self.create_interval(user1, 20 * self.HOUR, 19 * self.HOUR) 
self.create_interval(user2, _1day + self.DAY, _1day) # To be excluded user3 = self.create_user(realm=self.second_realm) - self.create_interval(user3, 20*self.MINUTE, 19*self.MINUTE) + self.create_interval(user3, 20 * self.MINUTE, 19 * self.MINUTE) do_fill_count_stat_at_hour(stat, self.TIME_ZERO, self.default_realm) - self.assertTableState(UserCount, ['value', 'user'], - [[1, user2], [1, user2]]) - self.assertTableState(RealmCount, ['value', 'realm'], - [[2, self.default_realm]]) + self.assertTableState(UserCount, ['value', 'user'], [[1, user2], [1, user2]]) + self.assertTableState(RealmCount, ['value', 'realm'], [[2, self.default_realm]]) # No aggregation to InstallationCount with realm constraint self.assertTableState(InstallationCount, ['value'], []) self.assertTableState(StreamCount, [], []) @@ -865,7 +1007,7 @@ class TestCountStats(AnalyticsTestCase): stat = COUNT_STATS['15day_actives::day'] self.current_property = stat.property - _15day = 15*self.DAY - UserActivityInterval.MIN_INTERVAL_LENGTH + _15day = 15 * self.DAY - UserActivityInterval.MIN_INTERVAL_LENGTH # Outside time range, should not appear. Also tests upper boundary. user1 = self.create_user() @@ -878,29 +1020,33 @@ class TestCountStats(AnalyticsTestCase): # Multiple intervals, including one outside boundary user3 = self.create_user() - self.create_interval(user3, 20*self.DAY, 19*self.DAY) - self.create_interval(user3, 20*self.HOUR, 19*self.HOUR) - self.create_interval(user3, 20*self.MINUTE, 19*self.MINUTE) + self.create_interval(user3, 20 * self.DAY, 19 * self.DAY) + self.create_interval(user3, 20 * self.HOUR, 19 * self.HOUR) + self.create_interval(user3, 20 * self.MINUTE, 19 * self.MINUTE) # Intervals crossing boundary user4 = self.create_user() - self.create_interval(user4, 20*self.DAY, 10*self.DAY) + self.create_interval(user4, 20 * self.DAY, 10 * self.DAY) user5 = self.create_user() self.create_interval(user5, self.MINUTE, -self.MINUTE) # Interval subsuming time range user6 = self.create_user() - self.create_interval(user6, 20*self.DAY, -2*self.DAY) + self.create_interval(user6, 20 * self.DAY, -2 * self.DAY) # Second realm user7 = self.create_user(realm=self.second_realm) - self.create_interval(user7, 20*self.MINUTE, 19*self.MINUTE) + self.create_interval(user7, 20 * self.MINUTE, 19 * self.MINUTE) do_fill_count_stat_at_hour(stat, self.TIME_ZERO) - self.assertTableState(UserCount, ['value', 'user'], - [[1, user2], [1, user3], [1, user4], [1, user5], [1, user6], [1, user7]]) - self.assertTableState(RealmCount, ['value', 'realm'], - [[5, self.default_realm], [1, self.second_realm]]) + self.assertTableState( + UserCount, + ['value', 'user'], + [[1, user2], [1, user3], [1, user4], [1, user5], [1, user6], [1, user7]], + ) + self.assertTableState( + RealmCount, ['value', 'realm'], [[5, self.default_realm], [1, self.second_realm]] + ) self.assertTableState(InstallationCount, ['value'], [[6]]) self.assertTableState(StreamCount, [], []) @@ -911,7 +1057,7 @@ class TestCountStats(AnalyticsTestCase): stat = COUNT_STATS['15day_actives::day'] self.current_property = stat.property - _15day = 15*self.DAY - UserActivityInterval.MIN_INTERVAL_LENGTH + _15day = 15 * self.DAY - UserActivityInterval.MIN_INTERVAL_LENGTH user1 = self.create_user() user2 = self.create_user() @@ -919,17 +1065,15 @@ class TestCountStats(AnalyticsTestCase): # To be included self.create_interval(user1, _15day + self.DAY, _15day) - self.create_interval(user2, 20*self.HOUR, 19*self.HOUR) + self.create_interval(user2, 20 * self.HOUR, 19 * self.HOUR) # To be excluded - 
self.create_interval(user3, 20*self.HOUR, 19*self.HOUR) + self.create_interval(user3, 20 * self.HOUR, 19 * self.HOUR) do_fill_count_stat_at_hour(stat, self.TIME_ZERO, self.default_realm) - self.assertTableState(UserCount, ['value', 'user'], - [[1, user1], [1, user2]]) - self.assertTableState(RealmCount, ['value', 'realm'], - [[2, self.default_realm]]) + self.assertTableState(UserCount, ['value', 'user'], [[1, user1], [1, user2]]) + self.assertTableState(RealmCount, ['value', 'realm'], [[2, self.default_realm]]) # No aggregation to InstallationCount with realm constraint self.assertTableState(InstallationCount, ['value'], []) self.assertTableState(StreamCount, [], []) @@ -941,23 +1085,23 @@ class TestCountStats(AnalyticsTestCase): # Outside time range, should not appear. Also testing for intervals # starting and ending on boundary user1 = self.create_user() - self.create_interval(user1, 25*self.HOUR, self.DAY) + self.create_interval(user1, 25 * self.HOUR, self.DAY) self.create_interval(user1, timedelta(0), -self.HOUR) # Multiple intervals, including one outside boundary user2 = self.create_user() - self.create_interval(user2, 20*self.DAY, 19*self.DAY) - self.create_interval(user2, 20*self.HOUR, 19*self.HOUR) - self.create_interval(user2, 20*self.MINUTE, 19*self.MINUTE) + self.create_interval(user2, 20 * self.DAY, 19 * self.DAY) + self.create_interval(user2, 20 * self.HOUR, 19 * self.HOUR) + self.create_interval(user2, 20 * self.MINUTE, 19 * self.MINUTE) # Intervals crossing boundary user3 = self.create_user() - self.create_interval(user3, 25*self.HOUR, 22*self.HOUR) + self.create_interval(user3, 25 * self.HOUR, 22 * self.HOUR) self.create_interval(user3, self.MINUTE, -self.MINUTE) # Interval subsuming time range user4 = self.create_user() - self.create_interval(user4, 2*self.DAY, -2*self.DAY) + self.create_interval(user4, 2 * self.DAY, -2 * self.DAY) # Less than 60 seconds, should not appear user5 = self.create_user() @@ -966,14 +1110,18 @@ class TestCountStats(AnalyticsTestCase): # Second realm user6 = self.create_user(realm=self.second_realm) - self.create_interval(user6, 20*self.MINUTE, 19*self.MINUTE) + self.create_interval(user6, 20 * self.MINUTE, 19 * self.MINUTE) do_fill_count_stat_at_hour(stat, self.TIME_ZERO) - self.assertTableState(UserCount, ['value', 'user'], - [[61, user2], [121, user3], [24*60, user4], [1, user6]]) - self.assertTableState(RealmCount, ['value', 'realm'], - [[61 + 121 + 24*60, self.default_realm], [1, self.second_realm]]) - self.assertTableState(InstallationCount, ['value'], [[61 + 121 + 24*60 + 1]]) + self.assertTableState( + UserCount, ['value', 'user'], [[61, user2], [121, user3], [24 * 60, user4], [1, user6]] + ) + self.assertTableState( + RealmCount, + ['value', 'realm'], + [[61 + 121 + 24 * 60, self.default_realm], [1, self.second_realm]], + ) + self.assertTableState(InstallationCount, ['value'], [[61 + 121 + 24 * 60 + 1]]) self.assertTableState(StreamCount, [], []) def test_minutes_active_realm_constraint(self) -> None: @@ -990,17 +1138,15 @@ class TestCountStats(AnalyticsTestCase): user3 = self.create_user(realm=self.second_realm) # To be included - self.create_interval(user1, 20*self.HOUR, 19*self.HOUR) - self.create_interval(user2, 20*self.MINUTE, 19*self.MINUTE) + self.create_interval(user1, 20 * self.HOUR, 19 * self.HOUR) + self.create_interval(user2, 20 * self.MINUTE, 19 * self.MINUTE) # To be excluded - self.create_interval(user3, 20*self.MINUTE, 19*self.MINUTE) + self.create_interval(user3, 20 * self.MINUTE, 19 * self.MINUTE) 
do_fill_count_stat_at_hour(stat, self.TIME_ZERO, self.default_realm) - self.assertTableState(UserCount, ['value', 'user'], - [[60, user1], [1, user2]]) - self.assertTableState(RealmCount, ['value', 'realm'], - [[60 + 1, self.default_realm]]) + self.assertTableState(UserCount, ['value', 'user'], [[60, user1], [1, user2]]) + self.assertTableState(RealmCount, ['value', 'realm'], [[60 + 1, self.default_realm]]) # No aggregation to InstallationCount with realm constraint self.assertTableState(InstallationCount, ['value'], []) self.assertTableState(StreamCount, [], []) @@ -1012,18 +1158,24 @@ class TestCountStats(AnalyticsTestCase): one_hour_before = datetime(2016, 3, 14, 18, tzinfo=timezone.utc) one_day_before = datetime(2016, 3, 13, 19, tzinfo=timezone.utc) - fillstate = FillState.objects.create(property=COUNT_STATS["messages_sent:is_bot:hour"].property, - end_time=a_time, state=FillState.DONE) + fillstate = FillState.objects.create( + property=COUNT_STATS["messages_sent:is_bot:hour"].property, + end_time=a_time, + state=FillState.DONE, + ) self.assertEqual(COUNT_STATS["messages_sent:is_bot:hour"].last_successful_fill(), a_time) fillstate.state = FillState.STARTED fillstate.save(update_fields=["state"]) - self.assertEqual(COUNT_STATS["messages_sent:is_bot:hour"].last_successful_fill(), one_hour_before) + self.assertEqual( + COUNT_STATS["messages_sent:is_bot:hour"].last_successful_fill(), one_hour_before + ) fillstate.property = COUNT_STATS["7day_actives::day"].property fillstate.save(update_fields=["property"]) self.assertEqual(COUNT_STATS["7day_actives::day"].last_successful_fill(), one_day_before) + class TestDoAggregateToSummaryTable(AnalyticsTestCase): # do_aggregate_to_summary_table is mostly tested by the end to end # nature of the tests in TestCountStats. 
But want to highlight one @@ -1036,6 +1188,7 @@ class TestDoAggregateToSummaryTable(AnalyticsTestCase): self.assertFalse(RealmCount.objects.exists()) self.assertFalse(InstallationCount.objects.exists()) + class TestDoIncrementLoggingStat(AnalyticsTestCase): def test_table_and_id_args(self) -> None: # For realms, streams, and users, tests that the new rows are going to @@ -1063,7 +1216,7 @@ class TestDoIncrementLoggingStat(AnalyticsTestCase): self.assertTableState(StreamCount, ['stream'], [[stream1], [stream2]]) def test_frequency(self) -> None: - times = [self.TIME_ZERO - self.MINUTE*i for i in [0, 1, 61, 24*60+1]] + times = [self.TIME_ZERO - self.MINUTE * i for i in [0, 1, 61, 24 * 60 + 1]] stat = LoggingCountStat('day test', RealmCount, CountStat.DAY) for time_ in times: @@ -1072,12 +1225,17 @@ class TestDoIncrementLoggingStat(AnalyticsTestCase): for time_ in times: do_increment_logging_stat(self.default_realm, stat, None, time_) - self.assertTableState(RealmCount, ['value', 'property', 'end_time'], - [[3, 'day test', self.TIME_ZERO], - [1, 'day test', self.TIME_ZERO - self.DAY], - [2, 'hour test', self.TIME_ZERO], - [1, 'hour test', self.TIME_LAST_HOUR], - [1, 'hour test', self.TIME_ZERO - self.DAY]]) + self.assertTableState( + RealmCount, + ['value', 'property', 'end_time'], + [ + [3, 'day test', self.TIME_ZERO], + [1, 'day test', self.TIME_ZERO - self.DAY], + [2, 'hour test', self.TIME_ZERO], + [1, 'hour test', self.TIME_LAST_HOUR], + [1, 'hour test', self.TIME_ZERO - self.DAY], + ], + ) def test_get_or_create(self) -> None: stat = LoggingCountStat('test', RealmCount, CountStat.HOUR) @@ -1088,14 +1246,26 @@ class TestDoIncrementLoggingStat(AnalyticsTestCase): do_increment_logging_stat(self.default_realm, stat, 'subgroup2', self.TIME_ZERO) do_increment_logging_stat(self.default_realm, stat, 'subgroup1', self.TIME_LAST_HOUR) self.current_property = 'test' - self.assertTableState(RealmCount, ['value', 'subgroup', 'end_time'], - [[1, 'subgroup1', self.TIME_ZERO], [1, 'subgroup2', self.TIME_ZERO], - [1, 'subgroup1', self.TIME_LAST_HOUR]]) + self.assertTableState( + RealmCount, + ['value', 'subgroup', 'end_time'], + [ + [1, 'subgroup1', self.TIME_ZERO], + [1, 'subgroup2', self.TIME_ZERO], + [1, 'subgroup1', self.TIME_LAST_HOUR], + ], + ) # This should trigger the get part of get_or_create do_increment_logging_stat(self.default_realm, stat, 'subgroup1', self.TIME_ZERO) - self.assertTableState(RealmCount, ['value', 'subgroup', 'end_time'], - [[2, 'subgroup1', self.TIME_ZERO], [1, 'subgroup2', self.TIME_ZERO], - [1, 'subgroup1', self.TIME_LAST_HOUR]]) + self.assertTableState( + RealmCount, + ['value', 'subgroup', 'end_time'], + [ + [2, 'subgroup1', self.TIME_ZERO], + [1, 'subgroup2', self.TIME_ZERO], + [1, 'subgroup1', self.TIME_LAST_HOUR], + ], + ) def test_increment(self) -> None: stat = LoggingCountStat('test', RealmCount, CountStat.DAY) @@ -1107,6 +1277,7 @@ class TestDoIncrementLoggingStat(AnalyticsTestCase): do_increment_logging_stat(self.default_realm, stat, None, self.TIME_ZERO) self.assertTableState(RealmCount, ['value'], [[3]]) + class TestLoggingCountStats(AnalyticsTestCase): def test_aggregation(self) -> None: stat = LoggingCountStat('realm test', RealmCount, CountStat.DAY) @@ -1123,37 +1294,67 @@ class TestLoggingCountStats(AnalyticsTestCase): do_increment_logging_stat(stream, stat, None, self.TIME_ZERO) process_count_stat(stat, self.TIME_ZERO) - self.assertTableState(InstallationCount, ['property', 'value'], - [['realm test', 1], ['user test', 1], ['stream test', 1]]) - 
self.assertTableState(RealmCount, ['property', 'value'], - [['realm test', 1], ['user test', 1], ['stream test', 1]]) + self.assertTableState( + InstallationCount, + ['property', 'value'], + [['realm test', 1], ['user test', 1], ['stream test', 1]], + ) + self.assertTableState( + RealmCount, + ['property', 'value'], + [['realm test', 1], ['user test', 1], ['stream test', 1]], + ) self.assertTableState(UserCount, ['property', 'value'], [['user test', 1]]) self.assertTableState(StreamCount, ['property', 'value'], [['stream test', 1]]) def test_active_users_log_by_is_bot(self) -> None: property = 'active_users_log:is_bot:day' user = do_create_user('email', 'password', self.default_realm, 'full_name') - self.assertEqual(1, RealmCount.objects.filter(property=property, subgroup=False) - .aggregate(Sum('value'))['value__sum']) + self.assertEqual( + 1, + RealmCount.objects.filter(property=property, subgroup=False).aggregate(Sum('value'))[ + 'value__sum' + ], + ) do_deactivate_user(user) - self.assertEqual(0, RealmCount.objects.filter(property=property, subgroup=False) - .aggregate(Sum('value'))['value__sum']) + self.assertEqual( + 0, + RealmCount.objects.filter(property=property, subgroup=False).aggregate(Sum('value'))[ + 'value__sum' + ], + ) do_activate_user(user) - self.assertEqual(1, RealmCount.objects.filter(property=property, subgroup=False) - .aggregate(Sum('value'))['value__sum']) + self.assertEqual( + 1, + RealmCount.objects.filter(property=property, subgroup=False).aggregate(Sum('value'))[ + 'value__sum' + ], + ) do_deactivate_user(user) - self.assertEqual(0, RealmCount.objects.filter(property=property, subgroup=False) - .aggregate(Sum('value'))['value__sum']) + self.assertEqual( + 0, + RealmCount.objects.filter(property=property, subgroup=False).aggregate(Sum('value'))[ + 'value__sum' + ], + ) do_reactivate_user(user) - self.assertEqual(1, RealmCount.objects.filter(property=property, subgroup=False) - .aggregate(Sum('value'))['value__sum']) + self.assertEqual( + 1, + RealmCount.objects.filter(property=property, subgroup=False).aggregate(Sum('value'))[ + 'value__sum' + ], + ) def test_invites_sent(self) -> None: property = 'invites_sent::day' def assertInviteCountEquals(count: int) -> None: - self.assertEqual(count, RealmCount.objects.filter(property=property, subgroup=None) - .aggregate(Sum('value'))['value__sum']) + self.assertEqual( + count, + RealmCount.objects.filter(property=property, subgroup=None).aggregate(Sum('value'))[ + 'value__sum' + ], + ) user = self.create_user(email='first@domain.tld') stream, _ = self.create_stream_with_recipient() @@ -1200,25 +1401,50 @@ class TestLoggingCountStats(AnalyticsTestCase): self.send_personal_message(user1, user2) client = get_client("website") do_mark_all_as_read(user2, client) - self.assertEqual(1, UserCount.objects.filter(property=read_count_property) - .aggregate(Sum('value'))['value__sum']) - self.assertEqual(1, UserCount.objects.filter(property=interactions_property) - .aggregate(Sum('value'))['value__sum']) + self.assertEqual( + 1, + UserCount.objects.filter(property=read_count_property).aggregate(Sum('value'))[ + 'value__sum' + ], + ) + self.assertEqual( + 1, + UserCount.objects.filter(property=interactions_property).aggregate(Sum('value'))[ + 'value__sum' + ], + ) self.send_stream_message(user1, stream.name) self.send_stream_message(user1, stream.name) do_mark_stream_messages_as_read(user2, stream.recipient_id) - self.assertEqual(3, UserCount.objects.filter(property=read_count_property) - .aggregate(Sum('value'))['value__sum']) - 
self.assertEqual(2, UserCount.objects.filter(property=interactions_property) - .aggregate(Sum('value'))['value__sum']) + self.assertEqual( + 3, + UserCount.objects.filter(property=read_count_property).aggregate(Sum('value'))[ + 'value__sum' + ], + ) + self.assertEqual( + 2, + UserCount.objects.filter(property=interactions_property).aggregate(Sum('value'))[ + 'value__sum' + ], + ) message = self.send_stream_message(user2, stream.name) do_update_message_flags(user1, client, 'add', 'read', [message]) - self.assertEqual(4, UserCount.objects.filter(property=read_count_property) - .aggregate(Sum('value'))['value__sum']) - self.assertEqual(3, UserCount.objects.filter(property=interactions_property) - .aggregate(Sum('value'))['value__sum']) + self.assertEqual( + 4, + UserCount.objects.filter(property=read_count_property).aggregate(Sum('value'))[ + 'value__sum' + ], + ) + self.assertEqual( + 3, + UserCount.objects.filter(property=interactions_property).aggregate(Sum('value'))[ + 'value__sum' + ], + ) + class TestDeleteStats(AnalyticsTestCase): def test_do_drop_all_analytics_tables(self) -> None: @@ -1251,7 +1477,9 @@ class TestDeleteStats(AnalyticsTestCase): StreamCount.objects.create(stream=stream, realm=stream.realm, **count_args) RealmCount.objects.create(realm=user.realm, **count_args) InstallationCount.objects.create(**count_args) - FillState.objects.create(property='to_delete', end_time=self.TIME_ZERO, state=FillState.DONE) + FillState.objects.create( + property='to_delete', end_time=self.TIME_ZERO, state=FillState.DONE + ) FillState.objects.create(property='to_save', end_time=self.TIME_ZERO, state=FillState.DONE) analytics = apps.get_app_config('analytics') @@ -1263,6 +1491,7 @@ class TestDeleteStats(AnalyticsTestCase): self.assertFalse(table.objects.filter(property='to_delete').exists()) self.assertTrue(table.objects.filter(property='to_save').exists()) + class TestActiveUsersAudit(AnalyticsTestCase): def setUp(self) -> None: super().setUp() @@ -1270,14 +1499,18 @@ class TestActiveUsersAudit(AnalyticsTestCase): self.stat = COUNT_STATS['active_users_audit:is_bot:day'] self.current_property = self.stat.property - def add_event(self, event_type: int, days_offset: float, - user: Optional[UserProfile]=None) -> None: - hours_offset = int(24*days_offset) + def add_event( + self, event_type: int, days_offset: float, user: Optional[UserProfile] = None + ) -> None: + hours_offset = int(24 * days_offset) if user is None: user = self.user RealmAuditLog.objects.create( - realm=user.realm, modified_user=user, event_type=event_type, - event_time=self.TIME_ZERO - hours_offset*self.HOUR) + realm=user.realm, + modified_user=user, + event_type=event_type, + event_time=self.TIME_ZERO - hours_offset * self.HOUR, + ) def test_user_deactivated_in_future(self) -> None: self.add_event(RealmAuditLog.USER_CREATED, 1) @@ -1293,13 +1526,13 @@ class TestActiveUsersAudit(AnalyticsTestCase): def test_user_active_then_deactivated_same_day(self) -> None: self.add_event(RealmAuditLog.USER_CREATED, 1) - self.add_event(RealmAuditLog.USER_DEACTIVATED, .5) + self.add_event(RealmAuditLog.USER_DEACTIVATED, 0.5) do_fill_count_stat_at_hour(self.stat, self.TIME_ZERO) self.assertTableState(UserCount, [], []) def test_user_unactive_then_activated_same_day(self) -> None: self.add_event(RealmAuditLog.USER_DEACTIVATED, 1) - self.add_event(RealmAuditLog.USER_REACTIVATED, .5) + self.add_event(RealmAuditLog.USER_REACTIVATED, 0.5) do_fill_count_stat_at_hour(self.stat, self.TIME_ZERO) self.assertTableState(UserCount, ['subgroup'], [['false']]) 
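
# NOTE (annotation; a sketch, not part of the diff): the read/interaction
# totals asserted above reduce to one invariant: each mark-as-read operation
# adds the number of messages it affected to the read-count property, but
# exactly 1 to the interactions property. Tallying the three operations from
# the test (1 personal message, 2 stream messages, 1 flag update):
operations = [1, 2, 1]  # messages affected per operation
read_total = sum(operations)
interaction_total = len(operations)
assert (read_total, interaction_total) == (4, 3)
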
@@ -1309,8 +1542,9 @@ class TestActiveUsersAudit(AnalyticsTestCase): self.add_event(RealmAuditLog.USER_CREATED, 2) self.add_event(RealmAuditLog.USER_DEACTIVATED, 1) process_count_stat(self.stat, self.TIME_ZERO) - self.assertTableState(UserCount, ['subgroup', 'end_time'], - [['false', self.TIME_ZERO - self.DAY]]) + self.assertTableState( + UserCount, ['subgroup', 'end_time'], [['false', self.TIME_ZERO - self.DAY]] + ) def test_user_deactivated_then_reactivated_with_day_gap(self) -> None: self.add_event(RealmAuditLog.USER_DEACTIVATED, 2) @@ -1324,9 +1558,12 @@ class TestActiveUsersAudit(AnalyticsTestCase): self.add_event(RealmAuditLog.USER_ACTIVATED, 2) self.add_event(RealmAuditLog.USER_REACTIVATED, 1) for i in range(4): - do_fill_count_stat_at_hour(self.stat, self.TIME_ZERO - i*self.DAY) - self.assertTableState(UserCount, ['subgroup', 'end_time'], - [['false', self.TIME_ZERO - i*self.DAY] for i in [3, 1, 0]]) + do_fill_count_stat_at_hour(self.stat, self.TIME_ZERO - i * self.DAY) + self.assertTableState( + UserCount, + ['subgroup', 'end_time'], + [['false', self.TIME_ZERO - i * self.DAY] for i in [3, 1, 0]], + ) # Also tests that aggregation to RealmCount and InstallationCount is # being done, and that we're storing the user correctly in UserCount @@ -1339,11 +1576,20 @@ class TestActiveUsersAudit(AnalyticsTestCase): for user in [user1, user2, user3, user4]: self.add_event(RealmAuditLog.USER_CREATED, 1, user=user) do_fill_count_stat_at_hour(self.stat, self.TIME_ZERO) - self.assertTableState(UserCount, ['subgroup', 'user'], - [['false', user1], ['false', user2], ['false', user3], ['true', user4]]) - self.assertTableState(RealmCount, ['value', 'subgroup', 'realm'], - [[2, 'false', self.default_realm], [1, 'false', second_realm], - [1, 'true', second_realm]]) + self.assertTableState( + UserCount, + ['subgroup', 'user'], + [['false', user1], ['false', user2], ['false', user3], ['true', user4]], + ) + self.assertTableState( + RealmCount, + ['value', 'subgroup', 'realm'], + [ + [2, 'false', self.default_realm], + [1, 'false', second_realm], + [1, 'true', second_realm], + ], + ) self.assertTableState(InstallationCount, ['value', 'subgroup'], [[3, 'false'], [1, 'true']]) self.assertTableState(StreamCount, [], []) @@ -1357,8 +1603,11 @@ class TestActiveUsersAudit(AnalyticsTestCase): def test_update_from_two_days_ago(self) -> None: self.add_event(RealmAuditLog.USER_CREATED, 2) process_count_stat(self.stat, self.TIME_ZERO) - self.assertTableState(UserCount, ['subgroup', 'end_time'], - [['false', self.TIME_ZERO], ['false', self.TIME_ZERO-self.DAY]]) + self.assertTableState( + UserCount, + ['subgroup', 'end_time'], + [['false', self.TIME_ZERO], ['false', self.TIME_ZERO - self.DAY]], + ) # User with no relevant activity could happen e.g. for a system bot that # doesn't go through do_create_user. 
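
# NOTE (annotation; a sketch, not part of the diff): the create/deactivate/
# reactivate cases in TestActiveUsersAudit all reduce to one rule: for a given
# end_time, a user is audit-active iff their most recent RealmAuditLog event at
# or before that time is a creation, activation, or reactivation. Sketch:
from datetime import datetime, timedelta, timezone
from typing import List, Tuple

ACTIVATING = {'created', 'activated', 'reactivated'}

def is_active(events: List[Tuple[datetime, str]], end_time: datetime) -> bool:
    past = [etype for etime, etype in sorted(events) if etime <= end_time]
    return bool(past) and past[-1] in ACTIVATING

T = datetime(2042, 3, 14, tzinfo=timezone.utc)
DAY = timedelta(days=1)
# Deactivated a day ago, reactivated half a day ago -> active at T:
assert is_active([(T - DAY, 'deactivated'), (T - DAY / 2, 'reactivated')], T)
# Created two days ago, deactivated a day ago -> inactive at T:
assert not is_active([(T - 2 * DAY, 'created'), (T - DAY, 'deactivated')], T)
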
Mainly just want to make sure that @@ -1372,7 +1621,7 @@ class TestActiveUsersAudit(AnalyticsTestCase): def test_max_audit_entry_is_unrelated(self) -> None: self.add_event(RealmAuditLog.USER_CREATED, 1) - self.add_event(RealmAuditLog.USER_SOFT_ACTIVATED, .5) + self.add_event(RealmAuditLog.USER_SOFT_ACTIVATED, 0.5) do_fill_count_stat_at_hour(self.stat, self.TIME_ZERO) self.assertTableState(UserCount, ['subgroup'], [['false']]) @@ -1387,13 +1636,12 @@ class TestActiveUsersAudit(AnalyticsTestCase): user1 = self.create_user() user2 = self.create_user() user3 = self.create_user() - self.add_event(RealmAuditLog.USER_CREATED, .5, user=user1) - self.add_event(RealmAuditLog.USER_CREATED, .5, user=user2) + self.add_event(RealmAuditLog.USER_CREATED, 0.5, user=user1) + self.add_event(RealmAuditLog.USER_CREATED, 0.5, user=user2) self.add_event(RealmAuditLog.USER_CREATED, 1, user=user3) - self.add_event(RealmAuditLog.USER_DEACTIVATED, .5, user=user3) + self.add_event(RealmAuditLog.USER_DEACTIVATED, 0.5, user=user3) do_fill_count_stat_at_hour(self.stat, self.TIME_ZERO) - self.assertTableState(UserCount, ['user', 'subgroup'], - [[user1, 'false'], [user2, 'false']]) + self.assertTableState(UserCount, ['user', 'subgroup'], [[user1, 'false'], [user2, 'false']]) def test_end_to_end_with_actions_dot_py(self) -> None: user1 = do_create_user('email1', 'password', self.default_realm, 'full_name') @@ -1406,30 +1654,42 @@ class TestActiveUsersAudit(AnalyticsTestCase): end_time = floor_to_day(timezone_now()) + self.DAY do_fill_count_stat_at_hour(self.stat, end_time) for user in [user1, user3, user4]: - self.assertTrue(UserCount.objects.filter( - user=user, property=self.current_property, subgroup='false', - end_time=end_time, value=1).exists()) + self.assertTrue( + UserCount.objects.filter( + user=user, + property=self.current_property, + subgroup='false', + end_time=end_time, + value=1, + ).exists() + ) self.assertFalse(UserCount.objects.filter(user=user2, end_time=end_time).exists()) + class TestRealmActiveHumans(AnalyticsTestCase): def setUp(self) -> None: super().setUp() self.stat = COUNT_STATS['realm_active_humans::day'] self.current_property = self.stat.property - def mark_audit_active(self, user: UserProfile, end_time: Optional[datetime]=None) -> None: + def mark_audit_active(self, user: UserProfile, end_time: Optional[datetime] = None) -> None: if end_time is None: end_time = self.TIME_ZERO UserCount.objects.create( - user=user, realm=user.realm, property='active_users_audit:is_bot:day', - subgroup=orjson.dumps(user.is_bot).decode(), end_time=end_time, value=1) + user=user, + realm=user.realm, + property='active_users_audit:is_bot:day', + subgroup=orjson.dumps(user.is_bot).decode(), + end_time=end_time, + value=1, + ) - def mark_15day_active(self, user: UserProfile, end_time: Optional[datetime]=None) -> None: + def mark_15day_active(self, user: UserProfile, end_time: Optional[datetime] = None) -> None: if end_time is None: end_time = self.TIME_ZERO UserCount.objects.create( - user=user, realm=user.realm, property='15day_actives::day', - end_time=end_time, value=1) + user=user, realm=user.realm, property='15day_actives::day', end_time=end_time, value=1 + ) def test_basic_boolean_logic(self) -> None: user = self.create_user() @@ -1439,7 +1699,7 @@ class TestRealmActiveHumans(AnalyticsTestCase): self.mark_15day_active(user, end_time=self.TIME_ZERO + self.DAY) for i in [-1, 0, 1]: - do_fill_count_stat_at_hour(self.stat, self.TIME_ZERO + i*self.DAY) + do_fill_count_stat_at_hour(self.stat, self.TIME_ZERO + i * 
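
# NOTE (annotation; a sketch, not part of the diff): 'realm_active_humans::day'
# is a DependentCountStat: for a given end_time a user counts only if they have
# BOTH an 'active_users_audit:is_bot:day' row with subgroup 'false' AND a
# '15day_actives::day' row, which is why the tests seed both helper tables. A
# set-intersection model of that AND, counted per realm:
from typing import Dict, Set, Tuple

def realm_active_humans(
    audit_active: Set[Tuple[int, int]],        # (realm_id, user_id) pairs
    fifteen_day_active: Set[Tuple[int, int]],
) -> Dict[int, int]:
    counts: Dict[int, int] = {}
    for realm_id, _user_id in audit_active & fifteen_day_active:
        counts[realm_id] = counts.get(realm_id, 0) + 1
    return counts

# One user satisfies both conditions in realm 1; a second is only audit-active:
assert realm_active_humans({(1, 10), (1, 11)}, {(1, 10)}) == {1: 1}
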
self.DAY) self.assertTableState(RealmCount, ['value', 'end_time'], [[1, self.TIME_ZERO + self.DAY]]) def test_bots_not_counted(self) -> None: @@ -1465,12 +1725,17 @@ class TestRealmActiveHumans(AnalyticsTestCase): self.mark_15day_active(user, end_time=self.TIME_ZERO - self.DAY) for i in [-1, 0, 1]: - do_fill_count_stat_at_hour(self.stat, self.TIME_ZERO + i*self.DAY) - self.assertTableState(RealmCount, ['value', 'realm', 'end_time'], - [[2, self.default_realm, self.TIME_ZERO], - [3, second_realm, self.TIME_ZERO], - [1, self.default_realm, self.TIME_ZERO - self.DAY], - [2, second_realm, self.TIME_ZERO - self.DAY]]) + do_fill_count_stat_at_hour(self.stat, self.TIME_ZERO + i * self.DAY) + self.assertTableState( + RealmCount, + ['value', 'realm', 'end_time'], + [ + [2, self.default_realm, self.TIME_ZERO], + [3, second_realm, self.TIME_ZERO], + [1, self.default_realm, self.TIME_ZERO - self.DAY], + [2, second_realm, self.TIME_ZERO - self.DAY], + ], + ) # Check that adding spurious entries doesn't make a difference self.mark_audit_active(user1, end_time=self.TIME_ZERO + self.DAY) @@ -1483,12 +1748,17 @@ class TestRealmActiveHumans(AnalyticsTestCase): RealmCount.objects.all().delete() InstallationCount.objects.all().delete() for i in [-1, 0, 1]: - do_fill_count_stat_at_hour(self.stat, self.TIME_ZERO + i*self.DAY) - self.assertTableState(RealmCount, ['value', 'realm', 'end_time'], - [[2, self.default_realm, self.TIME_ZERO], - [3, second_realm, self.TIME_ZERO], - [1, self.default_realm, self.TIME_ZERO - self.DAY], - [2, second_realm, self.TIME_ZERO - self.DAY]]) + do_fill_count_stat_at_hour(self.stat, self.TIME_ZERO + i * self.DAY) + self.assertTableState( + RealmCount, + ['value', 'realm', 'end_time'], + [ + [2, self.default_realm, self.TIME_ZERO], + [3, second_realm, self.TIME_ZERO], + [1, self.default_realm, self.TIME_ZERO - self.DAY], + [2, second_realm, self.TIME_ZERO - self.DAY], + ], + ) def test_end_to_end(self) -> None: user1 = do_create_user('email1', 'password', self.default_realm, 'full_name') @@ -1498,10 +1768,17 @@ class TestRealmActiveHumans(AnalyticsTestCase): update_user_activity_interval(user1, time_zero) update_user_activity_interval(user2, time_zero) do_deactivate_user(user2) - for property in ['active_users_audit:is_bot:day', '15day_actives::day', - 'realm_active_humans::day']: + for property in [ + 'active_users_audit:is_bot:day', + '15day_actives::day', + 'realm_active_humans::day', + ]: FillState.objects.create(property=property, state=FillState.DONE, end_time=time_zero) - process_count_stat(COUNT_STATS[property], time_zero+self.DAY) - self.assertEqual(RealmCount.objects.filter( - property='realm_active_humans::day', end_time=time_zero+self.DAY, value=1).count(), 1) + process_count_stat(COUNT_STATS[property], time_zero + self.DAY) + self.assertEqual( + RealmCount.objects.filter( + property='realm_active_humans::day', end_time=time_zero + self.DAY, value=1 + ).count(), + 1, + ) self.assertEqual(RealmCount.objects.filter(property='realm_active_humans::day').count(), 1) diff --git a/analytics/tests/test_fixtures.py b/analytics/tests/test_fixtures.py index a5e7f47125..cc9da62750 100644 --- a/analytics/tests/test_fixtures.py +++ b/analytics/tests/test_fixtures.py @@ -9,22 +9,32 @@ class TestFixtures(ZulipTestCase): # test basic business_hour / non_business_hour calculation # test we get an array of the right length with frequency=CountStat.DAY data = generate_time_series_data( - days=7, business_hours_base=20, non_business_hours_base=15, spikiness=0) + days=7, 
business_hours_base=20, non_business_hours_base=15, spikiness=0 + ) self.assertEqual(data, [400, 400, 400, 400, 400, 360, 360]) data = generate_time_series_data( - days=1, business_hours_base=2000, non_business_hours_base=1500, - growth=2, spikiness=0, frequency=CountStat.HOUR) + days=1, + business_hours_base=2000, + non_business_hours_base=1500, + growth=2, + spikiness=0, + frequency=CountStat.HOUR, + ) # test we get an array of the right length with frequency=CountStat.HOUR self.assertEqual(len(data), 24) # test that growth doesn't affect the first data point self.assertEqual(data[0], 2000) # test that the last data point is growth times what it otherwise would be - self.assertEqual(data[-1], 1500*2) + self.assertEqual(data[-1], 1500 * 2) # test autocorrelation == 1, since that's the easiest value to test data = generate_time_series_data( - days=1, business_hours_base=2000, non_business_hours_base=2000, - autocorrelation=1, frequency=CountStat.HOUR) + days=1, + business_hours_base=2000, + non_business_hours_base=2000, + autocorrelation=1, + frequency=CountStat.HOUR, + ) self.assertEqual(data[0], data[1]) self.assertEqual(data[0], data[-1]) diff --git a/analytics/tests/test_views.py b/analytics/tests/test_views.py index 378a4c014f..10efb2891a 100644 --- a/analytics/tests/test_views.py +++ b/analytics/tests/test_views.py @@ -81,22 +81,26 @@ class TestStatsEndpoint(ZulipTestCase): self.assertEqual(result.status_code, 200) self.assert_in_response("Zulip analytics for", result) + class TestGetChartData(ZulipTestCase): def setUp(self) -> None: super().setUp() self.realm = get_realm('zulip') self.user = self.example_user('hamlet') self.login_user(self.user) - self.end_times_hour = [ceiling_to_hour(self.realm.date_created) + timedelta(hours=i) - for i in range(4)] - self.end_times_day = [ceiling_to_day(self.realm.date_created) + timedelta(days=i) - for i in range(4)] + self.end_times_hour = [ + ceiling_to_hour(self.realm.date_created) + timedelta(hours=i) for i in range(4) + ] + self.end_times_day = [ + ceiling_to_day(self.realm.date_created) + timedelta(days=i) for i in range(4) + ] def data(self, i: int) -> List[int]: return [0, 0, i, 0] - def insert_data(self, stat: CountStat, realm_subgroups: List[Optional[str]], - user_subgroups: List[str]) -> None: + def insert_data( + self, stat: CountStat, realm_subgroups: List[Optional[str]], user_subgroups: List[str] + ) -> None: if stat.frequency == CountStat.HOUR: insert_time = self.end_times_hour[2] fill_time = self.end_times_hour[-1] @@ -105,13 +109,26 @@ class TestGetChartData(ZulipTestCase): fill_time = self.end_times_day[-1] RealmCount.objects.bulk_create( - RealmCount(property=stat.property, subgroup=subgroup, end_time=insert_time, - value=100+i, realm=self.realm) - for i, subgroup in enumerate(realm_subgroups)) + RealmCount( + property=stat.property, + subgroup=subgroup, + end_time=insert_time, + value=100 + i, + realm=self.realm, + ) + for i, subgroup in enumerate(realm_subgroups) + ) UserCount.objects.bulk_create( - UserCount(property=stat.property, subgroup=subgroup, end_time=insert_time, - value=200+i, realm=self.realm, user=self.user) - for i, subgroup in enumerate(user_subgroups)) + UserCount( + property=stat.property, + subgroup=subgroup, + end_time=insert_time, + value=200 + i, + realm=self.realm, + user=self.user, + ) + for i, subgroup in enumerate(user_subgroups) + ) FillState.objects.create(property=stat.property, end_time=fill_time, state=FillState.DONE) def test_number_of_humans(self) -> None: @@ -121,55 +138,83 @@ class 
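
# NOTE (annotation; a sketch, not part of the diff): the fixture expectations
# above follow from generate_time_series_data's daily seasonality, where a
# weekday is modeled as 8 business hours plus 16 non-business hours and a
# weekend day as 24 non-business hours (growth and spikiness disabled here):
business_hours_base, non_business_hours_base = 20, 15
weekday = 8 * business_hours_base + 16 * non_business_hours_base
weekend_day = 24 * non_business_hours_base
assert [weekday] * 5 + [weekend_day] * 2 == [400, 400, 400, 400, 400, 360, 360]
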
TestGetChartData(ZulipTestCase): self.insert_data(stat, [None], []) stat = COUNT_STATS['active_users_audit:is_bot:day'] self.insert_data(stat, ['false'], []) - result = self.client_get('/json/analytics/chart_data', - {'chart_name': 'number_of_humans'}) + result = self.client_get('/json/analytics/chart_data', {'chart_name': 'number_of_humans'}) self.assert_json_success(result) data = result.json() - self.assertEqual(data, { - 'msg': '', - 'end_times': [datetime_to_timestamp(dt) for dt in self.end_times_day], - 'frequency': CountStat.DAY, - 'everyone': {'_1day': self.data(100), '_15day': self.data(100), 'all_time': self.data(100)}, - 'display_order': None, - 'result': 'success', - }) + self.assertEqual( + data, + { + 'msg': '', + 'end_times': [datetime_to_timestamp(dt) for dt in self.end_times_day], + 'frequency': CountStat.DAY, + 'everyone': { + '_1day': self.data(100), + '_15day': self.data(100), + 'all_time': self.data(100), + }, + 'display_order': None, + 'result': 'success', + }, + ) def test_messages_sent_over_time(self) -> None: stat = COUNT_STATS['messages_sent:is_bot:hour'] self.insert_data(stat, ['true', 'false'], ['false']) - result = self.client_get('/json/analytics/chart_data', - {'chart_name': 'messages_sent_over_time'}) + result = self.client_get( + '/json/analytics/chart_data', {'chart_name': 'messages_sent_over_time'} + ) self.assert_json_success(result) data = result.json() - self.assertEqual(data, { - 'msg': '', - 'end_times': [datetime_to_timestamp(dt) for dt in self.end_times_hour], - 'frequency': CountStat.HOUR, - 'everyone': {'bot': self.data(100), 'human': self.data(101)}, - 'user': {'bot': self.data(0), 'human': self.data(200)}, - 'display_order': None, - 'result': 'success', - }) + self.assertEqual( + data, + { + 'msg': '', + 'end_times': [datetime_to_timestamp(dt) for dt in self.end_times_hour], + 'frequency': CountStat.HOUR, + 'everyone': {'bot': self.data(100), 'human': self.data(101)}, + 'user': {'bot': self.data(0), 'human': self.data(200)}, + 'display_order': None, + 'result': 'success', + }, + ) def test_messages_sent_by_message_type(self) -> None: stat = COUNT_STATS['messages_sent:message_type:day'] - self.insert_data(stat, ['public_stream', 'private_message'], - ['public_stream', 'private_stream']) - result = self.client_get('/json/analytics/chart_data', - {'chart_name': 'messages_sent_by_message_type'}) + self.insert_data( + stat, ['public_stream', 'private_message'], ['public_stream', 'private_stream'] + ) + result = self.client_get( + '/json/analytics/chart_data', {'chart_name': 'messages_sent_by_message_type'} + ) self.assert_json_success(result) data = result.json() - self.assertEqual(data, { - 'msg': '', - 'end_times': [datetime_to_timestamp(dt) for dt in self.end_times_day], - 'frequency': CountStat.DAY, - 'everyone': {'Public streams': self.data(100), 'Private streams': self.data(0), - 'Private messages': self.data(101), 'Group private messages': self.data(0)}, - 'user': {'Public streams': self.data(200), 'Private streams': self.data(201), - 'Private messages': self.data(0), 'Group private messages': self.data(0)}, - 'display_order': ['Private messages', 'Public streams', 'Private streams', 'Group private messages'], - 'result': 'success', - }) + self.assertEqual( + data, + { + 'msg': '', + 'end_times': [datetime_to_timestamp(dt) for dt in self.end_times_day], + 'frequency': CountStat.DAY, + 'everyone': { + 'Public streams': self.data(100), + 'Private streams': self.data(0), + 'Private messages': self.data(101), + 'Group private messages': 
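
# NOTE (annotation; a sketch, not part of the diff): the self.data(100) series
# shape works because insert_data above writes a single row at end_times[2] and
# marks FillState DONE through end_times[-1]; the endpoint then zero-fills all
# other buckets, so a four-bucket series with value v at index 2 is [0, 0, v, 0]:
from typing import Dict, List

def zero_filled_series(values_by_index: Dict[int, int], num_buckets: int) -> List[int]:
    return [values_by_index.get(i, 0) for i in range(num_buckets)]

assert zero_filled_series({2: 100}, 4) == [0, 0, 100, 0]
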
self.data(0), + }, + 'user': { + 'Public streams': self.data(200), + 'Private streams': self.data(201), + 'Private messages': self.data(0), + 'Group private messages': self.data(0), + }, + 'display_order': [ + 'Private messages', + 'Public streams', + 'Private streams', + 'Group private messages', + ], + 'result': 'success', + }, + ) def test_messages_sent_by_client(self) -> None: stat = COUNT_STATS['messages_sent:client:day'] @@ -177,80 +222,112 @@ class TestGetChartData(ZulipTestCase): client2 = Client.objects.create(name='client 2') client3 = Client.objects.create(name='client 3') client4 = Client.objects.create(name='client 4') - self.insert_data(stat, [client4.id, client3.id, client2.id], - [client3.id, client1.id]) - result = self.client_get('/json/analytics/chart_data', - {'chart_name': 'messages_sent_by_client'}) + self.insert_data(stat, [client4.id, client3.id, client2.id], [client3.id, client1.id]) + result = self.client_get( + '/json/analytics/chart_data', {'chart_name': 'messages_sent_by_client'} + ) self.assert_json_success(result) data = result.json() - self.assertEqual(data, { - 'msg': '', - 'end_times': [datetime_to_timestamp(dt) for dt in self.end_times_day], - 'frequency': CountStat.DAY, - 'everyone': {'client 4': self.data(100), 'client 3': self.data(101), - 'client 2': self.data(102)}, - 'user': {'client 3': self.data(200), 'client 1': self.data(201)}, - 'display_order': ['client 1', 'client 2', 'client 3', 'client 4'], - 'result': 'success', - }) + self.assertEqual( + data, + { + 'msg': '', + 'end_times': [datetime_to_timestamp(dt) for dt in self.end_times_day], + 'frequency': CountStat.DAY, + 'everyone': { + 'client 4': self.data(100), + 'client 3': self.data(101), + 'client 2': self.data(102), + }, + 'user': {'client 3': self.data(200), 'client 1': self.data(201)}, + 'display_order': ['client 1', 'client 2', 'client 3', 'client 4'], + 'result': 'success', + }, + ) def test_messages_read_over_time(self) -> None: stat = COUNT_STATS['messages_read::hour'] self.insert_data(stat, [None], []) - result = self.client_get('/json/analytics/chart_data', - {'chart_name': 'messages_read_over_time'}) + result = self.client_get( + '/json/analytics/chart_data', {'chart_name': 'messages_read_over_time'} + ) self.assert_json_success(result) data = result.json() - self.assertEqual(data, { - 'msg': '', - 'end_times': [datetime_to_timestamp(dt) for dt in self.end_times_hour], - 'frequency': CountStat.HOUR, - 'everyone': {'read': self.data(100)}, - 'user': {'read': self.data(0)}, - 'display_order': None, - 'result': 'success', - }) + self.assertEqual( + data, + { + 'msg': '', + 'end_times': [datetime_to_timestamp(dt) for dt in self.end_times_hour], + 'frequency': CountStat.HOUR, + 'everyone': {'read': self.data(100)}, + 'user': {'read': self.data(0)}, + 'display_order': None, + 'result': 'success', + }, + ) def test_include_empty_subgroups(self) -> None: FillState.objects.create( - property='realm_active_humans::day', end_time=self.end_times_day[0], - state=FillState.DONE) - result = self.client_get('/json/analytics/chart_data', - {'chart_name': 'number_of_humans'}) + property='realm_active_humans::day', + end_time=self.end_times_day[0], + state=FillState.DONE, + ) + result = self.client_get('/json/analytics/chart_data', {'chart_name': 'number_of_humans'}) self.assert_json_success(result) data = result.json() self.assertEqual(data['everyone'], {"_1day": [0], "_15day": [0], "all_time": [0]}) self.assertFalse('user' in data) FillState.objects.create( - property='messages_sent:is_bot:hour', 
end_time=self.end_times_hour[0], - state=FillState.DONE) - result = self.client_get('/json/analytics/chart_data', - {'chart_name': 'messages_sent_over_time'}) + property='messages_sent:is_bot:hour', + end_time=self.end_times_hour[0], + state=FillState.DONE, + ) + result = self.client_get( + '/json/analytics/chart_data', {'chart_name': 'messages_sent_over_time'} + ) self.assert_json_success(result) data = result.json() self.assertEqual(data['everyone'], {'human': [0], 'bot': [0]}) self.assertEqual(data['user'], {'human': [0], 'bot': [0]}) FillState.objects.create( - property='messages_sent:message_type:day', end_time=self.end_times_day[0], - state=FillState.DONE) - result = self.client_get('/json/analytics/chart_data', - {'chart_name': 'messages_sent_by_message_type'}) + property='messages_sent:message_type:day', + end_time=self.end_times_day[0], + state=FillState.DONE, + ) + result = self.client_get( + '/json/analytics/chart_data', {'chart_name': 'messages_sent_by_message_type'} + ) self.assert_json_success(result) data = result.json() - self.assertEqual(data['everyone'], { - 'Public streams': [0], 'Private streams': [0], - 'Private messages': [0], 'Group private messages': [0]}) - self.assertEqual(data['user'], { - 'Public streams': [0], 'Private streams': [0], - 'Private messages': [0], 'Group private messages': [0]}) + self.assertEqual( + data['everyone'], + { + 'Public streams': [0], + 'Private streams': [0], + 'Private messages': [0], + 'Group private messages': [0], + }, + ) + self.assertEqual( + data['user'], + { + 'Public streams': [0], + 'Private streams': [0], + 'Private messages': [0], + 'Group private messages': [0], + }, + ) FillState.objects.create( - property='messages_sent:client:day', end_time=self.end_times_day[0], - state=FillState.DONE) - result = self.client_get('/json/analytics/chart_data', - {'chart_name': 'messages_sent_by_client'}) + property='messages_sent:client:day', + end_time=self.end_times_day[0], + state=FillState.DONE, + ) + result = self.client_get( + '/json/analytics/chart_data', {'chart_name': 'messages_sent_by_client'} + ) self.assert_json_success(result) data = result.json() self.assertEqual(data['everyone'], {}) @@ -266,20 +343,30 @@ class TestGetChartData(ZulipTestCase): end_time_timestamps = [datetime_to_timestamp(dt) for dt in self.end_times_day] # valid start and end - result = self.client_get('/json/analytics/chart_data', - {'chart_name': 'number_of_humans', - 'start': end_time_timestamps[1], - 'end': end_time_timestamps[2]}) + result = self.client_get( + '/json/analytics/chart_data', + { + 'chart_name': 'number_of_humans', + 'start': end_time_timestamps[1], + 'end': end_time_timestamps[2], + }, + ) self.assert_json_success(result) data = result.json() self.assertEqual(data['end_times'], end_time_timestamps[1:3]) - self.assertEqual(data['everyone'], {'_1day': [0, 100], '_15day': [0, 100], 'all_time': [0, 100]}) + self.assertEqual( + data['everyone'], {'_1day': [0, 100], '_15day': [0, 100], 'all_time': [0, 100]} + ) # start later then end - result = self.client_get('/json/analytics/chart_data', - {'chart_name': 'number_of_humans', - 'start': end_time_timestamps[2], - 'end': end_time_timestamps[1]}) + result = self.client_get( + '/json/analytics/chart_data', + { + 'chart_name': 'number_of_humans', + 'start': end_time_timestamps[2], + 'end': end_time_timestamps[1], + }, + ) self.assert_json_error_contains(result, 'Start time is later than') def test_min_length(self) -> None: @@ -290,26 +377,39 @@ class TestGetChartData(ZulipTestCase): stat = 
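
# NOTE (annotation; a sketch, not part of the diff): test_include_empty_subgroups
# above shows that once FillState marks a stat as filled, the endpoint emits a
# series for every expected subgroup label even when no count rows exist, rather
# than omitting the key. A sketch of that zero-fill over fixed labels:
from typing import Dict, List

def series_for_labels(labels: List[str], num_buckets: int,
                      data: Dict[str, List[int]]) -> Dict[str, List[int]]:
    return {label: data.get(label, [0] * num_buckets) for label in labels}

assert series_for_labels(['human', 'bot'], 1, {}) == {'human': [0], 'bot': [0]}
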
COUNT_STATS['active_users_audit:is_bot:day'] self.insert_data(stat, ['false'], []) # test min_length is too short to change anything - result = self.client_get('/json/analytics/chart_data', - {'chart_name': 'number_of_humans', - 'min_length': 2}) + result = self.client_get( + '/json/analytics/chart_data', {'chart_name': 'number_of_humans', 'min_length': 2} + ) self.assert_json_success(result) data = result.json() - self.assertEqual(data['end_times'], [datetime_to_timestamp(dt) for dt in self.end_times_day]) - self.assertEqual(data['everyone'], {'_1day': self.data(100), '_15day': self.data(100), 'all_time': self.data(100)}) + self.assertEqual( + data['end_times'], [datetime_to_timestamp(dt) for dt in self.end_times_day] + ) + self.assertEqual( + data['everyone'], + {'_1day': self.data(100), '_15day': self.data(100), 'all_time': self.data(100)}, + ) # test min_length larger than filled data - result = self.client_get('/json/analytics/chart_data', - {'chart_name': 'number_of_humans', - 'min_length': 5}) + result = self.client_get( + '/json/analytics/chart_data', {'chart_name': 'number_of_humans', 'min_length': 5} + ) self.assert_json_success(result) data = result.json() - end_times = [ceiling_to_day(self.realm.date_created) + timedelta(days=i) for i in range(-1, 4)] + end_times = [ + ceiling_to_day(self.realm.date_created) + timedelta(days=i) for i in range(-1, 4) + ] self.assertEqual(data['end_times'], [datetime_to_timestamp(dt) for dt in end_times]) - self.assertEqual(data['everyone'], {'_1day': [0, *self.data(100)], '_15day': [0, *self.data(100)], 'all_time': [0, *self.data(100)]}) + self.assertEqual( + data['everyone'], + { + '_1day': [0, *self.data(100)], + '_15day': [0, *self.data(100)], + 'all_time': [0, *self.data(100)], + }, + ) def test_non_existent_chart(self) -> None: - result = self.client_get('/json/analytics/chart_data', - {'chart_name': 'does_not_exist'}) + result = self.client_get('/json/analytics/chart_data', {'chart_name': 'does_not_exist'}) self.assert_json_error_contains(result, 'Unknown chart name') def test_analytics_not_running(self) -> None: @@ -320,49 +420,72 @@ class TestGetChartData(ZulipTestCase): realm.date_created = timezone_now() - timedelta(days=3) realm.save(update_fields=["date_created"]) with self.assertLogs(level="WARNING") as m: - result = self.client_get('/json/analytics/chart_data', {'chart_name': 'messages_sent_over_time'}) - self.assertEqual(m.output, [f"WARNING:root:User from realm zulip attempted to access /stats, but the computed start time: {realm.date_created} (creation of realm or installation) is later than the computed end time: 0001-01-01 00:00:00+00:00 (last successful analytics update). Is the analytics cron job running?"]) + result = self.client_get( + '/json/analytics/chart_data', {'chart_name': 'messages_sent_over_time'} + ) + self.assertEqual( + m.output, + [ + f"WARNING:root:User from realm zulip attempted to access /stats, but the computed start time: {realm.date_created} (creation of realm or installation) is later than the computed end time: 0001-01-01 00:00:00+00:00 (last successful analytics update). Is the analytics cron job running?" 
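
# NOTE (annotation; a sketch, not part of the diff): the min_length behaviour
# above is pure padding arithmetic: if the filled range yields fewer than
# min_length buckets, earlier buckets are prepended and each series gets
# leading zeros -- hence end_times built from range(-1, 4) and [0, *data(100)]:
from typing import List

def pad_series(series: List[int], min_length: int) -> List[int]:
    pad = max(0, min_length - len(series))
    return [0] * pad + series

data_100 = [0, 0, 100, 0]
assert pad_series(data_100, 2) == data_100            # min_length too short: no change
assert pad_series(data_100, 5) == [0, 0, 0, 100, 0]   # one zero bucket prepended
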
+ ], + ) self.assert_json_error_contains(result, 'No analytics data available') realm.date_created = timezone_now() - timedelta(days=1, hours=2) realm.save(update_fields=["date_created"]) with self.assertLogs(level="WARNING") as m: - result = self.client_get('/json/analytics/chart_data', - {'chart_name': 'messages_sent_over_time'}) - self.assertEqual(m.output, [f"WARNING:root:User from realm zulip attempted to access /stats, but the computed start time: {realm.date_created} (creation of realm or installation) is later than the computed end time: 0001-01-01 00:00:00+00:00 (last successful analytics update). Is the analytics cron job running?"]) + result = self.client_get( + '/json/analytics/chart_data', {'chart_name': 'messages_sent_over_time'} + ) + self.assertEqual( + m.output, + [ + f"WARNING:root:User from realm zulip attempted to access /stats, but the computed start time: {realm.date_created} (creation of realm or installation) is later than the computed end time: 0001-01-01 00:00:00+00:00 (last successful analytics update). Is the analytics cron job running?" + ], + ) self.assert_json_error_contains(result, 'No analytics data available') realm.date_created = timezone_now() - timedelta(days=1, minutes=10) realm.save(update_fields=["date_created"]) - result = self.client_get('/json/analytics/chart_data', - {'chart_name': 'messages_sent_over_time'}) + result = self.client_get( + '/json/analytics/chart_data', {'chart_name': 'messages_sent_over_time'} + ) self.assert_json_success(result) realm.date_created = timezone_now() - timedelta(hours=10) realm.save(update_fields=["date_created"]) - result = self.client_get('/json/analytics/chart_data', - {'chart_name': 'messages_sent_over_time'}) + result = self.client_get( + '/json/analytics/chart_data', {'chart_name': 'messages_sent_over_time'} + ) self.assert_json_success(result) end_time = timezone_now() - timedelta(days=5) - fill_state = FillState.objects.create(property='messages_sent:is_bot:hour', end_time=end_time, - state=FillState.DONE) + fill_state = FillState.objects.create( + property='messages_sent:is_bot:hour', end_time=end_time, state=FillState.DONE + ) realm.date_created = timezone_now() - timedelta(days=3) realm.save(update_fields=["date_created"]) with self.assertLogs(level="WARNING") as m: - result = self.client_get('/json/analytics/chart_data', - {'chart_name': 'messages_sent_over_time'}) - self.assertEqual(m.output, [f"WARNING:root:User from realm zulip attempted to access /stats, but the computed start time: {realm.date_created} (creation of realm or installation) is later than the computed end time: {end_time} (last successful analytics update). Is the analytics cron job running?"]) + result = self.client_get( + '/json/analytics/chart_data', {'chart_name': 'messages_sent_over_time'} + ) + self.assertEqual( + m.output, + [ + f"WARNING:root:User from realm zulip attempted to access /stats, but the computed start time: {realm.date_created} (creation of realm or installation) is later than the computed end time: {end_time} (last successful analytics update). Is the analytics cron job running?" 
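
# NOTE (annotation; a sketch, not part of the diff): the warning asserted above
# is the stats view's sanity check -- the chart's start time is the realm's (or
# installation's) creation date and its end time is the last successful
# FillState update, which defaults to datetime.min when no stat has ever been
# filled, hence the literal 0001-01-01 in the log line. The guard's shape here
# is inferred from the log message rather than copied from the view code:
from datetime import datetime, timezone

def check_analytics_range(start: datetime, end: datetime) -> None:
    if start > end:
        # Stands in for the JsonableError the endpoint returns.
        raise ValueError("No analytics data available. Please contact your server administrator.")

never_filled = datetime.min.replace(tzinfo=timezone.utc)  # 0001-01-01 00:00:00+00:00
try:
    check_analytics_range(datetime.now(timezone.utc), never_filled)
except ValueError:
    pass  # matches the JSON error the test expects
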
+ ], + ) self.assert_json_error_contains(result, 'No analytics data available') realm.date_created = timezone_now() - timedelta(days=1, minutes=10) realm.save(update_fields=["date_created"]) - result = self.client_get('/json/analytics/chart_data', - {'chart_name': 'messages_sent_over_time'}) + result = self.client_get( + '/json/analytics/chart_data', {'chart_name': 'messages_sent_over_time'} + ) self.assert_json_success(result) end_time = timezone_now() - timedelta(days=2) @@ -371,30 +494,40 @@ class TestGetChartData(ZulipTestCase): realm.date_created = timezone_now() - timedelta(days=3) realm.save(update_fields=["date_created"]) - result = self.client_get('/json/analytics/chart_data', - {'chart_name': 'messages_sent_over_time'}) + result = self.client_get( + '/json/analytics/chart_data', {'chart_name': 'messages_sent_over_time'} + ) self.assert_json_success(result) realm.date_created = timezone_now() - timedelta(days=1, hours=2) realm.save(update_fields=["date_created"]) with self.assertLogs(level="WARNING") as m: - result = self.client_get('/json/analytics/chart_data', - {'chart_name': 'messages_sent_over_time'}) - self.assertEqual(m.output, [f"WARNING:root:User from realm zulip attempted to access /stats, but the computed start time: {realm.date_created} (creation of realm or installation) is later than the computed end time: {end_time} (last successful analytics update). Is the analytics cron job running?"]) + result = self.client_get( + '/json/analytics/chart_data', {'chart_name': 'messages_sent_over_time'} + ) + self.assertEqual( + m.output, + [ + f"WARNING:root:User from realm zulip attempted to access /stats, but the computed start time: {realm.date_created} (creation of realm or installation) is later than the computed end time: {end_time} (last successful analytics update). Is the analytics cron job running?" 
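
# NOTE (annotation; a hypothetical sketch, not part of the diff): the passing
# cases above (realm created now - 1 day 10 min) versus the failing ones
# (now - 1 day 2 hours) imply a grace period of roughly a day for freshly
# created realms whose first analytics fill hasn't happened yet; the exact
# cutoff constant lives in the view code and the value below is an assumption:
from datetime import datetime, timedelta, timezone

GRACE = timedelta(days=1, minutes=30)  # assumed: between 1d10m (ok) and 1d2h (error)

def analytics_ready(realm_created: datetime, last_fill: datetime, now: datetime) -> bool:
    # Data counts as missing only when the realm both postdates the last fill
    # and is older than the grace period.
    return realm_created <= last_fill or now - realm_created <= GRACE

now = datetime.now(timezone.utc)
never = datetime.min.replace(tzinfo=timezone.utc)
assert analytics_ready(now - timedelta(days=1, minutes=10), never, now)
assert not analytics_ready(now - timedelta(days=1, hours=2), never, now)
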
+ ], + ) self.assert_json_error_contains(result, 'No analytics data available') realm.date_created = timezone_now() - timedelta(days=1, minutes=10) realm.save(update_fields=["date_created"]) - result = self.client_get('/json/analytics/chart_data', {'chart_name': 'messages_sent_over_time'}) + result = self.client_get( + '/json/analytics/chart_data', {'chart_name': 'messages_sent_over_time'} + ) self.assert_json_success(result) def test_get_chart_data_for_realm(self) -> None: user = self.example_user('hamlet') self.login_user(user) - result = self.client_get('/json/analytics/chart_data/realm/zulip', - {'chart_name': 'number_of_humans'}) + result = self.client_get( + '/json/analytics/chart_data/realm/zulip', {'chart_name': 'number_of_humans'} + ) self.assert_json_error(result, "Must be an server administrator", 400) user = self.example_user('hamlet') @@ -403,20 +536,24 @@ class TestGetChartData(ZulipTestCase): stat = COUNT_STATS['realm_active_humans::day'] self.insert_data(stat, [None], []) - result = self.client_get('/json/analytics/chart_data/realm/not_existing_realm', - {'chart_name': 'number_of_humans'}) + result = self.client_get( + '/json/analytics/chart_data/realm/not_existing_realm', + {'chart_name': 'number_of_humans'}, + ) self.assert_json_error(result, 'Invalid organization', 400) - result = self.client_get('/json/analytics/chart_data/realm/zulip', - {'chart_name': 'number_of_humans'}) + result = self.client_get( + '/json/analytics/chart_data/realm/zulip', {'chart_name': 'number_of_humans'} + ) self.assert_json_success(result) def test_get_chart_data_for_installation(self) -> None: user = self.example_user('hamlet') self.login_user(user) - result = self.client_get('/json/analytics/chart_data/installation', - {'chart_name': 'number_of_humans'}) + result = self.client_get( + '/json/analytics/chart_data/installation', {'chart_name': 'number_of_humans'} + ) self.assert_json_error(result, "Must be an server administrator", 400) user = self.example_user('hamlet') @@ -425,91 +562,139 @@ class TestGetChartData(ZulipTestCase): stat = COUNT_STATS['realm_active_humans::day'] self.insert_data(stat, [None], []) - result = self.client_get('/json/analytics/chart_data/installation', - {'chart_name': 'number_of_humans'}) + result = self.client_get( + '/json/analytics/chart_data/installation', {'chart_name': 'number_of_humans'} + ) self.assert_json_success(result) + class TestSupportEndpoint(ZulipTestCase): def test_search(self) -> None: reset_emails_in_zulip_realm() def check_hamlet_user_query_result(result: HttpResponse) -> None: - self.assert_in_success_response(['user\n', '
King Hamlet',
-                                             'Email: hamlet@zulip.com', 'Is active: True',
-                                             'Admins: desdemona@zulip.com, iago@zulip.com\n',
-                                             'class="copy-button" data-copytext="desdemona@zulip.com, iago@zulip.com"',
-                                             ], result)
+            self.assert_in_success_response(
+                [
+                    'user\n',
+                    'King Hamlet',
+                    'Email: hamlet@zulip.com',
+                    'Is active: True',
+                    'Admins: desdemona@zulip.com, iago@zulip.com\n',
+                    'class="copy-button" data-copytext="desdemona@zulip.com, iago@zulip.com"',
+                ],
+                result,
+            )

         def check_othello_user_query_result(result: HttpResponse) -> None:
-            self.assert_in_success_response(['user\n',
-                                             'Othello, the Moor of Venice',
-                                             'Email: othello@zulip.com', 'Is active: True'
-                                             ], result)
+            self.assert_in_success_response(
+                [
+                    'user\n',
+                    'Othello, the Moor of Venice',
+                    'Email: othello@zulip.com',
+                    'Is active: True
', + ], + result, + ) def check_zulip_realm_query_result(result: HttpResponse) -> None: zulip_realm = get_realm("zulip") - self.assert_in_success_response([f'', - '', - '', - 'input type="number" name="discount" value="None"', - '', - '', - 'scrub-realm-button">', - 'data-string-id="zulip"'], result) + self.assert_in_success_response( + [ + f'', + '', + '', + 'input type="number" name="discount" value="None"', + '', + '', + 'scrub-realm-button">', + 'data-string-id="zulip"', + ], + result, + ) def check_lear_realm_query_result(result: HttpResponse) -> None: lear_realm = get_realm("lear") - self.assert_in_success_response([f'', - '', - '', - 'input type="number" name="discount" value="None"', - '', - '', - 'scrub-realm-button">', - 'data-string-id="lear"', - 'Name: Zulip Standard', - 'Status: Active', - 'Billing schedule: Annual', - 'Licenses: 2/10 (Manual)', - 'Price per license: $80.0', - 'Next invoice date: 02 January 2017', - '', + '', + 'input type="number" name="discount" value="None"', + '', + '', + 'scrub-realm-button">', + 'data-string-id="lear"', + 'Name: Zulip Standard', + 'Status: Active', + 'Billing schedule: Annual', + 'Licenses: 2/10 (Manual)', + 'Price per license: $80.0', + 'Next invoice date: 02 January 2017', + '
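
# NOTE (annotation; a sketch, not part of the diff): the check_*_query_result
# helpers above all lean on ZulipTestCase.assert_in_success_response, which
# asserts a 200 response and then membership of each expected HTML fragment in
# the body. A self-contained model of the same idea (FakeResponse is a stand-in
# for Django's HttpResponse):
from typing import List

class FakeResponse:
    def __init__(self, status_code: int, content: bytes) -> None:
        self.status_code = status_code
        self.content = content

def assert_in_success_response(substrings: List[str], response: FakeResponse) -> None:
    assert response.status_code == 200
    body = response.content.decode()
    for fragment in substrings:
        assert fragment in body, f"missing fragment: {fragment!r}"

assert_in_success_response(
    ['King Hamlet', 'Email: hamlet@zulip.com'],
    FakeResponse(200, b'... King Hamlet ... Email: hamlet@zulip.com ...'),
)
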