mirror of
				https://github.com/zulip/zulip.git
				synced 2025-11-03 21:43:21 +00:00 
			
		
		
		
	analytics: Add realm argument to analytics.
This changeset is prepartory work for doing something reasonable with analytics data during the zulip -> zulip data import process (and potentially e.g. slack -> Zulip as well). To support that, we need to make it possible to do our analytics calculations for a single realm. We do this while maintaining backwards compatibility and avoiding massive duplicated code by adding an optional `realm` argument to the entrypoints to the analytics system, especially process_count_stat. More work involving restructuring FillState will be required for this to be actually usable for its intented purpose, but this commit is a nice checkpoint along the way. Tweaked by tabbott to adjust comments and disable InstallationCount updates when a realm argument is specified.
This commit is contained in:
		@@ -63,13 +63,23 @@ class DependentCountStat(CountStat):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
class DataCollector:
 | 
					class DataCollector:
 | 
				
			||||||
    def __init__(self, output_table: Type[BaseCount],
 | 
					    def __init__(self, output_table: Type[BaseCount],
 | 
				
			||||||
                 pull_function: Optional[Callable[[str, datetime, datetime], int]]) -> None:
 | 
					                 pull_function: Optional[Callable[[str, datetime, datetime, Optional[Realm]], int]]) -> None:
 | 
				
			||||||
        self.output_table = output_table
 | 
					        self.output_table = output_table
 | 
				
			||||||
        self.pull_function = pull_function
 | 
					        self.pull_function = pull_function
 | 
				
			||||||
 | 
					
 | 
				
			||||||
## CountStat-level operations ##
 | 
					## CountStat-level operations ##
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def process_count_stat(stat: CountStat, fill_to_time: datetime) -> None:
 | 
					def process_count_stat(stat: CountStat, fill_to_time: datetime,
 | 
				
			||||||
 | 
					                       realm: Optional[Realm]=None) -> None:
 | 
				
			||||||
 | 
					    # TODO: The realm argument is not yet supported, in that we don't
 | 
				
			||||||
 | 
					    # have a solution for how to update FillState if it is passed.  It
 | 
				
			||||||
 | 
					    # exists solely as partial plumbing for when we do fully implement
 | 
				
			||||||
 | 
					    # doing single-realm analytics runs for use cases like data import.
 | 
				
			||||||
 | 
					    #
 | 
				
			||||||
 | 
					    # Also, note that for the realm argument to be properly supported,
 | 
				
			||||||
 | 
					    # the CountStat object passed in needs to have come from
 | 
				
			||||||
 | 
					    # E.g. get_count_stats(realm), i.e. have the realm_id already
 | 
				
			||||||
 | 
					    # entered into the SQL query defined by the CountState object.
 | 
				
			||||||
    if stat.frequency == CountStat.HOUR:
 | 
					    if stat.frequency == CountStat.HOUR:
 | 
				
			||||||
        time_increment = timedelta(hours=1)
 | 
					        time_increment = timedelta(hours=1)
 | 
				
			||||||
    elif stat.frequency == CountStat.DAY:
 | 
					    elif stat.frequency == CountStat.DAY:
 | 
				
			||||||
@@ -113,7 +123,7 @@ def process_count_stat(stat: CountStat, fill_to_time: datetime) -> None:
 | 
				
			|||||||
        logger.info("START %s %s" % (stat.property, currently_filled))
 | 
					        logger.info("START %s %s" % (stat.property, currently_filled))
 | 
				
			||||||
        start = time.time()
 | 
					        start = time.time()
 | 
				
			||||||
        do_update_fill_state(fill_state, currently_filled, FillState.STARTED)
 | 
					        do_update_fill_state(fill_state, currently_filled, FillState.STARTED)
 | 
				
			||||||
        do_fill_count_stat_at_hour(stat, currently_filled)
 | 
					        do_fill_count_stat_at_hour(stat, currently_filled, realm)
 | 
				
			||||||
        do_update_fill_state(fill_state, currently_filled, FillState.DONE)
 | 
					        do_update_fill_state(fill_state, currently_filled, FillState.DONE)
 | 
				
			||||||
        end = time.time()
 | 
					        end = time.time()
 | 
				
			||||||
        currently_filled = currently_filled + time_increment
 | 
					        currently_filled = currently_filled + time_increment
 | 
				
			||||||
@@ -126,15 +136,15 @@ def do_update_fill_state(fill_state: FillState, end_time: datetime, state: int)
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
# We assume end_time is valid (e.g. is on a day or hour boundary as appropriate)
 | 
					# We assume end_time is valid (e.g. is on a day or hour boundary as appropriate)
 | 
				
			||||||
# and is timezone aware. It is the caller's responsibility to enforce this!
 | 
					# and is timezone aware. It is the caller's responsibility to enforce this!
 | 
				
			||||||
def do_fill_count_stat_at_hour(stat: CountStat, end_time: datetime) -> None:
 | 
					def do_fill_count_stat_at_hour(stat: CountStat, end_time: datetime, realm: Optional[Realm]=None) -> None:
 | 
				
			||||||
    start_time = end_time - stat.interval
 | 
					    start_time = end_time - stat.interval
 | 
				
			||||||
    if not isinstance(stat, LoggingCountStat):
 | 
					    if not isinstance(stat, LoggingCountStat):
 | 
				
			||||||
        timer = time.time()
 | 
					        timer = time.time()
 | 
				
			||||||
        assert(stat.data_collector.pull_function is not None)
 | 
					        assert(stat.data_collector.pull_function is not None)
 | 
				
			||||||
        rows_added = stat.data_collector.pull_function(stat.property, start_time, end_time)
 | 
					        rows_added = stat.data_collector.pull_function(stat.property, start_time, end_time, realm)
 | 
				
			||||||
        logger.info("%s run pull_function (%dms/%sr)" %
 | 
					        logger.info("%s run pull_function (%dms/%sr)" %
 | 
				
			||||||
                    (stat.property, (time.time()-timer)*1000, rows_added))
 | 
					                    (stat.property, (time.time()-timer)*1000, rows_added))
 | 
				
			||||||
    do_aggregate_to_summary_table(stat, end_time)
 | 
					    do_aggregate_to_summary_table(stat, end_time, realm)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def do_delete_counts_at_hour(stat: CountStat, end_time: datetime) -> None:
 | 
					def do_delete_counts_at_hour(stat: CountStat, end_time: datetime) -> None:
 | 
				
			||||||
    if isinstance(stat, LoggingCountStat):
 | 
					    if isinstance(stat, LoggingCountStat):
 | 
				
			||||||
@@ -147,11 +157,17 @@ def do_delete_counts_at_hour(stat: CountStat, end_time: datetime) -> None:
 | 
				
			|||||||
        RealmCount.objects.filter(property=stat.property, end_time=end_time).delete()
 | 
					        RealmCount.objects.filter(property=stat.property, end_time=end_time).delete()
 | 
				
			||||||
        InstallationCount.objects.filter(property=stat.property, end_time=end_time).delete()
 | 
					        InstallationCount.objects.filter(property=stat.property, end_time=end_time).delete()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def do_aggregate_to_summary_table(stat: CountStat, end_time: datetime) -> None:
 | 
					def do_aggregate_to_summary_table(stat: CountStat, end_time: datetime,
 | 
				
			||||||
 | 
					                                  realm: Optional[Realm]=None) -> None:
 | 
				
			||||||
    cursor = connection.cursor()
 | 
					    cursor = connection.cursor()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # Aggregate into RealmCount
 | 
					    # Aggregate into RealmCount
 | 
				
			||||||
    output_table = stat.data_collector.output_table
 | 
					    output_table = stat.data_collector.output_table
 | 
				
			||||||
 | 
					    if realm is not None:
 | 
				
			||||||
 | 
					        realm_clause = "AND zerver_realm.id = %s" % (realm.id,)
 | 
				
			||||||
 | 
					    else:
 | 
				
			||||||
 | 
					        realm_clause = ""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if output_table in (UserCount, StreamCount):
 | 
					    if output_table in (UserCount, StreamCount):
 | 
				
			||||||
        realmcount_query = """
 | 
					        realmcount_query = """
 | 
				
			||||||
            INSERT INTO analytics_realmcount
 | 
					            INSERT INTO analytics_realmcount
 | 
				
			||||||
@@ -166,16 +182,23 @@ def do_aggregate_to_summary_table(stat: CountStat, end_time: datetime) -> None:
 | 
				
			|||||||
            WHERE
 | 
					            WHERE
 | 
				
			||||||
                %(output_table)s.property = '%(property)s' AND
 | 
					                %(output_table)s.property = '%(property)s' AND
 | 
				
			||||||
                %(output_table)s.end_time = %%(end_time)s
 | 
					                %(output_table)s.end_time = %%(end_time)s
 | 
				
			||||||
 | 
					                %(realm_clause)s
 | 
				
			||||||
            GROUP BY zerver_realm.id, %(output_table)s.subgroup
 | 
					            GROUP BY zerver_realm.id, %(output_table)s.subgroup
 | 
				
			||||||
        """ % {'output_table': output_table._meta.db_table,
 | 
					        """ % {'output_table': output_table._meta.db_table,
 | 
				
			||||||
               'property': stat.property}
 | 
					               'property': stat.property,
 | 
				
			||||||
 | 
					               'realm_clause': realm_clause}
 | 
				
			||||||
        start = time.time()
 | 
					        start = time.time()
 | 
				
			||||||
        cursor.execute(realmcount_query, {'end_time': end_time})
 | 
					        cursor.execute(realmcount_query, {'end_time': end_time})
 | 
				
			||||||
        end = time.time()
 | 
					        end = time.time()
 | 
				
			||||||
        logger.info("%s RealmCount aggregation (%dms/%sr)" % (
 | 
					        logger.info("%s RealmCount aggregation (%dms/%sr)" % (
 | 
				
			||||||
            stat.property, (end - start) * 1000, cursor.rowcount))
 | 
					            stat.property, (end - start) * 1000, cursor.rowcount))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # Aggregate into InstallationCount
 | 
					    if realm is None:
 | 
				
			||||||
 | 
					        # Aggregate into InstallationCount.  Only run if we just
 | 
				
			||||||
 | 
					        # processed counts for all realms.
 | 
				
			||||||
 | 
					        #
 | 
				
			||||||
 | 
					        # TODO: Add support for updating installation data after
 | 
				
			||||||
 | 
					        # changing an individual realm's values.
 | 
				
			||||||
        installationcount_query = """
 | 
					        installationcount_query = """
 | 
				
			||||||
            INSERT INTO analytics_installationcount
 | 
					            INSERT INTO analytics_installationcount
 | 
				
			||||||
                (value, property, subgroup, end_time)
 | 
					                (value, property, subgroup, end_time)
 | 
				
			||||||
@@ -192,6 +215,7 @@ def do_aggregate_to_summary_table(stat: CountStat, end_time: datetime) -> None:
 | 
				
			|||||||
        end = time.time()
 | 
					        end = time.time()
 | 
				
			||||||
        logger.info("%s InstallationCount aggregation (%dms/%sr)" % (
 | 
					        logger.info("%s InstallationCount aggregation (%dms/%sr)" % (
 | 
				
			||||||
            stat.property, (end - start) * 1000, cursor.rowcount))
 | 
					            stat.property, (end - start) * 1000, cursor.rowcount))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    cursor.close()
 | 
					    cursor.close()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
## Utility functions called from outside counts.py ##
 | 
					## Utility functions called from outside counts.py ##
 | 
				
			||||||
@@ -259,11 +283,13 @@ def do_pull_by_sql_query(property: str, start_time: datetime, end_time: datetime
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
def sql_data_collector(output_table: Type[BaseCount], query: str,
 | 
					def sql_data_collector(output_table: Type[BaseCount], query: str,
 | 
				
			||||||
                       group_by: Optional[Tuple[models.Model, str]]) -> DataCollector:
 | 
					                       group_by: Optional[Tuple[models.Model, str]]) -> DataCollector:
 | 
				
			||||||
    def pull_function(property: str, start_time: datetime, end_time: datetime) -> int:
 | 
					    def pull_function(property: str, start_time: datetime, end_time: datetime,
 | 
				
			||||||
 | 
					                      realm: Optional[Realm] = None) -> int:
 | 
				
			||||||
        return do_pull_by_sql_query(property, start_time, end_time, query, group_by)
 | 
					        return do_pull_by_sql_query(property, start_time, end_time, query, group_by)
 | 
				
			||||||
    return DataCollector(output_table, pull_function)
 | 
					    return DataCollector(output_table, pull_function)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def do_pull_minutes_active(property: str, start_time: datetime, end_time: datetime) -> int:
 | 
					def do_pull_minutes_active(property: str, start_time: datetime, end_time: datetime,
 | 
				
			||||||
 | 
					                           realm: Optional[Realm] = None) -> int:
 | 
				
			||||||
    user_activity_intervals = UserActivityInterval.objects.filter(
 | 
					    user_activity_intervals = UserActivityInterval.objects.filter(
 | 
				
			||||||
        end__gt=start_time, start__lt=end_time
 | 
					        end__gt=start_time, start__lt=end_time
 | 
				
			||||||
    ).select_related(
 | 
					    ).select_related(
 | 
				
			||||||
@@ -273,6 +299,7 @@ def do_pull_minutes_active(property: str, start_time: datetime, end_time: dateti
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    seconds_active = defaultdict(float)  # type: Dict[Tuple[int, int], float]
 | 
					    seconds_active = defaultdict(float)  # type: Dict[Tuple[int, int], float]
 | 
				
			||||||
    for user_id, realm_id, interval_start, interval_end in user_activity_intervals:
 | 
					    for user_id, realm_id, interval_start, interval_end in user_activity_intervals:
 | 
				
			||||||
 | 
					        if realm is None or realm.id == realm_id:
 | 
				
			||||||
            start = max(start_time, interval_start)
 | 
					            start = max(start_time, interval_start)
 | 
				
			||||||
            end = min(end_time, interval_end)
 | 
					            end = min(end_time, interval_end)
 | 
				
			||||||
            seconds_active[(user_id, realm_id)] += (end - start).total_seconds()
 | 
					            seconds_active[(user_id, realm_id)] += (end - start).total_seconds()
 | 
				
			||||||
@@ -283,7 +310,12 @@ def do_pull_minutes_active(property: str, start_time: datetime, end_time: dateti
 | 
				
			|||||||
    UserCount.objects.bulk_create(rows)
 | 
					    UserCount.objects.bulk_create(rows)
 | 
				
			||||||
    return len(rows)
 | 
					    return len(rows)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
count_message_by_user_query = """
 | 
					def count_message_by_user_query(realm: Optional[Realm]) -> str:
 | 
				
			||||||
 | 
					    if realm is None:
 | 
				
			||||||
 | 
					        realm_clause = ""
 | 
				
			||||||
 | 
					    else:
 | 
				
			||||||
 | 
					        realm_clause = "zerver_userprofile.realm_id = %s AND" % (realm.id,)
 | 
				
			||||||
 | 
					    return """
 | 
				
			||||||
    INSERT INTO analytics_usercount
 | 
					    INSERT INTO analytics_usercount
 | 
				
			||||||
        (user_id, realm_id, value, property, subgroup, end_time)
 | 
					        (user_id, realm_id, value, property, subgroup, end_time)
 | 
				
			||||||
    SELECT
 | 
					    SELECT
 | 
				
			||||||
@@ -296,12 +328,18 @@ count_message_by_user_query = """
 | 
				
			|||||||
    WHERE
 | 
					    WHERE
 | 
				
			||||||
        zerver_userprofile.date_joined < %%(time_end)s AND
 | 
					        zerver_userprofile.date_joined < %%(time_end)s AND
 | 
				
			||||||
        zerver_message.date_sent >= %%(time_start)s AND
 | 
					        zerver_message.date_sent >= %%(time_start)s AND
 | 
				
			||||||
 | 
					        {realm_clause}
 | 
				
			||||||
        zerver_message.date_sent < %%(time_end)s
 | 
					        zerver_message.date_sent < %%(time_end)s
 | 
				
			||||||
    GROUP BY zerver_userprofile.id %(group_by_clause)s
 | 
					    GROUP BY zerver_userprofile.id %(group_by_clause)s
 | 
				
			||||||
"""
 | 
					""".format(realm_clause=realm_clause)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Note: ignores the group_by / group_by_clause.
 | 
					# Note: ignores the group_by / group_by_clause.
 | 
				
			||||||
count_message_type_by_user_query = """
 | 
					def count_message_type_by_user_query(realm: Optional[Realm]) -> str:
 | 
				
			||||||
 | 
					    if realm is None:
 | 
				
			||||||
 | 
					        realm_clause = ""
 | 
				
			||||||
 | 
					    else:
 | 
				
			||||||
 | 
					        realm_clause = "zerver_userprofile.realm_id = %s AND" % (realm.id,)
 | 
				
			||||||
 | 
					    return """
 | 
				
			||||||
    INSERT INTO analytics_usercount
 | 
					    INSERT INTO analytics_usercount
 | 
				
			||||||
            (realm_id, user_id, value, property, subgroup, end_time)
 | 
					            (realm_id, user_id, value, property, subgroup, end_time)
 | 
				
			||||||
    SELECT realm_id, id, SUM(count) AS value, '%(property)s', message_type, %%(time_end)s
 | 
					    SELECT realm_id, id, SUM(count) AS value, '%(property)s', message_type, %%(time_end)s
 | 
				
			||||||
@@ -323,6 +361,7 @@ count_message_type_by_user_query = """
 | 
				
			|||||||
        ON
 | 
					        ON
 | 
				
			||||||
            zerver_userprofile.id = zerver_message.sender_id AND
 | 
					            zerver_userprofile.id = zerver_message.sender_id AND
 | 
				
			||||||
            zerver_message.date_sent >= %%(time_start)s AND
 | 
					            zerver_message.date_sent >= %%(time_start)s AND
 | 
				
			||||||
 | 
					            {realm_clause}
 | 
				
			||||||
            zerver_message.date_sent < %%(time_end)s
 | 
					            zerver_message.date_sent < %%(time_end)s
 | 
				
			||||||
        JOIN zerver_recipient
 | 
					        JOIN zerver_recipient
 | 
				
			||||||
        ON
 | 
					        ON
 | 
				
			||||||
@@ -335,13 +374,18 @@ count_message_type_by_user_query = """
 | 
				
			|||||||
            zerver_recipient.type, zerver_stream.invite_only
 | 
					            zerver_recipient.type, zerver_stream.invite_only
 | 
				
			||||||
    ) AS subquery
 | 
					    ) AS subquery
 | 
				
			||||||
    GROUP BY realm_id, id, message_type
 | 
					    GROUP BY realm_id, id, message_type
 | 
				
			||||||
"""
 | 
					""".format(realm_clause=realm_clause)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# This query joins to the UserProfile table since all current queries that
 | 
					# This query joins to the UserProfile table since all current queries that
 | 
				
			||||||
# use this also subgroup on UserProfile.is_bot. If in the future there is a
 | 
					# use this also subgroup on UserProfile.is_bot. If in the future there is a
 | 
				
			||||||
# stat that counts messages by stream and doesn't need the UserProfile
 | 
					# stat that counts messages by stream and doesn't need the UserProfile
 | 
				
			||||||
# table, consider writing a new query for efficiency.
 | 
					# table, consider writing a new query for efficiency.
 | 
				
			||||||
count_message_by_stream_query = """
 | 
					def count_message_by_stream_query(realm: Optional[Realm]) -> str:
 | 
				
			||||||
 | 
					    if realm is None:
 | 
				
			||||||
 | 
					        realm_clause = ""
 | 
				
			||||||
 | 
					    else:
 | 
				
			||||||
 | 
					        realm_clause = "zerver_stream.realm_id = %s AND" % (realm.id,)
 | 
				
			||||||
 | 
					    return """
 | 
				
			||||||
    INSERT INTO analytics_streamcount
 | 
					    INSERT INTO analytics_streamcount
 | 
				
			||||||
        (stream_id, realm_id, value, property, subgroup, end_time)
 | 
					        (stream_id, realm_id, value, property, subgroup, end_time)
 | 
				
			||||||
    SELECT
 | 
					    SELECT
 | 
				
			||||||
@@ -360,13 +404,19 @@ count_message_by_stream_query = """
 | 
				
			|||||||
        zerver_stream.date_created < %%(time_end)s AND
 | 
					        zerver_stream.date_created < %%(time_end)s AND
 | 
				
			||||||
        zerver_recipient.type = 2 AND
 | 
					        zerver_recipient.type = 2 AND
 | 
				
			||||||
        zerver_message.date_sent >= %%(time_start)s AND
 | 
					        zerver_message.date_sent >= %%(time_start)s AND
 | 
				
			||||||
 | 
					        {realm_clause}
 | 
				
			||||||
        zerver_message.date_sent < %%(time_end)s
 | 
					        zerver_message.date_sent < %%(time_end)s
 | 
				
			||||||
    GROUP BY zerver_stream.id %(group_by_clause)s
 | 
					    GROUP BY zerver_stream.id %(group_by_clause)s
 | 
				
			||||||
"""
 | 
					""".format(realm_clause=realm_clause)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Hardcodes the query needed by active_users:is_bot:day, since that is
 | 
					# Hardcodes the query needed by active_users:is_bot:day, since that is
 | 
				
			||||||
# currently the only stat that uses this.
 | 
					# currently the only stat that uses this.
 | 
				
			||||||
count_user_by_realm_query = """
 | 
					def count_user_by_realm_query(realm: Optional[Realm]) -> str:
 | 
				
			||||||
 | 
					    if realm is None:
 | 
				
			||||||
 | 
					        realm_clause = ""
 | 
				
			||||||
 | 
					    else:
 | 
				
			||||||
 | 
					        realm_clause = "zerver_userprofile.realm_id = %s AND" % (realm.id,)
 | 
				
			||||||
 | 
					    return """
 | 
				
			||||||
    INSERT INTO analytics_realmcount
 | 
					    INSERT INTO analytics_realmcount
 | 
				
			||||||
        (realm_id, value, property, subgroup, end_time)
 | 
					        (realm_id, value, property, subgroup, end_time)
 | 
				
			||||||
    SELECT
 | 
					    SELECT
 | 
				
			||||||
@@ -379,15 +429,21 @@ count_user_by_realm_query = """
 | 
				
			|||||||
        zerver_realm.date_created < %%(time_end)s AND
 | 
					        zerver_realm.date_created < %%(time_end)s AND
 | 
				
			||||||
        zerver_userprofile.date_joined >= %%(time_start)s AND
 | 
					        zerver_userprofile.date_joined >= %%(time_start)s AND
 | 
				
			||||||
        zerver_userprofile.date_joined < %%(time_end)s AND
 | 
					        zerver_userprofile.date_joined < %%(time_end)s AND
 | 
				
			||||||
 | 
					        {realm_clause}
 | 
				
			||||||
        zerver_userprofile.is_active = TRUE
 | 
					        zerver_userprofile.is_active = TRUE
 | 
				
			||||||
    GROUP BY zerver_realm.id %(group_by_clause)s
 | 
					    GROUP BY zerver_realm.id %(group_by_clause)s
 | 
				
			||||||
"""
 | 
					""".format(realm_clause=realm_clause)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Currently hardcodes the query needed for active_users_audit:is_bot:day.
 | 
					# Currently hardcodes the query needed for active_users_audit:is_bot:day.
 | 
				
			||||||
# Assumes that a user cannot have two RealmAuditLog entries with the same event_time and
 | 
					# Assumes that a user cannot have two RealmAuditLog entries with the same event_time and
 | 
				
			||||||
# event_type in [RealmAuditLog.USER_CREATED, USER_DEACTIVATED, etc].
 | 
					# event_type in [RealmAuditLog.USER_CREATED, USER_DEACTIVATED, etc].
 | 
				
			||||||
# In particular, it's important to ensure that migrations don't cause that to happen.
 | 
					# In particular, it's important to ensure that migrations don't cause that to happen.
 | 
				
			||||||
check_realmauditlog_by_user_query = """
 | 
					def check_realmauditlog_by_user_query(realm: Optional[Realm]) -> str:
 | 
				
			||||||
 | 
					    if realm is None:
 | 
				
			||||||
 | 
					        realm_clause = ""
 | 
				
			||||||
 | 
					    else:
 | 
				
			||||||
 | 
					        realm_clause = "realm_id = %s AND" % (realm.id,)
 | 
				
			||||||
 | 
					    return """
 | 
				
			||||||
    INSERT INTO analytics_usercount
 | 
					    INSERT INTO analytics_usercount
 | 
				
			||||||
        (user_id, realm_id, value, property, subgroup, end_time)
 | 
					        (user_id, realm_id, value, property, subgroup, end_time)
 | 
				
			||||||
    SELECT
 | 
					    SELECT
 | 
				
			||||||
@@ -398,6 +454,7 @@ check_realmauditlog_by_user_query = """
 | 
				
			|||||||
        FROM zerver_realmauditlog
 | 
					        FROM zerver_realmauditlog
 | 
				
			||||||
        WHERE
 | 
					        WHERE
 | 
				
			||||||
            event_type in ({user_created}, {user_activated}, {user_deactivated}, {user_reactivated}) AND
 | 
					            event_type in ({user_created}, {user_activated}, {user_deactivated}, {user_reactivated}) AND
 | 
				
			||||||
 | 
					            {realm_clause}
 | 
				
			||||||
            event_time < %%(time_end)s
 | 
					            event_time < %%(time_end)s
 | 
				
			||||||
        GROUP BY modified_user_id
 | 
					        GROUP BY modified_user_id
 | 
				
			||||||
    ) ral2
 | 
					    ) ral2
 | 
				
			||||||
@@ -412,9 +469,15 @@ check_realmauditlog_by_user_query = """
 | 
				
			|||||||
""".format(user_created=RealmAuditLog.USER_CREATED,
 | 
					""".format(user_created=RealmAuditLog.USER_CREATED,
 | 
				
			||||||
           user_activated=RealmAuditLog.USER_ACTIVATED,
 | 
					           user_activated=RealmAuditLog.USER_ACTIVATED,
 | 
				
			||||||
           user_deactivated=RealmAuditLog.USER_DEACTIVATED,
 | 
					           user_deactivated=RealmAuditLog.USER_DEACTIVATED,
 | 
				
			||||||
           user_reactivated=RealmAuditLog.USER_REACTIVATED)
 | 
					           user_reactivated=RealmAuditLog.USER_REACTIVATED,
 | 
				
			||||||
 | 
					           realm_clause=realm_clause)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
check_useractivityinterval_by_user_query = """
 | 
					def check_useractivityinterval_by_user_query(realm: Optional[Realm]) -> str:
 | 
				
			||||||
 | 
					    if realm is None:
 | 
				
			||||||
 | 
					        realm_clause = ""
 | 
				
			||||||
 | 
					    else:
 | 
				
			||||||
 | 
					        realm_clause = "zerver_userprofile.realm_id = %s AND" % (realm.id,)
 | 
				
			||||||
 | 
					    return """
 | 
				
			||||||
    INSERT INTO analytics_usercount
 | 
					    INSERT INTO analytics_usercount
 | 
				
			||||||
        (user_id, realm_id, value, property, subgroup, end_time)
 | 
					        (user_id, realm_id, value, property, subgroup, end_time)
 | 
				
			||||||
    SELECT
 | 
					    SELECT
 | 
				
			||||||
@@ -425,11 +488,17 @@ check_useractivityinterval_by_user_query = """
 | 
				
			|||||||
        zerver_userprofile.id = zerver_useractivityinterval.user_profile_id
 | 
					        zerver_userprofile.id = zerver_useractivityinterval.user_profile_id
 | 
				
			||||||
    WHERE
 | 
					    WHERE
 | 
				
			||||||
        zerver_useractivityinterval.end >= %%(time_start)s AND
 | 
					        zerver_useractivityinterval.end >= %%(time_start)s AND
 | 
				
			||||||
 | 
					        {realm_clause}
 | 
				
			||||||
        zerver_useractivityinterval.start < %%(time_end)s
 | 
					        zerver_useractivityinterval.start < %%(time_end)s
 | 
				
			||||||
    GROUP BY zerver_userprofile.id %(group_by_clause)s
 | 
					    GROUP BY zerver_userprofile.id %(group_by_clause)s
 | 
				
			||||||
"""
 | 
					""".format(realm_clause=realm_clause)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
count_realm_active_humans_query = """
 | 
					def count_realm_active_humans_query(realm: Optional[Realm]) -> str:
 | 
				
			||||||
 | 
					    if realm is None:
 | 
				
			||||||
 | 
					        realm_clause = ""
 | 
				
			||||||
 | 
					    else:
 | 
				
			||||||
 | 
					        realm_clause = "realm_id = %s AND" % (realm.id,)
 | 
				
			||||||
 | 
					    return """
 | 
				
			||||||
    INSERT INTO analytics_realmcount
 | 
					    INSERT INTO analytics_realmcount
 | 
				
			||||||
        (realm_id, value, property, subgroup, end_time)
 | 
					        (realm_id, value, property, subgroup, end_time)
 | 
				
			||||||
    SELECT
 | 
					    SELECT
 | 
				
			||||||
@@ -440,6 +509,7 @@ count_realm_active_humans_query = """
 | 
				
			|||||||
        WHERE
 | 
					        WHERE
 | 
				
			||||||
            property = 'active_users_audit:is_bot:day' AND
 | 
					            property = 'active_users_audit:is_bot:day' AND
 | 
				
			||||||
            subgroup = 'false' AND
 | 
					            subgroup = 'false' AND
 | 
				
			||||||
 | 
					            {realm_clause}
 | 
				
			||||||
            end_time = %%(time_end)s
 | 
					            end_time = %%(time_end)s
 | 
				
			||||||
    ) usercount1
 | 
					    ) usercount1
 | 
				
			||||||
    JOIN (
 | 
					    JOIN (
 | 
				
			||||||
@@ -447,12 +517,13 @@ count_realm_active_humans_query = """
 | 
				
			|||||||
        FROM analytics_usercount
 | 
					        FROM analytics_usercount
 | 
				
			||||||
        WHERE
 | 
					        WHERE
 | 
				
			||||||
            property = '15day_actives::day' AND
 | 
					            property = '15day_actives::day' AND
 | 
				
			||||||
 | 
					            {realm_clause}
 | 
				
			||||||
            end_time = %%(time_end)s
 | 
					            end_time = %%(time_end)s
 | 
				
			||||||
    ) usercount2
 | 
					    ) usercount2
 | 
				
			||||||
    ON
 | 
					    ON
 | 
				
			||||||
        usercount1.user_id = usercount2.user_id
 | 
					        usercount1.user_id = usercount2.user_id
 | 
				
			||||||
    GROUP BY usercount1.realm_id
 | 
					    GROUP BY usercount1.realm_id
 | 
				
			||||||
"""
 | 
					""".format(realm_clause=realm_clause)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Currently unused and untested
 | 
					# Currently unused and untested
 | 
				
			||||||
count_stream_by_realm_query = """
 | 
					count_stream_by_realm_query = """
 | 
				
			||||||
@@ -471,24 +542,28 @@ count_stream_by_realm_query = """
 | 
				
			|||||||
    GROUP BY zerver_realm.id %(group_by_clause)s
 | 
					    GROUP BY zerver_realm.id %(group_by_clause)s
 | 
				
			||||||
"""
 | 
					"""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
## CountStat declarations ##
 | 
					def get_count_stats(realm: Optional[Realm]=None) -> Dict[str, CountStat]:
 | 
				
			||||||
 | 
					    ## CountStat declarations ##
 | 
				
			||||||
 | 
					
 | 
				
			||||||
count_stats_ = [
 | 
					    count_stats_ = [
 | 
				
			||||||
        # Messages Sent stats
 | 
					        # Messages Sent stats
 | 
				
			||||||
        # Stats that count the number of messages sent in various ways.
 | 
					        # Stats that count the number of messages sent in various ways.
 | 
				
			||||||
        # These are also the set of stats that read from the Message table.
 | 
					        # These are also the set of stats that read from the Message table.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        CountStat('messages_sent:is_bot:hour',
 | 
					        CountStat('messages_sent:is_bot:hour',
 | 
				
			||||||
              sql_data_collector(UserCount, count_message_by_user_query, (UserProfile, 'is_bot')),
 | 
					                  sql_data_collector(UserCount, count_message_by_user_query(
 | 
				
			||||||
 | 
					                      realm), (UserProfile, 'is_bot')),
 | 
				
			||||||
                  CountStat.HOUR),
 | 
					                  CountStat.HOUR),
 | 
				
			||||||
        CountStat('messages_sent:message_type:day',
 | 
					        CountStat('messages_sent:message_type:day',
 | 
				
			||||||
              sql_data_collector(UserCount, count_message_type_by_user_query, None), CountStat.DAY),
 | 
					                  sql_data_collector(
 | 
				
			||||||
 | 
					                      UserCount, count_message_type_by_user_query(realm), None),
 | 
				
			||||||
 | 
					                  CountStat.DAY),
 | 
				
			||||||
        CountStat('messages_sent:client:day',
 | 
					        CountStat('messages_sent:client:day',
 | 
				
			||||||
              sql_data_collector(UserCount, count_message_by_user_query, (Message, 'sending_client_id')),
 | 
					                  sql_data_collector(UserCount, count_message_by_user_query(realm),
 | 
				
			||||||
              CountStat.DAY),
 | 
					                                     (Message, 'sending_client_id')), CountStat.DAY),
 | 
				
			||||||
        CountStat('messages_in_stream:is_bot:day',
 | 
					        CountStat('messages_in_stream:is_bot:day',
 | 
				
			||||||
              sql_data_collector(StreamCount, count_message_by_stream_query, (UserProfile, 'is_bot')),
 | 
					                  sql_data_collector(StreamCount, count_message_by_stream_query(realm),
 | 
				
			||||||
              CountStat.DAY),
 | 
					                                     (UserProfile, 'is_bot')), CountStat.DAY),
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # Number of Users stats
 | 
					        # Number of Users stats
 | 
				
			||||||
        # Stats that count the number of active users in the UserProfile.is_active sense.
 | 
					        # Stats that count the number of active users in the UserProfile.is_active sense.
 | 
				
			||||||
@@ -497,31 +572,36 @@ count_stats_ = [
 | 
				
			|||||||
        # active on which days (in the UserProfile.is_active sense).
 | 
					        # active on which days (in the UserProfile.is_active sense).
 | 
				
			||||||
        # Important that this stay a daily stat, so that 'realm_active_humans::day' works as expected.
 | 
					        # Important that this stay a daily stat, so that 'realm_active_humans::day' works as expected.
 | 
				
			||||||
        CountStat('active_users_audit:is_bot:day',
 | 
					        CountStat('active_users_audit:is_bot:day',
 | 
				
			||||||
              sql_data_collector(UserCount, check_realmauditlog_by_user_query, (UserProfile, 'is_bot')),
 | 
					                  sql_data_collector(UserCount, check_realmauditlog_by_user_query(
 | 
				
			||||||
 | 
					                      realm), (UserProfile, 'is_bot')),
 | 
				
			||||||
                  CountStat.DAY),
 | 
					                  CountStat.DAY),
 | 
				
			||||||
        # Sanity check on 'active_users_audit:is_bot:day', and a archetype for future LoggingCountStats.
 | 
					        # Sanity check on 'active_users_audit:is_bot:day', and a archetype for future LoggingCountStats.
 | 
				
			||||||
        # In RealmCount, 'active_users_audit:is_bot:day' should be the partial
 | 
					        # In RealmCount, 'active_users_audit:is_bot:day' should be the partial
 | 
				
			||||||
        # sum sequence of 'active_users_log:is_bot:day', for any realm that
 | 
					        # sum sequence of 'active_users_log:is_bot:day', for any realm that
 | 
				
			||||||
        # started after the latter stat was introduced.
 | 
					        # started after the latter stat was introduced.
 | 
				
			||||||
    LoggingCountStat('active_users_log:is_bot:day', RealmCount, CountStat.DAY),
 | 
					        LoggingCountStat('active_users_log:is_bot:day',
 | 
				
			||||||
 | 
					                         RealmCount, CountStat.DAY),
 | 
				
			||||||
        # Another sanity check on 'active_users_audit:is_bot:day'. Is only an
 | 
					        # Another sanity check on 'active_users_audit:is_bot:day'. Is only an
 | 
				
			||||||
        # approximation, e.g. if a user is deactivated between the end of the
 | 
					        # approximation, e.g. if a user is deactivated between the end of the
 | 
				
			||||||
        # day and when this stat is run, they won't be counted. However, is the
 | 
					        # day and when this stat is run, they won't be counted. However, is the
 | 
				
			||||||
        # simplest of the three to inspect by hand.
 | 
					        # simplest of the three to inspect by hand.
 | 
				
			||||||
        CountStat('active_users:is_bot:day',
 | 
					        CountStat('active_users:is_bot:day',
 | 
				
			||||||
              sql_data_collector(RealmCount, count_user_by_realm_query, (UserProfile, 'is_bot')),
 | 
					                  sql_data_collector(RealmCount, count_user_by_realm_query(realm), (UserProfile, 'is_bot')),
 | 
				
			||||||
                  CountStat.DAY, interval=TIMEDELTA_MAX),
 | 
					                  CountStat.DAY, interval=TIMEDELTA_MAX),
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # User Activity stats
 | 
					        # User Activity stats
 | 
				
			||||||
        # Stats that measure user activity in the UserActivityInterval sense.
 | 
					        # Stats that measure user activity in the UserActivityInterval sense.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        CountStat('1day_actives::day',
 | 
					        CountStat('1day_actives::day',
 | 
				
			||||||
              sql_data_collector(UserCount, check_useractivityinterval_by_user_query, None),
 | 
					                  sql_data_collector(
 | 
				
			||||||
 | 
					                      UserCount, check_useractivityinterval_by_user_query(realm), None),
 | 
				
			||||||
                  CountStat.DAY, interval=timedelta(days=1)-UserActivityInterval.MIN_INTERVAL_LENGTH),
 | 
					                  CountStat.DAY, interval=timedelta(days=1)-UserActivityInterval.MIN_INTERVAL_LENGTH),
 | 
				
			||||||
        CountStat('15day_actives::day',
 | 
					        CountStat('15day_actives::day',
 | 
				
			||||||
              sql_data_collector(UserCount, check_useractivityinterval_by_user_query, None),
 | 
					                  sql_data_collector(
 | 
				
			||||||
 | 
					                      UserCount, check_useractivityinterval_by_user_query(realm), None),
 | 
				
			||||||
                  CountStat.DAY, interval=timedelta(days=15)-UserActivityInterval.MIN_INTERVAL_LENGTH),
 | 
					                  CountStat.DAY, interval=timedelta(days=15)-UserActivityInterval.MIN_INTERVAL_LENGTH),
 | 
				
			||||||
    CountStat('minutes_active::day', DataCollector(UserCount, do_pull_minutes_active), CountStat.DAY),
 | 
					        CountStat('minutes_active::day', DataCollector(
 | 
				
			||||||
 | 
					            UserCount, do_pull_minutes_active), CountStat.DAY),
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # Rate limiting stats
 | 
					        # Rate limiting stats
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -533,9 +613,13 @@ count_stats_ = [
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        # Canonical account of the number of active humans in a realm on each day.
 | 
					        # Canonical account of the number of active humans in a realm on each day.
 | 
				
			||||||
        DependentCountStat('realm_active_humans::day',
 | 
					        DependentCountStat('realm_active_humans::day',
 | 
				
			||||||
                       sql_data_collector(RealmCount, count_realm_active_humans_query, None),
 | 
					                           sql_data_collector(
 | 
				
			||||||
 | 
					                               RealmCount, count_realm_active_humans_query(realm), None),
 | 
				
			||||||
                           CountStat.DAY,
 | 
					                           CountStat.DAY,
 | 
				
			||||||
                           dependencies=['active_users_audit:is_bot:day', '15day_actives::day'])
 | 
					                           dependencies=['active_users_audit:is_bot:day', '15day_actives::day'])
 | 
				
			||||||
]
 | 
					    ]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
COUNT_STATS = OrderedDict([(stat.property, stat) for stat in count_stats_])
 | 
					    return OrderedDict([(stat.property, stat) for stat in count_stats_])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# To avoid refactoring for now COUNT_STATS can be used as before
 | 
				
			||||||
 | 
					COUNT_STATS = get_count_stats()
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -10,7 +10,7 @@ from django.test import TestCase
 | 
				
			|||||||
from django.utils.timezone import now as timezone_now
 | 
					from django.utils.timezone import now as timezone_now
 | 
				
			||||||
from django.utils.timezone import utc as timezone_utc
 | 
					from django.utils.timezone import utc as timezone_utc
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from analytics.lib.counts import COUNT_STATS, CountStat, \
 | 
					from analytics.lib.counts import COUNT_STATS, CountStat, get_count_stats, \
 | 
				
			||||||
    DependentCountStat, LoggingCountStat, do_aggregate_to_summary_table, \
 | 
					    DependentCountStat, LoggingCountStat, do_aggregate_to_summary_table, \
 | 
				
			||||||
    do_drop_all_analytics_tables, do_drop_single_stat, \
 | 
					    do_drop_all_analytics_tables, do_drop_single_stat, \
 | 
				
			||||||
    do_fill_count_stat_at_hour, do_increment_logging_stat, \
 | 
					    do_fill_count_stat_at_hour, do_increment_logging_stat, \
 | 
				
			||||||
@@ -363,6 +363,29 @@ class TestCountStats(AnalyticsTestCase):
 | 
				
			|||||||
        self.assertTableState(UserCount, [], [])
 | 
					        self.assertTableState(UserCount, [], [])
 | 
				
			||||||
        self.assertTableState(StreamCount, [], [])
 | 
					        self.assertTableState(StreamCount, [], [])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_active_users_by_is_bot_for_realm_constraint(self) -> None:
 | 
				
			||||||
 | 
					        # For single Realm
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        COUNT_STATS = get_count_stats(self.default_realm)
 | 
				
			||||||
 | 
					        stat = COUNT_STATS['active_users:is_bot:day']
 | 
				
			||||||
 | 
					        self.current_property = stat.property
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # To be included
 | 
				
			||||||
 | 
					        self.create_user(is_bot=True, date_joined=self.TIME_ZERO-25*self.HOUR)
 | 
				
			||||||
 | 
					        self.create_user(is_bot=False)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # To be excluded
 | 
				
			||||||
 | 
					        self.create_user(email='test@second.analytics',
 | 
				
			||||||
 | 
					                         realm=self.second_realm, date_joined=self.TIME_ZERO-2*self.DAY)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        do_fill_count_stat_at_hour(stat, self.TIME_ZERO, self.default_realm)
 | 
				
			||||||
 | 
					        self.assertTableState(RealmCount, ['value', 'subgroup'],
 | 
				
			||||||
 | 
					                              [[1, 'true'], [1, 'false']])
 | 
				
			||||||
 | 
					        # No aggregation to InstallationCount with realm constraint
 | 
				
			||||||
 | 
					        self.assertTableState(InstallationCount, ['value', 'subgroup'], [])
 | 
				
			||||||
 | 
					        self.assertTableState(UserCount, [], [])
 | 
				
			||||||
 | 
					        self.assertTableState(StreamCount, [], [])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_messages_sent_by_is_bot(self) -> None:
 | 
					    def test_messages_sent_by_is_bot(self) -> None:
 | 
				
			||||||
        stat = COUNT_STATS['messages_sent:is_bot:hour']
 | 
					        stat = COUNT_STATS['messages_sent:is_bot:hour']
 | 
				
			||||||
        self.current_property = stat.property
 | 
					        self.current_property = stat.property
 | 
				
			||||||
@@ -392,6 +415,46 @@ class TestCountStats(AnalyticsTestCase):
 | 
				
			|||||||
        self.assertTableState(InstallationCount, ['value', 'subgroup'], [[3, 'false'], [3, 'true']])
 | 
					        self.assertTableState(InstallationCount, ['value', 'subgroup'], [[3, 'false'], [3, 'true']])
 | 
				
			||||||
        self.assertTableState(StreamCount, [], [])
 | 
					        self.assertTableState(StreamCount, [], [])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_messages_sent_by_is_bot_realm_constraint(self) -> None:
 | 
				
			||||||
 | 
					        # For single Realm
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        COUNT_STATS = get_count_stats(self.default_realm)
 | 
				
			||||||
 | 
					        stat = COUNT_STATS['messages_sent:is_bot:hour']
 | 
				
			||||||
 | 
					        self.current_property = stat.property
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        bot = self.create_user(is_bot=True)
 | 
				
			||||||
 | 
					        human1 = self.create_user()
 | 
				
			||||||
 | 
					        human2 = self.create_user()
 | 
				
			||||||
 | 
					        recipient_human1 = Recipient.objects.get(type_id=human1.id,
 | 
				
			||||||
 | 
					                                                 type=Recipient.PERSONAL)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        recipient_stream = self.create_stream_with_recipient()[1]
 | 
				
			||||||
 | 
					        recipient_huddle = self.create_huddle_with_recipient()[1]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # To be included
 | 
				
			||||||
 | 
					        self.create_message(bot, recipient_human1)
 | 
				
			||||||
 | 
					        self.create_message(bot, recipient_stream)
 | 
				
			||||||
 | 
					        self.create_message(bot, recipient_huddle)
 | 
				
			||||||
 | 
					        self.create_message(human1, recipient_human1)
 | 
				
			||||||
 | 
					        self.create_message(human2, recipient_human1)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # To be excluded
 | 
				
			||||||
 | 
					        self.create_message(self.hourly_user, recipient_human1)
 | 
				
			||||||
 | 
					        self.create_message(self.hourly_user, recipient_stream)
 | 
				
			||||||
 | 
					        self.create_message(self.hourly_user, recipient_huddle)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        do_fill_count_stat_at_hour(stat, self.TIME_ZERO, self.default_realm)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        self.assertTableState(UserCount, ['value', 'subgroup', 'user'],
 | 
				
			||||||
 | 
					                              [[1, 'false', human1], [1, 'false', human2],
 | 
				
			||||||
 | 
					                               [3, 'true', bot]])
 | 
				
			||||||
 | 
					        self.assertTableState(RealmCount, ['value', 'subgroup', 'realm'],
 | 
				
			||||||
 | 
					                              [[2, 'false', self.default_realm],
 | 
				
			||||||
 | 
					                               [3, 'true', self.default_realm]])
 | 
				
			||||||
 | 
					        # No aggregation to InstallationCount with realm constraint
 | 
				
			||||||
 | 
					        self.assertTableState(InstallationCount, ['value', 'subgroup'], [])
 | 
				
			||||||
 | 
					        self.assertTableState(StreamCount, [], [])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_messages_sent_by_message_type(self) -> None:
 | 
					    def test_messages_sent_by_message_type(self) -> None:
 | 
				
			||||||
        stat = COUNT_STATS['messages_sent:message_type:day']
 | 
					        stat = COUNT_STATS['messages_sent:message_type:day']
 | 
				
			||||||
        self.current_property = stat.property
 | 
					        self.current_property = stat.property
 | 
				
			||||||
@@ -454,6 +517,43 @@ class TestCountStats(AnalyticsTestCase):
 | 
				
			|||||||
                               [2, 'huddle_message']])
 | 
					                               [2, 'huddle_message']])
 | 
				
			||||||
        self.assertTableState(StreamCount, [], [])
 | 
					        self.assertTableState(StreamCount, [], [])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_messages_sent_by_message_type_realm_constraint(self) -> None:
 | 
				
			||||||
 | 
					        # For single Realm
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        COUNT_STATS = get_count_stats(self.default_realm)
 | 
				
			||||||
 | 
					        stat = COUNT_STATS['messages_sent:message_type:day']
 | 
				
			||||||
 | 
					        self.current_property = stat.property
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        user = self.create_user()
 | 
				
			||||||
 | 
					        user_recipient = Recipient.objects.get(type_id=user.id, type=Recipient.PERSONAL)
 | 
				
			||||||
 | 
					        private_stream_recipient = self.create_stream_with_recipient(invite_only=True)[1]
 | 
				
			||||||
 | 
					        stream_recipient = self.create_stream_with_recipient()[1]
 | 
				
			||||||
 | 
					        huddle_recipient = self.create_huddle_with_recipient()[1]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # To be included
 | 
				
			||||||
 | 
					        self.create_message(user, user_recipient)
 | 
				
			||||||
 | 
					        self.create_message(user, private_stream_recipient)
 | 
				
			||||||
 | 
					        self.create_message(user, stream_recipient)
 | 
				
			||||||
 | 
					        self.create_message(user, huddle_recipient)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        do_fill_count_stat_at_hour(stat, self.TIME_ZERO, self.default_realm)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # To be excluded
 | 
				
			||||||
 | 
					        self.create_message(self.hourly_user, user_recipient)
 | 
				
			||||||
 | 
					        self.create_message(self.hourly_user, private_stream_recipient)
 | 
				
			||||||
 | 
					        self.create_message(self.hourly_user, stream_recipient)
 | 
				
			||||||
 | 
					        self.create_message(self.hourly_user, huddle_recipient)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        self.assertTableState(UserCount, ['value', 'subgroup', 'user'],
 | 
				
			||||||
 | 
					                              [[1, 'private_message', user], [1, 'private_stream', user],
 | 
				
			||||||
 | 
					                               [1, 'huddle_message', user], [1, 'public_stream', user]])
 | 
				
			||||||
 | 
					        self.assertTableState(RealmCount, ['value', 'subgroup'],
 | 
				
			||||||
 | 
					                              [[1, 'private_message'], [1, 'private_stream'],
 | 
				
			||||||
 | 
					                               [1, 'public_stream'], [1, 'huddle_message']])
 | 
				
			||||||
 | 
					        # No aggregation to InstallationCount with realm constraint
 | 
				
			||||||
 | 
					        self.assertTableState(InstallationCount, ['value', 'subgroup'], [])
 | 
				
			||||||
 | 
					        self.assertTableState(StreamCount, [], [])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_messages_sent_to_recipients_with_same_id(self) -> None:
 | 
					    def test_messages_sent_to_recipients_with_same_id(self) -> None:
 | 
				
			||||||
        stat = COUNT_STATS['messages_sent:message_type:day']
 | 
					        stat = COUNT_STATS['messages_sent:message_type:day']
 | 
				
			||||||
        self.current_property = stat.property
 | 
					        self.current_property = stat.property
 | 
				
			||||||
@@ -508,6 +608,42 @@ class TestCountStats(AnalyticsTestCase):
 | 
				
			|||||||
                              [[4, website_client_id], [3, client2_id]])
 | 
					                              [[4, website_client_id], [3, client2_id]])
 | 
				
			||||||
        self.assertTableState(StreamCount, [], [])
 | 
					        self.assertTableState(StreamCount, [], [])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_messages_sent_by_client_realm_constraint(self) -> None:
 | 
				
			||||||
 | 
					        # For single Realm
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        COUNT_STATS = get_count_stats(self.default_realm)
 | 
				
			||||||
 | 
					        stat = COUNT_STATS['messages_sent:client:day']
 | 
				
			||||||
 | 
					        self.current_property = stat.property
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        user1 = self.create_user(is_bot=True)
 | 
				
			||||||
 | 
					        user2 = self.create_user()
 | 
				
			||||||
 | 
					        recipient_user2 = Recipient.objects.get(type_id=user2.id, type=Recipient.PERSONAL)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        client2 = Client.objects.create(name='client2')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # TO be included
 | 
				
			||||||
 | 
					        self.create_message(user1, recipient_user2, sending_client=client2)
 | 
				
			||||||
 | 
					        self.create_message(user2, recipient_user2, sending_client=client2)
 | 
				
			||||||
 | 
					        self.create_message(user2, recipient_user2)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # To be excluded
 | 
				
			||||||
 | 
					        self.create_message(self.hourly_user, recipient_user2, sending_client=client2)
 | 
				
			||||||
 | 
					        self.create_message(self.hourly_user, recipient_user2, sending_client=client2)
 | 
				
			||||||
 | 
					        self.create_message(self.hourly_user, recipient_user2)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        do_fill_count_stat_at_hour(stat, self.TIME_ZERO, self.default_realm)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        client2_id = str(client2.id)
 | 
				
			||||||
 | 
					        website_client_id = str(get_client('website').id)  # default for self.create_message
 | 
				
			||||||
 | 
					        self.assertTableState(UserCount, ['value', 'subgroup', 'user'],
 | 
				
			||||||
 | 
					                              [[1, client2_id, user1], [1, client2_id, user2],
 | 
				
			||||||
 | 
					                               [1, website_client_id, user2]])
 | 
				
			||||||
 | 
					        self.assertTableState(RealmCount, ['value', 'subgroup'],
 | 
				
			||||||
 | 
					                              [[1, website_client_id], [2, client2_id]])
 | 
				
			||||||
 | 
					        # No aggregation to InstallationCount with realm constraint
 | 
				
			||||||
 | 
					        self.assertTableState(InstallationCount, ['value', 'subgroup'], [])
 | 
				
			||||||
 | 
					        self.assertTableState(StreamCount, [], [])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_messages_sent_to_stream_by_is_bot(self) -> None:
 | 
					    def test_messages_sent_to_stream_by_is_bot(self) -> None:
 | 
				
			||||||
        stat = COUNT_STATS['messages_in_stream:is_bot:day']
 | 
					        stat = COUNT_STATS['messages_in_stream:is_bot:day']
 | 
				
			||||||
        self.current_property = stat.property
 | 
					        self.current_property = stat.property
 | 
				
			||||||
@@ -545,6 +681,39 @@ class TestCountStats(AnalyticsTestCase):
 | 
				
			|||||||
        self.assertTableState(InstallationCount, ['value', 'subgroup'], [[5, 'false'], [2, 'true']])
 | 
					        self.assertTableState(InstallationCount, ['value', 'subgroup'], [[5, 'false'], [2, 'true']])
 | 
				
			||||||
        self.assertTableState(UserCount, [], [])
 | 
					        self.assertTableState(UserCount, [], [])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_messages_sent_to_stream_by_is_bot_realm_constraint(self) -> None:
 | 
				
			||||||
 | 
					        # For single Realm
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        COUNT_STATS = get_count_stats(self.default_realm)
 | 
				
			||||||
 | 
					        stat = COUNT_STATS['messages_in_stream:is_bot:day']
 | 
				
			||||||
 | 
					        self.current_property = stat.property
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        human1 = self.create_user()
 | 
				
			||||||
 | 
					        bot = self.create_user(is_bot=True)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        realm = {'realm': self.second_realm}
 | 
				
			||||||
 | 
					        stream1, recipient_stream1 = self.create_stream_with_recipient()
 | 
				
			||||||
 | 
					        stream2, recipient_stream2 = self.create_stream_with_recipient(**realm)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # To be included
 | 
				
			||||||
 | 
					        self.create_message(human1, recipient_stream1)
 | 
				
			||||||
 | 
					        self.create_message(bot, recipient_stream1)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # To be excluded
 | 
				
			||||||
 | 
					        self.create_message(self.hourly_user, recipient_stream2)
 | 
				
			||||||
 | 
					        self.create_message(self.daily_user, recipient_stream2)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        do_fill_count_stat_at_hour(stat, self.TIME_ZERO, self.default_realm)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        self.assertTableState(StreamCount, ['value', 'subgroup', 'stream'],
 | 
				
			||||||
 | 
					                              [[1, 'false', stream1],
 | 
				
			||||||
 | 
					                               [1, 'true', stream1]])
 | 
				
			||||||
 | 
					        self.assertTableState(RealmCount, ['value', 'subgroup', 'realm'],
 | 
				
			||||||
 | 
					                              [[1, 'false'], [1, 'true']])
 | 
				
			||||||
 | 
					        # No aggregation to InstallationCount with realm constraint
 | 
				
			||||||
 | 
					        self.assertTableState(InstallationCount, ['value', 'subgroup'], [])
 | 
				
			||||||
 | 
					        self.assertTableState(UserCount, [], [])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def create_interval(self, user: UserProfile, start_offset: timedelta,
 | 
					    def create_interval(self, user: UserProfile, start_offset: timedelta,
 | 
				
			||||||
                        end_offset: timedelta) -> None:
 | 
					                        end_offset: timedelta) -> None:
 | 
				
			||||||
        UserActivityInterval.objects.create(
 | 
					        UserActivityInterval.objects.create(
 | 
				
			||||||
@@ -594,6 +763,34 @@ class TestCountStats(AnalyticsTestCase):
 | 
				
			|||||||
        self.assertTableState(InstallationCount, ['value'], [[6]])
 | 
					        self.assertTableState(InstallationCount, ['value'], [[6]])
 | 
				
			||||||
        self.assertTableState(StreamCount, [], [])
 | 
					        self.assertTableState(StreamCount, [], [])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_1day_actives_realm_constraint(self) -> None:
 | 
				
			||||||
 | 
					        # For single Realm
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        COUNT_STATS = get_count_stats(self.default_realm)
 | 
				
			||||||
 | 
					        stat = COUNT_STATS['1day_actives::day']
 | 
				
			||||||
 | 
					        self.current_property = stat.property
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        _1day = 1*self.DAY - UserActivityInterval.MIN_INTERVAL_LENGTH
 | 
				
			||||||
 | 
					        user1 = self.create_user()
 | 
				
			||||||
 | 
					        user2 = self.create_user()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # To be included
 | 
				
			||||||
 | 
					        self.create_interval(user1, 20*self.HOUR, 19*self.HOUR)
 | 
				
			||||||
 | 
					        self.create_interval(user2, _1day + self.DAY, _1day)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # To be excluded
 | 
				
			||||||
 | 
					        user3 = self.create_user(realm=self.second_realm)
 | 
				
			||||||
 | 
					        self.create_interval(user3, 20*self.MINUTE, 19*self.MINUTE)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        do_fill_count_stat_at_hour(stat, self.TIME_ZERO, self.default_realm)
 | 
				
			||||||
 | 
					        self.assertTableState(UserCount, ['value', 'user'],
 | 
				
			||||||
 | 
					                              [[1, user2], [1, user2]])
 | 
				
			||||||
 | 
					        self.assertTableState(RealmCount, ['value', 'realm'],
 | 
				
			||||||
 | 
					                              [[2, self.default_realm]])
 | 
				
			||||||
 | 
					        # No aggregation to InstallationCount with realm constraint
 | 
				
			||||||
 | 
					        self.assertTableState(InstallationCount, ['value'], [])
 | 
				
			||||||
 | 
					        self.assertTableState(StreamCount, [], [])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_15day_actives(self) -> None:
 | 
					    def test_15day_actives(self) -> None:
 | 
				
			||||||
        stat = COUNT_STATS['15day_actives::day']
 | 
					        stat = COUNT_STATS['15day_actives::day']
 | 
				
			||||||
        self.current_property = stat.property
 | 
					        self.current_property = stat.property
 | 
				
			||||||
@@ -637,6 +834,36 @@ class TestCountStats(AnalyticsTestCase):
 | 
				
			|||||||
        self.assertTableState(InstallationCount, ['value'], [[6]])
 | 
					        self.assertTableState(InstallationCount, ['value'], [[6]])
 | 
				
			||||||
        self.assertTableState(StreamCount, [], [])
 | 
					        self.assertTableState(StreamCount, [], [])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_15day_actives_realm_constraint(self) -> None:
 | 
				
			||||||
 | 
					        # For single Realm
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        COUNT_STATS = get_count_stats(self.default_realm)
 | 
				
			||||||
 | 
					        stat = COUNT_STATS['15day_actives::day']
 | 
				
			||||||
 | 
					        self.current_property = stat.property
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        _15day = 15*self.DAY - UserActivityInterval.MIN_INTERVAL_LENGTH
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        user1 = self.create_user()
 | 
				
			||||||
 | 
					        user2 = self.create_user()
 | 
				
			||||||
 | 
					        user3 = self.create_user(realm=self.second_realm)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # To be included
 | 
				
			||||||
 | 
					        self.create_interval(user1, _15day + self.DAY, _15day)
 | 
				
			||||||
 | 
					        self.create_interval(user2, 20*self.HOUR, 19*self.HOUR)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # To be excluded
 | 
				
			||||||
 | 
					        self.create_interval(user3, 20*self.HOUR, 19*self.HOUR)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        do_fill_count_stat_at_hour(stat, self.TIME_ZERO, self.default_realm)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        self.assertTableState(UserCount, ['value', 'user'],
 | 
				
			||||||
 | 
					                              [[1, user1], [1, user2]])
 | 
				
			||||||
 | 
					        self.assertTableState(RealmCount, ['value', 'realm'],
 | 
				
			||||||
 | 
					                              [[2, self.default_realm]])
 | 
				
			||||||
 | 
					        # No aggregation to InstallationCount with realm constraint
 | 
				
			||||||
 | 
					        self.assertTableState(InstallationCount, ['value'], [])
 | 
				
			||||||
 | 
					        self.assertTableState(StreamCount, [], [])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_minutes_active(self) -> None:
 | 
					    def test_minutes_active(self) -> None:
 | 
				
			||||||
        stat = COUNT_STATS['minutes_active::day']
 | 
					        stat = COUNT_STATS['minutes_active::day']
 | 
				
			||||||
        self.current_property = stat.property
 | 
					        self.current_property = stat.property
 | 
				
			||||||
@@ -679,6 +906,35 @@ class TestCountStats(AnalyticsTestCase):
 | 
				
			|||||||
        self.assertTableState(InstallationCount, ['value'], [[61 + 121 + 24*60 + 1]])
 | 
					        self.assertTableState(InstallationCount, ['value'], [[61 + 121 + 24*60 + 1]])
 | 
				
			||||||
        self.assertTableState(StreamCount, [], [])
 | 
					        self.assertTableState(StreamCount, [], [])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_minutes_active_realm_constraint(self) -> None:
 | 
				
			||||||
 | 
					        # For single Realm
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        COUNT_STATS = get_count_stats(self.default_realm)
 | 
				
			||||||
 | 
					        stat = COUNT_STATS['minutes_active::day']
 | 
				
			||||||
 | 
					        self.current_property = stat.property
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # Outside time range, should not appear. Also testing for intervals
 | 
				
			||||||
 | 
					        # starting and ending on boundary
 | 
				
			||||||
 | 
					        user1 = self.create_user()
 | 
				
			||||||
 | 
					        user2 = self.create_user()
 | 
				
			||||||
 | 
					        user3 = self.create_user(realm=self.second_realm)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # To be included
 | 
				
			||||||
 | 
					        self.create_interval(user1, 20*self.HOUR, 19*self.HOUR)
 | 
				
			||||||
 | 
					        self.create_interval(user2, 20*self.MINUTE, 19*self.MINUTE)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # To be excluded
 | 
				
			||||||
 | 
					        self.create_interval(user3, 20*self.MINUTE, 19*self.MINUTE)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        do_fill_count_stat_at_hour(stat, self.TIME_ZERO, self.default_realm)
 | 
				
			||||||
 | 
					        self.assertTableState(UserCount, ['value', 'user'],
 | 
				
			||||||
 | 
					                              [[60, user1], [1, user2]])
 | 
				
			||||||
 | 
					        self.assertTableState(RealmCount, ['value', 'realm'],
 | 
				
			||||||
 | 
					                              [[60 + 1, self.default_realm]])
 | 
				
			||||||
 | 
					        # No aggregation to InstallationCount with realm constraint
 | 
				
			||||||
 | 
					        self.assertTableState(InstallationCount, ['value'], [])
 | 
				
			||||||
 | 
					        self.assertTableState(StreamCount, [], [])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class TestDoAggregateToSummaryTable(AnalyticsTestCase):
 | 
					class TestDoAggregateToSummaryTable(AnalyticsTestCase):
 | 
				
			||||||
    # do_aggregate_to_summary_table is mostly tested by the end to end
 | 
					    # do_aggregate_to_summary_table is mostly tested by the end to end
 | 
				
			||||||
    # nature of the tests in TestCountStats. But want to highlight one
 | 
					    # nature of the tests in TestCountStats. But want to highlight one
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user