mirror of
https://github.com/zulip/zulip.git
synced 2025-11-06 15:03:34 +00:00
counts.py: Move CountStat.group_by into ZerverCountQuery.
Part of a larger refactoring to reduce cyclic dependencies between CountStat and DataCollector (coming soon).
This commit is contained in:
@@ -37,12 +37,11 @@ class CountStat(object):
|
|||||||
DAY = 'day'
|
DAY = 'day'
|
||||||
FREQUENCIES = frozenset([HOUR, DAY])
|
FREQUENCIES = frozenset([HOUR, DAY])
|
||||||
|
|
||||||
def __init__(self, property, zerver_count_query, group_by, frequency, interval=None):
|
def __init__(self, property, zerver_count_query, frequency, interval=None):
|
||||||
# type: (str, ZerverCountQuery, Optional[Tuple[models.Model, str]], str, Optional[timedelta]) -> None
|
# type: (str, ZerverCountQuery, str, Optional[timedelta]) -> None
|
||||||
self.property = property
|
self.property = property
|
||||||
self.zerver_count_query = zerver_count_query
|
self.zerver_count_query = zerver_count_query
|
||||||
# might have to do something different for bitfields
|
# might have to do something different for bitfields
|
||||||
self.group_by = group_by
|
|
||||||
if frequency not in self.FREQUENCIES:
|
if frequency not in self.FREQUENCIES:
|
||||||
raise AssertionError("Unknown frequency: %s" % (frequency,))
|
raise AssertionError("Unknown frequency: %s" % (frequency,))
|
||||||
self.frequency = frequency
|
self.frequency = frequency
|
||||||
@@ -62,22 +61,21 @@ class CountStat(object):
|
|||||||
class LoggingCountStat(CountStat):
|
class LoggingCountStat(CountStat):
|
||||||
def __init__(self, property, analytics_table, frequency):
|
def __init__(self, property, analytics_table, frequency):
|
||||||
# type: (str, Type[BaseCount], str) -> None
|
# type: (str, Type[BaseCount], str) -> None
|
||||||
CountStat.__init__(self, property, ZerverCountQuery(analytics_table, None),
|
CountStat.__init__(self, property, ZerverCountQuery(analytics_table, None, None), frequency)
|
||||||
None, frequency)
|
|
||||||
self.is_logging = True
|
self.is_logging = True
|
||||||
|
|
||||||
class CustomPullCountStat(CountStat):
|
class CustomPullCountStat(CountStat):
|
||||||
def __init__(self, property, analytics_table, frequency, custom_pull_function):
|
def __init__(self, property, analytics_table, frequency, custom_pull_function):
|
||||||
# type: (str, Type[BaseCount], str, Callable[[CountStat, datetime, datetime], None]) -> None
|
# type: (str, Type[BaseCount], str, Callable[[CountStat, datetime, datetime], None]) -> None
|
||||||
CountStat.__init__(self, property, ZerverCountQuery(analytics_table, None),
|
CountStat.__init__(self, property, ZerverCountQuery(analytics_table, None, None), frequency)
|
||||||
None, frequency)
|
|
||||||
self.custom_pull_function = custom_pull_function
|
self.custom_pull_function = custom_pull_function
|
||||||
|
|
||||||
class ZerverCountQuery(object):
|
class ZerverCountQuery(object):
|
||||||
def __init__(self, analytics_table, query):
|
def __init__(self, analytics_table, query, group_by):
|
||||||
# type: (Type[BaseCount], Text) -> None
|
# type: (Type[BaseCount], Text, Optional[Tuple[models.Model, str]]) -> None
|
||||||
self.analytics_table = analytics_table
|
self.analytics_table = analytics_table
|
||||||
self.query = query
|
self.query = query
|
||||||
|
self.group_by = group_by
|
||||||
|
|
||||||
def do_update_fill_state(fill_state, end_time, state):
|
def do_update_fill_state(fill_state, end_time, state):
|
||||||
# type: (FillState, datetime, int) -> None
|
# type: (FillState, datetime, int) -> None
|
||||||
@@ -200,11 +198,12 @@ def do_aggregate_to_summary_table(stat, end_time):
|
|||||||
# This is the only method that hits the prod databases directly.
|
# This is the only method that hits the prod databases directly.
|
||||||
def do_pull_from_zerver(stat, start_time, end_time):
|
def do_pull_from_zerver(stat, start_time, end_time):
|
||||||
# type: (CountStat, datetime, datetime) -> None
|
# type: (CountStat, datetime, datetime) -> None
|
||||||
if stat.group_by is None:
|
group_by = stat.zerver_count_query.group_by
|
||||||
|
if group_by is None:
|
||||||
subgroup = 'NULL'
|
subgroup = 'NULL'
|
||||||
group_by_clause = ''
|
group_by_clause = ''
|
||||||
else:
|
else:
|
||||||
subgroup = '%s.%s' % (stat.group_by[0]._meta.db_table, stat.group_by[1])
|
subgroup = '%s.%s' % (group_by[0]._meta.db_table, group_by[1])
|
||||||
group_by_clause = ', ' + subgroup
|
group_by_clause = ', ' + subgroup
|
||||||
|
|
||||||
# We do string replacement here because passing group_by_clause as a param
|
# We do string replacement here because passing group_by_clause as a param
|
||||||
@@ -261,7 +260,8 @@ count_user_by_realm_query = """
|
|||||||
zerver_userprofile.is_active = TRUE
|
zerver_userprofile.is_active = TRUE
|
||||||
GROUP BY zerver_realm.id %(group_by_clause)s
|
GROUP BY zerver_realm.id %(group_by_clause)s
|
||||||
"""
|
"""
|
||||||
zerver_count_user_by_realm = ZerverCountQuery(RealmCount, count_user_by_realm_query)
|
zerver_count_user_by_realm = ZerverCountQuery(RealmCount, count_user_by_realm_query,
|
||||||
|
(UserProfile, 'is_bot'))
|
||||||
|
|
||||||
# currently .sender_id is only Message specific thing
|
# currently .sender_id is only Message specific thing
|
||||||
count_message_by_user_query = """
|
count_message_by_user_query = """
|
||||||
@@ -279,7 +279,10 @@ count_message_by_user_query = """
|
|||||||
zerver_message.pub_date < %%(time_end)s
|
zerver_message.pub_date < %%(time_end)s
|
||||||
GROUP BY zerver_userprofile.id %(group_by_clause)s
|
GROUP BY zerver_userprofile.id %(group_by_clause)s
|
||||||
"""
|
"""
|
||||||
zerver_count_message_by_user = ZerverCountQuery(UserCount, count_message_by_user_query)
|
zerver_count_message_by_user_is_bot = ZerverCountQuery(UserCount, count_message_by_user_query,
|
||||||
|
(UserProfile, 'is_bot'))
|
||||||
|
zerver_count_message_by_user_client = ZerverCountQuery(UserCount, count_message_by_user_query,
|
||||||
|
(Message, 'sending_client_id'))
|
||||||
|
|
||||||
# Currently unused and untested
|
# Currently unused and untested
|
||||||
count_stream_by_realm_query = """
|
count_stream_by_realm_query = """
|
||||||
@@ -297,7 +300,7 @@ count_stream_by_realm_query = """
|
|||||||
zerver_stream.date_created < %%(time_end)s
|
zerver_stream.date_created < %%(time_end)s
|
||||||
GROUP BY zerver_realm.id %(group_by_clause)s
|
GROUP BY zerver_realm.id %(group_by_clause)s
|
||||||
"""
|
"""
|
||||||
zerver_count_stream_by_realm = ZerverCountQuery(RealmCount, count_stream_by_realm_query)
|
zerver_count_stream_by_realm = ZerverCountQuery(RealmCount, count_stream_by_realm_query, None)
|
||||||
|
|
||||||
# This query violates the count_X_by_Y_query conventions in several ways. One,
|
# This query violates the count_X_by_Y_query conventions in several ways. One,
|
||||||
# the X table is not specified by the query name; MessageType is not a zerver
|
# the X table is not specified by the query name; MessageType is not a zerver
|
||||||
@@ -336,7 +339,7 @@ count_message_type_by_user_query = """
|
|||||||
) AS subquery
|
) AS subquery
|
||||||
GROUP BY realm_id, id, message_type
|
GROUP BY realm_id, id, message_type
|
||||||
"""
|
"""
|
||||||
zerver_count_message_type_by_user = ZerverCountQuery(UserCount, count_message_type_by_user_query)
|
zerver_count_message_type_by_user = ZerverCountQuery(UserCount, count_message_type_by_user_query, None)
|
||||||
|
|
||||||
# Note that this query also joins to the UserProfile table, since all
|
# Note that this query also joins to the UserProfile table, since all
|
||||||
# current queries that use this also subgroup on UserProfile.is_bot. If in
|
# current queries that use this also subgroup on UserProfile.is_bot. If in
|
||||||
@@ -364,7 +367,8 @@ count_message_by_stream_query = """
|
|||||||
zerver_message.pub_date < %%(time_end)s
|
zerver_message.pub_date < %%(time_end)s
|
||||||
GROUP BY zerver_stream.id %(group_by_clause)s
|
GROUP BY zerver_stream.id %(group_by_clause)s
|
||||||
"""
|
"""
|
||||||
zerver_count_message_by_stream = ZerverCountQuery(StreamCount, count_message_by_stream_query)
|
zerver_count_message_by_stream = ZerverCountQuery(StreamCount, count_message_by_stream_query,
|
||||||
|
(UserProfile, 'is_bot'))
|
||||||
|
|
||||||
check_useractivityinterval_by_user_query = """
|
check_useractivityinterval_by_user_query = """
|
||||||
INSERT INTO analytics_usercount
|
INSERT INTO analytics_usercount
|
||||||
@@ -381,7 +385,7 @@ check_useractivityinterval_by_user_query = """
|
|||||||
GROUP BY zerver_userprofile.id %(group_by_clause)s
|
GROUP BY zerver_userprofile.id %(group_by_clause)s
|
||||||
"""
|
"""
|
||||||
zerver_check_useractivityinterval_by_user = ZerverCountQuery(
|
zerver_check_useractivityinterval_by_user = ZerverCountQuery(
|
||||||
UserCount, check_useractivityinterval_by_user_query)
|
UserCount, check_useractivityinterval_by_user_query, None)
|
||||||
|
|
||||||
# Currently hardcodes the query needed for active_users_audit:is_bot:day.
|
# Currently hardcodes the query needed for active_users_audit:is_bot:day.
|
||||||
# Assumes that a user cannot have two RealmAuditLog entries with the same event_time and
|
# Assumes that a user cannot have two RealmAuditLog entries with the same event_time and
|
||||||
@@ -410,7 +414,8 @@ check_realmauditlog_by_user_query = """
|
|||||||
WHERE
|
WHERE
|
||||||
ral1.event_type in ('user_created', 'user_activated', 'user_reactivated')
|
ral1.event_type in ('user_created', 'user_activated', 'user_reactivated')
|
||||||
"""
|
"""
|
||||||
zerver_check_realmauditlog_by_user = ZerverCountQuery(UserCount, check_realmauditlog_by_user_query)
|
zerver_check_realmauditlog_by_user = ZerverCountQuery(UserCount, check_realmauditlog_by_user_query,
|
||||||
|
(UserProfile, 'is_bot'))
|
||||||
|
|
||||||
def do_pull_minutes_active(stat, start_time, end_time):
|
def do_pull_minutes_active(stat, start_time, end_time):
|
||||||
# type: (CountStat, datetime, datetime) -> None
|
# type: (CountStat, datetime, datetime) -> None
|
||||||
@@ -437,33 +442,28 @@ def do_pull_minutes_active(stat, start_time, end_time):
|
|||||||
(stat.property, (time.time()-timer_start)*1000, len(rows)))
|
(stat.property, (time.time()-timer_start)*1000, len(rows)))
|
||||||
|
|
||||||
count_stats_ = [
|
count_stats_ = [
|
||||||
CountStat('messages_sent:is_bot:hour', zerver_count_message_by_user,
|
CountStat('messages_sent:is_bot:hour', zerver_count_message_by_user_is_bot, CountStat.HOUR),
|
||||||
(UserProfile, 'is_bot'), CountStat.HOUR),
|
CountStat('messages_sent:message_type:day', zerver_count_message_type_by_user, CountStat.DAY),
|
||||||
CountStat('messages_sent:message_type:day', zerver_count_message_type_by_user,
|
CountStat('messages_sent:client:day', zerver_count_message_by_user_client, CountStat.DAY),
|
||||||
None, CountStat.DAY),
|
CountStat('messages_in_stream:is_bot:day', zerver_count_message_by_stream, CountStat.DAY),
|
||||||
CountStat('messages_sent:client:day', zerver_count_message_by_user,
|
|
||||||
(Message, 'sending_client_id'), CountStat.DAY),
|
|
||||||
CountStat('messages_in_stream:is_bot:day', zerver_count_message_by_stream,
|
|
||||||
(UserProfile, 'is_bot'), CountStat.DAY),
|
|
||||||
|
|
||||||
# Sanity check on the bottom two stats. Is only an approximation,
|
# Sanity check on the bottom two stats. Is only an approximation,
|
||||||
# e.g. if a user is deactivated between the end of the day and when this
|
# e.g. if a user is deactivated between the end of the day and when this
|
||||||
# stat is run, they won't be counted.
|
# stat is run, they won't be counted.
|
||||||
CountStat('active_users:is_bot:day', zerver_count_user_by_realm,
|
CountStat('active_users:is_bot:day', zerver_count_user_by_realm,
|
||||||
(UserProfile, 'is_bot'), CountStat.DAY, interval=TIMEDELTA_MAX),
|
CountStat.DAY, interval=TIMEDELTA_MAX),
|
||||||
# In RealmCount, 'active_humans_audit::day' should be the partial sum sequence
|
# In RealmCount, 'active_humans_audit::day' should be the partial sum sequence
|
||||||
# of 'active_users_log:is_bot:day', for any realm that started after the
|
# of 'active_users_log:is_bot:day', for any realm that started after the
|
||||||
# latter stat was introduced.
|
# latter stat was introduced.
|
||||||
# 'active_users_audit:is_bot:day' is the canonical record of which users were
|
# 'active_users_audit:is_bot:day' is the canonical record of which users were
|
||||||
# active on which days (in the UserProfile.is_active sense).
|
# active on which days (in the UserProfile.is_active sense).
|
||||||
CountStat('active_users_audit:is_bot:day', zerver_check_realmauditlog_by_user,
|
CountStat('active_users_audit:is_bot:day', zerver_check_realmauditlog_by_user, CountStat.DAY),
|
||||||
(UserProfile, 'is_bot'), CountStat.DAY),
|
|
||||||
LoggingCountStat('active_users_log:is_bot:day', RealmCount, CountStat.DAY),
|
LoggingCountStat('active_users_log:is_bot:day', RealmCount, CountStat.DAY),
|
||||||
|
|
||||||
# The minutes=15 part is due to the 15 minutes added in
|
# The minutes=15 part is due to the 15 minutes added in
|
||||||
# zerver.lib.actions.do_update_user_activity_interval.
|
# zerver.lib.actions.do_update_user_activity_interval.
|
||||||
CountStat('15day_actives::day', zerver_check_useractivityinterval_by_user,
|
CountStat('15day_actives::day', zerver_check_useractivityinterval_by_user,
|
||||||
None, CountStat.DAY, interval=timedelta(days=15)-timedelta(minutes=15)),
|
CountStat.DAY, interval=timedelta(days=15)-timedelta(minutes=15)),
|
||||||
CustomPullCountStat('minutes_active::day', UserCount, CountStat.DAY, do_pull_minutes_active)
|
CustomPullCountStat('minutes_active::day', UserCount, CountStat.DAY, do_pull_minutes_active)
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|||||||
@@ -7,8 +7,6 @@ from django.test import TestCase
|
|||||||
from django.utils import timezone
|
from django.utils import timezone
|
||||||
|
|
||||||
from analytics.lib.counts import CountStat, COUNT_STATS, process_count_stat, \
|
from analytics.lib.counts import CountStat, COUNT_STATS, process_count_stat, \
|
||||||
zerver_count_user_by_realm, zerver_count_message_by_user, \
|
|
||||||
zerver_count_message_by_stream, zerver_count_stream_by_realm, \
|
|
||||||
do_fill_count_stat_at_hour, do_increment_logging_stat, ZerverCountQuery, \
|
do_fill_count_stat_at_hour, do_increment_logging_stat, ZerverCountQuery, \
|
||||||
LoggingCountStat, do_aggregate_to_summary_table, \
|
LoggingCountStat, do_aggregate_to_summary_table, \
|
||||||
do_drop_all_analytics_tables
|
do_drop_all_analytics_tables
|
||||||
@@ -159,8 +157,7 @@ class TestProcessCountStat(AnalyticsTestCase):
|
|||||||
# type: (datetime) -> CountStat
|
# type: (datetime) -> CountStat
|
||||||
dummy_query = """INSERT INTO analytics_realmcount (realm_id, property, end_time, value)
|
dummy_query = """INSERT INTO analytics_realmcount (realm_id, property, end_time, value)
|
||||||
VALUES (1, 'test stat', '%(end_time)s', 22)""" % {'end_time': current_time}
|
VALUES (1, 'test stat', '%(end_time)s', 22)""" % {'end_time': current_time}
|
||||||
stat = CountStat('test stat', ZerverCountQuery(UserCount, dummy_query),
|
stat = CountStat('test stat', ZerverCountQuery(UserCount, dummy_query, None), CountStat.HOUR)
|
||||||
None, CountStat.HOUR)
|
|
||||||
return stat
|
return stat
|
||||||
|
|
||||||
def assertFillStateEquals(self, end_time, state=FillState.DONE, property=None):
|
def assertFillStateEquals(self, end_time, state=FillState.DONE, property=None):
|
||||||
|
|||||||
Reference in New Issue
Block a user