mirror of
https://github.com/zulip/zulip.git
synced 2025-11-04 05:53:43 +00:00
analytics: Simplify frequency and measurement interval options.
Change the CountStat object to take an is_gauge variable instead of a smallest_interval variable. Previously, (smallest_interval, frequency) could be any of (hour, hour), (hour, day), (hour, gauge), (day, hour), (day, day), or (day, gauge). The current change is equivalent to excluding (hour, day) and (day, hour) from the list above. This change, along with other recent changes, allows us to simplify how we handle time intervals. This commit also removes the TimeInterval object.
This commit is contained in:
@@ -1,23 +1,35 @@
|
||||
from django.db import connection, models
|
||||
from django.utils import timezone
|
||||
from datetime import timedelta, datetime
|
||||
|
||||
from analytics.models import InstallationCount, RealmCount, \
|
||||
UserCount, StreamCount, BaseCount, FillState, get_fill_state, installation_epoch
|
||||
from analytics.lib.interval import TimeInterval, floor_to_day
|
||||
from zerver.models import Realm, UserProfile, Message, Stream, models
|
||||
from zerver.lib.timestamp import floor_to_day
|
||||
|
||||
from typing import Any, Optional, Type
|
||||
from six import text_type
|
||||
|
||||
# First post office in Boston
|
||||
MIN_TIME = datetime(1639, 1, 1, 0, 0, 0, tzinfo=timezone.utc)
|
||||
|
||||
class CountStat(object):
|
||||
def __init__(self, property, zerver_count_query, filter_args, smallest_interval, frequency):
|
||||
# type: (text_type, ZerverCountQuery, Dict[str, bool], str, str) -> None
|
||||
HOUR = 'hour'
|
||||
DAY = 'day'
|
||||
FREQUENCIES = frozenset([HOUR, DAY])
|
||||
# Allowed intervals are HOUR, DAY, and, GAUGE
|
||||
GAUGE = 'gauge'
|
||||
|
||||
def __init__(self, property, zerver_count_query, filter_args, frequency, is_gauge):
|
||||
# type: (text_type, ZerverCountQuery, Dict[str, bool], str, bool) -> None
|
||||
self.property = property
|
||||
self.zerver_count_query = zerver_count_query
|
||||
# might have to do something different for bitfields
|
||||
self.filter_args = filter_args
|
||||
self.smallest_interval = smallest_interval
|
||||
if frequency not in self.FREQUENCIES:
|
||||
raise ValueError("Unknown frequency: %s" % (frequency,))
|
||||
self.frequency = frequency
|
||||
self.interval = self.GAUGE if is_gauge else frequency
|
||||
|
||||
class ZerverCountQuery(object):
|
||||
def __init__(self, zerver_table, analytics_table, query):
|
||||
@@ -52,18 +64,22 @@ def process_count_stat(stat, fill_to_time):
|
||||
FillState.objects.filter(property = stat.property).update(state = FillState.DONE)
|
||||
currently_filled = currently_filled + timedelta(hours = 1)
|
||||
|
||||
# We assume end_time is on an hour boundary. It is the caller's
|
||||
# responsibility to enforce this!
|
||||
# Note: very soon we are going to disallow CountStats with
|
||||
# (smallest_interval, frequency) = (hour, day) or (day, hour). The logic below
|
||||
# reflects that assumption.
|
||||
# We assume end_time is on an hour boundary, and is timezone aware.
|
||||
# It is the caller's responsibility to enforce this!
|
||||
def do_fill_count_stat_at_hour(stat, end_time):
|
||||
# type: (CountStat, datetime) -> None
|
||||
if stat.frequency == 'day' and (end_time != floor_to_day(end_time)):
|
||||
if stat.frequency == CountStat.DAY and (end_time != floor_to_day(end_time)):
|
||||
return
|
||||
time_interval = TimeInterval(stat.smallest_interval, end_time)
|
||||
do_pull_from_zerver(stat, time_interval)
|
||||
do_aggregate_to_summary_table(stat, time_interval)
|
||||
|
||||
if stat.interval == CountStat.HOUR:
|
||||
start_time = end_time - timedelta(hours = 1)
|
||||
elif stat.interval == CountStat.DAY:
|
||||
start_time = end_time - timedelta(days = 1)
|
||||
else: # stat.interval == CountStat.GAUGE
|
||||
start_time = MIN_TIME
|
||||
|
||||
do_pull_from_zerver(stat, start_time, end_time, stat.interval)
|
||||
do_aggregate_to_summary_table(stat, end_time, stat.interval)
|
||||
|
||||
def do_delete_count_stat_at_hour(stat, end_time):
|
||||
# type: (CountStat, datetime) -> None
|
||||
@@ -72,8 +88,8 @@ def do_delete_count_stat_at_hour(stat, end_time):
|
||||
RealmCount.objects.filter(property = stat.property, end_time = end_time).delete()
|
||||
InstallationCount.objects.filter(property = stat.property, end_time = end_time).delete()
|
||||
|
||||
def do_aggregate_to_summary_table(stat, time_interval):
|
||||
# type: (CountStat, TimeInterval) -> None
|
||||
def do_aggregate_to_summary_table(stat, end_time, interval):
|
||||
# type: (CountStat, datetime, str) -> None
|
||||
cursor = connection.cursor()
|
||||
|
||||
# Aggregate into RealmCount
|
||||
@@ -96,9 +112,9 @@ def do_aggregate_to_summary_table(stat, time_interval):
|
||||
GROUP BY zerver_realm.id
|
||||
""" % {'analytics_table' : analytics_table._meta.db_table,
|
||||
'property' : stat.property,
|
||||
'interval' : time_interval.interval}
|
||||
'interval' : interval}
|
||||
|
||||
cursor.execute(realmcount_query, {'end_time': time_interval.end})
|
||||
cursor.execute(realmcount_query, {'end_time': end_time})
|
||||
|
||||
# Aggregate into InstallationCount
|
||||
installationcount_query = """
|
||||
@@ -114,9 +130,9 @@ def do_aggregate_to_summary_table(stat, time_interval):
|
||||
interval = '%(interval)s'
|
||||
)
|
||||
""" % {'property': stat.property,
|
||||
'interval': time_interval.interval}
|
||||
'interval': interval}
|
||||
|
||||
cursor.execute(installationcount_query, {'end_time': time_interval.end})
|
||||
cursor.execute(installationcount_query, {'end_time': end_time})
|
||||
cursor.close()
|
||||
|
||||
## methods that hit the prod databases directly
|
||||
@@ -124,8 +140,8 @@ def do_aggregate_to_summary_table(stat, time_interval):
|
||||
# written in slightly more than needed generality, to reduce copy-paste errors
|
||||
# as more of these are made / make it easy to extend to a pull_X_by_realm
|
||||
|
||||
def do_pull_from_zerver(stat, time_interval):
|
||||
# type: (CountStat, TimeInterval) -> None
|
||||
def do_pull_from_zerver(stat, start_time, end_time, interval):
|
||||
# type: (CountStat, datetime, datetime, str) -> None
|
||||
zerver_table = stat.zerver_count_query.zerver_table._meta.db_table # type: ignore
|
||||
join_args = ' '.join('AND %s.%s = %s' % (zerver_table, key, value) \
|
||||
for key, value in stat.filter_args.items())
|
||||
@@ -134,10 +150,10 @@ def do_pull_from_zerver(stat, time_interval):
|
||||
# the string formatting prior so that cursor.execute runs it as sql
|
||||
query_ = stat.zerver_count_query.query % {'zerver_table' : zerver_table,
|
||||
'property' : stat.property,
|
||||
'interval' : time_interval.interval,
|
||||
'interval' : interval,
|
||||
'join_args' : join_args}
|
||||
cursor = connection.cursor()
|
||||
cursor.execute(query_, {'time_start': time_interval.start, 'time_end': time_interval.end})
|
||||
cursor.execute(query_, {'time_start': start_time, 'time_end': end_time})
|
||||
cursor.close()
|
||||
|
||||
count_user_by_realm_query = """
|
||||
@@ -224,7 +240,7 @@ zerver_count_stream_by_realm = ZerverCountQuery(Stream, RealmCount, count_stream
|
||||
|
||||
COUNT_STATS = {
|
||||
'active_humans': CountStat('active_humans', zerver_count_user_by_realm,
|
||||
{'is_bot': False, 'is_active': True}, 'gauge', 'day'),
|
||||
{'is_bot': False, 'is_active': True}, CountStat.DAY, True),
|
||||
'active_bots': CountStat('active_bots', zerver_count_user_by_realm,
|
||||
{'is_bot': True, 'is_active': True}, 'gauge', 'day'),
|
||||
'messages_sent': CountStat('messages_sent', zerver_count_message_by_user, {}, 'hour', 'hour')}
|
||||
{'is_bot': True, 'is_active': True}, CountStat.DAY, True),
|
||||
'messages_sent': CountStat('messages_sent', zerver_count_message_by_user, {}, CountStat.HOUR, False)}
|
||||
|
||||
Reference in New Issue
Block a user