analytics: Simplify frequency and measurement interval options.

Change the CountStat object to take an is_gauge variable instead of a
smallest_interval variable. Previously, (smallest_interval, frequency)
could be any of (hour, hour), (hour, day), (hour, gauge), (day, hour),
(day, day), or (day, gauge).
The current change is equivalent to excluding (hour, day) and (day, hour)
from the list above.

This change, along with other recent changes, allows us to simplify how we
handle time intervals. This commit also removes the TimeInterval object.
This commit is contained in:
Rishi Gupta
2016-10-13 15:15:46 -07:00
committed by Tim Abbott
parent 807520411b
commit 82b814a1cd
6 changed files with 72 additions and 143 deletions

View File

@@ -1,23 +1,35 @@
from django.db import connection, models
from django.utils import timezone
from datetime import timedelta, datetime
from analytics.models import InstallationCount, RealmCount, \
UserCount, StreamCount, BaseCount, FillState, get_fill_state, installation_epoch
from analytics.lib.interval import TimeInterval, floor_to_day
from zerver.models import Realm, UserProfile, Message, Stream, models
from zerver.lib.timestamp import floor_to_day
from typing import Any, Optional, Type
from six import text_type
# First post office in Boston
MIN_TIME = datetime(1639, 1, 1, 0, 0, 0, tzinfo=timezone.utc)
class CountStat(object):
def __init__(self, property, zerver_count_query, filter_args, smallest_interval, frequency):
# type: (text_type, ZerverCountQuery, Dict[str, bool], str, str) -> None
HOUR = 'hour'
DAY = 'day'
FREQUENCIES = frozenset([HOUR, DAY])
# Allowed intervals are HOUR, DAY, and, GAUGE
GAUGE = 'gauge'
def __init__(self, property, zerver_count_query, filter_args, frequency, is_gauge):
# type: (text_type, ZerverCountQuery, Dict[str, bool], str, bool) -> None
self.property = property
self.zerver_count_query = zerver_count_query
# might have to do something different for bitfields
self.filter_args = filter_args
self.smallest_interval = smallest_interval
if frequency not in self.FREQUENCIES:
raise ValueError("Unknown frequency: %s" % (frequency,))
self.frequency = frequency
self.interval = self.GAUGE if is_gauge else frequency
class ZerverCountQuery(object):
def __init__(self, zerver_table, analytics_table, query):
@@ -52,18 +64,22 @@ def process_count_stat(stat, fill_to_time):
FillState.objects.filter(property = stat.property).update(state = FillState.DONE)
currently_filled = currently_filled + timedelta(hours = 1)
# We assume end_time is on an hour boundary. It is the caller's
# responsibility to enforce this!
# Note: very soon we are going to disallow CountStats with
# (smallest_interval, frequency) = (hour, day) or (day, hour). The logic below
# reflects that assumption.
# We assume end_time is on an hour boundary, and is timezone aware.
# It is the caller's responsibility to enforce this!
def do_fill_count_stat_at_hour(stat, end_time):
# type: (CountStat, datetime) -> None
if stat.frequency == 'day' and (end_time != floor_to_day(end_time)):
if stat.frequency == CountStat.DAY and (end_time != floor_to_day(end_time)):
return
time_interval = TimeInterval(stat.smallest_interval, end_time)
do_pull_from_zerver(stat, time_interval)
do_aggregate_to_summary_table(stat, time_interval)
if stat.interval == CountStat.HOUR:
start_time = end_time - timedelta(hours = 1)
elif stat.interval == CountStat.DAY:
start_time = end_time - timedelta(days = 1)
else: # stat.interval == CountStat.GAUGE
start_time = MIN_TIME
do_pull_from_zerver(stat, start_time, end_time, stat.interval)
do_aggregate_to_summary_table(stat, end_time, stat.interval)
def do_delete_count_stat_at_hour(stat, end_time):
# type: (CountStat, datetime) -> None
@@ -72,8 +88,8 @@ def do_delete_count_stat_at_hour(stat, end_time):
RealmCount.objects.filter(property = stat.property, end_time = end_time).delete()
InstallationCount.objects.filter(property = stat.property, end_time = end_time).delete()
def do_aggregate_to_summary_table(stat, time_interval):
# type: (CountStat, TimeInterval) -> None
def do_aggregate_to_summary_table(stat, end_time, interval):
# type: (CountStat, datetime, str) -> None
cursor = connection.cursor()
# Aggregate into RealmCount
@@ -96,9 +112,9 @@ def do_aggregate_to_summary_table(stat, time_interval):
GROUP BY zerver_realm.id
""" % {'analytics_table' : analytics_table._meta.db_table,
'property' : stat.property,
'interval' : time_interval.interval}
'interval' : interval}
cursor.execute(realmcount_query, {'end_time': time_interval.end})
cursor.execute(realmcount_query, {'end_time': end_time})
# Aggregate into InstallationCount
installationcount_query = """
@@ -114,9 +130,9 @@ def do_aggregate_to_summary_table(stat, time_interval):
interval = '%(interval)s'
)
""" % {'property': stat.property,
'interval': time_interval.interval}
'interval': interval}
cursor.execute(installationcount_query, {'end_time': time_interval.end})
cursor.execute(installationcount_query, {'end_time': end_time})
cursor.close()
## methods that hit the prod databases directly
@@ -124,8 +140,8 @@ def do_aggregate_to_summary_table(stat, time_interval):
# written in slightly more than needed generality, to reduce copy-paste errors
# as more of these are made / make it easy to extend to a pull_X_by_realm
def do_pull_from_zerver(stat, time_interval):
# type: (CountStat, TimeInterval) -> None
def do_pull_from_zerver(stat, start_time, end_time, interval):
# type: (CountStat, datetime, datetime, str) -> None
zerver_table = stat.zerver_count_query.zerver_table._meta.db_table # type: ignore
join_args = ' '.join('AND %s.%s = %s' % (zerver_table, key, value) \
for key, value in stat.filter_args.items())
@@ -134,10 +150,10 @@ def do_pull_from_zerver(stat, time_interval):
# the string formatting prior so that cursor.execute runs it as sql
query_ = stat.zerver_count_query.query % {'zerver_table' : zerver_table,
'property' : stat.property,
'interval' : time_interval.interval,
'interval' : interval,
'join_args' : join_args}
cursor = connection.cursor()
cursor.execute(query_, {'time_start': time_interval.start, 'time_end': time_interval.end})
cursor.execute(query_, {'time_start': start_time, 'time_end': end_time})
cursor.close()
count_user_by_realm_query = """
@@ -224,7 +240,7 @@ zerver_count_stream_by_realm = ZerverCountQuery(Stream, RealmCount, count_stream
COUNT_STATS = {
'active_humans': CountStat('active_humans', zerver_count_user_by_realm,
{'is_bot': False, 'is_active': True}, 'gauge', 'day'),
{'is_bot': False, 'is_active': True}, CountStat.DAY, True),
'active_bots': CountStat('active_bots', zerver_count_user_by_realm,
{'is_bot': True, 'is_active': True}, 'gauge', 'day'),
'messages_sent': CountStat('messages_sent', zerver_count_message_by_user, {}, 'hour', 'hour')}
{'is_bot': True, 'is_active': True}, CountStat.DAY, True),
'messages_sent': CountStat('messages_sent', zerver_count_message_by_user, {}, CountStat.HOUR, False)}