analytics: Only update daily stats on day boundaries.

Previously we would also update FillState for daily stats on hourly
boundaries. This created two extra queries on the FillState table every hour
(for each daily CountStat), adding roughly 50ms of extra processing for each
such CountStat each day, as well as two extra lines each hour in the
analytics log. This can be a minor annoyance when backfilling stats.
This commit is contained in:
Rishi Gupta
2017-04-15 00:23:39 -07:00
committed by Tim Abbott
parent 2535f6c8f2
commit 5e49da9285
2 changed files with 32 additions and 8 deletions

View File

@@ -80,6 +80,13 @@ class DataCollector(object):
def process_count_stat(stat, fill_to_time):
# type: (CountStat, datetime) -> None
if stat.frequency == CountStat.HOUR:
time_increment = timedelta(hours=1)
elif stat.frequency == CountStat.DAY:
time_increment = timedelta(days=1)
else:
raise AssertionError("Unknown frequency: %s" % (stat.frequency,))
fill_state = FillState.objects.filter(property=stat.property).first()
if fill_state is None:
currently_filled = installation_epoch()
@@ -90,7 +97,7 @@ def process_count_stat(stat, fill_to_time):
elif fill_state.state == FillState.STARTED:
logger.info("UNDO START %s %s" % (stat.property, fill_state.end_time))
do_delete_counts_at_hour(stat, fill_state.end_time)
currently_filled = fill_state.end_time - timedelta(hours = 1)
currently_filled = fill_state.end_time - time_increment
do_update_fill_state(fill_state, currently_filled, FillState.DONE)
logger.info("UNDO DONE %s" % (stat.property,))
elif fill_state.state == FillState.DONE:
@@ -107,7 +114,7 @@ def process_count_stat(stat, fill_to_time):
return
fill_to_time = min(fill_to_time, dependency_fill_time)
currently_filled = currently_filled + timedelta(hours = 1)
currently_filled = currently_filled + time_increment
while currently_filled <= fill_to_time:
logger.info("START %s %s" % (stat.property, currently_filled))
start = time.time()
@@ -115,7 +122,7 @@ def process_count_stat(stat, fill_to_time):
do_fill_count_stat_at_hour(stat, currently_filled)
do_update_fill_state(fill_state, currently_filled, FillState.DONE)
end = time.time()
currently_filled = currently_filled + timedelta(hours = 1)
currently_filled = currently_filled + time_increment
logger.info("DONE %s (%dms)" % (stat.property, (end-start)*1000))
def do_update_fill_state(fill_state, end_time, state):
@@ -124,13 +131,10 @@ def do_update_fill_state(fill_state, end_time, state):
fill_state.state = state
fill_state.save()
# We assume end_time is on an hour boundary, and is timezone aware.
# It is the caller's responsibility to enforce this!
# We assume end_time is valid (e.g. is on a day or hour boundary as appropriate)
# and is timezone aware. It is the caller's responsibility to enforce this!
def do_fill_count_stat_at_hour(stat, end_time):
# type: (CountStat, datetime) -> None
if stat.frequency == CountStat.DAY and (end_time != floor_to_day(end_time)):
return
start_time = end_time - stat.interval
if not isinstance(stat, LoggingCountStat):
timer = time.time()