analytics: Change messages_sent_to_stream to a daily stat.

Analytics database tables are getting big, and so we're likely moving to a
model where ~all stats are day stats, and we keep hourly stats only for the
last N days.

Also changed the name because:
* messages_sent_* suggests the counts (summed over subgroup) should be the
  same as the other messages_sent stats, but they are different (these don't
  include PMs).
* messages_sent_by_stream:is_bot:day is longer than 32 characters, the max
  allowable length for a BaseCount.property.

Includes a database migration to remove the old stat from the analytics
tables.
This commit is contained in:
Rishi Gupta
2017-02-14 19:10:03 -08:00
committed by Tim Abbott
parent b8caf45262
commit 20255e48a4
3 changed files with 39 additions and 10 deletions

View File

@@ -75,7 +75,7 @@ def process_count_stat(stat, fill_to_time):
logger.info("INITIALIZED %s %s" % (stat.property, currently_filled))
elif fill_state.state == FillState.STARTED:
logger.info("UNDO START %s %s" % (stat.property, fill_state.end_time))
do_delete_count_stat_at_hour(stat, fill_state.end_time)
do_delete_counts_at_hour(stat, fill_state.end_time)
currently_filled = fill_state.end_time - timedelta(hours = 1)
do_update_fill_state(fill_state, currently_filled, FillState.DONE)
logger.info("UNDO DONE %s" % (stat.property,))
@@ -112,13 +112,21 @@ def do_fill_count_stat_at_hour(stat, end_time):
do_pull_from_zerver(stat, start_time, end_time)
do_aggregate_to_summary_table(stat, end_time)
def do_delete_count_stat_at_hour(stat, end_time):
def do_delete_counts_at_hour(stat, end_time):
# type: (CountStat, datetime) -> None
UserCount.objects.filter(property = stat.property, end_time = end_time).delete()
StreamCount.objects.filter(property = stat.property, end_time = end_time).delete()
RealmCount.objects.filter(property = stat.property, end_time = end_time).delete()
InstallationCount.objects.filter(property = stat.property, end_time = end_time).delete()
def do_delete_count_stat(property):
# type: (str) -> None
UserCount.objects.filter(property=property).delete()
StreamCount.objects.filter(property=property).delete()
RealmCount.objects.filter(property=property).delete()
InstallationCount.objects.filter(property=property).delete()
FillState.objects.filter(property=property).delete()
def do_drop_all_analytics_tables():
# type: () -> None
UserCount.objects.all().delete()
@@ -347,7 +355,7 @@ COUNT_STATS = {
'messages_sent:client:day': CountStat(
'messages_sent:client:day', zerver_count_message_by_user, {},
(Message, 'sending_client_id'), CountStat.DAY, False),
'messages_sent_to_stream:is_bot:hour': CountStat(
'messages_sent_to_stream:is_bot', zerver_count_message_by_stream, {},
(UserProfile, 'is_bot'), CountStat.HOUR, False)
'messages_in_stream:is_bot:day': CountStat(
'messages_in_stream:is_bot:day', zerver_count_message_by_stream, {},
(UserProfile, 'is_bot'), CountStat.DAY, False)
}