analytics: Change messages_sent_to_stream to a daily stat.

Analytics database tables are getting big, and so we're likely moving to a
model where ~all stats are day stats, and we keep hourly stats only for the
last N days.

Also changed the name because:
* messages_sent_* suggests the counts (summed over subgroup) should be the
  same as the other messages_sent stats, but they are different (these don't
  include PMs).
* messages_sent_by_stream:is_bot:day is longer than 32 characters, the max
  allowable length for a BaseCount.property.

Includes a database migration to remove the old stat from the analytics
tables.
This commit is contained in:
Rishi Gupta
2017-02-14 19:10:03 -08:00
committed by Tim Abbott
parent b8caf45262
commit 20255e48a4
3 changed files with 39 additions and 10 deletions

View File

@@ -75,7 +75,7 @@ def process_count_stat(stat, fill_to_time):
logger.info("INITIALIZED %s %s" % (stat.property, currently_filled))
elif fill_state.state == FillState.STARTED:
logger.info("UNDO START %s %s" % (stat.property, fill_state.end_time))
do_delete_count_stat_at_hour(stat, fill_state.end_time)
do_delete_counts_at_hour(stat, fill_state.end_time)
currently_filled = fill_state.end_time - timedelta(hours = 1)
do_update_fill_state(fill_state, currently_filled, FillState.DONE)
logger.info("UNDO DONE %s" % (stat.property,))
@@ -112,13 +112,21 @@ def do_fill_count_stat_at_hour(stat, end_time):
do_pull_from_zerver(stat, start_time, end_time)
do_aggregate_to_summary_table(stat, end_time)
def do_delete_count_stat_at_hour(stat, end_time):
def do_delete_counts_at_hour(stat, end_time):
# type: (CountStat, datetime) -> None
UserCount.objects.filter(property = stat.property, end_time = end_time).delete()
StreamCount.objects.filter(property = stat.property, end_time = end_time).delete()
RealmCount.objects.filter(property = stat.property, end_time = end_time).delete()
InstallationCount.objects.filter(property = stat.property, end_time = end_time).delete()
def do_delete_count_stat(property):
# type: (str) -> None
UserCount.objects.filter(property=property).delete()
StreamCount.objects.filter(property=property).delete()
RealmCount.objects.filter(property=property).delete()
InstallationCount.objects.filter(property=property).delete()
FillState.objects.filter(property=property).delete()
def do_drop_all_analytics_tables():
# type: () -> None
UserCount.objects.all().delete()
@@ -347,7 +355,7 @@ COUNT_STATS = {
'messages_sent:client:day': CountStat(
'messages_sent:client:day', zerver_count_message_by_user, {},
(Message, 'sending_client_id'), CountStat.DAY, False),
'messages_sent_to_stream:is_bot:hour': CountStat(
'messages_sent_to_stream:is_bot', zerver_count_message_by_stream, {},
(UserProfile, 'is_bot'), CountStat.HOUR, False)
'messages_in_stream:is_bot:day': CountStat(
'messages_in_stream:is_bot:day', zerver_count_message_by_stream, {},
(UserProfile, 'is_bot'), CountStat.DAY, False)
}

View File

@@ -0,0 +1,20 @@
# -*- coding: utf-8 -*-
from django.db.backends.postgresql_psycopg2.schema import DatabaseSchemaEditor
from django.db.migrations.state import StateApps
from django.db import migrations
from analytics.lib.counts import do_delete_count_stat
def delete_messages_sent_to_stream_stat(apps, schema_editor):
# type: (StateApps, DatabaseSchemaEditor) -> None
do_delete_count_stat('messages_sent_to_stream:is_bot')
class Migration(migrations.Migration):
dependencies = [
('analytics', '0008_add_count_indexes'),
]
operations = [
migrations.RunPython(delete_messages_sent_to_stream_stat),
]

View File

@@ -392,7 +392,7 @@ class TestCountStats(AnalyticsTestCase):
def test_messages_sent_to_stream_by_is_bot(self):
# type: () -> None
stat = COUNT_STATS['messages_sent_to_stream:is_bot:hour']
stat = COUNT_STATS['messages_in_stream:is_bot:day']
self.current_property = stat.property
bot = self.create_user(is_bot=True)
@@ -420,9 +420,10 @@ class TestCountStats(AnalyticsTestCase):
self.assertTableState(StreamCount, ['value', 'subgroup', 'stream'],
[[2, 'false', stream1], [1, 'false', stream2], [2, 'true', stream2],
# "hourly" stream, from TestCountStats.setUp
[1, 'false', Stream.objects.get(name='stream 1')]])
# "hourly" and "daily" stream, from TestCountStats.setUp
[1, 'false', Stream.objects.get(name='stream 1')],
[1, 'false', Stream.objects.get(name='stream 61')]])
self.assertTableState(RealmCount, ['value', 'subgroup', 'realm'],
[[3, 'false'], [2, 'true'], [1, 'false', self.second_realm]])
self.assertTableState(InstallationCount, ['value', 'subgroup'], [[4, 'false'], [2, 'true']])
[[3, 'false'], [2, 'true'], [2, 'false', self.second_realm]])
self.assertTableState(InstallationCount, ['value', 'subgroup'], [[5, 'false'], [2, 'true']])
self.assertTableState(UserCount, [], [])