mirror of
https://github.com/zulip/zulip.git
synced 2025-11-09 16:37:23 +00:00
analytics: Remove interval field from *Count tables.
Includes a database migration. The interval field originally existed to facilitate time aggregation (e.g. aggregate_hour_to_day), but such aggregations are now done in the views code or in the frontend.
This commit is contained in:
@@ -109,8 +109,8 @@ def do_fill_count_stat_at_hour(stat, end_time):
|
||||
else: # stat.interval == CountStat.GAUGE
|
||||
start_time = MIN_TIME
|
||||
|
||||
do_pull_from_zerver(stat, start_time, end_time, stat.interval)
|
||||
do_aggregate_to_summary_table(stat, end_time, stat.interval)
|
||||
do_pull_from_zerver(stat, start_time, end_time)
|
||||
do_aggregate_to_summary_table(stat, end_time)
|
||||
|
||||
def do_delete_count_stat_at_hour(stat, end_time):
|
||||
# type: (CountStat, datetime) -> None
|
||||
@@ -127,8 +127,8 @@ def do_drop_all_analytics_tables():
|
||||
InstallationCount.objects.all().delete()
|
||||
FillState.objects.all().delete()
|
||||
|
||||
def do_aggregate_to_summary_table(stat, end_time, interval):
|
||||
# type: (CountStat, datetime, str) -> None
|
||||
def do_aggregate_to_summary_table(stat, end_time):
|
||||
# type: (CountStat, datetime) -> None
|
||||
cursor = connection.cursor()
|
||||
|
||||
# Aggregate into RealmCount
|
||||
@@ -136,23 +136,21 @@ def do_aggregate_to_summary_table(stat, end_time, interval):
|
||||
if analytics_table in (UserCount, StreamCount):
|
||||
realmcount_query = """
|
||||
INSERT INTO analytics_realmcount
|
||||
(realm_id, value, property, subgroup, end_time, interval)
|
||||
(realm_id, value, property, subgroup, end_time)
|
||||
SELECT
|
||||
zerver_realm.id, COALESCE(sum(%(analytics_table)s.value), 0), '%(property)s',
|
||||
%(analytics_table)s.subgroup, %%(end_time)s, '%(interval)s'
|
||||
%(analytics_table)s.subgroup, %%(end_time)s
|
||||
FROM zerver_realm
|
||||
JOIN %(analytics_table)s
|
||||
ON
|
||||
(
|
||||
%(analytics_table)s.realm_id = zerver_realm.id AND
|
||||
%(analytics_table)s.property = '%(property)s' AND
|
||||
%(analytics_table)s.end_time = %%(end_time)s AND
|
||||
%(analytics_table)s.interval = '%(interval)s'
|
||||
%(analytics_table)s.end_time = %%(end_time)s
|
||||
)
|
||||
GROUP BY zerver_realm.id, %(analytics_table)s.subgroup
|
||||
""" % {'analytics_table': analytics_table._meta.db_table,
|
||||
'property': stat.property,
|
||||
'interval': interval}
|
||||
'property': stat.property}
|
||||
start = time.time()
|
||||
cursor.execute(realmcount_query, {'end_time': end_time})
|
||||
end = time.time()
|
||||
@@ -161,18 +159,16 @@ def do_aggregate_to_summary_table(stat, end_time, interval):
|
||||
# Aggregate into InstallationCount
|
||||
installationcount_query = """
|
||||
INSERT INTO analytics_installationcount
|
||||
(value, property, subgroup, end_time, interval)
|
||||
(value, property, subgroup, end_time)
|
||||
SELECT
|
||||
sum(value), '%(property)s', analytics_realmcount.subgroup, %%(end_time)s, '%(interval)s'
|
||||
sum(value), '%(property)s', analytics_realmcount.subgroup, %%(end_time)s
|
||||
FROM analytics_realmcount
|
||||
WHERE
|
||||
(
|
||||
property = '%(property)s' AND
|
||||
end_time = %%(end_time)s AND
|
||||
interval = '%(interval)s'
|
||||
end_time = %%(end_time)s
|
||||
) GROUP BY analytics_realmcount.subgroup
|
||||
""" % {'property': stat.property,
|
||||
'interval': interval}
|
||||
""" % {'property': stat.property}
|
||||
start = time.time()
|
||||
cursor.execute(installationcount_query, {'end_time': end_time})
|
||||
end = time.time()
|
||||
@@ -180,8 +176,8 @@ def do_aggregate_to_summary_table(stat, end_time, interval):
|
||||
cursor.close()
|
||||
|
||||
# This is the only method that hits the prod databases directly.
|
||||
def do_pull_from_zerver(stat, start_time, end_time, interval):
|
||||
# type: (CountStat, datetime, datetime, str) -> None
|
||||
def do_pull_from_zerver(stat, start_time, end_time):
|
||||
# type: (CountStat, datetime, datetime) -> None
|
||||
zerver_table = stat.zerver_count_query.zerver_table._meta.db_table # type: ignore
|
||||
join_args = ' '.join('AND %s.%s = %s' % (zerver_table, key, value)
|
||||
for key, value in stat.filter_args.items())
|
||||
@@ -197,7 +193,6 @@ def do_pull_from_zerver(stat, start_time, end_time, interval):
|
||||
# the string formatting prior so that cursor.execute runs it as sql
|
||||
query_ = stat.zerver_count_query.query % {'zerver_table': zerver_table,
|
||||
'property': stat.property,
|
||||
'interval': interval,
|
||||
'join_args': join_args,
|
||||
'subgroup': subgroup,
|
||||
'group_by_clause': group_by_clause}
|
||||
@@ -210,9 +205,9 @@ def do_pull_from_zerver(stat, start_time, end_time, interval):
|
||||
|
||||
count_user_by_realm_query = """
|
||||
INSERT INTO analytics_realmcount
|
||||
(realm_id, value, property, subgroup, end_time, interval)
|
||||
(realm_id, value, property, subgroup, end_time)
|
||||
SELECT
|
||||
zerver_realm.id, count(%(zerver_table)s),'%(property)s', %(subgroup)s, %%(time_end)s, '%(interval)s'
|
||||
zerver_realm.id, count(%(zerver_table)s),'%(property)s', %(subgroup)s, %%(time_end)s
|
||||
FROM zerver_realm
|
||||
JOIN zerver_userprofile
|
||||
ON
|
||||
@@ -231,9 +226,9 @@ zerver_count_user_by_realm = ZerverCountQuery(UserProfile, RealmCount, count_use
|
||||
# currently .sender_id is only Message specific thing
|
||||
count_message_by_user_query = """
|
||||
INSERT INTO analytics_usercount
|
||||
(user_id, realm_id, value, property, subgroup, end_time, interval)
|
||||
(user_id, realm_id, value, property, subgroup, end_time)
|
||||
SELECT
|
||||
zerver_userprofile.id, zerver_userprofile.realm_id, count(*), '%(property)s', %(subgroup)s, %%(time_end)s, '%(interval)s'
|
||||
zerver_userprofile.id, zerver_userprofile.realm_id, count(*), '%(property)s', %(subgroup)s, %%(time_end)s
|
||||
FROM zerver_userprofile
|
||||
JOIN zerver_message
|
||||
ON
|
||||
@@ -252,9 +247,9 @@ zerver_count_message_by_user = ZerverCountQuery(Message, UserCount, count_messag
|
||||
# Currently unused and untested
|
||||
count_stream_by_realm_query = """
|
||||
INSERT INTO analytics_realmcount
|
||||
(realm_id, value, property, subgroup, end_time, interval)
|
||||
(realm_id, value, property, subgroup, end_time)
|
||||
SELECT
|
||||
zerver_realm.id, count(*), '%(property)s', %(subgroup)s, %%(time_end)s, '%(interval)s'
|
||||
zerver_realm.id, count(*), '%(property)s', %(subgroup)s, %%(time_end)s
|
||||
FROM zerver_realm
|
||||
JOIN zerver_stream
|
||||
ON
|
||||
@@ -276,8 +271,8 @@ zerver_count_stream_by_realm = ZerverCountQuery(Stream, RealmCount, count_stream
|
||||
# it uses 'message_type' from the subquery to fill in the subgroup column.
|
||||
count_message_type_by_user_query = """
|
||||
INSERT INTO analytics_usercount
|
||||
(realm_id, user_id, value, property, subgroup, end_time, interval)
|
||||
SELECT realm_id, id, SUM(count) AS value, '%(property)s', message_type, %%(time_end)s, '%(interval)s'
|
||||
(realm_id, user_id, value, property, subgroup, end_time)
|
||||
SELECT realm_id, id, SUM(count) AS value, '%(property)s', message_type, %%(time_end)s
|
||||
FROM
|
||||
(
|
||||
SELECT zerver_userprofile.realm_id, zerver_userprofile.id, count(*),
|
||||
@@ -314,9 +309,9 @@ zerver_count_message_type_by_user = ZerverCountQuery(Message, UserCount, count_m
|
||||
# the UserProfile table, consider writing a new query for efficiency.
|
||||
count_message_by_stream_query = """
|
||||
INSERT INTO analytics_streamcount
|
||||
(stream_id, realm_id, value, property, subgroup, end_time, interval)
|
||||
(stream_id, realm_id, value, property, subgroup, end_time)
|
||||
SELECT
|
||||
zerver_stream.id, zerver_stream.realm_id, count(*), '%(property)s', %(subgroup)s, %%(time_end)s, '%(interval)s'
|
||||
zerver_stream.id, zerver_stream.realm_id, count(*), '%(property)s', %(subgroup)s, %%(time_end)s
|
||||
FROM zerver_stream
|
||||
JOIN zerver_recipient
|
||||
ON
|
||||
|
||||
@@ -64,7 +64,7 @@ class Command(BaseCommand):
|
||||
for subgroup, values in fixture_data.items():
|
||||
table.objects.bulk_create([
|
||||
table(property=stat.property, subgroup=subgroup, end_time=end_time,
|
||||
interval=stat.interval, value=value, **id_args)
|
||||
value=value, **id_args)
|
||||
for end_time, value in zip(end_times, values) if value != 0])
|
||||
|
||||
stat = COUNT_STATS['active_users:is_bot:day']
|
||||
|
||||
48
analytics/migrations/0007_remove_interval.py
Normal file
48
analytics/migrations/0007_remove_interval.py
Normal file
@@ -0,0 +1,48 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Generated by Django 1.10.4 on 2017-01-16 20:50
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from django.conf import settings
|
||||
from django.db import migrations
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Remove the ``interval`` field from the analytics count models.

    The same pair of operations is applied to installationcount,
    realmcount, streamcount and usercount, in that order: the model's
    ``unique_together`` is first rewritten so that it no longer lists
    ``interval``, and then the ``interval`` field itself is removed.
    """

    dependencies = [
        ('analytics', '0006_add_subgroup_to_unique_constraints'),
    ]

    operations = [
        # installationcount
        migrations.AlterUniqueTogether(
            name='installationcount',
            unique_together={('property', 'subgroup', 'end_time')},
        ),
        migrations.RemoveField(
            model_name='installationcount',
            name='interval',
        ),
        # realmcount
        migrations.AlterUniqueTogether(
            name='realmcount',
            unique_together={('realm', 'property', 'subgroup', 'end_time')},
        ),
        migrations.RemoveField(
            model_name='realmcount',
            name='interval',
        ),
        # streamcount
        migrations.AlterUniqueTogether(
            name='streamcount',
            unique_together={('stream', 'property', 'subgroup', 'end_time')},
        ),
        migrations.RemoveField(
            model_name='streamcount',
            name='interval',
        ),
        # usercount
        migrations.AlterUniqueTogether(
            name='usercount',
            unique_together={('user', 'property', 'subgroup', 'end_time')},
        ),
        migrations.RemoveField(
            model_name='usercount',
            name='interval',
        ),
    ]
|
||||
@@ -46,7 +46,6 @@ class BaseCount(ModelReprMixin, models.Model):
|
||||
property = models.CharField(max_length=32) # type: Text
|
||||
subgroup = models.CharField(max_length=16, null=True) # type: Text
|
||||
end_time = models.DateTimeField() # type: datetime.datetime
|
||||
interval = models.CharField(max_length=8) # type: Text
|
||||
value = models.BigIntegerField() # type: int
|
||||
anomaly = models.ForeignKey(Anomaly, null=True) # type: Optional[Anomaly]
|
||||
|
||||
@@ -66,7 +65,7 @@ class BaseCount(ModelReprMixin, models.Model):
|
||||
class InstallationCount(BaseCount):
|
||||
|
||||
class Meta(object):
|
||||
unique_together = ("property", "subgroup", "end_time", "interval")
|
||||
unique_together = ("property", "subgroup", "end_time")
|
||||
|
||||
@staticmethod
|
||||
def extended_id():
|
||||
@@ -86,7 +85,7 @@ class RealmCount(BaseCount):
|
||||
realm = models.ForeignKey(Realm)
|
||||
|
||||
class Meta(object):
|
||||
unique_together = ("realm", "property", "subgroup", "end_time", "interval")
|
||||
unique_together = ("realm", "property", "subgroup", "end_time")
|
||||
|
||||
@staticmethod
|
||||
def extended_id():
|
||||
@@ -107,7 +106,7 @@ class UserCount(BaseCount):
|
||||
realm = models.ForeignKey(Realm)
|
||||
|
||||
class Meta(object):
|
||||
unique_together = ("user", "property", "subgroup", "end_time", "interval")
|
||||
unique_together = ("user", "property", "subgroup", "end_time")
|
||||
|
||||
@staticmethod
|
||||
def extended_id():
|
||||
@@ -128,7 +127,7 @@ class StreamCount(BaseCount):
|
||||
realm = models.ForeignKey(Realm)
|
||||
|
||||
class Meta(object):
|
||||
unique_together = ("stream", "property", "subgroup", "end_time", "interval")
|
||||
unique_together = ("stream", "property", "subgroup", "end_time")
|
||||
|
||||
@staticmethod
|
||||
def extended_id():
|
||||
|
||||
@@ -149,8 +149,8 @@ class AnalyticsTestCase(TestCase):
|
||||
class TestProcessCountStat(AnalyticsTestCase):
|
||||
def make_dummy_count_stat(self, current_time):
|
||||
# type: (datetime) -> CountStat
|
||||
dummy_query = """INSERT INTO analytics_realmcount (realm_id, property, end_time, interval, value)
|
||||
VALUES (1, 'test stat', '%(end_time)s','hour', 22)""" % {'end_time': current_time}
|
||||
dummy_query = """INSERT INTO analytics_realmcount (realm_id, property, end_time, value)
|
||||
VALUES (1, 'test stat', '%(end_time)s', 22)""" % {'end_time': current_time}
|
||||
count_stat = CountStat('test stat', ZerverCountQuery(Recipient, UserCount, dummy_query),
|
||||
{}, None, CountStat.HOUR, False)
|
||||
return count_stat
|
||||
|
||||
Reference in New Issue
Block a user