mirror of
https://github.com/zulip/zulip.git
synced 2025-11-09 16:37:23 +00:00
analytics: Remove interval field from *Count tables.
Includes a database migration. The interval field was originally there to facilitate time aggregation (e.g. aggregate_hour_to_day), but we now do such aggregations in views code or in the frontend.
This commit is contained in:
@@ -109,8 +109,8 @@ def do_fill_count_stat_at_hour(stat, end_time):
|
|||||||
else: # stat.interval == CountStat.GAUGE
|
else: # stat.interval == CountStat.GAUGE
|
||||||
start_time = MIN_TIME
|
start_time = MIN_TIME
|
||||||
|
|
||||||
do_pull_from_zerver(stat, start_time, end_time, stat.interval)
|
do_pull_from_zerver(stat, start_time, end_time)
|
||||||
do_aggregate_to_summary_table(stat, end_time, stat.interval)
|
do_aggregate_to_summary_table(stat, end_time)
|
||||||
|
|
||||||
def do_delete_count_stat_at_hour(stat, end_time):
|
def do_delete_count_stat_at_hour(stat, end_time):
|
||||||
# type: (CountStat, datetime) -> None
|
# type: (CountStat, datetime) -> None
|
||||||
@@ -127,8 +127,8 @@ def do_drop_all_analytics_tables():
|
|||||||
InstallationCount.objects.all().delete()
|
InstallationCount.objects.all().delete()
|
||||||
FillState.objects.all().delete()
|
FillState.objects.all().delete()
|
||||||
|
|
||||||
def do_aggregate_to_summary_table(stat, end_time, interval):
|
def do_aggregate_to_summary_table(stat, end_time):
|
||||||
# type: (CountStat, datetime, str) -> None
|
# type: (CountStat, datetime) -> None
|
||||||
cursor = connection.cursor()
|
cursor = connection.cursor()
|
||||||
|
|
||||||
# Aggregate into RealmCount
|
# Aggregate into RealmCount
|
||||||
@@ -136,23 +136,21 @@ def do_aggregate_to_summary_table(stat, end_time, interval):
|
|||||||
if analytics_table in (UserCount, StreamCount):
|
if analytics_table in (UserCount, StreamCount):
|
||||||
realmcount_query = """
|
realmcount_query = """
|
||||||
INSERT INTO analytics_realmcount
|
INSERT INTO analytics_realmcount
|
||||||
(realm_id, value, property, subgroup, end_time, interval)
|
(realm_id, value, property, subgroup, end_time)
|
||||||
SELECT
|
SELECT
|
||||||
zerver_realm.id, COALESCE(sum(%(analytics_table)s.value), 0), '%(property)s',
|
zerver_realm.id, COALESCE(sum(%(analytics_table)s.value), 0), '%(property)s',
|
||||||
%(analytics_table)s.subgroup, %%(end_time)s, '%(interval)s'
|
%(analytics_table)s.subgroup, %%(end_time)s
|
||||||
FROM zerver_realm
|
FROM zerver_realm
|
||||||
JOIN %(analytics_table)s
|
JOIN %(analytics_table)s
|
||||||
ON
|
ON
|
||||||
(
|
(
|
||||||
%(analytics_table)s.realm_id = zerver_realm.id AND
|
%(analytics_table)s.realm_id = zerver_realm.id AND
|
||||||
%(analytics_table)s.property = '%(property)s' AND
|
%(analytics_table)s.property = '%(property)s' AND
|
||||||
%(analytics_table)s.end_time = %%(end_time)s AND
|
%(analytics_table)s.end_time = %%(end_time)s
|
||||||
%(analytics_table)s.interval = '%(interval)s'
|
|
||||||
)
|
)
|
||||||
GROUP BY zerver_realm.id, %(analytics_table)s.subgroup
|
GROUP BY zerver_realm.id, %(analytics_table)s.subgroup
|
||||||
""" % {'analytics_table': analytics_table._meta.db_table,
|
""" % {'analytics_table': analytics_table._meta.db_table,
|
||||||
'property': stat.property,
|
'property': stat.property}
|
||||||
'interval': interval}
|
|
||||||
start = time.time()
|
start = time.time()
|
||||||
cursor.execute(realmcount_query, {'end_time': end_time})
|
cursor.execute(realmcount_query, {'end_time': end_time})
|
||||||
end = time.time()
|
end = time.time()
|
||||||
@@ -161,18 +159,16 @@ def do_aggregate_to_summary_table(stat, end_time, interval):
|
|||||||
# Aggregate into InstallationCount
|
# Aggregate into InstallationCount
|
||||||
installationcount_query = """
|
installationcount_query = """
|
||||||
INSERT INTO analytics_installationcount
|
INSERT INTO analytics_installationcount
|
||||||
(value, property, subgroup, end_time, interval)
|
(value, property, subgroup, end_time)
|
||||||
SELECT
|
SELECT
|
||||||
sum(value), '%(property)s', analytics_realmcount.subgroup, %%(end_time)s, '%(interval)s'
|
sum(value), '%(property)s', analytics_realmcount.subgroup, %%(end_time)s
|
||||||
FROM analytics_realmcount
|
FROM analytics_realmcount
|
||||||
WHERE
|
WHERE
|
||||||
(
|
(
|
||||||
property = '%(property)s' AND
|
property = '%(property)s' AND
|
||||||
end_time = %%(end_time)s AND
|
end_time = %%(end_time)s
|
||||||
interval = '%(interval)s'
|
|
||||||
) GROUP BY analytics_realmcount.subgroup
|
) GROUP BY analytics_realmcount.subgroup
|
||||||
""" % {'property': stat.property,
|
""" % {'property': stat.property}
|
||||||
'interval': interval}
|
|
||||||
start = time.time()
|
start = time.time()
|
||||||
cursor.execute(installationcount_query, {'end_time': end_time})
|
cursor.execute(installationcount_query, {'end_time': end_time})
|
||||||
end = time.time()
|
end = time.time()
|
||||||
@@ -180,8 +176,8 @@ def do_aggregate_to_summary_table(stat, end_time, interval):
|
|||||||
cursor.close()
|
cursor.close()
|
||||||
|
|
||||||
# This is the only method that hits the prod databases directly.
|
# This is the only method that hits the prod databases directly.
|
||||||
def do_pull_from_zerver(stat, start_time, end_time, interval):
|
def do_pull_from_zerver(stat, start_time, end_time):
|
||||||
# type: (CountStat, datetime, datetime, str) -> None
|
# type: (CountStat, datetime, datetime) -> None
|
||||||
zerver_table = stat.zerver_count_query.zerver_table._meta.db_table # type: ignore
|
zerver_table = stat.zerver_count_query.zerver_table._meta.db_table # type: ignore
|
||||||
join_args = ' '.join('AND %s.%s = %s' % (zerver_table, key, value)
|
join_args = ' '.join('AND %s.%s = %s' % (zerver_table, key, value)
|
||||||
for key, value in stat.filter_args.items())
|
for key, value in stat.filter_args.items())
|
||||||
@@ -197,7 +193,6 @@ def do_pull_from_zerver(stat, start_time, end_time, interval):
|
|||||||
# the string formatting prior so that cursor.execute runs it as sql
|
# the string formatting prior so that cursor.execute runs it as sql
|
||||||
query_ = stat.zerver_count_query.query % {'zerver_table': zerver_table,
|
query_ = stat.zerver_count_query.query % {'zerver_table': zerver_table,
|
||||||
'property': stat.property,
|
'property': stat.property,
|
||||||
'interval': interval,
|
|
||||||
'join_args': join_args,
|
'join_args': join_args,
|
||||||
'subgroup': subgroup,
|
'subgroup': subgroup,
|
||||||
'group_by_clause': group_by_clause}
|
'group_by_clause': group_by_clause}
|
||||||
@@ -210,9 +205,9 @@ def do_pull_from_zerver(stat, start_time, end_time, interval):
|
|||||||
|
|
||||||
count_user_by_realm_query = """
|
count_user_by_realm_query = """
|
||||||
INSERT INTO analytics_realmcount
|
INSERT INTO analytics_realmcount
|
||||||
(realm_id, value, property, subgroup, end_time, interval)
|
(realm_id, value, property, subgroup, end_time)
|
||||||
SELECT
|
SELECT
|
||||||
zerver_realm.id, count(%(zerver_table)s),'%(property)s', %(subgroup)s, %%(time_end)s, '%(interval)s'
|
zerver_realm.id, count(%(zerver_table)s),'%(property)s', %(subgroup)s, %%(time_end)s
|
||||||
FROM zerver_realm
|
FROM zerver_realm
|
||||||
JOIN zerver_userprofile
|
JOIN zerver_userprofile
|
||||||
ON
|
ON
|
||||||
@@ -231,9 +226,9 @@ zerver_count_user_by_realm = ZerverCountQuery(UserProfile, RealmCount, count_use
|
|||||||
# currently .sender_id is only Message specific thing
|
# currently .sender_id is only Message specific thing
|
||||||
count_message_by_user_query = """
|
count_message_by_user_query = """
|
||||||
INSERT INTO analytics_usercount
|
INSERT INTO analytics_usercount
|
||||||
(user_id, realm_id, value, property, subgroup, end_time, interval)
|
(user_id, realm_id, value, property, subgroup, end_time)
|
||||||
SELECT
|
SELECT
|
||||||
zerver_userprofile.id, zerver_userprofile.realm_id, count(*), '%(property)s', %(subgroup)s, %%(time_end)s, '%(interval)s'
|
zerver_userprofile.id, zerver_userprofile.realm_id, count(*), '%(property)s', %(subgroup)s, %%(time_end)s
|
||||||
FROM zerver_userprofile
|
FROM zerver_userprofile
|
||||||
JOIN zerver_message
|
JOIN zerver_message
|
||||||
ON
|
ON
|
||||||
@@ -252,9 +247,9 @@ zerver_count_message_by_user = ZerverCountQuery(Message, UserCount, count_messag
|
|||||||
# Currently unused and untested
|
# Currently unused and untested
|
||||||
count_stream_by_realm_query = """
|
count_stream_by_realm_query = """
|
||||||
INSERT INTO analytics_realmcount
|
INSERT INTO analytics_realmcount
|
||||||
(realm_id, value, property, subgroup, end_time, interval)
|
(realm_id, value, property, subgroup, end_time)
|
||||||
SELECT
|
SELECT
|
||||||
zerver_realm.id, count(*), '%(property)s', %(subgroup)s, %%(time_end)s, '%(interval)s'
|
zerver_realm.id, count(*), '%(property)s', %(subgroup)s, %%(time_end)s
|
||||||
FROM zerver_realm
|
FROM zerver_realm
|
||||||
JOIN zerver_stream
|
JOIN zerver_stream
|
||||||
ON
|
ON
|
||||||
@@ -276,8 +271,8 @@ zerver_count_stream_by_realm = ZerverCountQuery(Stream, RealmCount, count_stream
|
|||||||
# it uses 'message_type' from the subquery to fill in the subgroup column.
|
# it uses 'message_type' from the subquery to fill in the subgroup column.
|
||||||
count_message_type_by_user_query = """
|
count_message_type_by_user_query = """
|
||||||
INSERT INTO analytics_usercount
|
INSERT INTO analytics_usercount
|
||||||
(realm_id, user_id, value, property, subgroup, end_time, interval)
|
(realm_id, user_id, value, property, subgroup, end_time)
|
||||||
SELECT realm_id, id, SUM(count) AS value, '%(property)s', message_type, %%(time_end)s, '%(interval)s'
|
SELECT realm_id, id, SUM(count) AS value, '%(property)s', message_type, %%(time_end)s
|
||||||
FROM
|
FROM
|
||||||
(
|
(
|
||||||
SELECT zerver_userprofile.realm_id, zerver_userprofile.id, count(*),
|
SELECT zerver_userprofile.realm_id, zerver_userprofile.id, count(*),
|
||||||
@@ -314,9 +309,9 @@ zerver_count_message_type_by_user = ZerverCountQuery(Message, UserCount, count_m
|
|||||||
# the UserProfile table, consider writing a new query for efficiency.
|
# the UserProfile table, consider writing a new query for efficiency.
|
||||||
count_message_by_stream_query = """
|
count_message_by_stream_query = """
|
||||||
INSERT INTO analytics_streamcount
|
INSERT INTO analytics_streamcount
|
||||||
(stream_id, realm_id, value, property, subgroup, end_time, interval)
|
(stream_id, realm_id, value, property, subgroup, end_time)
|
||||||
SELECT
|
SELECT
|
||||||
zerver_stream.id, zerver_stream.realm_id, count(*), '%(property)s', %(subgroup)s, %%(time_end)s, '%(interval)s'
|
zerver_stream.id, zerver_stream.realm_id, count(*), '%(property)s', %(subgroup)s, %%(time_end)s
|
||||||
FROM zerver_stream
|
FROM zerver_stream
|
||||||
JOIN zerver_recipient
|
JOIN zerver_recipient
|
||||||
ON
|
ON
|
||||||
|
|||||||
@@ -64,7 +64,7 @@ class Command(BaseCommand):
|
|||||||
for subgroup, values in fixture_data.items():
|
for subgroup, values in fixture_data.items():
|
||||||
table.objects.bulk_create([
|
table.objects.bulk_create([
|
||||||
table(property=stat.property, subgroup=subgroup, end_time=end_time,
|
table(property=stat.property, subgroup=subgroup, end_time=end_time,
|
||||||
interval=stat.interval, value=value, **id_args)
|
value=value, **id_args)
|
||||||
for end_time, value in zip(end_times, values) if value != 0])
|
for end_time, value in zip(end_times, values) if value != 0])
|
||||||
|
|
||||||
stat = COUNT_STATS['active_users:is_bot:day']
|
stat = COUNT_STATS['active_users:is_bot:day']
|
||||||
|
|||||||
48
analytics/migrations/0007_remove_interval.py
Normal file
48
analytics/migrations/0007_remove_interval.py
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# Generated by Django 1.10.4 on 2017-01-16 20:50
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
from django.db import migrations
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
('analytics', '0006_add_subgroup_to_unique_constraints'),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.AlterUniqueTogether(
|
||||||
|
name='installationcount',
|
||||||
|
unique_together=set([('property', 'subgroup', 'end_time')]),
|
||||||
|
),
|
||||||
|
migrations.RemoveField(
|
||||||
|
model_name='installationcount',
|
||||||
|
name='interval',
|
||||||
|
),
|
||||||
|
migrations.AlterUniqueTogether(
|
||||||
|
name='realmcount',
|
||||||
|
unique_together=set([('realm', 'property', 'subgroup', 'end_time')]),
|
||||||
|
),
|
||||||
|
migrations.RemoveField(
|
||||||
|
model_name='realmcount',
|
||||||
|
name='interval',
|
||||||
|
),
|
||||||
|
migrations.AlterUniqueTogether(
|
||||||
|
name='streamcount',
|
||||||
|
unique_together=set([('stream', 'property', 'subgroup', 'end_time')]),
|
||||||
|
),
|
||||||
|
migrations.RemoveField(
|
||||||
|
model_name='streamcount',
|
||||||
|
name='interval',
|
||||||
|
),
|
||||||
|
migrations.AlterUniqueTogether(
|
||||||
|
name='usercount',
|
||||||
|
unique_together=set([('user', 'property', 'subgroup', 'end_time')]),
|
||||||
|
),
|
||||||
|
migrations.RemoveField(
|
||||||
|
model_name='usercount',
|
||||||
|
name='interval',
|
||||||
|
),
|
||||||
|
]
|
||||||
@@ -46,7 +46,6 @@ class BaseCount(ModelReprMixin, models.Model):
|
|||||||
property = models.CharField(max_length=32) # type: Text
|
property = models.CharField(max_length=32) # type: Text
|
||||||
subgroup = models.CharField(max_length=16, null=True) # type: Text
|
subgroup = models.CharField(max_length=16, null=True) # type: Text
|
||||||
end_time = models.DateTimeField() # type: datetime.datetime
|
end_time = models.DateTimeField() # type: datetime.datetime
|
||||||
interval = models.CharField(max_length=8) # type: Text
|
|
||||||
value = models.BigIntegerField() # type: int
|
value = models.BigIntegerField() # type: int
|
||||||
anomaly = models.ForeignKey(Anomaly, null=True) # type: Optional[Anomaly]
|
anomaly = models.ForeignKey(Anomaly, null=True) # type: Optional[Anomaly]
|
||||||
|
|
||||||
@@ -66,7 +65,7 @@ class BaseCount(ModelReprMixin, models.Model):
|
|||||||
class InstallationCount(BaseCount):
|
class InstallationCount(BaseCount):
|
||||||
|
|
||||||
class Meta(object):
|
class Meta(object):
|
||||||
unique_together = ("property", "subgroup", "end_time", "interval")
|
unique_together = ("property", "subgroup", "end_time")
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def extended_id():
|
def extended_id():
|
||||||
@@ -86,7 +85,7 @@ class RealmCount(BaseCount):
|
|||||||
realm = models.ForeignKey(Realm)
|
realm = models.ForeignKey(Realm)
|
||||||
|
|
||||||
class Meta(object):
|
class Meta(object):
|
||||||
unique_together = ("realm", "property", "subgroup", "end_time", "interval")
|
unique_together = ("realm", "property", "subgroup", "end_time")
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def extended_id():
|
def extended_id():
|
||||||
@@ -107,7 +106,7 @@ class UserCount(BaseCount):
|
|||||||
realm = models.ForeignKey(Realm)
|
realm = models.ForeignKey(Realm)
|
||||||
|
|
||||||
class Meta(object):
|
class Meta(object):
|
||||||
unique_together = ("user", "property", "subgroup", "end_time", "interval")
|
unique_together = ("user", "property", "subgroup", "end_time")
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def extended_id():
|
def extended_id():
|
||||||
@@ -128,7 +127,7 @@ class StreamCount(BaseCount):
|
|||||||
realm = models.ForeignKey(Realm)
|
realm = models.ForeignKey(Realm)
|
||||||
|
|
||||||
class Meta(object):
|
class Meta(object):
|
||||||
unique_together = ("stream", "property", "subgroup", "end_time", "interval")
|
unique_together = ("stream", "property", "subgroup", "end_time")
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def extended_id():
|
def extended_id():
|
||||||
|
|||||||
@@ -149,8 +149,8 @@ class AnalyticsTestCase(TestCase):
|
|||||||
class TestProcessCountStat(AnalyticsTestCase):
|
class TestProcessCountStat(AnalyticsTestCase):
|
||||||
def make_dummy_count_stat(self, current_time):
|
def make_dummy_count_stat(self, current_time):
|
||||||
# type: (datetime) -> CountStat
|
# type: (datetime) -> CountStat
|
||||||
dummy_query = """INSERT INTO analytics_realmcount (realm_id, property, end_time, interval, value)
|
dummy_query = """INSERT INTO analytics_realmcount (realm_id, property, end_time, value)
|
||||||
VALUES (1, 'test stat', '%(end_time)s','hour', 22)""" % {'end_time': current_time}
|
VALUES (1, 'test stat', '%(end_time)s', 22)""" % {'end_time': current_time}
|
||||||
count_stat = CountStat('test stat', ZerverCountQuery(Recipient, UserCount, dummy_query),
|
count_stat = CountStat('test stat', ZerverCountQuery(Recipient, UserCount, dummy_query),
|
||||||
{}, None, CountStat.HOUR, False)
|
{}, None, CountStat.HOUR, False)
|
||||||
return count_stat
|
return count_stat
|
||||||
|
|||||||
Reference in New Issue
Block a user