analytics: Improve escaping correctness with psycopg2.sql.

Signed-off-by: Anders Kaseorg <anders@zulip.com>
This commit is contained in:
Anders Kaseorg
2020-06-09 01:46:28 -07:00
committed by Tim Abbott
parent db197d0abd
commit 5839fdf963
3 changed files with 175 additions and 129 deletions

View File

@@ -8,6 +8,7 @@ from typing import Callable, Dict, List, \
from django.conf import settings from django.conf import settings
from django.db import connection from django.db import connection
from django.db.models import F from django.db.models import F
from psycopg2.sql import Composable, Identifier, Literal, SQL
from analytics.models import BaseCount, \ from analytics.models import BaseCount, \
FillState, InstallationCount, RealmCount, StreamCount, \ FillState, InstallationCount, RealmCount, StreamCount, \
@@ -164,31 +165,35 @@ def do_aggregate_to_summary_table(stat: CountStat, end_time: datetime,
# Aggregate into RealmCount # Aggregate into RealmCount
output_table = stat.data_collector.output_table output_table = stat.data_collector.output_table
if realm is not None: if realm is not None:
realm_clause = "AND zerver_realm.id = %s" % (realm.id,) realm_clause = SQL("AND zerver_realm.id = {}").format(Literal(realm.id))
else: else:
realm_clause = "" realm_clause = SQL("")
if output_table in (UserCount, StreamCount): if output_table in (UserCount, StreamCount):
realmcount_query = """ realmcount_query = SQL("""
INSERT INTO analytics_realmcount INSERT INTO analytics_realmcount
(realm_id, value, property, subgroup, end_time) (realm_id, value, property, subgroup, end_time)
SELECT SELECT
zerver_realm.id, COALESCE(sum(%(output_table)s.value), 0), '%(property)s', zerver_realm.id, COALESCE(sum({output_table}.value), 0), %(property)s,
%(output_table)s.subgroup, %%(end_time)s {output_table}.subgroup, %(end_time)s
FROM zerver_realm FROM zerver_realm
JOIN %(output_table)s JOIN {output_table}
ON ON
zerver_realm.id = %(output_table)s.realm_id zerver_realm.id = {output_table}.realm_id
WHERE WHERE
%(output_table)s.property = '%(property)s' AND {output_table}.property = %(property)s AND
%(output_table)s.end_time = %%(end_time)s {output_table}.end_time = %(end_time)s
%(realm_clause)s {realm_clause}
GROUP BY zerver_realm.id, %(output_table)s.subgroup GROUP BY zerver_realm.id, {output_table}.subgroup
""" % {'output_table': output_table._meta.db_table, """).format(
'property': stat.property, output_table=Identifier(output_table._meta.db_table),
'realm_clause': realm_clause} realm_clause=realm_clause,
)
start = time.time() start = time.time()
cursor.execute(realmcount_query, {'end_time': end_time}) cursor.execute(realmcount_query, {
'property': stat.property,
'end_time': end_time,
})
end = time.time() end = time.time()
logger.info( logger.info(
"%s RealmCount aggregation (%dms/%sr)", "%s RealmCount aggregation (%dms/%sr)",
@@ -201,19 +206,22 @@ def do_aggregate_to_summary_table(stat: CountStat, end_time: datetime,
# #
# TODO: Add support for updating installation data after # TODO: Add support for updating installation data after
# changing an individual realm's values. # changing an individual realm's values.
installationcount_query = """ installationcount_query = SQL("""
INSERT INTO analytics_installationcount INSERT INTO analytics_installationcount
(value, property, subgroup, end_time) (value, property, subgroup, end_time)
SELECT SELECT
sum(value), '%(property)s', analytics_realmcount.subgroup, %%(end_time)s sum(value), %(property)s, analytics_realmcount.subgroup, %(end_time)s
FROM analytics_realmcount FROM analytics_realmcount
WHERE WHERE
property = '%(property)s' AND property = %(property)s AND
end_time = %%(end_time)s end_time = %(end_time)s
GROUP BY analytics_realmcount.subgroup GROUP BY analytics_realmcount.subgroup
""" % {'property': stat.property} """)
start = time.time() start = time.time()
cursor.execute(installationcount_query, {'end_time': end_time}) cursor.execute(installationcount_query, {
'property': stat.property,
'end_time': end_time,
})
end = time.time() end = time.time()
logger.info( logger.info(
"%s InstallationCount aggregation (%dms/%sr)", "%s InstallationCount aggregation (%dms/%sr)",
@@ -264,29 +272,45 @@ def do_drop_single_stat(property: str) -> None:
## DataCollector-level operations ## ## DataCollector-level operations ##
def do_pull_by_sql_query(property: str, start_time: datetime, end_time: datetime, query: str, QueryFn = Callable[[Dict[str, Composable]], Composable]
group_by: Optional[Tuple[models.Model, str]]) -> int:
def do_pull_by_sql_query(
property: str,
start_time: datetime,
end_time: datetime,
query: QueryFn,
group_by: Optional[Tuple[models.Model, str]],
) -> int:
if group_by is None: if group_by is None:
subgroup = 'NULL' subgroup = SQL('NULL')
group_by_clause = '' group_by_clause = SQL('')
else: else:
subgroup = '%s.%s' % (group_by[0]._meta.db_table, group_by[1]) subgroup = Identifier(group_by[0]._meta.db_table, group_by[1])
group_by_clause = ', ' + subgroup group_by_clause = SQL(', {}').format(subgroup)
# We do string replacement here because cursor.execute will reject a # We do string replacement here because cursor.execute will reject a
# group_by_clause given as a param. # group_by_clause given as a param.
# We pass in the datetimes as params to cursor.execute so that we don't have to # We pass in the datetimes as params to cursor.execute so that we don't have to
# think about how to convert python datetimes to SQL datetimes. # think about how to convert python datetimes to SQL datetimes.
query_ = query % {'property': property, 'subgroup': subgroup, query_ = query({
'group_by_clause': group_by_clause} 'subgroup': subgroup,
'group_by_clause': group_by_clause,
})
cursor = connection.cursor() cursor = connection.cursor()
cursor.execute(query_, {'time_start': start_time, 'time_end': end_time}) cursor.execute(query_, {
'property': property,
'time_start': start_time,
'time_end': end_time,
})
rowcount = cursor.rowcount rowcount = cursor.rowcount
cursor.close() cursor.close()
return rowcount return rowcount
def sql_data_collector(output_table: Type[BaseCount], query: str, def sql_data_collector(
group_by: Optional[Tuple[models.Model, str]]) -> DataCollector: output_table: Type[BaseCount],
query: QueryFn,
group_by: Optional[Tuple[models.Model, str]],
) -> DataCollector:
def pull_function(property: str, start_time: datetime, end_time: datetime, def pull_function(property: str, start_time: datetime, end_time: datetime,
realm: Optional[Realm] = None) -> int: realm: Optional[Realm] = None) -> int:
# The pull function type needs to accept a Realm argument # The pull function type needs to accept a Realm argument
@@ -320,39 +344,39 @@ def do_pull_minutes_active(property: str, start_time: datetime, end_time: dateti
UserCount.objects.bulk_create(rows) UserCount.objects.bulk_create(rows)
return len(rows) return len(rows)
def count_message_by_user_query(realm: Optional[Realm]) -> str: def count_message_by_user_query(realm: Optional[Realm]) -> QueryFn:
if realm is None: if realm is None:
realm_clause = "" realm_clause = SQL("")
else: else:
realm_clause = "zerver_userprofile.realm_id = %s AND" % (realm.id,) realm_clause = SQL("zerver_userprofile.realm_id = {} AND").format(Literal(realm.id))
return """ return lambda kwargs: SQL("""
INSERT INTO analytics_usercount INSERT INTO analytics_usercount
(user_id, realm_id, value, property, subgroup, end_time) (user_id, realm_id, value, property, subgroup, end_time)
SELECT SELECT
zerver_userprofile.id, zerver_userprofile.realm_id, count(*), zerver_userprofile.id, zerver_userprofile.realm_id, count(*),
'%(property)s', %(subgroup)s, %%(time_end)s %(property)s, {subgroup}, %(time_end)s
FROM zerver_userprofile FROM zerver_userprofile
JOIN zerver_message JOIN zerver_message
ON ON
zerver_userprofile.id = zerver_message.sender_id zerver_userprofile.id = zerver_message.sender_id
WHERE WHERE
zerver_userprofile.date_joined < %%(time_end)s AND zerver_userprofile.date_joined < %(time_end)s AND
zerver_message.date_sent >= %%(time_start)s AND zerver_message.date_sent >= %(time_start)s AND
{realm_clause} {realm_clause}
zerver_message.date_sent < %%(time_end)s zerver_message.date_sent < %(time_end)s
GROUP BY zerver_userprofile.id %(group_by_clause)s GROUP BY zerver_userprofile.id {group_by_clause}
""".format(realm_clause=realm_clause) """).format(**kwargs, realm_clause=realm_clause)
# Note: ignores the group_by / group_by_clause. # Note: ignores the group_by / group_by_clause.
def count_message_type_by_user_query(realm: Optional[Realm]) -> str: def count_message_type_by_user_query(realm: Optional[Realm]) -> QueryFn:
if realm is None: if realm is None:
realm_clause = "" realm_clause = SQL("")
else: else:
realm_clause = "zerver_userprofile.realm_id = %s AND" % (realm.id,) realm_clause = SQL("zerver_userprofile.realm_id = {} AND").format(Literal(realm.id))
return """ return lambda kwargs: SQL("""
INSERT INTO analytics_usercount INSERT INTO analytics_usercount
(realm_id, user_id, value, property, subgroup, end_time) (realm_id, user_id, value, property, subgroup, end_time)
SELECT realm_id, id, SUM(count) AS value, '%(property)s', message_type, %%(time_end)s SELECT realm_id, id, SUM(count) AS value, %(property)s, message_type, %(time_end)s
FROM FROM
( (
SELECT zerver_userprofile.realm_id, zerver_userprofile.id, count(*), SELECT zerver_userprofile.realm_id, zerver_userprofile.id, count(*),
@@ -370,9 +394,9 @@ def count_message_type_by_user_query(realm: Optional[Realm]) -> str:
JOIN zerver_message JOIN zerver_message
ON ON
zerver_userprofile.id = zerver_message.sender_id AND zerver_userprofile.id = zerver_message.sender_id AND
zerver_message.date_sent >= %%(time_start)s AND zerver_message.date_sent >= %(time_start)s AND
{realm_clause} {realm_clause}
zerver_message.date_sent < %%(time_end)s zerver_message.date_sent < %(time_end)s
JOIN zerver_recipient JOIN zerver_recipient
ON ON
zerver_message.recipient_id = zerver_recipient.id zerver_message.recipient_id = zerver_recipient.id
@@ -384,22 +408,22 @@ def count_message_type_by_user_query(realm: Optional[Realm]) -> str:
zerver_recipient.type, zerver_stream.invite_only zerver_recipient.type, zerver_stream.invite_only
) AS subquery ) AS subquery
GROUP BY realm_id, id, message_type GROUP BY realm_id, id, message_type
""".format(realm_clause=realm_clause) """).format(**kwargs, realm_clause=realm_clause)
# This query joins to the UserProfile table since all current queries that # This query joins to the UserProfile table since all current queries that
# use this also subgroup on UserProfile.is_bot. If in the future there is a # use this also subgroup on UserProfile.is_bot. If in the future there is a
# stat that counts messages by stream and doesn't need the UserProfile # stat that counts messages by stream and doesn't need the UserProfile
# table, consider writing a new query for efficiency. # table, consider writing a new query for efficiency.
def count_message_by_stream_query(realm: Optional[Realm]) -> str: def count_message_by_stream_query(realm: Optional[Realm]) -> QueryFn:
if realm is None: if realm is None:
realm_clause = "" realm_clause = SQL("")
else: else:
realm_clause = "zerver_stream.realm_id = %s AND" % (realm.id,) realm_clause = SQL("zerver_stream.realm_id = {} AND").format(Literal(realm.id))
return """ return lambda kwargs: SQL("""
INSERT INTO analytics_streamcount INSERT INTO analytics_streamcount
(stream_id, realm_id, value, property, subgroup, end_time) (stream_id, realm_id, value, property, subgroup, end_time)
SELECT SELECT
zerver_stream.id, zerver_stream.realm_id, count(*), '%(property)s', %(subgroup)s, %%(time_end)s zerver_stream.id, zerver_stream.realm_id, count(*), %(property)s, {subgroup}, %(time_end)s
FROM zerver_stream FROM zerver_stream
JOIN zerver_recipient JOIN zerver_recipient
ON ON
@@ -411,53 +435,53 @@ def count_message_by_stream_query(realm: Optional[Realm]) -> str:
ON ON
zerver_message.sender_id = zerver_userprofile.id zerver_message.sender_id = zerver_userprofile.id
WHERE WHERE
zerver_stream.date_created < %%(time_end)s AND zerver_stream.date_created < %(time_end)s AND
zerver_recipient.type = 2 AND zerver_recipient.type = 2 AND
zerver_message.date_sent >= %%(time_start)s AND zerver_message.date_sent >= %(time_start)s AND
{realm_clause} {realm_clause}
zerver_message.date_sent < %%(time_end)s zerver_message.date_sent < %(time_end)s
GROUP BY zerver_stream.id %(group_by_clause)s GROUP BY zerver_stream.id {group_by_clause}
""".format(realm_clause=realm_clause) """).format(**kwargs, realm_clause=realm_clause)
# Hardcodes the query needed by active_users:is_bot:day, since that is # Hardcodes the query needed by active_users:is_bot:day, since that is
# currently the only stat that uses this. # currently the only stat that uses this.
def count_user_by_realm_query(realm: Optional[Realm]) -> str: def count_user_by_realm_query(realm: Optional[Realm]) -> QueryFn:
if realm is None: if realm is None:
realm_clause = "" realm_clause = SQL("")
else: else:
realm_clause = "zerver_userprofile.realm_id = %s AND" % (realm.id,) realm_clause = SQL("zerver_userprofile.realm_id = {} AND").format(Literal(realm.id))
return """ return lambda kwargs: SQL("""
INSERT INTO analytics_realmcount INSERT INTO analytics_realmcount
(realm_id, value, property, subgroup, end_time) (realm_id, value, property, subgroup, end_time)
SELECT SELECT
zerver_realm.id, count(*),'%(property)s', %(subgroup)s, %%(time_end)s zerver_realm.id, count(*), %(property)s, {subgroup}, %(time_end)s
FROM zerver_realm FROM zerver_realm
JOIN zerver_userprofile JOIN zerver_userprofile
ON ON
zerver_realm.id = zerver_userprofile.realm_id zerver_realm.id = zerver_userprofile.realm_id
WHERE WHERE
zerver_realm.date_created < %%(time_end)s AND zerver_realm.date_created < %(time_end)s AND
zerver_userprofile.date_joined >= %%(time_start)s AND zerver_userprofile.date_joined >= %(time_start)s AND
zerver_userprofile.date_joined < %%(time_end)s AND zerver_userprofile.date_joined < %(time_end)s AND
{realm_clause} {realm_clause}
zerver_userprofile.is_active = TRUE zerver_userprofile.is_active = TRUE
GROUP BY zerver_realm.id %(group_by_clause)s GROUP BY zerver_realm.id {group_by_clause}
""".format(realm_clause=realm_clause) """).format(**kwargs, realm_clause=realm_clause)
# Currently hardcodes the query needed for active_users_audit:is_bot:day. # Currently hardcodes the query needed for active_users_audit:is_bot:day.
# Assumes that a user cannot have two RealmAuditLog entries with the same event_time and # Assumes that a user cannot have two RealmAuditLog entries with the same event_time and
# event_type in [RealmAuditLog.USER_CREATED, USER_DEACTIVATED, etc]. # event_type in [RealmAuditLog.USER_CREATED, USER_DEACTIVATED, etc].
# In particular, it's important to ensure that migrations don't cause that to happen. # In particular, it's important to ensure that migrations don't cause that to happen.
def check_realmauditlog_by_user_query(realm: Optional[Realm]) -> str: def check_realmauditlog_by_user_query(realm: Optional[Realm]) -> QueryFn:
if realm is None: if realm is None:
realm_clause = "" realm_clause = SQL("")
else: else:
realm_clause = "realm_id = %s AND" % (realm.id,) realm_clause = SQL("realm_id = {} AND").format(Literal(realm.id))
return """ return lambda kwargs: SQL("""
INSERT INTO analytics_usercount INSERT INTO analytics_usercount
(user_id, realm_id, value, property, subgroup, end_time) (user_id, realm_id, value, property, subgroup, end_time)
SELECT SELECT
ral1.modified_user_id, ral1.realm_id, 1, '%(property)s', %(subgroup)s, %%(time_end)s ral1.modified_user_id, ral1.realm_id, 1, %(property)s, {subgroup}, %(time_end)s
FROM zerver_realmauditlog ral1 FROM zerver_realmauditlog ral1
JOIN ( JOIN (
SELECT modified_user_id, max(event_time) AS max_event_time SELECT modified_user_id, max(event_time) AS max_event_time
@@ -465,7 +489,7 @@ def check_realmauditlog_by_user_query(realm: Optional[Realm]) -> str:
WHERE WHERE
event_type in ({user_created}, {user_activated}, {user_deactivated}, {user_reactivated}) AND event_type in ({user_created}, {user_activated}, {user_deactivated}, {user_reactivated}) AND
{realm_clause} {realm_clause}
event_time < %%(time_end)s event_time < %(time_end)s
GROUP BY modified_user_id GROUP BY modified_user_id
) ral2 ) ral2
ON ON
@@ -476,43 +500,46 @@ def check_realmauditlog_by_user_query(realm: Optional[Realm]) -> str:
ral1.modified_user_id = zerver_userprofile.id ral1.modified_user_id = zerver_userprofile.id
WHERE WHERE
ral1.event_type in ({user_created}, {user_activated}, {user_reactivated}) ral1.event_type in ({user_created}, {user_activated}, {user_reactivated})
""".format(user_created=RealmAuditLog.USER_CREATED, """).format(
user_activated=RealmAuditLog.USER_ACTIVATED, **kwargs,
user_deactivated=RealmAuditLog.USER_DEACTIVATED, user_created=Literal(RealmAuditLog.USER_CREATED),
user_reactivated=RealmAuditLog.USER_REACTIVATED, user_activated=Literal(RealmAuditLog.USER_ACTIVATED),
realm_clause=realm_clause) user_deactivated=Literal(RealmAuditLog.USER_DEACTIVATED),
user_reactivated=Literal(RealmAuditLog.USER_REACTIVATED),
realm_clause=realm_clause,
)
def check_useractivityinterval_by_user_query(realm: Optional[Realm]) -> str: def check_useractivityinterval_by_user_query(realm: Optional[Realm]) -> QueryFn:
if realm is None: if realm is None:
realm_clause = "" realm_clause = SQL("")
else: else:
realm_clause = "zerver_userprofile.realm_id = %s AND" % (realm.id,) realm_clause = SQL("zerver_userprofile.realm_id = {} AND").format(Literal(realm.id))
return """ return lambda kwargs: SQL("""
INSERT INTO analytics_usercount INSERT INTO analytics_usercount
(user_id, realm_id, value, property, subgroup, end_time) (user_id, realm_id, value, property, subgroup, end_time)
SELECT SELECT
zerver_userprofile.id, zerver_userprofile.realm_id, 1, '%(property)s', %(subgroup)s, %%(time_end)s zerver_userprofile.id, zerver_userprofile.realm_id, 1, %(property)s, {subgroup}, %(time_end)s
FROM zerver_userprofile FROM zerver_userprofile
JOIN zerver_useractivityinterval JOIN zerver_useractivityinterval
ON ON
zerver_userprofile.id = zerver_useractivityinterval.user_profile_id zerver_userprofile.id = zerver_useractivityinterval.user_profile_id
WHERE WHERE
zerver_useractivityinterval.end >= %%(time_start)s AND zerver_useractivityinterval.end >= %(time_start)s AND
{realm_clause} {realm_clause}
zerver_useractivityinterval.start < %%(time_end)s zerver_useractivityinterval.start < %(time_end)s
GROUP BY zerver_userprofile.id %(group_by_clause)s GROUP BY zerver_userprofile.id {group_by_clause}
""".format(realm_clause=realm_clause) """).format(**kwargs, realm_clause=realm_clause)
def count_realm_active_humans_query(realm: Optional[Realm]) -> str: def count_realm_active_humans_query(realm: Optional[Realm]) -> QueryFn:
if realm is None: if realm is None:
realm_clause = "" realm_clause = SQL("")
else: else:
realm_clause = "realm_id = %s AND" % (realm.id,) realm_clause = SQL("realm_id = {} AND").format(Literal(realm.id))
return """ return lambda kwargs: SQL("""
INSERT INTO analytics_realmcount INSERT INTO analytics_realmcount
(realm_id, value, property, subgroup, end_time) (realm_id, value, property, subgroup, end_time)
SELECT SELECT
usercount1.realm_id, count(*), '%(property)s', NULL, %%(time_end)s usercount1.realm_id, count(*), %(property)s, NULL, %(time_end)s
FROM ( FROM (
SELECT realm_id, user_id SELECT realm_id, user_id
FROM analytics_usercount FROM analytics_usercount
@@ -520,7 +547,7 @@ def count_realm_active_humans_query(realm: Optional[Realm]) -> str:
property = 'active_users_audit:is_bot:day' AND property = 'active_users_audit:is_bot:day' AND
subgroup = 'false' AND subgroup = 'false' AND
{realm_clause} {realm_clause}
end_time = %%(time_end)s end_time = %(time_end)s
) usercount1 ) usercount1
JOIN ( JOIN (
SELECT realm_id, user_id SELECT realm_id, user_id
@@ -528,29 +555,29 @@ def count_realm_active_humans_query(realm: Optional[Realm]) -> str:
WHERE WHERE
property = '15day_actives::day' AND property = '15day_actives::day' AND
{realm_clause} {realm_clause}
end_time = %%(time_end)s end_time = %(time_end)s
) usercount2 ) usercount2
ON ON
usercount1.user_id = usercount2.user_id usercount1.user_id = usercount2.user_id
GROUP BY usercount1.realm_id GROUP BY usercount1.realm_id
""".format(realm_clause=realm_clause) """).format(**kwargs, realm_clause=realm_clause)
# Currently unused and untested # Currently unused and untested
count_stream_by_realm_query = """ count_stream_by_realm_query = lambda kwargs: SQL("""
INSERT INTO analytics_realmcount INSERT INTO analytics_realmcount
(realm_id, value, property, subgroup, end_time) (realm_id, value, property, subgroup, end_time)
SELECT SELECT
zerver_realm.id, count(*), '%(property)s', %(subgroup)s, %%(time_end)s zerver_realm.id, count(*), %(property)s, {subgroup}, %(time_end)s
FROM zerver_realm FROM zerver_realm
JOIN zerver_stream JOIN zerver_stream
ON ON
zerver_realm.id = zerver_stream.realm_id AND zerver_realm.id = zerver_stream.realm_id AND
WHERE WHERE
zerver_realm.date_created < %%(time_end)s AND zerver_realm.date_created < %(time_end)s AND
zerver_stream.date_created >= %%(time_start)s AND zerver_stream.date_created >= %(time_start)s AND
zerver_stream.date_created < %%(time_end)s zerver_stream.date_created < %(time_end)s
GROUP BY zerver_realm.id %(group_by_clause)s GROUP BY zerver_realm.id {group_by_clause}
""" """).format(**kwargs)
def get_count_stats(realm: Optional[Realm]=None) -> Dict[str, CountStat]: def get_count_stats(realm: Optional[Realm]=None) -> Dict[str, CountStat]:
## CountStat declarations ## ## CountStat declarations ##

View File

@@ -8,6 +8,7 @@ from django.db import models
from django.db.models import Sum from django.db.models import Sum
from django.test import TestCase from django.test import TestCase
from django.utils.timezone import now as timezone_now from django.utils.timezone import now as timezone_now
from psycopg2.sql import SQL, Literal
from analytics.lib.counts import COUNT_STATS, CountStat, get_count_stats, \ from analytics.lib.counts import COUNT_STATS, CountStat, get_count_stats, \
DependentCountStat, LoggingCountStat, do_aggregate_to_summary_table, \ DependentCountStat, LoggingCountStat, do_aggregate_to_summary_table, \
@@ -166,8 +167,13 @@ class AnalyticsTestCase(TestCase):
class TestProcessCountStat(AnalyticsTestCase): class TestProcessCountStat(AnalyticsTestCase):
def make_dummy_count_stat(self, property: str) -> CountStat: def make_dummy_count_stat(self, property: str) -> CountStat:
query = """INSERT INTO analytics_realmcount (realm_id, value, property, end_time) query = lambda kwargs: SQL("""
VALUES (%s, 1, '%s', %%%%(time_end)s)""" % (self.default_realm.id, property) INSERT INTO analytics_realmcount (realm_id, value, property, end_time)
VALUES ({default_realm_id}, 1, {property}, %(time_end)s)
""").format(
default_realm_id=Literal(self.default_realm.id),
property=Literal(property),
)
return CountStat(property, sql_data_collector(RealmCount, query, None), CountStat.HOUR) return CountStat(property, sql_data_collector(RealmCount, query, None), CountStat.HOUR)
def assertFillStateEquals(self, stat: CountStat, end_time: datetime, def assertFillStateEquals(self, stat: CountStat, end_time: datetime,
@@ -265,8 +271,13 @@ class TestProcessCountStat(AnalyticsTestCase):
def test_process_dependent_stat(self) -> None: def test_process_dependent_stat(self) -> None:
stat1 = self.make_dummy_count_stat('stat1') stat1 = self.make_dummy_count_stat('stat1')
stat2 = self.make_dummy_count_stat('stat2') stat2 = self.make_dummy_count_stat('stat2')
query = """INSERT INTO analytics_realmcount (realm_id, value, property, end_time) query = lambda kwargs: SQL("""
VALUES (%s, 1, '%s', %%%%(time_end)s)""" % (self.default_realm.id, 'stat3') INSERT INTO analytics_realmcount (realm_id, value, property, end_time)
VALUES ({default_realm_id}, 1, {property}, %(time_end)s)
""").format(
default_realm_id=Literal(self.default_realm.id),
property=Literal('stat3'),
)
stat3 = DependentCountStat('stat3', sql_data_collector(RealmCount, query, None), stat3 = DependentCountStat('stat3', sql_data_collector(RealmCount, query, None),
CountStat.HOUR, CountStat.HOUR,
dependencies=['stat1', 'stat2']) dependencies=['stat1', 'stat2'])
@@ -299,8 +310,13 @@ class TestProcessCountStat(AnalyticsTestCase):
self.assertFillStateEquals(stat3, hour[2]) self.assertFillStateEquals(stat3, hour[2])
# test daily dependent stat with hourly dependencies # test daily dependent stat with hourly dependencies
query = """INSERT INTO analytics_realmcount (realm_id, value, property, end_time) query = lambda kwargs: SQL("""
VALUES (%s, 1, '%s', %%%%(time_end)s)""" % (self.default_realm.id, 'stat4') INSERT INTO analytics_realmcount (realm_id, value, property, end_time)
VALUES ({default_realm_id}, 1, {property}, %(time_end)s)
""").format(
default_realm_id=Literal(self.default_realm.id),
property=Literal('stat4'),
)
stat4 = DependentCountStat('stat4', sql_data_collector(RealmCount, query, None), stat4 = DependentCountStat('stat4', sql_data_collector(RealmCount, query, None),
CountStat.DAY, CountStat.DAY,
dependencies=['stat1', 'stat2']) dependencies=['stat1', 'stat2'])

View File

@@ -24,6 +24,7 @@ from django.utils.timesince import timesince
from django.core.validators import URLValidator from django.core.validators import URLValidator
from django.core.exceptions import ValidationError from django.core.exceptions import ValidationError
from jinja2 import Markup as mark_safe from jinja2 import Markup as mark_safe
from psycopg2.sql import Composable, Literal, SQL
from analytics.lib.counts import COUNT_STATS, CountStat from analytics.lib.counts import COUNT_STATS, CountStat
from analytics.lib.time_utils import time_range from analytics.lib.time_utils import time_range
@@ -415,7 +416,7 @@ def dictfetchall(cursor: connection.cursor) -> List[Dict[str, Any]]:
def get_realm_day_counts() -> Dict[str, Dict[str, str]]: def get_realm_day_counts() -> Dict[str, Dict[str, str]]:
query = ''' query = SQL('''
select select
r.string_id, r.string_id,
(now()::date - date_sent::date) age, (now()::date - date_sent::date) age,
@@ -436,7 +437,7 @@ def get_realm_day_counts() -> Dict[str, Dict[str, str]]:
order by order by
r.string_id, r.string_id,
age age
''' ''')
cursor = connection.cursor() cursor = connection.cursor()
cursor.execute(query) cursor.execute(query)
rows = dictfetchall(cursor) rows = dictfetchall(cursor)
@@ -476,7 +477,7 @@ def get_plan_name(plan_type: int) -> str:
def realm_summary_table(realm_minutes: Dict[str, float]) -> str: def realm_summary_table(realm_minutes: Dict[str, float]) -> str:
now = timezone_now() now = timezone_now()
query = ''' query = SQL('''
SELECT SELECT
realm.string_id, realm.string_id,
realm.date_created, realm.date_created,
@@ -578,7 +579,7 @@ def realm_summary_table(realm_minutes: Dict[str, float]) -> str:
last_visit > now() - interval '2 week' last_visit > now() - interval '2 week'
) )
ORDER BY dau_count DESC, string_id ASC ORDER BY dau_count DESC, string_id ASC
''' ''')
cursor = connection.cursor() cursor = connection.cursor()
cursor.execute(query) cursor.execute(query)
@@ -737,7 +738,7 @@ def sent_messages_report(realm: str) -> str:
'Bots' 'Bots'
] ]
query = ''' query = SQL('''
select select
series.day::date, series.day::date,
humans.cnt, humans.cnt,
@@ -787,7 +788,7 @@ def sent_messages_report(realm: str) -> str:
date_sent::date date_sent::date
) bots on ) bots on
series.day = bots.date_sent series.day = bots.date_sent
''' ''')
cursor = connection.cursor() cursor = connection.cursor()
cursor.execute(query, [realm, realm]) cursor.execute(query, [realm, realm])
rows = cursor.fetchall() rows = cursor.fetchall()
@@ -796,7 +797,7 @@ def sent_messages_report(realm: str) -> str:
return make_table(title, cols, rows) return make_table(title, cols, rows)
def ad_hoc_queries() -> List[Dict[str, str]]: def ad_hoc_queries() -> List[Dict[str, str]]:
def get_page(query: str, cols: List[str], title: str, def get_page(query: Composable, cols: List[str], title: str,
totals_columns: List[int]=[]) -> Dict[str, str]: totals_columns: List[int]=[]) -> Dict[str, str]:
cursor = connection.cursor() cursor = connection.cursor()
cursor.execute(query) cursor.execute(query)
@@ -842,7 +843,7 @@ def ad_hoc_queries() -> List[Dict[str, str]]:
for mobile_type in ['Android', 'ZulipiOS']: for mobile_type in ['Android', 'ZulipiOS']:
title = '%s usage' % (mobile_type,) title = '%s usage' % (mobile_type,)
query = ''' query = SQL('''
select select
realm.string_id, realm.string_id,
up.id user_id, up.id user_id,
@@ -854,11 +855,13 @@ def ad_hoc_queries() -> List[Dict[str, str]]:
join zerver_userprofile up on up.id = ua.user_profile_id join zerver_userprofile up on up.id = ua.user_profile_id
join zerver_realm realm on realm.id = up.realm_id join zerver_realm realm on realm.id = up.realm_id
where where
client.name like '%s' client.name like {mobile_type}
group by string_id, up.id, client.name group by string_id, up.id, client.name
having max(last_visit) > now() - interval '2 week' having max(last_visit) > now() - interval '2 week'
order by string_id, up.id, client.name order by string_id, up.id, client.name
''' % (mobile_type,) ''').format(
mobile_type=Literal(mobile_type),
)
cols = [ cols = [
'Realm', 'Realm',
@@ -874,7 +877,7 @@ def ad_hoc_queries() -> List[Dict[str, str]]:
title = 'Desktop users' title = 'Desktop users'
query = ''' query = SQL('''
select select
realm.string_id, realm.string_id,
client.name, client.name,
@@ -889,7 +892,7 @@ def ad_hoc_queries() -> List[Dict[str, str]]:
group by string_id, client.name group by string_id, client.name
having max(last_visit) > now() - interval '2 week' having max(last_visit) > now() - interval '2 week'
order by string_id, client.name order by string_id, client.name
''' ''')
cols = [ cols = [
'Realm', 'Realm',
@@ -904,7 +907,7 @@ def ad_hoc_queries() -> List[Dict[str, str]]:
title = 'Integrations by realm' title = 'Integrations by realm'
query = ''' query = SQL('''
select select
realm.string_id, realm.string_id,
case case
@@ -927,7 +930,7 @@ def ad_hoc_queries() -> List[Dict[str, str]]:
group by string_id, client_name group by string_id, client_name
having max(last_visit) > now() - interval '2 week' having max(last_visit) > now() - interval '2 week'
order by string_id, client_name order by string_id, client_name
''' ''')
cols = [ cols = [
'Realm', 'Realm',
@@ -942,7 +945,7 @@ def ad_hoc_queries() -> List[Dict[str, str]]:
title = 'Integrations by client' title = 'Integrations by client'
query = ''' query = SQL('''
select select
case case
when query like '%%external%%' then split_part(query, '/', 5) when query like '%%external%%' then split_part(query, '/', 5)
@@ -965,7 +968,7 @@ def ad_hoc_queries() -> List[Dict[str, str]]:
group by client_name, string_id group by client_name, string_id
having max(last_visit) > now() - interval '2 week' having max(last_visit) > now() - interval '2 week'
order by client_name, string_id order by client_name, string_id
''' ''')
cols = [ cols = [
'Client', 'Client',
@@ -978,7 +981,7 @@ def ad_hoc_queries() -> List[Dict[str, str]]:
title = 'Remote Zulip servers' title = 'Remote Zulip servers'
query = ''' query = SQL('''
with icount as ( with icount as (
select select
server_id, server_id,
@@ -1005,7 +1008,7 @@ def ad_hoc_queries() -> List[Dict[str, str]]:
left join icount on icount.server_id = rserver.id left join icount on icount.server_id = rserver.id
left join remote_push_devices on remote_push_devices.server_id = rserver.id left join remote_push_devices on remote_push_devices.server_id = rserver.id
order by max_value DESC NULLS LAST, push_user_count DESC NULLS LAST order by max_value DESC NULLS LAST, push_user_count DESC NULLS LAST
''' ''')
cols = [ cols = [
'ID', 'ID',