mirror of
https://github.com/zulip/zulip.git
synced 2025-11-15 11:22:04 +00:00
analytics: Improve escaping correctness with psycopg2.sql.
Signed-off-by: Anders Kaseorg <anders@zulip.com>
This commit is contained in:
committed by
Tim Abbott
parent
db197d0abd
commit
5839fdf963
@@ -8,6 +8,7 @@ from typing import Callable, Dict, List, \
|
|||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from django.db import connection
|
from django.db import connection
|
||||||
from django.db.models import F
|
from django.db.models import F
|
||||||
|
from psycopg2.sql import Composable, Identifier, Literal, SQL
|
||||||
|
|
||||||
from analytics.models import BaseCount, \
|
from analytics.models import BaseCount, \
|
||||||
FillState, InstallationCount, RealmCount, StreamCount, \
|
FillState, InstallationCount, RealmCount, StreamCount, \
|
||||||
@@ -164,31 +165,35 @@ def do_aggregate_to_summary_table(stat: CountStat, end_time: datetime,
|
|||||||
# Aggregate into RealmCount
|
# Aggregate into RealmCount
|
||||||
output_table = stat.data_collector.output_table
|
output_table = stat.data_collector.output_table
|
||||||
if realm is not None:
|
if realm is not None:
|
||||||
realm_clause = "AND zerver_realm.id = %s" % (realm.id,)
|
realm_clause = SQL("AND zerver_realm.id = {}").format(Literal(realm.id))
|
||||||
else:
|
else:
|
||||||
realm_clause = ""
|
realm_clause = SQL("")
|
||||||
|
|
||||||
if output_table in (UserCount, StreamCount):
|
if output_table in (UserCount, StreamCount):
|
||||||
realmcount_query = """
|
realmcount_query = SQL("""
|
||||||
INSERT INTO analytics_realmcount
|
INSERT INTO analytics_realmcount
|
||||||
(realm_id, value, property, subgroup, end_time)
|
(realm_id, value, property, subgroup, end_time)
|
||||||
SELECT
|
SELECT
|
||||||
zerver_realm.id, COALESCE(sum(%(output_table)s.value), 0), '%(property)s',
|
zerver_realm.id, COALESCE(sum({output_table}.value), 0), %(property)s,
|
||||||
%(output_table)s.subgroup, %%(end_time)s
|
{output_table}.subgroup, %(end_time)s
|
||||||
FROM zerver_realm
|
FROM zerver_realm
|
||||||
JOIN %(output_table)s
|
JOIN {output_table}
|
||||||
ON
|
ON
|
||||||
zerver_realm.id = %(output_table)s.realm_id
|
zerver_realm.id = {output_table}.realm_id
|
||||||
WHERE
|
WHERE
|
||||||
%(output_table)s.property = '%(property)s' AND
|
{output_table}.property = %(property)s AND
|
||||||
%(output_table)s.end_time = %%(end_time)s
|
{output_table}.end_time = %(end_time)s
|
||||||
%(realm_clause)s
|
{realm_clause}
|
||||||
GROUP BY zerver_realm.id, %(output_table)s.subgroup
|
GROUP BY zerver_realm.id, {output_table}.subgroup
|
||||||
""" % {'output_table': output_table._meta.db_table,
|
""").format(
|
||||||
'property': stat.property,
|
output_table=Identifier(output_table._meta.db_table),
|
||||||
'realm_clause': realm_clause}
|
realm_clause=realm_clause,
|
||||||
|
)
|
||||||
start = time.time()
|
start = time.time()
|
||||||
cursor.execute(realmcount_query, {'end_time': end_time})
|
cursor.execute(realmcount_query, {
|
||||||
|
'property': stat.property,
|
||||||
|
'end_time': end_time,
|
||||||
|
})
|
||||||
end = time.time()
|
end = time.time()
|
||||||
logger.info(
|
logger.info(
|
||||||
"%s RealmCount aggregation (%dms/%sr)",
|
"%s RealmCount aggregation (%dms/%sr)",
|
||||||
@@ -201,19 +206,22 @@ def do_aggregate_to_summary_table(stat: CountStat, end_time: datetime,
|
|||||||
#
|
#
|
||||||
# TODO: Add support for updating installation data after
|
# TODO: Add support for updating installation data after
|
||||||
# changing an individual realm's values.
|
# changing an individual realm's values.
|
||||||
installationcount_query = """
|
installationcount_query = SQL("""
|
||||||
INSERT INTO analytics_installationcount
|
INSERT INTO analytics_installationcount
|
||||||
(value, property, subgroup, end_time)
|
(value, property, subgroup, end_time)
|
||||||
SELECT
|
SELECT
|
||||||
sum(value), '%(property)s', analytics_realmcount.subgroup, %%(end_time)s
|
sum(value), %(property)s, analytics_realmcount.subgroup, %(end_time)s
|
||||||
FROM analytics_realmcount
|
FROM analytics_realmcount
|
||||||
WHERE
|
WHERE
|
||||||
property = '%(property)s' AND
|
property = %(property)s AND
|
||||||
end_time = %%(end_time)s
|
end_time = %(end_time)s
|
||||||
GROUP BY analytics_realmcount.subgroup
|
GROUP BY analytics_realmcount.subgroup
|
||||||
""" % {'property': stat.property}
|
""")
|
||||||
start = time.time()
|
start = time.time()
|
||||||
cursor.execute(installationcount_query, {'end_time': end_time})
|
cursor.execute(installationcount_query, {
|
||||||
|
'property': stat.property,
|
||||||
|
'end_time': end_time,
|
||||||
|
})
|
||||||
end = time.time()
|
end = time.time()
|
||||||
logger.info(
|
logger.info(
|
||||||
"%s InstallationCount aggregation (%dms/%sr)",
|
"%s InstallationCount aggregation (%dms/%sr)",
|
||||||
@@ -264,29 +272,45 @@ def do_drop_single_stat(property: str) -> None:
|
|||||||
|
|
||||||
## DataCollector-level operations ##
|
## DataCollector-level operations ##
|
||||||
|
|
||||||
def do_pull_by_sql_query(property: str, start_time: datetime, end_time: datetime, query: str,
|
QueryFn = Callable[[Dict[str, Composable]], Composable]
|
||||||
group_by: Optional[Tuple[models.Model, str]]) -> int:
|
|
||||||
|
def do_pull_by_sql_query(
|
||||||
|
property: str,
|
||||||
|
start_time: datetime,
|
||||||
|
end_time: datetime,
|
||||||
|
query: QueryFn,
|
||||||
|
group_by: Optional[Tuple[models.Model, str]],
|
||||||
|
) -> int:
|
||||||
if group_by is None:
|
if group_by is None:
|
||||||
subgroup = 'NULL'
|
subgroup = SQL('NULL')
|
||||||
group_by_clause = ''
|
group_by_clause = SQL('')
|
||||||
else:
|
else:
|
||||||
subgroup = '%s.%s' % (group_by[0]._meta.db_table, group_by[1])
|
subgroup = Identifier(group_by[0]._meta.db_table, group_by[1])
|
||||||
group_by_clause = ', ' + subgroup
|
group_by_clause = SQL(', {}').format(subgroup)
|
||||||
|
|
||||||
# We do string replacement here because cursor.execute will reject a
|
# We do string replacement here because cursor.execute will reject a
|
||||||
# group_by_clause given as a param.
|
# group_by_clause given as a param.
|
||||||
# We pass in the datetimes as params to cursor.execute so that we don't have to
|
# We pass in the datetimes as params to cursor.execute so that we don't have to
|
||||||
# think about how to convert python datetimes to SQL datetimes.
|
# think about how to convert python datetimes to SQL datetimes.
|
||||||
query_ = query % {'property': property, 'subgroup': subgroup,
|
query_ = query({
|
||||||
'group_by_clause': group_by_clause}
|
'subgroup': subgroup,
|
||||||
|
'group_by_clause': group_by_clause,
|
||||||
|
})
|
||||||
cursor = connection.cursor()
|
cursor = connection.cursor()
|
||||||
cursor.execute(query_, {'time_start': start_time, 'time_end': end_time})
|
cursor.execute(query_, {
|
||||||
|
'property': property,
|
||||||
|
'time_start': start_time,
|
||||||
|
'time_end': end_time,
|
||||||
|
})
|
||||||
rowcount = cursor.rowcount
|
rowcount = cursor.rowcount
|
||||||
cursor.close()
|
cursor.close()
|
||||||
return rowcount
|
return rowcount
|
||||||
|
|
||||||
def sql_data_collector(output_table: Type[BaseCount], query: str,
|
def sql_data_collector(
|
||||||
group_by: Optional[Tuple[models.Model, str]]) -> DataCollector:
|
output_table: Type[BaseCount],
|
||||||
|
query: QueryFn,
|
||||||
|
group_by: Optional[Tuple[models.Model, str]],
|
||||||
|
) -> DataCollector:
|
||||||
def pull_function(property: str, start_time: datetime, end_time: datetime,
|
def pull_function(property: str, start_time: datetime, end_time: datetime,
|
||||||
realm: Optional[Realm] = None) -> int:
|
realm: Optional[Realm] = None) -> int:
|
||||||
# The pull function type needs to accept a Realm argument
|
# The pull function type needs to accept a Realm argument
|
||||||
@@ -320,39 +344,39 @@ def do_pull_minutes_active(property: str, start_time: datetime, end_time: dateti
|
|||||||
UserCount.objects.bulk_create(rows)
|
UserCount.objects.bulk_create(rows)
|
||||||
return len(rows)
|
return len(rows)
|
||||||
|
|
||||||
def count_message_by_user_query(realm: Optional[Realm]) -> str:
|
def count_message_by_user_query(realm: Optional[Realm]) -> QueryFn:
|
||||||
if realm is None:
|
if realm is None:
|
||||||
realm_clause = ""
|
realm_clause = SQL("")
|
||||||
else:
|
else:
|
||||||
realm_clause = "zerver_userprofile.realm_id = %s AND" % (realm.id,)
|
realm_clause = SQL("zerver_userprofile.realm_id = {} AND").format(Literal(realm.id))
|
||||||
return """
|
return lambda kwargs: SQL("""
|
||||||
INSERT INTO analytics_usercount
|
INSERT INTO analytics_usercount
|
||||||
(user_id, realm_id, value, property, subgroup, end_time)
|
(user_id, realm_id, value, property, subgroup, end_time)
|
||||||
SELECT
|
SELECT
|
||||||
zerver_userprofile.id, zerver_userprofile.realm_id, count(*),
|
zerver_userprofile.id, zerver_userprofile.realm_id, count(*),
|
||||||
'%(property)s', %(subgroup)s, %%(time_end)s
|
%(property)s, {subgroup}, %(time_end)s
|
||||||
FROM zerver_userprofile
|
FROM zerver_userprofile
|
||||||
JOIN zerver_message
|
JOIN zerver_message
|
||||||
ON
|
ON
|
||||||
zerver_userprofile.id = zerver_message.sender_id
|
zerver_userprofile.id = zerver_message.sender_id
|
||||||
WHERE
|
WHERE
|
||||||
zerver_userprofile.date_joined < %%(time_end)s AND
|
zerver_userprofile.date_joined < %(time_end)s AND
|
||||||
zerver_message.date_sent >= %%(time_start)s AND
|
zerver_message.date_sent >= %(time_start)s AND
|
||||||
{realm_clause}
|
{realm_clause}
|
||||||
zerver_message.date_sent < %%(time_end)s
|
zerver_message.date_sent < %(time_end)s
|
||||||
GROUP BY zerver_userprofile.id %(group_by_clause)s
|
GROUP BY zerver_userprofile.id {group_by_clause}
|
||||||
""".format(realm_clause=realm_clause)
|
""").format(**kwargs, realm_clause=realm_clause)
|
||||||
|
|
||||||
# Note: ignores the group_by / group_by_clause.
|
# Note: ignores the group_by / group_by_clause.
|
||||||
def count_message_type_by_user_query(realm: Optional[Realm]) -> str:
|
def count_message_type_by_user_query(realm: Optional[Realm]) -> QueryFn:
|
||||||
if realm is None:
|
if realm is None:
|
||||||
realm_clause = ""
|
realm_clause = SQL("")
|
||||||
else:
|
else:
|
||||||
realm_clause = "zerver_userprofile.realm_id = %s AND" % (realm.id,)
|
realm_clause = SQL("zerver_userprofile.realm_id = {} AND").format(Literal(realm.id))
|
||||||
return """
|
return lambda kwargs: SQL("""
|
||||||
INSERT INTO analytics_usercount
|
INSERT INTO analytics_usercount
|
||||||
(realm_id, user_id, value, property, subgroup, end_time)
|
(realm_id, user_id, value, property, subgroup, end_time)
|
||||||
SELECT realm_id, id, SUM(count) AS value, '%(property)s', message_type, %%(time_end)s
|
SELECT realm_id, id, SUM(count) AS value, %(property)s, message_type, %(time_end)s
|
||||||
FROM
|
FROM
|
||||||
(
|
(
|
||||||
SELECT zerver_userprofile.realm_id, zerver_userprofile.id, count(*),
|
SELECT zerver_userprofile.realm_id, zerver_userprofile.id, count(*),
|
||||||
@@ -370,9 +394,9 @@ def count_message_type_by_user_query(realm: Optional[Realm]) -> str:
|
|||||||
JOIN zerver_message
|
JOIN zerver_message
|
||||||
ON
|
ON
|
||||||
zerver_userprofile.id = zerver_message.sender_id AND
|
zerver_userprofile.id = zerver_message.sender_id AND
|
||||||
zerver_message.date_sent >= %%(time_start)s AND
|
zerver_message.date_sent >= %(time_start)s AND
|
||||||
{realm_clause}
|
{realm_clause}
|
||||||
zerver_message.date_sent < %%(time_end)s
|
zerver_message.date_sent < %(time_end)s
|
||||||
JOIN zerver_recipient
|
JOIN zerver_recipient
|
||||||
ON
|
ON
|
||||||
zerver_message.recipient_id = zerver_recipient.id
|
zerver_message.recipient_id = zerver_recipient.id
|
||||||
@@ -384,22 +408,22 @@ def count_message_type_by_user_query(realm: Optional[Realm]) -> str:
|
|||||||
zerver_recipient.type, zerver_stream.invite_only
|
zerver_recipient.type, zerver_stream.invite_only
|
||||||
) AS subquery
|
) AS subquery
|
||||||
GROUP BY realm_id, id, message_type
|
GROUP BY realm_id, id, message_type
|
||||||
""".format(realm_clause=realm_clause)
|
""").format(**kwargs, realm_clause=realm_clause)
|
||||||
|
|
||||||
# This query joins to the UserProfile table since all current queries that
|
# This query joins to the UserProfile table since all current queries that
|
||||||
# use this also subgroup on UserProfile.is_bot. If in the future there is a
|
# use this also subgroup on UserProfile.is_bot. If in the future there is a
|
||||||
# stat that counts messages by stream and doesn't need the UserProfile
|
# stat that counts messages by stream and doesn't need the UserProfile
|
||||||
# table, consider writing a new query for efficiency.
|
# table, consider writing a new query for efficiency.
|
||||||
def count_message_by_stream_query(realm: Optional[Realm]) -> str:
|
def count_message_by_stream_query(realm: Optional[Realm]) -> QueryFn:
|
||||||
if realm is None:
|
if realm is None:
|
||||||
realm_clause = ""
|
realm_clause = SQL("")
|
||||||
else:
|
else:
|
||||||
realm_clause = "zerver_stream.realm_id = %s AND" % (realm.id,)
|
realm_clause = SQL("zerver_stream.realm_id = {} AND").format(Literal(realm.id))
|
||||||
return """
|
return lambda kwargs: SQL("""
|
||||||
INSERT INTO analytics_streamcount
|
INSERT INTO analytics_streamcount
|
||||||
(stream_id, realm_id, value, property, subgroup, end_time)
|
(stream_id, realm_id, value, property, subgroup, end_time)
|
||||||
SELECT
|
SELECT
|
||||||
zerver_stream.id, zerver_stream.realm_id, count(*), '%(property)s', %(subgroup)s, %%(time_end)s
|
zerver_stream.id, zerver_stream.realm_id, count(*), %(property)s, {subgroup}, %(time_end)s
|
||||||
FROM zerver_stream
|
FROM zerver_stream
|
||||||
JOIN zerver_recipient
|
JOIN zerver_recipient
|
||||||
ON
|
ON
|
||||||
@@ -411,53 +435,53 @@ def count_message_by_stream_query(realm: Optional[Realm]) -> str:
|
|||||||
ON
|
ON
|
||||||
zerver_message.sender_id = zerver_userprofile.id
|
zerver_message.sender_id = zerver_userprofile.id
|
||||||
WHERE
|
WHERE
|
||||||
zerver_stream.date_created < %%(time_end)s AND
|
zerver_stream.date_created < %(time_end)s AND
|
||||||
zerver_recipient.type = 2 AND
|
zerver_recipient.type = 2 AND
|
||||||
zerver_message.date_sent >= %%(time_start)s AND
|
zerver_message.date_sent >= %(time_start)s AND
|
||||||
{realm_clause}
|
{realm_clause}
|
||||||
zerver_message.date_sent < %%(time_end)s
|
zerver_message.date_sent < %(time_end)s
|
||||||
GROUP BY zerver_stream.id %(group_by_clause)s
|
GROUP BY zerver_stream.id {group_by_clause}
|
||||||
""".format(realm_clause=realm_clause)
|
""").format(**kwargs, realm_clause=realm_clause)
|
||||||
|
|
||||||
# Hardcodes the query needed by active_users:is_bot:day, since that is
|
# Hardcodes the query needed by active_users:is_bot:day, since that is
|
||||||
# currently the only stat that uses this.
|
# currently the only stat that uses this.
|
||||||
def count_user_by_realm_query(realm: Optional[Realm]) -> str:
|
def count_user_by_realm_query(realm: Optional[Realm]) -> QueryFn:
|
||||||
if realm is None:
|
if realm is None:
|
||||||
realm_clause = ""
|
realm_clause = SQL("")
|
||||||
else:
|
else:
|
||||||
realm_clause = "zerver_userprofile.realm_id = %s AND" % (realm.id,)
|
realm_clause = SQL("zerver_userprofile.realm_id = {} AND").format(Literal(realm.id))
|
||||||
return """
|
return lambda kwargs: SQL("""
|
||||||
INSERT INTO analytics_realmcount
|
INSERT INTO analytics_realmcount
|
||||||
(realm_id, value, property, subgroup, end_time)
|
(realm_id, value, property, subgroup, end_time)
|
||||||
SELECT
|
SELECT
|
||||||
zerver_realm.id, count(*),'%(property)s', %(subgroup)s, %%(time_end)s
|
zerver_realm.id, count(*), %(property)s, {subgroup}, %(time_end)s
|
||||||
FROM zerver_realm
|
FROM zerver_realm
|
||||||
JOIN zerver_userprofile
|
JOIN zerver_userprofile
|
||||||
ON
|
ON
|
||||||
zerver_realm.id = zerver_userprofile.realm_id
|
zerver_realm.id = zerver_userprofile.realm_id
|
||||||
WHERE
|
WHERE
|
||||||
zerver_realm.date_created < %%(time_end)s AND
|
zerver_realm.date_created < %(time_end)s AND
|
||||||
zerver_userprofile.date_joined >= %%(time_start)s AND
|
zerver_userprofile.date_joined >= %(time_start)s AND
|
||||||
zerver_userprofile.date_joined < %%(time_end)s AND
|
zerver_userprofile.date_joined < %(time_end)s AND
|
||||||
{realm_clause}
|
{realm_clause}
|
||||||
zerver_userprofile.is_active = TRUE
|
zerver_userprofile.is_active = TRUE
|
||||||
GROUP BY zerver_realm.id %(group_by_clause)s
|
GROUP BY zerver_realm.id {group_by_clause}
|
||||||
""".format(realm_clause=realm_clause)
|
""").format(**kwargs, realm_clause=realm_clause)
|
||||||
|
|
||||||
# Currently hardcodes the query needed for active_users_audit:is_bot:day.
|
# Currently hardcodes the query needed for active_users_audit:is_bot:day.
|
||||||
# Assumes that a user cannot have two RealmAuditLog entries with the same event_time and
|
# Assumes that a user cannot have two RealmAuditLog entries with the same event_time and
|
||||||
# event_type in [RealmAuditLog.USER_CREATED, USER_DEACTIVATED, etc].
|
# event_type in [RealmAuditLog.USER_CREATED, USER_DEACTIVATED, etc].
|
||||||
# In particular, it's important to ensure that migrations don't cause that to happen.
|
# In particular, it's important to ensure that migrations don't cause that to happen.
|
||||||
def check_realmauditlog_by_user_query(realm: Optional[Realm]) -> str:
|
def check_realmauditlog_by_user_query(realm: Optional[Realm]) -> QueryFn:
|
||||||
if realm is None:
|
if realm is None:
|
||||||
realm_clause = ""
|
realm_clause = SQL("")
|
||||||
else:
|
else:
|
||||||
realm_clause = "realm_id = %s AND" % (realm.id,)
|
realm_clause = SQL("realm_id = {} AND").format(Literal(realm.id))
|
||||||
return """
|
return lambda kwargs: SQL("""
|
||||||
INSERT INTO analytics_usercount
|
INSERT INTO analytics_usercount
|
||||||
(user_id, realm_id, value, property, subgroup, end_time)
|
(user_id, realm_id, value, property, subgroup, end_time)
|
||||||
SELECT
|
SELECT
|
||||||
ral1.modified_user_id, ral1.realm_id, 1, '%(property)s', %(subgroup)s, %%(time_end)s
|
ral1.modified_user_id, ral1.realm_id, 1, %(property)s, {subgroup}, %(time_end)s
|
||||||
FROM zerver_realmauditlog ral1
|
FROM zerver_realmauditlog ral1
|
||||||
JOIN (
|
JOIN (
|
||||||
SELECT modified_user_id, max(event_time) AS max_event_time
|
SELECT modified_user_id, max(event_time) AS max_event_time
|
||||||
@@ -465,7 +489,7 @@ def check_realmauditlog_by_user_query(realm: Optional[Realm]) -> str:
|
|||||||
WHERE
|
WHERE
|
||||||
event_type in ({user_created}, {user_activated}, {user_deactivated}, {user_reactivated}) AND
|
event_type in ({user_created}, {user_activated}, {user_deactivated}, {user_reactivated}) AND
|
||||||
{realm_clause}
|
{realm_clause}
|
||||||
event_time < %%(time_end)s
|
event_time < %(time_end)s
|
||||||
GROUP BY modified_user_id
|
GROUP BY modified_user_id
|
||||||
) ral2
|
) ral2
|
||||||
ON
|
ON
|
||||||
@@ -476,43 +500,46 @@ def check_realmauditlog_by_user_query(realm: Optional[Realm]) -> str:
|
|||||||
ral1.modified_user_id = zerver_userprofile.id
|
ral1.modified_user_id = zerver_userprofile.id
|
||||||
WHERE
|
WHERE
|
||||||
ral1.event_type in ({user_created}, {user_activated}, {user_reactivated})
|
ral1.event_type in ({user_created}, {user_activated}, {user_reactivated})
|
||||||
""".format(user_created=RealmAuditLog.USER_CREATED,
|
""").format(
|
||||||
user_activated=RealmAuditLog.USER_ACTIVATED,
|
**kwargs,
|
||||||
user_deactivated=RealmAuditLog.USER_DEACTIVATED,
|
user_created=Literal(RealmAuditLog.USER_CREATED),
|
||||||
user_reactivated=RealmAuditLog.USER_REACTIVATED,
|
user_activated=Literal(RealmAuditLog.USER_ACTIVATED),
|
||||||
realm_clause=realm_clause)
|
user_deactivated=Literal(RealmAuditLog.USER_DEACTIVATED),
|
||||||
|
user_reactivated=Literal(RealmAuditLog.USER_REACTIVATED),
|
||||||
|
realm_clause=realm_clause,
|
||||||
|
)
|
||||||
|
|
||||||
def check_useractivityinterval_by_user_query(realm: Optional[Realm]) -> str:
|
def check_useractivityinterval_by_user_query(realm: Optional[Realm]) -> QueryFn:
|
||||||
if realm is None:
|
if realm is None:
|
||||||
realm_clause = ""
|
realm_clause = SQL("")
|
||||||
else:
|
else:
|
||||||
realm_clause = "zerver_userprofile.realm_id = %s AND" % (realm.id,)
|
realm_clause = SQL("zerver_userprofile.realm_id = {} AND").format(Literal(realm.id))
|
||||||
return """
|
return lambda kwargs: SQL("""
|
||||||
INSERT INTO analytics_usercount
|
INSERT INTO analytics_usercount
|
||||||
(user_id, realm_id, value, property, subgroup, end_time)
|
(user_id, realm_id, value, property, subgroup, end_time)
|
||||||
SELECT
|
SELECT
|
||||||
zerver_userprofile.id, zerver_userprofile.realm_id, 1, '%(property)s', %(subgroup)s, %%(time_end)s
|
zerver_userprofile.id, zerver_userprofile.realm_id, 1, %(property)s, {subgroup}, %(time_end)s
|
||||||
FROM zerver_userprofile
|
FROM zerver_userprofile
|
||||||
JOIN zerver_useractivityinterval
|
JOIN zerver_useractivityinterval
|
||||||
ON
|
ON
|
||||||
zerver_userprofile.id = zerver_useractivityinterval.user_profile_id
|
zerver_userprofile.id = zerver_useractivityinterval.user_profile_id
|
||||||
WHERE
|
WHERE
|
||||||
zerver_useractivityinterval.end >= %%(time_start)s AND
|
zerver_useractivityinterval.end >= %(time_start)s AND
|
||||||
{realm_clause}
|
{realm_clause}
|
||||||
zerver_useractivityinterval.start < %%(time_end)s
|
zerver_useractivityinterval.start < %(time_end)s
|
||||||
GROUP BY zerver_userprofile.id %(group_by_clause)s
|
GROUP BY zerver_userprofile.id {group_by_clause}
|
||||||
""".format(realm_clause=realm_clause)
|
""").format(**kwargs, realm_clause=realm_clause)
|
||||||
|
|
||||||
def count_realm_active_humans_query(realm: Optional[Realm]) -> str:
|
def count_realm_active_humans_query(realm: Optional[Realm]) -> QueryFn:
|
||||||
if realm is None:
|
if realm is None:
|
||||||
realm_clause = ""
|
realm_clause = SQL("")
|
||||||
else:
|
else:
|
||||||
realm_clause = "realm_id = %s AND" % (realm.id,)
|
realm_clause = SQL("realm_id = {} AND").format(Literal(realm.id))
|
||||||
return """
|
return lambda kwargs: SQL("""
|
||||||
INSERT INTO analytics_realmcount
|
INSERT INTO analytics_realmcount
|
||||||
(realm_id, value, property, subgroup, end_time)
|
(realm_id, value, property, subgroup, end_time)
|
||||||
SELECT
|
SELECT
|
||||||
usercount1.realm_id, count(*), '%(property)s', NULL, %%(time_end)s
|
usercount1.realm_id, count(*), %(property)s, NULL, %(time_end)s
|
||||||
FROM (
|
FROM (
|
||||||
SELECT realm_id, user_id
|
SELECT realm_id, user_id
|
||||||
FROM analytics_usercount
|
FROM analytics_usercount
|
||||||
@@ -520,7 +547,7 @@ def count_realm_active_humans_query(realm: Optional[Realm]) -> str:
|
|||||||
property = 'active_users_audit:is_bot:day' AND
|
property = 'active_users_audit:is_bot:day' AND
|
||||||
subgroup = 'false' AND
|
subgroup = 'false' AND
|
||||||
{realm_clause}
|
{realm_clause}
|
||||||
end_time = %%(time_end)s
|
end_time = %(time_end)s
|
||||||
) usercount1
|
) usercount1
|
||||||
JOIN (
|
JOIN (
|
||||||
SELECT realm_id, user_id
|
SELECT realm_id, user_id
|
||||||
@@ -528,29 +555,29 @@ def count_realm_active_humans_query(realm: Optional[Realm]) -> str:
|
|||||||
WHERE
|
WHERE
|
||||||
property = '15day_actives::day' AND
|
property = '15day_actives::day' AND
|
||||||
{realm_clause}
|
{realm_clause}
|
||||||
end_time = %%(time_end)s
|
end_time = %(time_end)s
|
||||||
) usercount2
|
) usercount2
|
||||||
ON
|
ON
|
||||||
usercount1.user_id = usercount2.user_id
|
usercount1.user_id = usercount2.user_id
|
||||||
GROUP BY usercount1.realm_id
|
GROUP BY usercount1.realm_id
|
||||||
""".format(realm_clause=realm_clause)
|
""").format(**kwargs, realm_clause=realm_clause)
|
||||||
|
|
||||||
# Currently unused and untested
|
# Currently unused and untested
|
||||||
count_stream_by_realm_query = """
|
count_stream_by_realm_query = lambda kwargs: SQL("""
|
||||||
INSERT INTO analytics_realmcount
|
INSERT INTO analytics_realmcount
|
||||||
(realm_id, value, property, subgroup, end_time)
|
(realm_id, value, property, subgroup, end_time)
|
||||||
SELECT
|
SELECT
|
||||||
zerver_realm.id, count(*), '%(property)s', %(subgroup)s, %%(time_end)s
|
zerver_realm.id, count(*), %(property)s, {subgroup}, %(time_end)s
|
||||||
FROM zerver_realm
|
FROM zerver_realm
|
||||||
JOIN zerver_stream
|
JOIN zerver_stream
|
||||||
ON
|
ON
|
||||||
zerver_realm.id = zerver_stream.realm_id AND
|
zerver_realm.id = zerver_stream.realm_id AND
|
||||||
WHERE
|
WHERE
|
||||||
zerver_realm.date_created < %%(time_end)s AND
|
zerver_realm.date_created < %(time_end)s AND
|
||||||
zerver_stream.date_created >= %%(time_start)s AND
|
zerver_stream.date_created >= %(time_start)s AND
|
||||||
zerver_stream.date_created < %%(time_end)s
|
zerver_stream.date_created < %(time_end)s
|
||||||
GROUP BY zerver_realm.id %(group_by_clause)s
|
GROUP BY zerver_realm.id {group_by_clause}
|
||||||
"""
|
""").format(**kwargs)
|
||||||
|
|
||||||
def get_count_stats(realm: Optional[Realm]=None) -> Dict[str, CountStat]:
|
def get_count_stats(realm: Optional[Realm]=None) -> Dict[str, CountStat]:
|
||||||
## CountStat declarations ##
|
## CountStat declarations ##
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ from django.db import models
|
|||||||
from django.db.models import Sum
|
from django.db.models import Sum
|
||||||
from django.test import TestCase
|
from django.test import TestCase
|
||||||
from django.utils.timezone import now as timezone_now
|
from django.utils.timezone import now as timezone_now
|
||||||
|
from psycopg2.sql import SQL, Literal
|
||||||
|
|
||||||
from analytics.lib.counts import COUNT_STATS, CountStat, get_count_stats, \
|
from analytics.lib.counts import COUNT_STATS, CountStat, get_count_stats, \
|
||||||
DependentCountStat, LoggingCountStat, do_aggregate_to_summary_table, \
|
DependentCountStat, LoggingCountStat, do_aggregate_to_summary_table, \
|
||||||
@@ -166,8 +167,13 @@ class AnalyticsTestCase(TestCase):
|
|||||||
|
|
||||||
class TestProcessCountStat(AnalyticsTestCase):
|
class TestProcessCountStat(AnalyticsTestCase):
|
||||||
def make_dummy_count_stat(self, property: str) -> CountStat:
|
def make_dummy_count_stat(self, property: str) -> CountStat:
|
||||||
query = """INSERT INTO analytics_realmcount (realm_id, value, property, end_time)
|
query = lambda kwargs: SQL("""
|
||||||
VALUES (%s, 1, '%s', %%%%(time_end)s)""" % (self.default_realm.id, property)
|
INSERT INTO analytics_realmcount (realm_id, value, property, end_time)
|
||||||
|
VALUES ({default_realm_id}, 1, {property}, %(time_end)s)
|
||||||
|
""").format(
|
||||||
|
default_realm_id=Literal(self.default_realm.id),
|
||||||
|
property=Literal(property),
|
||||||
|
)
|
||||||
return CountStat(property, sql_data_collector(RealmCount, query, None), CountStat.HOUR)
|
return CountStat(property, sql_data_collector(RealmCount, query, None), CountStat.HOUR)
|
||||||
|
|
||||||
def assertFillStateEquals(self, stat: CountStat, end_time: datetime,
|
def assertFillStateEquals(self, stat: CountStat, end_time: datetime,
|
||||||
@@ -265,8 +271,13 @@ class TestProcessCountStat(AnalyticsTestCase):
|
|||||||
def test_process_dependent_stat(self) -> None:
|
def test_process_dependent_stat(self) -> None:
|
||||||
stat1 = self.make_dummy_count_stat('stat1')
|
stat1 = self.make_dummy_count_stat('stat1')
|
||||||
stat2 = self.make_dummy_count_stat('stat2')
|
stat2 = self.make_dummy_count_stat('stat2')
|
||||||
query = """INSERT INTO analytics_realmcount (realm_id, value, property, end_time)
|
query = lambda kwargs: SQL("""
|
||||||
VALUES (%s, 1, '%s', %%%%(time_end)s)""" % (self.default_realm.id, 'stat3')
|
INSERT INTO analytics_realmcount (realm_id, value, property, end_time)
|
||||||
|
VALUES ({default_realm_id}, 1, {property}, %(time_end)s)
|
||||||
|
""").format(
|
||||||
|
default_realm_id=Literal(self.default_realm.id),
|
||||||
|
property=Literal('stat3'),
|
||||||
|
)
|
||||||
stat3 = DependentCountStat('stat3', sql_data_collector(RealmCount, query, None),
|
stat3 = DependentCountStat('stat3', sql_data_collector(RealmCount, query, None),
|
||||||
CountStat.HOUR,
|
CountStat.HOUR,
|
||||||
dependencies=['stat1', 'stat2'])
|
dependencies=['stat1', 'stat2'])
|
||||||
@@ -299,8 +310,13 @@ class TestProcessCountStat(AnalyticsTestCase):
|
|||||||
self.assertFillStateEquals(stat3, hour[2])
|
self.assertFillStateEquals(stat3, hour[2])
|
||||||
|
|
||||||
# test daily dependent stat with hourly dependencies
|
# test daily dependent stat with hourly dependencies
|
||||||
query = """INSERT INTO analytics_realmcount (realm_id, value, property, end_time)
|
query = lambda kwargs: SQL("""
|
||||||
VALUES (%s, 1, '%s', %%%%(time_end)s)""" % (self.default_realm.id, 'stat4')
|
INSERT INTO analytics_realmcount (realm_id, value, property, end_time)
|
||||||
|
VALUES ({default_realm_id}, 1, {property}, %(time_end)s)
|
||||||
|
""").format(
|
||||||
|
default_realm_id=Literal(self.default_realm.id),
|
||||||
|
property=Literal('stat4'),
|
||||||
|
)
|
||||||
stat4 = DependentCountStat('stat4', sql_data_collector(RealmCount, query, None),
|
stat4 = DependentCountStat('stat4', sql_data_collector(RealmCount, query, None),
|
||||||
CountStat.DAY,
|
CountStat.DAY,
|
||||||
dependencies=['stat1', 'stat2'])
|
dependencies=['stat1', 'stat2'])
|
||||||
|
|||||||
@@ -24,6 +24,7 @@ from django.utils.timesince import timesince
|
|||||||
from django.core.validators import URLValidator
|
from django.core.validators import URLValidator
|
||||||
from django.core.exceptions import ValidationError
|
from django.core.exceptions import ValidationError
|
||||||
from jinja2 import Markup as mark_safe
|
from jinja2 import Markup as mark_safe
|
||||||
|
from psycopg2.sql import Composable, Literal, SQL
|
||||||
|
|
||||||
from analytics.lib.counts import COUNT_STATS, CountStat
|
from analytics.lib.counts import COUNT_STATS, CountStat
|
||||||
from analytics.lib.time_utils import time_range
|
from analytics.lib.time_utils import time_range
|
||||||
@@ -415,7 +416,7 @@ def dictfetchall(cursor: connection.cursor) -> List[Dict[str, Any]]:
|
|||||||
|
|
||||||
|
|
||||||
def get_realm_day_counts() -> Dict[str, Dict[str, str]]:
|
def get_realm_day_counts() -> Dict[str, Dict[str, str]]:
|
||||||
query = '''
|
query = SQL('''
|
||||||
select
|
select
|
||||||
r.string_id,
|
r.string_id,
|
||||||
(now()::date - date_sent::date) age,
|
(now()::date - date_sent::date) age,
|
||||||
@@ -436,7 +437,7 @@ def get_realm_day_counts() -> Dict[str, Dict[str, str]]:
|
|||||||
order by
|
order by
|
||||||
r.string_id,
|
r.string_id,
|
||||||
age
|
age
|
||||||
'''
|
''')
|
||||||
cursor = connection.cursor()
|
cursor = connection.cursor()
|
||||||
cursor.execute(query)
|
cursor.execute(query)
|
||||||
rows = dictfetchall(cursor)
|
rows = dictfetchall(cursor)
|
||||||
@@ -476,7 +477,7 @@ def get_plan_name(plan_type: int) -> str:
|
|||||||
def realm_summary_table(realm_minutes: Dict[str, float]) -> str:
|
def realm_summary_table(realm_minutes: Dict[str, float]) -> str:
|
||||||
now = timezone_now()
|
now = timezone_now()
|
||||||
|
|
||||||
query = '''
|
query = SQL('''
|
||||||
SELECT
|
SELECT
|
||||||
realm.string_id,
|
realm.string_id,
|
||||||
realm.date_created,
|
realm.date_created,
|
||||||
@@ -578,7 +579,7 @@ def realm_summary_table(realm_minutes: Dict[str, float]) -> str:
|
|||||||
last_visit > now() - interval '2 week'
|
last_visit > now() - interval '2 week'
|
||||||
)
|
)
|
||||||
ORDER BY dau_count DESC, string_id ASC
|
ORDER BY dau_count DESC, string_id ASC
|
||||||
'''
|
''')
|
||||||
|
|
||||||
cursor = connection.cursor()
|
cursor = connection.cursor()
|
||||||
cursor.execute(query)
|
cursor.execute(query)
|
||||||
@@ -737,7 +738,7 @@ def sent_messages_report(realm: str) -> str:
|
|||||||
'Bots'
|
'Bots'
|
||||||
]
|
]
|
||||||
|
|
||||||
query = '''
|
query = SQL('''
|
||||||
select
|
select
|
||||||
series.day::date,
|
series.day::date,
|
||||||
humans.cnt,
|
humans.cnt,
|
||||||
@@ -787,7 +788,7 @@ def sent_messages_report(realm: str) -> str:
|
|||||||
date_sent::date
|
date_sent::date
|
||||||
) bots on
|
) bots on
|
||||||
series.day = bots.date_sent
|
series.day = bots.date_sent
|
||||||
'''
|
''')
|
||||||
cursor = connection.cursor()
|
cursor = connection.cursor()
|
||||||
cursor.execute(query, [realm, realm])
|
cursor.execute(query, [realm, realm])
|
||||||
rows = cursor.fetchall()
|
rows = cursor.fetchall()
|
||||||
@@ -796,7 +797,7 @@ def sent_messages_report(realm: str) -> str:
|
|||||||
return make_table(title, cols, rows)
|
return make_table(title, cols, rows)
|
||||||
|
|
||||||
def ad_hoc_queries() -> List[Dict[str, str]]:
|
def ad_hoc_queries() -> List[Dict[str, str]]:
|
||||||
def get_page(query: str, cols: List[str], title: str,
|
def get_page(query: Composable, cols: List[str], title: str,
|
||||||
totals_columns: List[int]=[]) -> Dict[str, str]:
|
totals_columns: List[int]=[]) -> Dict[str, str]:
|
||||||
cursor = connection.cursor()
|
cursor = connection.cursor()
|
||||||
cursor.execute(query)
|
cursor.execute(query)
|
||||||
@@ -842,7 +843,7 @@ def ad_hoc_queries() -> List[Dict[str, str]]:
|
|||||||
for mobile_type in ['Android', 'ZulipiOS']:
|
for mobile_type in ['Android', 'ZulipiOS']:
|
||||||
title = '%s usage' % (mobile_type,)
|
title = '%s usage' % (mobile_type,)
|
||||||
|
|
||||||
query = '''
|
query = SQL('''
|
||||||
select
|
select
|
||||||
realm.string_id,
|
realm.string_id,
|
||||||
up.id user_id,
|
up.id user_id,
|
||||||
@@ -854,11 +855,13 @@ def ad_hoc_queries() -> List[Dict[str, str]]:
|
|||||||
join zerver_userprofile up on up.id = ua.user_profile_id
|
join zerver_userprofile up on up.id = ua.user_profile_id
|
||||||
join zerver_realm realm on realm.id = up.realm_id
|
join zerver_realm realm on realm.id = up.realm_id
|
||||||
where
|
where
|
||||||
client.name like '%s'
|
client.name like {mobile_type}
|
||||||
group by string_id, up.id, client.name
|
group by string_id, up.id, client.name
|
||||||
having max(last_visit) > now() - interval '2 week'
|
having max(last_visit) > now() - interval '2 week'
|
||||||
order by string_id, up.id, client.name
|
order by string_id, up.id, client.name
|
||||||
''' % (mobile_type,)
|
''').format(
|
||||||
|
mobile_type=Literal(mobile_type),
|
||||||
|
)
|
||||||
|
|
||||||
cols = [
|
cols = [
|
||||||
'Realm',
|
'Realm',
|
||||||
@@ -874,7 +877,7 @@ def ad_hoc_queries() -> List[Dict[str, str]]:
|
|||||||
|
|
||||||
title = 'Desktop users'
|
title = 'Desktop users'
|
||||||
|
|
||||||
query = '''
|
query = SQL('''
|
||||||
select
|
select
|
||||||
realm.string_id,
|
realm.string_id,
|
||||||
client.name,
|
client.name,
|
||||||
@@ -889,7 +892,7 @@ def ad_hoc_queries() -> List[Dict[str, str]]:
|
|||||||
group by string_id, client.name
|
group by string_id, client.name
|
||||||
having max(last_visit) > now() - interval '2 week'
|
having max(last_visit) > now() - interval '2 week'
|
||||||
order by string_id, client.name
|
order by string_id, client.name
|
||||||
'''
|
''')
|
||||||
|
|
||||||
cols = [
|
cols = [
|
||||||
'Realm',
|
'Realm',
|
||||||
@@ -904,7 +907,7 @@ def ad_hoc_queries() -> List[Dict[str, str]]:
|
|||||||
|
|
||||||
title = 'Integrations by realm'
|
title = 'Integrations by realm'
|
||||||
|
|
||||||
query = '''
|
query = SQL('''
|
||||||
select
|
select
|
||||||
realm.string_id,
|
realm.string_id,
|
||||||
case
|
case
|
||||||
@@ -927,7 +930,7 @@ def ad_hoc_queries() -> List[Dict[str, str]]:
|
|||||||
group by string_id, client_name
|
group by string_id, client_name
|
||||||
having max(last_visit) > now() - interval '2 week'
|
having max(last_visit) > now() - interval '2 week'
|
||||||
order by string_id, client_name
|
order by string_id, client_name
|
||||||
'''
|
''')
|
||||||
|
|
||||||
cols = [
|
cols = [
|
||||||
'Realm',
|
'Realm',
|
||||||
@@ -942,7 +945,7 @@ def ad_hoc_queries() -> List[Dict[str, str]]:
|
|||||||
|
|
||||||
title = 'Integrations by client'
|
title = 'Integrations by client'
|
||||||
|
|
||||||
query = '''
|
query = SQL('''
|
||||||
select
|
select
|
||||||
case
|
case
|
||||||
when query like '%%external%%' then split_part(query, '/', 5)
|
when query like '%%external%%' then split_part(query, '/', 5)
|
||||||
@@ -965,7 +968,7 @@ def ad_hoc_queries() -> List[Dict[str, str]]:
|
|||||||
group by client_name, string_id
|
group by client_name, string_id
|
||||||
having max(last_visit) > now() - interval '2 week'
|
having max(last_visit) > now() - interval '2 week'
|
||||||
order by client_name, string_id
|
order by client_name, string_id
|
||||||
'''
|
''')
|
||||||
|
|
||||||
cols = [
|
cols = [
|
||||||
'Client',
|
'Client',
|
||||||
@@ -978,7 +981,7 @@ def ad_hoc_queries() -> List[Dict[str, str]]:
|
|||||||
|
|
||||||
title = 'Remote Zulip servers'
|
title = 'Remote Zulip servers'
|
||||||
|
|
||||||
query = '''
|
query = SQL('''
|
||||||
with icount as (
|
with icount as (
|
||||||
select
|
select
|
||||||
server_id,
|
server_id,
|
||||||
@@ -1005,7 +1008,7 @@ def ad_hoc_queries() -> List[Dict[str, str]]:
|
|||||||
left join icount on icount.server_id = rserver.id
|
left join icount on icount.server_id = rserver.id
|
||||||
left join remote_push_devices on remote_push_devices.server_id = rserver.id
|
left join remote_push_devices on remote_push_devices.server_id = rserver.id
|
||||||
order by max_value DESC NULLS LAST, push_user_count DESC NULLS LAST
|
order by max_value DESC NULLS LAST, push_user_count DESC NULLS LAST
|
||||||
'''
|
''')
|
||||||
|
|
||||||
cols = [
|
cols = [
|
||||||
'ID',
|
'ID',
|
||||||
|
|||||||
Reference in New Issue
Block a user