analytics: Remove active_users and active_users_log metrics.

Both of these are inaccurate, not currently used anywhere, and have
been superseded by the `active_users_audit` metric.
This commit is contained in:
Alex Vandiver
2024-06-03 16:29:44 +00:00
committed by Tim Abbott
parent 0100440a86
commit 09e9c75ec6
8 changed files with 33 additions and 180 deletions

View File

@@ -678,34 +678,6 @@ def count_message_by_stream_query(realm: Optional[Realm]) -> QueryFn:
).format(**kwargs, realm_clause=realm_clause)
# Hardcodes the query needed by active_users:is_bot:day, since that is
# currently the only stat that uses this.
def count_user_by_realm_query(realm: Optional[Realm]) -> QueryFn:
if realm is None:
realm_clause: Composable = SQL("")
else:
realm_clause = SQL("zerver_userprofile.realm_id = {} AND").format(Literal(realm.id))
return lambda kwargs: SQL(
"""
INSERT INTO analytics_realmcount
(realm_id, value, property, subgroup, end_time)
SELECT
zerver_realm.id, count(*), %(property)s, {subgroup}, %(time_end)s
FROM zerver_realm
JOIN zerver_userprofile
ON
zerver_realm.id = zerver_userprofile.realm_id
WHERE
zerver_realm.date_created < %(time_end)s AND
zerver_userprofile.date_joined >= %(time_start)s AND
zerver_userprofile.date_joined < %(time_end)s AND
{realm_clause}
zerver_userprofile.is_active = TRUE
GROUP BY zerver_realm.id {group_by_clause}
"""
).format(**kwargs, realm_clause=realm_clause)
# Hardcodes the query needed for active_users_audit:is_bot:day.
# Assumes that a user cannot have two RealmAuditLog entries with the
# same event_time and event_type in [RealmAuditLog.USER_CREATED,
@@ -878,10 +850,7 @@ def get_count_stats(realm: Optional[Realm] = None) -> Dict[str, CountStat]:
),
CountStat.DAY,
),
# Number of users stats
# Stats that count the number of active users in the UserProfile.is_active sense.
# 'active_users_audit:is_bot:day' is the canonical record of which users were
# active on which days (in the UserProfile.is_active sense).
# Counts the number of active users in the UserProfile.is_active sense.
# Important that this stay a daily stat, so that 'realm_active_humans::day' works as expected.
CountStat(
"active_users_audit:is_bot:day",
@@ -890,29 +859,6 @@ def get_count_stats(realm: Optional[Realm] = None) -> Dict[str, CountStat]:
),
CountStat.DAY,
),
# Important note: LoggingCountStat objects aren't passed the
# Realm argument, because by nature they have a logging
# structure, not a pull-from-database structure, so there's no
# way to compute them for a single realm after the fact (the
# use case for passing a Realm argument).
# Sanity check on 'active_users_audit:is_bot:day', and a archetype for future LoggingCountStats.
# In RealmCount, 'active_users_audit:is_bot:day' should be the partial
# sum sequence of 'active_users_log:is_bot:day', for any realm that
# started after the latter stat was introduced.
# Included in LOGGING_COUNT_STAT_PROPERTIES_NOT_SENT_TO_BOUNCER.
LoggingCountStat("active_users_log:is_bot:day", RealmCount, CountStat.DAY),
# Another sanity check on 'active_users_audit:is_bot:day'. Is only an
# approximation, e.g. if a user is deactivated between the end of the
# day and when this stat is run, they won't be counted. However, is the
# simplest of the three to inspect by hand.
CountStat(
"active_users:is_bot:day",
sql_data_collector(
RealmCount, count_user_by_realm_query(realm), (UserProfile, "is_bot")
),
CountStat.DAY,
interval=TIMEDELTA_MAX,
),
CountStat(
"upload_quota_used_bytes::day",
sql_data_collector(RealmCount, count_upload_space_used_by_realm_query(realm), None),

View File

@@ -0,0 +1,26 @@
from django.db import migrations
REMOVED_COUNTS = (
"active_users_log:is_bot:day",
"active_users:is_bot:day",
)
class Migration(migrations.Migration):
elidable = True
dependencies = [
("analytics", "0018_remove_usercount_active_users_audit"),
]
operations = [
migrations.RunSQL(
[
("DELETE FROM analytics_realmcount WHERE property IN %s", (REMOVED_COUNTS,)),
(
"DELETE FROM analytics_installationcount WHERE property IN %s",
(REMOVED_COUNTS,),
),
]
)
]

View File

@@ -543,34 +543,6 @@ class TestCountStats(AnalyticsTestCase):
# This huddle should not show up anywhere
self.create_huddle_with_recipient()
def test_active_users_by_is_bot(self) -> None:
stat = COUNT_STATS["active_users:is_bot:day"]
self.current_property = stat.property
# To be included
self.create_user(is_bot=True)
self.create_user(is_bot=True, date_joined=self.TIME_ZERO - 25 * self.HOUR)
self.create_user(is_bot=False)
# To be excluded
self.create_user(is_active=False)
do_fill_count_stat_at_hour(stat, self.TIME_ZERO)
self.assertTableState(
RealmCount,
["value", "subgroup", "realm"],
[
[2, "true"],
[1, "false"],
[3, "false", self.second_realm],
[1, "false", self.no_message_realm],
],
)
self.assertTableState(InstallationCount, ["value", "subgroup"], [[2, "true"], [5, "false"]])
self.assertTableState(UserCount, [], [])
self.assertTableState(StreamCount, [], [])
def test_upload_quota_used_bytes(self) -> None:
stat = COUNT_STATS["upload_quota_used_bytes::day"]
self.current_property = stat.property
@@ -606,31 +578,6 @@ class TestCountStats(AnalyticsTestCase):
],
)
def test_active_users_by_is_bot_for_realm_constraint(self) -> None:
# For single Realm
COUNT_STATS = get_count_stats(self.default_realm)
stat = COUNT_STATS["active_users:is_bot:day"]
self.current_property = stat.property
# To be included
self.create_user(is_bot=True, date_joined=self.TIME_ZERO - 25 * self.HOUR)
self.create_user(is_bot=False)
# To be excluded
self.create_user(
email="test@second.analytics",
realm=self.second_realm,
date_joined=self.TIME_ZERO - 2 * self.DAY,
)
do_fill_count_stat_at_hour(stat, self.TIME_ZERO, self.default_realm)
self.assertTableState(RealmCount, ["value", "subgroup"], [[1, "true"], [1, "false"]])
# No aggregation to InstallationCount with realm constraint
self.assertTableState(InstallationCount, ["value", "subgroup"], [])
self.assertTableState(UserCount, [], [])
self.assertTableState(StreamCount, [], [])
def test_messages_sent_by_is_bot(self) -> None:
stat = COUNT_STATS["messages_sent:is_bot:hour"]
self.current_property = stat.property
@@ -1419,46 +1366,6 @@ class TestLoggingCountStats(AnalyticsTestCase):
self.assertTableState(UserCount, ["property", "value"], [["user test", 1]])
self.assertTableState(StreamCount, ["property", "value"], [["stream test", 1]])
def test_active_users_log_by_is_bot(self) -> None:
property = "active_users_log:is_bot:day"
user = do_create_user(
"email", "password", self.default_realm, "full_name", acting_user=None
)
self.assertEqual(
1,
RealmCount.objects.filter(property=property, subgroup=False).aggregate(Sum("value"))[
"value__sum"
],
)
do_deactivate_user(user, acting_user=None)
self.assertEqual(
0,
RealmCount.objects.filter(property=property, subgroup=False).aggregate(Sum("value"))[
"value__sum"
],
)
do_activate_mirror_dummy_user(user, acting_user=None)
self.assertEqual(
1,
RealmCount.objects.filter(property=property, subgroup=False).aggregate(Sum("value"))[
"value__sum"
],
)
do_deactivate_user(user, acting_user=None)
self.assertEqual(
0,
RealmCount.objects.filter(property=property, subgroup=False).aggregate(Sum("value"))[
"value__sum"
],
)
do_reactivate_user(user, acting_user=None)
self.assertEqual(
1,
RealmCount.objects.filter(property=property, subgroup=False).aggregate(Sum("value"))[
"value__sum"
],
)
@override_settings(PUSH_NOTIFICATION_BOUNCER_URL="https://push.zulip.org.example.com")
def test_mobile_pushes_received_count(self) -> None:
self.server_uuid = "6cde5f7a-1f7e-4978-9716-49f69ebfc9fe"