mirror of
https://github.com/zulip/zulip.git
synced 2025-11-18 04:43:58 +00:00
counts: Add function compute_max_monthly_messages for remote servers.
This calculates the largest number of messages sent within a month over the last 3 months. The query is targeted at the specific use case in this function - finding the count for a single server. To calculate this in bulk for a large number of remote servers, an adapted bulk query will be needed, rather than running this one in a loop, which would likely be very inefficient.
This commit is contained in:
committed by
Tim Abbott
parent
617d2d509c
commit
3cafdbdc1e
@@ -23,6 +23,7 @@ from zerver.models import (
|
|||||||
get_org_type_display_name,
|
get_org_type_display_name,
|
||||||
get_realm,
|
get_realm,
|
||||||
)
|
)
|
||||||
|
from zilencer.lib.remote_counts import MissingDataError
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from django.test.client import _MonkeyPatchedWSGIResponse as TestHttpResponse
|
from django.test.client import _MonkeyPatchedWSGIResponse as TestHttpResponse
|
||||||
@@ -63,8 +64,20 @@ class TestRemoteServerSupportEndpoint(ZulipTestCase):
|
|||||||
result,
|
result,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
with mock.patch("analytics.views.support.compute_max_monthly_messages", return_value=1000):
|
||||||
result = self.client_get("/activity/remote/support", {"q": "zulip-1.example.com"})
|
result = self.client_get("/activity/remote/support", {"q": "zulip-1.example.com"})
|
||||||
self.assert_in_success_response(["<h3>zulip-1.example.com</h3>"], result)
|
self.assert_in_success_response(["<h3>zulip-1.example.com</h3>"], result)
|
||||||
|
self.assert_in_success_response(["<b>Max monthly messages</b>: 1000"], result)
|
||||||
|
self.assert_not_in_success_response(["<h3>zulip-2.example.com</h3>"], result)
|
||||||
|
|
||||||
|
with mock.patch(
|
||||||
|
"analytics.views.support.compute_max_monthly_messages", side_effect=MissingDataError
|
||||||
|
):
|
||||||
|
result = self.client_get("/activity/remote/support", {"q": "zulip-1.example.com"})
|
||||||
|
self.assert_in_success_response(["<h3>zulip-1.example.com</h3>"], result)
|
||||||
|
self.assert_in_success_response(
|
||||||
|
["<b>Max monthly messages</b>: Recent data missing"], result
|
||||||
|
)
|
||||||
self.assert_not_in_success_response(["<h3>zulip-2.example.com</h3>"], result)
|
self.assert_not_in_success_response(["<h3>zulip-2.example.com</h3>"], result)
|
||||||
|
|
||||||
result = self.client_get("/activity/remote/support", {"q": "example.com"})
|
result = self.client_get("/activity/remote/support", {"q": "example.com"})
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ from contextlib import suppress
|
|||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from datetime import timedelta
|
from datetime import timedelta
|
||||||
from decimal import Decimal
|
from decimal import Decimal
|
||||||
from typing import Any, Dict, Iterable, List, Optional
|
from typing import Any, Dict, Iterable, List, Optional, Union
|
||||||
from urllib.parse import urlencode
|
from urllib.parse import urlencode
|
||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
@@ -47,8 +47,10 @@ from zerver.models import (
|
|||||||
get_user_profile_by_id,
|
get_user_profile_by_id,
|
||||||
)
|
)
|
||||||
from zerver.views.invite import get_invitee_emails_set
|
from zerver.views.invite import get_invitee_emails_set
|
||||||
|
from zilencer.lib.remote_counts import MissingDataError
|
||||||
|
|
||||||
if settings.ZILENCER_ENABLED:
|
if settings.ZILENCER_ENABLED:
|
||||||
|
from zilencer.lib.remote_counts import compute_max_monthly_messages
|
||||||
from zilencer.models import RemoteZulipServer
|
from zilencer.models import RemoteZulipServer
|
||||||
|
|
||||||
if settings.BILLING_ENABLED:
|
if settings.BILLING_ENABLED:
|
||||||
@@ -444,10 +446,20 @@ def remote_servers_support(
|
|||||||
remote_servers = get_remote_servers_for_support(
|
remote_servers = get_remote_servers_for_support(
|
||||||
email_to_search=email_to_search, hostname_to_search=hostname_to_search
|
email_to_search=email_to_search, hostname_to_search=hostname_to_search
|
||||||
)
|
)
|
||||||
|
remote_server_to_max_monthly_messages: Dict[int, Union[int, str]] = dict()
|
||||||
|
for remote_server in remote_servers:
|
||||||
|
try:
|
||||||
|
remote_server_to_max_monthly_messages[remote_server.id] = compute_max_monthly_messages(
|
||||||
|
remote_server
|
||||||
|
)
|
||||||
|
except MissingDataError:
|
||||||
|
remote_server_to_max_monthly_messages[remote_server.id] = "Recent data missing"
|
||||||
|
|
||||||
return render(
|
return render(
|
||||||
request,
|
request,
|
||||||
"analytics/remote_server_support.html",
|
"analytics/remote_server_support.html",
|
||||||
context=dict(
|
context=dict(
|
||||||
remote_servers=remote_servers,
|
remote_servers=remote_servers,
|
||||||
|
remote_server_to_max_monthly_messages=remote_server_to_max_monthly_messages,
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|||||||
104
corporate/tests/test_remote_counts.py
Normal file
104
corporate/tests/test_remote_counts.py
Normal file
@@ -0,0 +1,104 @@
|
|||||||
|
import datetime
|
||||||
|
|
||||||
|
import time_machine
|
||||||
|
from django.utils.timezone import now as timezone_now
|
||||||
|
from typing_extensions import override
|
||||||
|
|
||||||
|
from zerver.lib.test_classes import ZulipTestCase
|
||||||
|
from zilencer.lib.remote_counts import MissingDataError, compute_max_monthly_messages
|
||||||
|
from zilencer.models import RemoteInstallationCount, RemoteZulipServer
|
||||||
|
|
||||||
|
|
||||||
|
class RemoteCountTest(ZulipTestCase):
    """Exercise compute_max_monthly_messages against synthetic RemoteInstallationCount rows."""

    @override
    def setUp(self) -> None:
        # The remote server row is saved first, then the base-class setUp runs.
        self.server_uuid = "6cde5f7a-1f7e-4978-9716-49f69ebfc9fe"
        self.server = RemoteZulipServer.objects.create(
            uuid=self.server_uuid,
            api_key="magic_secret_api_key",
            hostname="demo.example.com",
            last_updated=timezone_now(),
        )
        super().setUp()

    def test_compute_max_monthly_messages(self) -> None:
        now = timezone_now()
        # Every end_time below is computed as "now_offset - N days". Shifting the
        # reference point forward by an hour keeps each row strictly inside its
        # intended 30-day window, so a row that would otherwise sit exactly on a
        # boundary (e.g. end_time == now - 30 days) cannot be attributed to either
        # of two neighbouring windows.
        now_offset = now + datetime.timedelta(hours=1)

        # With no analytics rows at all, the function must refuse to answer.
        with self.assertRaises(MissingDataError):
            compute_max_monthly_messages(self.server)

        # compute_max_monthly_messages treats the presence of an
        # "active_users_audit:is_bot:day" row as its signal that the server has
        # submitted useful analytics data; servers lacking such a row raise
        # MissingDataError, as asserted above. This single row satisfies that check.
        RemoteInstallationCount.objects.create(
            server=self.server,
            remote_id=1,
            property="active_users_audit:is_bot:day",
            value=5,
            end_time=now_offset - datetime.timedelta(days=4),
        )

        # No message rows exist yet. Zero counts are never recorded, so "no rows"
        # and "zero messages" are the same thing, and the expected answer is 0.
        self.assertEqual(compute_max_monthly_messages(self.server), 0)

        # (days before now_offset at which the window starts, per-day message count)
        message_windows = (
            # Last 30 days of data: 30 * 10 = 300 messages.
            (0, 10),
            # The 30 days before that: the peak of the last 3 months, 30 * 30 = 900.
            (30, 30),
            # A further 30 days back: the oldest month still considered, 30 * 20 = 600.
            (60, 20),
            # More than 90 days ago: must be ignored. It carries the highest
            # counts precisely to prove that it is excluded.
            (90, 100),
        )
        for window_start, daily_value in message_windows:
            RemoteInstallationCount.objects.bulk_create(
                [
                    RemoteInstallationCount(
                        server=self.server,
                        remote_id=1,
                        property="messages_sent:message_type:day",
                        value=daily_value,
                        end_time=now_offset - datetime.timedelta(days=window_start + day),
                    )
                    for day in range(1, 31)
                ]
            )

        with time_machine.travel(now, tick=False):
            self.assertEqual(compute_max_monthly_messages(self.server), 900)
|
||||||
@@ -25,6 +25,7 @@
|
|||||||
<h3>{{ remote_server.hostname }}</h3>
|
<h3>{{ remote_server.hostname }}</h3>
|
||||||
<b>Contact email</b>: {{ remote_server.contact_email }}<br />
|
<b>Contact email</b>: {{ remote_server.contact_email }}<br />
|
||||||
<b>Last updated</b>: {{ remote_server.last_updated|timesince }} ago<br />
|
<b>Last updated</b>: {{ remote_server.last_updated|timesince }} ago<br />
|
||||||
|
<b>Max monthly messages</b>: {{ remote_server_to_max_monthly_messages[remote_server.id] }}<br />
|
||||||
</div>
|
</div>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
77
zilencer/lib/remote_counts.py
Normal file
77
zilencer/lib/remote_counts.py
Normal file
@@ -0,0 +1,77 @@
|
|||||||
|
import datetime
|
||||||
|
|
||||||
|
from django.db import connection
|
||||||
|
from django.utils.timezone import now as timezone_now
|
||||||
|
from psycopg2.sql import SQL, Literal
|
||||||
|
|
||||||
|
from zilencer.models import RemoteInstallationCount, RemoteZulipServer
|
||||||
|
|
||||||
|
|
||||||
|
class MissingDataError(Exception):
    """Raised by compute_max_monthly_messages when its analytics-data check finds no matching row."""
|
||||||
|
|
||||||
|
|
||||||
|
def compute_max_monthly_messages(remote_server: RemoteZulipServer) -> int:
    """Return the largest 30-day message total for ``remote_server`` in the last 90 days.

    Daily ``messages_sent:message_type:day`` rows whose ``end_time`` lies
    within the last 90 days (measured against the database's
    ``CURRENT_TIMESTAMP``) are grouped into three consecutive 30-day
    windows. The rows in each window are summed and the largest sum is
    returned, or 0 when there are no such rows.

    This runs one query for one server; it is not a bulk computation.

    Raises:
        MissingDataError: if the server has no
            ``active_users_audit:is_bot:day`` row matching the check below.
    """
    # Ideally we would test for data on the property we actually care about
    # ('messages_sent:message_type:day'). The analytics tables have implicit
    # zeros, though, so a missing row there cannot be told apart from a day
    # with no messages. `active_users_audit` is used as a stand-in because it
    # is never zero for an initialized server.
    #
    # NOTE(review): the filter matches rows at least three days old
    # (`end_time__lte`), not rows from the last three days. Confirm that this
    # is the intended meaning of "current data".
    audit_cutoff = timezone_now() - datetime.timedelta(days=3)
    audit_rows = RemoteInstallationCount.objects.filter(
        server=remote_server,
        property="active_users_audit:is_bot:day",
        end_time__lte=audit_cutoff,
    )
    if not audit_rows.exists():
        raise MissingDataError

    # The server id is spliced in through psycopg2's Literal, which quotes the
    # value, rather than through Python string formatting.
    monthly_max_sql = SQL(
        """
        WITH server_message_stats_daily AS -- Up to 4 rows per day for different subgroups
        (
            SELECT
                r.end_time,
                r.value AS message_count
            FROM
                zilencer_remoteinstallationcount r
            WHERE
                r.property = 'messages_sent:message_type:day'
                AND end_time >= CURRENT_TIMESTAMP(0) - INTERVAL '90 days'
                AND r.server_id = {server_id}
        ),
        server_message_stats_monthly AS (
            SELECT
                CASE
                    WHEN current_timestamp(0) - end_time <= INTERVAL '30 days' THEN 0
                    WHEN current_timestamp(0) - end_time <= INTERVAL '60 days' THEN 1
                    WHEN current_timestamp(0) - end_time <= INTERVAL '90 days' THEN 2
                END AS billing_month,
                SUM(message_count) AS message_count
            FROM
                server_message_stats_daily
            GROUP BY
                1
        ),
        server_max_monthly_messages AS (
            SELECT
                MAX(message_count) AS message_count
            FROM
                server_message_stats_monthly
            WHERE
                billing_month IS NOT NULL
        )
        SELECT
            -- Return zeros, rather than nulls,
            -- for reporting servers with zero messages.
            COALESCE(server_max_monthly_messages.message_count, 0) AS message_count
        FROM
            server_max_monthly_messages;
        """
    )
    query = monthly_max_sql.format(server_id=Literal(remote_server.id))

    with connection.cursor() as cursor:
        cursor.execute(query)
        # The outer SELECT reads from an ungrouped MAX() aggregate, so it
        # yields a single row.
        row = cursor.fetchone()
    return int(row[0])
|
||||||
Reference in New Issue
Block a user