message_summary: Add setting to put a monthly rate limit per user.

2025-10-23 04:52:12 +00:00 · 2025-02-05 14:47:52 +05:30
parent 7fb767c080
commit 2f5aea6604
8 changed files with 53 additions and 1 deletions
--- a/analytics/lib/counts.py
+++ b/analytics/lib/counts.py
@@ -2,11 +2,12 @@ import logging
 import time
 from collections import OrderedDict, defaultdict
 from collections.abc import Callable, Sequence
-from datetime import datetime, timedelta
+from datetime import datetime, timedelta, timezone
 from typing import TypeAlias, Union

 from django.conf import settings
 from django.db import connection, models
+from django.utils.timezone import now as timezone_now
 from psycopg2.sql import SQL, Composable, Identifier, Literal
 from typing_extensions import override

@@ -38,6 +39,7 @@ logger = logging.getLogger("zulip.analytics")
 # You can't subtract timedelta.max from a datetime, so use this instead
 TIMEDELTA_MAX = timedelta(days=365 * 1000)

+
 ## Class definitions ##


@@ -82,6 +84,27 @@ class CountStat:
            return fillstate.end_time
        return fillstate.end_time - self.time_increment

+    def current_month_accumulated_count_for_user(self, user: UserProfile) -> int:
+        """Sum this stat's UserCount values for `user` over the current UTC calendar month."""
+        now = timezone_now()
+        start_of_month = datetime(now.year, now.month, 1, tzinfo=timezone.utc)
+        if now.month == 12:  # nocoverage
+            start_of_next_month = datetime(now.year + 1, 1, 1, tzinfo=timezone.utc)
+        else:  # nocoverage
+            start_of_next_month = datetime(now.year, now.month + 1, 1, tzinfo=timezone.utc)
+        # Must be UserCount; this also rules out BaseCount, the only table lacking `objects`.
+        assert self.data_collector.output_table == UserCount
+        # A daily row's end_time is the END of the day it covers, so this month's rows lie in
+        # (start_of_month, start_of_next_month]; end_time == start_of_month is last month's.
+        result = self.data_collector.output_table.objects.filter(  # type: ignore[attr-defined] # see above
+            user=user,
+            property=self.property,
+            end_time__gt=start_of_month,
+            end_time__lte=start_of_next_month,
+        ).aggregate(models.Sum("value"))
+        # Sum() yields None when no rows match; report that as zero usage.
+        return result["value__sum"] or 0
+

 class LoggingCountStat(CountStat):
    def __init__(self, property: str, output_table: type[BaseCount], frequency: str) -> None:
--- a/tools/lib/capitalization.py
+++ b/tools/lib/capitalization.py
@@ -11,6 +11,7 @@ from bs4 import BeautifulSoup
 # this list without any modification.
 IGNORED_PHRASES = [
    # Proper nouns and acronyms
+    r"AI",
    r"API",
    r"APNS",
    r"Botserver",
--- a/zerver/actions/message_summary.py
+++ b/zerver/actions/message_summary.py
@@ -194,6 +194,7 @@ def do_summarize_narrow(
    input_tokens = response["usage"]["prompt_tokens"]
    output_tokens = response["usage"]["completion_tokens"]

+    # This product is in billionths of a currency unit; divide by 1 billion for the actual cost.
    credits_used = (output_tokens * settings.OUTPUT_COST_PER_GIGATOKEN) + (
        input_tokens * settings.INPUT_COST_PER_GIGATOKEN
    )
--- a/zerver/tests/test_message_summary.py
+++ b/zerver/tests/test_message_summary.py
@@ -77,6 +77,14 @@ class MessagesSummaryTestCase(ZulipTestCase):
        with open(LLM_FIXTURES_FILE, "rb") as f:
            fixture_data = orjson.loads(f.read())

+        # Block summary requests if budget set to 0.
+        with self.settings(
+            TOPIC_SUMMARIZATION_MODEL="groq/llama-3.3-70b-versatile",
+            MAX_PER_USER_MONTHLY_AI_COST=0,
+        ):
+            response = self.client_get("/json/messages/summary")
+            self.assert_json_error_contains(response, "Reached monthly limit for AI credits.")
+
        # Fake credentials to ensure we crash if actual network
        # requests occur, which would reflect a problem with how the
        # fixtures were set up.
@@ -103,3 +111,11 @@ class MessagesSummaryTestCase(ZulipTestCase):
                    property="ai_credit_usage::day", value=credits_used, user_id=self.user.id
                ).exists()
            )
+
+        # If we reached the credit usage limit, block summary requests.
+        with self.settings(
+            TOPIC_SUMMARIZATION_MODEL="groq/llama-3.3-70b-versatile",
+            MAX_PER_USER_MONTHLY_AI_COST=credits_used / 1000000000,
+        ):
+            response = self.client_get("/json/messages/summary")
+            self.assert_json_error_contains(response, "Reached monthly limit for AI credits.")
--- a/zerver/views/message_summary.py
+++ b/zerver/views/message_summary.py
@@ -8,6 +8,7 @@ from django.http import HttpRequest, HttpResponse
 from django.utils.translation import gettext as _
 from pydantic import Json

+from analytics.lib.counts import COUNT_STATS
 from zerver.actions.message_summary import do_summarize_narrow
 from zerver.lib.exceptions import JsonableError
 from zerver.lib.narrow import NarrowParameter
@@ -29,6 +30,13 @@ def get_messages_summary(
    if not (user_profile.is_moderator or user_profile.is_realm_admin):  # nocoverage
        return json_success(request, {"summary": "Feature limited to moderators for now."})

+    if settings.MAX_PER_USER_MONTHLY_AI_COST is not None:
+        used_credits = COUNT_STATS["ai_credit_usage::day"].current_month_accumulated_count_for_user(
+            user_profile
+        )
+        if used_credits >= settings.MAX_PER_USER_MONTHLY_AI_COST * 1000000000:
+            raise JsonableError(_("Reached monthly limit for AI credits."))
+
    summary = do_summarize_narrow(user_profile, narrow)
    if summary is None:  # nocoverage
        return json_success(request, {"summary": "No messages in conversation to summarize"})
--- a/zproject/default_settings.py
+++ b/zproject/default_settings.py
@@ -706,3 +706,4 @@ TOPIC_SUMMARIZATION_PARAMETERS: dict[str, object] = {}
 # are arbitrarily, but typically will be USD.
 INPUT_COST_PER_GIGATOKEN: int = 0
 OUTPUT_COST_PER_GIGATOKEN: int = 0
+MAX_PER_USER_MONTHLY_AI_COST: float | None = 0.5
--- a/zproject/dev_settings.py
+++ b/zproject/dev_settings.py
@@ -225,3 +225,4 @@ TOPIC_SUMMARIZATION_MODEL = "groq/llama-3.3-70b-versatile"
 # https://groq.com/pricing/
 OUTPUT_COST_PER_GIGATOKEN = 590
 INPUT_COST_PER_GIGATOKEN = 790
+MAX_PER_USER_MONTHLY_AI_COST = 1
--- a/zproject/prod_settings_template.py
+++ b/zproject/prod_settings_template.py
@@ -738,6 +738,7 @@ SOCIAL_AUTH_SAML_SUPPORT_CONTACT = {
 ## quota. Units are USD or other currency of your choice.
 # OUTPUT_COST_PER_GIGATOKEN = 500
 # INPUT_COST_PER_GIGATOKEN = 400
+# MAX_PER_USER_MONTHLY_AI_COST = 0.5

 ################
 ## Miscellaneous settings.