diff --git a/analytics/lib/counts.py b/analytics/lib/counts.py index 0b9b9b2b7e..0ef8d58a84 100644 --- a/analytics/lib/counts.py +++ b/analytics/lib/counts.py @@ -2,11 +2,12 @@ import logging import time from collections import OrderedDict, defaultdict from collections.abc import Callable, Sequence -from datetime import datetime, timedelta +from datetime import datetime, timedelta, timezone from typing import TypeAlias, Union from django.conf import settings from django.db import connection, models +from django.utils.timezone import now as timezone_now from psycopg2.sql import SQL, Composable, Identifier, Literal from typing_extensions import override @@ -38,6 +39,7 @@ logger = logging.getLogger("zulip.analytics") # You can't subtract timedelta.max from a datetime, so use this instead TIMEDELTA_MAX = timedelta(days=365 * 1000) + ## Class definitions ## @@ -82,6 +84,27 @@ class CountStat: return fillstate.end_time return fillstate.end_time - self.time_increment + def current_month_accumulated_count_for_user(self, user: UserProfile) -> int: + now = timezone_now() + start_of_month = datetime(now.year, now.month, 1, tzinfo=timezone.utc) + if now.month == 12: # nocoverage + start_of_next_month = datetime(now.year + 1, 1, 1, tzinfo=timezone.utc) + else: # nocoverage + start_of_next_month = datetime(now.year, now.month + 1, 1, tzinfo=timezone.utc) + + # We only need to confirm that we are not using BaseCount itself; every + # other `output_table` has an `objects` property.
+ assert self.data_collector.output_table == UserCount + result = self.data_collector.output_table.objects.filter( # type: ignore[attr-defined] # see above + user=user, + property=self.property, + end_time__gte=start_of_month, + end_time__lt=start_of_next_month, + ).aggregate(models.Sum("value")) + + total_value = result["value__sum"] or 0 + return total_value + class LoggingCountStat(CountStat): def __init__(self, property: str, output_table: type[BaseCount], frequency: str) -> None: diff --git a/tools/lib/capitalization.py b/tools/lib/capitalization.py index cc18ff501f..491d639c68 100644 --- a/tools/lib/capitalization.py +++ b/tools/lib/capitalization.py @@ -11,6 +11,7 @@ from bs4 import BeautifulSoup # this list without any modification. IGNORED_PHRASES = [ # Proper nouns and acronyms + r"AI", r"API", r"APNS", r"Botserver", diff --git a/zerver/actions/message_summary.py b/zerver/actions/message_summary.py index bbd5dc70c6..923c13749c 100644 --- a/zerver/actions/message_summary.py +++ b/zerver/actions/message_summary.py @@ -194,6 +194,7 @@ def do_summarize_narrow( input_tokens = response["usage"]["prompt_tokens"] output_tokens = response["usage"]["completion_tokens"] + # Divide by 1 billion to get actual cost in USD. credits_used = (output_tokens * settings.OUTPUT_COST_PER_GIGATOKEN) + ( input_tokens * settings.INPUT_COST_PER_GIGATOKEN ) diff --git a/zerver/tests/test_message_summary.py b/zerver/tests/test_message_summary.py index ad9c5207cf..93ef6b86e0 100644 --- a/zerver/tests/test_message_summary.py +++ b/zerver/tests/test_message_summary.py @@ -77,6 +77,14 @@ class MessagesSummaryTestCase(ZulipTestCase): with open(LLM_FIXTURES_FILE, "rb") as f: fixture_data = orjson.loads(f.read()) + # Block summary requests if budget set to 0. 
+ with self.settings( + TOPIC_SUMMARIZATION_MODEL="groq/llama-3.3-70b-versatile", + MAX_PER_USER_MONTHLY_AI_COST=0, + ): + response = self.client_get("/json/messages/summary") + self.assert_json_error_contains(response, "Reached monthly limit for AI credits.") + # Fake credentials to ensure we crash if actual network # requests occur, which would reflect a problem with how the # fixtures were set up. @@ -103,3 +111,11 @@ class MessagesSummaryTestCase(ZulipTestCase): property="ai_credit_usage::day", value=credits_used, user_id=self.user.id ).exists() ) + + # If we reached the credit usage limit, block summary requests. + with self.settings( + TOPIC_SUMMARIZATION_MODEL="groq/llama-3.3-70b-versatile", + MAX_PER_USER_MONTHLY_AI_COST=credits_used / 1000000000, + ): + response = self.client_get("/json/messages/summary") + self.assert_json_error_contains(response, "Reached monthly limit for AI credits.") diff --git a/zerver/views/message_summary.py b/zerver/views/message_summary.py index d0d02b73de..b2e3ea992e 100644 --- a/zerver/views/message_summary.py +++ b/zerver/views/message_summary.py @@ -8,6 +8,7 @@ from django.http import HttpRequest, HttpResponse from django.utils.translation import gettext as _ from pydantic import Json +from analytics.lib.counts import COUNT_STATS from zerver.actions.message_summary import do_summarize_narrow from zerver.lib.exceptions import JsonableError from zerver.lib.narrow import NarrowParameter @@ -29,6 +30,13 @@ def get_messages_summary( if not (user_profile.is_moderator or user_profile.is_realm_admin): # nocoverage return json_success(request, {"summary": "Feature limited to moderators for now."}) + if settings.MAX_PER_USER_MONTHLY_AI_COST is not None: + used_credits = COUNT_STATS["ai_credit_usage::day"].current_month_accumulated_count_for_user( + user_profile + ) + if used_credits >= settings.MAX_PER_USER_MONTHLY_AI_COST * 1000000000: + raise JsonableError(_("Reached monthly limit for AI credits.")) + summary = 
do_summarize_narrow(user_profile, narrow) if summary is None: # nocoverage return json_success(request, {"summary": "No messages in conversation to summarize"}) diff --git a/zproject/default_settings.py b/zproject/default_settings.py index 1e800949cb..04c6f1919e 100644 --- a/zproject/default_settings.py +++ b/zproject/default_settings.py @@ -706,3 +706,4 @@ TOPIC_SUMMARIZATION_PARAMETERS: dict[str, object] = {} # are arbitrarily, but typically will be USD. INPUT_COST_PER_GIGATOKEN: int = 0 OUTPUT_COST_PER_GIGATOKEN: int = 0 +MAX_PER_USER_MONTHLY_AI_COST: float | None = 0.5 diff --git a/zproject/dev_settings.py b/zproject/dev_settings.py index 962df519fd..452b5cd80d 100644 --- a/zproject/dev_settings.py +++ b/zproject/dev_settings.py @@ -225,3 +225,4 @@ TOPIC_SUMMARIZATION_MODEL = "groq/llama-3.3-70b-versatile" # https://groq.com/pricing/ OUTPUT_COST_PER_GIGATOKEN = 590 INPUT_COST_PER_GIGATOKEN = 790 +MAX_PER_USER_MONTHLY_AI_COST = 1 diff --git a/zproject/prod_settings_template.py b/zproject/prod_settings_template.py index b077183b0b..1db220f12c 100644 --- a/zproject/prod_settings_template.py +++ b/zproject/prod_settings_template.py @@ -738,6 +738,7 @@ SOCIAL_AUTH_SAML_SUPPORT_CONTACT = { ## quota. Units are USD or other currency of your choice. # OUTPUT_COST_PER_GIGATOKEN = 500 # INPUT_COST_PER_GIGATOKEN = 400 +# MAX_PER_USER_MONTHLY_AI_COST = 0.5 ################ ## Miscellaneous settings.