message_summary: Add setting to put a monthly rate limit per user.

This commit is contained in:
Aman Agrawal
2025-02-05 14:47:52 +05:30
committed by Tim Abbott
parent 7fb767c080
commit 2f5aea6604
8 changed files with 53 additions and 1 deletions

View File

@@ -2,11 +2,12 @@ import logging
import time
from collections import OrderedDict, defaultdict
from collections.abc import Callable, Sequence
from datetime import datetime, timedelta
from datetime import datetime, timedelta, timezone
from typing import TypeAlias, Union
from django.conf import settings
from django.db import connection, models
from django.utils.timezone import now as timezone_now
from psycopg2.sql import SQL, Composable, Identifier, Literal
from typing_extensions import override
@@ -38,6 +39,7 @@ logger = logging.getLogger("zulip.analytics")
# You can't subtract timedelta.max from a datetime, so use this instead
TIMEDELTA_MAX = timedelta(days=365 * 1000)
## Class definitions ##
@@ -82,6 +84,27 @@ class CountStat:
return fillstate.end_time
return fillstate.end_time - self.time_increment
def current_month_accumulated_count_for_user(self, user: UserProfile) -> int:
    """Return the total `value` recorded for `user` under this stat in the current month.

    The window is half-open: rows whose `end_time` is at or after the
    first instant of the current month and strictly before the first
    instant of the following month are summed. Both boundaries are built
    with `tzinfo=timezone.utc` from the year and month of `timezone_now()`.

    Returns 0 when the aggregate yields no sum (e.g. no matching rows).

    NOTE(review): if `end_time` marks the *end* of each counting bucket,
    activity from the last day of a month would carry an `end_time` equal
    to the next month's start and be attributed to that month -- confirm
    against how `end_time` is assigned for this stat.
    """
    current_time = timezone_now()

    # Work out which (year, month) follows the current one, rolling the
    # year over after December.
    if current_time.month == 12:  # nocoverage
        following_year, following_month = current_time.year + 1, 1
    else:  # nocoverage
        following_year, following_month = current_time.year, current_time.month + 1

    window_start = datetime(current_time.year, current_time.month, 1, tzinfo=timezone.utc)
    window_end = datetime(following_year, following_month, 1, tzinfo=timezone.utc)

    count_table = self.data_collector.output_table
    # We just want to check we are not using BaseCount, otherwise all
    # `output_table` have `objects` property.
    assert count_table == UserCount
    rows_in_window = count_table.objects.filter(  # type: ignore[attr-defined] # see above
        user=user,
        property=self.property,
        end_time__gte=window_start,
        end_time__lt=window_end,
    )
    aggregated = rows_in_window.aggregate(models.Sum("value"))
    # Fall back to 0 when the aggregate's sum is None or otherwise falsy.
    return aggregated["value__sum"] or 0
class LoggingCountStat(CountStat):
def __init__(self, property: str, output_table: type[BaseCount], frequency: str) -> None:

View File

@@ -11,6 +11,7 @@ from bs4 import BeautifulSoup
# this list without any modification.
IGNORED_PHRASES = [
# Proper nouns and acronyms
r"AI",
r"API",
r"APNS",
r"Botserver",

View File

@@ -194,6 +194,7 @@ def do_summarize_narrow(
input_tokens = response["usage"]["prompt_tokens"]
output_tokens = response["usage"]["completion_tokens"]
# Divide by 1 billion to get actual cost in USD.
credits_used = (output_tokens * settings.OUTPUT_COST_PER_GIGATOKEN) + (
input_tokens * settings.INPUT_COST_PER_GIGATOKEN
)

View File

@@ -77,6 +77,14 @@ class MessagesSummaryTestCase(ZulipTestCase):
with open(LLM_FIXTURES_FILE, "rb") as f:
fixture_data = orjson.loads(f.read())
# Block summary requests if budget set to 0.
with self.settings(
TOPIC_SUMMARIZATION_MODEL="groq/llama-3.3-70b-versatile",
MAX_PER_USER_MONTHLY_AI_COST=0,
):
response = self.client_get("/json/messages/summary")
self.assert_json_error_contains(response, "Reached monthly limit for AI credits.")
# Fake credentials to ensure we crash if actual network
# requests occur, which would reflect a problem with how the
# fixtures were set up.
@@ -103,3 +111,11 @@ class MessagesSummaryTestCase(ZulipTestCase):
property="ai_credit_usage::day", value=credits_used, user_id=self.user.id
).exists()
)
# If we reached the credit usage limit, block summary requests.
with self.settings(
TOPIC_SUMMARIZATION_MODEL="groq/llama-3.3-70b-versatile",
MAX_PER_USER_MONTHLY_AI_COST=credits_used / 1000000000,
):
response = self.client_get("/json/messages/summary")
self.assert_json_error_contains(response, "Reached monthly limit for AI credits.")

View File

@@ -8,6 +8,7 @@ from django.http import HttpRequest, HttpResponse
from django.utils.translation import gettext as _
from pydantic import Json
from analytics.lib.counts import COUNT_STATS
from zerver.actions.message_summary import do_summarize_narrow
from zerver.lib.exceptions import JsonableError
from zerver.lib.narrow import NarrowParameter
@@ -29,6 +30,13 @@ def get_messages_summary(
if not (user_profile.is_moderator or user_profile.is_realm_admin): # nocoverage
return json_success(request, {"summary": "Feature limited to moderators for now."})
if settings.MAX_PER_USER_MONTHLY_AI_COST is not None:
used_credits = COUNT_STATS["ai_credit_usage::day"].current_month_accumulated_count_for_user(
user_profile
)
if used_credits >= settings.MAX_PER_USER_MONTHLY_AI_COST * 1000000000:
raise JsonableError(_("Reached monthly limit for AI credits."))
summary = do_summarize_narrow(user_profile, narrow)
if summary is None: # nocoverage
return json_success(request, {"summary": "No messages in conversation to summarize"})

View File

@@ -706,3 +706,4 @@ TOPIC_SUMMARIZATION_PARAMETERS: dict[str, object] = {}
# are arbitrary, but typically will be USD.
INPUT_COST_PER_GIGATOKEN: int = 0
OUTPUT_COST_PER_GIGATOKEN: int = 0
# Per-user monthly cap on AI spending, expressed in the same currency
# units as the per-gigatoken costs above. The message summary endpoint
# multiplies this value by 1,000,000,000 and rejects the request once
# the user's accumulated monthly credits are greater than or equal to
# that threshold, so 0 blocks every request. None skips the check
# entirely.
MAX_PER_USER_MONTHLY_AI_COST: float | None = 0.5

View File

@@ -225,3 +225,4 @@ TOPIC_SUMMARIZATION_MODEL = "groq/llama-3.3-70b-versatile"
# https://groq.com/pricing/
OUTPUT_COST_PER_GIGATOKEN = 590
INPUT_COST_PER_GIGATOKEN = 790
MAX_PER_USER_MONTHLY_AI_COST = 1

View File

@@ -738,6 +738,7 @@ SOCIAL_AUTH_SAML_SUPPORT_CONTACT = {
## quota. Units are USD or other currency of your choice.
# OUTPUT_COST_PER_GIGATOKEN = 500
# INPUT_COST_PER_GIGATOKEN = 400
# MAX_PER_USER_MONTHLY_AI_COST = 0.5
################
## Miscellaneous settings.