mirror of
https://github.com/zulip/zulip.git
synced 2025-10-23 04:52:12 +00:00
message_summary: Add setting to put a monthly rate limit per user.
This commit is contained in:
@@ -2,11 +2,12 @@ import logging
|
||||
import time
|
||||
from collections import OrderedDict, defaultdict
|
||||
from collections.abc import Callable, Sequence
|
||||
from datetime import datetime, timedelta
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import TypeAlias, Union
|
||||
|
||||
from django.conf import settings
|
||||
from django.db import connection, models
|
||||
from django.utils.timezone import now as timezone_now
|
||||
from psycopg2.sql import SQL, Composable, Identifier, Literal
|
||||
from typing_extensions import override
|
||||
|
||||
@@ -38,6 +39,7 @@ logger = logging.getLogger("zulip.analytics")
|
||||
# You can't subtract timedelta.max from a datetime, so use this instead
|
||||
TIMEDELTA_MAX = timedelta(days=365 * 1000)
|
||||
|
||||
|
||||
## Class definitions ##
|
||||
|
||||
|
||||
@@ -82,6 +84,27 @@ class CountStat:
|
||||
return fillstate.end_time
|
||||
return fillstate.end_time - self.time_increment
|
||||
|
||||
def current_month_accumulated_count_for_user(self, user: UserProfile) -> int:
    """Return the summed `value` of this stat's rows for `user` in the current month.

    The month window is half-open: rows are included when their
    `end_time` is at or after the first instant of the current month
    and strictly before the first instant of the following month.
    The year and month come from `timezone_now()`, and both bounds
    are stamped as UTC.

    Returns 0 when the aggregate yields no sum (or a zero sum).
    """
    current_time = timezone_now()
    month_start = datetime(current_time.year, current_time.month, 1, tzinfo=timezone.utc)
    # Any month other than December simply advances the month number;
    # December has to roll over into January of the following year.
    if current_time.month != 12:  # nocoverage
        next_month_start = datetime(
            current_time.year, current_time.month + 1, 1, tzinfo=timezone.utc
        )
    else:  # nocoverage
        next_month_start = datetime(current_time.year + 1, 1, 1, tzinfo=timezone.utc)

    # The only purpose of this check is to rule out BaseCount itself;
    # the table asserted here is the one queried through `objects` below.
    output_table = self.data_collector.output_table
    assert output_table == UserCount

    # NOTE(review): rows are selected purely by `end_time`; confirm that
    # this matches how `end_time` is assigned when the stat is recorded.
    monthly_rows = output_table.objects.filter(  # type: ignore[attr-defined] # see above
        user=user,
        property=self.property,
        end_time__gte=month_start,
        end_time__lt=next_month_start,
    )
    aggregated = monthly_rows.aggregate(models.Sum("value"))

    # The aggregate may hold None for the sum; report that as zero.
    return aggregated["value__sum"] or 0
|
||||
|
||||
|
||||
class LoggingCountStat(CountStat):
|
||||
def __init__(self, property: str, output_table: type[BaseCount], frequency: str) -> None:
|
||||
|
@@ -11,6 +11,7 @@ from bs4 import BeautifulSoup
|
||||
# this list without any modification.
|
||||
IGNORED_PHRASES = [
|
||||
# Proper nouns and acronyms
|
||||
r"AI",
|
||||
r"API",
|
||||
r"APNS",
|
||||
r"Botserver",
|
||||
|
@@ -194,6 +194,7 @@ def do_summarize_narrow(
|
||||
input_tokens = response["usage"]["prompt_tokens"]
|
||||
output_tokens = response["usage"]["completion_tokens"]
|
||||
|
||||
# Divide by 1 billion to get actual cost in USD.
|
||||
credits_used = (output_tokens * settings.OUTPUT_COST_PER_GIGATOKEN) + (
|
||||
input_tokens * settings.INPUT_COST_PER_GIGATOKEN
|
||||
)
|
||||
|
@@ -77,6 +77,14 @@ class MessagesSummaryTestCase(ZulipTestCase):
|
||||
with open(LLM_FIXTURES_FILE, "rb") as f:
|
||||
fixture_data = orjson.loads(f.read())
|
||||
|
||||
# Block summary requests if budget set to 0.
|
||||
with self.settings(
|
||||
TOPIC_SUMMARIZATION_MODEL="groq/llama-3.3-70b-versatile",
|
||||
MAX_PER_USER_MONTHLY_AI_COST=0,
|
||||
):
|
||||
response = self.client_get("/json/messages/summary")
|
||||
self.assert_json_error_contains(response, "Reached monthly limit for AI credits.")
|
||||
|
||||
# Fake credentials to ensure we crash if actual network
|
||||
# requests occur, which would reflect a problem with how the
|
||||
# fixtures were set up.
|
||||
@@ -103,3 +111,11 @@ class MessagesSummaryTestCase(ZulipTestCase):
|
||||
property="ai_credit_usage::day", value=credits_used, user_id=self.user.id
|
||||
).exists()
|
||||
)
|
||||
|
||||
# If we reached the credit usage limit, block summary requests.
|
||||
with self.settings(
|
||||
TOPIC_SUMMARIZATION_MODEL="groq/llama-3.3-70b-versatile",
|
||||
MAX_PER_USER_MONTHLY_AI_COST=credits_used / 1000000000,
|
||||
):
|
||||
response = self.client_get("/json/messages/summary")
|
||||
self.assert_json_error_contains(response, "Reached monthly limit for AI credits.")
|
||||
|
@@ -8,6 +8,7 @@ from django.http import HttpRequest, HttpResponse
|
||||
from django.utils.translation import gettext as _
|
||||
from pydantic import Json
|
||||
|
||||
from analytics.lib.counts import COUNT_STATS
|
||||
from zerver.actions.message_summary import do_summarize_narrow
|
||||
from zerver.lib.exceptions import JsonableError
|
||||
from zerver.lib.narrow import NarrowParameter
|
||||
@@ -29,6 +30,13 @@ def get_messages_summary(
|
||||
if not (user_profile.is_moderator or user_profile.is_realm_admin): # nocoverage
|
||||
return json_success(request, {"summary": "Feature limited to moderators for now."})
|
||||
|
||||
if settings.MAX_PER_USER_MONTHLY_AI_COST is not None:
|
||||
used_credits = COUNT_STATS["ai_credit_usage::day"].current_month_accumulated_count_for_user(
|
||||
user_profile
|
||||
)
|
||||
if used_credits >= settings.MAX_PER_USER_MONTHLY_AI_COST * 1000000000:
|
||||
raise JsonableError(_("Reached monthly limit for AI credits."))
|
||||
|
||||
summary = do_summarize_narrow(user_profile, narrow)
|
||||
if summary is None: # nocoverage
|
||||
return json_success(request, {"summary": "No messages in conversation to summarize"})
|
||||
|
@@ -706,3 +706,4 @@ TOPIC_SUMMARIZATION_PARAMETERS: dict[str, object] = {}
|
||||
# are arbitrary, but typically will be USD.
|
||||
INPUT_COST_PER_GIGATOKEN: int = 0
|
||||
OUTPUT_COST_PER_GIGATOKEN: int = 0
|
||||
MAX_PER_USER_MONTHLY_AI_COST: float | None = 0.5
|
||||
|
@@ -225,3 +225,4 @@ TOPIC_SUMMARIZATION_MODEL = "groq/llama-3.3-70b-versatile"
|
||||
# https://groq.com/pricing/
|
||||
OUTPUT_COST_PER_GIGATOKEN = 590
|
||||
INPUT_COST_PER_GIGATOKEN = 790
|
||||
MAX_PER_USER_MONTHLY_AI_COST = 1
|
||||
|
@@ -738,6 +738,7 @@ SOCIAL_AUTH_SAML_SUPPORT_CONTACT = {
|
||||
## quota. Units are USD or other currency of your choice.
|
||||
# OUTPUT_COST_PER_GIGATOKEN = 500
|
||||
# INPUT_COST_PER_GIGATOKEN = 400
|
||||
# MAX_PER_USER_MONTHLY_AI_COST = 0.5
|
||||
|
||||
################
|
||||
## Miscellaneous settings.
|
||||
|
Reference in New Issue
Block a user