diff --git a/zerver/actions/message_summary.py b/zerver/actions/message_summary.py
index dc1d38f5d6..bbd5dc70c6 100644
--- a/zerver/actions/message_summary.py
+++ b/zerver/actions/message_summary.py
@@ -17,17 +17,6 @@ from zerver.models import UserProfile
 # Maximum number of messages that can be summarized in a single request.
 MAX_MESSAGES_SUMMARIZED = 100
 
-# Price per token for input and output tokens.
-# These values are based on the pricing of the Bedrock API
-# for Llama 3.3 Instruct (70B).
-# https://aws.amazon.com/bedrock/pricing/
-# Unit: USD per 1 billion tokens.
-#
-# These values likely will want to be declared in configuration,
-# rather than here in the code.
-OUTPUT_COST_PER_GIGATOKEN = 720
-INPUT_COST_PER_GIGATOKEN = 720
-
 ai_time_start = 0.0
 ai_total_time = 0.0
 
@@ -205,8 +194,8 @@ def do_summarize_narrow(
 
     input_tokens = response["usage"]["prompt_tokens"]
     output_tokens = response["usage"]["completion_tokens"]
-    credits_used = (output_tokens * OUTPUT_COST_PER_GIGATOKEN) + (
-        input_tokens * INPUT_COST_PER_GIGATOKEN
+    credits_used = (output_tokens * settings.OUTPUT_COST_PER_GIGATOKEN) + (
+        input_tokens * settings.INPUT_COST_PER_GIGATOKEN
     )
     do_increment_logging_stat(
         user_profile, COUNT_STATS["ai_credit_usage::day"], None, timezone_now(), credits_used
diff --git a/zerver/tests/test_message_summary.py b/zerver/tests/test_message_summary.py
index 27b6768e0e..ad9c5207cf 100644
--- a/zerver/tests/test_message_summary.py
+++ b/zerver/tests/test_message_summary.py
@@ -7,7 +7,6 @@ from django.conf import settings
 from typing_extensions import override
 
 from analytics.models import UserCount
-from zerver.actions.message_summary import INPUT_COST_PER_GIGATOKEN, OUTPUT_COST_PER_GIGATOKEN
 from zerver.lib.test_classes import ZulipTestCase
 
 warnings.filterwarnings("ignore", category=UserWarning, module="pydantic")
@@ -87,8 +86,8 @@ class MessagesSummaryTestCase(ZulipTestCase):
         ):
             input_tokens = fixture_data["response"]["usage"]["prompt_tokens"]
             output_tokens = fixture_data["response"]["usage"]["completion_tokens"]
-            credits_used = (output_tokens * OUTPUT_COST_PER_GIGATOKEN) + (
-                input_tokens * INPUT_COST_PER_GIGATOKEN
+            credits_used = (output_tokens * settings.OUTPUT_COST_PER_GIGATOKEN) + (
+                input_tokens * settings.INPUT_COST_PER_GIGATOKEN
             )
             self.assertFalse(
                 UserCount.objects.filter(
diff --git a/zproject/default_settings.py b/zproject/default_settings.py
index 498df0d33e..1e800949cb 100644
--- a/zproject/default_settings.py
+++ b/zproject/default_settings.py
@@ -702,3 +702,7 @@ MAX_DEACTIVATED_REALM_DELETION_DAYS: int | None = None
 
 TOPIC_SUMMARIZATION_MODEL: str | None = None
 TOPIC_SUMMARIZATION_PARAMETERS: dict[str, object] = {}
+# Price per token for input and output tokens, and maximum cost. Units
+# are arbitrary, but will typically be USD.
+INPUT_COST_PER_GIGATOKEN: int = 0
+OUTPUT_COST_PER_GIGATOKEN: int = 0
diff --git a/zproject/dev_settings.py b/zproject/dev_settings.py
index 0446b70066..962df519fd 100644
--- a/zproject/dev_settings.py
+++ b/zproject/dev_settings.py
@@ -221,3 +221,7 @@ RESOLVE_TOPIC_UNDO_GRACE_PERIOD_SECONDS = 5
 
 ROOT_DOMAIN_LANDING_PAGE = True
 TOPIC_SUMMARIZATION_MODEL = "groq/llama-3.3-70b-versatile"
+# Defaults based on Groq's pricing for Llama 3.3 70B Versatile 128k.
+# https://groq.com/pricing/
+OUTPUT_COST_PER_GIGATOKEN = 790
+INPUT_COST_PER_GIGATOKEN = 590
diff --git a/zproject/prod_settings_template.py b/zproject/prod_settings_template.py
index 019e98bac4..b077183b0b 100644
--- a/zproject/prod_settings_template.py
+++ b/zproject/prod_settings_template.py
@@ -724,18 +724,21 @@ SOCIAL_AUTH_SAML_SUPPORT_CONTACT = {
 # BIG_BLUE_BUTTON_URL = "https://bbb.example.com/bigbluebutton/"
 
 ################
-## LLM Summarization
+## AI Features
 ##
-## The model name that will used by LiteLLM library to configure
-## parameters to be sent to API.
-## The Llama-3-8B-instruct model is free to use and only requires submitting
-## a small form on the HuggingFace page for the model to gain access.
+## Specify the model and provider to use for topic summarization. The
+## `model` field from https://docs.litellm.ai/docs/providers specifies
+## your preferred provider/model combination.
 # TOPIC_SUMMARIZATION_MODEL = "huggingface/meta-llama/Meta-Llama-3-8B-Instruct"
-
 ## Other configuration parameters, passed through to litellm's `completion` call
 ## See https://docs.litellm.ai/docs/completion/input
 # TOPIC_SUMMARIZATION_PARAMETERS = {}
 
+## Set usage costs based on your model, and a maximum per-user monthly
+## quota. Units are USD, or another currency of your choice.
+# OUTPUT_COST_PER_GIGATOKEN = 500
+# INPUT_COST_PER_GIGATOKEN = 400
+
 ################
 ## Miscellaneous settings.
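
For illustration, here is a minimal self-contained sketch (not Zulip code) of the credit calculation that this patch moves behind Django settings. The prices and token counts below are hypothetical; in a real deployment the prices come from your settings files and the token counts from litellm's response["usage"] fields:

    from django.conf import settings

    # Stand-alone settings for this sketch; in Zulip these are defined
    # in zproject/*_settings.py, as in the hunks above.
    settings.configure(
        INPUT_COST_PER_GIGATOKEN=590,   # hypothetical price per 10**9 input tokens
        OUTPUT_COST_PER_GIGATOKEN=790,  # hypothetical price per 10**9 output tokens
    )

    def credits_used(input_tokens: int, output_tokens: int) -> int:
        # Multiplying token counts by per-gigatoken prices yields a
        # result in billionths of the configured currency unit.
        return (output_tokens * settings.OUTPUT_COST_PER_GIGATOKEN) + (
            input_tokens * settings.INPUT_COST_PER_GIGATOKEN
        )

    # 2,000 prompt tokens and 300 completion tokens:
    # 2000 * 590 + 300 * 790 = 1,417,000 nano-units, about $0.0014 if USD.
    print(credits_used(2000, 300))

Keeping prices as integers per gigatoken means credits_used is always an integer, which fits the integer-valued "ai_credit_usage::day" count stat incremented above.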