message_summary: Move cost per token configuration to settings.

This commit is contained in:
Aman Agrawal
2025-02-05 11:27:18 +05:30
committed by Tim Abbott
parent 099bab986f
commit 7fb767c080
5 changed files with 21 additions and 22 deletions

View File

@@ -17,17 +17,6 @@ from zerver.models import UserProfile
# Maximum number of messages that can be summarized in a single request.
MAX_MESSAGES_SUMMARIZED = 100
# Price per token for input and output tokens.
# These values are based on the pricing of the Bedrock API
# for Llama 3.3 Instruct (70B).
# https://aws.amazon.com/bedrock/pricing/
# Unit: USD per 1 billion tokens.
#
# These values likely will want to be declared in configuration,
# rather than here in the code.
OUTPUT_COST_PER_GIGATOKEN = 720
INPUT_COST_PER_GIGATOKEN = 720
ai_time_start = 0.0
ai_total_time = 0.0
@@ -205,8 +194,8 @@ def do_summarize_narrow(
input_tokens = response["usage"]["prompt_tokens"]
output_tokens = response["usage"]["completion_tokens"]
credits_used = (output_tokens * OUTPUT_COST_PER_GIGATOKEN) + (
input_tokens * INPUT_COST_PER_GIGATOKEN
credits_used = (output_tokens * settings.OUTPUT_COST_PER_GIGATOKEN) + (
input_tokens * settings.INPUT_COST_PER_GIGATOKEN
)
do_increment_logging_stat(
user_profile, COUNT_STATS["ai_credit_usage::day"], None, timezone_now(), credits_used

View File

@@ -7,7 +7,6 @@ from django.conf import settings
from typing_extensions import override
from analytics.models import UserCount
from zerver.actions.message_summary import INPUT_COST_PER_GIGATOKEN, OUTPUT_COST_PER_GIGATOKEN
from zerver.lib.test_classes import ZulipTestCase
warnings.filterwarnings("ignore", category=UserWarning, module="pydantic")
@@ -87,8 +86,8 @@ class MessagesSummaryTestCase(ZulipTestCase):
):
input_tokens = fixture_data["response"]["usage"]["prompt_tokens"]
output_tokens = fixture_data["response"]["usage"]["completion_tokens"]
credits_used = (output_tokens * OUTPUT_COST_PER_GIGATOKEN) + (
input_tokens * INPUT_COST_PER_GIGATOKEN
credits_used = (output_tokens * settings.OUTPUT_COST_PER_GIGATOKEN) + (
input_tokens * settings.INPUT_COST_PER_GIGATOKEN
)
self.assertFalse(
UserCount.objects.filter(

View File

@@ -702,3 +702,7 @@ MAX_DEACTIVATED_REALM_DELETION_DAYS: int | None = None
TOPIC_SUMMARIZATION_MODEL: str | None = None
TOPIC_SUMMARIZATION_PARAMETERS: dict[str, object] = {}
# Price per token for input and output tokens, and maximum cost. Units
# are arbitrary, but will typically be USD.
INPUT_COST_PER_GIGATOKEN: int = 0
OUTPUT_COST_PER_GIGATOKEN: int = 0

View File

@@ -221,3 +221,7 @@ RESOLVE_TOPIC_UNDO_GRACE_PERIOD_SECONDS = 5
ROOT_DOMAIN_LANDING_PAGE = True
TOPIC_SUMMARIZATION_MODEL = "groq/llama-3.3-70b-versatile"
# Defaults based on groq's pricing for Llama 3.3 70B Versatile 128k.
# https://groq.com/pricing/
OUTPUT_COST_PER_GIGATOKEN = 790
INPUT_COST_PER_GIGATOKEN = 590

View File

@@ -724,18 +724,21 @@ SOCIAL_AUTH_SAML_SUPPORT_CONTACT = {
# BIG_BLUE_BUTTON_URL = "https://bbb.example.com/bigbluebutton/"
################
## LLM Summarization
## AI Features
##
## The model name that will be used by the LiteLLM library to configure
## parameters to be sent to API.
## The Llama-3-8B-instruct model is free to use and only requires submitting
## a small form on the HuggingFace page for the model to gain access.
## Specify the model and provider to use for topic summarization. The
## `model` field from https://docs.litellm.ai/docs/providers specifies
## your preferred provider/model combination.
# TOPIC_SUMMARIZATION_MODEL = "huggingface/meta-llama/Meta-Llama-3-8B-Instruct"
## Other configuration parameters, passed through to litellm's `completion` call
## See https://docs.litellm.ai/docs/completion/input
# TOPIC_SUMMARIZATION_PARAMETERS = {}
## Set usage costs based on your model, and a maximum per-user monthly
## quota. Units are USD or other currency of your choice.
# OUTPUT_COST_PER_GIGATOKEN = 500
# INPUT_COST_PER_GIGATOKEN = 400
################
## Miscellaneous settings.