diff --git a/zerver/actions/message_summary.py b/zerver/actions/message_summary.py
index dc1d38f5d6..bbd5dc70c6 100644
--- a/zerver/actions/message_summary.py
+++ b/zerver/actions/message_summary.py
@@ -17,17 +17,6 @@ from zerver.models import UserProfile
 # Maximum number of messages that can be summarized in a single request.
 MAX_MESSAGES_SUMMARIZED = 100
 
-# Price per token for input and output tokens.
-# These values are based on the pricing of the Bedrock API
-# for Llama 3.3 Instruct (70B).
-# https://aws.amazon.com/bedrock/pricing/
-# Unit: USD per 1 billion tokens.
-#
-# These values likely will want to be declared in configuration,
-# rather than here in the code.
-OUTPUT_COST_PER_GIGATOKEN = 720
-INPUT_COST_PER_GIGATOKEN = 720
-
 ai_time_start = 0.0
 ai_total_time = 0.0
 
@@ -205,8 +194,8 @@ def do_summarize_narrow(
 
     input_tokens = response["usage"]["prompt_tokens"]
     output_tokens = response["usage"]["completion_tokens"]
-    credits_used = (output_tokens * OUTPUT_COST_PER_GIGATOKEN) + (
-        input_tokens * INPUT_COST_PER_GIGATOKEN
+    credits_used = (output_tokens * settings.OUTPUT_COST_PER_GIGATOKEN) + (
+        input_tokens * settings.INPUT_COST_PER_GIGATOKEN
     )
     do_increment_logging_stat(
         user_profile, COUNT_STATS["ai_credit_usage::day"], None, timezone_now(), credits_used
diff --git a/zerver/tests/test_message_summary.py b/zerver/tests/test_message_summary.py
index 27b6768e0e..ad9c5207cf 100644
--- a/zerver/tests/test_message_summary.py
+++ b/zerver/tests/test_message_summary.py
@@ -7,7 +7,6 @@ from django.conf import settings
 from typing_extensions import override
 
 from analytics.models import UserCount
-from zerver.actions.message_summary import INPUT_COST_PER_GIGATOKEN, OUTPUT_COST_PER_GIGATOKEN
 from zerver.lib.test_classes import ZulipTestCase
 
 warnings.filterwarnings("ignore", category=UserWarning, module="pydantic")
@@ -87,8 +86,8 @@ class MessagesSummaryTestCase(ZulipTestCase):
         ):
             input_tokens = fixture_data["response"]["usage"]["prompt_tokens"]
             output_tokens = fixture_data["response"]["usage"]["completion_tokens"]
-            credits_used = (output_tokens * OUTPUT_COST_PER_GIGATOKEN) + (
-                input_tokens * INPUT_COST_PER_GIGATOKEN
+            credits_used = (output_tokens * settings.OUTPUT_COST_PER_GIGATOKEN) + (
+                input_tokens * settings.INPUT_COST_PER_GIGATOKEN
             )
             self.assertFalse(
                 UserCount.objects.filter(
diff --git a/zproject/default_settings.py b/zproject/default_settings.py
index 498df0d33e..1e800949cb 100644
--- a/zproject/default_settings.py
+++ b/zproject/default_settings.py
@@ -702,3 +702,7 @@ MAX_DEACTIVATED_REALM_DELETION_DAYS: int | None = None
 
 TOPIC_SUMMARIZATION_MODEL: str | None = None
 TOPIC_SUMMARIZATION_PARAMETERS: dict[str, object] = {}
+# Price per token for input and output tokens, and maximum cost. Units
+# are arbitrary, but will typically be USD.
+INPUT_COST_PER_GIGATOKEN: int = 0
+OUTPUT_COST_PER_GIGATOKEN: int = 0
diff --git a/zproject/dev_settings.py b/zproject/dev_settings.py
index 0446b70066..962df519fd 100644
--- a/zproject/dev_settings.py
+++ b/zproject/dev_settings.py
@@ -221,3 +221,7 @@ RESOLVE_TOPIC_UNDO_GRACE_PERIOD_SECONDS = 5
 
 ROOT_DOMAIN_LANDING_PAGE = True
 TOPIC_SUMMARIZATION_MODEL = "groq/llama-3.3-70b-versatile"
+# Defaults based on Groq's pricing for Llama 3.3 70B Versatile 128k.
+# https://groq.com/pricing/
+OUTPUT_COST_PER_GIGATOKEN = 790
+INPUT_COST_PER_GIGATOKEN = 590
diff --git a/zproject/prod_settings_template.py b/zproject/prod_settings_template.py
index 019e98bac4..b077183b0b 100644
--- a/zproject/prod_settings_template.py
+++ b/zproject/prod_settings_template.py
@@ -724,18 +724,21 @@ SOCIAL_AUTH_SAML_SUPPORT_CONTACT = {
 # BIG_BLUE_BUTTON_URL = "https://bbb.example.com/bigbluebutton/"
 
 ################
-## LLM Summarization
+## AI Features
 ##
-## The model name that will used by LiteLLM library to configure
-## parameters to be sent to API.
-## The Llama-3-8B-instruct model is free to use and only requires submitting
-## a small form on the HuggingFace page for the model to gain access.
+## Specify the model and provider to use for topic summarization. The
+## `model` field from https://docs.litellm.ai/docs/providers specifies
+## your preferred provider/model combination.
 # TOPIC_SUMMARIZATION_MODEL = "huggingface/meta-llama/Meta-Llama-3-8B-Instruct"
-
 ## Other configuration parameters, passed through to litellm's `completion` call
 ## See https://docs.litellm.ai/docs/completion/input
 # TOPIC_SUMMARIZATION_PARAMETERS = {}
 
+## Set usage costs based on your model, and a maximum per-user monthly
+## quota. Units are USD, or another currency of your choice.
+# OUTPUT_COST_PER_GIGATOKEN = 500
+# INPUT_COST_PER_GIGATOKEN = 400
+
 ################
 ## Miscellaneous settings.
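
For illustration, here is a minimal self-contained sketch (not Zulip code) of the credit calculation that this patch moves behind Django settings. The prices and token counts below are hypothetical; in a real deployment the prices come from your settings files and the token counts from litellm's response["usage"] fields:

    from django.conf import settings

    # Stand-alone settings for this sketch; in Zulip these are defined
    # in zproject/*_settings.py, as in the hunks above.
    settings.configure(
        INPUT_COST_PER_GIGATOKEN=590,   # hypothetical price per 10**9 input tokens
        OUTPUT_COST_PER_GIGATOKEN=790,  # hypothetical price per 10**9 output tokens
    )

    def credits_used(input_tokens: int, output_tokens: int) -> int:
        # Multiplying token counts by per-gigatoken prices yields a
        # result in billionths of the configured currency unit.
        return (output_tokens * settings.OUTPUT_COST_PER_GIGATOKEN) + (
            input_tokens * settings.INPUT_COST_PER_GIGATOKEN
        )

    # 2,000 prompt tokens and 300 completion tokens:
    # 2000 * 590 + 300 * 790 = 1,417,000 nano-units, about $0.0014 if USD.
    print(credits_used(2000, 300))

Keeping prices as integers per gigatoken means credits_used is always an integer, which fits the integer-valued "ai_credit_usage::day" count stat incremented above.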