diff --git a/analytics/lib/counts.py b/analytics/lib/counts.py
index 0b9b9b2b7e..0ef8d58a84 100644
--- a/analytics/lib/counts.py
+++ b/analytics/lib/counts.py
@@ -2,11 +2,12 @@ import logging
 import time
 from collections import OrderedDict, defaultdict
 from collections.abc import Callable, Sequence
-from datetime import datetime, timedelta
+from datetime import datetime, timedelta, timezone
 from typing import TypeAlias, Union
 
 from django.conf import settings
 from django.db import connection, models
+from django.utils.timezone import now as timezone_now
 from psycopg2.sql import SQL, Composable, Identifier, Literal
 from typing_extensions import override
 
@@ -38,6 +39,7 @@ logger = logging.getLogger("zulip.analytics")
 # You can't subtract timedelta.max from a datetime, so use this instead
 TIMEDELTA_MAX = timedelta(days=365 * 1000)
 
+
 ## Class definitions ##
 
 
@@ -82,6 +84,27 @@ class CountStat:
             return fillstate.end_time
         return fillstate.end_time - self.time_increment
 
+    def current_month_accumulated_count_for_user(self, user: UserProfile) -> int:
+        """Return this stat's summed UserCount value for `user` over the current UTC month."""
+        now = timezone_now()
+        start_of_month = datetime(now.year, now.month, 1, tzinfo=timezone.utc)
+        if now.month == 12:  # nocoverage
+            start_of_next_month = datetime(now.year + 1, 1, 1, tzinfo=timezone.utc)
+        else:  # nocoverage
+            start_of_next_month = datetime(now.year, now.month + 1, 1, tzinfo=timezone.utc)
+
+        # BaseCount has no `objects` manager; every concrete output table does.
+        assert self.data_collector.output_table == UserCount
+        # Rows are keyed by the END of their interval, so this month's rows
+        # satisfy start_of_month < end_time <= start_of_next_month.
+        result = self.data_collector.output_table.objects.filter(  # type: ignore[attr-defined] # see above
+            user=user,
+            property=self.property,
+            end_time__gt=start_of_month,
+            end_time__lte=start_of_next_month,
+        ).aggregate(models.Sum("value"))
+        return result["value__sum"] or 0  # Sum() is None when no rows match.
+
 
 class LoggingCountStat(CountStat):
     def __init__(self, property: str, output_table: type[BaseCount], frequency: str) -> None:
diff --git a/tools/lib/capitalization.py b/tools/lib/capitalization.py
index cc18ff501f..491d639c68 100644
--- a/tools/lib/capitalization.py
+++ b/tools/lib/capitalization.py
@@ -11,6 +11,7 @@ from bs4 import BeautifulSoup
 # this list without any modification.
 IGNORED_PHRASES = [
     # Proper nouns and acronyms
+    r"AI",
     r"API",
     r"APNS",
     r"Botserver",
diff --git a/zerver/actions/message_summary.py b/zerver/actions/message_summary.py
index bbd5dc70c6..923c13749c 100644
--- a/zerver/actions/message_summary.py
+++ b/zerver/actions/message_summary.py
@@ -194,6 +194,7 @@ def do_summarize_narrow(
     input_tokens = response["usage"]["prompt_tokens"]
     output_tokens = response["usage"]["completion_tokens"]
 
+    # Costs are per gigatoken, so divide credits_used by 1 billion to get the actual cost.
     credits_used = (output_tokens * settings.OUTPUT_COST_PER_GIGATOKEN) + (
         input_tokens * settings.INPUT_COST_PER_GIGATOKEN
     )
diff --git a/zerver/tests/test_message_summary.py b/zerver/tests/test_message_summary.py
index ad9c5207cf..93ef6b86e0 100644
--- a/zerver/tests/test_message_summary.py
+++ b/zerver/tests/test_message_summary.py
@@ -77,6 +77,14 @@ class MessagesSummaryTestCase(ZulipTestCase):
         with open(LLM_FIXTURES_FILE, "rb") as f:
             fixture_data = orjson.loads(f.read())
 
+        # Summary requests are blocked when the budget is set to 0.
+        with self.settings(
+            TOPIC_SUMMARIZATION_MODEL="groq/llama-3.3-70b-versatile",
+            MAX_PER_USER_MONTHLY_AI_COST=0,
+        ):
+            response = self.client_get("/json/messages/summary")
+            self.assert_json_error_contains(response, "Reached monthly limit for AI credits.")
+
         # Fake credentials to ensure we crash if actual network
         # requests occur, which would reflect a problem with how the
         # fixtures were set up.
@@ -103,3 +111,11 @@ class MessagesSummaryTestCase(ZulipTestCase):
                     property="ai_credit_usage::day", value=credits_used, user_id=self.user.id
                 ).exists()
             )
+
+        # If we reached the credit usage limit, block summary requests.
+        with self.settings(
+            TOPIC_SUMMARIZATION_MODEL="groq/llama-3.3-70b-versatile",
+            MAX_PER_USER_MONTHLY_AI_COST=credits_used / 1000000000,
+        ):
+            response = self.client_get("/json/messages/summary")
+            self.assert_json_error_contains(response, "Reached monthly limit for AI credits.")
diff --git a/zerver/views/message_summary.py b/zerver/views/message_summary.py
index d0d02b73de..b2e3ea992e 100644
--- a/zerver/views/message_summary.py
+++ b/zerver/views/message_summary.py
@@ -8,6 +8,7 @@ from django.http import HttpRequest, HttpResponse
 from django.utils.translation import gettext as _
 from pydantic import Json
 
+from analytics.lib.counts import COUNT_STATS
 from zerver.actions.message_summary import do_summarize_narrow
 from zerver.lib.exceptions import JsonableError
 from zerver.lib.narrow import NarrowParameter
@@ -29,6 +30,13 @@ def get_messages_summary(
     if not (user_profile.is_moderator or user_profile.is_realm_admin):  # nocoverage
         return json_success(request, {"summary": "Feature limited to moderators for now."})
 
+    if settings.MAX_PER_USER_MONTHLY_AI_COST is not None:
+        used_credits = COUNT_STATS["ai_credit_usage::day"].current_month_accumulated_count_for_user(
+            user_profile
+        )
+        if used_credits >= settings.MAX_PER_USER_MONTHLY_AI_COST * 1000000000:
+            raise JsonableError(_("Reached monthly limit for AI credits."))
+
     summary = do_summarize_narrow(user_profile, narrow)
     if summary is None:  # nocoverage
         return json_success(request, {"summary": "No messages in conversation to summarize"})
diff --git a/zproject/default_settings.py b/zproject/default_settings.py
index 1e800949cb..04c6f1919e 100644
--- a/zproject/default_settings.py
+++ b/zproject/default_settings.py
@@ -706,3 +706,4 @@ TOPIC_SUMMARIZATION_PARAMETERS: dict[str, object] = {}
 # are arbitrarily, but typically will be USD.
 INPUT_COST_PER_GIGATOKEN: int = 0
 OUTPUT_COST_PER_GIGATOKEN: int = 0
+MAX_PER_USER_MONTHLY_AI_COST: float | None = 0.5  # None disables the per-user limit.
diff --git a/zproject/dev_settings.py b/zproject/dev_settings.py
index 962df519fd..452b5cd80d 100644
--- a/zproject/dev_settings.py
+++ b/zproject/dev_settings.py
@@ -225,3 +225,4 @@ TOPIC_SUMMARIZATION_MODEL = "groq/llama-3.3-70b-versatile"
 # https://groq.com/pricing/
 OUTPUT_COST_PER_GIGATOKEN = 590
 INPUT_COST_PER_GIGATOKEN = 790
+MAX_PER_USER_MONTHLY_AI_COST = 1
diff --git a/zproject/prod_settings_template.py b/zproject/prod_settings_template.py
index b077183b0b..1db220f12c 100644
--- a/zproject/prod_settings_template.py
+++ b/zproject/prod_settings_template.py
@@ -738,6 +738,7 @@ SOCIAL_AUTH_SAML_SUPPORT_CONTACT = {
 ## quota. Units are USD or other currency of your choice.
 # OUTPUT_COST_PER_GIGATOKEN = 500
 # INPUT_COST_PER_GIGATOKEN = 400
+# MAX_PER_USER_MONTHLY_AI_COST = 0.5
 
 ################
 ## Miscellaneous settings.