message_summary: Move cost per token configuration to settings.

This commit is contained in:
Aman Agrawal
2025-02-05 11:27:18 +05:30
committed by Tim Abbott
parent 099bab986f
commit 7fb767c080
5 changed files with 21 additions and 22 deletions

View File

@@ -17,17 +17,6 @@ from zerver.models import UserProfile
# Maximum number of messages that can be summarized in a single request.
MAX_MESSAGES_SUMMARIZED = 100
# Price per token for input and output tokens.
# These values are based on the pricing of the Bedrock API
# for Llama 3.3 Instruct (70B).
# https://aws.amazon.com/bedrock/pricing/
# Unit: USD per 1 billion tokens.
#
# These values likely will want to be declared in configuration,
# rather than here in the code.
OUTPUT_COST_PER_GIGATOKEN = 720
INPUT_COST_PER_GIGATOKEN = 720
ai_time_start = 0.0
ai_total_time = 0.0
@@ -205,8 +194,8 @@ def do_summarize_narrow(
input_tokens = response["usage"]["prompt_tokens"]
output_tokens = response["usage"]["completion_tokens"]
credits_used = (output_tokens * OUTPUT_COST_PER_GIGATOKEN) + (
input_tokens * INPUT_COST_PER_GIGATOKEN
credits_used = (output_tokens * settings.OUTPUT_COST_PER_GIGATOKEN) + (
input_tokens * settings.INPUT_COST_PER_GIGATOKEN
)
do_increment_logging_stat(
user_profile, COUNT_STATS["ai_credit_usage::day"], None, timezone_now(), credits_used

View File

@@ -7,7 +7,6 @@ from django.conf import settings
from typing_extensions import override
from analytics.models import UserCount
from zerver.actions.message_summary import INPUT_COST_PER_GIGATOKEN, OUTPUT_COST_PER_GIGATOKEN
from zerver.lib.test_classes import ZulipTestCase
warnings.filterwarnings("ignore", category=UserWarning, module="pydantic")
@@ -87,8 +86,8 @@ class MessagesSummaryTestCase(ZulipTestCase):
):
input_tokens = fixture_data["response"]["usage"]["prompt_tokens"]
output_tokens = fixture_data["response"]["usage"]["completion_tokens"]
credits_used = (output_tokens * OUTPUT_COST_PER_GIGATOKEN) + (
input_tokens * INPUT_COST_PER_GIGATOKEN
credits_used = (output_tokens * settings.OUTPUT_COST_PER_GIGATOKEN) + (
input_tokens * settings.INPUT_COST_PER_GIGATOKEN
)
self.assertFalse(
UserCount.objects.filter(

View File

@@ -702,3 +702,7 @@ MAX_DEACTIVATED_REALM_DELETION_DAYS: int | None = None
TOPIC_SUMMARIZATION_MODEL: str | None = None
TOPIC_SUMMARIZATION_PARAMETERS: dict[str, object] = {}
# Price per token for input and output tokens, and maximum cost. Units
# are arbitrary, but will typically be USD.
INPUT_COST_PER_GIGATOKEN: int = 0
OUTPUT_COST_PER_GIGATOKEN: int = 0

View File

@@ -221,3 +221,7 @@ RESOLVE_TOPIC_UNDO_GRACE_PERIOD_SECONDS = 5
ROOT_DOMAIN_LANDING_PAGE = True
TOPIC_SUMMARIZATION_MODEL = "groq/llama-3.3-70b-versatile"
# Defaults based on groq's pricing for Llama 3.3 70B Versatile 128k.
# https://groq.com/pricing/
OUTPUT_COST_PER_GIGATOKEN = 790
INPUT_COST_PER_GIGATOKEN = 590

View File

@@ -724,18 +724,21 @@ SOCIAL_AUTH_SAML_SUPPORT_CONTACT = {
# BIG_BLUE_BUTTON_URL = "https://bbb.example.com/bigbluebutton/"
################
## LLM Summarization
## AI Features
##
## The model name that will be used by the LiteLLM library to configure
## parameters to be sent to API.
## The Llama-3-8B-instruct model is free to use and only requires submitting
## a small form on the HuggingFace page for the model to gain access.
## Specify the model and provider to use for topic summarization. The
## `model` field from https://docs.litellm.ai/docs/providers specifies
## your preferred provider/model combination.
# TOPIC_SUMMARIZATION_MODEL = "huggingface/meta-llama/Meta-Llama-3-8B-Instruct"
## Other configuration parameters, passed through to litellm's `completion` call
## See https://docs.litellm.ai/docs/completion/input
# TOPIC_SUMMARIZATION_PARAMETERS = {}
## Set usage costs based on your model, and a maximum per-user monthly
## quota. Units are USD or other currency of your choice.
# OUTPUT_COST_PER_GIGATOKEN = 500
# INPUT_COST_PER_GIGATOKEN = 400
################
## Miscellaneous settings.