mirror of
https://github.com/zulip/zulip.git
synced 2025-10-23 04:52:12 +00:00
message_summary: Move cost per token configuration to settings.
This commit is contained in:
@@ -17,17 +17,6 @@ from zerver.models import UserProfile
|
||||
|
||||
# Maximum number of messages that can be summarized in a single request.
|
||||
MAX_MESSAGES_SUMMARIZED = 100
|
||||
# Price per token for input and output tokens.
|
||||
# These values are based on the pricing of the Bedrock API
|
||||
# for Llama 3.3 Instruct (70B).
|
||||
# https://aws.amazon.com/bedrock/pricing/
|
||||
# Unit: USD per 1 billion tokens.
|
||||
#
|
||||
# These values likely will want to be declared in configuration,
|
||||
# rather than here in the code.
|
||||
OUTPUT_COST_PER_GIGATOKEN = 720
|
||||
INPUT_COST_PER_GIGATOKEN = 720
|
||||
|
||||
|
||||
ai_time_start = 0.0
|
||||
ai_total_time = 0.0
|
||||
@@ -205,8 +194,8 @@ def do_summarize_narrow(
|
||||
input_tokens = response["usage"]["prompt_tokens"]
|
||||
output_tokens = response["usage"]["completion_tokens"]
|
||||
|
||||
credits_used = (output_tokens * OUTPUT_COST_PER_GIGATOKEN) + (
|
||||
input_tokens * INPUT_COST_PER_GIGATOKEN
|
||||
credits_used = (output_tokens * settings.OUTPUT_COST_PER_GIGATOKEN) + (
|
||||
input_tokens * settings.INPUT_COST_PER_GIGATOKEN
|
||||
)
|
||||
do_increment_logging_stat(
|
||||
user_profile, COUNT_STATS["ai_credit_usage::day"], None, timezone_now(), credits_used
|
||||
|
@@ -7,7 +7,6 @@ from django.conf import settings
|
||||
from typing_extensions import override
|
||||
|
||||
from analytics.models import UserCount
|
||||
from zerver.actions.message_summary import INPUT_COST_PER_GIGATOKEN, OUTPUT_COST_PER_GIGATOKEN
|
||||
from zerver.lib.test_classes import ZulipTestCase
|
||||
|
||||
warnings.filterwarnings("ignore", category=UserWarning, module="pydantic")
|
||||
@@ -87,8 +86,8 @@ class MessagesSummaryTestCase(ZulipTestCase):
|
||||
):
|
||||
input_tokens = fixture_data["response"]["usage"]["prompt_tokens"]
|
||||
output_tokens = fixture_data["response"]["usage"]["completion_tokens"]
|
||||
credits_used = (output_tokens * OUTPUT_COST_PER_GIGATOKEN) + (
|
||||
input_tokens * INPUT_COST_PER_GIGATOKEN
|
||||
credits_used = (output_tokens * settings.OUTPUT_COST_PER_GIGATOKEN) + (
|
||||
input_tokens * settings.INPUT_COST_PER_GIGATOKEN
|
||||
)
|
||||
self.assertFalse(
|
||||
UserCount.objects.filter(
|
||||
|
@@ -702,3 +702,7 @@ MAX_DEACTIVATED_REALM_DELETION_DAYS: int | None = None
|
||||
|
||||
TOPIC_SUMMARIZATION_MODEL: str | None = None
|
||||
TOPIC_SUMMARIZATION_PARAMETERS: dict[str, object] = {}
|
||||
# Price per token for input and output tokens, and maximum cost. Units
|
||||
# are arbitrary, but typically will be USD.
|
||||
INPUT_COST_PER_GIGATOKEN: int = 0
|
||||
OUTPUT_COST_PER_GIGATOKEN: int = 0
|
||||
|
@@ -221,3 +221,7 @@ RESOLVE_TOPIC_UNDO_GRACE_PERIOD_SECONDS = 5
|
||||
ROOT_DOMAIN_LANDING_PAGE = True
|
||||
|
||||
TOPIC_SUMMARIZATION_MODEL = "groq/llama-3.3-70b-versatile"
|
||||
# Defaults based on groq's pricing for Llama 3.3 70B Versatile 128k.
|
||||
# https://groq.com/pricing/
|
||||
OUTPUT_COST_PER_GIGATOKEN = 590
|
||||
INPUT_COST_PER_GIGATOKEN = 790
|
||||
|
@@ -724,18 +724,21 @@ SOCIAL_AUTH_SAML_SUPPORT_CONTACT = {
|
||||
# BIG_BLUE_BUTTON_URL = "https://bbb.example.com/bigbluebutton/"
|
||||
|
||||
################
|
||||
## LLM Summarization
|
||||
## AI Features
|
||||
##
|
||||
## The model name that will be used by the LiteLLM library to configure
|
||||
## parameters to be sent to API.
|
||||
## The Llama-3-8B-instruct model is free to use and only requires submitting
|
||||
## a small form on the HuggingFace page for the model to gain access.
|
||||
## Specify the model and provider to use for topic summarization. The
|
||||
## `model` field from https://docs.litellm.ai/docs/providers specifies
|
||||
## your preferred provider/model combination.
|
||||
# TOPIC_SUMMARIZATION_MODEL = "huggingface/meta-llama/Meta-Llama-3-8B-Instruct"
|
||||
|
||||
## Other configuration parameters, passed through to litellm's `completion` call
|
||||
## See https://docs.litellm.ai/docs/completion/input
|
||||
# TOPIC_SUMMARIZATION_PARAMETERS = {}
|
||||
|
||||
## Set usage costs based on your model, and a maximum per-user monthly
|
||||
## quota. Units are USD or other currency of your choice.
|
||||
# OUTPUT_COST_PER_GIGATOKEN = 500
|
||||
# INPUT_COST_PER_GIGATOKEN = 400
|
||||
|
||||
################
|
||||
## Miscellaneous settings.
|
||||
|
||||
|
Reference in New Issue
Block a user