message_summary: Reorganize settings and secrets.

This allows arbitrary litellm-supported backends.
Author:    Alex Vandiver
Committer: Tim Abbott
Date:      2025-01-28 20:27:49 +00:00
Parent:    69524b48b5
Commit:    4dfb285b5b

7 changed files with 49 additions and 52 deletions

View File

@@ -64,7 +64,8 @@ def do_summarize_narrow(
     user_profile: UserProfile,
     narrow: list[NarrowParameter] | None,
 ) -> str | None:
-    if settings.TOPIC_SUMMARIZATION_MODEL is None:  # nocoverage
+    model = settings.TOPIC_SUMMARIZATION_MODEL
+    if model is None:  # nocoverage
         return None
 
     # TODO: This implementation does not attempt to make use of
@@ -112,17 +113,6 @@ def do_summarize_narrow(
     # IDEA: We could consider translating input and output text to
     # English to improve results when using a summarization model that
     # is primarily trained on English.
-    model = settings.TOPIC_SUMMARIZATION_MODEL
-    litellm_params: dict[str, Any] = {}
-    if model.startswith("huggingface"):  # nocoverage
-        assert settings.HUGGINGFACE_API_KEY is not None
-        litellm_params["api_key"] = settings.HUGGINGFACE_API_KEY
-    else:
-        assert model.startswith("bedrock")
-        litellm_params["aws_access_key_id"] = settings.AWS_ACCESS_KEY_ID
-        litellm_params["aws_secret_access_key"] = settings.AWS_SECRET_ACCESS_KEY
-        litellm_params["aws_region_name"] = settings.AWS_REGION_NAME
-
     conversation_length = len(message_list)
     max_summary_length = get_max_summary_length(conversation_length)
     intro = "The following is a chat conversation in the Zulip team chat app."
@@ -177,9 +167,12 @@ def do_summarize_narrow(
     # That way, you can't easily get extra tokens by sending
     # 25 requests all at once when you're just below the limit.
+    litellm_params: dict[str, object] = settings.TOPIC_SUMMARIZATION_PARAMETERS
+    api_key = settings.TOPIC_SUMMARIZATION_API_KEY
     response = litellm.completion(
         model=model,
         messages=messages,
+        api_key=api_key,
         **litellm_params,
     )
     input_tokens = response["usage"]["prompt_tokens"]
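
Taken together, these hunks replace per-provider credential plumbing with two generic settings that litellm interprets for any backend. A minimal sketch of the resulting call pattern (the settings names come from this commit; the model string, key, and parameter values here are illustrative placeholders):

# Sketch: with a generic API key and pass-through parameters, any
# litellm-supported backend works without provider-specific branches.
import litellm

# Illustrative stand-ins for Zulip's settings values.
model = "groq/llama-3.3-70b-versatile"
api_key = "gsk-placeholder"  # would come from zulip-secrets.conf
litellm_params: dict[str, object] = {"temperature": 0.2}

messages = [
    {"role": "user", "content": "Summarize this conversation: ..."},
]
response = litellm.completion(
    model=model,
    messages=messages,
    api_key=api_key,
    **litellm_params,
)
print(response["choices"][0]["message"]["content"])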

View File

@@ -1,5 +1,5 @@
 {
-    "model": "bedrock/meta.llama3-8b-instruct-v1:0",
+    "model": "groq/llama-3.3-70b-versatile",
     "messages": [
         {
             "content": "The following is a chat conversation in the Zulip team chat app. channel: Zulip features, topic: New feature launch",
@@ -15,17 +15,17 @@
         }
     ],
     "response": {
-        "id": "chatcmpl-a86e270f-a634-40f3-92f4-da786ccb263b",
-        "created": 1737832810,
-        "model": "meta.llama3-8b-instruct-v1:0",
+        "id": "chatcmpl-aba152b1-546f-497b-9fcf-13410008e180",
+        "created": 1738095814,
+        "model": "groq/llama-3.3-70b-versatile",
         "object": "chat.completion",
-        "system_fingerprint": null,
+        "system_fingerprint": "fp_fcc3b74982",
         "choices": [
             {
                 "finish_reason": "stop",
                 "index": 0,
                 "message": {
-                    "content": "\n\nIago announced the launch of a new feature in Zulip, which generates summaries of messages. He expressed enthusiasm for the feature, stating it will greatly help him when catching up.",
+                    "content": "Iago announced that Zulip has launched a feature to generate summaries of messages. Iago thinks this feature is awesome and believes it will greatly help when catching up. No actions are specified in the conversation. Iago appears to be looking forward to using the new feature.",
                     "role": "assistant",
                     "tool_calls": null,
                     "function_call": null
@@ -33,11 +33,18 @@
             }
         ],
         "usage": {
-            "completion_tokens": 39,
-            "prompt_tokens": 144,
-            "total_tokens": 183,
+            "completion_tokens": 56,
+            "prompt_tokens": 163,
+            "total_tokens": 219,
             "completion_tokens_details": null,
-            "prompt_tokens_details": null
+            "prompt_tokens_details": null,
+            "queue_time": 0.43361592099999996,
+            "prompt_time": 0.02746035,
+            "completion_time": 0.203636364,
+            "total_time": 0.231096714
         },
+        "x_groq": {
+            "id": "req_01jjq9ash2e56a6y3jd58cryx7"
+        }
     }
 }

View File

@@ -82,10 +82,8 @@ class MessagesSummaryTestCase(ZulipTestCase):
         # requests occur, which would reflect a problem with how the
         # fixtures were set up.
         with self.settings(
-            TOPIC_SUMMARIZATION_MODEL="bedrock/meta.llama3-8b-instruct-v1:0",
-            AWS_ACCESS_KEY_ID="fakeKeyID",
-            AWS_SECRET_ACCESS_KEY="fakeAccessKey",
-            AWS_REGION_NAME="ap-south-1",
+            TOPIC_SUMMARIZATION_MODEL="groq/llama-3.3-70b-versatile",
+            TOPIC_SUMMARIZATION_API_KEY="test",
         ):
             input_tokens = fixture_data["response"]["usage"]["prompt_tokens"]
             output_tokens = fixture_data["response"]["usage"]["completion_tokens"]
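
With credentials reduced to a single setting, a recorded fixture like the one above is enough to exercise the code path offline. A rough sketch of that pattern (the fixture path and mocking approach here are illustrative assumptions, not necessarily how this test suite wires it up):

# Hypothetical sketch: replay a recorded litellm response so the test
# makes no network request. The path and names are assumptions.
import json
from unittest import mock

import litellm

with open("litellm_fixture.json") as f:  # assumed fixture location
    fixture_data = json.load(f)

with mock.patch.object(litellm, "completion", return_value=fixture_data["response"]):
    response = litellm.completion(
        model=fixture_data["model"],
        messages=fixture_data["messages"],
        api_key="test",
    )

# The token counts asserted by the test come straight from the fixture.
assert response["usage"]["prompt_tokens"] == 163
assert response["usage"]["completion_tokens"] == 56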

View File

@@ -1246,31 +1246,6 @@ EMAIL_HOST_PASSWORD = get_secret("email_password")
 EMAIL_GATEWAY_PASSWORD = get_secret("email_gateway_password")
 AUTH_LDAP_BIND_PASSWORD = get_secret("auth_ldap_bind_password", "")
 
-########################################################################
-# LiteLLM SETTINGS
-########################################################################
-
-# The model name that will be used by the LiteLLM library to configure
-# parameters to be sent to the API.
-# The Llama-3-8B-instruct model is free to use and only requires submitting
-# a small form on the HuggingFace page for the model to gain access.
-# We only support HuggingFace and AWS Bedrock for LLM API requests.
-DEFAULT_TOPIC_SUMMARIZATION_MODEL: str | None = "huggingface/meta-llama/Meta-Llama-3-8B-Instruct"
-if PRODUCTION:
-    DEFAULT_TOPIC_SUMMARIZATION_MODEL = None
-TOPIC_SUMMARIZATION_MODEL = get_secret(
-    "topic_summarization_model", DEFAULT_TOPIC_SUMMARIZATION_MODEL
-)
-
-# Which API key to use will be determined based on TOPIC_SUMMARIZATION_MODEL.
-# HuggingFace access credentials
-HUGGINGFACE_API_KEY = get_secret("huggingface_api_key", None)
-
-# AWS Bedrock access credentials
-AWS_ACCESS_KEY_ID = get_secret("aws_access_key_id", None)
-AWS_SECRET_ACCESS_KEY = get_secret("aws_secret_access_key", None)
-AWS_REGION_NAME = get_secret("aws_region_name", None)
-
 ########################################################################
 # MISC SETTINGS
 ########################################################################
@@ -1316,3 +1291,6 @@ SCIM_SERVICE_PROVIDER = {
         },
     ],
 }
+
+# Which API key to use will be determined based on TOPIC_SUMMARIZATION_MODEL.
+TOPIC_SUMMARIZATION_API_KEY = get_secret("topic_summarization_api_key", None)
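
For context, get_secret here is Zulip's standard helper for reading /etc/zulip/zulip-secrets.conf (with a development-mode equivalent); a simplified sketch of its behavior, not the real implementation:

# Simplified sketch of get_secret; the real helper lives in Zulip's
# settings machinery and also handles development configurations.
import configparser

_secrets = configparser.RawConfigParser()
_secrets.read("/etc/zulip/zulip-secrets.conf")

def get_secret(key: str, default: object = None) -> object:
    # Secrets are stored under the [secrets] section of the file.
    if _secrets.has_option("secrets", key):
        return _secrets.get("secrets", key)
    return default

# Operators set topic_summarization_api_key in zulip-secrets.conf;
# if it is absent, the default of None leaves the feature unconfigured.
TOPIC_SUMMARIZATION_API_KEY = get_secret("topic_summarization_api_key", None)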

View File

@@ -696,3 +696,10 @@ MAX_WEB_DATA_IMPORT_SIZE_MB = 1024
 # delete an active organization.
 MIN_DEACTIVATED_REALM_DELETION_DAYS: int | None = 14
 MAX_DEACTIVATED_REALM_DELETION_DAYS: int | None = None
+
+TOPIC_SUMMARIZATION_MODEL: str | None = None
+if not PRODUCTION:
+    TOPIC_SUMMARIZATION_MODEL = "huggingface/meta-llama/Meta-Llama-3-8B-Instruct"
+TOPIC_SUMMARIZATION_PARAMETERS: dict[str, object] = {}
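
Because TOPIC_SUMMARIZATION_PARAMETERS is splatted directly into litellm.completion, a deployment can tune anything litellm accepts without further code changes. An illustrative override (the specific values are examples, not recommendations):

# Example settings override, passed through verbatim to litellm.completion();
# see https://docs.litellm.ai/docs/completion/input for accepted keys.
TOPIC_SUMMARIZATION_PARAMETERS: dict[str, object] = {
    "temperature": 0.3,  # steadier summaries
    "max_tokens": 300,   # cap the summary length
}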

View File

@@ -219,3 +219,5 @@ RESOLVE_TOPIC_UNDO_GRACE_PERIOD_SECONDS = 5
 # In a dev environment, 'zulipdev.com:9991' is used to access the landing page.
 # See: https://zulip.readthedocs.io/en/latest/subsystems/realms.html#working-with-subdomains-in-development-environment
 ROOT_DOMAIN_LANDING_PAGE = True
+
+TOPIC_SUMMARIZATION_MODEL = "groq/llama-3.3-70b-versatile"

View File

@@ -723,6 +723,18 @@ SOCIAL_AUTH_SAML_SUPPORT_CONTACT = {
 ## set big_blue_button_secret in zulip-secrets.conf.
 # BIG_BLUE_BUTTON_URL = "https://bbb.example.com/bigbluebutton/"
 
+################
+## LLM Summarization
+##
+## The model name that will be used by the LiteLLM library to configure
+## parameters to be sent to the API.
+## The Llama-3-8B-instruct model is free to use and only requires submitting
+## a small form on the HuggingFace page for the model to gain access.
+# TOPIC_SUMMARIZATION_MODEL = "huggingface/meta-llama/Meta-Llama-3-8B-Instruct"
+## Other configuration parameters, passed through to litellm's `completion` call.
+## See https://docs.litellm.ai/docs/completion/input
+# TOPIC_SUMMARIZATION_PARAMETERS = {}
+
 ################
 ## Miscellaneous settings.
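
Since any litellm backend is now usable, switching providers is a matter of the model string plus the shared secret. For example, one might point at an OpenAI-hosted model as sketched below (the model name follows litellm's provider/model convention; this particular choice is illustrative, not part of the commit):

## Illustrative alternative backend: set the provider's API key in
## zulip-secrets.conf as topic_summarization_api_key, then:
# TOPIC_SUMMARIZATION_MODEL = "openai/gpt-4o-mini"
# TOPIC_SUMMARIZATION_PARAMETERS = {"temperature": 0.2}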