mirror of
				https://github.com/zulip/zulip.git
				synced 2025-10-31 12:03:46 +00:00 
			
		
		
		
	message_summary: Reorganize settings and secrets.
This allows arbitrary litellm-supported backends.
This commit is contained in:
		
				
					committed by
					
						 Tim Abbott
						Tim Abbott
					
				
			
			
				
	
			
			
			
						parent
						
							69524b48b5
						
					
				
				
					commit
					4dfb285b5b
				
			| @@ -64,7 +64,8 @@ def do_summarize_narrow( | |||||||
|     user_profile: UserProfile, |     user_profile: UserProfile, | ||||||
|     narrow: list[NarrowParameter] | None, |     narrow: list[NarrowParameter] | None, | ||||||
| ) -> str | None: | ) -> str | None: | ||||||
|     if settings.TOPIC_SUMMARIZATION_MODEL is None:  # nocoverage |     model = settings.TOPIC_SUMMARIZATION_MODEL | ||||||
|  |     if model is None:  # nocoverage | ||||||
|         return None |         return None | ||||||
|  |  | ||||||
|     # TODO: This implementation does not attempt to make use of |     # TODO: This implementation does not attempt to make use of | ||||||
| @@ -112,17 +113,6 @@ def do_summarize_narrow( | |||||||
|     # IDEA: We could consider translating input and output text to |     # IDEA: We could consider translating input and output text to | ||||||
|     # English to improve results when using a summarization model that |     # English to improve results when using a summarization model that | ||||||
|     # is primarily trained on English. |     # is primarily trained on English. | ||||||
|     model = settings.TOPIC_SUMMARIZATION_MODEL |  | ||||||
|     litellm_params: dict[str, Any] = {} |  | ||||||
|     if model.startswith("huggingface"):  # nocoverage |  | ||||||
|         assert settings.HUGGINGFACE_API_KEY is not None |  | ||||||
|         litellm_params["api_key"] = settings.HUGGINGFACE_API_KEY |  | ||||||
|     else: |  | ||||||
|         assert model.startswith("bedrock") |  | ||||||
|         litellm_params["aws_access_key_id"] = settings.AWS_ACCESS_KEY_ID |  | ||||||
|         litellm_params["aws_secret_access_key"] = settings.AWS_SECRET_ACCESS_KEY |  | ||||||
|         litellm_params["aws_region_name"] = settings.AWS_REGION_NAME |  | ||||||
|  |  | ||||||
|     conversation_length = len(message_list) |     conversation_length = len(message_list) | ||||||
|     max_summary_length = get_max_summary_length(conversation_length) |     max_summary_length = get_max_summary_length(conversation_length) | ||||||
|     intro = "The following is a chat conversation in the Zulip team chat app." |     intro = "The following is a chat conversation in the Zulip team chat app." | ||||||
| @@ -177,9 +167,12 @@ def do_summarize_narrow( | |||||||
|     # That way, you can't easily get extra tokens by sending |     # That way, you can't easily get extra tokens by sending | ||||||
|     # 25 requests all at once when you're just below the limit. |     # 25 requests all at once when you're just below the limit. | ||||||
|  |  | ||||||
|  |     litellm_params: dict[str, object] = settings.TOPIC_SUMMARIZATION_PARAMETERS | ||||||
|  |     api_key = settings.TOPIC_SUMMARIZATION_API_KEY | ||||||
|     response = litellm.completion( |     response = litellm.completion( | ||||||
|         model=model, |         model=model, | ||||||
|         messages=messages, |         messages=messages, | ||||||
|  |         api_key=api_key, | ||||||
|         **litellm_params, |         **litellm_params, | ||||||
|     ) |     ) | ||||||
|     input_tokens = response["usage"]["prompt_tokens"] |     input_tokens = response["usage"]["prompt_tokens"] | ||||||
|   | |||||||
							
								
								
									
										29
									
								
								zerver/tests/fixtures/litellm/summary.json
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										29
									
								
								zerver/tests/fixtures/litellm/summary.json
									
									
									
									
										vendored
									
									
								
							| @@ -1,12 +1,12 @@ | |||||||
| { | { | ||||||
|   "model": "bedrock/meta.llama3-8b-instruct-v1:0", |   "model": "groq/llama-3.3-70b-versatile", | ||||||
|   "messages": [ |   "messages": [ | ||||||
|     { |     { | ||||||
|       "content": "The following is a chat conversation in the Zulip team chat app. channel: Zulip features, topic: New feature launch", |       "content": "The following is a chat conversation in the Zulip team chat app. channel: Zulip features, topic: New feature launch", | ||||||
|       "role": "system" |       "role": "system" | ||||||
|     }, |     }, | ||||||
|     { |     { | ||||||
|       "content": "[{\"sender\": \"Iago\", \"content\": \"Zulip just launched a feature to generate summary of messages.\"}, {\"sender\": \"Iago\", \"content\": \"Sounds awesome! This will greatly help me when catching up.\"}]", |       "content": "[{\"sender\":\"Iago\",\"content\":\"Zulip just launched a feature to generate summary of messages.\"},{\"sender\":\"Iago\",\"content\":\"Sounds awesome! This will greatly help me when catching up.\"}]", | ||||||
|       "role": "user" |       "role": "user" | ||||||
|     }, |     }, | ||||||
|     { |     { | ||||||
| @@ -15,17 +15,17 @@ | |||||||
|     } |     } | ||||||
|   ], |   ], | ||||||
|   "response": { |   "response": { | ||||||
|     "id": "chatcmpl-a86e270f-a634-40f3-92f4-da786ccb263b", |     "id": "chatcmpl-aba152b1-546f-497b-9fcf-13410008e180", | ||||||
|     "created": 1737832810, |     "created": 1738095814, | ||||||
|     "model": "meta.llama3-8b-instruct-v1:0", |     "model": "groq/llama-3.3-70b-versatile", | ||||||
|     "object": "chat.completion", |     "object": "chat.completion", | ||||||
|     "system_fingerprint": null, |     "system_fingerprint": "fp_fcc3b74982", | ||||||
|     "choices": [ |     "choices": [ | ||||||
|       { |       { | ||||||
|         "finish_reason": "stop", |         "finish_reason": "stop", | ||||||
|         "index": 0, |         "index": 0, | ||||||
|         "message": { |         "message": { | ||||||
|           "content": "\n\nIago announced the launch of a new feature in Zulip, which generates summaries of messages. He expressed enthusiasm for the feature, stating it will greatly help him when catching up.", |           "content": "Iago announced that Zulip has launched a feature to generate summaries of messages. Iago thinks this feature is awesome and believes it will greatly help when catching up. No actions are specified in the conversation. Iago appears to be looking forward to using the new feature.", | ||||||
|           "role": "assistant", |           "role": "assistant", | ||||||
|           "tool_calls": null, |           "tool_calls": null, | ||||||
|           "function_call": null |           "function_call": null | ||||||
| @@ -33,11 +33,18 @@ | |||||||
|       } |       } | ||||||
|     ], |     ], | ||||||
|     "usage": { |     "usage": { | ||||||
|       "completion_tokens": 39, |       "completion_tokens": 56, | ||||||
|       "prompt_tokens": 144, |       "prompt_tokens": 163, | ||||||
|       "total_tokens": 183, |       "total_tokens": 219, | ||||||
|       "completion_tokens_details": null, |       "completion_tokens_details": null, | ||||||
|       "prompt_tokens_details": null |       "prompt_tokens_details": null, | ||||||
|  |       "queue_time": 0.43361592099999996, | ||||||
|  |       "prompt_time": 0.02746035, | ||||||
|  |       "completion_time": 0.203636364, | ||||||
|  |       "total_time": 0.231096714 | ||||||
|  |     }, | ||||||
|  |     "x_groq": { | ||||||
|  |       "id": "req_01jjq9ash2e56a6y3jd58cryx7" | ||||||
|     } |     } | ||||||
|   } |   } | ||||||
| } | } | ||||||
|   | |||||||
| @@ -82,10 +82,8 @@ class MessagesSummaryTestCase(ZulipTestCase): | |||||||
|         # requests occur, which would reflect a problem with how the |         # requests occur, which would reflect a problem with how the | ||||||
|         # fixtures were set up. |         # fixtures were set up. | ||||||
|         with self.settings( |         with self.settings( | ||||||
|             TOPIC_SUMMARIZATION_MODEL="bedrock/meta.llama3-8b-instruct-v1:0", |             TOPIC_SUMMARIZATION_MODEL="groq/llama-3.3-70b-versatile", | ||||||
|             AWS_ACCESS_KEY_ID="fakeKeyID", |             TOPIC_SUMMARIZATION_API_KEY="test", | ||||||
|             AWS_SECRET_ACCESS_KEY="fakeAccessKey", |  | ||||||
|             AWS_REGION_NAME="ap-south-1", |  | ||||||
|         ): |         ): | ||||||
|             input_tokens = fixture_data["response"]["usage"]["prompt_tokens"] |             input_tokens = fixture_data["response"]["usage"]["prompt_tokens"] | ||||||
|             output_tokens = fixture_data["response"]["usage"]["completion_tokens"] |             output_tokens = fixture_data["response"]["usage"]["completion_tokens"] | ||||||
|   | |||||||
| @@ -1246,31 +1246,6 @@ EMAIL_HOST_PASSWORD = get_secret("email_password") | |||||||
| EMAIL_GATEWAY_PASSWORD = get_secret("email_gateway_password") | EMAIL_GATEWAY_PASSWORD = get_secret("email_gateway_password") | ||||||
| AUTH_LDAP_BIND_PASSWORD = get_secret("auth_ldap_bind_password", "") | AUTH_LDAP_BIND_PASSWORD = get_secret("auth_ldap_bind_password", "") | ||||||
|  |  | ||||||
| ######################################################################## |  | ||||||
| # LiteLLM SETTINGS |  | ||||||
| ######################################################################## |  | ||||||
|  |  | ||||||
| # The model name that will be used by LiteLLM library to configure |  | ||||||
| # parameters to be sent to API. |  | ||||||
| # The Llama-3-8B-instruct model is free to use and only requires submitting |  | ||||||
| # a small form on the HuggingFace page for the model to gain access. |  | ||||||
| # We only support HuggingFace and AWS Bedrock for LLM API requests. |  | ||||||
| DEFAULT_TOPIC_SUMMARIZATION_MODEL: str | None = "huggingface/meta-llama/Meta-Llama-3-8B-Instruct" |  | ||||||
| if PRODUCTION: |  | ||||||
|     DEFAULT_TOPIC_SUMMARIZATION_MODEL = None |  | ||||||
| TOPIC_SUMMARIZATION_MODEL = get_secret( |  | ||||||
|     "topic_summarization_model", DEFAULT_TOPIC_SUMMARIZATION_MODEL |  | ||||||
| ) |  | ||||||
|  |  | ||||||
| # Which API key to use will be determined based on TOPIC_SUMMARIZATION_MODEL. |  | ||||||
| # HuggingFace access credentials |  | ||||||
| HUGGINGFACE_API_KEY = get_secret("huggingface_api_key", None) |  | ||||||
|  |  | ||||||
| # AWS Bedrock access credentials |  | ||||||
| AWS_ACCESS_KEY_ID = get_secret("aws_access_key_id", None) |  | ||||||
| AWS_SECRET_ACCESS_KEY = get_secret("aws_secret_access_key", None) |  | ||||||
| AWS_REGION_NAME = get_secret("aws_region_name", None) |  | ||||||
|  |  | ||||||
| ######################################################################## | ######################################################################## | ||||||
| # MISC SETTINGS | # MISC SETTINGS | ||||||
| ######################################################################## | ######################################################################## | ||||||
| @@ -1316,3 +1291,6 @@ SCIM_SERVICE_PROVIDER = { | |||||||
|         }, |         }, | ||||||
|     ], |     ], | ||||||
| } | } | ||||||
|  |  | ||||||
|  | # Which API key to use will be determined based on TOPIC_SUMMARIZATION_MODEL. | ||||||
|  | TOPIC_SUMMARIZATION_API_KEY = get_secret("topic_summarization_api_key", None) | ||||||
|   | |||||||
| @@ -696,3 +696,10 @@ MAX_WEB_DATA_IMPORT_SIZE_MB = 1024 | |||||||
| # delete an active organization. | # delete an active organization. | ||||||
| MIN_DEACTIVATED_REALM_DELETION_DAYS: int | None = 14 | MIN_DEACTIVATED_REALM_DELETION_DAYS: int | None = 14 | ||||||
| MAX_DEACTIVATED_REALM_DELETION_DAYS: int | None = None | MAX_DEACTIVATED_REALM_DELETION_DAYS: int | None = None | ||||||
|  |  | ||||||
|  |  | ||||||
|  | TOPIC_SUMMARIZATION_MODEL: str | None = None | ||||||
|  | if not PRODUCTION: | ||||||
|  |     TOPIC_SUMMARIZATION_MODEL = "huggingface/meta-llama/Meta-Llama-3-8B-Instruct" | ||||||
|  |  | ||||||
|  | TOPIC_SUMMARIZATION_PARAMETERS: dict[str, object] = {} | ||||||
|   | |||||||
| @@ -219,3 +219,5 @@ RESOLVE_TOPIC_UNDO_GRACE_PERIOD_SECONDS = 5 | |||||||
| # In a dev environment, 'zulipdev.com:9991' is used to access the landing page. | # In a dev environment, 'zulipdev.com:9991' is used to access the landing page. | ||||||
| # See: https://zulip.readthedocs.io/en/latest/subsystems/realms.html#working-with-subdomains-in-development-environment | # See: https://zulip.readthedocs.io/en/latest/subsystems/realms.html#working-with-subdomains-in-development-environment | ||||||
| ROOT_DOMAIN_LANDING_PAGE = True | ROOT_DOMAIN_LANDING_PAGE = True | ||||||
|  |  | ||||||
|  | TOPIC_SUMMARIZATION_MODEL = "groq/llama-3.3-70b-versatile" | ||||||
|   | |||||||
| @@ -723,6 +723,18 @@ SOCIAL_AUTH_SAML_SUPPORT_CONTACT = { | |||||||
| ## set big_blue_button_secret in zulip-secrets.conf. | ## set big_blue_button_secret in zulip-secrets.conf. | ||||||
| # BIG_BLUE_BUTTON_URL = "https://bbb.example.com/bigbluebutton/" | # BIG_BLUE_BUTTON_URL = "https://bbb.example.com/bigbluebutton/" | ||||||
|  |  | ||||||
|  | ################ | ||||||
|  | ## LLM Summarization | ||||||
|  | ## | ||||||
|  | ## The model name that will be used by LiteLLM library to configure | ||||||
|  | ## parameters to be sent to API. | ||||||
|  | ## The Llama-3-8B-instruct model is free to use and only requires submitting | ||||||
|  | ## a small form on the HuggingFace page for the model to gain access. | ||||||
|  | # TOPIC_SUMMARIZATION_MODEL = "huggingface/meta-llama/Meta-Llama-3-8B-Instruct" | ||||||
|  |  | ||||||
|  | ## Other configuration parameters, passed through to litellm's `completion` call | ||||||
|  | ## See https://docs.litellm.ai/docs/completion/input | ||||||
|  | # TOPIC_SUMMARIZATION_PARAMETERS = {} | ||||||
|  |  | ||||||
| ################ | ################ | ||||||
| ## Miscellaneous settings. | ## Miscellaneous settings. | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user