message_summary: Reorganize settings and secrets.

This allows arbitrary litellm-supported backends.
Author:    Alex Vandiver
Committer: Tim Abbott
Date:      2025-01-28 20:27:49 +00:00
Parent:    69524b48b5
Commit:    4dfb285b5b

7 changed files with 49 additions and 52 deletions

View File

@@ -64,7 +64,8 @@ def do_summarize_narrow(
     user_profile: UserProfile,
     narrow: list[NarrowParameter] | None,
 ) -> str | None:
-    if settings.TOPIC_SUMMARIZATION_MODEL is None:  # nocoverage
+    model = settings.TOPIC_SUMMARIZATION_MODEL
+    if model is None:  # nocoverage
         return None
 
     # TODO: This implementation does not attempt to make use of
@@ -112,17 +113,6 @@ def do_summarize_narrow(
     # IDEA: We could consider translating input and output text to
     # English to improve results when using a summarization model that
     # is primarily trained on English.
-    model = settings.TOPIC_SUMMARIZATION_MODEL
-    litellm_params: dict[str, Any] = {}
-    if model.startswith("huggingface"):  # nocoverage
-        assert settings.HUGGINGFACE_API_KEY is not None
-        litellm_params["api_key"] = settings.HUGGINGFACE_API_KEY
-    else:
-        assert model.startswith("bedrock")
-        litellm_params["aws_access_key_id"] = settings.AWS_ACCESS_KEY_ID
-        litellm_params["aws_secret_access_key"] = settings.AWS_SECRET_ACCESS_KEY
-        litellm_params["aws_region_name"] = settings.AWS_REGION_NAME
-
     conversation_length = len(message_list)
     max_summary_length = get_max_summary_length(conversation_length)
     intro = "The following is a chat conversation in the Zulip team chat app."
@@ -177,9 +167,12 @@ def do_summarize_narrow(
     # That way, you can't easily get extra tokens by sending
     # 25 requests all at once when you're just below the limit.
+    litellm_params: dict[str, object] = settings.TOPIC_SUMMARIZATION_PARAMETERS
+    api_key = settings.TOPIC_SUMMARIZATION_API_KEY
     response = litellm.completion(
         model=model,
         messages=messages,
+        api_key=api_key,
         **litellm_params,
     )
     input_tokens = response["usage"]["prompt_tokens"]
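
Taken together, these hunks replace per-provider credential plumbing with two generic settings that litellm interprets for any backend. A minimal sketch of the resulting call pattern (the settings names come from this commit; the model string, key, and parameter values here are illustrative placeholders):

# Sketch: with a generic API key and pass-through parameters, any
# litellm-supported backend works without provider-specific branches.
import litellm

# Illustrative stand-ins for Zulip's settings values.
model = "groq/llama-3.3-70b-versatile"
api_key = "gsk-placeholder"  # would come from zulip-secrets.conf
litellm_params: dict[str, object] = {"temperature": 0.2}

messages = [
    {"role": "user", "content": "Summarize this conversation: ..."},
]
response = litellm.completion(
    model=model,
    messages=messages,
    api_key=api_key,
    **litellm_params,
)
print(response["choices"][0]["message"]["content"])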

View File

@@ -1,5 +1,5 @@
 {
-    "model": "bedrock/meta.llama3-8b-instruct-v1:0",
+    "model": "groq/llama-3.3-70b-versatile",
     "messages": [
         {
             "content": "The following is a chat conversation in the Zulip team chat app. channel: Zulip features, topic: New feature launch",
@@ -15,17 +15,17 @@
         }
     ],
     "response": {
-        "id": "chatcmpl-a86e270f-a634-40f3-92f4-da786ccb263b",
-        "created": 1737832810,
-        "model": "meta.llama3-8b-instruct-v1:0",
+        "id": "chatcmpl-aba152b1-546f-497b-9fcf-13410008e180",
+        "created": 1738095814,
+        "model": "groq/llama-3.3-70b-versatile",
         "object": "chat.completion",
-        "system_fingerprint": null,
+        "system_fingerprint": "fp_fcc3b74982",
         "choices": [
             {
                 "finish_reason": "stop",
                 "index": 0,
                 "message": {
-                    "content": "\n\nIago announced the launch of a new feature in Zulip, which generates summaries of messages. He expressed enthusiasm for the feature, stating it will greatly help him when catching up.",
+                    "content": "Iago announced that Zulip has launched a feature to generate summaries of messages. Iago thinks this feature is awesome and believes it will greatly help when catching up. No actions are specified in the conversation. Iago appears to be looking forward to using the new feature.",
                     "role": "assistant",
                     "tool_calls": null,
                     "function_call": null
@@ -33,11 +33,18 @@
             }
         ],
         "usage": {
-            "completion_tokens": 39,
-            "prompt_tokens": 144,
-            "total_tokens": 183,
+            "completion_tokens": 56,
+            "prompt_tokens": 163,
+            "total_tokens": 219,
             "completion_tokens_details": null,
-            "prompt_tokens_details": null
+            "prompt_tokens_details": null,
+            "queue_time": 0.43361592099999996,
+            "prompt_time": 0.02746035,
+            "completion_time": 0.203636364,
+            "total_time": 0.231096714
         },
+        "x_groq": {
+            "id": "req_01jjq9ash2e56a6y3jd58cryx7"
+        }
     }
 }

View File

@@ -82,10 +82,8 @@ class MessagesSummaryTestCase(ZulipTestCase):
         # requests occur, which would reflect a problem with how the
         # fixtures were set up.
         with self.settings(
-            TOPIC_SUMMARIZATION_MODEL="bedrock/meta.llama3-8b-instruct-v1:0",
-            AWS_ACCESS_KEY_ID="fakeKeyID",
-            AWS_SECRET_ACCESS_KEY="fakeAccessKey",
-            AWS_REGION_NAME="ap-south-1",
+            TOPIC_SUMMARIZATION_MODEL="groq/llama-3.3-70b-versatile",
+            TOPIC_SUMMARIZATION_API_KEY="test",
         ):
             input_tokens = fixture_data["response"]["usage"]["prompt_tokens"]
             output_tokens = fixture_data["response"]["usage"]["completion_tokens"]
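
With credentials reduced to a single setting, a recorded fixture like the one above is enough to exercise the code path offline. A rough sketch of that pattern (the fixture path and mocking approach here are illustrative assumptions, not necessarily how this test suite wires it up):

# Hypothetical sketch: replay a recorded litellm response so the test
# makes no network request. The path and names are assumptions.
import json
from unittest import mock

import litellm

with open("litellm_fixture.json") as f:  # assumed fixture location
    fixture_data = json.load(f)

with mock.patch.object(litellm, "completion", return_value=fixture_data["response"]):
    response = litellm.completion(
        model=fixture_data["model"],
        messages=fixture_data["messages"],
        api_key="test",
    )

# The token counts asserted by the test come straight from the fixture.
assert response["usage"]["prompt_tokens"] == 163
assert response["usage"]["completion_tokens"] == 56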

View File

@@ -1246,31 +1246,6 @@ EMAIL_HOST_PASSWORD = get_secret("email_password")
 EMAIL_GATEWAY_PASSWORD = get_secret("email_gateway_password")
 AUTH_LDAP_BIND_PASSWORD = get_secret("auth_ldap_bind_password", "")
 
-########################################################################
-# LiteLLM SETTINGS
-########################################################################
-
-# The model name that will be used by the LiteLLM library to configure
-# parameters to be sent to the API.
-# The Llama-3-8B-instruct model is free to use and only requires submitting
-# a small form on the HuggingFace page for the model to gain access.
-# We only support HuggingFace and AWS Bedrock for LLM API requests.
-DEFAULT_TOPIC_SUMMARIZATION_MODEL: str | None = "huggingface/meta-llama/Meta-Llama-3-8B-Instruct"
-if PRODUCTION:
-    DEFAULT_TOPIC_SUMMARIZATION_MODEL = None
-TOPIC_SUMMARIZATION_MODEL = get_secret(
-    "topic_summarization_model", DEFAULT_TOPIC_SUMMARIZATION_MODEL
-)
-
-# Which API key to use will be determined based on TOPIC_SUMMARIZATION_MODEL.
-# HuggingFace access credentials
-HUGGINGFACE_API_KEY = get_secret("huggingface_api_key", None)
-
-# AWS Bedrock access credentials
-AWS_ACCESS_KEY_ID = get_secret("aws_access_key_id", None)
-AWS_SECRET_ACCESS_KEY = get_secret("aws_secret_access_key", None)
-AWS_REGION_NAME = get_secret("aws_region_name", None)
-
 ########################################################################
 # MISC SETTINGS
 ########################################################################
@@ -1316,3 +1291,6 @@ SCIM_SERVICE_PROVIDER = {
         },
     ],
 }
+
+# Which API key to use will be determined based on TOPIC_SUMMARIZATION_MODEL.
+TOPIC_SUMMARIZATION_API_KEY = get_secret("topic_summarization_api_key", None)
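
For context, get_secret here is Zulip's standard helper for reading /etc/zulip/zulip-secrets.conf (with a development-mode equivalent); a simplified sketch of its behavior, not the real implementation:

# Simplified sketch of get_secret; the real helper lives in Zulip's
# settings machinery and also handles development configurations.
import configparser

_secrets = configparser.RawConfigParser()
_secrets.read("/etc/zulip/zulip-secrets.conf")

def get_secret(key: str, default: object = None) -> object:
    # Secrets are stored under the [secrets] section of the file.
    if _secrets.has_option("secrets", key):
        return _secrets.get("secrets", key)
    return default

# Operators set topic_summarization_api_key in zulip-secrets.conf;
# if it is absent, the default of None leaves the feature unconfigured.
TOPIC_SUMMARIZATION_API_KEY = get_secret("topic_summarization_api_key", None)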

View File

@@ -696,3 +696,10 @@ MAX_WEB_DATA_IMPORT_SIZE_MB = 1024
 # delete an active organization.
 MIN_DEACTIVATED_REALM_DELETION_DAYS: int | None = 14
 MAX_DEACTIVATED_REALM_DELETION_DAYS: int | None = None
+
+TOPIC_SUMMARIZATION_MODEL: str | None = None
+if not PRODUCTION:
+    TOPIC_SUMMARIZATION_MODEL = "huggingface/meta-llama/Meta-Llama-3-8B-Instruct"
+TOPIC_SUMMARIZATION_PARAMETERS: dict[str, object] = {}
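
Because TOPIC_SUMMARIZATION_PARAMETERS is splatted directly into litellm.completion, a deployment can tune anything litellm accepts without further code changes. An illustrative override (the specific values are examples, not recommendations):

# Example settings override, passed through verbatim to litellm.completion();
# see https://docs.litellm.ai/docs/completion/input for accepted keys.
TOPIC_SUMMARIZATION_PARAMETERS: dict[str, object] = {
    "temperature": 0.3,  # steadier summaries
    "max_tokens": 300,   # cap the summary length
}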

View File

@@ -219,3 +219,5 @@ RESOLVE_TOPIC_UNDO_GRACE_PERIOD_SECONDS = 5
 # In a dev environment, 'zulipdev.com:9991' is used to access the landing page.
 # See: https://zulip.readthedocs.io/en/latest/subsystems/realms.html#working-with-subdomains-in-development-environment
 ROOT_DOMAIN_LANDING_PAGE = True
+
+TOPIC_SUMMARIZATION_MODEL = "groq/llama-3.3-70b-versatile"

View File

@@ -723,6 +723,18 @@ SOCIAL_AUTH_SAML_SUPPORT_CONTACT = {
 ## set big_blue_button_secret in zulip-secrets.conf.
 # BIG_BLUE_BUTTON_URL = "https://bbb.example.com/bigbluebutton/"
 
+################
+## LLM Summarization
+##
+## The model name that will be used by the LiteLLM library to configure
+## parameters to be sent to the API.
+## The Llama-3-8B-instruct model is free to use and only requires submitting
+## a small form on the HuggingFace page for the model to gain access.
+# TOPIC_SUMMARIZATION_MODEL = "huggingface/meta-llama/Meta-Llama-3-8B-Instruct"
+## Other configuration parameters, passed through to litellm's `completion` call.
+## See https://docs.litellm.ai/docs/completion/input
+# TOPIC_SUMMARIZATION_PARAMETERS = {}
+
 ################
 ## Miscellaneous settings.
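
Since any litellm backend is now usable, switching providers is a matter of the model string plus the shared secret. For example, one might point at an OpenAI-hosted model as sketched below (the model name follows litellm's provider/model convention; this particular choice is illustrative, not part of the commit):

## Illustrative alternative backend: set the provider's API key in
## zulip-secrets.conf as topic_summarization_api_key, then:
# TOPIC_SUMMARIZATION_MODEL = "openai/gpt-4o-mini"
# TOPIC_SUMMARIZATION_PARAMETERS = {"temperature": 0.2}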