mirror of https://github.com/zulip/zulip.git
synced 2025-11-13 02:17:19 +00:00
test_message_summary: Add a basic test.
@@ -130,8 +130,6 @@ not_yet_fully_covered = [
     "zerver/webhooks/zapier/view.py",
     # This is hard to get test coverage for, and low value to do so
     "zerver/views/sentry.py",
-    # TODO: Add tests when the details are finalized.
-    "zerver/views/message_summary.py",
     # Cannot have coverage, as tests run in a transaction
     "zerver/lib/safe_session_cached_db.py",
     "zerver/lib/singleton_bmemcached.py",
@@ -282,6 +280,11 @@ def main() -> None:
         action="store_true",
         help="Generate Stripe test fixtures by making requests to Stripe test network",
     )
+    parser.add_argument(
+        "--generate-litellm-fixtures",
+        action="store_true",
+        help="Generate litellm test fixtures using credentials in zproject/dev-secrets.conf",
+    )
     parser.add_argument("args", nargs="*")
     parser.add_argument(
         "--ban-console-output",
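
The new flag mirrors the existing --generate-stripe-fixtures option. Assuming this parser belongs to Zulip's tools/test-backend runner (the file path is not shown in this diff view), a fixture-regeneration run would look like the following sketch; the invocation path is an assumption, not part of this commit:

# Hypothetical invocation of the new flag; assumes the parser above is
# the one in Zulip's tools/test-backend runner (path not shown in this
# diff view). Equivalent to a shell run of:
#     ./tools/test-backend --generate-litellm-fixtures
import subprocess

subprocess.run(["./tools/test-backend", "--generate-litellm-fixtures"], check=True)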
@@ -390,6 +393,16 @@ def main() -> None:
         default_parallel = 1
         os.environ["GENERATE_STRIPE_FIXTURES"] = "1"
 
+    if options.generate_litellm_fixtures:
+        if full_suite:
+            suites = [
+                "zerver.tests.test_message_summary",
+            ]
+            full_suite = False
+        print("-- Forcing serial mode for generating litellm fixtures.", flush=True)
+        default_parallel = 1
+        os.environ["GENERATE_LITELLM_FIXTURES"] = "1"
+
     assert_provisioning_status_ok(options.skip_provision_check)
 
     if options.coverage:
@@ -526,9 +526,6 @@ def write_instrumentation_reports(full_suite: bool, include_webhooks: bool) -> None:
         untested_patterns = {p.replace("\\", "") for p in pattern_cnt if pattern_cnt[p] == 0}
 
         exempt_patterns = {
-            # TODO: Add tests when we are sure about what needs to be
-            # served to the user as summary.
-            "api/v1/messages/summary",
             # We exempt some patterns that are called via Tornado.
             "api/v1/events",
             "api/v1/events/internal",
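
The heart of the new test (added below) is a record/replay pattern around litellm.completion: when recording, mock.patch with wraps= still performs the real API call while capturing the arguments it was invoked with; when replaying, return_value= serves the saved response with no network access. A minimal self-contained sketch of that pattern, using a stand-in completion function rather than litellm itself:

# Record/replay sketch; `completion` here is a stand-in for
# litellm.completion, not code from this commit.
from types import SimpleNamespace
from unittest import mock


def completion(*, model: str, messages: list[dict[str, str]]) -> dict[str, object]:
    # Stand-in for a real network-backed completion call.
    return {"choices": [{"message": {"content": "A short summary."}}]}


llm = SimpleNamespace(completion=completion)

# Record: wraps= forwards calls to the real function while capturing arguments.
with mock.patch.object(llm, "completion", wraps=completion) as recorder:
    llm.completion(model="demo-model", messages=[{"role": "user", "content": "hello"}])
    call_args = recorder.call_args
    # Re-invoke with the captured arguments and store the real response.
    fixture = {"kwargs": call_args.kwargs, "response": recorder(**call_args.kwargs)}

# Replay: return_value= serves the stored response; no real call happens.
with mock.patch.object(llm, "completion", return_value=fixture["response"]):
    assert llm.completion(**fixture["kwargs"]) == fixture["response"]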
zerver/tests/fixtures/litellm/summary.json (new file, vendored, 43 lines)
@@ -0,0 +1,43 @@
+{
+  "model": "bedrock/meta.llama3-8b-instruct-v1:0",
+  "messages": [
+    {
+      "content": "The following is a chat conversation in the Zulip team chat app. channel: Zulip features, topic: New feature launch",
+      "role": "system"
+    },
+    {
+      "content": "[{\"sender\": \"Iago\", \"content\": \"Zulip just launched a feature to generate summary of messages.\"}, {\"sender\": \"Iago\", \"content\": \"Sounds awesome! This will greatly help me when catching up.\"}]",
+      "role": "user"
+    },
+    {
+      "content": "Succinctly summarize this conversation based only on the information provided, in up to 4 sentences, for someone who is familiar with the context. Mention key conclusions and actions, if any. Refer to specific people as appropriate. Don't use an intro phrase.",
+      "role": "user"
+    }
+  ],
+  "response": {
+    "id": "chatcmpl-a86e270f-a634-40f3-92f4-da786ccb263b",
+    "created": 1737832810,
+    "model": "meta.llama3-8b-instruct-v1:0",
+    "object": "chat.completion",
+    "system_fingerprint": null,
+    "choices": [
+      {
+        "finish_reason": "stop",
+        "index": 0,
+        "message": {
+          "content": "\n\nIago announced the launch of a new feature in Zulip, which generates summaries of messages. He expressed enthusiasm for the feature, stating it will greatly help him when catching up.",
+          "role": "assistant",
+          "tool_calls": null,
+          "function_call": null
+        }
+      }
+    ],
+    "usage": {
+      "completion_tokens": 39,
+      "prompt_tokens": 144,
+      "total_tokens": 183,
+      "completion_tokens_details": null,
+      "prompt_tokens_details": null
+    }
+  }
+}
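
As a quick consistency check on the recorded usage block, the token counts in the fixture are internally coherent:

# Sanity check on the fixture's usage accounting (values copied from the
# JSON above): prompt plus completion tokens must equal total_tokens.
prompt_tokens, completion_tokens, total_tokens = 144, 39, 183
assert prompt_tokens + completion_tokens == total_tokens  # 144 + 39 == 183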
zerver/tests/test_message_summary.py (new file, 108 lines)
@@ -0,0 +1,108 @@
+import os
+import warnings
+from unittest import mock
+
+import orjson
+from django.conf import settings
+from typing_extensions import override
+
+from analytics.models import UserCount
+from zerver.lib.test_classes import ZulipTestCase
+from zerver.views.message_summary import INPUT_COST_PER_GIGATOKEN, OUTPUT_COST_PER_GIGATOKEN
+
+warnings.filterwarnings("ignore", category=UserWarning, module="pydantic")
+warnings.filterwarnings("ignore", category=DeprecationWarning, module="pydantic")
+warnings.filterwarnings("ignore", category=DeprecationWarning, module="litellm")
+# Avoid network query to fetch the model cost map.
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+import litellm
+
+# Fixture file to store recorded responses
+LLM_FIXTURES_FILE = "zerver/tests/fixtures/litellm/summary.json"
+
+
+class MessagesSummaryTestCase(ZulipTestCase):
+    @override
+    def setUp(self) -> None:
+        super().setUp()
+        self.user = self.example_user("iago")
+        self.topic_name = "New feature launch"
+        self.channel_name = "Zulip features"
+
+        self.login_user(self.user)
+        self.subscribe(self.user, self.channel_name)
+        content = "Zulip just launched a feature to generate summary of messages."
+        self.send_stream_message(
+            self.user, self.channel_name, content=content, topic_name=self.topic_name
+        )
+
+        content = "Sounds awesome! This will greatly help me when catching up."
+        self.send_stream_message(
+            self.user, self.channel_name, content=content, topic_name=self.topic_name
+        )
+
+        if settings.GENERATE_LITELLM_FIXTURES:  # nocoverage
+            self.patcher = mock.patch("litellm.completion", wraps=litellm.completion)
+            self.mocked_completion = self.patcher.start()
+
+    @override
+    def tearDown(self) -> None:
+        if settings.GENERATE_LITELLM_FIXTURES:  # nocoverage
+            self.patcher.stop()
+        super().tearDown()
+
+    def test_summarize_messages_in_topic(self) -> None:
+        narrow = orjson.dumps([["channel", self.channel_name], ["topic", self.topic_name]]).decode()
+
+        if settings.GENERATE_LITELLM_FIXTURES:  # nocoverage
+            # NOTE: You need to have proper credentials in zproject/dev-secrets.conf
+            # to generate the fixtures. (Tested using AWS Bedrock.)
+            # Trigger the API call to extract the arguments.
+            self.client_get("/json/messages/summary", dict(narrow=narrow))
+            call_args = self.mocked_completion.call_args
+
+            # Once we have the arguments, call the original method and save its response.
+            response = self.mocked_completion(**call_args.kwargs).json()
+            with open(LLM_FIXTURES_FILE, "wb") as f:
+                fixture_data = {
+                    # Only store model and messages.
+                    # We don't want to store any secrets.
+                    "model": call_args.kwargs["model"],
+                    "messages": call_args.kwargs["messages"],
+                    "response": response,
+                }
+                f.write(orjson.dumps(fixture_data, option=orjson.OPT_INDENT_2) + b"\n")
+            return
+
+        # In this code path, we test using the fixtures.
+        with open(LLM_FIXTURES_FILE, "rb") as f:
+            fixture_data = orjson.loads(f.read())
+
+        # Fake credentials to ensure we crash if actual network
+        # requests occur, which would reflect a problem with how the
+        # fixtures were set up.
+        with self.settings(
+            TOPIC_SUMMARIZATION_MODEL="bedrock/meta.llama3-8b-instruct-v1:0",
+            AWS_ACCESS_KEY_ID="fakeKeyID",
+            AWS_SECRET_ACCESS_KEY="fakeAccessKey",
+            AWS_REGION_NAME="ap-south-1",
+        ):
+            input_tokens = fixture_data["response"]["usage"]["prompt_tokens"]
+            output_tokens = fixture_data["response"]["usage"]["completion_tokens"]
+            credits_used = (output_tokens * OUTPUT_COST_PER_GIGATOKEN) + (
+                input_tokens * INPUT_COST_PER_GIGATOKEN
+            )
+            self.assertFalse(
+                UserCount.objects.filter(
+                    property="ai_credit_usage::day", value=credits_used, user_id=self.user.id
+                ).exists()
+            )
+            with mock.patch("litellm.completion", return_value=fixture_data["response"]):
+                payload = self.client_get("/json/messages/summary", dict(narrow=narrow))
+                self.assertEqual(payload.status_code, 200)
+            # Check that we recorded this usage.
+            self.assertTrue(
+                UserCount.objects.filter(
+                    property="ai_credit_usage::day", value=credits_used, user_id=self.user.id
+                ).exists()
+            )
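
The test derives the expected value of the "ai_credit_usage::day" UserCount row from the fixture's token counts. The actual per-gigatoken constants live in zerver/views/message_summary.py and are not shown in this diff, so the sketch below uses placeholder values purely to illustrate the arithmetic:

# Worked example of the credit computation the test asserts on. The
# real INPUT_COST_PER_GIGATOKEN / OUTPUT_COST_PER_GIGATOKEN values are
# not shown in this diff; these are placeholders for illustration only.
INPUT_COST_PER_GIGATOKEN = 400_000   # placeholder, not the real value
OUTPUT_COST_PER_GIGATOKEN = 600_000  # placeholder, not the real value

input_tokens = 144  # fixture: response.usage.prompt_tokens
output_tokens = 39  # fixture: response.usage.completion_tokens

credits_used = (output_tokens * OUTPUT_COST_PER_GIGATOKEN) + (
    input_tokens * INPUT_COST_PER_GIGATOKEN
)
print(credits_used)  # 39 * 600_000 + 144 * 400_000 = 81_000_000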
@@ -1,3 +1,6 @@
+import warnings
+
+warnings.filterwarnings("ignore", category=UserWarning, module="pydantic")
 import json
 from typing import Any
 
@@ -73,10 +76,10 @@ def get_messages_summary(
     *,
     narrow: Json[list[NarrowParameter] | None] = None,
 ) -> HttpResponse:
-    if settings.TOPIC_SUMMARIZATION_MODEL is None:
+    if settings.TOPIC_SUMMARIZATION_MODEL is None:  # nocoverage
         raise JsonableError(_("AI features are not enabled on this server."))
 
-    if not (user_profile.is_moderator or user_profile.is_realm_admin):
+    if not (user_profile.is_moderator or user_profile.is_realm_admin):  # nocoverage
         return json_success(request, {"summary": "Feature limited to moderators for now."})
 
     # TODO: This implementation does not attempt to make use of
@@ -95,7 +98,7 @@ def get_messages_summary(
         num_after=0,
     )
 
-    if len(query_info.rows) == 0:
+    if len(query_info.rows) == 0:  # nocoverage
         return json_success(request, {"summary": "No messages in conversation to summarize"})
 
     result_message_ids: list[int] = []
@@ -126,7 +129,7 @@ def get_messages_summary(
     # is primarily trained on English.
     model = settings.TOPIC_SUMMARIZATION_MODEL
     litellm_params: dict[str, Any] = {}
-    if model.startswith("huggingface"):
+    if model.startswith("huggingface"):  # nocoverage
         assert settings.HUGGINGFACE_API_KEY is not None
         litellm_params["api_key"] = settings.HUGGINGFACE_API_KEY
     else:
@@ -194,6 +197,7 @@ def get_messages_summary(
         messages=messages,
         **litellm_params,
     )
+    input_tokens = response["usage"]["prompt_tokens"]
     output_tokens = response["usage"]["completion_tokens"]
 
     credits_used = (output_tokens * OUTPUT_COST_PER_GIGATOKEN) + (
@@ -179,6 +179,8 @@ RUNNING_OPENAPI_CURL_TEST = False
 # This is overridden in test_settings.py for the test suites
 GENERATE_STRIPE_FIXTURES = False
 # This is overridden in test_settings.py for the test suites
+GENERATE_LITELLM_FIXTURES = False
+# This is overridden in test_settings.py for the test suites
 BAN_CONSOLE_OUTPUT = False
 # This is overridden in test_settings.py for the test suites
 TEST_WORKER_DIR = ""
@@ -55,6 +55,9 @@ if "RUNNING_OPENAPI_CURL_TEST" in os.environ:
 if "GENERATE_STRIPE_FIXTURES" in os.environ:
     GENERATE_STRIPE_FIXTURES = True
 
+if "GENERATE_LITELLM_FIXTURES" in os.environ:
+    GENERATE_LITELLM_FIXTURES = True
+
 if "BAN_CONSOLE_OUTPUT" in os.environ:
     BAN_CONSOLE_OUTPUT = True
 
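
One subtlety in the settings hook above: the check is on the variable's presence, not its value, matching the existing GENERATE_STRIPE_FIXTURES and BAN_CONSOLE_OUTPUT hooks. A tiny illustration:

# The settings check is key-presence based: exporting the variable as
# "0" or "" would still enable fixture generation.
import os

os.environ["GENERATE_LITELLM_FIXTURES"] = "0"
assert ("GENERATE_LITELLM_FIXTURES" in os.environ) is True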