zulip/zerver/actions/message_summary.py
Saubhagya Patel d0c5c1cacb settings: Add backend to change allow_edit_history to integer field.
This commit implements the backend of migrating the
`allow_edit_history` setting to
`message_edit_history_visibility_policy`.
This allows organizations to have an intermediate setting to
view only the "Moves" history of messages.

We still pass `realm_allow_edit_history` in the `/register` response
for older clients, with its value derived from
`realm_message_edit_history_visibility_policy`: we set
`realm_allow_edit_history` to `False` if the policy is "None", and
to `True` for "Moves only" or "All" message edit history.
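A minimal sketch of that compatibility mapping (illustrative; not
necessarily the exact code in this commit):

    realm_allow_edit_history = (
        realm.message_edit_history_visibility_policy
        != MessageEditHistoryVisibilityPolicyEnum.none.value
    )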

Fixes part of #21398.

Co-authored-by: Shlok Patel <shlokcpatel2001@gmail.com>
Co-authored-by: Tim Abbott <tabbott@zulip.com>
2025-02-25 18:18:23 -08:00


import time
from typing import Any

import orjson
from django.conf import settings
from django.utils.timezone import now as timezone_now

from analytics.lib.counts import COUNT_STATS, do_increment_logging_stat
from zerver.lib.markdown import markdown_convert
from zerver.lib.message import messages_for_ids
from zerver.lib.narrow import (
    LARGER_THAN_MAX_MESSAGE_ID,
    NarrowParameter,
    clean_narrow_for_message_fetch,
    fetch_messages,
)
from zerver.models import UserProfile
from zerver.models.realms import MessageEditHistoryVisibilityPolicyEnum

# Maximum number of messages that can be summarized in a single request.
MAX_MESSAGES_SUMMARIZED = 100

ai_time_start = 0.0
ai_total_time = 0.0
ai_total_requests = 0


def get_ai_time() -> float:
    return ai_total_time


def ai_stats_start() -> None:
    global ai_time_start
    ai_time_start = time.time()


def get_ai_requests() -> int:
    return ai_total_requests


def ai_stats_finish() -> None:
    global ai_total_time, ai_total_requests
    ai_total_requests += 1
    ai_total_time += time.time() - ai_time_start
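

# The stats helpers above are paired around the model call in
# do_summarize_narrow() below; get_ai_time() and get_ai_requests() expose the
# accumulated model latency and request count to other code.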


def format_zulip_messages_for_model(zulip_messages: list[dict[str, Any]]) -> str:
    # Format the Zulip messages for processing by the model.
    #
    # - We don't need to encode the recipient, since that's the same for
    #   every message in the conversation.
    # - We use full names to reference senders, since we want the
    #   model to refer to users by name. We may want to experiment
    #   with using silent-mention syntax for users if we move to
    #   Markdown-rendering what the model returns.
    # - We don't include timestamps, since experiments with current models
    #   suggest they do not make relevant use of them.
    # - We haven't figured out a useful way to include reaction metadata (either
    #   the emoji themselves or just the counter).
    # - Polls/TODO widgets are currently sent to the model as empty messages,
    #   since this logic doesn't inspect SubMessage objects.
    zulip_messages_list = [
        {"sender": message["sender_full_name"], "content": message["content"]}
        for message in zulip_messages
    ]
    return orjson.dumps(zulip_messages_list).decode()
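

# For illustration (made-up senders and content), two messages would be
# serialized as a single JSON string like:
#   [{"sender": "Iago", "content": "Can we ship this today?"},
#    {"sender": "Hamlet", "content": "Yes, once CI passes."}]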


def make_message(content: str, role: str = "user") -> dict[str, str]:
    return {"content": content, "role": role}


def get_max_summary_length(conversation_length: int) -> int:
    # Longer summaries work better for longer conversations.
    # TODO: Test more with message content length.
    return min(6, 4 + int((conversation_length - 10) / 10))
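

# With the formula above: up to 19 messages allow 4 sentences, 20-29 messages
# allow 5, and 30 or more messages hit the cap of 6.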


def do_summarize_narrow(
    user_profile: UserProfile,
    narrow: list[NarrowParameter] | None,
) -> str | None:
    model = settings.TOPIC_SUMMARIZATION_MODEL
    if model is None:  # nocoverage
        return None

    # TODO: This implementation does not attempt to make use of
    # caching previous summaries of the same conversation or rolling
    # summaries. Doing so correctly will require careful work around
    # invalidation of caches when messages are edited, moved, or sent.
    narrow = clean_narrow_for_message_fetch(narrow, user_profile.realm, user_profile)
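    # Anchoring at LARGER_THAN_MAX_MESSAGE_ID with num_before=MAX_MESSAGES_SUMMARIZED
    # and num_after=0 fetches the newest messages matching the narrow (up to 100).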
    query_info = fetch_messages(
        narrow=narrow,
        user_profile=user_profile,
        realm=user_profile.realm,
        is_web_public_query=False,
        anchor=LARGER_THAN_MAX_MESSAGE_ID,
        include_anchor=True,
        num_before=MAX_MESSAGES_SUMMARIZED,
        num_after=0,
    )
    if len(query_info.rows) == 0:  # nocoverage
        return None

    result_message_ids: list[int] = []
    user_message_flags: dict[int, list[str]] = {}
    for row in query_info.rows:
        message_id = row[0]
        result_message_ids.append(message_id)
        # We skip populating flags, since they would be ignored below anyway.
        user_message_flags[message_id] = []

    message_list = messages_for_ids(
        message_ids=result_message_ids,
        user_message_flags=user_message_flags,
        search_fields={},
        # We currently prefer the plain-text (Markdown) content of messages to
        # the rendered HTML for the model's input.
        apply_markdown=False,
        # Avoid wasting resources computing gravatars.
        client_gravatar=True,
        allow_empty_topic_name=False,
        # Avoid fetching edit history, which won't be passed to the model.
        message_edit_history_visibility_policy=MessageEditHistoryVisibilityPolicyEnum.none.value,
        user_profile=user_profile,
        realm=user_profile.realm,
    )

    # IDEA: We could consider translating input and output text to
    # English to improve results when using a summarization model that
    # is primarily trained on English.
    conversation_length = len(message_list)
    max_summary_length = get_max_summary_length(conversation_length)
    intro = "The following is a chat conversation in the Zulip team chat app."
    topic: str | None = None
    channel: str | None = None
    if narrow and len(narrow) == 2:
        for term in narrow:
            assert not term.negated
            if term.operator == "channel":
                channel = term.operand
            if term.operator == "topic":
                topic = term.operand
    if channel:
        intro += f" channel: {channel}"
    if topic:
        intro += f", topic: {topic}"

    formatted_conversation = format_zulip_messages_for_model(message_list)
    prompt = (
        f"Succinctly summarize this conversation based only on the information provided, "
        f"in up to {max_summary_length} sentences, for someone who is familiar with the context. "
        f"Mention key conclusions and actions, if any. Refer to specific people as appropriate. "
        f"Don't use an intro phrase. You can use Zulip's CommonMark based formatting."
    )
    messages = [
        make_message(intro, "system"),
        make_message(formatted_conversation),
        make_message(prompt),
    ]

    # Stats for database queries are tracked separately.
    ai_stats_start()

    # We import litellm here to avoid a DeprecationWarning.
    # See these issues for more info:
    # https://github.com/BerriAI/litellm/issues/6232
    # https://github.com/BerriAI/litellm/issues/5647
    import litellm

    # The token counter is recommended by LiteLLM, but mypy says it's not explicitly exported.
    # https://docs.litellm.ai/docs/completion/token_usage#3-token_counter
    # estimated_input_tokens = litellm.token_counter(model=model, messages=messages)  # type: ignore[attr-defined]  # Explained above
    #
    # TODO when implementing user plans:
    # - Before querying the model, check whether we have enough tokens left, using
    #   the estimated token count.
    # - Then increase the `LoggingCountStat` using the estimated token count.
    #   (These first two steps should be a short database transaction that
    #   locks the `LoggingCountStat` row.)
    # - Then query the model.
    # - Then adjust the `LoggingCountStat` by `(actual - estimated)`, taking care
    #   to adjust the same `LoggingCountStat` row we added the estimate to, rather
    #   than the next day's row, if the query happened milliseconds before midnight.
    # That way, you can't easily get extra tokens by sending
    # 25 requests all at once when you're just below the limit.
    litellm_params: dict[str, object] = settings.TOPIC_SUMMARIZATION_PARAMETERS
    api_key = settings.TOPIC_SUMMARIZATION_API_KEY
    response = litellm.completion(
        model=model,
        messages=messages,
        api_key=api_key,
        **litellm_params,
    )
    input_tokens = response["usage"]["prompt_tokens"]
    output_tokens = response["usage"]["completion_tokens"]
    # The COST_PER_GIGATOKEN settings are in USD per billion tokens, so divide
    # credits_used by 1 billion to get the actual cost in USD.
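    # For example (with assumed prices), if INPUT_COST_PER_GIGATOKEN were 150
    # and OUTPUT_COST_PER_GIGATOKEN were 600, a request using 2,000 input and
    # 300 output tokens would record 2,000 * 150 + 300 * 600 = 480,000 credits,
    # i.e. $0.00048.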
    credits_used = (output_tokens * settings.OUTPUT_COST_PER_GIGATOKEN) + (
        input_tokens * settings.INPUT_COST_PER_GIGATOKEN
    )
    ai_stats_finish()
    do_increment_logging_stat(
        user_profile, COUNT_STATS["ai_credit_usage::day"], None, timezone_now(), credits_used
    )

    summary = response["choices"][0]["message"]["content"]
    # TODO: This may want to fetch `MentionData`, in order to be able
    # to process channel or user mentions that might be in the
    # content. Requires a prompt that supports it.
    rendered_summary = markdown_convert(summary, message_realm=user_profile.realm).rendered_content
    return rendered_summary
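

# Illustrative usage (not part of this module): a caller such as a view might
# summarize a channel/topic conversation roughly like this, assuming it already
# has the acting user_profile:
#
#     narrow = [
#         NarrowParameter(operator="channel", operand="design"),
#         NarrowParameter(operator="topic", operand="summaries"),
#     ]
#     rendered_summary_html = do_summarize_narrow(user_profile, narrow)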