mirror of
https://github.com/zulip/zulip.git
synced 2025-10-23 04:52:12 +00:00
For get and filter queries of NamedUserGroup, realm_for_sharding field is used instead of realm field, as directly using realm_for_sharding field on NamedUserGroup makes the query faster than using realm present on the base UserGroup table.
505 lines
19 KiB
Python
505 lines
19 KiB
Python
import functools
|
|
import re
|
|
from collections import defaultdict
|
|
from dataclasses import dataclass
|
|
from re import Match
|
|
from typing import Literal
|
|
|
|
from django.conf import settings
|
|
from django.db.models import Q
|
|
from django_stubs_ext import StrPromise
|
|
|
|
from zerver.lib.streams import get_content_access_streams
|
|
from zerver.lib.topic import get_latest_message_for_user_in_topic
|
|
from zerver.lib.types import UserDisplayRecipient
|
|
from zerver.lib.user_groups import (
|
|
UserGroupMembershipDetails,
|
|
get_root_id_annotated_recursive_subgroups_for_groups,
|
|
user_has_permission_for_group_setting,
|
|
)
|
|
from zerver.lib.users import get_inaccessible_user_ids
|
|
from zerver.models import NamedUserGroup, UserProfile
|
|
from zerver.models.groups import SystemGroups
|
|
from zerver.models.streams import Stream
|
|
from zerver.models.users import is_cross_realm_bot_email
|
|
|
|
# Negative lookbehind placed in front of the "@" of a mention: the match is
# rejected when the preceding character is anything other than whitespace,
# a quote, an opening bracket/paren/brace, "/" or "<".
BEFORE_MENTION_ALLOWED_REGEX = r"(?<![^\s\'\"\(\{\[\/<])"

# User mention syntax: the text between "@**" and "**" is captured as
# "match"; an underscore right after "@" is captured as "silent".
MENTIONS_RE = re.compile(
    rf"{BEFORE_MENTION_ALLOWED_REGEX}@(?P<silent>_?)(\*\*(?P<match>[^\*]+)\*\*)"
)
# User group mention syntax: same shape, but with single asterisks.
USER_GROUP_MENTIONS_RE = re.compile(
    rf"{BEFORE_MENTION_ALLOWED_REGEX}@(?P<silent>_?)(\*(?P<match>[^\*]+)\*)"
)

# Mention texts that are treated as wildcards rather than as names.
topic_wildcards = frozenset({"topic"})
stream_wildcards = frozenset({"all", "everyone", "stream", "channel"})
|
|
|
|
|
|
@dataclass
class FullNameInfo:
    """The three user fields the mention code carries around for a user."""

    # Primary key of the user row.
    id: int
    # The user's full name, as stored on the user row.
    full_name: str
    # Copied from the user row's is_active flag.
    is_active: bool
|
|
|
|
|
|
@dataclass
class UserFilter:
    """Lookup criteria for one mentioned user: an id, a full name, or both."""

    id: int | None
    full_name: str | None

    def Q(self) -> Q:
        """Build the Django ``Q`` object matching this filter.

        The full name is compared case-insensitively (``iexact``).

        Raises:
            AssertionError: if neither ``id`` nor ``full_name`` is set.
        """
        if self.id is None and self.full_name is None:
            raise AssertionError("totally empty filter makes no sense")

        if self.id is None:
            # Name-only filter.
            return Q(full_name__iexact=self.full_name)

        if self.full_name is None:
            # Id-only filter.
            return Q(id=self.id)

        # Both supplied: the row must match the name and the id.
        return Q(full_name__iexact=self.full_name, id=self.id)
|
|
|
|
|
|
@dataclass
|
|
class MentionText:
|
|
text: str | None
|
|
is_topic_wildcard: bool
|
|
is_stream_wildcard: bool
|
|
|
|
|
|
@dataclass
class PossibleMentions:
    """Summary of the user-mention syntax found in one piece of content."""

    # Non-wildcard mention texts (plain names or the name|id form).
    mention_texts: set[str]
    # Whether at least one topic wildcard mention was found.
    message_has_topic_wildcards: bool
    # Whether at least one stream wildcard mention was found.
    message_has_stream_wildcards: bool
|
|
|
|
|
|
@dataclass(frozen=True)
class ChannelTopicInfo:
    """A (channel name, topic name) pair.

    Frozen, and therefore hashable, so instances can be placed in sets and
    used as dictionary keys.
    """

    channel_name: str
    topic_name: str
|
|
|
|
|
|
@dataclass
class ChannelInfo:
    """Channel fields kept in the mention backend's per-channel cache."""

    # Id of the channel (stream) row.
    channel_id: int
    # The channel row's recipient_id.
    recipient_id: int
    # The channel row's history_public_to_subscribers flag.
    history_public_to_subscribers: bool
    # TODO: Track whether the current user has only metadata access or
    # content access, so that we can allow mentioning channels with
    # only metadata access, while still enforcing content access to
    # mention topics or messages within channels.
|
|
|
|
|
|
class MentionBackend:
    """Realm-scoped lookups for mentioned users, channels and topics.

    Be careful about reuse: a MentionBackend holds caches that are designed
    to live only as long as a single sender user (typically a single
    request).

    In particular, user_cache is keyed by (id, full_name) only, while the
    entries stored in it were filtered by what message_sender can access;
    it is therefore not robust to message_sender changing within the
    lifetime of a single MentionBackend.
    """

    def __init__(self, realm_id: int) -> None:
        self.realm_id = realm_id
        # (user id, full name) -> user info already fetched.
        self.user_cache: dict[tuple[int, str], FullNameInfo] = {}
        # channel name -> channel info already fetched.
        self.stream_cache: dict[str, ChannelInfo] = {}
        # (channel name, topic name) -> result of the latest-message lookup.
        self.topic_cache: dict[ChannelTopicInfo, int | None] = {}

    def get_full_name_info_list(
        self, user_filters: list[UserFilter], message_sender: UserProfile | None
    ) -> list[FullNameInfo]:
        """Resolve user filters to FullNameInfo records.

        Cache hits come first in the returned list, followed by the rows
        fetched from the database. Rows whose id is reported by
        get_inaccessible_user_ids for message_sender are dropped and are
        not cached.
        """
        found: list[FullNameInfo] = []
        filters_needing_db: list[UserFilter] = []

        # Split the filters into cache hits and those that need the database.
        for user_filter in user_filters:
            cached_user = None
            # We expect callers who take advantage of our user_cache to supply
            # both id and full_name in the user mentions in their messages;
            # only such filters can be looked up in the cache.
            if user_filter.id is not None and user_filter.full_name is not None:
                cached_user = self.user_cache.get((user_filter.id, user_filter.full_name))

            if cached_user is None:
                filters_needing_db.append(user_filter)
            else:
                found.append(cached_user)

        # Most of the time we have to go to the database to get user info,
        # unless the loop above found everything in the cache.
        if not filters_needing_db:
            return found

        any_filter_matches = functools.reduce(
            lambda a, b: a | b,
            [user_filter.Q() for user_filter in filters_needing_db],
        )
        in_realm_or_cross_realm_bot = Q(realm_id=self.realm_id) | Q(
            email__in=settings.CROSS_REALM_BOT_EMAILS
        )
        rows = (
            UserProfile.objects.filter(in_realm_or_cross_realm_bot)
            .filter(any_filter_matches)
            .only("id", "full_name", "is_active")
        )

        candidate_user_ids = [row.id for row in rows]
        inaccessible_user_ids = get_inaccessible_user_ids(candidate_user_ids, message_sender)

        for row in rows:
            if row.id in inaccessible_user_ids:
                continue
            info = FullNameInfo(id=row.id, full_name=row.full_name, is_active=row.is_active)
            # Remember the user for later calls on this backend.
            self.user_cache[(info.id, info.full_name)] = info
            found.append(info)

        return found

    def get_stream_name_map(
        self, stream_names: set[str], acting_user: UserProfile | None
    ) -> dict[str, int]:
        """Map each resolvable channel name in stream_names to its id.

        Names already in stream_cache are answered from the cache. The
        remaining names are looked up in this realm; when acting_user is
        given, only the channels returned by get_content_access_streams
        for that user are included. Every channel resolved here is added
        to stream_cache.
        """
        if not stream_names:
            return {}

        name_to_id: dict[str, int] = {
            name: self.stream_cache[name].channel_id
            for name in stream_names
            if name in self.stream_cache
        }
        uncached_names = [name for name in stream_names if name not in self.stream_cache]
        if not uncached_names:
            return name_to_id

        name_clauses = {Q(name=name) for name in uncached_names}
        matching_streams = Stream.objects.filter(
            realm_id=self.realm_id,
        ).filter(
            functools.reduce(lambda a, b: a | b, name_clauses),
        )

        if acting_user is None:
            # No acting user: take every matching channel in the realm.
            for row in matching_streams.values(
                "id",
                "name",
                "recipient_id",
                "history_public_to_subscribers",
            ):
                self.stream_cache[row["name"]] = ChannelInfo(
                    row["id"], row["recipient_id"], row["history_public_to_subscribers"]
                )
                name_to_id[row["name"]] = row["id"]
            return name_to_id

        content_access_streams = get_content_access_streams(
            acting_user,
            list(matching_streams),
            user_group_membership_details=UserGroupMembershipDetails(
                user_recursive_group_ids=None
            ),
        )
        for stream in content_access_streams:
            assert stream.recipient_id is not None
            self.stream_cache[stream.name] = ChannelInfo(
                stream.id, stream.recipient_id, stream.history_public_to_subscribers
            )
            name_to_id[stream.name] = stream.id

        return name_to_id

    def get_topic_info_map(
        self, channel_topics: set[ChannelTopicInfo], acting_user: UserProfile | None
    ) -> dict[ChannelTopicInfo, int | None]:
        """Map channel/topic pairs to the latest-message lookup result.

        Pairs already in topic_cache are answered from the cache. A pair
        whose channel name is missing from stream_cache is left out of the
        returned mapping entirely.
        """
        if not channel_topics:
            return {}

        topic_to_message: dict[ChannelTopicInfo, int | None] = {
            channel_topic: self.topic_cache[channel_topic]
            for channel_topic in channel_topics
            if channel_topic in self.topic_cache
        }
        uncached_topics = [
            channel_topic
            for channel_topic in channel_topics
            if channel_topic not in self.topic_cache
        ]

        for channel_topic in uncached_topics:
            channel_info = self.stream_cache.get(channel_topic.channel_name)
            if channel_info is None:
                # stream_cache is only filled by get_stream_name_map, so a
                # miss means the channel was not resolved there, e.g. the
                # acting user does not have access to content in this channel.
                continue

            # Any message in the topic is a valid choice for the
            # /with/ anchor. There are two risks to manage here:
            # - The target message could be deleted or be a mispost that
            #   is off-topic and moved shortly.
            # - The topic could be split into two topics.
            #
            # Originally, we picked the oldest message because that
            # message is least likely to be deleted/moved for being a
            # mispost/error -- i.e., trying to do a bit better in rare
            # corner cases. We switched to preferring the latest
            # message in API feature level 400. The "latest" algorithm
            # is better when linking to an active conversation in a
            # long topic that's a follow-up/tangent and ends up being
            # moved/split, which users do constantly.
            latest_message = get_latest_message_for_user_in_topic(
                self.realm_id,
                acting_user,
                channel_info.recipient_id,
                channel_topic.topic_name,
                channel_info.history_public_to_subscribers,
                acting_user_has_channel_content_access=True,
            )

            self.topic_cache[channel_topic] = latest_message
            topic_to_message[channel_topic] = latest_message

        return topic_to_message
|
|
|
|
|
|
def user_mention_matches_topic_wildcard(mention: str) -> bool:
    """Return whether ``mention`` is one of the topic wildcard texts."""
    is_topic_wildcard = mention in topic_wildcards
    return is_topic_wildcard
|
|
|
|
|
|
def user_mention_matches_stream_wildcard(mention: str) -> bool:
    """Return whether ``mention`` is one of the stream wildcard texts."""
    is_stream_wildcard = mention in stream_wildcards
    return is_stream_wildcard
|
|
|
|
|
|
def extract_mention_text(m: Match[str]) -> MentionText:
    """Classify the "match" group of a mention regex match.

    Wildcards are reported through the two flags with ``text`` set to None;
    the topic wildcard check takes precedence over the stream one. Any
    other text is returned unchanged with both flags False.
    """
    captured = m.group("match")
    is_topic = captured in topic_wildcards
    # Only consult the stream wildcards when the topic check did not hit.
    is_stream = (not is_topic) and captured in stream_wildcards
    return MentionText(
        text=None if (is_topic or is_stream) else captured,
        is_topic_wildcard=is_topic,
        is_stream_wildcard=is_stream,
    )
|
|
|
|
|
|
def possible_mentions(content: str) -> PossibleMentions:
    """Scan ``content`` for user-mention syntax and summarise what was found.

    Mention texts can either be names, or an extended name|id syntax.
    Wildcard mentions are not added to the text set; they only set the
    corresponding flag.
    """
    extracted = [extract_mention_text(m) for m in MENTIONS_RE.finditer(content)]
    return PossibleMentions(
        mention_texts={mention.text for mention in extracted if mention.text},
        message_has_topic_wildcards=any(mention.is_topic_wildcard for mention in extracted),
        message_has_stream_wildcards=any(mention.is_stream_wildcard for mention in extracted),
    )
|
|
|
|
|
|
def possible_user_group_mentions(content: str) -> dict[str, Literal["silent", "non-silent"]]:
    """Map each group name mentioned in ``content`` to its mention type.

    A group mentioned both silently and non-silently is reported as
    "non-silent", regardless of the order of the mentions.
    """
    mention_types: dict[str, Literal["silent", "non-silent"]] = {}

    for found in USER_GROUP_MENTIONS_RE.finditer(content):
        group_name = found.group("match")
        if found.group("silent"):
            # A silent mention never overrides an existing entry.
            mention_types.setdefault(group_name, "silent")
        else:
            # A non-silent mention always wins, even over a silent one.
            mention_types[group_name] = "non-silent"

    return mention_types
|
|
|
|
|
|
def get_possible_mentions_info(
    mention_backend: MentionBackend, mention_texts: set[str], message_sender: UserProfile | None
) -> list[FullNameInfo]:
    """Turn raw mention texts into user filters and resolve them.

    Three spellings are recognised:
      - ``name|id``  -> filter on both full name and id
      - ``|id``      -> filter on id only
      - ``name``     -> filter on full name only

    Returns an empty list without touching the backend when there are no
    mention texts.
    """
    if not mention_texts:
        return []

    name_re = r"(?P<full_name>.+)?\|(?P<mention_id>\d+)$"
    user_filters = []

    for mention_text in mention_texts:
        name_syntax_match = re.match(name_re, mention_text)
        if name_syntax_match is None:
            # For **name** syntax.
            user_filters.append(UserFilter(full_name=mention_text, id=None))
            continue

        # The id group is mandatory in name_re, so it is always present
        # here; the name group is absent for the **|id** syntax.
        mention_id = int(name_syntax_match.group("mention_id"))
        full_name = name_syntax_match.group("full_name") or None
        user_filters.append(UserFilter(full_name=full_name, id=mention_id))

    return mention_backend.get_full_name_info_list(user_filters, message_sender)
|
|
|
|
|
|
class MentionData:
    """Everything the mention syntax in one piece of content might refer to.

    On construction the content is scanned for user and user group
    mentions, and the matching users, groups and group memberships are
    loaded so later lookups are plain dictionary reads.
    """

    def __init__(
        self, mention_backend: MentionBackend, content: str, message_sender: UserProfile | None
    ) -> None:
        self.mention_backend = mention_backend
        self.message_sender = message_sender

        mentions = possible_mentions(content)
        possible_mentions_info = get_possible_mentions_info(
            mention_backend, mentions.mention_texts, message_sender
        )

        # Users indexed by lowercased full name and by id.
        self.full_name_info = {info.full_name.lower(): info for info in possible_mentions_info}
        self.user_id_info = {info.id: info for info in possible_mentions_info}

        self.init_user_group_data(realm_id=mention_backend.realm_id, content=content)

        self.has_stream_wildcards = mentions.message_has_stream_wildcards
        self.has_topic_wildcards = mentions.message_has_topic_wildcards

    def message_has_stream_wildcards(self) -> bool:
        """Return whether the content contained a stream wildcard mention."""
        return self.has_stream_wildcards

    def message_has_topic_wildcards(self) -> bool:
        """Return whether the content contained a topic wildcard mention."""
        return self.has_topic_wildcards

    def init_user_group_data(self, realm_id: int, content: str) -> None:
        """Load the groups mentioned in ``content`` and their active members.

        user_group_name_info covers every mentioned group found in the
        realm. user_group_members is only filled for groups that are not
        deactivated and have a non-silent mention.
        """
        self.user_group_name_info: dict[str, NamedUserGroup] = {}
        self.user_group_members: dict[int, set[int]] = defaultdict(set)

        mention_types = possible_user_group_mentions(content)
        if not mention_types:
            return

        named_user_groups = NamedUserGroup.objects.filter(
            realm_for_sharding_id=realm_id, name__in=mention_types
        )

        # No filter here as we need user_group_name_info for all group mentions.
        self.user_group_name_info = {group.name.lower(): group for group in named_user_groups}

        # We only fetch group membership for mentions that can
        # possibly trigger notifications.
        notifiable_group_ids = [
            group.id
            for group in named_user_groups
            if not group.deactivated and mention_types.get(group.name) == "non-silent"
        ]

        # Avoid doing a database query if there's nothing to fetch.
        #
        # This isn't quite optimal -- we've not checked our user
        # has permission to mention the group yet.
        if not notifiable_group_ids:
            return

        # Fetch membership for the groups filtered above in a single
        # bulk query, mapping each group to its direct and indirect members.
        membership_rows = (
            get_root_id_annotated_recursive_subgroups_for_groups(notifiable_group_ids, realm_id)
            .filter(direct_members__is_active=True)
            .values_list("root_id", "direct_members")  # type: ignore[misc] # root_id is an annotated field.
        )
        for group_root_id, member_id in membership_rows:
            self.user_group_members[group_root_id].add(member_id)

    def get_user_by_name(self, name: str) -> FullNameInfo | None:
        """Look a user up by full name, ignoring case.

        Warning: get_user_by_name is not dependable if two users of the
        same full name are mentioned. Use get_user_by_id where possible.
        """
        return self.full_name_info.get(name.lower())

    def get_user_by_id(self, id: int) -> FullNameInfo | None:
        """Look a user up by id; None when the id is not known here."""
        return self.user_id_info.get(id)

    def get_user_ids(self) -> set[int]:
        """
        Returns the user IDs that might have been mentioned by this
        content.  Note that because this data structure has not parsed
        the message and does not know about escaping/code blocks, this
        will overestimate the list of user ids.
        """
        return set(self.user_id_info)

    def get_user_group(self, name: str) -> NamedUserGroup | None:
        """Look a mentioned user group up by name, ignoring case."""
        return self.user_group_name_info.get(name.lower())

    def get_group_members(self, user_group_id: int) -> set[int]:
        """Return the member ids loaded for a group, or an empty set."""
        # .get() rather than indexing, so the defaultdict gains no new key.
        return self.user_group_members.get(user_group_id, set())

    def get_stream_name_map(
        self, stream_names: set[str], acting_user: UserProfile | None
    ) -> dict[str, int]:
        """Delegate to the mention backend's get_stream_name_map."""
        return self.mention_backend.get_stream_name_map(stream_names, acting_user=acting_user)

    def get_topic_info_map(
        self, channel_topics: set[ChannelTopicInfo], acting_user: UserProfile | None
    ) -> dict[ChannelTopicInfo, int | None]:
        """Delegate to the mention backend's get_topic_info_map."""
        return self.mention_backend.get_topic_info_map(channel_topics, acting_user=acting_user)
|
|
|
|
|
|
def silent_mention_syntax_for_user(user_profile: UserProfile | UserDisplayRecipient) -> str:
    """Return the silent ``name|id`` mention syntax for a user.

    Accepts either a UserProfile, read via attributes, or a
    UserDisplayRecipient, read via its "full_name" and "id" keys.
    """
    if isinstance(user_profile, UserProfile):
        full_name, user_id = user_profile.full_name, user_profile.id
    else:
        full_name, user_id = user_profile["full_name"], user_profile["id"]
    return f"@_**{full_name}|{user_id}**"
|
|
|
|
|
|
def silent_mention_syntax_for_user_group(user_group: NamedUserGroup) -> str:
    """Return the silent mention syntax for a user group, built from its name."""
    group_name = user_group.name
    return f"@_*{group_name}*"
|
|
|
|
|
|
def get_user_group_mention_display_name(user_group: NamedUserGroup) -> StrPromise | str:
    """Return the name to display for a mentioned user group.

    System groups are looked up in SystemGroups.GROUP_DISPLAY_NAME_MAP by
    their name; every other group is shown under its own name.
    """
    if not user_group.is_system_group:
        return user_group.name

    return SystemGroups.GROUP_DISPLAY_NAME_MAP[user_group.name]
|
|
|
|
|
|
def sender_can_mention_group(sender: UserProfile | None, named_group: NamedUserGroup) -> bool:
    """Decide whether ``sender`` may mention ``named_group``.

    The check proceeds in three steps:
      1. If the group's can_mention_group setting is the named group
         SystemGroups.EVERYONE, anyone may mention it (sender may be None).
      2. Otherwise a sender is required, and a sender whose delivery email
         is a cross-realm bot email is refused.
      3. Otherwise the result of user_has_permission_for_group_setting is
         returned.
    """
    can_mention_group = named_group.can_mention_group

    # hasattr() guards the access to named_user_group on the setting value.
    setting_is_everyone = (
        hasattr(can_mention_group, "named_user_group")
        and can_mention_group.named_user_group.name == SystemGroups.EVERYONE
    )
    if setting_is_everyone:
        return True

    assert sender is not None

    if is_cross_realm_bot_email(sender.delivery_email):
        return False

    return user_has_permission_for_group_setting(
        can_mention_group.id,
        sender,
        NamedUserGroup.GROUP_PERMISSION_SETTINGS["can_mention_group"],
        direct_member_only=False,
    )
|