email digests: Do one query for recent streams.

This is another way to limit hops to the database
when we process users in bulk.
This commit is contained in:
Steve Howell
2020-11-13 11:27:39 +00:00
committed by Tim Abbott
parent 16583fd5b4
commit 162b372b93
2 changed files with 29 additions and 26 deletions

View File

@@ -243,13 +243,17 @@ def bulk_get_digest_context(users: List[UserProfile], cutoff: float) -> Dict[int
     stream_map = get_stream_map(user_ids)

+    long_term_idle_user_ids = [user.id for user in users if user.long_term_idle]
+    recently_modified_streams = get_modified_streams(long_term_idle_user_ids, cutoff_date)
+
     all_stream_ids = set()

     for user in users:
         stream_ids = stream_map[user.id]

         if user.long_term_idle:
-            stream_ids -= streams_recently_modified_for_user(user, cutoff_date)
+            stream_ids -= recently_modified_streams.get(user.id, set())

         all_stream_ids |= stream_ids
@@ -341,18 +345,25 @@ def bulk_write_realm_audit_logs(users: List[UserProfile]) -> None:
def handle_digest_email(user_id: int, cutoff: float) -> None:
    """Build and send the digest email for a single user.

    Thin wrapper that delegates to the bulk code path with a
    one-element list, so both entry points share one implementation.
    """
    bulk_handle_digest_email([user_id], cutoff)
def get_modified_streams(user_ids: List[int], cutoff_date: datetime.datetime) -> Dict[int, Set[int]]:
    """Map each user id to the set of stream ids whose subscription
    state for that user changed after cutoff_date.

    One bulk query for all user_ids limits hops to the database when
    we process users in bulk.  Users with no recent changes are simply
    absent from the returned dict.
    """
    subscription_events = [
        RealmAuditLog.SUBSCRIPTION_CREATED,
        RealmAuditLog.SUBSCRIPTION_ACTIVATED,
        RealmAuditLog.SUBSCRIPTION_DEACTIVATED,
    ]

    # Get rows where the users' subscriptions have changed.
    rows = (
        RealmAuditLog.objects.filter(
            modified_user_id__in=user_ids,
            event_time__gt=cutoff_date,
            event_type__in=subscription_events,
        )
        .values("modified_user_id", "modified_stream_id")
        .distinct()
    )

    # Group the (user, stream) pairs by user.
    result: Dict[int, Set[int]] = defaultdict(set)
    for row in rows:
        result[row["modified_user_id"]].add(row["modified_stream_id"])
    return result

View File

@@ -14,8 +14,8 @@ from zerver.lib.digest import (
     bulk_write_realm_audit_logs,
     enqueue_emails,
     gather_new_streams,
+    get_modified_streams,
     handle_digest_email,
-    streams_recently_modified_for_user,
 )
 from zerver.lib.message import get_last_message_id
 from zerver.lib.streams import create_stream_if_needed
@@ -180,7 +180,7 @@ class TestDigestEmailMessages(ZulipTestCase):
         with cache_tries_captured() as cache_tries:
             bulk_handle_digest_email(digest_user_ids, cutoff)

-        self.assert_length(queries, 37)
+        self.assert_length(queries, 34)
         self.assert_length(cache_tries, 0)

         self.assertEqual(mock_send_future_email.call_count, len(digest_users))
@@ -235,39 +235,31 @@ class TestDigestEmailMessages(ZulipTestCase):
         self.unsubscribe(othello, 'Denmark')
         self.subscribe(othello, 'Denmark')

-        self.assertEqual(
-            streams_recently_modified_for_user(othello, one_hour_ago),
-            {denmark.id}
-        )
+        recent_streams = get_modified_streams([othello.id], one_hour_ago)
+        self.assertEqual(recent_streams[othello.id], {denmark.id})

         # Backdate all our logs (so that Denmark will no longer
         # appear like a recently modified stream for Othello).
         RealmAuditLog.objects.all().update(event_time=two_hours_ago)

         # Now Denmark no longer appears recent to Othello.
-        self.assertEqual(
-            streams_recently_modified_for_user(othello, one_hour_ago),
-            set()
-        )
+        recent_streams = get_modified_streams([othello.id], one_hour_ago)
+        self.assertEqual(recent_streams[othello.id], set())

         # Unsubscribe and subscribe from a stream
         self.unsubscribe(othello, 'Verona')
         self.subscribe(othello, 'Verona')

         # Now, Verona, but not Denmark, appears recent.
-        self.assertEqual(
-            streams_recently_modified_for_user(othello, one_hour_ago),
-            {verona.id},
-        )
+        recent_streams = get_modified_streams([othello.id], one_hour_ago)
+        self.assertEqual(recent_streams[othello.id], {verona.id})

         # make sure we don't mix up Othello and Cordelia
         self.unsubscribe(cordelia, 'Denmark')
         self.subscribe(cordelia, 'Denmark')

-        self.assertEqual(
-            streams_recently_modified_for_user(cordelia, one_hour_ago),
-            {denmark.id}
-        )
+        recent_streams = get_modified_streams([othello.id, cordelia.id], one_hour_ago)
+        self.assertEqual(recent_streams[cordelia.id], {denmark.id})
def active_human_users(self, realm: Realm) -> List[UserProfile]: def active_human_users(self, realm: Realm) -> List[UserProfile]:
users = list(UserProfile.objects.filter( users = list(UserProfile.objects.filter(