message_fetch: Improve retry logic for matches_narrow re-checks.

Previously, we retried all failed requests to
`/json/messages/matches_narrow` forever, with a fixed 5s retry.  This
meant infinite retries even on 4xx responses, for instance on invalid
narrows -- and _each_ new message would add another infinite every-5s
retry.

Skip retries on 400 responses, and put an upper limit on the number of
retries.  At the same time, change the retries to be
exponential-backoff with full jitter, to be more responsive in the
event of a one-off failure, and more forgiving in response to a longer
outage.

Failures that exhaust the 5 attempts are silent to the user -- since
this is likely an uncommon edge case (requiring Tornado to be serving
events to the client, Django to be unresponsive, and the client to be
narrowed to a filter it cannot apply locally), and the failure mode is
not bad (it will simply fail to live-update with new matching
messages).

Fixes: #20165.
This commit is contained in:
Alex Vandiver
2022-10-12 10:15:38 -04:00
committed by Tim Abbott
parent 550a32bb20
commit 2d406aa9d0

View File

@@ -2,6 +2,7 @@ import $ from "jquery";
import * as alert_words from "./alert_words";
import {all_messages_data} from "./all_messages_data";
import * as blueslip from "./blueslip";
import * as channel from "./channel";
import * as compose_fade from "./compose_fade";
import * as compose_state from "./compose_state";
@@ -32,7 +33,7 @@ import * as unread_ops from "./unread_ops";
import * as unread_ui from "./unread_ui";
import * as util from "./util";
function maybe_add_narrowed_messages(messages, msg_list, callback) {
function maybe_add_narrowed_messages(messages, msg_list, callback, attempt = 1) {
const ids = [];
for (const elem of messages) {
@@ -80,15 +81,34 @@ function maybe_add_narrowed_messages(messages, msg_list, callback) {
unread_ops.process_visible();
notifications.notify_messages_outside_current_search(elsewhere_messages);
},
error() {
// We might want to be more clever here
error(xhr) {
if (msg_list.narrowed && msg_list !== message_lists.current) {
return;
}
if (xhr.status === 400) {
// This narrow was invalid -- don't retry it, and don't display the message.
return;
}
if (attempt >= 5) {
// Too many retries -- bail out. However, this means the `messages` are potentially
// missing from the search results view. Since this is a very unlikely circumstance
// (Tornado is up, Django is down for 5 retries, user is in a search view that it
// cannot apply itself) and the failure mode is not bad (it will simply fail to
// include live updates of new matching messages), just log an error.
blueslip.error(
"Failed to determine if new message matches current narrow, after 5 tries",
);
return;
}
// Backoff on retries, with full jitter: up to 2s, 4s, 8s, 16s, 32s
const delay = Math.random() * 2 ** attempt * 2000;
setTimeout(() => {
if (msg_list === message_lists.current) {
// Don't actually try again if we unnarrowed
// Don't actually try again if we un-narrowed
// while waiting
maybe_add_narrowed_messages(messages, msg_list, callback);
maybe_add_narrowed_messages(messages, msg_list, callback, attempt + 1);
}
}, 5000);
}, delay);
},
});
}