mirror of
https://github.com/zulip/zulip.git
synced 2025-11-02 13:03:29 +00:00
search: Make num_after/num_after more consistent.
We now consistently set our query limits so that we get at least `num_after` rows such that id > anchor. (Obviously, the caveat is that if there aren't enough rows that fulfill the query, we'll return the full set of rows, but that may be less than `num_after`.) Likewise for `num_before`. Before this change, we would sometimes return one too few rows for narrow queries. Now, we're still a bit broken, but in a more consistent way. If we have a query that does not match the anchor row (which could be true even for a non-narrow query), but which does match lots of rows after the anchor, we'll return `num_after + 1` rows on the right hand side, whether or not the query has narrow parameters. The off-by-one semantics here have probably been moot all along, since our windows are approximate to begin with. If we set num_after to 100, its just a rough performance optimization to begin with, so it doesn't matter whether we return 99 or 101 rows, as long as we set the anchor correctly on the subsequent query. We will make the results more rigorous in a follow up commit.
This commit is contained in:
@@ -435,7 +435,9 @@ class GetOldMessagesTest(ZulipTestCase):
|
||||
|
||||
def message_visibility_test(self, narrow: List[Dict[str, str]],
|
||||
message_ids: List[int], pivot_index: int) -> None:
|
||||
post_params = dict(narrow=ujson.dumps(narrow), num_before=len(message_ids),
|
||||
num_before = len(message_ids) - 1
|
||||
|
||||
post_params = dict(narrow=ujson.dumps(narrow), num_before=num_before,
|
||||
num_after=0, anchor=LARGER_THAN_MAX_MESSAGE_ID)
|
||||
payload = self.client_get("/json/messages", dict(post_params))
|
||||
self.assert_json_success(payload)
|
||||
@@ -1802,50 +1804,50 @@ class GetOldMessagesTest(ZulipTestCase):
|
||||
|
||||
sql_template = 'SELECT anon_1.message_id, anon_1.flags \nFROM (SELECT message_id, flags \nFROM zerver_usermessage JOIN zerver_message ON zerver_usermessage.message_id = zerver_message.id \nWHERE user_profile_id = {hamlet_id} AND (sender_id = {othello_id} AND recipient_id = {hamlet_recipient} OR sender_id = {hamlet_id} AND recipient_id = {othello_recipient}) ORDER BY message_id ASC \n LIMIT 10) AS anon_1 ORDER BY message_id ASC'
|
||||
sql = sql_template.format(**query_ids)
|
||||
self.common_check_get_messages_query({'anchor': 0, 'num_before': 0, 'num_after': 10,
|
||||
self.common_check_get_messages_query({'anchor': 0, 'num_before': 0, 'num_after': 9,
|
||||
'narrow': '[["pm-with", "%s"]]' % (self.example_email("othello"),)},
|
||||
sql)
|
||||
|
||||
sql_template = 'SELECT anon_1.message_id, anon_1.flags \nFROM (SELECT message_id, flags \nFROM zerver_usermessage JOIN zerver_message ON zerver_usermessage.message_id = zerver_message.id \nWHERE user_profile_id = {hamlet_id} AND (flags & 2) != 0 ORDER BY message_id ASC \n LIMIT 10) AS anon_1 ORDER BY message_id ASC'
|
||||
sql = sql_template.format(**query_ids)
|
||||
self.common_check_get_messages_query({'anchor': 0, 'num_before': 0, 'num_after': 10,
|
||||
self.common_check_get_messages_query({'anchor': 0, 'num_before': 0, 'num_after': 9,
|
||||
'narrow': '[["is", "starred"]]'},
|
||||
sql)
|
||||
|
||||
sql_template = 'SELECT anon_1.message_id, anon_1.flags \nFROM (SELECT message_id, flags \nFROM zerver_usermessage JOIN zerver_message ON zerver_usermessage.message_id = zerver_message.id \nWHERE user_profile_id = {hamlet_id} AND sender_id = {othello_id} ORDER BY message_id ASC \n LIMIT 10) AS anon_1 ORDER BY message_id ASC'
|
||||
sql = sql_template.format(**query_ids)
|
||||
self.common_check_get_messages_query({'anchor': 0, 'num_before': 0, 'num_after': 10,
|
||||
self.common_check_get_messages_query({'anchor': 0, 'num_before': 0, 'num_after': 9,
|
||||
'narrow': '[["sender", "%s"]]' % (self.example_email("othello"),)},
|
||||
sql)
|
||||
|
||||
sql_template = 'SELECT anon_1.message_id \nFROM (SELECT id AS message_id \nFROM zerver_message \nWHERE recipient_id = {scotland_recipient} ORDER BY zerver_message.id ASC \n LIMIT 10) AS anon_1 ORDER BY message_id ASC'
|
||||
sql = sql_template.format(**query_ids)
|
||||
self.common_check_get_messages_query({'anchor': 0, 'num_before': 0, 'num_after': 10,
|
||||
self.common_check_get_messages_query({'anchor': 0, 'num_before': 0, 'num_after': 9,
|
||||
'narrow': '[["stream", "Scotland"]]'},
|
||||
sql)
|
||||
|
||||
sql_template = "SELECT anon_1.message_id, anon_1.flags \nFROM (SELECT message_id, flags \nFROM zerver_usermessage JOIN zerver_message ON zerver_usermessage.message_id = zerver_message.id \nWHERE user_profile_id = {hamlet_id} AND upper(subject) = upper('blah') ORDER BY message_id ASC \n LIMIT 10) AS anon_1 ORDER BY message_id ASC"
|
||||
sql = sql_template.format(**query_ids)
|
||||
self.common_check_get_messages_query({'anchor': 0, 'num_before': 0, 'num_after': 10,
|
||||
self.common_check_get_messages_query({'anchor': 0, 'num_before': 0, 'num_after': 9,
|
||||
'narrow': '[["topic", "blah"]]'},
|
||||
sql)
|
||||
|
||||
sql_template = "SELECT anon_1.message_id \nFROM (SELECT id AS message_id \nFROM zerver_message \nWHERE recipient_id = {scotland_recipient} AND upper(subject) = upper('blah') ORDER BY zerver_message.id ASC \n LIMIT 10) AS anon_1 ORDER BY message_id ASC"
|
||||
sql = sql_template.format(**query_ids)
|
||||
self.common_check_get_messages_query({'anchor': 0, 'num_before': 0, 'num_after': 10,
|
||||
self.common_check_get_messages_query({'anchor': 0, 'num_before': 0, 'num_after': 9,
|
||||
'narrow': '[["stream", "Scotland"], ["topic", "blah"]]'},
|
||||
sql)
|
||||
|
||||
# Narrow to pms with yourself
|
||||
sql_template = 'SELECT anon_1.message_id, anon_1.flags \nFROM (SELECT message_id, flags \nFROM zerver_usermessage JOIN zerver_message ON zerver_usermessage.message_id = zerver_message.id \nWHERE user_profile_id = {hamlet_id} AND sender_id = {hamlet_id} AND recipient_id = {hamlet_recipient} ORDER BY message_id ASC \n LIMIT 10) AS anon_1 ORDER BY message_id ASC'
|
||||
sql = sql_template.format(**query_ids)
|
||||
self.common_check_get_messages_query({'anchor': 0, 'num_before': 0, 'num_after': 10,
|
||||
self.common_check_get_messages_query({'anchor': 0, 'num_before': 0, 'num_after': 9,
|
||||
'narrow': '[["pm-with", "%s"]]' % (self.example_email("hamlet"),)},
|
||||
sql)
|
||||
|
||||
sql_template = 'SELECT anon_1.message_id, anon_1.flags \nFROM (SELECT message_id, flags \nFROM zerver_usermessage JOIN zerver_message ON zerver_usermessage.message_id = zerver_message.id \nWHERE user_profile_id = {hamlet_id} AND recipient_id = {scotland_recipient} AND (flags & 2) != 0 ORDER BY message_id ASC \n LIMIT 10) AS anon_1 ORDER BY message_id ASC'
|
||||
sql = sql_template.format(**query_ids)
|
||||
self.common_check_get_messages_query({'anchor': 0, 'num_before': 0, 'num_after': 10,
|
||||
self.common_check_get_messages_query({'anchor': 0, 'num_before': 0, 'num_after': 9,
|
||||
'narrow': '[["stream", "Scotland"], ["is", "starred"]]'},
|
||||
sql)
|
||||
|
||||
@@ -1855,19 +1857,19 @@ class GetOldMessagesTest(ZulipTestCase):
|
||||
|
||||
sql_template = "SELECT anon_1.message_id, anon_1.flags, anon_1.subject, anon_1.rendered_content, anon_1.content_matches, anon_1.subject_matches \nFROM (SELECT message_id, flags, subject, rendered_content, ts_match_locs_array('zulip.english_us_search', rendered_content, plainto_tsquery('zulip.english_us_search', 'jumping')) AS content_matches, ts_match_locs_array('zulip.english_us_search', escape_html(subject), plainto_tsquery('zulip.english_us_search', 'jumping')) AS subject_matches \nFROM zerver_usermessage JOIN zerver_message ON zerver_usermessage.message_id = zerver_message.id \nWHERE user_profile_id = {hamlet_id} AND (search_tsvector @@ plainto_tsquery('zulip.english_us_search', 'jumping')) ORDER BY message_id ASC \n LIMIT 10) AS anon_1 ORDER BY message_id ASC" # type: Text
|
||||
sql = sql_template.format(**query_ids)
|
||||
self.common_check_get_messages_query({'anchor': 0, 'num_before': 0, 'num_after': 10,
|
||||
self.common_check_get_messages_query({'anchor': 0, 'num_before': 0, 'num_after': 9,
|
||||
'narrow': '[["search", "jumping"]]'},
|
||||
sql)
|
||||
|
||||
sql_template = "SELECT anon_1.message_id, anon_1.subject, anon_1.rendered_content, anon_1.content_matches, anon_1.subject_matches \nFROM (SELECT id AS message_id, subject, rendered_content, ts_match_locs_array('zulip.english_us_search', rendered_content, plainto_tsquery('zulip.english_us_search', 'jumping')) AS content_matches, ts_match_locs_array('zulip.english_us_search', escape_html(subject), plainto_tsquery('zulip.english_us_search', 'jumping')) AS subject_matches \nFROM zerver_message \nWHERE recipient_id = {scotland_recipient} AND (search_tsvector @@ plainto_tsquery('zulip.english_us_search', 'jumping')) ORDER BY zerver_message.id ASC \n LIMIT 10) AS anon_1 ORDER BY message_id ASC"
|
||||
sql = sql_template.format(**query_ids)
|
||||
self.common_check_get_messages_query({'anchor': 0, 'num_before': 0, 'num_after': 10,
|
||||
self.common_check_get_messages_query({'anchor': 0, 'num_before': 0, 'num_after': 9,
|
||||
'narrow': '[["stream", "Scotland"], ["search", "jumping"]]'},
|
||||
sql)
|
||||
|
||||
sql_template = 'SELECT anon_1.message_id, anon_1.flags, anon_1.subject, anon_1.rendered_content, anon_1.content_matches, anon_1.subject_matches \nFROM (SELECT message_id, flags, subject, rendered_content, ts_match_locs_array(\'zulip.english_us_search\', rendered_content, plainto_tsquery(\'zulip.english_us_search\', \'"jumping" quickly\')) AS content_matches, ts_match_locs_array(\'zulip.english_us_search\', escape_html(subject), plainto_tsquery(\'zulip.english_us_search\', \'"jumping" quickly\')) AS subject_matches \nFROM zerver_usermessage JOIN zerver_message ON zerver_usermessage.message_id = zerver_message.id \nWHERE user_profile_id = {hamlet_id} AND (content ILIKE \'%jumping%\' OR subject ILIKE \'%jumping%\') AND (search_tsvector @@ plainto_tsquery(\'zulip.english_us_search\', \'"jumping" quickly\')) ORDER BY message_id ASC \n LIMIT 10) AS anon_1 ORDER BY message_id ASC'
|
||||
sql = sql_template.format(**query_ids)
|
||||
self.common_check_get_messages_query({'anchor': 0, 'num_before': 0, 'num_after': 10,
|
||||
self.common_check_get_messages_query({'anchor': 0, 'num_before': 0, 'num_after': 9,
|
||||
'narrow': '[["search", "\\"jumping\\" quickly"]]'},
|
||||
sql)
|
||||
|
||||
|
||||
@@ -680,7 +680,6 @@ def get_messages_backend(request: HttpRequest, user_profile: UserProfile,
|
||||
anchor=anchor,
|
||||
anchored_to_left=anchored_to_left,
|
||||
anchored_to_right=anchored_to_right,
|
||||
narrow=narrow,
|
||||
id_col=inner_msg_id_col,
|
||||
)
|
||||
|
||||
@@ -755,7 +754,6 @@ def limit_query_to_range(query: Query,
|
||||
anchor: int,
|
||||
anchored_to_left: bool,
|
||||
anchored_to_right: bool,
|
||||
narrow: Any,
|
||||
id_col: ColumnElement) -> Query:
|
||||
'''
|
||||
This code is actually generic enough that we could move it to a
|
||||
@@ -766,20 +764,31 @@ def limit_query_to_range(query: Query,
|
||||
|
||||
need_both_sides = need_before_query and need_after_query
|
||||
|
||||
# We add 1 to the number of messages requested if no narrow was
|
||||
# specified to ensure that the resulting list always contains the
|
||||
# anchor row. If a narrow was specified, the anchor row
|
||||
# might not match the narrow anyway.
|
||||
if narrow is None:
|
||||
if need_after_query:
|
||||
num_after += 1
|
||||
elif need_before_query:
|
||||
num_before += 1
|
||||
|
||||
# The semantics of our flags are as follows:
|
||||
#
|
||||
# num_after = number of rows < anchor
|
||||
# num_after = number of rows > anchor
|
||||
#
|
||||
# But we also want the row where id == anchor (if it exists),
|
||||
# and we don't want to union up to 3 queries. So in some cases
|
||||
# we do things like `after_limit = num_after + 1` to grab the
|
||||
# anchor row in the "after" query.
|
||||
#
|
||||
# Note that in some cases, if the anchor row isn't found, we
|
||||
# actually may fetch an extra row at one of the extremes.
|
||||
if need_both_sides:
|
||||
before_anchor = anchor - 1
|
||||
after_anchor = anchor
|
||||
before_limit = num_before
|
||||
after_limit = num_after + 1
|
||||
elif need_before_query:
|
||||
before_anchor = anchor
|
||||
before_limit = num_before
|
||||
if not anchored_to_right:
|
||||
before_limit += 1
|
||||
elif need_after_query:
|
||||
after_anchor = anchor
|
||||
after_limit = num_after + 1
|
||||
|
||||
if need_before_query:
|
||||
before_query = query
|
||||
@@ -787,15 +796,17 @@ def limit_query_to_range(query: Query,
|
||||
if not anchored_to_right:
|
||||
before_query = before_query.where(id_col <= before_anchor)
|
||||
|
||||
before_query = before_query.order_by(id_col.desc()).limit(num_before)
|
||||
before_query = before_query.order_by(id_col.desc())
|
||||
before_query = before_query.limit(before_limit)
|
||||
|
||||
if need_after_query:
|
||||
after_query = query
|
||||
|
||||
if not anchored_to_left:
|
||||
after_query = after_query.where(id_col >= anchor)
|
||||
after_query = after_query.where(id_col >= after_anchor)
|
||||
|
||||
after_query = after_query.order_by(id_col.asc()).limit(num_after)
|
||||
after_query = after_query.order_by(id_col.asc())
|
||||
after_query = after_query.limit(after_limit)
|
||||
|
||||
if need_both_sides:
|
||||
query = union_all(before_query.self_group(), after_query.self_group())
|
||||
|
||||
Reference in New Issue
Block a user