From b6a04aaa832d77d94f581db43244de1030d2d4c2 Mon Sep 17 00:00:00 2001 From: N-Shar-ma Date: Fri, 3 Mar 2023 19:10:19 +0530 Subject: [PATCH] typeahead: Add option for word order not mattering for query matching. Until now, the `query_matches_string_in_order` function would respect the order of words in the query string when matching a source string. This meant that for query "two one", the source string "one two three" would not be matched. For more flexibility, a new function, `query_matches_string_in_any_order`, has been added, which returns true if each word in the query string matches the prefix of a distinct word in the source string, else it returns false. The algorithm for computing this is quadratic in terms of the source word count so it can be a little expensive, but it is only currently used for searching topics in Recent Conversations, where the strings' length is limited by the max stream / topic name length allowed, so this should be performant enough for this use case. --- web/shared/src/typeahead.ts | 93 ++++++++++++++++++++++++++++++++++- web/src/recent_view_ui.js | 4 +- web/tests/recent_view.test.js | 18 ++++--- web/tests/typeahead.test.js | 48 ++++++++++++++++++ 4 files changed, 152 insertions(+), 11 deletions(-) diff --git a/web/shared/src/typeahead.ts b/web/shared/src/typeahead.ts index 57b6fc3c30..53d5ecf962 100644 --- a/web/shared/src/typeahead.ts +++ b/web/shared/src/typeahead.ts @@ -50,7 +50,36 @@ export function remove_diacritics(s: string): string { return s.normalize("NFKD").replace(unicode_marks, ""); } -// This function attempts to match a query with a source text. +export function last_prefix_match(prefix: string, words: string[]): number | null { + // This function takes in a lexicographically sorted array of `words`, + // and a `prefix` string. It uses binary search to compute the index + // of `prefix`'s upper bound, that is, the string immediately after + // the lexicographically last prefix match of `prefix`. 
So, the return + // value is the upper bound minus 1, that is, the last prefix match's + // index. When no prefix match is found, we return null. + let left = 0; + let right = words.length; + let found = false; + while (left < right) { + const mid = Math.floor((left + right) / 2); + if (words[mid].startsWith(prefix)) { + // Note that left can never be 0 if `found` is true, + // since it is incremented at least once here. + left = mid + 1; + found = true; + } else if (words[mid] < prefix) { + left = mid + 1; + } else { + right = mid; + } + } + if (found) { + return left - 1; + } + return null; +} + +// This function attempts to match a query in order with a source text. // * query is the user-entered search query // * source_str is the string we're matching in, e.g. a user's name // * split_char is the separator for this syntax (e.g. ' '). @@ -78,6 +107,68 @@ export function query_matches_string_in_order( return source_str.startsWith(query) || source_str.includes(split_char + query); } +// Match the words in the query to the words in the source text, in any order. +// +// The query matches the source if each word in the query can be matched to +// a different word in the source. The order the words appear in the query +// or in the source does not affect the result. +// +// A query word matches a source word if it is a prefix of the source word, +// after both words are converted to lowercase and diacritics are removed. +// +// Returns true if the query matches, and false if not. +// +// * query is the user-entered search query +// * source_str is the string we're matching in, e.g. a user's name +// * split_char is the separator for this syntax (e.g. ' '). 
+export function query_matches_string_in_any_order( + query: string, + source_str: string, + split_char: string, +): boolean { + source_str = source_str.toLowerCase(); + source_str = remove_diacritics(source_str); + + query = query.toLowerCase(); + query = remove_diacritics(query); + + const search_words = query.split(split_char).filter(Boolean); + const source_words = source_str.split(split_char).filter(Boolean); + if (search_words.length > source_words.length) { + return false; + } + + // We go through the search words in reverse lexicographical order, and to select + // the corresponding source word for each, one by one, we find the lexicographically + // last possible prefix match and immediately then remove it from consideration for + // remaining search words. + + // This essentially means that there is no search word lexicographically greater than + // our current search word (say, q1) which might require the current corresponding source + // word (as all search words lexicographically greater than it have already been matched) + // and also that all search words lexicographically smaller than it have the best possible + // chance for getting matched. + + // This is because if the source word we just removed (say, s1) is the sole match for + // another search word (say, q2 - obviously lexicographically smaller than q1), this + // means that either q2 = q1 or that q2 is a prefix of q1. In either case, the final + // return value of this function should anyway be false, as s1 would be the sole match + // for q1 too; while we need unique matches for each search word. + + search_words.sort().reverse(); + source_words.sort(); + for (const word of search_words) { + // `match_index` is the index of the best possible match of `word`. + const match_index = last_prefix_match(word, source_words); + if (match_index === null) { + // We return false if no match was found for `word`. 
+ return false; + } + source_words.splice(match_index, 1); + } + return true; +} + function clean_query(query: string): string { query = remove_diacritics(query); // When `abc ` with a space at the end is typed in diff --git a/web/src/recent_view_ui.js b/web/src/recent_view_ui.js index 10add2caed..255e738741 100644 --- a/web/src/recent_view_ui.js +++ b/web/src/recent_view_ui.js @@ -1,6 +1,7 @@ import $ from "jquery"; import _ from "lodash"; +import * as typeahead from "../shared/src/typeahead"; import render_introduce_zulip_view_modal from "../templates/introduce_zulip_view_modal.hbs"; import render_recent_view_filters from "../templates/recent_view_filters.hbs"; import render_recent_view_row from "../templates/recent_view_row.hbs"; @@ -635,8 +636,7 @@ export function topic_in_search_results(keyword, stream_name, topic) { return true; } const text = (stream_name + " " + topic).toLowerCase(); - const search_words = keyword.toLowerCase().split(/\s+/); - return search_words.every((word) => text.includes(word)); + return typeahead.query_matches_string_in_any_order(keyword, text, " "); } export function update_topics_of_deleted_message_ids(message_ids) { diff --git a/web/tests/recent_view.test.js b/web/tests/recent_view.test.js index 4d891fbaae..69365d8d08 100644 --- a/web/tests/recent_view.test.js +++ b/web/tests/recent_view.test.js @@ -1120,22 +1120,24 @@ test("test_search", () => { assert.equal(rt.topic_in_search_results("recent", "general", "Recent topic"), true); assert.equal(rt.topic_in_search_results("RECENT", "general", "Recent topic"), true); - // match in any order of words + // Match (by prefix) in any order of words. assert.equal(rt.topic_in_search_results("topic recent", "general", "Recent topic"), true); - - // Matches any sequence of words. 
- assert.equal(rt.topic_in_search_results("o", "general", "Recent topic"), true); - assert.equal(rt.topic_in_search_results("nt to", "general", "Recent topic"), true); - assert.equal(rt.topic_in_search_results("z", "general", "Recent topic"), false); + assert.equal(rt.topic_in_search_results("o", "general", "Recent topic"), false); + assert.equal(rt.topic_in_search_results("to recen", "general", "Recent topic"), true); + assert.equal(rt.topic_in_search_results("ner opic", "general", "Recent topic"), false); + assert.equal(rt.topic_in_search_results("pr pro", "general", "pro PRs"), true); + assert.equal(rt.topic_in_search_results("pr pro pr pro", "general", "pro PRs"), false); + assert.equal(rt.topic_in_search_results("co cows", "general", "one cow 2 cows"), true); + assert.equal(rt.topic_in_search_results("cows cows", "general", "one cow 2 cows"), false); assert.equal(rt.topic_in_search_results("?", "general", "Recent topic"), false); // Test special character match assert.equal(rt.topic_in_search_results(".*+?^${}()[]\\", "general", "Recent topic"), false); - assert.equal(rt.topic_in_search_results("?", "general", "not-at-start?"), true); + assert.equal(rt.topic_in_search_results("?", "general", "?at-start"), true); assert.equal(rt.topic_in_search_results("?", "general", "?"), true); - assert.equal(rt.topic_in_search_results("?", "general", "\\?"), true); + assert.equal(rt.topic_in_search_results("?", "general", "\\?"), false); assert.equal(rt.topic_in_search_results("\\", "general", "\\"), true); assert.equal(rt.topic_in_search_results("\\", "general", "\\\\"), true); diff --git a/web/tests/typeahead.test.js b/web/tests/typeahead.test.js index f5e22a8c58..72ffa4914e 100644 --- a/web/tests/typeahead.test.js +++ b/web/tests/typeahead.test.js @@ -272,3 +272,51 @@ run_test("sort_emojis: prioritise perfect matches", () => { ]; assert.deepEqual(typeahead.sort_emojis(emoji_list, "thank you"), emoji_list); }); + +run_test("last_prefix_match", () => { + let words = [ + 
"apple", + "banana", + "cantaloupe", + "cherry", + "kiwi", + "melon", + "pear", + "plum", + "raspberry", + "watermelon", + ]; + let prefix = "p"; + assert.equal(typeahead.last_prefix_match(prefix, words), 7); + + prefix = "ch"; + assert.equal(typeahead.last_prefix_match(prefix, words), 3); + + prefix = "pom"; + assert.equal(typeahead.last_prefix_match(prefix, words), null); + + prefix = "aa"; + assert.equal(typeahead.last_prefix_match(prefix, words), null); + + prefix = "zu"; + assert.equal(typeahead.last_prefix_match(prefix, words), null); + + prefix = ""; + assert.equal(typeahead.last_prefix_match(prefix, words), 9); + + words = ["one"]; + prefix = "one"; + assert.equal(typeahead.last_prefix_match(prefix, words), 0); + + words = ["aa", "pr", "pra", "pre", "pri", "pro", "pru", "zz"]; + prefix = "pr"; + assert.equal(typeahead.last_prefix_match(prefix, words), 6); + + words = ["same", "same", "same", "same", "same"]; + prefix = "same"; + assert.equal(typeahead.last_prefix_match(prefix, words), 4); + + words = []; + prefix = "empty"; + assert.equal(typeahead.last_prefix_match(prefix, words), null); +});