mirror of
https://github.com/zulip/zulip.git
synced 2025-11-03 21:43:21 +00:00
typeahead: Add option for word order not mattering for query matching.
Until now, the `query_matches_string_in_order` function would respect the order of words in the query string when matching a source string. This meant that for the query "two one", the source string "one two three" would not be matched. For more flexibility, a new function, `query_matches_string_in_any_order`, has been added, which returns true if each word in the query string matches the prefix of a distinct word in the source string, and false otherwise. The algorithm for computing this is quadratic in the source word count, so it can be a little expensive, but it is currently only used for searching topics in Recent Conversations, where the strings' length is limited by the maximum stream / topic name length allowed, so this should be performant enough for this use case.
This commit is contained in:
@@ -50,7 +50,36 @@ export function remove_diacritics(s: string): string {
|
||||
return s.normalize("NFKD").replace(unicode_marks, "");
|
||||
}
|
||||
|
||||
// This function attempts to match a query with a source text.
|
||||
export function last_prefix_match(prefix: string, words: string[]): number | null {
    // Returns the index of the lexicographically last entry of `words`
    // that starts with `prefix`, or null when no entry does.
    //
    // `words` must already be sorted lexicographically (the order produced
    // by the default `Array.prototype.sort`), so that all entries starting
    // with `prefix` form one contiguous run. A binary search finds the
    // upper bound of that run, i.e. the position just past its last entry;
    // the answer is that upper bound minus 1.
    let left = 0;
    let right = words.length;
    let found = false;

    while (left < right) {
        const mid = Math.floor((left + right) / 2);
        const word = words[mid];

        if (word.startsWith(prefix)) {
            // A match: the upper bound lies strictly to the right of `mid`.
            // Because `left` is advanced past `mid` here, `left` is at
            // least 1 whenever `found` is true, so `left - 1` below is
            // always a valid index.
            found = true;
            left = mid + 1;
        } else if (word < prefix) {
            // `word` sorts before every possible match; look to the right.
            left = mid + 1;
        } else {
            // `word` sorts after every possible match; look to the left.
            right = mid;
        }
    }

    return found ? left - 1 : null;
}
|
||||
|
||||
// This function attempts to match a query in order with a source text.
|
||||
// * query is the user-entered search query
|
||||
// * source_str is the string we're matching in, e.g. a user's name
|
||||
// * split_char is the separator for this syntax (e.g. ' ').
|
||||
@@ -78,6 +107,68 @@ export function query_matches_string_in_order(
|
||||
return source_str.startsWith(query) || source_str.includes(split_char + query);
|
||||
}
|
||||
|
||||
// Match the words in the query to the words in the source text, in any order.
//
// The query matches the source if each word in the query can be matched to
// a different word in the source. The order in which the words appear in the
// query or in the source does not affect the result.
//
// A query word matches a source word if it is a prefix of the source word,
// after both have been converted to lowercase and had diacritics removed.
//
// Returns true if the query matches, and false if not.
//
// * query is the user-entered search query
// * source_str is the string we're matching in, e.g. a user's name
// * split_char is the separator for this syntax (e.g. ' ').
export function query_matches_string_in_any_order(
    query: string,
    source_str: string,
    split_char: string,
): boolean {
    source_str = remove_diacritics(source_str.toLowerCase());
    query = remove_diacritics(query.toLowerCase());

    // `filter(Boolean)` drops the empty strings produced by leading,
    // trailing, or repeated separators.
    const search_words = query.split(split_char).filter(Boolean);
    const source_words = source_str.split(split_char).filter(Boolean);

    // Each search word needs its own source word, so a longer query can
    // never match.
    if (search_words.length > source_words.length) {
        return false;
    }

    // We go through the search words in reverse lexicographical order. For
    // each one we pick the lexicographically last source word it is a prefix
    // of, and immediately remove that source word from consideration for the
    // remaining search words.
    //
    // When we handle a search word (say, q1), every search word
    // lexicographically greater than q1 has already been matched, so none of
    // them can still need the source word we pick. Taking the last possible
    // match also leaves the search words lexicographically smaller than q1
    // with the best possible chance of being matched.
    //
    // If the source word we just removed (say, s1) was the sole match for
    // another search word (say, q2, necessarily not greater than q1), then
    // either q2 = q1 or q2 is a prefix of q1. In both cases s1 was the sole
    // match for q1 as well, so returning false is correct: every search word
    // needs a distinct source word.
    search_words.sort().reverse();
    source_words.sort();

    for (const word of search_words) {
        // `match_index` is the index of the best possible match of `word`.
        const match_index = last_prefix_match(word, source_words);
        if (match_index === null) {
            // No remaining source word starts with `word`.
            return false;
        }
        source_words.splice(match_index, 1);
    }

    return true;
}
|
||||
|
||||
function clean_query(query: string): string {
|
||||
query = remove_diacritics(query);
|
||||
// When `abc ` with a space at the end is typed in
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import $ from "jquery";
|
||||
import _ from "lodash";
|
||||
|
||||
import * as typeahead from "../shared/src/typeahead";
|
||||
import render_introduce_zulip_view_modal from "../templates/introduce_zulip_view_modal.hbs";
|
||||
import render_recent_view_filters from "../templates/recent_view_filters.hbs";
|
||||
import render_recent_view_row from "../templates/recent_view_row.hbs";
|
||||
@@ -635,8 +636,7 @@ export function topic_in_search_results(keyword, stream_name, topic) {
|
||||
return true;
|
||||
}
|
||||
const text = (stream_name + " " + topic).toLowerCase();
|
||||
const search_words = keyword.toLowerCase().split(/\s+/);
|
||||
return search_words.every((word) => text.includes(word));
|
||||
return typeahead.query_matches_string_in_any_order(keyword, text, " ");
|
||||
}
|
||||
|
||||
export function update_topics_of_deleted_message_ids(message_ids) {
|
||||
|
||||
@@ -1120,22 +1120,24 @@ test("test_search", () => {
|
||||
assert.equal(rt.topic_in_search_results("recent", "general", "Recent topic"), true);
|
||||
assert.equal(rt.topic_in_search_results("RECENT", "general", "Recent topic"), true);
|
||||
|
||||
// match in any order of words
|
||||
// Match (by prefix) in any order of words.
|
||||
assert.equal(rt.topic_in_search_results("topic recent", "general", "Recent topic"), true);
|
||||
|
||||
// Matches any sequence of words.
|
||||
assert.equal(rt.topic_in_search_results("o", "general", "Recent topic"), true);
|
||||
assert.equal(rt.topic_in_search_results("nt to", "general", "Recent topic"), true);
|
||||
assert.equal(rt.topic_in_search_results("z", "general", "Recent topic"), false);
|
||||
assert.equal(rt.topic_in_search_results("o", "general", "Recent topic"), false);
|
||||
assert.equal(rt.topic_in_search_results("to recen", "general", "Recent topic"), true);
|
||||
assert.equal(rt.topic_in_search_results("ner opic", "general", "Recent topic"), false);
|
||||
assert.equal(rt.topic_in_search_results("pr pro", "general", "pro PRs"), true);
|
||||
assert.equal(rt.topic_in_search_results("pr pro pr pro", "general", "pro PRs"), false);
|
||||
assert.equal(rt.topic_in_search_results("co cows", "general", "one cow 2 cows"), true);
|
||||
assert.equal(rt.topic_in_search_results("cows cows", "general", "one cow 2 cows"), false);
|
||||
|
||||
assert.equal(rt.topic_in_search_results("?", "general", "Recent topic"), false);
|
||||
|
||||
// Test special character match
|
||||
assert.equal(rt.topic_in_search_results(".*+?^${}()[]\\", "general", "Recent topic"), false);
|
||||
assert.equal(rt.topic_in_search_results("?", "general", "not-at-start?"), true);
|
||||
assert.equal(rt.topic_in_search_results("?", "general", "?at-start"), true);
|
||||
|
||||
assert.equal(rt.topic_in_search_results("?", "general", "?"), true);
|
||||
assert.equal(rt.topic_in_search_results("?", "general", "\\?"), true);
|
||||
assert.equal(rt.topic_in_search_results("?", "general", "\\?"), false);
|
||||
|
||||
assert.equal(rt.topic_in_search_results("\\", "general", "\\"), true);
|
||||
assert.equal(rt.topic_in_search_results("\\", "general", "\\\\"), true);
|
||||
|
||||
@@ -272,3 +272,51 @@ run_test("sort_emojis: prioritise perfect matches", () => {
|
||||
];
|
||||
assert.deepEqual(typeahead.sort_emojis(emoji_list, "thank you"), emoji_list);
|
||||
});
|
||||
|
||||
// Exercises typeahead.last_prefix_match on sorted word lists: prefixes that
// match one or several entries, prefixes that match nothing (falling before,
// inside, and after the list), the empty prefix, and degenerate lists.
run_test("last_prefix_match", () => {
    let words = [
        "apple",
        "banana",
        "cantaloupe",
        "cherry",
        "kiwi",
        "melon",
        "pear",
        "plum",
        "raspberry",
        "watermelon",
    ];

    // "pear" and "plum" both match; the last one is at index 7.
    assert.equal(typeahead.last_prefix_match("p", words), 7);

    // Only "cherry" matches.
    assert.equal(typeahead.last_prefix_match("ch", words), 3);

    // No match, with the prefix sorting inside, before, and after the list.
    assert.equal(typeahead.last_prefix_match("pom", words), null);
    assert.equal(typeahead.last_prefix_match("aa", words), null);
    assert.equal(typeahead.last_prefix_match("zu", words), null);

    // The empty prefix matches every word, so the last index is returned.
    assert.equal(typeahead.last_prefix_match("", words), 9);

    // Single-element list with an exact match.
    words = ["one"];
    assert.equal(typeahead.last_prefix_match("one", words), 0);

    // A run of matches surrounded by non-matching words.
    words = ["aa", "pr", "pra", "pre", "pri", "pro", "pru", "zz"];
    assert.equal(typeahead.last_prefix_match("pr", words), 6);

    // Duplicates: the last duplicate's index is returned.
    words = ["same", "same", "same", "same", "same"];
    assert.equal(typeahead.last_prefix_match("same", words), 4);

    // Empty list never matches.
    words = [];
    assert.equal(typeahead.last_prefix_match("empty", words), null);
});
|
||||
|
||||
Reference in New Issue
Block a user