Fixes how user input is escaped in stream and topic regexes

For MIT users, NarrowBuilder.by_stream and NarrowBuilder.by_topic use a
regex to search by stream and topic. Python's re.escape escapes non-ASCII
characters in a format that postgres cannot parse, so we escape them as
'\uXXXX' for postgres instead.

(imported from commit d2c27d4514c31fdc6ef1fea898fe721a6f0ab069)
This commit is contained in:
Jason Michalski
2014-01-07 16:15:22 -05:00
committed by Waseem Daher
parent 3720cb1c33
commit 75d1366ae1
2 changed files with 94 additions and 2 deletions

View File

@@ -2235,6 +2235,74 @@ class GetOldMessagesTest(AuthedTestCase):
self.assertEqual(message["type"], "stream")
self.assertEqual(message["recipient_id"], stream_id)
def test_get_old_messages_with_narrow_stream_mit_unicode_regex(self):
    """
    Narrowing by a stream with a unicode name, as a user in the mit.edu
    realm, should be correctly escaped in the database query.
    """
    email = "starnine@mit.edu"
    base_name = u"\u03bb-stream"
    dot_d_name = u"\u03bb-stream.d"

    self.login(email)

    # Subscribe to the stream and to its ".d" variant, then send one
    # message to each, so that the narrowed view actually contains
    # stream messages.
    mit_realm = Realm.objects.get(domain="mit.edu")

    base_stream, _ = create_stream_if_needed(mit_realm, base_name)
    do_add_subscription(get_user_profile_by_email(email),
                        base_stream, no_log=True)

    dot_d_stream, _ = create_stream_if_needed(mit_realm, dot_d_name)
    do_add_subscription(get_user_profile_by_email(email),
                        dot_d_stream, no_log=True)

    self.send_message(email, base_name, Recipient.STREAM)
    self.send_message(email, dot_d_name, Recipient.STREAM)

    narrow = ujson.dumps([['stream', base_name]])
    result = self.post_with_params({"num_after": 2, "narrow": narrow})
    self.check_well_formed_messages_response(result)

    messages = get_user_messages(get_user_profile_by_email(email))
    stream_messages = [msg for msg in messages
                       if msg.recipient.type == Recipient.STREAM]

    # The narrow is expected to return exactly two messages.
    self.assertEqual(len(result["messages"]), 2)
    for index, message in enumerate(result["messages"]):
        self.assertEqual(message["type"], "stream")
        expected_recipient_id = stream_messages[index].recipient.id
        self.assertEqual(message["recipient_id"], expected_recipient_id)
def test_get_old_messages_with_narrow_topic_mit_unicode_regex(self):
    """
    Narrowing by a topic with a unicode name, as a user in the mit.edu
    realm, should be correctly escaped in the database query.
    """
    email = "starnine@mit.edu"
    stream_name = "Scotland"
    base_topic = u"\u03bb-topic"
    dot_d_topic = u"\u03bb-topic.d"

    self.login(email)

    # Subscribe to a stream and send one message under the topic and one
    # under its ".d" variant, so that the narrowed view actually contains
    # stream messages.
    mit_realm = Realm.objects.get(domain="mit.edu")
    scotland, _ = create_stream_if_needed(mit_realm, stream_name)
    do_add_subscription(get_user_profile_by_email(email),
                        scotland, no_log=True)

    self.send_message(email, stream_name, Recipient.STREAM,
                      subject=base_topic)
    self.send_message(email, stream_name, Recipient.STREAM,
                      subject=dot_d_topic)

    narrow = ujson.dumps([['topic', base_topic]])
    result = self.post_with_params({"num_after": 2, "narrow": narrow})
    self.check_well_formed_messages_response(result)

    messages = get_user_messages(get_user_profile_by_email(email))
    stream_messages = [msg for msg in messages
                       if msg.recipient.type == Recipient.STREAM]

    # The narrow is expected to return exactly two messages.
    self.assertEqual(len(result["messages"]), 2)
    for index, message in enumerate(result["messages"]):
        self.assertEqual(message["type"], "stream")
        expected_recipient_id = stream_messages[index].recipient.id
        self.assertEqual(message["recipient_id"], expected_recipient_id)
def test_get_old_messages_with_narrow_sender(self):
"""
A request for old messages with a narrow by sender only returns

View File

@@ -77,6 +77,30 @@ class NarrowBuilder(object):
return Q(flags=UserMessage.flags.mentioned)
raise BadNarrowOperator("unknown 'is' operand " + operand)
# ASCII letters and digits: the only characters passed through unescaped.
_alphanum = frozenset(
    'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789')

def _pg_re_escape(self, pattern):
    r"""
    Escape user input so it can be placed inside a postgres regex.

    Python's re.escape handles a non-ASCII character by putting a
    backslash in front of the raw character, which postgres fails on.
    This function writes such characters as an ASCII escape sequence
    instead, e.g. u'\u03bb' becomes the six characters \u03bb.

    Each character of `pattern` is translated independently:

    * ASCII letters and digits are kept as they are;
    * NUL becomes \000;
    * code points from 128 up to 0xFFFF become \u followed by four
      hex digits;
    * code points above 0xFFFF become \U followed by eight hex digits;
    * every other character is prefixed with a backslash.

    Returns the escaped pattern as a single string.
    """
    escaped = []
    for c in pattern:
        if c in self._alphanum:
            escaped.append(c)
        elif c == '\000':
            escaped.append('\\000')
        elif ord(c) > 0xFFFF:
            # \u takes exactly four hex digits in a postgres regex, so
            # a wider code point needs the eight-digit \U form.
            escaped.append('\\U{:0>8x}'.format(ord(c)))
        elif ord(c) >= 128:
            # convert the character to the hex form postgres regex
            # will take: \u plus four hex digits
            escaped.append('\\u{:0>4x}'.format(ord(c)))
        else:
            escaped.append('\\' + c)
    return ''.join(escaped)
def by_stream(self, operand):
stream = get_stream(operand, self.user_profile.realm)
if stream is None:
@@ -92,7 +116,7 @@ class NarrowBuilder(object):
base_stream_name = stream.name
matching_streams = Stream.objects.filter(realm=self.user_profile.realm,
name__iregex=r'^(un)*%s(\.d)*$' % (re.escape(base_stream_name),))
name__iregex=r'^(un)*%s(\.d)*$' % (self._pg_re_escape(base_stream_name),))
matching_stream_ids = [matching_stream.id for matching_stream in matching_streams]
recipients = bulk_get_recipients(Recipient.STREAM, matching_stream_ids).values()
return self.pQ(recipient__in=recipients)
@@ -115,7 +139,7 @@ class NarrowBuilder(object):
if base_topic in ('', 'personal', '(instance "")'):
regex = r'^(|personal|\(instance ""\))(\.d)*$'
else:
regex = r'^%s(\.d)*$' % (re.escape(base_topic),)
regex = r'^%s(\.d)*$' % (self._pg_re_escape(base_topic),)
return self.pQ(subject__iregex=regex)