mirror of
				https://github.com/zulip/zulip.git
				synced 2025-11-04 05:53:43 +00:00 
			
		
		
		
	Fixes how user input is escaped in stream and topic regexes
NarrowBuilder.by_stream and NarrowBuilder.by_topic use a regex to search by stream and topic for MIT users. Python's re.escape escapes Unicode characters in a format that PostgreSQL cannot parse, so we escape them as '\uXXXX' for PostgreSQL instead. (imported from commit d2c27d4514c31fdc6ef1fea898fe721a6f0ab069)
This commit is contained in:
		
				
					committed by
					
						
						Waseem Daher
					
				
			
			
				
	
			
			
			
						parent
						
							3720cb1c33
						
					
				
				
					commit
					75d1366ae1
				
			@@ -2235,6 +2235,74 @@ class GetOldMessagesTest(AuthedTestCase):
 | 
			
		||||
            self.assertEqual(message["type"], "stream")
 | 
			
		||||
            self.assertEqual(message["recipient_id"], stream_id)
 | 
			
		||||
 | 
			
		||||
    def test_get_old_messages_with_narrow_stream_mit_unicode_regex(self):
        """
        A request for old messages for a user in the mit.edu realm with a
        unicode stream name should be correctly escaped in the database query.
        """
        email = "starnine@mit.edu"
        self.login(email)
        # We need to subscribe to a stream and then send a message to
        # it to ensure that we actually have a stream message in this
        # narrow view.
        realm = Realm.objects.get(domain="mit.edu")
        lambda_stream, _ = create_stream_if_needed(realm, u"\u03bb-stream")
        do_add_subscription(get_user_profile_by_email(email),
                            lambda_stream, no_log=True)

        # The ".d" variant must be matched by the same narrow regex.
        lambda_stream_d, _ = create_stream_if_needed(realm, u"\u03bb-stream.d")
        do_add_subscription(get_user_profile_by_email(email),
                            lambda_stream_d, no_log=True)

        self.send_message(email, u"\u03bb-stream", Recipient.STREAM)
        self.send_message(email, u"\u03bb-stream.d", Recipient.STREAM)

        result = self.post_with_params({"num_after": 2, "narrow": ujson.dumps(
                    [['stream', u'\u03bb-stream']])})
        self.check_well_formed_messages_response(result)

        messages = get_user_messages(get_user_profile_by_email(email))
        # Build a real list: the entries are indexed below, which a lazy
        # filter() result does not support on every Python version.
        stream_messages = [msg for msg in messages
                           if msg.recipient.type == Recipient.STREAM]

        self.assertEqual(len(result["messages"]), 2)
        for i, message in enumerate(result["messages"]):
            self.assertEqual(message["type"], "stream")
            stream_id = stream_messages[i].recipient.id
            self.assertEqual(message["recipient_id"], stream_id)
 | 
			
		||||
 | 
			
		||||
    def test_get_old_messages_with_narrow_topic_mit_unicode_regex(self):
        """
        A request for old messages for a user in the mit.edu realm with a
        unicode topic name should be correctly escaped in the database query.
        """
        email = "starnine@mit.edu"
        self.login(email)
        # We need to subscribe to a stream and then send a message to
        # it to ensure that we actually have a stream message in this
        # narrow view.
        realm = Realm.objects.get(domain="mit.edu")
        stream, _ = create_stream_if_needed(realm, "Scotland")
        do_add_subscription(get_user_profile_by_email(email),
                            stream, no_log=True)

        # One message per topic variant; both must match the topic narrow.
        self.send_message(email, "Scotland", Recipient.STREAM,
                          subject=u"\u03bb-topic")
        self.send_message(email, "Scotland", Recipient.STREAM,
                          subject=u"\u03bb-topic.d")

        result = self.post_with_params({"num_after": 2, "narrow": ujson.dumps(
                    [['topic', u'\u03bb-topic']])})
        self.check_well_formed_messages_response(result)

        messages = get_user_messages(get_user_profile_by_email(email))
        # Build a real list: the entries are indexed below, which a lazy
        # filter() result does not support on every Python version.
        stream_messages = [msg for msg in messages
                           if msg.recipient.type == Recipient.STREAM]
        self.assertEqual(len(result["messages"]), 2)
        for i, message in enumerate(result["messages"]):
            self.assertEqual(message["type"], "stream")
            stream_id = stream_messages[i].recipient.id
            self.assertEqual(message["recipient_id"], stream_id)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    def test_get_old_messages_with_narrow_sender(self):
 | 
			
		||||
        """
 | 
			
		||||
        A request for old messages with a narrow by sender only returns
 | 
			
		||||
 
 | 
			
		||||
@@ -77,6 +77,30 @@ class NarrowBuilder(object):
 | 
			
		||||
            return Q(flags=UserMessage.flags.mentioned)
 | 
			
		||||
        raise BadNarrowOperator("unknown 'is' operand " + operand)
 | 
			
		||||
 | 
			
		||||
    # ASCII letters and digits. _pg_re_escape leaves characters in this set
    # untouched and escapes everything else.
    _alphanum = frozenset(
        'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789')
 | 
			
		||||
 | 
			
		||||
    def _pg_re_escape(self, pattern):
 | 
			
		||||
        """
 | 
			
		||||
        Escape user input to place in a regex
 | 
			
		||||
 | 
			
		||||
        Python's re.escape escapes unicode characters in a way which postgres
 | 
			
		||||
        fails on, u'\u03bb' to u'\\\u03bb'. This function will correctly escape
 | 
			
		||||
        them for postgres, u'\u03bb' to u'\\u03bb'.
 | 
			
		||||
        """
 | 
			
		||||
        s = list(pattern)
 | 
			
		||||
        for i, c in enumerate(s):
 | 
			
		||||
            if c not in self._alphanum:
 | 
			
		||||
                if c == '\000':
 | 
			
		||||
                    s[1] = '\\000'
 | 
			
		||||
                elif ord(c) >= 128:
 | 
			
		||||
                    # convert the character to hex postgres regex will take
 | 
			
		||||
                    # \uXXXX
 | 
			
		||||
                    s[i] = '\\u{:0>4x}'.format(ord(c))
 | 
			
		||||
                else:
 | 
			
		||||
                    s[i] = '\\' + c
 | 
			
		||||
        return ''.join(s)
 | 
			
		||||
 | 
			
		||||
    def by_stream(self, operand):
 | 
			
		||||
        stream = get_stream(operand, self.user_profile.realm)
 | 
			
		||||
        if stream is None:
 | 
			
		||||
@@ -92,7 +116,7 @@ class NarrowBuilder(object):
 | 
			
		||||
                base_stream_name = stream.name
 | 
			
		||||
 | 
			
		||||
            matching_streams = Stream.objects.filter(realm=self.user_profile.realm,
 | 
			
		||||
                                                     name__iregex=r'^(un)*%s(\.d)*$' % (re.escape(base_stream_name),))
 | 
			
		||||
                                                     name__iregex=r'^(un)*%s(\.d)*$' % (self._pg_re_escape(base_stream_name),))
 | 
			
		||||
            matching_stream_ids = [matching_stream.id for matching_stream in matching_streams]
 | 
			
		||||
            recipients = bulk_get_recipients(Recipient.STREAM, matching_stream_ids).values()
 | 
			
		||||
            return self.pQ(recipient__in=recipients)
 | 
			
		||||
@@ -115,7 +139,7 @@ class NarrowBuilder(object):
 | 
			
		||||
            if base_topic in ('', 'personal', '(instance "")'):
 | 
			
		||||
                regex = r'^(|personal|\(instance ""\))(\.d)*$'
 | 
			
		||||
            else:
 | 
			
		||||
                regex = r'^%s(\.d)*$' % (re.escape(base_topic),)
 | 
			
		||||
                regex = r'^%s(\.d)*$' % (self._pg_re_escape(base_topic),)
 | 
			
		||||
 | 
			
		||||
            return self.pQ(subject__iregex=regex)
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user