Always give hashlib.sha1 and friends bytes.

This fixes a bug we hit in practice, where you couldn't subscribe to a
stream whose name contained non-ASCII characters (it failed with a
UnicodeEncodeError), as well as many other potential bugs of the same kind.

(imported from commit f084a4b4b597b85935655097a7b5a163811c4d71)
This commit is contained in:
Jessica McKellar
2013-03-20 10:31:27 -04:00
parent 55240e3de2
commit 0c3382fabb
4 changed files with 27 additions and 7 deletions

View File

@@ -1,5 +1,11 @@
import hashlib import hashlib
from utils import make_safe_digest
def gravatar_hash(email): def gravatar_hash(email):
"""Compute the Gravatar hash for an email address.""" """Compute the Gravatar hash for an email address."""
return hashlib.md5(email.lower()).hexdigest() # Non-ASCII characters aren't permitted by the currently active e-mail
# RFCs. However, the IETF has published https://tools.ietf.org/html/rfc4952,
# outlining internationalization of email addresses. Regardless, whether we
# typo an address or someone manages to give us a non-ASCII address, let's
# not error out on it.
return make_safe_digest(email.lower(), hashlib.md5)

View File

@@ -1,9 +1,10 @@
from functools import wraps from functools import wraps
import hashlib
from django.core.cache import cache as djcache from django.core.cache import cache as djcache
from django.core.cache import get_cache from django.core.cache import get_cache
from utils import make_safe_digest
def cache_with_key(keyfunc, cache_name=None, timeout=None): def cache_with_key(keyfunc, cache_name=None, timeout=None):
"""Decorator which applies Django caching to a function. """Decorator which applies Django caching to a function.
@@ -56,7 +57,10 @@ def message_cache_key(message_id):
return "message:%d" % (message_id,) return "message:%d" % (message_id,)
def user_profile_by_email_cache_key(email): def user_profile_by_email_cache_key(email):
return 'user_profile_by_email:%s' % (hashlib.sha1(email).hexdigest(),) # See the comment in zephyr/lib/avatar.py:gravatar_hash for why we
# are proactively encoding email addresses even though they will
# with high likelihood be ASCII-only for the foreseeable future.
return 'user_profile_by_email:%s' % (make_safe_digest(email),)
def user_profile_by_user_cache_key(user_id): def user_profile_by_user_cache_key(user_id):
return 'user_profile_by_user_id:%d' % (user_id,) return 'user_profile_by_user_id:%d' % (user_id,)

View File

@@ -1,4 +1,5 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import hashlib
from time import sleep from time import sleep
# Runs the callback with slices of all_list of a given batch_size # Runs the callback with slices of all_list of a given batch_size
@@ -21,3 +22,11 @@ def run_in_batches(all_list, batch_size, callback, sleep_time = 0, logger = None
if i != limit - 1: if i != limit - 1:
sleep(sleep_time) sleep(sleep_time)
def make_safe_digest(string, hash_func=hashlib.sha1):
    """
    Return a hex digest of `string`.

    `string` may be text or bytes. Text is encoded as UTF-8 before
    hashing; bytes are hashed as-is. `hash_func` is a hashlib-style
    constructor (hashlib.sha1 by default) that accepts bytes and whose
    result provides hexdigest().
    """
    # hashlib.sha1, md5, etc. expect bytes, so text must be encoded.
    # Bytes are passed through untouched: calling .encode() on them
    # fails outright on Python 3, and on Python 2 it performs an implicit
    # ASCII decode first, which raises for any non-ASCII byte.
    if not isinstance(string, bytes):
        string = string.encode('utf-8')
    return hash_func(string).hexdigest()

View File

@@ -1,10 +1,10 @@
from django.db import models from django.db import models
from django.conf import settings from django.conf import settings
from django.contrib.auth.models import User from django.contrib.auth.models import User
import hashlib
from zephyr.lib.cache import cache_with_key, update_user_profile_cache, \ from zephyr.lib.cache import cache_with_key, update_user_profile_cache, \
update_user_cache update_user_cache
from zephyr.lib.initial_password import initial_api_key from zephyr.lib.initial_password import initial_api_key
from zephyr.lib.utils import make_safe_digest
import os import os
from django.db import transaction, IntegrityError from django.db import transaction, IntegrityError
from zephyr.lib import bugdown from zephyr.lib import bugdown
@@ -158,7 +158,7 @@ class Recipient(models.Model):
class Client(models.Model): class Client(models.Model):
name = models.CharField(max_length=30, db_index=True, unique=True) name = models.CharField(max_length=30, db_index=True, unique=True)
@cache_with_key(lambda name: 'get_client:%s' % (hashlib.sha1(name).hexdigest(),)) @cache_with_key(lambda name: 'get_client:%s' % (make_safe_digest(name),))
@transaction.commit_on_success @transaction.commit_on_success
def get_client(name): def get_client(name):
try: try:
@@ -181,7 +181,8 @@ def get_stream_cache_key(stream_name, realm):
realm_id = realm.id realm_id = realm.id
else: else:
realm_id = realm realm_id = realm
return "stream_by_realm_and_name:%s:%s" % (realm_id, hashlib.sha1(stream_name.strip().lower()).hexdigest()) return "stream_by_realm_and_name:%s:%s" % (
realm_id, make_safe_digest(stream_name.strip().lower()))
# get_stream_backend takes either a realm id or a realm # get_stream_backend takes either a realm id or a realm
@cache_with_key(get_stream_cache_key) @cache_with_key(get_stream_cache_key)
@@ -333,7 +334,7 @@ class Huddle(models.Model):
def get_huddle_hash(id_list): def get_huddle_hash(id_list):
id_list = sorted(set(id_list)) id_list = sorted(set(id_list))
hash_key = ",".join(str(x) for x in id_list) hash_key = ",".join(str(x) for x in id_list)
return hashlib.sha1(hash_key).hexdigest() return make_safe_digest(hash_key)
def get_huddle(id_list): def get_huddle(id_list):
huddle_hash = get_huddle_hash(id_list) huddle_hash = get_huddle_hash(id_list)