Move bulk_create_* methods to bulk_create.py.

(imported from commit 30d5f86209f9c8807516f0440276fafeb6590c1a)
This commit is contained in:
Tim Abbott
2013-01-10 15:50:09 -05:00
parent 21c2317bd2
commit 3c10a2e6a0
3 changed files with 141 additions and 137 deletions

View File

@@ -1,4 +1,9 @@
from django.conf import settings from django.conf import settings
from zephyr.models import Realm, Stream, User, UserProfile, Huddle, \
Subscription, Recipient, Client, Message, \
create_user_base, get_huddle_hash, initial_api_key, initial_password
# batch_bulk_create should become obsolete with Django 1.5, when the # batch_bulk_create should become obsolete with Django 1.5, when the
# Django bulk_create method accepts a batch_size directly. # Django bulk_create method accepts a batch_size directly.
def batch_bulk_create(cls, cls_list, batch_size=150): def batch_bulk_create(cls, cls_list, batch_size=150):
@@ -10,3 +15,135 @@ def batch_bulk_create(cls, cls_list, batch_size=150):
current_batch = cls_list[0:batch_size] current_batch = cls_list[0:batch_size]
cls.objects.bulk_create(current_batch) cls.objects.bulk_create(current_batch)
cls_list = cls_list[batch_size:] cls_list = cls_list[batch_size:]
def bulk_create_realms(realm_list):
    """Insert a Realm row for every domain in realm_list that lacks one.

    realm_list is an iterable of domain strings.  Domains that are already
    stored, and repeats within realm_list itself, are skipped.  The new rows
    are written in batches through batch_bulk_create.
    """
    known_domains = set(
        realm.domain for realm in Realm.objects.select_related().all())

    pending = []
    for domain in realm_list:
        if domain in known_domains:
            continue
        pending.append(Realm(domain=domain))
        # Remember the domain so a later repeat is not queued a second time.
        known_domains.add(domain)

    batch_bulk_create(Realm, pending)
def bulk_create_users(realms, users_raw):
    """Bulk-create Users with their UserProfile, personal Recipient and Subscription.

    Some of this is based on Django's UserManager.create_user, but the rows
    are inserted in batches instead of calling .save() on each object.

    realms    -- mapping from email domain to the object stored as
                 UserProfile.realm for users of that domain
    users_raw -- iterable of (email, full_name, short_name, active) tuples

    Emails that already have a User row, and repeats within users_raw, are
    skipped.  Raises KeyError when an email's domain is missing from realms.
    """
    # Keep only the rows whose email is not taken yet (first occurrence wins).
    users = []
    existing_users = set(u.email for u in User.objects.all())
    for (email, full_name, short_name, active) in users_raw:
        if email in existing_users:
            continue
        users.append((email, full_name, short_name, active))
        existing_users.add(email)

    users_to_create = [
        create_user_base(email, initial_password(email), active=active)
        for (email, _, _, active) in users]
    batch_bulk_create(User, users_to_create, 30)

    # Re-read the users so the profiles below reference the stored rows.
    users_by_email = dict((user.email, user) for user in User.objects.all())

    # Now create user_profiles
    profiles_to_create = []
    for (email, full_name, short_name, _) in users:
        domain = email.split('@')[1]
        profile = UserProfile(user=users_by_email[email], pointer=-1,
                              realm=realms[domain],
                              full_name=full_name, short_name=short_name)
        profile.api_key = initial_api_key(email)
        profiles_to_create.append(profile)
    batch_bulk_create(UserProfile, profiles_to_create, 50)

    profiles_by_email = {}
    profiles_by_id = {}
    for profile in UserProfile.objects.select_related().all():
        profiles_by_email[profile.user.email] = profile
        # Key by the profile's own id: the personal Recipient rows below store
        # the profile id in type_id, and that is what the lookup uses.  Keying
        # by profile.user.id only works while both id sequences coincide.
        profiles_by_id[profile.id] = profile

    # One personal Recipient per new user, pointing at the user's profile.
    recipients_to_create = [
        Recipient(type_id=profiles_by_email[email].id, type=Recipient.PERSONAL)
        for (email, _, _, _) in users]
    batch_bulk_create(Recipient, recipients_to_create)

    recipients_by_email = {}
    for recipient in Recipient.objects.filter(type=Recipient.PERSONAL):
        owner = profiles_by_id[recipient.type_id]
        recipients_by_email[owner.user.email] = recipient

    # Subscribe every new user to their own personal recipient.
    subscriptions_to_create = [
        Subscription(user_profile_id=profiles_by_email[email].id,
                     recipient=recipients_by_email[email])
        for (email, _, _, _) in users]
    batch_bulk_create(Subscription, subscriptions_to_create)
def bulk_create_streams(realms, stream_list):
    """Bulk-create Streams and a STREAM Recipient for each new one.

    realms      -- mapping from domain to the object stored as Stream.realm
    stream_list -- iterable of (domain, name) pairs

    A stream is identified by (domain, lowercased name).  Pairs that already
    exist, and repeats within stream_list, are skipped; the first spelling of
    a name wins.  Raises KeyError when a domain is missing from realms.
    """
    # Snapshot of the streams present before this call.  It must not be
    # modified: the Recipient pass below uses it to recognise the new rows.
    existing_streams = set((stream.realm.domain, stream.name.lower())
                           for stream in Stream.objects.select_related().all())

    # Repeats within stream_list are tracked separately from the snapshot so
    # the same stream is never queued twice.
    queued = set()
    streams_to_create = []
    for (domain, name) in stream_list:
        key = (domain, name.lower())
        if key in existing_streams or key in queued:
            continue
        queued.add(key)
        streams_to_create.append(Stream(realm=realms[domain], name=name))
    batch_bulk_create(Stream, streams_to_create)

    # Every stream that is not in the snapshot gets a Recipient row.
    recipients_to_create = []
    for stream in Stream.objects.select_related().all():
        if (stream.realm.domain, stream.name.lower()) not in existing_streams:
            recipients_to_create.append(Recipient(type_id=stream.id,
                                                  type=Recipient.STREAM))
    batch_bulk_create(Recipient, recipients_to_create)
def bulk_create_clients(client_list):
    """Insert a Client row for every name in client_list that lacks one.

    client_list is an iterable of client names.  Names that are already
    stored, and repeats within client_list itself, are skipped.  The new rows
    are written in batches through batch_bulk_create.
    """
    known_names = set(
        client.name for client in Client.objects.select_related().all())

    pending = []
    for name in client_list:
        if name in known_names:
            continue
        pending.append(Client(name=name))
        # Remember the name so a later repeat is not queued a second time.
        known_names.add(name)

    batch_bulk_create(Client, pending)
def bulk_create_huddles(users, huddle_user_list):
    """Bulk-create Huddles with their HUDDLE Recipients and Subscriptions.

    users            -- mapping from email to an object with an ``id``; that
                        id is hashed and also stored as the subscription's
                        user_profile_id
    huddle_user_list -- iterable of email collections, one per huddle

    Huddles whose hash is already stored are skipped, and identical
    (hash, sorted ids) combinations in the input collapse into one.
    """
    known_hashes = set(huddle.huddle_hash for huddle in Huddle.objects.all())

    # (huddle_hash, sorted member ids) for each huddle that has to be created.
    new_huddles = set()
    for member_emails in huddle_user_list:
        member_ids = [users[email].id for email in member_emails]
        digest = get_huddle_hash(member_ids)
        if digest not in known_hashes:
            new_huddles.add((digest, tuple(sorted(member_ids))))

    batch_bulk_create(
        Huddle, [Huddle(huddle_hash=digest) for (digest, _) in new_huddles])

    # Index the stored huddles both by hash and by primary key.
    huddle_by_hash = {}
    huddle_by_pk = {}
    for huddle in Huddle.objects.all():
        huddle_by_hash[huddle.huddle_hash] = huddle
        huddle_by_pk[huddle.id] = huddle

    batch_bulk_create(
        Recipient,
        [Recipient(type_id=huddle_by_hash[digest].id, type=Recipient.HUDDLE)
         for (digest, _) in new_huddles])

    # Map each huddle hash to its Recipient row.
    recipient_by_hash = {}
    for recipient in Recipient.objects.filter(type=Recipient.HUDDLE):
        owner = huddle_by_pk[recipient.type_id]
        recipient_by_hash[owner.huddle_hash] = recipient

    # One active subscription per member of each new huddle.
    pending_subscriptions = []
    for (digest, member_ids) in new_huddles:
        for member_id in member_ids:
            pending_subscriptions.append(
                Subscription(active=True, user_profile_id=member_id,
                             recipient=recipient_by_hash[digest]))
    batch_bulk_create(Subscription, pending_subscriptions)

View File

@@ -5,14 +5,14 @@ from django.contrib.auth.models import User
from django.contrib.sites.models import Site from django.contrib.sites.models import Site
from zephyr.models import Message, UserProfile, Stream, Recipient, Client, \ from zephyr.models import Message, UserProfile, Stream, Recipient, Client, \
Subscription, Huddle, get_huddle, Realm, UserMessage, get_user_profile_by_id, \ Subscription, Huddle, get_huddle, Realm, UserMessage, get_user_profile_by_id, \
bulk_create_realms, bulk_create_streams, bulk_create_users, bulk_create_huddles, \ do_send_message, clear_database, StreamColor, set_default_streams, \
bulk_create_clients, set_default_streams, \
do_send_message, clear_database, StreamColor, \
get_huddle_hash, get_client, do_activate_user get_huddle_hash, get_client, do_activate_user
from zephyr.lib.parallel import run_parallel from zephyr.lib.parallel import run_parallel
from django.db import transaction, connection from django.db import transaction, connection
from django.conf import settings from django.conf import settings
from zephyr.lib.bulk_create import batch_bulk_create from zephyr.lib.bulk_create import batch_bulk_create, bulk_create_realms, \
bulk_create_streams, bulk_create_users, bulk_create_huddles, \
bulk_create_clients
from zephyr.lib.time import timestamp_to_datetime from zephyr.lib.time import timestamp_to_datetime
from zephyr.models import MAX_MESSAGE_LENGTH from zephyr.models import MAX_MESSAGE_LENGTH

View File

@@ -9,7 +9,6 @@ import os
import simplejson import simplejson
from django.db import transaction, IntegrityError from django.db import transaction, IntegrityError
from zephyr.lib import bugdown from zephyr.lib import bugdown
from zephyr.lib.bulk_create import batch_bulk_create
from zephyr.lib.avatar import gravatar_hash from zephyr.lib.avatar import gravatar_hash
from zephyr.lib.context_managers import lockfile from zephyr.lib.context_managers import lockfile
import requests import requests
@@ -55,16 +54,6 @@ class Realm(models.Model):
def __str__(self): def __str__(self):
return self.__repr__() return self.__repr__()
# Removed-side copy in this diff: per the commit message the bulk_create_*
# functions move to bulk_create.py, and the other changed file now imports
# this name from zephyr.lib.bulk_create.
# Creates a Realm for each domain in realm_list that does not have one yet.
def bulk_create_realms(realm_list):
# Domains that already have a Realm row.
existing_realms = set(r.domain for r in Realm.objects.select_related().all())
realms_to_create = []
for domain in realm_list:
if domain not in existing_realms:
realms_to_create.append(Realm(domain=domain))
# Record the domain so a repeat within realm_list is not queued twice.
existing_realms.add(domain)
batch_bulk_create(Realm, realms_to_create)
class UserProfile(models.Model): class UserProfile(models.Model):
user = models.OneToOneField(User) user = models.OneToOneField(User)
full_name = models.CharField(max_length=100) full_name = models.CharField(max_length=100)
@@ -195,63 +184,6 @@ def create_mit_user_if_needed(realm, email):
transaction.commit() transaction.commit()
return UserProfile.objects.get(user__email=email) return UserProfile.objects.get(user__email=email)
# Removed-side copy in this diff: per the commit message the bulk_create_*
# functions move to bulk_create.py, and the other changed file now imports
# this name from zephyr.lib.bulk_create.
def bulk_create_users(realms, users_raw):
"""
Bulk-creates a User, UserProfile, personal Recipient and Subscription for
each (email, full_name, short_name, active) tuple in users_raw whose email
has no User yet.  realms is indexed by the email's domain.
Has some code based off of UserManage.create_user (presumably Django's
UserManager.create_user), but doesn't .save()
"""
users = []
# Emails already taken; also used to drop repeats within users_raw.
existing_users = set(u.email for u in User.objects.all())
for (email, full_name, short_name, active) in users_raw:
if email in existing_users:
continue
users.append((email, full_name, short_name, active))
existing_users.add(email)
users_to_create = []
for (email, full_name, short_name, active) in users:
users_to_create.append(create_user_base(email, initial_password(email),
active=active))
batch_bulk_create(User, users_to_create, 30)
# Re-read the users so the profiles below reference the stored rows.
users_by_email = {}
for user in User.objects.all():
users_by_email[user.email] = user
# Now create user_profiles
profiles_to_create = []
for (email, full_name, short_name, active) in users:
# The realm is chosen by the part of the email after the '@'.
domain = email.split('@')[1]
profile = UserProfile(user=users_by_email[email], pointer=-1,
realm=realms[domain],
full_name=full_name, short_name=short_name)
profile.api_key = initial_api_key(email)
profiles_to_create.append(profile)
batch_bulk_create(UserProfile, profiles_to_create, 50)
profiles_by_email = {}
profiles_by_id = {}
for profile in UserProfile.objects.select_related().all():
profiles_by_email[profile.user.email] = profile
# NOTE(review): keyed by the *user* id, but the lookup further down uses
# recipient.type_id, which is filled from the *profile* id -- these only
# agree while both id sequences coincide; confirm and key by profile.id.
profiles_by_id[profile.user.id] = profile
# One personal Recipient per new user, pointing at the user's profile id.
recipients_to_create = []
for (email, _, _, _) in users:
recipients_to_create.append(Recipient(type_id=profiles_by_email[email].id,
type=Recipient.PERSONAL))
batch_bulk_create(Recipient, recipients_to_create)
recipients_by_email = {}
for recipient in Recipient.objects.filter(type=Recipient.PERSONAL):
recipients_by_email[profiles_by_id[recipient.type_id].user.email] = recipient
# Subscribe every new user to their own personal recipient.
subscriptions_to_create = []
for (email, _, _, _) in users:
subscriptions_to_create.append(
Subscription(user_profile_id=profiles_by_email[email].id,
recipient=recipients_by_email[email]))
batch_bulk_create(Subscription, subscriptions_to_create)
def create_stream_if_needed(realm, stream_name): def create_stream_if_needed(realm, stream_name):
(stream, created) = Stream.objects.get_or_create( (stream, created) = Stream.objects.get_or_create(
realm=realm, name__iexact=stream_name, realm=realm, name__iexact=stream_name,
@@ -260,22 +192,6 @@ def create_stream_if_needed(realm, stream_name):
Recipient.objects.create(type_id=stream.id, type=Recipient.STREAM) Recipient.objects.create(type_id=stream.id, type=Recipient.STREAM)
return stream return stream
# Removed-side copy in this diff: per the commit message the bulk_create_*
# functions move to bulk_create.py, and the other changed file now imports
# this name from zephyr.lib.bulk_create.
# Creates a Stream, and a STREAM Recipient, for each (domain, name) pair in
# stream_list that does not exist yet; names are compared lowercased.
def bulk_create_streams(realms, stream_list):
# Snapshot of the streams present before this call; it is not updated below.
existing_streams = set((stream.realm.domain, stream.name.lower())
for stream in Stream.objects.select_related().all())
streams_to_create = []
for (domain, name) in stream_list:
# NOTE(review): repeats within stream_list are not filtered here, unlike
# in bulk_create_realms and bulk_create_clients.
if (domain, name.lower()) not in existing_streams:
streams_to_create.append(Stream(realm=realms[domain], name=name))
batch_bulk_create(Stream, streams_to_create)
recipients_to_create = []
for stream in Stream.objects.select_related().all():
# Anything missing from the snapshot is treated as new and gets a Recipient.
if (stream.realm.domain, stream.name.lower()) not in existing_streams:
recipients_to_create.append(Recipient(type_id=stream.id,
type=Recipient.STREAM))
batch_bulk_create(Recipient, recipients_to_create)
class Stream(models.Model): class Stream(models.Model):
name = models.CharField(max_length=30, db_index=True) name = models.CharField(max_length=30, db_index=True)
realm = models.ForeignKey(Realm, db_index=True) realm = models.ForeignKey(Realm, db_index=True)
@@ -342,16 +258,6 @@ def get_client(name):
return Client.objects.get(name=name) return Client.objects.get(name=name)
return client return client
# Removed-side copy in this diff: per the commit message the bulk_create_*
# functions move to bulk_create.py, and the other changed file now imports
# this name from zephyr.lib.bulk_create.
# Creates a Client for each name in client_list that does not have one yet.
def bulk_create_clients(client_list):
# Names that already have a Client row.
existing_clients = set(client.name for client in Client.objects.select_related().all())
clients_to_create = []
for name in client_list:
if name not in existing_clients:
clients_to_create.append(Client(name=name))
# Record the name so a repeat within client_list is not queued twice.
existing_clients.add(name)
batch_bulk_create(Client, clients_to_create)
def linebreak(string): def linebreak(string):
return string.replace('\n\n', '<p/>').replace('\n', '<br/>') return string.replace('\n\n', '<p/>').replace('\n', '<br/>')
@@ -659,45 +565,6 @@ def get_huddle(id_list):
user_profile = UserProfile.objects.get(id=uid)) user_profile = UserProfile.objects.get(id=uid))
return huddle return huddle
# Removed-side copy in this diff: per the commit message the bulk_create_*
# functions move to bulk_create.py, and the other changed file now imports
# this name from zephyr.lib.bulk_create.
# Creates a Huddle, a HUDDLE Recipient and one Subscription per member for
# each entry of huddle_user_list whose huddle hash is not stored yet.
# users is indexed by email and must yield objects with an ``id``.
def bulk_create_huddles(users, huddle_user_list):
huddles = {}
huddles_by_id = {}
huddle_set = set()
existing_huddles = set()
for huddle in Huddle.objects.all():
existing_huddles.add(huddle.huddle_hash)
for huddle_users in huddle_user_list:
user_ids = [users[email].id for email in huddle_users]
huddle_hash = get_huddle_hash(user_ids)
if huddle_hash in existing_huddles:
continue
# A set of (hash, sorted ids) tuples, so identical entries collapse into one.
huddle_set.add((huddle_hash, tuple(sorted(user_ids))))
huddles_to_create = []
for (huddle_hash, _) in huddle_set:
huddles_to_create.append(Huddle(huddle_hash=huddle_hash))
batch_bulk_create(Huddle, huddles_to_create)
# Index the stored huddles by hash and by id for the lookups below.
for huddle in Huddle.objects.all():
huddles[huddle.huddle_hash] = huddle
huddles_by_id[huddle.id] = huddle
recipients_to_create = []
for (huddle_hash, _) in huddle_set:
recipients_to_create.append(Recipient(type_id=huddles[huddle_hash].id, type=Recipient.HUDDLE))
batch_bulk_create(Recipient, recipients_to_create)
# Map each huddle hash to its Recipient row.
huddle_recipients = {}
for recipient in Recipient.objects.filter(type=Recipient.HUDDLE):
huddle_recipients[huddles_by_id[recipient.type_id].huddle_hash] = recipient
subscriptions_to_create = []
for (huddle_hash, huddle_user_ids) in huddle_set:
for user_id in huddle_user_ids:
# NOTE(review): users[email].id is stored as user_profile_id -- presumably
# users maps emails to UserProfile objects; verify against the caller.
subscriptions_to_create.append(Subscription(active=True, user_profile_id=user_id,
recipient=huddle_recipients[huddle_hash]))
batch_bulk_create(Subscription, subscriptions_to_create)
# This function is used only by tests. # This function is used only by tests.
# We have faster implementations within the app itself. # We have faster implementations within the app itself.
def filter_by_subscriptions(messages, user): def filter_by_subscriptions(messages, user):