From 2aadf6fc6eef36dfc8f276367e3044ab5984f3ef Mon Sep 17 00:00:00 2001 From: Zev Benjamin Date: Fri, 26 Apr 2013 16:28:55 -0400 Subject: [PATCH] [schema] [manual] Create a Postgres text search configuration for use with Humbug Text search was not that great, partially because Postgres wasn't using an ispell dictionary (Postgres term) before. We now pull in Hunspell and use its dictionary and affix rules. It is OK to run with this new configuration before updating our full text column and index that will be coming in the next few commits. Manual steps for deploy: 1) On both postgres0 and postgres1 (both before moving on to step 2), install the hunspell-en-us package 2) On staging, run migration 0022 3) On both postgres0 and postgres1, copy the appropriate postgresql.conf file over 4) On both postgres0 and postgres1, run `pg_ctlcluster 9.1 main reload` (imported from commit 706bf0f6ecc46c712cea10b73c34fd9d1dfd4767) --- .../files/postgresql/postgresql.conf.master | 3 +- .../files/postgresql/postgresql.conf.slave | 3 +- .../humbug/manifests/postgres-common.pp | 2 +- .../0022_create_humbug_search_config.py | 153 ++++++++++++++++++ 4 files changed, 158 insertions(+), 3 deletions(-) create mode 100644 zephyr/migrations/0022_create_humbug_search_config.py diff --git a/servers/puppet/modules/humbug/files/postgresql/postgresql.conf.master b/servers/puppet/modules/humbug/files/postgresql/postgresql.conf.master index c537fa844f..6b1106bde3 100644 --- a/servers/puppet/modules/humbug/files/postgresql/postgresql.conf.master +++ b/servers/puppet/modules/humbug/files/postgresql/postgresql.conf.master @@ -499,7 +499,7 @@ lc_numeric = 'en_US.UTF-8' # locale for number formatting lc_time = 'en_US.UTF-8' # locale for time formatting # default configuration for text search -default_text_search_config = 'pg_catalog.english' +#default_text_search_config = 'pg_catalog.english' # - Other Defaults - @@ -564,6 +564,7 @@ shared_buffers = 7680MB # pgtune wizard 2013-04-12 max_connections = 200 #
pgtune wizard 2013-04-12 timezone = 'UTC' +default_text_search_config = 'humbug.english_us_search' # From http://www.databasesoup.com/2012/05/random-page-cost-revisited.html # This needs actual measurement, as bonnie++ random seek latency is # actually a read/write test diff --git a/servers/puppet/modules/humbug/files/postgresql/postgresql.conf.slave b/servers/puppet/modules/humbug/files/postgresql/postgresql.conf.slave index 94232b671a..886dc4ac6d 100644 --- a/servers/puppet/modules/humbug/files/postgresql/postgresql.conf.slave +++ b/servers/puppet/modules/humbug/files/postgresql/postgresql.conf.slave @@ -499,7 +499,7 @@ lc_numeric = 'en_US.UTF-8' # locale for number formatting lc_time = 'en_US.UTF-8' # locale for time formatting # default configuration for text search -default_text_search_config = 'pg_catalog.english' +#default_text_search_config = 'pg_catalog.english' # - Other Defaults - @@ -564,6 +564,7 @@ shared_buffers = 1792MB # pgtune wizard 2012-12-12 max_connections = 200 # pgtune wizard 2012-12-12 timezone = 'UTC' +default_text_search_config = 'humbug.english_us_search' # EBS supposedly has very similar sequential and random access performance random_page_cost = 1.1 listen_addresses = '*' diff --git a/servers/puppet/modules/humbug/manifests/postgres-common.pp b/servers/puppet/modules/humbug/manifests/postgres-common.pp index 11d80ccabb..c5fbdca92e 100644 --- a/servers/puppet/modules/humbug/manifests/postgres-common.pp +++ b/servers/puppet/modules/humbug/manifests/postgres-common.pp @@ -3,7 +3,7 @@ class humbug::postgres-common { $postgres_packages = [ "postgresql-9.1", "pgtune", "python-boto", "python-argparse", "python-gevent/squeeze-backports", - "lzop", "pv"] + "lzop", "pv", "hunspell-en-us"] package { $postgres_packages: ensure => "installed" } exec {"pip_wal-e": diff --git a/zephyr/migrations/0022_create_humbug_search_config.py b/zephyr/migrations/0022_create_humbug_search_config.py new file mode 100644 index 0000000000..648e373b28 --- /dev/null 
+++ b/zephyr/migrations/0022_create_humbug_search_config.py
@@ -0,0 +1,153 @@
+# -*- coding: utf-8 -*-
+import datetime
+from south.db import db
+from south.v2 import SchemaMigration
+from django.db import models
+from django.conf import settings
+
+class Migration(SchemaMigration):  # Creates (forwards) / drops (backwards) the humbug.english_us_search text search configuration and its hunspell dictionary.
+
+    def forwards(self, orm):  # `orm` is accepted but not used; all work is raw SQL issued through south's `db`.
+        if "postgres" not in settings.DATABASES["default"]["ENGINE"]:  # the DDL below is PostgreSQL text-search syntax, so other engines are skipped
+            return
+
+        db.execute("""CREATE TEXT SEARCH DICTIONARY english_us_hunspell (template = ispell,
+                      DictFile = en_us, AffFile = en_us, StopWords = english)""")  # ispell-template dictionary over the en_us dict/affix files. NOTE(review): name is not schema-qualified, unlike humbug.english_us_search below -- confirm search_path puts it in the intended schema.
+        db.execute("CREATE TEXT SEARCH CONFIGURATION humbug.english_us_search (COPY=pg_catalog.english)")  # start from a copy of the stock English configuration
+        db.execute("""ALTER TEXT SEARCH CONFIGURATION humbug.english_us_search ALTER MAPPING FOR
+                      asciiword, asciihword, hword_asciipart, word, hword, hword_part
+                      WITH english_us_hunspell, english_stem""")  # for these token types, list the hunspell dictionary first and english_stem second
+
+    def backwards(self, orm):  # `orm` is accepted but not used.
+        if "postgres" not in settings.DATABASES["default"]["ENGINE"]:  # mirrors the guard in forwards(): nothing was created on other engines
+            return
+
+        db.execute("DROP TEXT SEARCH CONFIGURATION humbug.english_us_search")  # dropped first: its mappings reference the dictionary removed on the next line
+        db.execute("DROP TEXT SEARCH DICTIONARY english_us_hunspell")
+
+    models = {  # presumably South's auto-generated frozen model state for the app; not referenced by forwards()/backwards() above
+        u'zephyr.client': {
+            'Meta': {'object_name': 'Client'},
+            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '30', 'db_index': 'True'})
+        },
+        u'zephyr.defaultstream': {
+            'Meta': {'unique_together': "(('realm', 'stream'),)", 'object_name': 'DefaultStream'},
+            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'realm': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['zephyr.Realm']"}),
+            'stream': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['zephyr.Stream']"})
+        },
+        u'zephyr.huddle': {
+            'Meta': {'object_name': 'Huddle'},
+            'huddle_hash': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '40', 'db_index': 'True'}),
+            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'})
+        },
+        u'zephyr.message': {
+            'Meta': {'object_name': 'Message'},
+            'content': ('django.db.models.fields.TextField', [], {}),
+            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'pub_date': ('django.db.models.fields.DateTimeField', [], {'db_index': 'True'}),
+            'recipient': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['zephyr.Recipient']"}),
+            'rendered_content': ('django.db.models.fields.TextField', [], {'null': 'True'}),
+            'rendered_content_version': ('django.db.models.fields.IntegerField', [], {'null': 'True'}),
+            'sender': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['zephyr.UserProfile']"}),
+            'sending_client': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['zephyr.Client']"}),
+            'subject': ('django.db.models.fields.CharField', [], {'max_length': '60', 'db_index': 'True'})
+        },
+        u'zephyr.mituser': {
+            'Meta': {'object_name': 'MitUser'},
+            'email': ('django.db.models.fields.EmailField', [], {'unique': 'True', 'max_length': '75'}),
+            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'status': ('django.db.models.fields.IntegerField', [], {'default': '0'})
+        },
+        u'zephyr.preregistrationuser': {
+            'Meta': {'object_name': 'PreregistrationUser'},
+            'email': ('django.db.models.fields.EmailField', [], {'max_length': '75'}),
+            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'invited_at': ('django.db.models.fields.DateTimeField', [], {'auto_now': 'True', 'blank': 'True'}),
+            'referred_by': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['zephyr.UserProfile']", 'null': 'True'}),
+            'status': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
+            'streams': ('django.db.models.fields.related.ManyToManyField', [], {'to': u"orm['zephyr.Stream']", 'null': 'True', 'symmetrical': 'False'})
+        },
+        u'zephyr.realm': {
+            'Meta': {'object_name': 'Realm'},
+            'domain': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '40', 'db_index': 'True'}),
+            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'restricted_to_domain': ('django.db.models.fields.BooleanField', [], {'default': 'True'})
+        },
+        u'zephyr.recipient': {
+            'Meta': {'unique_together': "(('type', 'type_id'),)", 'object_name': 'Recipient'},
+            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'type': ('django.db.models.fields.PositiveSmallIntegerField', [], {'db_index': 'True'}),
+            'type_id': ('django.db.models.fields.IntegerField', [], {'db_index': 'True'})
+        },
+        u'zephyr.stream': {
+            'Meta': {'unique_together': "(('name', 'realm'),)", 'object_name': 'Stream'},
+            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'invite_only': ('django.db.models.fields.NullBooleanField', [], {'default': 'False', 'null': 'True', 'blank': 'True'}),
+            'name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'db_index': 'True'}),
+            'realm': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['zephyr.Realm']"})
+        },
+        u'zephyr.streamcolor': {
+            'Meta': {'object_name': 'StreamColor'},
+            'color': ('django.db.models.fields.CharField', [], {'max_length': '10'}),
+            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'subscription': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['zephyr.Subscription']"})
+        },
+        u'zephyr.subscription': {
+            'Meta': {'unique_together': "(('user_profile', 'recipient'),)", 'object_name': 'Subscription'},
+            'active': ('django.db.models.fields.BooleanField', [], {'default': 'True'}),
+            'color': ('django.db.models.fields.CharField', [], {'default': "'#c2c2c2'", 'max_length': '10'}),
+            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'in_home_view': ('django.db.models.fields.NullBooleanField', [], {'default': 'True', 'null': 'True', 'blank': 'True'}),
+            'notifications': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
+            'recipient': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['zephyr.Recipient']"}),
+            'user_profile': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['zephyr.UserProfile']"})
+        },
+        u'zephyr.useractivity': {
+            'Meta': {'unique_together': "(('user_profile', 'client', 'query'),)", 'object_name': 'UserActivity'},
+            'client': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['zephyr.Client']"}),
+            'count': ('django.db.models.fields.IntegerField', [], {}),
+            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'last_visit': ('django.db.models.fields.DateTimeField', [], {}),
+            'query': ('django.db.models.fields.CharField', [], {'max_length': '50', 'db_index': 'True'}),
+            'user_profile': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['zephyr.UserProfile']"})
+        },
+        u'zephyr.usermessage': {
+            'Meta': {'unique_together': "(('user_profile', 'message'),)", 'object_name': 'UserMessage'},
+            'archived': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
+            'flags': ('django.db.models.fields.BigIntegerField', [], {'default': '0'}),
+            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'message': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['zephyr.Message']"}),
+            'user_profile': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['zephyr.UserProfile']"})
+        },
+        u'zephyr.userpresence': {
+            'Meta': {'unique_together': "(('user_profile', 'client'),)", 'object_name': 'UserPresence'},
+            'client': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['zephyr.Client']"}),
+            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'status': ('django.db.models.fields.PositiveSmallIntegerField', [], {'default': '1'}),
+            'timestamp': ('django.db.models.fields.DateTimeField', [], {}),
+            'user_profile': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['zephyr.UserProfile']"})
+        },
+        u'zephyr.userprofile': {
+            'Meta': {'object_name': 'UserProfile'},
+            'api_key': ('django.db.models.fields.CharField', [], {'max_length': '32'}),
+            'date_joined': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}),
+            'email': ('django.db.models.fields.EmailField', [], {'unique': 'True', 'max_length': '75', 'db_index': 'True'}),
+            'enable_desktop_notifications': ('django.db.models.fields.BooleanField', [], {'default': 'True'}),
+            'enter_sends': ('django.db.models.fields.NullBooleanField', [], {'default': 'False', 'null': 'True', 'blank': 'True'}),
+            'full_name': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
+            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'is_active': ('django.db.models.fields.BooleanField', [], {'default': 'True'}),
+            'is_staff': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
+            'last_login': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}),
+            'last_pointer_updater': ('django.db.models.fields.CharField', [], {'max_length': '64'}),
+            'password': ('django.db.models.fields.CharField', [], {'max_length': '128'}),
+            'pointer': ('django.db.models.fields.IntegerField', [], {}),
+            'realm': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['zephyr.Realm']"}),
+            'short_name': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
+            'tutorial_status': ('django.db.models.fields.CharField', [], {'default': "'W'", 'max_length': '1'})
+        }
+    }
+
+    complete_apps = ['zephyr']  # presumably South's marker for apps whose models are fully captured in `models` above -- TODO confirm