[schema] [manual] Create a Postgres text search configuration for use with Humbug

Text search was not that great, partly because Postgres wasn't
previously using an ispell dictionary (Postgres term).  We now pull in
Hunspell and use its dictionary and affix rules.

It is OK to run with this new configuration before our full-text
column and index are updated; those updates are coming in the next few
commits.

Manual steps for deploy:
1) On both postgres0 and postgres1 (both before moving on to step 2),
   install the hunspell-en-us package
2) On staging, run migration 0022
3) On both postgres0 and postgres1, copy the appropriate postgresql.conf
   file over
4) On both postgres0 and postgres1, run `pg_ctlcluster 9.1 main reload`

(imported from commit 706bf0f6ecc46c712cea10b73c34fd9d1dfd4767)
This commit is contained in:
Zev Benjamin
2013-04-26 16:28:55 -04:00
parent 3d18b2eb2f
commit 2aadf6fc6e
4 changed files with 158 additions and 3 deletions

View File

@@ -499,7 +499,7 @@ lc_numeric = 'en_US.UTF-8' # locale for number formatting
lc_time = 'en_US.UTF-8' # locale for time formatting
# default configuration for text search
default_text_search_config = 'pg_catalog.english'
#default_text_search_config = 'pg_catalog.english'
# - Other Defaults -
@@ -564,6 +564,7 @@ shared_buffers = 7680MB # pgtune wizard 2013-04-12
max_connections = 200 # pgtune wizard 2013-04-12
timezone = 'UTC'
default_text_search_config = 'humbug.english_us_search'
# From http://www.databasesoup.com/2012/05/random-page-cost-revisited.html
# This needs actual measurement, as bonnie++ random seek latency is
# actually a read/write test

View File

@@ -499,7 +499,7 @@ lc_numeric = 'en_US.UTF-8' # locale for number formatting
lc_time = 'en_US.UTF-8' # locale for time formatting
# default configuration for text search
default_text_search_config = 'pg_catalog.english'
#default_text_search_config = 'pg_catalog.english'
# - Other Defaults -
@@ -564,6 +564,7 @@ shared_buffers = 1792MB # pgtune wizard 2012-12-12
max_connections = 200 # pgtune wizard 2012-12-12
timezone = 'UTC'
default_text_search_config = 'humbug.english_us_search'
# EBS supposedly has very similar sequential and random access performance
random_page_cost = 1.1
listen_addresses = '*'

View File

@@ -3,7 +3,7 @@ class humbug::postgres-common {
$postgres_packages = [ "postgresql-9.1", "pgtune", "python-boto",
"python-argparse", "python-gevent/squeeze-backports",
"lzop", "pv"]
"lzop", "pv", "hunspell-en-us"]
package { $postgres_packages: ensure => "installed" }
exec {"pip_wal-e":

View File

@@ -0,0 +1,153 @@
# -*- coding: utf-8 -*-
import datetime
from south.db import db
from south.v2 import SchemaMigration
from django.db import models
from django.conf import settings
class Migration(SchemaMigration):
    """Create (and on rollback, drop) the text search objects used by Humbug.

    Going forwards creates the ``english_us_hunspell`` ispell-template
    dictionary and the ``humbug.english_us_search`` configuration, then maps
    the word token types of that configuration to the new dictionary followed
    by ``english_stem``.  Both directions do nothing when the default
    database engine name does not contain "postgres".
    """

    def forwards(self, orm):
        """Create the dictionary and search configuration on Postgres.

        ``orm`` is accepted for the migration interface but is not used.
        """
        engine_name = settings.DATABASES["default"]["ENGINE"]
        if "postgres" in engine_name:
            # Continuation lines of the multi-line statements are kept
            # flush-left so the SQL text sent to the server is unchanged.
            setup_statements = (
                """CREATE TEXT SEARCH DICTIONARY english_us_hunspell (template = ispell,
DictFile = en_us, AffFile = en_us, StopWords = english)""",
                "CREATE TEXT SEARCH CONFIGURATION humbug.english_us_search (COPY=pg_catalog.english)",
                """ALTER TEXT SEARCH CONFIGURATION humbug.english_us_search ALTER MAPPING FOR
asciiword, asciihword, hword_asciipart, word, hword, hword_part
WITH english_us_hunspell, english_stem""",
            )
            for sql in setup_statements:
                db.execute(sql)

    def backwards(self, orm):
        """Drop the search configuration and then the dictionary on Postgres.

        ``orm`` is accepted for the migration interface but is not used.
        """
        engine_name = settings.DATABASES["default"]["ENGINE"]
        if "postgres" in engine_name:
            # The configuration goes first: its mappings reference the
            # dictionary that is dropped afterwards.
            teardown_statements = (
                "DROP TEXT SEARCH CONFIGURATION humbug.english_us_search",
                "DROP TEXT SEARCH DICTIONARY english_us_hunspell",
            )
            for sql in teardown_statements:
                db.execute(sql)

    # Frozen model definitions for the 'zephyr' app (South-style snapshot);
    # the entries below are data and are reproduced unchanged.
    models = {
        u'zephyr.client': {
            'Meta': {'object_name': 'Client'},
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '30', 'db_index': 'True'})
        },
        u'zephyr.defaultstream': {
            'Meta': {'unique_together': "(('realm', 'stream'),)", 'object_name': 'DefaultStream'},
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'realm': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['zephyr.Realm']"}),
            'stream': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['zephyr.Stream']"})
        },
        u'zephyr.huddle': {
            'Meta': {'object_name': 'Huddle'},
            'huddle_hash': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '40', 'db_index': 'True'}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'})
        },
        u'zephyr.message': {
            'Meta': {'object_name': 'Message'},
            'content': ('django.db.models.fields.TextField', [], {}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'pub_date': ('django.db.models.fields.DateTimeField', [], {'db_index': 'True'}),
            'recipient': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['zephyr.Recipient']"}),
            'rendered_content': ('django.db.models.fields.TextField', [], {'null': 'True'}),
            'rendered_content_version': ('django.db.models.fields.IntegerField', [], {'null': 'True'}),
            'sender': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['zephyr.UserProfile']"}),
            'sending_client': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['zephyr.Client']"}),
            'subject': ('django.db.models.fields.CharField', [], {'max_length': '60', 'db_index': 'True'})
        },
        u'zephyr.mituser': {
            'Meta': {'object_name': 'MitUser'},
            'email': ('django.db.models.fields.EmailField', [], {'unique': 'True', 'max_length': '75'}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'status': ('django.db.models.fields.IntegerField', [], {'default': '0'})
        },
        u'zephyr.preregistrationuser': {
            'Meta': {'object_name': 'PreregistrationUser'},
            'email': ('django.db.models.fields.EmailField', [], {'max_length': '75'}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'invited_at': ('django.db.models.fields.DateTimeField', [], {'auto_now': 'True', 'blank': 'True'}),
            'referred_by': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['zephyr.UserProfile']", 'null': 'True'}),
            'status': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
            'streams': ('django.db.models.fields.related.ManyToManyField', [], {'to': u"orm['zephyr.Stream']", 'null': 'True', 'symmetrical': 'False'})
        },
        u'zephyr.realm': {
            'Meta': {'object_name': 'Realm'},
            'domain': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '40', 'db_index': 'True'}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'restricted_to_domain': ('django.db.models.fields.BooleanField', [], {'default': 'True'})
        },
        u'zephyr.recipient': {
            'Meta': {'unique_together': "(('type', 'type_id'),)", 'object_name': 'Recipient'},
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'type': ('django.db.models.fields.PositiveSmallIntegerField', [], {'db_index': 'True'}),
            'type_id': ('django.db.models.fields.IntegerField', [], {'db_index': 'True'})
        },
        u'zephyr.stream': {
            'Meta': {'unique_together': "(('name', 'realm'),)", 'object_name': 'Stream'},
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'invite_only': ('django.db.models.fields.NullBooleanField', [], {'default': 'False', 'null': 'True', 'blank': 'True'}),
            'name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'db_index': 'True'}),
            'realm': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['zephyr.Realm']"})
        },
        u'zephyr.streamcolor': {
            'Meta': {'object_name': 'StreamColor'},
            'color': ('django.db.models.fields.CharField', [], {'max_length': '10'}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'subscription': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['zephyr.Subscription']"})
        },
        u'zephyr.subscription': {
            'Meta': {'unique_together': "(('user_profile', 'recipient'),)", 'object_name': 'Subscription'},
            'active': ('django.db.models.fields.BooleanField', [], {'default': 'True'}),
            'color': ('django.db.models.fields.CharField', [], {'default': "'#c2c2c2'", 'max_length': '10'}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'in_home_view': ('django.db.models.fields.NullBooleanField', [], {'default': 'True', 'null': 'True', 'blank': 'True'}),
            'notifications': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
            'recipient': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['zephyr.Recipient']"}),
            'user_profile': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['zephyr.UserProfile']"})
        },
        u'zephyr.useractivity': {
            'Meta': {'unique_together': "(('user_profile', 'client', 'query'),)", 'object_name': 'UserActivity'},
            'client': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['zephyr.Client']"}),
            'count': ('django.db.models.fields.IntegerField', [], {}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'last_visit': ('django.db.models.fields.DateTimeField', [], {}),
            'query': ('django.db.models.fields.CharField', [], {'max_length': '50', 'db_index': 'True'}),
            'user_profile': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['zephyr.UserProfile']"})
        },
        u'zephyr.usermessage': {
            'Meta': {'unique_together': "(('user_profile', 'message'),)", 'object_name': 'UserMessage'},
            'archived': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
            'flags': ('django.db.models.fields.BigIntegerField', [], {'default': '0'}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'message': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['zephyr.Message']"}),
            'user_profile': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['zephyr.UserProfile']"})
        },
        u'zephyr.userpresence': {
            'Meta': {'unique_together': "(('user_profile', 'client'),)", 'object_name': 'UserPresence'},
            'client': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['zephyr.Client']"}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'status': ('django.db.models.fields.PositiveSmallIntegerField', [], {'default': '1'}),
            'timestamp': ('django.db.models.fields.DateTimeField', [], {}),
            'user_profile': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['zephyr.UserProfile']"})
        },
        u'zephyr.userprofile': {
            'Meta': {'object_name': 'UserProfile'},
            'api_key': ('django.db.models.fields.CharField', [], {'max_length': '32'}),
            'date_joined': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}),
            'email': ('django.db.models.fields.EmailField', [], {'unique': 'True', 'max_length': '75', 'db_index': 'True'}),
            'enable_desktop_notifications': ('django.db.models.fields.BooleanField', [], {'default': 'True'}),
            'enter_sends': ('django.db.models.fields.NullBooleanField', [], {'default': 'False', 'null': 'True', 'blank': 'True'}),
            'full_name': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'is_active': ('django.db.models.fields.BooleanField', [], {'default': 'True'}),
            'is_staff': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
            'last_login': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}),
            'last_pointer_updater': ('django.db.models.fields.CharField', [], {'max_length': '64'}),
            'password': ('django.db.models.fields.CharField', [], {'max_length': '128'}),
            'pointer': ('django.db.models.fields.IntegerField', [], {}),
            'realm': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['zephyr.Realm']"}),
            'short_name': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
            'tutorial_status': ('django.db.models.fields.CharField', [], {'default': "'W'", 'max_length': '1'})
        }
    }

    complete_apps = ['zephyr']