[manual] Cache results of the Twitter API in the database.

This should substantially improve the repeat-rendering time for pages with large numbers of tweets, since we no longer need to go all the way to twitter.com (which can take about a second) to render tweets properly. To deploy this commit properly, one needs to run `./manage.py createcachetable third_party_api_results`. (imported from commit 01b528e61f9dde2ee718bdec0490088907b6017e)
2025-11-14 10:57:58 +00:00 · 2013-03-11 11:23:34 -04:00
parent 9c28e52f91
commit d679a72952
5 changed files with 79 additions and 29 deletions
--- a/humbug/settings.py
+++ b/humbug/settings.py
@@ -296,11 +296,17 @@ RABBITMQ_PASSWORD = 'xxxxxxxxxxxxxxxx'

 # Caching
 if DEPLOYED:
-    CACHES = { 'default': {
-        'BACKEND':  'django.core.cache.backends.memcached.PyLibMCCache',
-        'LOCATION': '127.0.0.1:11211',
-        'TIMEOUT':  3600
-    } }
+    CACHES = {
+        'default': {
+            'BACKEND':  'django.core.cache.backends.memcached.PyLibMCCache',
+            'LOCATION': '127.0.0.1:11211',
+            'TIMEOUT':  3600
+        },
+        'database': {
+            'BACKEND':  'django.core.cache.backends.db.DatabaseCache',
+            'LOCATION':  'third_party_api_results'
+        },
+    }
    SESSION_ENGINE = "django.contrib.sessions.backends.cached_db"
 else:
    CACHES = { 'default': {
--- a/humbug/test_settings.py
+++ b/humbug/test_settings.py
@@ -29,13 +29,22 @@ TEST_SUITE = True
 USING_RABBITMQ = False

 # Disable use of memcached for caching
-CACHES = { 'default': {
+CACHES = {
+    'default': {
        'BACKEND':  'django.core.cache.backends.dummy.DummyCache',
-        'LOCATION': 'humbug-default-local-cache',
+        'LOCATION': 'humbug-default-test-cache',
        'TIMEOUT':  3600,
        'OPTIONS': {
            'MAX_ENTRIES': 100000
-        }
+        },
+    },
+    'database': {
+        'BACKEND':  'django.core.cache.backends.dummy.DummyCache',
+        'LOCATION': 'humbug-database-test-cache',
+        'TIMEOUT':  3600,
+        'OPTIONS': {
+            'MAX_ENTRIES': 100000
+        },
    } }

 requests_logger = logging.getLogger('humbug.requests')
--- a/tools/do-destroy-rebuild-database
+++ b/tools/do-destroy-rebuild-database
@@ -4,5 +4,6 @@ echo "DROP SCHEMA humbug CASCADE; CREATE SCHEMA humbug;" | python manage.py dbsh

 python manage.py syncdb --noinput
 python manage.py migrate
+python manage.py createcachetable third_party_api_results
 echo "CREATE UNIQUE INDEX ON auth_user (email);" | python manage.py dbshell
 python manage.py populate_db --replay-old-messages -n0
--- a/zephyr/lib/bugdown/__init__.py
+++ b/zephyr/lib/bugdown/__init__.py
@@ -16,6 +16,7 @@ from zephyr.lib.avatar  import gravatar_hash
 from zephyr.lib.bugdown import codehilite, fenced_code
 from zephyr.lib.bugdown.fenced_code import FENCE_RE
 from zephyr.lib.timeout import timeout
+from zephyr.lib.cache import db_cache_with_key

 def walk_tree(root, processor, stop_after_first=False):
    results = []
@@ -93,6 +94,29 @@ class InlineImagePreviewProcessor(markdown.treeprocessors.Treeprocessor):

        return root

+@db_cache_with_key(lambda tweet_id: tweet_id)  # cache key is the bare tweet ID -- NOTE(review): unprefixed; confirm nothing else shares this cache
+def fetch_tweet_data(tweet_id):
+    if settings.TEST_SUITE:  # under the test suite, use testing_mocks rather than the real API
+        import testing_mocks
+        res = testing_mocks.twitter(tweet_id)
+    else:
+        if settings.DEPLOYED:
+            # These are the real API credentials used by our real server;
+            # we probably shouldn't test with them, so that we don't waste its requests.
+            # Application: "Humbug HQ"
+            api = twitter.Api(consumer_key = 'xxxxxxxxxxxxxxxxxxxxxx',
+                              consumer_secret = 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx',
+                              access_token_key = 'xxxxxxxxx-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx',
+                              access_token_secret = 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
+        else:
+            # Credentials for non-deployed (development) use.  Application: "Humbug HQ Test"
+            api = twitter.Api(consumer_key = 'xxxxxxxxxxxxxxxxxxxxxx',
+                              consumer_secret = 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx',
+                              access_token_key = 'xxxxxxxxx-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx',
+                              access_token_secret = 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
+        res = api.GetStatus(tweet_id).AsDict()
+    return res  # the decorator stores this value in the cache under tweet_id
+
 class InlineInterestingLinkProcessor(markdown.treeprocessors.Treeprocessor):
    def twitter_link(self, url):
        parsed_url = urlparse.urlparse(url)
@@ -105,26 +129,7 @@ class InlineInterestingLinkProcessor(markdown.treeprocessors.Treeprocessor):

        tweet_id = tweet_id_match.groups()[0]
        try:
-            if settings.TEST_SUITE:
-                import testing_mocks
-                res = testing_mocks.twitter(tweet_id)
-            else:
-                if settings.DEPLOYED:
-                    # This is the real set of API credentials used by our real server,
-                    # and we probably shouldn't test with it just so we don't waste its requests
-                    # Application: "Humbug HQ"
-                    api = twitter.Api(consumer_key = 'xxxxxxxxxxxxxxxxxxxxxx',
-                                      consumer_secret = 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx',
-                                      access_token_key = 'xxxxxxxxx-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx',
-                                      access_token_secret = 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
-                else:
-                    # Application: "Humbug HQ Test"
-                    api = twitter.Api(consumer_key = 'xxxxxxxxxxxxxxxxxxxxxx',
-                                      consumer_secret = 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx',
-                                      access_token_key = 'xxxxxxxxx-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx',
-                                      access_token_secret = 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
-                res = api.GetStatus(tweet_id).AsDict()
-
+            res = fetch_tweet_data(tweet_id)
            user = res['user']
            tweet = markdown.util.etree.Element("div")
            tweet.set("class", "twitter-tweet")
@@ -150,7 +155,7 @@ class InlineInterestingLinkProcessor(markdown.treeprocessors.Treeprocessor):
            # We put this in its own try-except because it requires external
            # connectivity. If Twitter flakes out, we don't want to not-render
            # the entire message; we just want to not show the Twitter preview.
-            traceback.print_exc()
+            logging.warning(traceback.format_exc())
            return None

    # Search the tree for <a> tags and read their href values
--- a/zephyr/lib/cache.py
+++ b/zephyr/lib/cache.py
@@ -1,6 +1,7 @@
 from functools import wraps

 from django.core.cache import cache as djcache
+from django.core.cache import get_cache

 def cache_with_key(keyfunc):
    """Decorator which applies Django caching to a function.
@@ -44,3 +45,31 @@ def cache(func):
        return key.replace('-','--').replace(' ','-s')

    return cache_with_key(keyfunc)(func)
+
+def db_cache_with_key(keyfunc):
+    """Decorator which caches a function's results in the "database" cache.
+
+       Decorator argument is a function which computes a cache key
+       from the original function's arguments.  You are responsible
+       for avoiding collisions with other uses of this decorator or
+       other uses of the "database" cache."""
+
+    def decorator(func):
+        @wraps(func)
+        def func_with_caching(*args, **kwargs):
+            key = keyfunc(*args, **kwargs)
+            database_cache = get_cache("database")  # looked up by alias on every call
+            val = database_cache.get(key)
+
+            # Values are stored as singleton tuples so that we can distinguish
+            # a cached result of None from a missing key (where get() gives None).
+            if val is not None:
+                return val[0]
+
+            val = func(*args, **kwargs)  # cache miss: compute, then store wrapped in a tuple
+            database_cache.set(key, (val,))
+            return val
+
+        return func_with_caching
+
+    return decorator