timezone: Correct common_timezones dictionary.

The changes are as follows: • Fix one day offset in all western zones. • Correct CST from -64800 to -21600 and CDT from -68400 to -18000. • Disambiguate PST in favor of -28800 over +28800. • Add GMT, UTC, WET, previously excluded for being at offset 0. • Add ACDT, AEDT, AKST, MET, MSK, NST, NZDT, PKT, which the previous code did not find. • Remove numbered abbreviations -12, …, +14, which are unnecessary. • Remove MSD and PKST, which are no longer used. Hardcode the dict and verify it with a test, so that future discrepancies won’t go silently unnoticed. Signed-off-by: Anders Kaseorg <anders@zulip.com>
2025-10-28 18:43:52 +00:00 · 2021-01-27 13:12:36 -08:00
parent fd8504e06b
commit 4ca66e7278
3 changed files with 101 additions and 44 deletions
--- a/zerver/lib/markdown/init.py
+++ b/zerver/lib/markdown/init.py
@@ -50,7 +50,7 @@ from zerver.lib.mention import extract_user_group, possible_mentions, possible_u
 from zerver.lib.tex import render_tex
 from zerver.lib.thumbnail import user_uploads_or_external
 from zerver.lib.timeout import TimeoutExpired, timeout
-from zerver.lib.timezone import get_common_timezones
+from zerver.lib.timezone import common_timezones
 from zerver.lib.url_encoding import encode_stream, hash_util_encode
 from zerver.lib.url_preview import preview as link_preview
 from zerver.models import (
@@ -1240,7 +1240,7 @@ class Timestamp(markdown.inlinepatterns.Pattern):
        time_input_string = match.group('time')
        timestamp = None
        try:
-            timestamp = dateutil.parser.parse(time_input_string, tzinfos=get_common_timezones())
+            timestamp = dateutil.parser.parse(time_input_string, tzinfos=common_timezones)
        except ValueError:
            try:
                timestamp = datetime.datetime.fromtimestamp(float(time_input_string))
--- a/zerver/lib/timezone.py
+++ b/zerver/lib/timezone.py
@@ -1,7 +1,6 @@
-import datetime
 from functools import lru_cache
 from io import TextIOWrapper
-from typing import Any, Dict, Union
+from typing import Dict

 import pytz

@@ -21,42 +20,58 @@ def get_canonical_timezone_map() -> Dict[str, str]:
 def canonicalize_timezone(key: str) -> str:
    return get_canonical_timezone_map().get(key, key)

-# This method carefully trims a list of common timezones in the pytz
-# database and handles duplicate abbreviations in favor of the most
-# common/popular offset. The output of this can be directly passed as
-# tz_data to dateutil.parser. It takes about 25ms to run, so we want
-# to cache its results (while avoiding running it on process startup
-# since we only need it for Markdown rendering).
-@lru_cache(maxsize=None)
-def get_common_timezones() -> Dict[str, Union[int, Any]]:
-    tzdata = {}
-    normal = datetime.datetime(2009, 9, 1)  # Any random date is fine here.
-    for str in pytz.all_timezones:
-        tz = pytz.timezone(str)
-        timedelta = tz.utcoffset(normal)
-        if not timedelta:
-            continue
-        offset = timedelta.seconds
-        tz_name = tz.tzname(normal)
-        tzdata[tz_name] = offset
-        # Handle known duplicates/exceptions.
-        # IST: Asia/Kolkata and Europe/Dublin.
-        if tz_name == 'IST':
-            tzdata[tz_name] = 19800  # Asia/Kolkata
-        # CDT: America/AlmostAll and America/Havana.
-        if tz_name == 'CDT':
-            tzdata[tz_name] = -68400  # America/AlmostAll
-        # CST America/Belize -64800
-        # CST America/Costa_Rica -64800
-        # CST America/El_Salvador -64800
-        # CST America/Guatemala -64800
-        # CST America/Managua -64800
-        # CST America/Regina -64800
-        # CST America/Swift_Current -64800
-        # CST America/Tegucigalpa -64800
-        # CST Asia/Macau 28800
-        # CST Asia/Shanghai 28800
-        # CST Asia/Taipei 28800
-        if tz_name == 'CST':
-            tzdata[tz_name] = -64800  # America/All
-    return tzdata
+# Note: some of these abbreviations are fundamentally ambiguous (see
+# zerver/tests/test_timezone.py), so you should never rely on them as
+# anything more than a heuristic.
+common_timezones = {
+    "SST": -39600,
+    "HST": -36000,
+    "AKST": -32400,
+    "HDT": -32400,
+    "AKDT": -28800,
+    "PST": -28800,
+    "MST": -25200,
+    "PDT": -25200,
+    "CST": -21600,
+    "MDT": -21600,
+    "CDT": -18000,
+    "EST": -18000,
+    "AST": -14400,
+    "EDT": -14400,
+    "NST": -12600,
+    "ADT": -10800,
+    "NDT": -9000,
+    "GMT": 0,
+    "UTC": 0,
+    "WET": 0,
+    "BST": +3600,
+    "CET": +3600,
+    "MET": +3600,
+    "WAT": +3600,
+    "WEST": +3600,
+    "CAT": +7200,
+    "CEST": +7200,
+    "EET": +7200,
+    "MEST": +7200,
+    "SAST": +7200,
+    "EAT": +10800,
+    "EEST": +10800,
+    "IDT": +10800,
+    "MSK": +10800,
+    "PKT": +18000,
+    "IST": +19800,
+    "WIB": +25200,
+    "AWST": +28800,
+    "HKT": +28800,
+    "WITA": +28800,
+    "JST": +32400,
+    "KST": +32400,
+    "WIT": +32400,
+    "ACST": +34200,
+    "AEST": +36000,
+    "ChST": +36000,
+    "ACDT": +37800,
+    "AEDT": +39600,
+    "NZST": +43200,
+    "NZDT": +46800,
+}
--- a/zerver/tests/test_timezone.py
+++ b/zerver/tests/test_timezone.py
@@ -1,5 +1,10 @@
+from datetime import datetime
+
+import pytz
+from django.utils.timezone import now as timezone_now
+
 from zerver.lib.test_classes import ZulipTestCase
-from zerver.lib.timezone import canonicalize_timezone
+from zerver.lib.timezone import canonicalize_timezone, common_timezones


 class TimeZoneTest(ZulipTestCase):
@@ -7,3 +12,40 @@ class TimeZoneTest(ZulipTestCase):
        self.assertEqual(canonicalize_timezone("America/Los_Angeles"), "America/Los_Angeles")
        self.assertEqual(canonicalize_timezone("US/Pacific"), "America/Los_Angeles")
        self.assertEqual(canonicalize_timezone("Gondor/Minas_Tirith"), "Gondor/Minas_Tirith")
+
+    def test_common_timezones(self) -> None:
+        ambiguous_abbrevs = [
+            ("CDT", -18000),  # Central Daylight Time
+            ("CDT", -14400),  # Cuba Daylight Time
+            ("CST", -21600),  # Central Standard Time
+            ("CST", +28800),  # China Standard Time
+            ("CST", -18000),  # Cuba Standard Time
+            ("PST", -28800),  # Pacific Standard Time
+            ("PST", +28800),  # Philippine Standard Time
+            ("IST", +19800),  # India Standard Time
+            ("IST", +7200),  # Israel Standard Time
+            ("IST", +3600),  # Ireland Standard Time
+        ]
+        missing = set(dict(reversed(ambiguous_abbrevs)).items()) - set(
+            common_timezones.items()
+        )
+        assert not missing, missing
+
+        now = timezone_now()
+        dates = [datetime(now.year, 6, 21), datetime(now.year, 12, 21)]
+        extra = {*common_timezones.items(), *ambiguous_abbrevs}
+        for name in pytz.all_timezones:
+            tz = pytz.timezone(name)
+            for date in dates:
+                abbrev = tz.tzname(date)
+                if abbrev.startswith(("-", "+")):
+                    continue
+                delta = tz.utcoffset(date)
+                assert delta is not None
+                offset = delta.total_seconds()
+                assert (
+                    common_timezones[abbrev] == offset
+                    or (abbrev, offset) in ambiguous_abbrevs
+                ), (name, abbrev, offset)
+                extra.discard((abbrev, offset))
+        assert not extra, extra