timezone: Correct common_timezones dictionary.

The changes are as follows:

• Fix one day offset in all western zones.
• Correct CST from -64800 to -21600 and CDT from -68400 to -18000.
• Disambiguate PST in favor of -28800 over +28800.
• Add GMT, UTC, WET, previously excluded for being at offset 0.
• Add ACDT, AEDT, AKST, MET, MSK, NST, NZDT, PKT, which the previous
  code did not find.
• Remove numbered abbreviations -12, …, +14, which are unnecessary.
• Remove MSD and PKST, which are no longer used.

Hardcode the dict and verify it with a test, so that future
discrepancies won’t go silently unnoticed.

Signed-off-by: Anders Kaseorg <anders@zulip.com>
This commit is contained in:
Anders Kaseorg
2021-01-27 13:12:36 -08:00
committed by Anders Kaseorg
parent fd8504e06b
commit 4ca66e7278
3 changed files with 101 additions and 44 deletions

View File

@@ -50,7 +50,7 @@ from zerver.lib.mention import extract_user_group, possible_mentions, possible_u
from zerver.lib.tex import render_tex from zerver.lib.tex import render_tex
from zerver.lib.thumbnail import user_uploads_or_external from zerver.lib.thumbnail import user_uploads_or_external
from zerver.lib.timeout import TimeoutExpired, timeout from zerver.lib.timeout import TimeoutExpired, timeout
from zerver.lib.timezone import get_common_timezones from zerver.lib.timezone import common_timezones
from zerver.lib.url_encoding import encode_stream, hash_util_encode from zerver.lib.url_encoding import encode_stream, hash_util_encode
from zerver.lib.url_preview import preview as link_preview from zerver.lib.url_preview import preview as link_preview
from zerver.models import ( from zerver.models import (
@@ -1240,7 +1240,7 @@ class Timestamp(markdown.inlinepatterns.Pattern):
time_input_string = match.group('time') time_input_string = match.group('time')
timestamp = None timestamp = None
try: try:
timestamp = dateutil.parser.parse(time_input_string, tzinfos=get_common_timezones()) timestamp = dateutil.parser.parse(time_input_string, tzinfos=common_timezones)
except ValueError: except ValueError:
try: try:
timestamp = datetime.datetime.fromtimestamp(float(time_input_string)) timestamp = datetime.datetime.fromtimestamp(float(time_input_string))

View File

@@ -1,7 +1,6 @@
import datetime
from functools import lru_cache from functools import lru_cache
from io import TextIOWrapper from io import TextIOWrapper
from typing import Any, Dict, Union from typing import Dict
import pytz import pytz
@@ -21,42 +20,58 @@ def get_canonical_timezone_map() -> Dict[str, str]:
def canonicalize_timezone(key: str) -> str: def canonicalize_timezone(key: str) -> str:
return get_canonical_timezone_map().get(key, key) return get_canonical_timezone_map().get(key, key)
# This method carefully trims a list of common timezones in the pytz # Note: some of these abbreviations are fundamentally ambiguous (see
# database and handles duplicate abbreviations in favor of the most # zerver/tests/test_timezone.py), so you should never rely on them as
# common/popular offset. The output of this can be directly passed as # anything more than a heuristic.
# tz_data to dateutil.parser. It takes about 25ms to run, so we want common_timezones = {
# to cache its results (while avoiding running it on process startup "SST": -39600,
# since we only need it for Markdown rendering). "HST": -36000,
@lru_cache(maxsize=None) "AKST": -32400,
def get_common_timezones() -> Dict[str, Union[int, Any]]: "HDT": -32400,
tzdata = {} "AKDT": -28800,
normal = datetime.datetime(2009, 9, 1) # Any random date is fine here. "PST": -28800,
for str in pytz.all_timezones: "MST": -25200,
tz = pytz.timezone(str) "PDT": -25200,
timedelta = tz.utcoffset(normal) "CST": -21600,
if not timedelta: "MDT": -21600,
continue "CDT": -18000,
offset = timedelta.seconds "EST": -18000,
tz_name = tz.tzname(normal) "AST": -14400,
tzdata[tz_name] = offset "EDT": -14400,
# Handle known duplicates/exceptions. "NST": -12600,
# IST: Asia/Kolkata and Europe/Dublin. "ADT": -10800,
if tz_name == 'IST': "NDT": -9000,
tzdata[tz_name] = 19800 # Asia/Kolkata "GMT": 0,
# CDT: America/AlmostAll and America/Havana. "UTC": 0,
if tz_name == 'CDT': "WET": 0,
tzdata[tz_name] = -68400 # America/AlmostAll "BST": +3600,
# CST America/Belize -64800 "CET": +3600,
# CST America/Costa_Rica -64800 "MET": +3600,
# CST America/El_Salvador -64800 "WAT": +3600,
# CST America/Guatemala -64800 "WEST": +3600,
# CST America/Managua -64800 "CAT": +7200,
# CST America/Regina -64800 "CEST": +7200,
# CST America/Swift_Current -64800 "EET": +7200,
# CST America/Tegucigalpa -64800 "MEST": +7200,
# CST Asia/Macau 28800 "SAST": +7200,
# CST Asia/Shanghai 28800 "EAT": +10800,
# CST Asia/Taipei 28800 "EEST": +10800,
if tz_name == 'CST': "IDT": +10800,
tzdata[tz_name] = -64800 # America/All "MSK": +10800,
return tzdata "PKT": +18000,
"IST": +19800,
"WIB": +25200,
"AWST": +28800,
"HKT": +28800,
"WITA": +28800,
"JST": +32400,
"KST": +32400,
"WIT": +32400,
"ACST": +34200,
"AEST": +36000,
"ChST": +36000,
"ACDT": +37800,
"AEDT": +39600,
"NZST": +43200,
"NZDT": +46800,
}

View File

@@ -1,5 +1,10 @@
from datetime import datetime
import pytz
from django.utils.timezone import now as timezone_now
from zerver.lib.test_classes import ZulipTestCase from zerver.lib.test_classes import ZulipTestCase
from zerver.lib.timezone import canonicalize_timezone from zerver.lib.timezone import canonicalize_timezone, common_timezones
class TimeZoneTest(ZulipTestCase): class TimeZoneTest(ZulipTestCase):
@@ -7,3 +12,40 @@ class TimeZoneTest(ZulipTestCase):
self.assertEqual(canonicalize_timezone("America/Los_Angeles"), "America/Los_Angeles") self.assertEqual(canonicalize_timezone("America/Los_Angeles"), "America/Los_Angeles")
self.assertEqual(canonicalize_timezone("US/Pacific"), "America/Los_Angeles") self.assertEqual(canonicalize_timezone("US/Pacific"), "America/Los_Angeles")
self.assertEqual(canonicalize_timezone("Gondor/Minas_Tirith"), "Gondor/Minas_Tirith") self.assertEqual(canonicalize_timezone("Gondor/Minas_Tirith"), "Gondor/Minas_Tirith")
def test_common_timezones(self) -> None:
    """Check the hardcoded common_timezones table against the pytz database.

    The test verifies three things:

    * For every abbreviation listed in ``ambiguous_abbrevs``, the
      first-listed offset is the one stored in ``common_timezones``.
    * Every non-numeric abbreviation that pytz reports for the two
      sample dates either matches ``common_timezones`` or is a known
      ambiguous (abbreviation, offset) pair.
    * Every entry in ``common_timezones`` and ``ambiguous_abbrevs`` is
      actually observed in pytz, so stale entries are reported.
    """
    # Abbreviations listed with more than one possible offset.  The
    # first entry for each abbreviation is the preferred one.
    ambiguous_abbrevs = [
        ("CDT", -18000),  # Central Daylight Time
        ("CDT", -14400),  # Cuba Daylight Time
        ("CST", -21600),  # Central Standard Time
        ("CST", +28800),  # China Standard Time
        ("CST", -18000),  # Cuba Standard Time
        ("PST", -28800),  # Pacific Standard Time
        ("PST", +28800),  # Philippine Standard Time
        ("IST", +19800),  # India Standard Time
        ("IST", +7200),  # Israel Standard Time
        ("IST", +3600),  # Ireland Standard Time
    ]
    # dict() keeps the last value seen for a key, so reversing the list
    # selects the first-listed offset for each abbreviation.
    missing = set(dict(reversed(ambiguous_abbrevs)).items()) - set(
        common_timezones.items()
    )
    assert not missing, missing

    now = timezone_now()
    # Two sample dates in the current year; presumably chosen so that
    # both standard-time and daylight-time abbreviations are seen.
    dates = [datetime(now.year, 6, 21), datetime(now.year, 12, 21)]
    # Pairs not yet observed in pytz; whatever remains at the end is stale.
    extra = {*common_timezones.items(), *ambiguous_abbrevs}
    for name in pytz.all_timezones:
        tz = pytz.timezone(name)
        for date in dates:
            abbrev = tz.tzname(date)
            # Skip numeric pseudo-abbreviations such as "-03" or "+14".
            if abbrev.startswith(("-", "+")):
                continue
            delta = tz.utcoffset(date)
            assert delta is not None
            offset = delta.total_seconds()
            # Use .get() so that an abbreviation missing from the table
            # fails this assertion with its diagnostic tuple instead of
            # raising a bare KeyError.
            assert (
                common_timezones.get(abbrev) == offset
                or (abbrev, offset) in ambiguous_abbrevs
            ), (name, abbrev, offset)
            extra.discard((abbrev, offset))
    assert not extra, extra