Files
zulip/tools/setup/emoji/emoji_setup_utils.py
evykassirer 32c730e530 emoji: Fallback to Google Modern for unsupported new Twitter emoji.
The Twitter emoji team was laid off in 2022, so new emoji aren't supported.
https://github.com/twitter/twemoji/issues/570#issuecomment-1303422143.
The "twitter" sprite sheet we’re using does have images in those locations,
but they’re fallback images that emoji-datasource fills in from the Apple
sprite sheet, which has unclear licensing implications.

To be able to support newer emoji, we fall back to Google Modern for any emoji
not covered by Twemoji.

CZO conversation:
https://chat.zulip.org/#narrow/stream/107-kandra/topic/emoji.20changes.20in.208.2E0/near/1689918
2023-11-28 23:46:49 -08:00

128 lines
4.7 KiB
Python

# This file contains various helper functions used by the `build_emoji` tool.
# See docs/subsystems/emoji.md for details on how this system works.
from collections import defaultdict
from typing import Any, Dict, List
from zerver.lib.emoji_utils import emoji_to_hex_codepoint, hex_codepoint_to_emoji, unqualify_emoji
# The old emoji farm named some image files differently from the new emoji
# farm. `REMAPPED_EMOJIS` maps each such old-farm name to the name the same
# image has in the new emoji farm.
REMAPPED_EMOJIS = {
    # Keycaps: the bare character code gains the U+20E3 suffix.
    "0023": "0023-20e3",  # Hash
    "0030": "0030-20e3",  # Zero
    "0031": "0031-20e3",  # One
    "0032": "0032-20e3",  # Two
    "0033": "0033-20e3",  # Three
    "0034": "0034-20e3",  # Four
    "0035": "0035-20e3",  # Five
    "0036": "0036-20e3",  # Six
    "0037": "0037-20e3",  # Seven
    "0038": "0038-20e3",  # Eight
    "0039": "0039-20e3",  # Nine
    # Flags: a single regional-indicator code becomes the full two-code pair.
    "1f1e8": "1f1e8-1f1f3",  # cn
    "1f1e9": "1f1e9-1f1ea",  # de
    "1f1ea": "1f1ea-1f1f8",  # es
    "1f1eb": "1f1eb-1f1f7",  # fr
    "1f1ec": "1f1ec-1f1e7",  # gb
    "1f1ee": "1f1ee-1f1f9",  # it
    "1f1ef": "1f1ef-1f1f5",  # jp
    "1f1f0": "1f1f0-1f1f7",  # kr
    "1f1f7": "1f1f7-1f1fa",  # ru
    "1f1fa": "1f1fa-1f1f8",  # us
}
# Maps each text emoticon to the emoji name it should be converted to.
# Several emoticons may share the same emoji. Any change made here should
# also be made in `help/configure-emoticon-translations.md`.
EMOTICON_CONVERSIONS = {
    ":)": ":smile:",
    "(:": ":smile:",
    ":(": ":frown:",
    "<3": ":heart:",
    ":|": ":neutral:",
    ":/": ":confused:",
    ";)": ":wink:",
    ":D": ":grinning:",
    ":o": ":open_mouth:",
    ":O": ":open_mouth:",
    ":p": ":stuck_out_tongue:",
    ":P": ":stuck_out_tongue:",
}
def emoji_names_for_picker(emoji_name_maps: Dict[str, Dict[str, Any]]) -> List[str]:
    """Return every emoji name found in `emoji_name_maps`, sorted.

    Each value of `emoji_name_maps` contributes its `canonical_name` and
    all of its `aliases`; the combined list is returned in sorted order.
    """
    emoji_names: List[str] = []
    for name_info in emoji_name_maps.values():
        emoji_names.append(name_info["canonical_name"])
        emoji_names.extend(name_info["aliases"])
    return sorted(emoji_names)
def get_emoji_code(emoji_dict: Dict[str, Any]) -> str:
    """Return the emoji code for `emoji_dict`, derived from its `unified` field.

    The `unified` value is passed through `hex_codepoint_to_emoji`, then
    `unqualify_emoji`, and finally back through `emoji_to_hex_codepoint`.
    """
    # There is a `non_qualified` field on `emoji_dict` but it's
    # inconsistently present, so we'll always use the unqualified
    # emoji by unqualifying it ourselves. This gives us more consistent
    # behaviour between emojis, and doesn't rely on the incomplete
    # upstream package (https://github.com/iamcal/emoji-data/pull/217).
    emoji = hex_codepoint_to_emoji(emoji_dict["unified"])
    return emoji_to_hex_codepoint(unqualify_emoji(emoji))
def generate_emoji_catalog(
    emoji_data: List[Dict[str, Any]], emoji_name_maps: Dict[str, Dict[str, Any]]
) -> Dict[str, List[str]]:
    """Return a dict mapping each category to its list of emoji codes.

    An entry of `emoji_data` is skipped when `emoji_is_supported` rejects
    it or when its emoji code is not a key of `emoji_name_maps`. Each
    category's list is sorted by the `sort_order` given in `emoji_data`.
    The result is a plain `dict`, not a `defaultdict`.
    """
    sort_order: Dict[str, int] = {}
    emoji_catalog: Dict[str, List[str]] = defaultdict(list)

    for emoji_dict in emoji_data:
        emoji_code = get_emoji_code(emoji_dict)
        if not emoji_is_supported(emoji_dict) or emoji_code not in emoji_name_maps:
            continue
        category = emoji_dict["category"]
        sort_order[emoji_code] = emoji_dict["sort_order"]
        emoji_catalog[category].append(emoji_code)

    # Sort the emojis according to iamcal's sort order. This sorting determines the
    # order in which emojis will be displayed in emoji picker.
    for emoji_codes in emoji_catalog.values():
        emoji_codes.sort(key=lambda emoji_code: sort_order[emoji_code])

    return dict(emoji_catalog)
def generate_codepoint_to_name_map(emoji_name_maps: Dict[str, Dict[str, Any]]) -> Dict[str, str]:
    """Return a dict mapping each emoji code to its `canonical_name`.

    Aliases are not included in the result.
    """
    return {
        emoji_code: name_info["canonical_name"]
        for emoji_code, name_info in emoji_name_maps.items()
    }
# We support Google Modern, and fall back to Google Modern when emoji
# aren't supported by the other styles we use.
def emoji_is_supported(emoji_dict: Dict[str, Any]) -> bool:
    """Return the `has_img_google` value of `emoji_dict`.

    Raises `KeyError` if `emoji_dict` has no `has_img_google` key.
    """
    return emoji_dict["has_img_google"]
def generate_codepoint_to_names_map(
    emoji_name_maps: Dict[str, Dict[str, Any]]
) -> Dict[str, List[str]]:
    """Return a dict mapping each emoji code to the list of all its names.

    The first element of each names list is always the canonical name;
    the aliases follow in the order they appear in `emoji_name_maps`.
    """
    return {
        emoji_code: [name_info["canonical_name"], *name_info["aliases"]]
        for emoji_code, name_info in emoji_name_maps.items()
    }
def generate_name_to_codepoint_map(emoji_name_maps: Dict[str, Dict[str, Any]]) -> Dict[str, str]:
    """Return a dict mapping every emoji name to its emoji code.

    Both the `canonical_name` and each of the `aliases` of an entry map to
    that entry's emoji code. If the same name occurs more than once, the
    assignment made last (in iteration order) is the one that is kept.
    """
    name_to_codepoint: Dict[str, str] = {}
    for emoji_code, name_info in emoji_name_maps.items():
        name_to_codepoint[name_info["canonical_name"]] = emoji_code
        for alias in name_info["aliases"]:
            name_to_codepoint[alias] = emoji_code
    return name_to_codepoint