Files
zulip/zerver/lib/url_decoding.py
PieterCK 3773ba10e1 url_decoding: Add parse_narrow_url.
This adds the Python copy of `hash_util.parse_narrow`. In the web app,
it will mainly be used in the import process later on. So, although it
has the same purpose as its frontend twin, there are differences:

- This doesn't convert a user-id-slug into a list of user emails. It
will instead parse it into a list of user IDs, as that is the preferred
form for those kinds of operators. It will also help in later operations
to remap the object IDs during import.

- To the same effect as the first point, operands can be an actual list
or int instead of a list or int as a string (e.g., "12,14,15" or "93").

- It has fewer validations than its frontend counterpart. It doesn't
look up the parsed object IDs for validity. This is partly because of
its main use case in import.
2025-02-24 15:37:40 -08:00

144 lines
4.7 KiB
Python

from urllib.parse import unquote, urlsplit
from django.conf import settings
from zerver.lib.narrow_helpers import NarrowTerm
from zerver.lib.topic import DB_TOPIC_NAME
def is_same_server_message_link(url: str) -> bool:
split_result = urlsplit(url)
hostname = split_result.hostname
fragment = split_result.fragment
if hostname not in {None, settings.EXTERNAL_HOST_WITHOUT_PORT}:
return False
# A message link always has category `narrow`, section `channel`
# or `dm`, and ends with `/near/<message_id>`, where <message_id>
# is a sequence of digits. The URL fragment of a message link has
# at least 5 parts. e.g. '#narrow/dm/9,15-dm/near/43'
fragment_parts = fragment.split("/")
if len(fragment_parts) < 5:
return False
category = fragment_parts[0]
section = fragment_parts[1]
ends_with_near_message_id = fragment_parts[-2] == "near" and fragment_parts[-1].isdigit()
return category == "narrow" and section in {"channel", "dm"} and ends_with_near_message_id
CHANNEL_SYNONYMS = {"stream": "channel", "streams": "channels"}
OPERATOR_SYNONYMS = {
**CHANNEL_SYNONYMS,
# "pm-with:" was renamed to "dm:"
"pm-with": "dm",
# "group-pm-with:" was replaced with "dm-including:"
"group-pm-with": "dm-including",
"from": "sender",
DB_TOPIC_NAME: "topic",
}
def canonicalize_operator_synonyms(text: str) -> str:
text = text.lower()
if text in OPERATOR_SYNONYMS.values():
return text
if text in OPERATOR_SYNONYMS:
return OPERATOR_SYNONYMS[text]
return text
def parse_recipient_slug(slug: str) -> tuple[int | list[int], str] | None:
"""
Parses operands formatted in slug containing object ID or IDs.
Typical of "channel" or private message operands.
Doesn't parse the legacy pre-2018 stream slug format, which would
require using data for what channels exist for a proper parse.
e.g. "stream-name"
Returns a tuple of parsed ids and the recipient info (channel name,
DM'ed users name, etc) or only `None` if the operand is invalid.
e.g.
- "12,13,14-group" -> ([12, 13, 14], "group")
- "89-Markl" -> (89, "Markl")
- "stream-name" -> None
"""
try:
ids_string, suffix = slug.split("-", maxsplit=1)
ids = [int(id) for id in ids_string.split(",")]
return (ids if len(ids) > 1 else ids[0], suffix)
except ValueError:
# We expect this to happen both for invalid URLs and legacy
# pre-2018 channel link URLs that don't have a channel ID in
# the slug.
return None
def decode_hash_component(string: str) -> str:
# This matches the web app's implementation of decodeHashComponent.
return unquote(string.replace(".", "%"))
def decode_narrow_operand(operator: str, operand: str) -> str | int | list[int]:
# These have the similar slug formatting for their operands which
# contain object ID(s).
if operator in ["dm-including", "dm", "sender", "channel"]:
result = parse_recipient_slug(operand)
return result[0] if isinstance(result, tuple) else ""
if operator == "near":
return int(operand) if operand.isdigit() else ""
operand = decode_hash_component(operand).strip()
return operand
def parse_narrow_url(
narrow_url: str,
) -> list[NarrowTerm] | None:
"""This server implementation is intended to match the algorithm
for the web app's `parse_narrow` in `hash_util.ts`. It largely
behaves the same way and has the same purpose: to parse a narrow
URL into a list of `NarrowTerm`.
The key difference from the web app implementation is that this
does not validate the referenced objects (users and channels).
"""
split_result = urlsplit(narrow_url)
fragment = split_result.fragment
fragment_parts = fragment.split("/")
terms: list[NarrowTerm] = []
for i in range(1, len(fragment_parts), 2):
raw_operator = decode_hash_component(fragment_parts[i]).strip()
if not raw_operator:
return None
negated = False
if raw_operator.startswith("-"):
negated = True
raw_operator = raw_operator[1:]
operator = canonicalize_operator_synonyms(raw_operator)
try:
raw_operand = fragment_parts[i + 1]
except IndexError:
raw_operand = ""
operand = decode_narrow_operand(operator, raw_operand)
if operand == "" and operator not in ["topic"]:
# The empty string is a valid topic (realm_empty_topic_display_name).
#
# Other empty string operands are invalid.
return None
terms.append(NarrowTerm(operator, operand, negated))
return terms