Files
zulip/zerver/lib/string_validation.py
Alex Vandiver df50280c54 string_validation: Loosen to allow some Cn unicode characters.
Under the unicodedata distributed with Python 3.6, some Emoji are
classified as `Cn`, and not `So`:

```
$ unicode 1f929 --long
U+1F929 GRINNING FACE WITH STAR EYES
UTF-8: f0 9f a4 a9 UTF-16BE: d83edd29 Decimal: &#129321; Octal: \0374451
🤩
Category: So (Symbol, Other); East Asian width: W (wide)
Unicode block: 1F900..1F9FF; Supplemental Symbols and Pictographs
Bidi: ON (Other Neutrals)

$ python3.6 -c 'import unicodedata; print(unicodedata.category("\U0001f929"))'
Cn

$ python3.7 -c 'import unicodedata; print(unicodedata.category("\U0001f929"))'
So
```

Drop `Cn` from the list of excluded Unicode character classes, and
replace it with an explicit list of the 66 non-characters, which are
invariant.

Co-authored-by: Shlok Patel <shlokcpatel2001@gmail.com>
2022-01-11 15:17:53 -08:00

53 lines
1.8 KiB
Python

import unicodedata
from typing import Optional
from django.utils.translation import gettext as _
from zerver.lib.exceptions import JsonableError
from zerver.models import Stream
# There are 66 Unicode non-characters; see
# https://www.unicode.org/faq/private_use.html#nonchar4
#
# This is kept as a list, in ascending-by-range order, so that any other
# code referring to this name keeps seeing the same type and contents.
unicode_non_chars = [
    chr(code_point)
    for code_points in (
        range(0xFDD0, 0xFDF0),  # FDD0 through FDEF, inclusive
        range(0xFFFE, 0x110000, 0x10000),  # 0xFFFE, 0x1FFFE, ... 0x10FFFE inclusive
        range(0xFFFF, 0x110000, 0x10000),  # 0xFFFF, 0x1FFFF, ... 0x10FFFF inclusive
    )
    for code_point in code_points
]

# Hashed copy of the list above, so the per-character membership test in
# check_string_is_printable is a constant-time lookup rather than a scan.
_UNICODE_NON_CHARS = frozenset(unicode_non_chars)

# Unicode general categories that are always rejected: control characters
# (Cc) and surrogates (Cs).  The unassigned category (Cn) is deliberately
# not listed: older unicodedata tables report some newer emoji as Cn, so
# the invariant set of non-characters is checked explicitly instead.
_UNPRINTABLE_CATEGORIES = frozenset(["Cc", "Cs"])


def check_string_is_printable(var: str) -> Optional[int]:
    """Find the first character of ``var`` that is not printable.

    A character counts as not printable when its Unicode general
    category is Cc (control) or Cs (surrogate), or when it is one of
    the 66 Unicode non-characters.

    Returns the position (1-indexed!) of the first such character, or
    None if no such character is present (including for an empty
    string).
    """
    for position, character in enumerate(var, start=1):
        if (
            character in _UNICODE_NON_CHARS
            or unicodedata.category(character) in _UNPRINTABLE_CATEGORIES
        ):
            return position
    return None
def check_stream_name(stream_name: str) -> None:
    """Validate a proposed stream name.

    Raises JsonableError if the name is empty or whitespace-only, is
    longer than Stream.MAX_NAME_LENGTH characters, or contains a NULL
    (0x00) character.  Returns None when the name passes all checks.
    """
    if not stream_name.strip():
        raise JsonableError(_("Stream name can't be empty!"))

    if len(stream_name) > Stream.MAX_NAME_LENGTH:
        too_long_message = _("Stream name too long (limit: {} characters).")
        raise JsonableError(too_long_message.format(Stream.MAX_NAME_LENGTH))

    if "\x00" in stream_name:
        null_message = _("Stream name '{}' contains NULL (0x00) characters.")
        raise JsonableError(null_message.format(stream_name))
def check_stream_topic(topic: str) -> None:
    """Validate a proposed topic name.

    Raises JsonableError if the topic is empty or whitespace-only, or
    if check_string_is_printable reports an invalid character; in the
    latter case the error message carries the 1-indexed position that
    check_string_is_printable returned.  Returns None otherwise.
    """
    if not topic.strip():
        raise JsonableError(_("Topic can't be empty!"))

    bad_position = check_string_is_printable(topic)
    if bad_position is None:
        return

    message = _("Invalid character in topic, at position {}!")
    raise JsonableError(message.format(bad_position))