event_schema: Add documentation and rename a few functions.

This should help make this revised subsystem readable for more new
contributors.  We still need to make updates to the high-level
documentation.
This commit is contained in:
Tim Abbott
2020-09-25 12:53:00 -07:00
parent 5b7c9c4714
commit 94a9fa1891
5 changed files with 113 additions and 35 deletions

View File

@@ -1,4 +1,14 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
#
# Validates that 3 data sources agree about the structure of Zulip's events API:
#
# * Node fixtures for the server_events_dispatch.js tests.
# * OpenAPI definitions in zerver/openapi/zulip.yaml
# * The schemas defined in zerver/lib/events_schema.py used for the
# Zulip server's test suite.
#
# We compare the Python and OpenAPI schemas by converting the OpenAPI data
# into the event_schema style of types and the diffing the schemas.
import argparse import argparse
import difflib import difflib
import os import os
@@ -61,7 +71,8 @@ SKIP_LIST = [
"user_status__set_status_text", "user_status__set_status_text",
] ]
# This list of exemptions represents details we should fix in Zulip's
# API structure and/or validators.
EXEMPT_OPENAPI_NAMES = [ EXEMPT_OPENAPI_NAMES = [
# users field missing # users field missing
"update_display_settings_event", "update_display_settings_event",
@@ -132,6 +143,9 @@ def verify_fixtures_are_sorted(names: List[str]) -> None:
def from_openapi(node: Dict[str, Any]) -> Any: def from_openapi(node: Dict[str, Any]) -> Any:
"""Converts the OpenAPI data into event_schema.py style type
definitions for convenient comparison with the types used for backend
tests declared there."""
if "oneOf" in node: if "oneOf" in node:
return UnionType([from_openapi(n) for n in node["oneOf"]]) return UnionType([from_openapi(n) for n in node["oneOf"]])
@@ -172,7 +186,7 @@ def from_openapi(node: Dict[str, Any]) -> Any:
raise AssertionError("cannot handle node") raise AssertionError("cannot handle node")
def validate_openapi() -> None: def validate_openapi_against_event_schema() -> None:
node = openapi_spec.openapi()["paths"]["/events"]["get"]["responses"]["200"][ node = openapi_spec.openapi()["paths"]["/events"]["get"]["responses"]["200"][
"content" "content"
]["application/json"]["schema"]["properties"]["events"]["items"]["oneOf"] ]["application/json"]["schema"]["properties"]["events"]["items"]["oneOf"]
@@ -222,7 +236,7 @@ def run() -> None:
print(f"skip {name}") print(f"skip {name}")
continue continue
check_event(name, event) check_event(name, event)
validate_openapi() validate_openapi_against_event_schema()
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -1,8 +1,12 @@
""" """This module sets up type classes like DictType and ListType that
This module sets up type classes like DictType and define types for arbitrary objects. The main use case is to specify
ListType that define types for arbitrary objects, but the types of Zulip events that come from send_event calls for
our first use case is to specify the types of Zulip verification in test_events.py; in most other code paths we'll want to
events that come from send_event calls. use a TypedDict.
This module consists of workarounds for cases where we cannot express
the level of detail we desire or do comparison with OpenAPI types
easily with the native Python type system.
""" """
from dataclasses import dataclass from dataclasses import dataclass
@@ -20,6 +24,10 @@ def indent(s: str) -> str:
@dataclass @dataclass
class DictType: class DictType:
"""Dictionary is validated as having all required keys, all keys
accounted for in required_keys and optional_keys, and recursive
validation of types of fields.
"""
def __init__( def __init__(
self, self,
required_keys: Sequence[Tuple[str, Any]], required_keys: Sequence[Tuple[str, Any]],
@@ -65,6 +73,7 @@ class DictType:
@dataclass @dataclass
class EnumType: class EnumType:
"""An enum with the set of valid values declared."""
valid_vals: Sequence[Any] valid_vals: Sequence[Any]
def check_data(self, var_name: str, val: Dict[str, Any]) -> None: def check_data(self, var_name: str, val: Dict[str, Any]) -> None:
@@ -76,6 +85,7 @@ class EnumType:
class Equals: class Equals:
"""Type requiring a specific value."""
def __init__(self, expected_value: Any) -> None: def __init__(self, expected_value: Any) -> None:
self.expected_value = expected_value self.expected_value = expected_value
@@ -94,6 +104,8 @@ class Equals:
class NumberType: class NumberType:
"""A Union[float, int]; needed to align with the `number` type in
OpenAPI, because isinstance(4, float) == False"""
def check_data(self, var_name: str, val: Optional[Any]) -> None: def check_data(self, var_name: str, val: Optional[Any]) -> None:
if isinstance(val, int) or isinstance(val, float): if isinstance(val, int) or isinstance(val, float):
return return
@@ -104,6 +116,7 @@ class NumberType:
class ListType: class ListType:
"""List with every object having the declared sub_type."""
def __init__(self, sub_type: Any, length: Optional[int] = None) -> None: def __init__(self, sub_type: Any, length: Optional[int] = None) -> None:
self.sub_type = sub_type self.sub_type = sub_type
self.length = length self.length = length
@@ -123,6 +136,7 @@ class ListType:
@dataclass @dataclass
class StringDictType: class StringDictType:
"""Type that validates an object is a Dict[str, str]"""
value_type: Any value_type: Any
def check_data(self, var_name: str, val: Dict[Any, Any]) -> None: def check_data(self, var_name: str, val: Dict[Any, Any]) -> None:
@@ -157,6 +171,8 @@ class OptionalType:
@dataclass @dataclass
class TupleType: class TupleType:
"""Deprecated; we'd like to avoid using tuples in our API. Validates
the tuple has the sequence of sub_types."""
sub_types: Sequence[Any] sub_types: Sequence[Any]
def check_data(self, var_name: str, val: Any) -> None: def check_data(self, var_name: str, val: Any) -> None:
@@ -223,6 +239,7 @@ def event_dict_type(
required_keys: Sequence[Tuple[str, Any]], required_keys: Sequence[Tuple[str, Any]],
optional_keys: Sequence[Tuple[str, Any]] = [], optional_keys: Sequence[Tuple[str, Any]] = [],
) -> DictType: ) -> DictType:
""" """
This is just a tiny wrapper on DictType, but it provides This is just a tiny wrapper on DictType, but it provides
some minor benefits: some minor benefits:
@@ -252,14 +269,16 @@ def make_checker(data_type: DictType,) -> Callable[[str, Dict[str, object]], Non
def schema( def schema(
# return a YAML-like string for our data type
var_name: str, var_name: str,
data_type: Any, data_type: Any,
) -> str: ) -> str:
""" """Returns a YAML-like string for our data type; these are used for
schema is a glorified repr of a data type, but it pretty-printing and comparison between the OpenAPI type
also includes a var_name you pass in, plus we dumb definitions and these Python data types, as part of
things down a bit to match our current openapi spec
schema is a glorified repr of a data type, but it also includes a
var_name you pass in, plus we dumb things down a bit to match our
current openapi spec.
""" """
if hasattr(data_type, "schema"): if hasattr(data_type, "schema"):
return data_type.schema(var_name) return data_type.schema(var_name)
@@ -269,11 +288,11 @@ def schema(
def check_data( def check_data(
# Check that data conforms to our data_type
data_type: Any, data_type: Any,
var_name: str, var_name: str,
val: Any, val: Any,
) -> None: ) -> None:
"""Check that val conforms to our data_type"""
if hasattr(data_type, "check_data"): if hasattr(data_type, "check_data"):
data_type.check_data(var_name, val) data_type.check_data(var_name, val)
return return

View File

@@ -1,3 +1,14 @@
# This module is a collection of testing helpers for validating the
# schema of "events" sent by Zulip's server-to-client push system.
#
# By policy, every event generated by Zulip's API should be validated
# by a test in test_events.py with a schema checker here (which is
# validated, in turn, against the OpenAPI documentation for GET
# /events in zulip.yaml and the fixtures used by the Zulip webapp
# frontend).
#
# See https://zulip.readthedocs.io/en/latest/subsystems/events-system.html
from typing import Dict, List, Sequence, Tuple, Union from typing import Dict, List, Sequence, Tuple, Union
from zerver.lib.data_types import ( from zerver.lib.data_types import (
@@ -183,6 +194,10 @@ default_streams_event = event_dict_type(
) )
check_default_streams = make_checker(default_streams_event) check_default_streams = make_checker(default_streams_event)
# The event type has an unusual number of optional fields. The
# message_id/message_ids fields are conditional on the
# bulk_message_deletion client_capability, whereas the other fields
# are conditional on private vs. stream messages.
delete_message_event = event_dict_type( delete_message_event = event_dict_type(
required_keys=[ required_keys=[
# force vertical # force vertical
@@ -279,9 +294,11 @@ invites_changed_event = event_dict_type(
) )
check_invites_changed = make_checker(invites_changed_event) check_invites_changed = make_checker(invites_changed_event)
# This type, like other instances of TupleType, is a legacy feature of
# a very old Zulip API; we plan to replace it with an object as those
# are more extensible.
muted_topic_type = TupleType( muted_topic_type = TupleType(
[ [
# We should use objects, not tuples!
str, # stream name str, # stream name
str, # topic name str, # topic name
int, # timestamp int, # timestamp
@@ -327,6 +344,8 @@ message_event = event_dict_type(
) )
check_message = make_checker(message_event) check_message = make_checker(message_event)
# This legacy presence structure is intended to be replaced by a more
# sensible data structure.
presence_type = DictType( presence_type = DictType(
required_keys=[ required_keys=[
("status", EnumType(["active", "idle"])), ("status", EnumType(["active", "idle"])),
@@ -372,14 +391,17 @@ def check_presence(
assert list(event["presence"].values())[0]["status"] == status assert list(event["presence"].values())[0]["status"] == status
# We will eventually just send user_ids. # Type for the legacy user field; the `user_id` field is intended to
reaction_user_type = DictType( # replace this and we expect to remove this once clients have migrated
# to support the modern API.
reaction_legacy_user_type = DictType(
required_keys=[ required_keys=[
# force vertical # force vertical
("email", str), ("email", str),
("full_name", str), ("full_name", str),
("user_id", int), ("user_id", int),
] ]
# We should probably declare is_mirror_dummy as an optional field here.
) )
reaction_add_event = event_dict_type( reaction_add_event = event_dict_type(
@@ -391,7 +413,7 @@ reaction_add_event = event_dict_type(
("emoji_code", str), ("emoji_code", str),
("reaction_type", str), ("reaction_type", str),
("user_id", int), ("user_id", int),
("user", reaction_user_type), ("user", reaction_legacy_user_type),
] ]
) )
check_reaction_add = make_checker(reaction_add_event) check_reaction_add = make_checker(reaction_add_event)
@@ -406,7 +428,7 @@ reaction_remove_event = event_dict_type(
("emoji_code", str), ("emoji_code", str),
("reaction_type", str), ("reaction_type", str),
("user_id", int), ("user_id", int),
("user", reaction_user_type), ("user", reaction_legacy_user_type),
] ]
) )
check_reaction_remove = make_checker(reaction_remove_event) check_reaction_remove = make_checker(reaction_remove_event)
@@ -669,29 +691,25 @@ def check_realm_export(
has_deleted_timestamp: bool, has_deleted_timestamp: bool,
has_failed_timestamp: bool, has_failed_timestamp: bool,
) -> None: ) -> None:
""" # Check the overall event first, knowing it has some
Check the overall event first, knowing it has some # optional types.
optional types.
"""
_check_realm_export(var_name, event) _check_realm_export(var_name, event)
""" # It's possible to have multiple data exports, but the events tests do not
Theoretically we can have multiple exports, but until # exercise that case, so we do strict validation for a single export here.
that happens in practice, we assume our tests are
gonna have one export.
"""
assert isinstance(event["exports"], list) assert isinstance(event["exports"], list)
assert len(event["exports"]) == 1 assert len(event["exports"]) == 1
export = event["exports"][0] export = event["exports"][0]
""" # Now verify which fields have non-None values.
Now verify which fields have non-None values.
"""
assert has_export_url == (export["export_url"] is not None) assert has_export_url == (export["export_url"] is not None)
assert has_deleted_timestamp == (export["deleted_timestamp"] is not None) assert has_deleted_timestamp == (export["deleted_timestamp"] is not None)
assert has_failed_timestamp == (export["failed_timestamp"] is not None) assert has_failed_timestamp == (export["failed_timestamp"] is not None)
# This type, like other instances of TupleType, is a legacy feature of
# a very old Zulip API; we plan to replace it with an object as those
# are more extensible.
realm_filter_type = TupleType( realm_filter_type = TupleType(
[ [
# we should make this an object # we should make this an object
@@ -790,9 +808,10 @@ authentication_dict = DictType(
authentication_data = DictType( authentication_data = DictType(
required_keys=[ required_keys=[
# this single-key dictionary is an annoying # this single-key dictionary is an annoying consequence of us
# consequence of us using property_type of # using property_type of "default" for legacy reasons
# "default" for legacy reasons # (specifically, to avoid breaking old clients when we
# introduced the `update_dict` format).
("authentication_methods", authentication_dict), ("authentication_methods", authentication_dict),
] ]
) )
@@ -880,6 +899,14 @@ def check_realm_update_dict(
check_data(sub_type, f"{var_name}['data']", event["data"]) check_data(sub_type, f"{var_name}['data']", event["data"])
# TODO: This type is missing optional fields:
#
# * delivery_email
# * bot-related fields.
# * Users with custom profile fields, where profile_data will
# be nonempty.
#
# Only because our test_events.py tests don't cover the relevant cases.
realm_user_type = DictType( realm_user_type = DictType(
required_keys=[ required_keys=[
("user_id", int), ("user_id", int),
@@ -936,6 +963,8 @@ custom_profile_field_type = DictType(
], ],
) )
# This block of types, each named by the dictionary key, makes it
# possible to validate the type of all the realm_user update events.
realm_user_person_types = dict( realm_user_person_types = dict(
# Note that all flavors of person include user_id. # Note that all flavors of person include user_id.
avatar_fields=DictType( avatar_fields=DictType(
@@ -1170,6 +1199,8 @@ subscription_remove_event = event_dict_type(
) )
check_subscription_remove = make_checker(subscription_remove_event) check_subscription_remove = make_checker(subscription_remove_event)
# TODO: Have better validation for value_type; we don't have
# test_events tests for non-bool fields like `color`.
subscription_update_event = event_dict_type( subscription_update_event = event_dict_type(
required_keys=[ required_keys=[
("type", Equals("subscription")), ("type", Equals("subscription")),
@@ -1308,7 +1339,7 @@ update_message_topic_fields = [
"propagate_mode", "propagate_mode",
EnumType( EnumType(
[ [
# order matches openapi spec # The order here needs to match the OpenAPI definitions
"change_one", "change_one",
"change_later", "change_later",
"change_all", "change_all",

View File

@@ -1,5 +1,9 @@
# See https://zulip.readthedocs.io/en/latest/subsystems/events-system.html for # See https://zulip.readthedocs.io/en/latest/subsystems/events-system.html for
# high-level documentation on how this system works. # high-level documentation on how this system works.
#
# This module is closely integrated with zerver/lib/event_schema.py
# and zerver/lib/data_types.py systems for validating the schemas of
# events; it also uses the OpenAPI tools to validate our documentation.
import copy import copy
import sys import sys
import time import time

View File

@@ -117,6 +117,16 @@ def send_notification_http(realm: Realm, data: Mapping[str, Any]) -> None:
data=dict(data=orjson.dumps(data), secret=settings.SHARED_SECRET), data=dict(data=orjson.dumps(data), secret=settings.SHARED_SECRET),
) )
# The core function for sending an event from Django to Tornado (which
# will then push it to web and mobile clients for the target users).
# By convention, send_event should only be called from
# zerver/lib/actions.py, which helps make it easy to find event
# generation code.
#
# Every call point should be covered by a test in `test_events.py`,
# with the schema verified in `zerver/lib/event_schema.py`.
#
# See https://zulip.readthedocs.io/en/latest/subsystems/events-system.html
def send_event(realm: Realm, event: Mapping[str, Any], def send_event(realm: Realm, event: Mapping[str, Any],
users: Union[Iterable[int], Iterable[Mapping[str, Any]]]) -> None: users: Union[Iterable[int], Iterable[Mapping[str, Any]]]) -> None:
"""`users` is a list of user IDs, or in the case of `message` type """`users` is a list of user IDs, or in the case of `message` type