We want a clean codepath for the vast majority
of cases of using api_get/api_post, which now
use email and which we'll soon convert to
accepting `user` as a parameter.
These APIs that take two different types of
values for the same parameter make sweeps
like this painful, and they're pretty
easy to avoid by extracting helpers to do
the actual common tasks. So, for example,
here I still keep a common method to
actually encode the credentials (since
the whole encode/decode business is an
annoying detail that you don't want to fix
in two places):
def encode_credentials(self, identifier: str, api_key: str) -> str:
    """
    identifier: Can be an email or a remote server uuid.
    """
    credentials = "%s:%s" % (identifier, api_key)
    return 'Basic ' + base64.b64encode(credentials.encode('utf-8')).decode('utf-8')
But then the rest of the code has two separate
codepaths.
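For illustration, the two codepaths might each end up as thin wrappers
like the following sketch (these helper names are hypothetical, not
taken from the actual diff); each one stays small because the encoding
detail lives in one place:

def api_auth_by_email(self, email: str, api_key: str) -> str:
    return self.encode_credentials(email, api_key)

def api_auth_by_server_uuid(self, server_uuid: str, api_key: str) -> str:
    return self.encode_credentials(server_uuid, api_key)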
And for the uuid functions, we no longer have
crufty references to realm. (In fairness, realm
will also go away when we introduce users.)
For the `is_remote_server` helper, I just inlined
it, since it's now only needed in one place, the
name didn't make total sense anyway, and it wasn't
a super robust check. In context, it's easier
to just use a comment to say what we're doing:
# If `role` doesn't look like an email, it might be a uuid.
if settings.ZILENCER_ENABLED and role is not None and '@' not in role:
    # do stuff
201 lines
6.4 KiB
Python
# -*- coding: utf-8 -*-
from typing import Any, Callable, List, Optional, Sequence, TypeVar, Iterable, Set, Tuple

import base64
import hashlib
import heapq
import itertools
import os
import re
import string
from time import sleep
from itertools import zip_longest

from django.conf import settings

T = TypeVar('T')

def statsd_key(val: Any, clean_periods: bool=False) -> str:
    if not isinstance(val, str):
        val = str(val)

    if ':' in val:
        val = val.split(':')[0]
    val = val.replace('-', "_")
    if clean_periods:
        val = val.replace('.', '_')

    return val

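# For example, statsd_key('host:9125') -> 'host', and
# statsd_key('foo-bar.baz', clean_periods=True) -> 'foo_bar_baz'.
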
class StatsDWrapper:
    """Transparently either submit metrics to statsd
    or do nothing without erroring out"""

    # Backported support for gauge deltas
    # as our statsd server supports them but supporting
    # pystatsd is not released yet
    def _our_gauge(self, stat: str, value: float, rate: float=1, delta: bool=False) -> None:
        """Set a gauge value."""
        from django_statsd.clients import statsd
        if delta:
            value_str = '%+g|g' % (value,)
        else:
            value_str = '%g|g' % (value,)
        statsd._send(stat, value_str, rate)

    def __getattr__(self, name: str) -> Any:
        # Hand off to statsd if we have it enabled
        # otherwise do nothing
        if name in ['timer', 'timing', 'incr', 'decr', 'gauge']:
            if settings.STATSD_HOST != '':
                from django_statsd.clients import statsd
                if name == 'gauge':
                    return self._our_gauge
                else:
                    return getattr(statsd, name)
            else:
                return lambda *args, **kwargs: None

        raise AttributeError

statsd = StatsDWrapper()

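# Usage sketch: if settings.STATSD_HOST is empty these calls are no-ops;
# otherwise they forward to django_statsd's client:
#     statsd.incr('page_views')
#     statsd.gauge('active_connections', 5, delta=True)
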
# Runs the callback with slices of all_list of a given batch_size
def run_in_batches(all_list: Sequence[T],
                   batch_size: int,
                   callback: Callable[[Sequence[T]], None],
                   sleep_time: int=0,
                   logger: Optional[Callable[[str], None]]=None) -> None:
    if len(all_list) == 0:
        return

    # Ceiling division, so an exact multiple of batch_size doesn't
    # produce a trailing empty batch.
    limit = (len(all_list) + batch_size - 1) // batch_size
    for i in range(limit):
        start = i*batch_size
        end = (i+1) * batch_size
        if end >= len(all_list):
            end = len(all_list)
        batch = all_list[start:end]

        if logger:
            logger("Executing %s in batch %s of %s" % (end-start, i+1, limit))

        callback(batch)

        if i != limit - 1:
            sleep(sleep_time)

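# Example usage (user_ids and send_invites are hypothetical names):
#     run_in_batches(user_ids, 100, send_invites, sleep_time=2, logger=print)
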
def make_safe_digest(string: str,
                     hash_func: Callable[[bytes], Any]=hashlib.sha1) -> str:
    """
    return a hex digest of `string`.
    """
    # hashlib.sha1, md5, etc. expect bytes, so non-ASCII strings must
    # be encoded.
    return hash_func(string.encode('utf-8')).hexdigest()

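# Example: make_safe_digest('café') returns the 40-character hex sha1 digest
# of the UTF-8 encoding of 'café'.
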
def log_statsd_event(name: str) -> None:
    """
    Sends a single event to statsd with the desired name and the current timestamp

    This can be used to provide vertical lines in generated graphs,
    for example when doing a prod deploy, bankruptcy request, or
    other one-off events

    Note that to draw this event as a vertical line in graphite
    you can use the drawAsInfinite() command
    """
    event_name = "events.%s" % (name,)
    statsd.incr(event_name)

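# Example: log_statsd_event('deploy_start') bumps the 'events.deploy_start'
# counter, which graphite can render as a vertical line via drawAsInfinite().
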
def generate_random_token(length: int) -> str:
    return str(base64.b16encode(os.urandom(length // 2)).decode('utf-8').lower())

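# Note: the token is lowercase hex built from length // 2 random bytes, so
# generate_random_token(32) returns exactly 32 hex characters.
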
def generate_api_key() -> str:
    choices = string.ascii_letters + string.digits
    # Pick two random alphanumeric characters to replace '+' and '/' in
    # the base64 alphabet, so the key is purely alphanumeric.
    altchars = ''.join([choices[ord(os.urandom(1)) % 62] for _ in range(2)]).encode("utf-8")
    # 24 random bytes encode to exactly 32 base64 characters (no padding).
    api_key = base64.b64encode(os.urandom(24), altchars=altchars).decode("utf-8")
    return api_key

def has_api_key_format(key: str) -> bool:
    return bool(re.fullmatch(r"([A-Za-z0-9]){32}", key))

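# Keys from generate_api_key() always pass has_api_key_format(): the output
# is 32 characters long, and the altchars keep the whole alphabet alphanumeric.
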
def query_chunker(queries: List[Any],
                  id_collector: Optional[Set[int]]=None,
                  chunk_size: int=1000,
                  db_chunk_size: Optional[int]=None) -> Iterable[Any]:
    '''
    This merges one or more Django ascending-id queries into
    a generator that returns chunks of chunk_size row objects
    during each yield, preserving id order across all results.

    Queries should satisfy these conditions:
        - They should be Django filters.
        - They should return Django objects with "id" attributes.
        - They should be disjoint.

    The generator also populates id_collector, which we use
    internally to enforce unique ids, but which the caller
    can pass in to us if they want the side effect of collecting
    all ids.
    '''
    if db_chunk_size is None:
        db_chunk_size = chunk_size // len(queries)

    assert db_chunk_size >= 2
    assert chunk_size >= 2

    if id_collector is not None:
        assert len(id_collector) == 0
    else:
        id_collector = set()

    def chunkify(q: Any, i: int) -> Iterable[Tuple[int, int, Any]]:
        q = q.order_by('id')
        min_id = -1
        while True:
            assert db_chunk_size is not None  # Hint for mypy, but also workaround for mypy bug #3442.
            rows = list(q.filter(id__gt=min_id)[0:db_chunk_size])
            if len(rows) == 0:
                break
            for row in rows:
                yield (row.id, i, row)
            min_id = rows[-1].id

    iterators = [chunkify(q, i) for i, q in enumerate(queries)]
    merged_query = heapq.merge(*iterators)

    while True:
        tup_chunk = list(itertools.islice(merged_query, 0, chunk_size))
        if len(tup_chunk) == 0:
            break

        # Do duplicate-id management here.
        tup_ids = set([tup[0] for tup in tup_chunk])
        assert len(tup_ids) == len(tup_chunk)
        assert len(tup_ids.intersection(id_collector)) == 0
        id_collector.update(tup_ids)

        yield [row for row_id, i, row in tup_chunk]

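# Usage sketch (Message, alice, bob, and process are hypothetical names):
# merge two disjoint queries in ascending id order, collecting all ids seen:
#
#     message_ids: Set[int] = set()
#     queries = [
#         Message.objects.filter(sender=alice),
#         Message.objects.filter(sender=bob),
#     ]
#     for chunk in query_chunker(queries, id_collector=message_ids, chunk_size=500):
#         process(chunk)
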
def process_list_in_batches(lst: List[Any],
                            chunk_size: int,
                            process_batch: Callable[[List[Any]], None]) -> None:
    offset = 0

    while True:
        items = lst[offset:offset+chunk_size]
        if not items:
            break
        process_batch(items)
        offset += chunk_size

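# Example (rows and insert_rows are hypothetical names):
#     process_list_in_batches(rows, 1000, insert_rows)
# calls insert_rows with successive 1000-element slices of rows.
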
def split_by(array: List[Any], group_size: int, filler: Any) -> List[List[Any]]:
    """
    Group elements into lists of size `group_size` and fill empty cells with
    `filler`. Recipe from https://docs.python.org/3/library/itertools.html
    """
    args = [iter(array)] * group_size
    return list(map(list, zip_longest(*args, fillvalue=filler)))

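# Example: split_by([1, 2, 3, 4, 5], 2, None) -> [[1, 2], [3, 4], [5, None]].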