Compare commits


6 Commits
1.6.0 ... 1.5.1

Author       SHA1        Message                                                        Date
Tim Abbott   bd01b1e2e4  Release Zulip Server 1.5.1.                                    2017-02-07 11:24:57 -08:00
Tim Abbott   58a7f6085f  stats: Include Zulip and realm name in heading.                2017-02-07 11:24:22 -08:00
Tim Abbott   3367593b52  analytics: Make page-content full height.                      2017-02-07 11:24:22 -08:00
Rishi Gupta  1a92ec5d86  analytics: Update bar colors on message_sent_over_time.        2017-02-07 11:24:22 -08:00
Rishi Gupta  7a8d685a71  analytics: Remove portico header and footer from stats.html.   2017-02-07 11:24:22 -08:00
Tim Abbott   3c3a8747c3  upgrade: Stop trying to copy node_modules out of tarballs.     2017-02-07 10:36:38 -08:00

    Now that we no longer use node_modules at all in production (it's only
    used to generate static assets), we don't include `node_modules` in
    the production tarballs, and thus we shouldn't attempt to copy
    `node_modules` out of the production tarballs when installing.

    Fixes a regression introduced in d71f2e7b9b.
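The last commit above is the actual 1.5.1 regression fix. As a rough illustration of the guard it describes -- a minimal hypothetical sketch, not Zulip's actual upgrade script -- the copy step simply becomes conditional on the unpacked tarball actually shipping a node_modules directory:

```python
import os
import shutil

def maybe_copy_node_modules(tarball_dir, deploy_dir):
    # type: (str, str) -> None
    """Hypothetical helper: tarballs built after d71f2e7b9b no longer
    include node_modules, so only copy it when it is present."""
    src = os.path.join(tarball_dir, "node_modules")
    if os.path.isdir(src):
        shutil.copytree(src, os.path.join(deploy_dir, "node_modules"))
```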
2026 changed files with 56647 additions and 122672 deletions

.editorconfig

@@ -6,7 +6,7 @@ charset = utf-8
 trim_trailing_whitespace = true
 insert_final_newline = true

-[*.{sh,py,js,json,yml,xml,css,md,markdown,handlebars,html}]
+[*.{sh,py,js, json,yml,xml, css, md,markdown, handlebars,html}]
 indent_style = space
 indent_size = 4

.eslintignore

@@ -1,3 +1,3 @@
+static/js/bundle.js
 static/js/blueslip.js
 puppet/zulip_ops/files/statsd/local.js
-static/webpack-bundles

.eslintrc.json

@@ -14,56 +14,25 @@
     "Dropbox": false,
     "SockJS": false,
     "marked": false,
-    "moment": false,
     "i18n": false,
-    "DynamicText": false,
     "bridge": false,
     "page_params": false,
     "status_classes": false,
     "password_quality": false,
-    "attachments_ui": false,
     "csrf_token": false,
     "typeahead_helper": false,
-    "pygments_data": false,
     "popovers": false,
     "server_events": false,
-    "server_events_dispatch": false,
     "ui": false,
-    "ui_report": false,
-    "ui_util": false,
-    "lightbox": false,
     "stream_color": false,
     "people": false,
     "navigate": false,
-    "settings_account": false,
-    "settings_display": false,
-    "settings_notifications": false,
-    "settings_muting": false,
-    "settings_lab": false,
-    "settings_bots": false,
-    "settings_sections": false,
-    "settings_emoji": false,
-    "settings_org": false,
-    "settings_users": false,
-    "settings_streams": false,
-    "settings_filters": false,
     "settings": false,
     "resize": false,
     "loading": false,
-    "typing": false,
-    "typing_events": false,
-    "typing_data": false,
-    "typing_status": false,
     "compose": false,
-    "compose_actions": false,
-    "compose_state": false,
     "compose_fade": false,
-    "overlays": false,
-    "stream_create": false,
-    "stream_edit": false,
     "subs": false,
-    "stream_muting": false,
-    "stream_events": false,
     "timerender": false,
     "message_live_update": false,
     "message_edit": false,
@@ -71,10 +40,8 @@
     "composebox_typeahead": false,
     "search": false,
     "topic_list": false,
-    "topic_generator": false,
     "gear_menu": false,
     "hashchange": false,
-    "hash_util": false,
     "message_list": false,
     "Filter": false,
     "pointer": false,
@@ -87,41 +54,28 @@
     "Socket": false,
     "channel": false,
     "components": false,
-    "message_viewport": false,
-    "upload_widget": false,
+    "viewport": false,
     "avatar": false,
-    "realm_icon": false,
     "feature_flags": false,
     "search_suggestion": false,
     "referral": false,
     "notifications": false,
     "message_flags": false,
     "bot_data": false,
-    "stream_sort": false,
     "stream_list": false,
-    "stream_popover": false,
-    "narrow_state": false,
     "narrow": false,
+    "narrow_state": false,
-    "admin_sections": false,
     "admin": false,
     "stream_data": false,
-    "list_util": false,
     "muting": false,
     "Dict": false,
     "unread": false,
     "alert_words_ui": false,
     "message_store": false,
-    "message_util": false,
-    "message_events": false,
-    "message_fetch": false,
     "favicon": false,
     "condense": false,
-    "list_render": false,
     "floating_recipient_bar": false,
     "tab_bar": false,
     "emoji": false,
-    "presence": false,
     "activity": false,
     "invite": false,
     "colorspace": false,
@@ -130,26 +84,15 @@
     "templates": false,
     "alert_words": false,
     "fenced_code": false,
-    "markdown": false,
     "echo": false,
     "localstorage": false,
-    "localStorage": false,
     "current_msg_list": true,
     "home_msg_list": false,
     "pm_list": false,
-    "pm_conversations": false,
-    "recent_senders": false,
     "unread_ui": false,
-    "unread_ops": false,
     "user_events": false,
     "Plotly": false,
-    "emoji_codes": false,
-    "drafts": false,
-    "katex": false,
-    "Clipboard": false,
-    "emoji_picker": false,
-    "hotspots": false,
-    "compose_ui": false
+    "emoji_codes": false
 },
 "rules": {
     "no-restricted-syntax": 0,
@@ -192,6 +135,7 @@
     "no-new-func": "error",
     "space-before-function-paren": ["error", { "anonymous": "always", "named": "never", "asyncArrow": "always" }],
     "no-param-reassign": 0,
+    "prefer-spread": "error",
     "arrow-spacing": ["error", { "before": true, "after": true }],
     "no-alert": 2,
     "no-array-constructor": 2,

.gitignore

@@ -18,11 +18,8 @@ coverage/
 /zproject/dev-secrets.conf
 static/js/bundle.js
 static/generated/emoji
-static/generated/pygments_data.js
 static/generated/github-contributors.json
 static/locale/language_options.json
-static/third/emoji-data
-static/webpack-bundles
 /node_modules
 /staticfiles.json
 npm-debug.log
@@ -30,5 +27,3 @@ npm-debug.log
 var/*
 .vscode/
 tools/conf.ini
-tools/custom_provision
-api/bots/john/assets/var/database.db

.gitlint

@@ -1,13 +0,0 @@
-[general]
-ignore=title-trailing-punctuation, body-min-length, body-is-missing
-
-extra-path=tools/lib/gitlint-rules.py
-
-[title-match-regex]
-regex=^.+\.$
-
-[title-max-length]
-line-length=76
-
-[body-max-line-length]
-line-length=76

.travis.yml

@@ -1,31 +1,17 @@
-# See https://zulip.readthedocs.io/en/latest/events-system.html for
-# high-level documentation on our Travis CI setup.
 dist: trusty
+before_install:
+  - nvm install 0.10
 install:
   # Disable Travis CI's built-in NVM installation
   - mv ~/.nvm ~/.travis-nvm-disabled
-  # Install coveralls, the library for the code coverage reporting tool we use
   - pip install coveralls
-  # This is the main setup job for the test suite
   - tools/travis/setup-$TEST_SUITE
-  # Clean any virtualenvs that are not in use to avoid our cache
-  # becoming huge. TODO: Add similar cleanup code for the other caches.
   - tools/clean-venv-cache --travis
-script:
-  # We unset GEM_PATH here as a hack to work around Travis CI having
-  # broken running their system puppet with Ruby. See
-  # https://travis-ci.org/zulip/zulip/jobs/240120991 for an example traceback.
-  - unset GEM_PATH
-  - ./tools/travis/$TEST_SUITE
 cache:
   - apt: false
   - directories:
     - $HOME/zulip-venv-cache
-    - $HOME/zulip-npm-cache
-    - $HOME/zulip-emoji-cache
+    - node_modules
    - $HOME/node
 env:
   global:
@@ -34,10 +20,13 @@ env:
     - COVERALLS_SERVICE_NAME=travis-pro
     - COVERALLS_REPO_TOKEN=hnXUEBKsORKHc8xIENGs9JjktlTb2HKlG
     - BOTO_CONFIG=/tmp/nowhere
+  matrix:
+    - TEST_SUITE=frontend
+    - TEST_SUITE=backend
 language: python
-# We run all of our test suites for both Python 2.7 and 3.4, with the
-# exception of static analysis, which is just run once (and checks
-# against both Python versions).
+python:
+  - "2.7"
+  - "3.4"
 matrix:
   include:
     - python: "3.4"
@@ -46,31 +35,20 @@ matrix:
       env: TEST_SUITE=production
     - python: "2.7"
       env: TEST_SUITE=production
-    - python: "2.7"
-      env: TEST_SUITE=frontend
-    - python: "3.4"
-      env: TEST_SUITE=frontend
-    - python: "2.7"
-      env: TEST_SUITE=backend
-    - python: "3.4"
-      env: TEST_SUITE=backend
+# command to run tests
+script:
+  - unset GEM_PATH
+  - ./tools/travis/$TEST_SUITE
 sudo: required
 services:
   - docker
 addons:
   artifacts:
     paths:
-      # Casper debugging data (screenshots, etc.) is super useful for
-      # debugging test flakes.
      - $(ls var/casper/* | tr "\n" ":")
      - $(ls /tmp/zulip-test-event-log/* | tr "\n" ":")
   postgresql: "9.3"
 after_success:
   coveralls
 notifications:
-  webhooks:
-    urls:
-      - https://coveralls.io/webhook?repo_token=$COVERALLS_REPO_TOKEN
-      - https://zulip.org/zulipbot/travis
-    on_success: always
-    on_failure: always
+  webhooks: https://coveralls.io/webhook?repo_token=$COVERALLS_REPO_TOKEN

README.md

@@ -17,7 +17,7 @@ previews, group private messages, audible notifications,
 missed-message emails, desktop apps, and much more.

 Further information on the Zulip project and its features can be found
-at <https://www.zulip.org>.
+at https://www.zulip.org.

 [![Build Status](https://travis-ci.org/zulip/zulip.svg?branch=master)](https://travis-ci.org/zulip/zulip) [![Coverage Status](https://coveralls.io/repos/github/zulip/zulip/badge.svg?branch=master)](https://coveralls.io/github/zulip/zulip?branch=master) [![docs](https://readthedocs.org/projects/zulip/badge/?version=latest)](http://zulip.readthedocs.io/en/latest/) [![Zulip chat](https://img.shields.io/badge/zulip-join_chat-brightgreen.svg)](https://chat.zulip.org)
@@ -25,15 +25,18 @@ at <https://www.zulip.org>.
 There are several places online where folks discuss Zulip.

-* The primary place is the
-  [Zulip development community Zulip server](https://zulip.readthedocs.io/en/latest/chat-zulip-org.html).
-* For Google Summer of Code students and applicants, we have
-  [a mailing list](https://groups.google.com/forum/#!forum/zulip-gsoc)
-  for help, questions, and announcements. But it's often simpler to
-  visit chat.zulip.org instead.
-* We have
+One of those places is our [public Zulip instance](https://chat.zulip.org/).
+You can go through the simple signup process at that link, and then you
+will soon be talking to core Zulip developers and other users. To get
+help in real time, you will have the best luck finding core developers
+roughly between 16:00 UTC and 23:59 UTC. Most questions get a reply
+within minutes to a few hours, depending on time of day.
+
+For Google Summer of Code students and applicants, we have [a mailing
+list](https://groups.google.com/forum/#!forum/zulip-gsoc) for help,
+questions, and announcements.
+
+We have
 [a public mailing list](https://groups.google.com/forum/#!forum/zulip-devel)
 that is currently pretty low traffic because most discussions happen
 in our public Zulip instance. We use it to announce Zulip developer
@@ -44,14 +47,13 @@ ask for generic help getting started as a contributor (e.g. because
 you want to do Google Summer of Code). The rest of this page covers
 how to get involved in the Zulip project in detail.

-* Zulip also has a [blog](https://blog.zulip.org/) and
-  [twitter account](https://twitter.com/zuliposs).
-* Last but not least, we use [GitHub](https://github.com/zulip/zulip)
-  to track Zulip-related issues (and store our code, of course).
+Zulip also has a [blog](https://blog.zulip.org/).
+Last but not least, we use [GitHub](https://github.com/zulip/zulip) to
+track Zulip-related issues (and store our code, of course).
 Anybody with a GitHub account should be able to create Issues there
-pertaining to bugs or enhancement requests. We also use Pull Requests
-as our primary mechanism to receive code contributions.
+pertaining to bugs or enhancement requests. We also use Pull
+Requests as our primary mechanism to receive code contributions.

 The Zulip community has a [Code of Conduct][code-of-conduct].
@@ -66,21 +68,17 @@ installation guide][dev-install].
 Zulip in production supports Ubuntu 14.04 Trusty and Ubuntu 16.04
 Xenial. Work is ongoing on adding support for additional
 platforms. The installation process is documented at
-<https://zulip.org/server.html> and in more detail in [the
+https://zulip.org/server.html and in more detail in [the
 documentation](https://zulip.readthedocs.io/en/latest/prod-install.html).

 ## Ways to contribute

-Zulip welcomes all forms of contributions! This page documents the
+Zulip welcomes all forms of contributions! The page documents the
 Zulip development process.

 * **Pull requests**. Before a pull request can be merged, you need to
   sign the [Dropbox Contributor License Agreement][cla]. Also,
   please skim our [commit message style guidelines][doc-commit-style].
-  We encourage early pull requests for work in progress. Prefix the title
-  of your pull request with `[WIP]` and reference it when asking for
-  community feedback. When you are ready for final review, remove
-  the `[WIP]`.

 * **Testing**. The Zulip automated tests all run automatically when
   you submit a pull request, but you can also run them all in your
@@ -104,10 +102,10 @@ relevant list! Please report any security issues you discover to
 zulip-security@googlegroups.com.

 * **App codebases**. This repository is for the Zulip server and web
-  app (including most integrations); the
-  [React Native Mobile iOS app][ios-exp], [Android app][Android],
-  [new Electron desktop app][electron], and
-  [legacy QT-based desktop app][desktop] are all separate repositories.
+  app (including most integrations); the [desktop][], [Android][], and
+  [iOS][] apps, are separate repositories, as are our [experimental
+  React Native iOS app][ios-exp] and [alpha Electron desktop
+  app][electron].

 * **Glue code**. We maintain a [Hubot adapter][hubot-adapter] and several
   integrations ([Phabricator][phab], [Jenkins][], [Puppet][], [Redmine][],
@@ -120,14 +118,6 @@ and [Trello][]), plus [node.js API bindings][node], an [isomorphic
 [translating documentation][transifex] if you're interested in
 contributing!

-* **Code Reviews**. Zulip is all about community and helping each
-  other out. Check out [#code review][code-review] on
-  [chat.zulip.org](https://zulip.readthedocs.io/en/latest/chat-zulip-org.html)
-  to help review PRs and give comments on other people's work. Everyone is
-  welcome to participate, even those new to Zulip! Even just checking out
-  the code, manually testing it, and posting on whether or not it worked
-  is valuable.
-
 [cla]: https://opensource.dropbox.com/cla/
 [code-of-conduct]: https://zulip.readthedocs.io/en/latest/code-of-conduct.html
 [dev-install]: https://zulip.readthedocs.io/en/latest/dev-overview.html
@@ -140,6 +130,7 @@ is valuable.
 [gh-issues]: https://github.com/zulip/zulip/issues
 [desktop]: https://github.com/zulip/zulip-desktop
 [android]: https://github.com/zulip/zulip-android
+[ios]: https://github.com/zulip/zulip-ios
 [ios-exp]: https://github.com/zulip/zulip-mobile
 [email-android]: https://groups.google.com/forum/#!forum/zulip-android
 [email-ios]: https://groups.google.com/forum/#!forum/zulip-ios
@@ -154,27 +145,16 @@ is valuable.
 [tsearch]: https://github.com/zulip/tsearch_extras
 [transifex]: https://zulip.readthedocs.io/en/latest/translating.html#testing-translations
 [z-org]: https://github.com/zulip/zulip.github.io
-[code-review]: https://chat.zulip.org/#narrow/stream/code.20review

 ## Google Summer of Code

 We participated in
-[GSoC](https://developers.google.com/open-source/gsoc/) in 2016 (with
-[great results](https://blog.zulip.org/2016/10/13/static-types-in-python-oh-mypy/))
-and are participating in 2017 as well. For guidance, please read
+[GSoC](https://developers.google.com/open-source/gsoc/) last year and
+hope to do so again in 2017. For guidance, please read
 [our GSoC instructions and ideas page](https://github.com/zulip/zulip.github.io/blob/master/gsoc-ideas.md)
 and feel free to email
 [our GSoC mailing list](https://groups.google.com/forum/#!forum/zulip-gsoc).

-**Note**: For GSoC 2017, we will be focused on making our
-[React Native app](https://github.com/zulip/zulip-mobile) better
-rather than developing the
-[Java Android app](https://github.com/zulip/zulip-android) and
-[React Native app](https://github.com/zulip/zulip-mobile) in
-parallel. You can review
-[our detailed plan](https://github.com/zulip/zulip-android/blob/master/android-strategy.md)
-for further details on the motivation and logistics.
-
 ## How to get involved with contributing to Zulip

 First, subscribe to the Zulip [development discussion mailing
@@ -222,25 +202,16 @@ Another way to find issues in Zulip is to take advantage of our
 our issues into areas like admin, compose, emoji, hotkeys, i18n,
 onboarding, search, etc. You can see this here:

-<https://github.com/zulip/zulip/labels>
+[https://github.com/zulip/zulip/labels]

 Click on any of the "area:" labels and you will see all the tickets
 related to your area of interest.

-If you're excited about helping with an open issue, make sure to claim
-the issue by commenting the following in the comment section:
-"**@zulipbot** claim". **@zulipbot** will assign you to the issue and
-label the issue as **in progress**. For more details, check out
-[**@zulipbot**](https://github.com/zulip/zulipbot).
-
-You're encouraged to ask questions on how to best implement or debug
-your changes -- the Zulip maintainers are excited to answer questions
-to help you stay unblocked and working efficiently. It's great to ask
-questions in comments on GitHub issues and pull requests, or [on
-chat.zulip.org](https://zulip.readthedocs.io/en/latest/chat-zulip-org.html). We'll
-direct longer discussions to Zulip chat, but please post a summary of
-what you learned from the chat, or link to the conversation, in a
-comment on the GitHub issue.
+If you're excited about helping with an open issue, just post on the
+conversation thread that you're working on it. You're encouraged to
+ask questions on how to best implement or debug your changes -- the
+Zulip maintainers are excited to answer questions to help you stay
+unblocked and working efficiently.

 We also welcome suggestions of features that you feel would be
 valuable or changes that you feel would make Zulip a better open
@@ -270,16 +241,9 @@ Feedback on how to make this development process more efficient, fun,
 and friendly to new contributors is very welcome! Just send an email
 to the Zulip Developers list with your thoughts.

-When you feel like you have completed your work on an issue, post your
-PR to the
-[#code review](https://chat.zulip.org/#narrow/stream/code.20review)
-stream on [chat.zulip.org](https://zulip.readthedocs.io/en/latest/chat-zulip-org.html).
-This is our lightweight process that gives other developers the
-opportunity to give you comments and suggestions on your work.
-
 ## License

-Copyright 2011-2017 Dropbox, Inc., Kandra Labs, Inc., and contributors
+Copyright 2011-2016 Dropbox, Inc. and contributors

 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.

Vagrantfile

@@ -99,21 +99,8 @@ set -o pipefail
 if [ -d "/sys/fs/selinux" ]; then
     sudo mount -o remount,ro /sys/fs/selinux
 fi

-# Set default locale, this prevents errors if the user has another locale set.
-if ! grep -q 'LC_ALL=en_US.UTF-8' /etc/default/locale; then
-    echo "LC_ALL=en_US.UTF-8" | sudo tee -a /etc/default/locale
-fi
-
-# Provision the development environment
 ln -nsf /srv/zulip ~/zulip
 /srv/zulip/tools/provision
-
-# Run any custom provision hooks the user has configured
-if [ -f /srv/zulip/tools/custom_provision ]; then
-    chmod +x /srv/zulip/tools/custom_provision
-    /srv/zulip/tools/custom_provision
-fi
 SCRIPT

 config.vm.provision "shell",

analytics/lib/counts.py

@@ -1,24 +1,19 @@
-from django.conf import settings
 from django.db import connection, models
-from django.db.models import F
+from django.utils import timezone
+from django.conf import settings
+from datetime import timedelta, datetime

 from analytics.models import InstallationCount, RealmCount, \
-    UserCount, StreamCount, BaseCount, FillState, Anomaly, installation_epoch, \
-    last_successful_fill
-from zerver.models import Realm, UserProfile, Message, Stream, \
-    UserActivityInterval, RealmAuditLog, models
-from zerver.lib.timestamp import floor_to_day, floor_to_hour, ceiling_to_day, \
-    ceiling_to_hour
-from typing import Any, Callable, Dict, List, Optional, Text, Tuple, Type, Union
-from collections import defaultdict, OrderedDict
-from datetime import timedelta, datetime
+    UserCount, StreamCount, BaseCount, FillState, installation_epoch
+from zerver.models import Realm, UserProfile, Message, Stream, models
+from zerver.lib.timestamp import floor_to_day
+from typing import Any, Optional, Type, Tuple, Text

 import logging
 import time

 ## Logging setup ##
 log_format = '%(asctime)s %(levelname)-8s %(message)s'
 logging.basicConfig(format=log_format)
@@ -30,68 +25,47 @@ logger = logging.getLogger("zulip.management")
 logger.setLevel(logging.INFO)
 logger.addHandler(file_handler)

-# You can't subtract timedelta.max from a datetime, so use this instead
-TIMEDELTA_MAX = timedelta(days=365*1000)
-
-## Class definitions ##
+# First post office in Boston
+MIN_TIME = datetime(1639, 1, 1, 0, 0, 0, tzinfo=timezone.utc)

 class CountStat(object):
     HOUR = 'hour'
     DAY = 'day'
     FREQUENCIES = frozenset([HOUR, DAY])
+    # Allowed intervals are HOUR, DAY, and, GAUGE
+    GAUGE = 'gauge'

-    def __init__(self, property, data_collector, frequency, interval=None):
-        # type: (str, DataCollector, str, Optional[timedelta]) -> None
+    def __init__(self, property, zerver_count_query, filter_args, group_by, frequency, is_gauge):
+        # type: (str, ZerverCountQuery, Dict[str, bool], Optional[Tuple[models.Model, str]], str, bool) -> None
         self.property = property
-        self.data_collector = data_collector
+        self.zerver_count_query = zerver_count_query
         # might have to do something different for bitfields
+        self.filter_args = filter_args
+        self.group_by = group_by
         if frequency not in self.FREQUENCIES:
-            raise AssertionError("Unknown frequency: %s" % (frequency,))
+            raise ValueError("Unknown frequency: %s" % (frequency,))
         self.frequency = frequency
-        if interval is not None:
-            self.interval = interval
-        elif frequency == CountStat.HOUR:
-            self.interval = timedelta(hours=1)
-        else: # frequency == CountStat.DAY
-            self.interval = timedelta(days=1)
+        self.interval = self.GAUGE if is_gauge else frequency

     def __unicode__(self):
         # type: () -> Text
         return u"<CountStat: %s>" % (self.property,)

-class LoggingCountStat(CountStat):
-    def __init__(self, property, output_table, frequency):
-        # type: (str, Type[BaseCount], str) -> None
-        CountStat.__init__(self, property, DataCollector(output_table, None), frequency)
-
-class DependentCountStat(CountStat):
-    def __init__(self, property, data_collector, frequency, interval=None, dependencies=[]):
-        # type: (str, DataCollector, str, Optional[timedelta], List[str]) -> None
-        CountStat.__init__(self, property, data_collector, frequency, interval=interval)
-        self.dependencies = dependencies
-
-class DataCollector(object):
-    def __init__(self, output_table, pull_function):
-        # type: (Type[BaseCount], Optional[Callable[[str, datetime, datetime], int]]) -> None
-        self.output_table = output_table
-        self.pull_function = pull_function
-
-## CountStat-level operations ##
+class ZerverCountQuery(object):
+    def __init__(self, zerver_table, analytics_table, query):
+        # type: (Type[models.Model], Type[BaseCount], Text) -> None
+        self.zerver_table = zerver_table
+        self.analytics_table = analytics_table
+        self.query = query
+
+def do_update_fill_state(fill_state, end_time, state):
+    # type: (FillState, datetime, int) -> None
+    fill_state.end_time = end_time
+    fill_state.state = state
+    fill_state.save()

 def process_count_stat(stat, fill_to_time):
     # type: (CountStat, datetime) -> None
-    if stat.frequency == CountStat.HOUR:
-        time_increment = timedelta(hours=1)
-    elif stat.frequency == CountStat.DAY:
-        time_increment = timedelta(days=1)
-    else:
-        raise AssertionError("Unknown frequency: %s" % (stat.frequency,))
-
-    if floor_to_hour(fill_to_time) != fill_to_time:
-        raise ValueError("fill_to_time must be on an hour boundary: %s" % (fill_to_time,))
-    if fill_to_time.tzinfo is None:
-        raise ValueError("fill_to_time must be timezone aware: %s" % (fill_to_time,))
-
     fill_state = FillState.objects.filter(property=stat.property).first()
     if fill_state is None:
         currently_filled = installation_epoch()
@@ -101,88 +75,81 @@ def process_count_stat(stat, fill_to_time):
         logger.info("INITIALIZED %s %s" % (stat.property, currently_filled))
     elif fill_state.state == FillState.STARTED:
         logger.info("UNDO START %s %s" % (stat.property, fill_state.end_time))
-        do_delete_counts_at_hour(stat, fill_state.end_time)
-        currently_filled = fill_state.end_time - time_increment
+        do_delete_count_stat_at_hour(stat, fill_state.end_time)
+        currently_filled = fill_state.end_time - timedelta(hours = 1)
         do_update_fill_state(fill_state, currently_filled, FillState.DONE)
         logger.info("UNDO DONE %s" % (stat.property,))
     elif fill_state.state == FillState.DONE:
         currently_filled = fill_state.end_time
     else:
-        raise AssertionError("Unknown value for FillState.state: %s." % (fill_state.state,))
-
-    if isinstance(stat, DependentCountStat):
-        for dependency in stat.dependencies:
-            dependency_fill_time = last_successful_fill(dependency)
-            if dependency_fill_time is None:
-                logger.warning("DependentCountStat %s run before dependency %s." %
-                               (stat.property, dependency))
-                return
-            fill_to_time = min(fill_to_time, dependency_fill_time)
+        raise ValueError("Unknown value for FillState.state: %s." % (fill_state.state,))

-    currently_filled = currently_filled + time_increment
+    currently_filled = currently_filled + timedelta(hours = 1)
     while currently_filled <= fill_to_time:
-        logger.info("START %s %s" % (stat.property, currently_filled))
+        logger.info("START %s %s %s" % (stat.property, stat.interval, currently_filled))
         start = time.time()
         do_update_fill_state(fill_state, currently_filled, FillState.STARTED)
         do_fill_count_stat_at_hour(stat, currently_filled)
         do_update_fill_state(fill_state, currently_filled, FillState.DONE)
         end = time.time()
-        currently_filled = currently_filled + time_increment
-        logger.info("DONE %s (%dms)" % (stat.property, (end-start)*1000))
-
-def do_update_fill_state(fill_state, end_time, state):
-    # type: (FillState, datetime, int) -> None
-    fill_state.end_time = end_time
-    fill_state.state = state
-    fill_state.save()
+        currently_filled = currently_filled + timedelta(hours = 1)
+        logger.info("DONE %s %s (%dms)" % (stat.property, stat.interval, (end-start)*1000))

-# We assume end_time is valid (e.g. is on a day or hour boundary as appropriate)
-# and is timezone aware. It is the caller's responsibility to enforce this!
+# We assume end_time is on an hour boundary, and is timezone aware.
+# It is the caller's responsibility to enforce this!
 def do_fill_count_stat_at_hour(stat, end_time):
     # type: (CountStat, datetime) -> None
-    start_time = end_time - stat.interval
-    if not isinstance(stat, LoggingCountStat):
-        timer = time.time()
-        assert(stat.data_collector.pull_function is not None)
-        rows_added = stat.data_collector.pull_function(stat.property, start_time, end_time)
-        logger.info("%s run pull_function (%dms/%sr)" %
-                    (stat.property, (time.time()-timer)*1000, rows_added))
+    if stat.frequency == CountStat.DAY and (end_time != floor_to_day(end_time)):
+        return
+
+    if stat.interval == CountStat.HOUR:
+        start_time = end_time - timedelta(hours = 1)
+    elif stat.interval == CountStat.DAY:
+        start_time = end_time - timedelta(days = 1)
+    else: # stat.interval == CountStat.GAUGE
+        start_time = MIN_TIME
+
+    do_pull_from_zerver(stat, start_time, end_time)
     do_aggregate_to_summary_table(stat, end_time)

-def do_delete_counts_at_hour(stat, end_time):
+def do_delete_count_stat_at_hour(stat, end_time):
     # type: (CountStat, datetime) -> None
-    if isinstance(stat, LoggingCountStat):
-        InstallationCount.objects.filter(property=stat.property, end_time=end_time).delete()
-        if stat.data_collector.output_table in [UserCount, StreamCount]:
-            RealmCount.objects.filter(property=stat.property, end_time=end_time).delete()
-    else:
-        UserCount.objects.filter(property=stat.property, end_time=end_time).delete()
-        StreamCount.objects.filter(property=stat.property, end_time=end_time).delete()
-        RealmCount.objects.filter(property=stat.property, end_time=end_time).delete()
-        InstallationCount.objects.filter(property=stat.property, end_time=end_time).delete()
+    UserCount.objects.filter(property = stat.property, end_time = end_time).delete()
+    StreamCount.objects.filter(property = stat.property, end_time = end_time).delete()
+    RealmCount.objects.filter(property = stat.property, end_time = end_time).delete()
+    InstallationCount.objects.filter(property = stat.property, end_time = end_time).delete()
+
+def do_drop_all_analytics_tables():
+    # type: () -> None
+    UserCount.objects.all().delete()
+    StreamCount.objects.all().delete()
+    RealmCount.objects.all().delete()
+    InstallationCount.objects.all().delete()
+    FillState.objects.all().delete()

 def do_aggregate_to_summary_table(stat, end_time):
     # type: (CountStat, datetime) -> None
     cursor = connection.cursor()

     # Aggregate into RealmCount
-    output_table = stat.data_collector.output_table
-    if output_table in (UserCount, StreamCount):
+    analytics_table = stat.zerver_count_query.analytics_table
+    if analytics_table in (UserCount, StreamCount):
         realmcount_query = """
             INSERT INTO analytics_realmcount
                 (realm_id, value, property, subgroup, end_time)
             SELECT
-                zerver_realm.id, COALESCE(sum(%(output_table)s.value), 0), '%(property)s',
-                %(output_table)s.subgroup, %%(end_time)s
+                zerver_realm.id, COALESCE(sum(%(analytics_table)s.value), 0), '%(property)s',
+                %(analytics_table)s.subgroup, %%(end_time)s
             FROM zerver_realm
-            JOIN %(output_table)s
+            JOIN %(analytics_table)s
             ON
-                zerver_realm.id = %(output_table)s.realm_id
-            WHERE
-                %(output_table)s.property = '%(property)s' AND
-                %(output_table)s.end_time = %%(end_time)s
-            GROUP BY zerver_realm.id, %(output_table)s.subgroup
-        """ % {'output_table': output_table._meta.db_table,
+            (
+                %(analytics_table)s.realm_id = zerver_realm.id AND
+                %(analytics_table)s.property = '%(property)s' AND
+                %(analytics_table)s.end_time = %%(end_time)s
+            )
+            GROUP BY zerver_realm.id, %(analytics_table)s.subgroup
+        """ % {'analytics_table': analytics_table._meta.db_table,
               'property': stat.property}
         start = time.time()
         cursor.execute(realmcount_query, {'end_time': end_time})
@@ -197,9 +164,10 @@ def do_aggregate_to_summary_table(stat, end_time):
             sum(value), '%(property)s', analytics_realmcount.subgroup, %%(end_time)s
         FROM analytics_realmcount
         WHERE
+        (
             property = '%(property)s' AND
             end_time = %%(end_time)s
-        GROUP BY analytics_realmcount.subgroup
+        ) GROUP BY analytics_realmcount.subgroup
     """ % {'property': stat.property}
     start = time.time()
     cursor.execute(installationcount_query, {'end_time': end_time})
@@ -207,91 +175,55 @@ def do_aggregate_to_summary_table(stat, end_time):
     logger.info("%s InstallationCount aggregation (%dms/%sr)" % (stat.property, (end-start)*1000, cursor.rowcount))
     cursor.close()

-## Utility functions called from outside counts.py ##
-
-# called from zerver/lib/actions.py; should not throw any errors
-def do_increment_logging_stat(zerver_object, stat, subgroup, event_time, increment=1):
-    # type: (Union[Realm, UserProfile, Stream], CountStat, Optional[Union[str, int, bool]], datetime, int) -> None
-    table = stat.data_collector.output_table
-    if table == RealmCount:
-        id_args = {'realm': zerver_object}
-    elif table == UserCount:
-        id_args = {'realm': zerver_object.realm, 'user': zerver_object}
-    else: # StreamCount
-        id_args = {'realm': zerver_object.realm, 'stream': zerver_object}
-
-    if stat.frequency == CountStat.DAY:
-        end_time = ceiling_to_day(event_time)
-    else: # CountStat.HOUR:
-        end_time = ceiling_to_hour(event_time)
-
-    row, created = table.objects.get_or_create(
-        property=stat.property, subgroup=subgroup, end_time=end_time,
-        defaults={'value': increment}, **id_args)
-    if not created:
-        row.value = F('value') + increment
-        row.save(update_fields=['value'])
-
-def do_drop_all_analytics_tables():
-    # type: () -> None
-    UserCount.objects.all().delete()
-    StreamCount.objects.all().delete()
-    RealmCount.objects.all().delete()
-    InstallationCount.objects.all().delete()
-    FillState.objects.all().delete()
-    Anomaly.objects.all().delete()
-
-## DataCollector-level operations ##
-
-def do_pull_by_sql_query(property, start_time, end_time, query, group_by):
-    # type: (str, datetime, datetime, str, Optional[Tuple[models.Model, str]]) -> int
-    if group_by is None:
+# This is the only method that hits the prod databases directly.
+def do_pull_from_zerver(stat, start_time, end_time):
+    # type: (CountStat, datetime, datetime) -> None
+    zerver_table = stat.zerver_count_query.zerver_table._meta.db_table # type: ignore
+    join_args = ' '.join('AND %s.%s = %s' % (zerver_table, key, value)
+                         for key, value in stat.filter_args.items())
+    if stat.group_by is None:
         subgroup = 'NULL'
         group_by_clause = ''
     else:
-        subgroup = '%s.%s' % (group_by[0]._meta.db_table, group_by[1])
+        subgroup = '%s.%s' % (stat.group_by[0]._meta.db_table, stat.group_by[1])
         group_by_clause = ', ' + subgroup

-    # We do string replacement here because cursor.execute will reject a
-    # group_by_clause given as a param.
-    # We pass in the datetimes as params to cursor.execute so that we don't have to
-    # think about how to convert python datetimes to SQL datetimes.
-    query_ = query % {'property': property, 'subgroup': subgroup,
-                      'group_by_clause': group_by_clause}
+    # We do string replacement here because passing join_args as a param
+    # may result in problems when running cursor.execute; we do
+    # the string formatting prior so that cursor.execute runs it as sql
+    query_ = stat.zerver_count_query.query % {'zerver_table': zerver_table,
+                                              'property': stat.property,
+                                              'join_args': join_args,
+                                              'subgroup': subgroup,
+                                              'group_by_clause': group_by_clause}
     cursor = connection.cursor()
+    start = time.time()
     cursor.execute(query_, {'time_start': start_time, 'time_end': end_time})
-    rowcount = cursor.rowcount
+    end = time.time()
+    logger.info("%s do_pull_from_zerver (%dms/%sr)" % (stat.property, (end-start)*1000, cursor.rowcount))
     cursor.close()
-    return rowcount
-
-def sql_data_collector(output_table, query, group_by):
-    # type: (Type[BaseCount], str, Optional[Tuple[models.Model, str]]) -> DataCollector
-    def pull_function(property, start_time, end_time):
-        # type: (str, datetime, datetime) -> int
-        return do_pull_by_sql_query(property, start_time, end_time, query, group_by)
-    return DataCollector(output_table, pull_function)
-
-def do_pull_minutes_active(property, start_time, end_time):
-    # type: (str, datetime, datetime) -> int
-    user_activity_intervals = UserActivityInterval.objects.filter(
-        end__gt=start_time, start__lt=end_time
-    ).select_related(
-        'user_profile'
-    ).values_list(
-        'user_profile_id', 'user_profile__realm_id', 'start', 'end')
-
-    seconds_active = defaultdict(float) # type: Dict[Tuple[int, int], float]
-    for user_id, realm_id, interval_start, interval_end in user_activity_intervals:
-        start = max(start_time, interval_start)
-        end = min(end_time, interval_end)
-        seconds_active[(user_id, realm_id)] += (end - start).total_seconds()
-
-    rows = [UserCount(user_id=ids[0], realm_id=ids[1], property=property,
-                      end_time=end_time, value=int(seconds // 60))
-            for ids, seconds in seconds_active.items() if seconds >= 60]
-    UserCount.objects.bulk_create(rows)
-    return len(rows)
+count_user_by_realm_query = """
+    INSERT INTO analytics_realmcount
+        (realm_id, value, property, subgroup, end_time)
+    SELECT
+        zerver_realm.id, count(%(zerver_table)s),'%(property)s', %(subgroup)s, %%(time_end)s
+    FROM zerver_realm
+    JOIN zerver_userprofile
+    ON
+    (
+        zerver_userprofile.realm_id = zerver_realm.id AND
+        zerver_userprofile.date_joined >= %%(time_start)s AND
+        zerver_userprofile.date_joined < %%(time_end)s
+        %(join_args)s
+    )
+    WHERE
+        zerver_realm.date_created < %%(time_end)s
+    GROUP BY zerver_realm.id %(group_by_clause)s
+"""
+zerver_count_user_by_realm = ZerverCountQuery(UserProfile, RealmCount, count_user_by_realm_query)

+# currently .sender_id is only Message specific thing
 count_message_by_user_query = """
     INSERT INTO analytics_usercount
         (user_id, realm_id, value, property, subgroup, end_time)
@@ -300,162 +232,17 @@ count_message_by_user_query = """
     FROM zerver_userprofile
     JOIN zerver_message
     ON
-        zerver_userprofile.id = zerver_message.sender_id
-    WHERE
-        zerver_userprofile.date_joined < %%(time_end)s AND
+    (
+        zerver_message.sender_id = zerver_userprofile.id AND
         zerver_message.pub_date >= %%(time_start)s AND
         zerver_message.pub_date < %%(time_end)s
-    GROUP BY zerver_userprofile.id %(group_by_clause)s
-"""
-
-# Note: ignores the group_by / group_by_clause.
-count_message_type_by_user_query = """
-    INSERT INTO analytics_usercount
-        (realm_id, user_id, value, property, subgroup, end_time)
-    SELECT realm_id, id, SUM(count) AS value, '%(property)s', message_type, %%(time_end)s
-    FROM
-    (
-        SELECT zerver_userprofile.realm_id, zerver_userprofile.id, count(*),
-        CASE WHEN
-            zerver_recipient.type = 1 THEN 'private_message'
-        WHEN
-            zerver_recipient.type = 3 THEN 'huddle_message'
-        WHEN
-            zerver_stream.invite_only = TRUE THEN 'private_stream'
-        ELSE 'public_stream'
-        END
-        message_type
-        FROM zerver_userprofile
-        JOIN zerver_message
-        ON
-            zerver_userprofile.id = zerver_message.sender_id AND
-            zerver_message.pub_date >= %%(time_start)s AND
-            zerver_message.pub_date < %%(time_end)s
-        JOIN zerver_recipient
-        ON
-            zerver_message.recipient_id = zerver_recipient.id
-        LEFT JOIN zerver_stream
-        ON
-            zerver_recipient.type_id = zerver_stream.id
-        GROUP BY zerver_userprofile.realm_id, zerver_userprofile.id, zerver_recipient.type, zerver_stream.invite_only
-    ) AS subquery
-    GROUP BY realm_id, id, message_type
-"""
-
-# This query joins to the UserProfile table since all current queries that
-# use this also subgroup on UserProfile.is_bot. If in the future there is a
-# stat that counts messages by stream and doesn't need the UserProfile
-# table, consider writing a new query for efficiency.
-count_message_by_stream_query = """
-    INSERT INTO analytics_streamcount
-        (stream_id, realm_id, value, property, subgroup, end_time)
-    SELECT
-        zerver_stream.id, zerver_stream.realm_id, count(*), '%(property)s', %(subgroup)s, %%(time_end)s
-    FROM zerver_stream
-    JOIN zerver_recipient
-    ON
-        zerver_stream.id = zerver_recipient.type_id
-    JOIN zerver_message
-    ON
-        zerver_recipient.id = zerver_message.recipient_id
-    JOIN zerver_userprofile
-    ON
-        zerver_message.sender_id = zerver_userprofile.id
-    WHERE
-        zerver_stream.date_created < %%(time_end)s AND
-        zerver_recipient.type = 2 AND
-        zerver_message.pub_date >= %%(time_start)s AND
-        zerver_message.pub_date < %%(time_end)s
-    GROUP BY zerver_stream.id %(group_by_clause)s
-"""
-
-# Hardcodes the query needed by active_users:is_bot:day, since that is
-# currently the only stat that uses this.
-count_user_by_realm_query = """
-    INSERT INTO analytics_realmcount
-        (realm_id, value, property, subgroup, end_time)
-    SELECT
-        zerver_realm.id, count(*),'%(property)s', %(subgroup)s, %%(time_end)s
-    FROM zerver_realm
-    JOIN zerver_userprofile
-    ON
-        zerver_realm.id = zerver_userprofile.realm_id
+        %(join_args)s
+    )
     WHERE
-        zerver_realm.date_created < %%(time_end)s AND
-        zerver_userprofile.date_joined >= %%(time_start)s AND
-        zerver_userprofile.date_joined < %%(time_end)s AND
-        zerver_userprofile.is_active = TRUE
-    GROUP BY zerver_realm.id %(group_by_clause)s
-"""
-
-# Currently hardcodes the query needed for active_users_audit:is_bot:day.
-# Assumes that a user cannot have two RealmAuditLog entries with the same event_time and
-# event_type in ['user_created', 'user_deactivated', etc].
-# In particular, it's important to ensure that migrations don't cause that to happen.
-check_realmauditlog_by_user_query = """
-    INSERT INTO analytics_usercount
-        (user_id, realm_id, value, property, subgroup, end_time)
-    SELECT
-        ral1.modified_user_id, ral1.realm_id, 1, '%(property)s', %(subgroup)s, %%(time_end)s
-    FROM zerver_realmauditlog ral1
-    JOIN (
-        SELECT modified_user_id, max(event_time) AS max_event_time
-        FROM zerver_realmauditlog
-        WHERE
-            event_type in ('user_created', 'user_deactivated', 'user_activated', 'user_reactivated') AND
-            event_time < %%(time_end)s
-        GROUP BY modified_user_id
-    ) ral2
-    ON
-        ral1.event_time = max_event_time AND
-        ral1.modified_user_id = ral2.modified_user_id
-    JOIN zerver_userprofile
-    ON
-        ral1.modified_user_id = zerver_userprofile.id
-    WHERE
-        ral1.event_type in ('user_created', 'user_activated', 'user_reactivated')
-"""
-
-check_useractivityinterval_by_user_query = """
-    INSERT INTO analytics_usercount
-        (user_id, realm_id, value, property, subgroup, end_time)
-    SELECT
-        zerver_userprofile.id, zerver_userprofile.realm_id, 1, '%(property)s', %(subgroup)s, %%(time_end)s
-    FROM zerver_userprofile
-    JOIN zerver_useractivityinterval
-    ON
-        zerver_userprofile.id = zerver_useractivityinterval.user_profile_id
-    WHERE
-        zerver_useractivityinterval.end >= %%(time_start)s AND
-        zerver_useractivityinterval.start < %%(time_end)s
+        zerver_userprofile.date_joined < %%(time_end)s
     GROUP BY zerver_userprofile.id %(group_by_clause)s
 """
+zerver_count_message_by_user = ZerverCountQuery(Message, UserCount, count_message_by_user_query)

-count_realm_active_humans_query = """
-    INSERT INTO analytics_realmcount
-        (realm_id, value, property, subgroup, end_time)
-    SELECT
-        usercount1.realm_id, count(*), '%(property)s', NULL, %%(time_end)s
-    FROM (
-        SELECT realm_id, user_id
-        FROM analytics_usercount
-        WHERE
-            property = 'active_users_audit:is_bot:day' AND
-            subgroup = 'false' AND
-            end_time = %%(time_end)s
-    ) usercount1
-    JOIN (
-        SELECT realm_id, user_id
-        FROM analytics_usercount
-        WHERE
-            property = '15day_actives::day' AND
-            end_time = %%(time_end)s
-    ) usercount2
-    ON
-        usercount1.user_id = usercount2.user_id
-    GROUP BY usercount1.realm_id
-"""
-
 # Currently unused and untested
 count_stream_by_realm_query = """
@@ -466,71 +253,101 @@ count_stream_by_realm_query = """
     FROM zerver_realm
     JOIN zerver_stream
     ON
-        zerver_realm.id = zerver_stream.realm_id AND
-    WHERE
-        zerver_realm.date_created < %%(time_end)s AND
+    (
+        zerver_stream.realm_id = zerver_realm.id AND
         zerver_stream.date_created >= %%(time_start)s AND
         zerver_stream.date_created < %%(time_end)s
+        %(join_args)s
+    )
+    WHERE
+        zerver_realm.date_created < %%(time_end)s
     GROUP BY zerver_realm.id %(group_by_clause)s
 """
+zerver_count_stream_by_realm = ZerverCountQuery(Stream, RealmCount, count_stream_by_realm_query)

-## CountStat declarations ##
-
-count_stats_ = [
-    # Messages Sent stats
-    # Stats that count the number of messages sent in various ways.
-    # These are also the set of stats that read from the Message table.
-
-    CountStat('messages_sent:is_bot:hour',
-              sql_data_collector(UserCount, count_message_by_user_query, (UserProfile, 'is_bot')),
-              CountStat.HOUR),
-    CountStat('messages_sent:message_type:day',
-              sql_data_collector(UserCount, count_message_type_by_user_query, None), CountStat.DAY),
-    CountStat('messages_sent:client:day',
-              sql_data_collector(UserCount, count_message_by_user_query, (Message, 'sending_client_id')),
-              CountStat.DAY),
-    CountStat('messages_in_stream:is_bot:day',
-              sql_data_collector(StreamCount, count_message_by_stream_query, (UserProfile, 'is_bot')),
-              CountStat.DAY),
-
-    # Number of Users stats
-    # Stats that count the number of active users in the UserProfile.is_active sense.
-
-    # 'active_users_audit:is_bot:day' is the canonical record of which users were
-    # active on which days (in the UserProfile.is_active sense).
-    # Important that this stay a daily stat, so that 'realm_active_humans::day' works as expected.
-    CountStat('active_users_audit:is_bot:day',
-              sql_data_collector(UserCount, check_realmauditlog_by_user_query, (UserProfile, 'is_bot')),
-              CountStat.DAY),
-    # Sanity check on 'active_users_audit:is_bot:day', and a archetype for future LoggingCountStats.
-    # In RealmCount, 'active_users_audit:is_bot:day' should be the partial
-    # sum sequence of 'active_users_log:is_bot:day', for any realm that
-    # started after the latter stat was introduced.
-    LoggingCountStat('active_users_log:is_bot:day', RealmCount, CountStat.DAY),
-    # Another sanity check on 'active_users_audit:is_bot:day'. Is only an
-    # approximation, e.g. if a user is deactivated between the end of the
-    # day and when this stat is run, they won't be counted. However, is the
-    # simplest of the three to inspect by hand.
-    CountStat('active_users:is_bot:day',
-              sql_data_collector(RealmCount, count_user_by_realm_query, (UserProfile, 'is_bot')),
-              CountStat.DAY, interval=TIMEDELTA_MAX),
-
-    # User Activity stats
-    # Stats that measure user activity in the UserActivityInterval sense.
-
-    CountStat('15day_actives::day',
-              sql_data_collector(UserCount, check_useractivityinterval_by_user_query, None),
-              CountStat.DAY, interval=timedelta(days=15)-UserActivityInterval.MIN_INTERVAL_LENGTH),
-    CountStat('minutes_active::day', DataCollector(UserCount, do_pull_minutes_active), CountStat.DAY),
-
-    # Dependent stats
-    # Must come after their dependencies.
-
-    # Canonical account of the number of active humans in a realm on each day.
-    DependentCountStat('realm_active_humans::day',
-                       sql_data_collector(RealmCount, count_realm_active_humans_query, None),
-                       CountStat.DAY,
-                       dependencies=['active_users_audit:is_bot:day', '15day_actives::day'])
-]
-
-COUNT_STATS = OrderedDict([(stat.property, stat) for stat in count_stats_])
+# This query violates the count_X_by_Y_query conventions in several ways. One,
+# the X table is not specified by the query name; MessageType is not a zerver
+# table. Two, it ignores the subgroup column in the CountStat object; instead,
+# it uses 'message_type' from the subquery to fill in the subgroup column.
+count_message_type_by_user_query = """
+    INSERT INTO analytics_usercount
+        (realm_id, user_id, value, property, subgroup, end_time)
+    SELECT realm_id, id, SUM(count) AS value, '%(property)s', message_type, %%(time_end)s
+    FROM
+    (
+        SELECT zerver_userprofile.realm_id, zerver_userprofile.id, count(*),
+        CASE WHEN
+            zerver_recipient.type != 2 THEN 'private_message'
+        WHEN
+            zerver_stream.invite_only = TRUE THEN 'private_stream'
+        ELSE 'public_stream'
+        END
+        message_type
+        FROM zerver_userprofile
+        JOIN zerver_message
+        ON
+            zerver_message.sender_id = zerver_userprofile.id AND
+            zerver_message.pub_date >= %%(time_start)s AND
+            zerver_message.pub_date < %%(time_end)s
+            %(join_args)s
+        JOIN zerver_recipient
+        ON
+            zerver_recipient.id = zerver_message.recipient_id
+        LEFT JOIN zerver_stream
+        ON
+            zerver_stream.id = zerver_recipient.type_id
+        GROUP BY zerver_userprofile.realm_id, zerver_userprofile.id, zerver_recipient.type, zerver_stream.invite_only
+    ) AS subquery
+    GROUP BY realm_id, id, message_type
+"""
+zerver_count_message_type_by_user = ZerverCountQuery(Message, UserCount, count_message_type_by_user_query)
+
+# Note that this query also joins to the UserProfile table, since all
+# current queries that use this also subgroup on UserProfile.is_bot. If in
+# the future there is a query that counts messages by stream and doesn't need
+# the UserProfile table, consider writing a new query for efficiency.
+count_message_by_stream_query = """
+    INSERT INTO analytics_streamcount
+        (stream_id, realm_id, value, property, subgroup, end_time)
+    SELECT
+        zerver_stream.id, zerver_stream.realm_id, count(*), '%(property)s', %(subgroup)s, %%(time_end)s
+    FROM zerver_stream
+    JOIN zerver_recipient
+    ON
+    (
+        zerver_recipient.type = 2 AND
+        zerver_stream.id = zerver_recipient.type_id
+    )
+    JOIN zerver_message
+    ON
+    (
+        zerver_message.recipient_id = zerver_recipient.id AND
+        zerver_message.pub_date >= %%(time_start)s AND
+        zerver_message.pub_date < %%(time_end)s AND
+        zerver_stream.date_created < %%(time_end)s
+        %(join_args)s
+    )
+    JOIN zerver_userprofile
+    ON zerver_userprofile.id = zerver_message.sender_id
+    GROUP BY zerver_stream.id %(group_by_clause)s
+"""
+zerver_count_message_by_stream = ZerverCountQuery(Message, StreamCount, count_message_by_stream_query)
+
+COUNT_STATS = {
+    'active_users:is_bot:day': CountStat(
+        'active_users:is_bot:day', zerver_count_user_by_realm, {'is_active': True},
+        (UserProfile, 'is_bot'), CountStat.DAY, True),
+    'messages_sent:is_bot:hour': CountStat(
+        'messages_sent:is_bot:hour', zerver_count_message_by_user, {},
+        (UserProfile, 'is_bot'), CountStat.HOUR, False),
+    'messages_sent:message_type:day': CountStat(
+        'messages_sent:message_type:day', zerver_count_message_type_by_user, {},
+        None, CountStat.DAY, False),
+    'messages_sent:client:day': CountStat(
+        'messages_sent:client:day', zerver_count_message_by_user, {},
+        (Message, 'sending_client_id'), CountStat.DAY, False),
+    'messages_sent_to_stream:is_bot:hour': CountStat(
+        'messages_sent_to_stream:is_bot', zerver_count_message_by_stream, {},
+        (UserProfile, 'is_bot'), CountStat.HOUR, False)
+}
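To see how the 1.5.1 side of this file fits together end to end, here is a minimal driver sketch. It is illustrative only -- the real entry point is a separate management command, and the update_all_counts name here is hypothetical:

```python
from django.utils import timezone

from analytics.lib.counts import COUNT_STATS, process_count_stat

def update_all_counts():
    # type: () -> None
    # Fill every registered stat up to the most recently completed hour
    # (the 1.6.0 side requires a timezone-aware hour boundary).
    fill_to_time = timezone.now().replace(minute=0, second=0, microsecond=0)
    for stat in COUNT_STATS.values():
        # process_count_stat resumes from the stat's FillState row and
        # advances hour by hour, pulling rows from the zerver tables and
        # then aggregating them up to RealmCount/InstallationCount.
        process_count_stat(stat, fill_to_time)
```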

analytics/lib/fixtures.py

@@ -8,13 +8,12 @@ from analytics.lib.time_utils import time_range
 from datetime import datetime
 from math import sqrt
 from random import gauss, random, seed
-from typing import List
 from six.moves import range, zip

 def generate_time_series_data(days=100, business_hours_base=10, non_business_hours_base=10,
                               growth=1, autocorrelation=0, spikiness=1, holiday_rate=0,
-                              frequency=CountStat.DAY, partial_sum=False, random_seed=26):
+                              frequency=CountStat.DAY, is_gauge=False, random_seed=26):
     # type: (int, float, float, float, float, float, float, str, bool, int) -> List[int]
     """
     Generate semi-realistic looking time series data for testing analytics graphs.

@@ -32,7 +31,7 @@ def generate_time_series_data(days=100, business_hours_base=10, non_business_hou
     the variance.
     holiday_rate -- Fraction of days randomly set to 0, largely for testing how we handle 0s.
     frequency -- Should be CountStat.HOUR or CountStat.DAY.
-    partial_sum -- If True, return partial sum of the series.
+    is_gauge -- If True, return partial sum of the series.
     random_seed -- Seed for random number generator.
     """
     if frequency == CountStat.HOUR:

@@ -50,10 +49,10 @@ def generate_time_series_data(days=100, business_hours_base=10, non_business_hou
                       [24*non_business_hours_base] * 2
         holidays = [random() < holiday_rate for i in range(days)]
     else:
-        raise AssertionError("Unknown frequency: %s" % (frequency,))
+        raise ValueError("Unknown frequency: %s" % (frequency,))
     if length < 2:
-        raise AssertionError("Must be generating at least 2 data points. "
-                             "Currently generating %s" % (length,))
+        raise ValueError("Must be generating at least 2 data points. "
+                         "Currently generating %s" % (length,))
     growth_base = growth ** (1. / (length-1))
     values_no_noise = [seasonality[i % len(seasonality)] * (growth_base**i) for i in range(length)]

@@ -64,7 +63,7 @@ def generate_time_series_data(days=100, business_hours_base=10, non_business_hou
     values = [0 if holiday else int(v + sqrt(v)*noise_scalar*spikiness)
               for v, noise_scalar, holiday in zip(values_no_noise, noise_scalars, holidays)]
-    if partial_sum:
+    if is_gauge:
         for i in range(1, length):
             values[i] = values[i-1] + values[i]
     return [max(v, 0) for v in values]
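Whatever the flag is called on each side (partial_sum in 1.6.0, is_gauge in 1.5.1), the branch it guards is a plain running total. A self-contained sketch of that transformation:

def running_total(values):
    # Turn per-period counts into a cumulative series, as the
    # partial_sum / is_gauge branch above does in place.
    totals = []
    running = 0
    for v in values:
        running += v
        totals.append(running)
    return totals

assert running_total([3, 1, 4]) == [3, 4, 8]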

View File

@@ -17,7 +17,7 @@ def time_range(start, end, frequency, min_length):
         end = floor_to_day(end)
         step = timedelta(days=1)
     else:
-        raise AssertionError("Unknown frequency: %s" % (frequency,))
+        raise ValueError("Unknown frequency: %s" % (frequency,))
     times = []
     if min_length is not None:
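The AssertionError/ValueError swap here and in fixtures.py reflects the same judgment call: an unknown frequency is a programming error in the caller, not user input. For intuition about what the daily case of time_range produces, a hedged sketch (simplified, not the real zerver.lib helper):

from datetime import datetime, timedelta

def daily_time_range(end, length):
    # Consecutive day boundaries ending at `end`, oldest first.
    step = timedelta(days=1)
    return [end - i * step for i in range(length - 1, -1, -1)]

print(daily_time_range(datetime(2017, 2, 7), 3))
# [datetime(2017, 2, 5, 0, 0), datetime(2017, 2, 6, 0, 0), datetime(2017, 2, 7, 0, 0)]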

View File

@@ -2,8 +2,7 @@ from __future__ import absolute_import
 from __future__ import print_function

 from django.core.management.base import BaseCommand
-from django.utils.timezone import now as timezone_now
-from typing import Any, Dict, List
+from typing import Any

 from zerver.models import UserPresence, UserActivity
 from zerver.lib.utils import statsd, statsd_key

@@ -19,13 +18,13 @@ class Command(BaseCommand):
     def handle(self, *args, **options):
         # type: (*Any, **Any) -> None
         # Get list of all active users in the last 1 week
-        cutoff = timezone_now() - timedelta(minutes=30, hours=168)
+        cutoff = datetime.now() - timedelta(minutes=30, hours=168)
         users = UserPresence.objects.select_related().filter(timestamp__gt=cutoff)
         # Calculate 10min, 2hrs, 12hrs, 1day, 2 business days (TODO business days), 1 week bucket of stats
         hour_buckets = [0.16, 2, 12, 24, 48, 168]
         user_info = defaultdict(dict)  # type: Dict[str, Dict[float, List[str]]]

         for last_presence in users:
             if last_presence.status == UserPresence.IDLE:

@@ -36,7 +35,7 @@ class Command(BaseCommand):
             for bucket in hour_buckets:
                 if bucket not in user_info[last_presence.user_profile.realm.string_id]:
                     user_info[last_presence.user_profile.realm.string_id][bucket] = []
-                if timezone_now() - known_active < timedelta(hours=bucket):
+                if datetime.now(known_active.tzinfo) - known_active < timedelta(hours=bucket):
                     user_info[last_presence.user_profile.realm.string_id][bucket].append(last_presence.user_profile.email)

         for realm, buckets in user_info.items():

@@ -52,7 +51,7 @@ class Command(BaseCommand):
             for bucket in hour_buckets:
                 if bucket not in user_info[activity.user_profile.realm.string_id]:
                     user_info[activity.user_profile.realm.string_id][bucket] = []
-                if timezone_now() - activity.last_visit < timedelta(hours=bucket):
+                if datetime.now(activity.last_visit.tzinfo) - activity.last_visit < timedelta(hours=bucket):
                     user_info[activity.user_profile.realm.string_id][bucket].append(activity.user_profile.email)

         for realm, buckets in user_info.items():
             print("Realm %s" % (realm,))

View File

@@ -6,9 +6,7 @@ import pytz
 from optparse import make_option
 from typing import Any

 from django.core.management.base import BaseCommand, CommandParser
-from django.utils.timezone import now as timezone_now

 from zerver.lib.statistics import activity_averages_during_day

 class Command(BaseCommand):

@@ -22,9 +20,9 @@ class Command(BaseCommand):
     def handle(self, *args, **options):
         # type: (*Any, **Any) -> None
         if options["date"] is None:
-            date = timezone_now() - datetime.timedelta(days=1)
+            date = datetime.datetime.now() - datetime.timedelta(days=1)
         else:
-            date = datetime.datetime.strptime(options["date"], "%Y-%m-%d").replace(tzinfo=pytz.utc)
+            date = datetime.datetime.strptime(options["date"], "%Y-%m-%d")
         print("Activity data for", date)
         print(activity_averages_during_day(date))
         print("Please note that the total registered user count is a total for today")

View File

@@ -1,7 +1,7 @@
 from __future__ import absolute_import
 from __future__ import print_function

-from typing import Any, Dict
+from typing import Any

 from optparse import make_option
 from django.core.management.base import BaseCommand, CommandParser

@@ -17,7 +17,7 @@ def compute_stats(log_level):
     logger.setLevel(log_level)

     one_week_ago = timestamp_to_datetime(time.time()) - datetime.timedelta(weeks=1)
-    mit_query = Message.objects.filter(sender__realm__string_id="zephyr",
+    mit_query = Message.objects.filter(sender__realm__string_id="mit",
                                        recipient__type=Recipient.STREAM,
                                        pub_date__gt=one_week_ago)
     for bot_sender_start in ["imap.", "rcmd.", "sys."]:

@@ -30,15 +30,15 @@ def compute_stats(log_level):
                        "bitcoin@mit.edu", "lp@mit.edu", "clocks@mit.edu",
                        "root@mit.edu", "nagios@mit.edu",
                        "www-data|local-realm@mit.edu"])
     user_counts = {}  # type: Dict[str, Dict[str, int]]
     for m in mit_query.select_related("sending_client", "sender"):
         email = m.sender.email
         user_counts.setdefault(email, {})
         user_counts[email].setdefault(m.sending_client.name, 0)
         user_counts[email][m.sending_client.name] += 1

     total_counts = {}  # type: Dict[str, int]
     total_user_counts = {}  # type: Dict[str, int]
     for email, counts in user_counts.items():
         total_user_counts.setdefault(email, 0)
         for client_name, count in counts.items():

@@ -47,7 +47,7 @@ def compute_stats(log_level):
             total_user_counts[email] += count

     logging.debug("%40s | %10s | %s" % ("User", "Messages", "Percentage Zulip"))
     top_percents = {}  # type: Dict[int, float]
     for size in [10, 25, 50, 100, 200, len(total_user_counts.keys())]:
         top_percents[size] = 0.0
     for i, email in enumerate(sorted(total_user_counts.keys(),
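The nested setdefault calls above are the pre-collections idiom for a two-level tally. A self-contained version of the same pattern:

messages = [
    ("alice@mit.edu", "zephyr_mirror"),
    ("alice@mit.edu", "website"),
    ("alice@mit.edu", "website"),
]
user_counts = {}  # type: dict
for email, client_name in messages:
    user_counts.setdefault(email, {})
    user_counts[email].setdefault(client_name, 0)
    user_counts[email][client_name] += 1

assert user_counts == {"alice@mit.edu": {"zephyr_mirror": 1, "website": 2}}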

View File

@@ -6,7 +6,6 @@ from typing import Any
 from argparse import ArgumentParser
 from django.core.management.base import BaseCommand
 from django.db.models import Count, QuerySet
-from django.utils.timezone import now as timezone_now

 from zerver.models import UserActivity, UserProfile, Realm, \
     get_realm, get_user_profile_by_email

@@ -39,7 +38,7 @@ Usage examples:
         #
         # Importantly, this does NOT tell you anything about the relative
         # volumes of requests from clients.
-        threshold = timezone_now() - datetime.timedelta(days=7)
+        threshold = datetime.datetime.now() - datetime.timedelta(days=7)
         client_counts = user_activity_objects.filter(
             last_visit__gt=threshold).values("client__name").annotate(
             count=Count('client__name'))
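For readers unfamiliar with the .values(...).annotate(Count(...)) shape, it groups rows by client name and counts each group. A plain-Python equivalent over (client_name, last_visit) pairs:

from collections import Counter
from datetime import datetime, timedelta

def client_counts(records, now):
    threshold = now - timedelta(days=7)
    return Counter(name for name, last_visit in records if last_visit > threshold)

now = datetime(2017, 2, 7)
records = [("website", now), ("website", now), ("API", now - timedelta(days=30))]
assert client_counts(records, now) == Counter({"website": 2})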

View File

@@ -3,21 +3,20 @@ from __future__ import absolute_import, print_function
 from argparse import ArgumentParser
 from django.core.management.base import BaseCommand
-from django.utils.timezone import now as timezone_now
+from django.utils import timezone
+from analytics.models import BaseCount, InstallationCount, RealmCount, \
+    UserCount, StreamCount

 from analytics.lib.counts import COUNT_STATS, CountStat, do_drop_all_analytics_tables
 from analytics.lib.fixtures import generate_time_series_data
 from analytics.lib.time_utils import time_range
-from analytics.models import BaseCount, InstallationCount, RealmCount, \
-    UserCount, StreamCount, FillState
 from zerver.lib.timestamp import floor_to_day
-from zerver.models import Realm, UserProfile, Stream, Message, Client, \
-    RealmAuditLog
+from zerver.models import Realm, UserProfile, Stream, Message, Client

 from datetime import datetime, timedelta
 from six.moves import zip
-from typing import Any, Dict, List, Optional, Text, Type, Union, Mapping
+from typing import Any, List, Optional, Text, Type, Union

 class Command(BaseCommand):
     help = """Populates analytics tables with randomly generated data."""

@@ -27,40 +26,38 @@ class Command(BaseCommand):
     def create_user(self, email, full_name, is_staff, date_joined, realm):
         # type: (Text, Text, Text, bool, datetime, Realm) -> UserProfile
-        user = UserProfile.objects.create(
+        return UserProfile.objects.create(
             email=email, full_name=full_name, is_staff=is_staff,
             realm=realm, short_name=full_name, pointer=-1, last_pointer_updater='none',
             api_key='42', date_joined=date_joined)
-        RealmAuditLog.objects.create(
-            realm=realm, modified_user=user, event_type='user_created',
-            event_time=user.date_joined)
-        return user

     def generate_fixture_data(self, stat, business_hours_base, non_business_hours_base,
-                              growth, autocorrelation, spikiness, holiday_rate=0,
-                              partial_sum=False):
-        # type: (CountStat, float, float, float, float, float, float, bool) -> List[int]
+                              growth, autocorrelation, spikiness, holiday_rate=0):
+        # type: (CountStat, float, float, float, float, float, float) -> List[int]
         self.random_seed += 1
         return generate_time_series_data(
             days=self.DAYS_OF_DATA, business_hours_base=business_hours_base,
             non_business_hours_base=non_business_hours_base, growth=growth,
             autocorrelation=autocorrelation, spikiness=spikiness, holiday_rate=holiday_rate,
-            frequency=stat.frequency, partial_sum=partial_sum, random_seed=self.random_seed)
+            frequency=stat.frequency, is_gauge=(stat.interval == CountStat.GAUGE),
+            random_seed=self.random_seed)

     def handle(self, *args, **options):
         # type: (*Any, **Any) -> None
         do_drop_all_analytics_tables()
         # I believe this also deletes any objects with this realm as a foreign key
         Realm.objects.filter(string_id='analytics').delete()
-        Client.objects.filter(name__endswith='_').delete()
-        installation_time = timezone_now() - timedelta(days=self.DAYS_OF_DATA)
-        last_end_time = floor_to_day(timezone_now())
+        installation_time = timezone.now() - timedelta(days=self.DAYS_OF_DATA)
+        last_end_time = floor_to_day(timezone.now())
         realm = Realm.objects.create(
-            string_id='analytics', name='Analytics', date_created=installation_time)
+            string_id='analytics', name='Analytics', domain='analytics.ds',
+            date_created=installation_time)
         shylock = self.create_user('shylock@analytics.ds', 'Shylock', True, installation_time, realm)

         def insert_fixture_data(stat, fixture_data, table):
-            # type: (CountStat, Mapping[Optional[str], List[int]], Type[BaseCount]) -> None
+            # type: (CountStat, Dict[Optional[str], List[int]], Type[BaseCount]) -> None
             end_times = time_range(last_end_time, last_end_time, stat.frequency,
                                    len(list(fixture_data.values())[0]))
             if table == RealmCount:

@@ -73,66 +70,54 @@ class Command(BaseCommand):
                 value=value, **id_args)
                 for end_time, value in zip(end_times, values) if value != 0])

-        stat = COUNT_STATS['realm_active_humans::day']
+        stat = COUNT_STATS['active_users:is_bot:day']
         realm_data = {
-            None: self.generate_fixture_data(stat, .1, .03, 3, .5, 3, partial_sum=True),
-        }  # type: Mapping[Optional[str], List[int]]
+            'false': self.generate_fixture_data(stat, .1, .03, 3, .5, 3),
+            'true': self.generate_fixture_data(stat, .01, 0, 1, 0, 1)
+        }  # type: Dict[Optional[str], List[int]]
         insert_fixture_data(stat, realm_data, RealmCount)
-        FillState.objects.create(property=stat.property, end_time=last_end_time,
-                                 state=FillState.DONE)

         stat = COUNT_STATS['messages_sent:is_bot:hour']
-        user_data = {'false': self.generate_fixture_data(
-            stat, 2, 1, 1.5, .6, 8, holiday_rate=.1)}  # type: Mapping[Optional[str], List[int]]
+        user_data = {'false': self.generate_fixture_data(stat, 2, 1, 1.5, .6, 8, holiday_rate=.1)}
         insert_fixture_data(stat, user_data, UserCount)
         realm_data = {'false': self.generate_fixture_data(stat, 35, 15, 6, .6, 4),
                       'true': self.generate_fixture_data(stat, 15, 15, 3, .4, 2)}
         insert_fixture_data(stat, realm_data, RealmCount)
-        FillState.objects.create(property=stat.property, end_time=last_end_time,
-                                 state=FillState.DONE)

         stat = COUNT_STATS['messages_sent:message_type:day']
         user_data = {
             'public_stream': self.generate_fixture_data(stat, 1.5, 1, 3, .6, 8),
-            'private_message': self.generate_fixture_data(stat, .5, .3, 1, .6, 8),
-            'huddle_message': self.generate_fixture_data(stat, .2, .2, 2, .6, 8)}
+            'private_message': self.generate_fixture_data(stat, .5, .3, 1, .6, 8)}
         insert_fixture_data(stat, user_data, UserCount)
         realm_data = {
             'public_stream': self.generate_fixture_data(stat, 30, 8, 5, .6, 4),
             'private_stream': self.generate_fixture_data(stat, 7, 7, 5, .6, 4),
-            'private_message': self.generate_fixture_data(stat, 13, 5, 5, .6, 4),
-            'huddle_message': self.generate_fixture_data(stat, 6, 3, 3, .6, 4)}
+            'private_message': self.generate_fixture_data(stat, 13, 5, 5, .6, 4)}
         insert_fixture_data(stat, realm_data, RealmCount)
-        FillState.objects.create(property=stat.property, end_time=last_end_time,
-                                 state=FillState.DONE)

-        website, created = Client.objects.get_or_create(name='website')
-        old_desktop, created = Client.objects.get_or_create(name='desktop app Linux 0.3.7')
-        android, created = Client.objects.get_or_create(name='ZulipAndroid')
-        iOS, created = Client.objects.get_or_create(name='ZulipiOS')
-        react_native, created = Client.objects.get_or_create(name='ZulipMobile')
-        API, created = Client.objects.get_or_create(name='API: Python')
-        zephyr_mirror, created = Client.objects.get_or_create(name='zephyr_mirror')
-        unused, created = Client.objects.get_or_create(name='unused')
-        long_webhook, created = Client.objects.get_or_create(name='ZulipLooooooooooongNameWebhook')
+        website_ = Client.objects.create(name='website_')
+        API_ = Client.objects.create(name='API_')
+        android_ = Client.objects.create(name='android_')
+        iOS_ = Client.objects.create(name='iOS_')
+        react_native_ = Client.objects.create(name='react_native_')
+        electron_ = Client.objects.create(name='electron_')
+        barnowl_ = Client.objects.create(name='barnowl_')
+        plan9_ = Client.objects.create(name='plan9_')

         stat = COUNT_STATS['messages_sent:client:day']
         user_data = {
-            website.id: self.generate_fixture_data(stat, 2, 1, 1.5, .6, 8),
-            zephyr_mirror.id: self.generate_fixture_data(stat, 0, .3, 1.5, .6, 8)}
+            website_.id: self.generate_fixture_data(stat, 2, 1, 1.5, .6, 8),
+            barnowl_.id: self.generate_fixture_data(stat, 0, .3, 1.5, .6, 8)}
         insert_fixture_data(stat, user_data, UserCount)
         realm_data = {
-            website.id: self.generate_fixture_data(stat, 30, 20, 5, .6, 3),
-            old_desktop.id: self.generate_fixture_data(stat, 5, 3, 8, .6, 3),
-            android.id: self.generate_fixture_data(stat, 5, 5, 2, .6, 3),
-            iOS.id: self.generate_fixture_data(stat, 5, 5, 2, .6, 3),
-            react_native.id: self.generate_fixture_data(stat, 5, 5, 10, .6, 3),
-            API.id: self.generate_fixture_data(stat, 5, 5, 5, .6, 3),
-            zephyr_mirror.id: self.generate_fixture_data(stat, 1, 1, 3, .6, 3),
-            unused.id: self.generate_fixture_data(stat, 0, 0, 0, 0, 0),
-            long_webhook.id: self.generate_fixture_data(stat, 5, 5, 2, .6, 3)}
+            website_.id: self.generate_fixture_data(stat, 30, 20, 5, .6, 3),
+            API_.id: self.generate_fixture_data(stat, 5, 5, 5, .6, 3),
+            android_.id: self.generate_fixture_data(stat, 5, 5, 2, .6, 3),
+            iOS_.id: self.generate_fixture_data(stat, 5, 5, 2, .6, 3),
+            react_native_.id: self.generate_fixture_data(stat, 5, 5, 10, .6, 3),
+            electron_.id: self.generate_fixture_data(stat, 5, 3, 8, .6, 3),
+            barnowl_.id: self.generate_fixture_data(stat, 1, 1, 3, .6, 3),
+            plan9_.id: self.generate_fixture_data(stat, 0, 0, 0, 0, 0, 0)}
         insert_fixture_data(stat, realm_data, RealmCount)
-        FillState.objects.create(property=stat.property, end_time=last_end_time,
-                                 state=FillState.DONE)

         # TODO: messages_sent_to_stream:is_bot
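The client-creation change is the interesting one operationally: Client.objects.create(name=...) fails the second time the command runs against the same database, while get_or_create is idempotent. A standalone sketch of that contract:

registry = {}

def get_or_create(name):
    # Returns (object, created), like Django's QuerySet.get_or_create.
    if name in registry:
        return registry[name], False
    registry[name] = object()
    return registry[name], True

client, created = get_or_create('website')
assert created
again, created = get_or_create('website')
assert again is client and not created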

View File

@@ -2,7 +2,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

-from typing import Any, List
+from typing import Any

 from argparse import ArgumentParser
 import datetime

@@ -10,8 +10,6 @@ import pytz

 from django.core.management.base import BaseCommand
 from django.db.models import Count
-from django.utils.timezone import now as timezone_now

 from zerver.models import UserProfile, Realm, Stream, Message, Recipient, UserActivity, \
     Subscription, UserMessage, get_realm

@@ -31,7 +29,7 @@ class Command(BaseCommand):
     def active_users(self, realm):
         # type: (Realm) -> List[UserProfile]
         # Has been active (on the website, for now) in the last 7 days.
-        activity_cutoff = timezone_now() - datetime.timedelta(days=7)
+        activity_cutoff = datetime.datetime.now(tz=pytz.utc) - datetime.timedelta(days=7)
         return [activity.user_profile for activity in (
             UserActivity.objects.filter(user_profile__realm=realm,
                                         user_profile__is_active=True,

@@ -41,17 +39,17 @@ class Command(BaseCommand):
     def messages_sent_by(self, user, days_ago):
         # type: (UserProfile, int) -> int
-        sent_time_cutoff = timezone_now() - datetime.timedelta(days=days_ago)
+        sent_time_cutoff = datetime.datetime.now(tz=pytz.utc) - datetime.timedelta(days=days_ago)
         return human_messages.filter(sender=user, pub_date__gt=sent_time_cutoff).count()

     def total_messages(self, realm, days_ago):
         # type: (Realm, int) -> int
-        sent_time_cutoff = timezone_now() - datetime.timedelta(days=days_ago)
+        sent_time_cutoff = datetime.datetime.now(tz=pytz.utc) - datetime.timedelta(days=days_ago)
         return Message.objects.filter(sender__realm=realm, pub_date__gt=sent_time_cutoff).count()

     def human_messages(self, realm, days_ago):
         # type: (Realm, int) -> int
-        sent_time_cutoff = timezone_now() - datetime.timedelta(days=days_ago)
+        sent_time_cutoff = datetime.datetime.now(tz=pytz.utc) - datetime.timedelta(days=days_ago)
         return human_messages.filter(sender__realm=realm, pub_date__gt=sent_time_cutoff).count()

     def api_messages(self, realm, days_ago):

@@ -60,19 +58,19 @@ class Command(BaseCommand):
     def stream_messages(self, realm, days_ago):
         # type: (Realm, int) -> int
-        sent_time_cutoff = timezone_now() - datetime.timedelta(days=days_ago)
+        sent_time_cutoff = datetime.datetime.now(tz=pytz.utc) - datetime.timedelta(days=days_ago)
         return human_messages.filter(sender__realm=realm, pub_date__gt=sent_time_cutoff,
                                      recipient__type=Recipient.STREAM).count()

     def private_messages(self, realm, days_ago):
         # type: (Realm, int) -> int
-        sent_time_cutoff = timezone_now() - datetime.timedelta(days=days_ago)
+        sent_time_cutoff = datetime.datetime.now(tz=pytz.utc) - datetime.timedelta(days=days_ago)
         return human_messages.filter(sender__realm=realm, pub_date__gt=sent_time_cutoff).exclude(
             recipient__type=Recipient.STREAM).exclude(recipient__type=Recipient.HUDDLE).count()

     def group_private_messages(self, realm, days_ago):
         # type: (Realm, int) -> int
-        sent_time_cutoff = timezone_now() - datetime.timedelta(days=days_ago)
+        sent_time_cutoff = datetime.datetime.now(tz=pytz.utc) - datetime.timedelta(days=days_ago)
         return human_messages.filter(sender__realm=realm, pub_date__gt=sent_time_cutoff).exclude(
             recipient__type=Recipient.STREAM).exclude(recipient__type=Recipient.PERSONAL).count()
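The six cutoff lines above differ only in how an aware UTC "now" is spelled; with USE_TZ enabled, django.utils.timezone.now() is essentially the pytz expression, so a local helper collapses the repetition. A hedged sketch:

import datetime
import pytz

def timezone_now():
    # Equivalent to django.utils.timezone.now() when USE_TZ = True.
    return datetime.datetime.now(tz=pytz.utc)

def cutoff(days_ago):
    return timezone_now() - datetime.timedelta(days=days_ago)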

View File

@@ -7,20 +7,18 @@ from scripts.lib.zulip_tools import ENDC, WARNING
 from argparse import ArgumentParser
 from datetime import timedelta
-import time

 from django.core.management.base import BaseCommand
-from django.utils.timezone import now as timezone_now
-from django.utils.timezone import utc as timezone_utc
+from django.utils import timezone
 from django.utils.dateparse import parse_datetime
 from django.conf import settings

 from analytics.models import RealmCount, UserCount
 from analytics.lib.counts import COUNT_STATS, logger, process_count_stat
-from zerver.lib.timestamp import floor_to_hour
+from zerver.lib.timestamp import datetime_to_string, is_timezone_aware
 from zerver.models import UserProfile, Message

-from typing import Any, Dict
+from typing import Any

 class Command(BaseCommand):
     help = """Fills Analytics tables.

@@ -32,18 +30,17 @@ class Command(BaseCommand):
         parser.add_argument('--time', '-t',
                             type=str,
                             help='Update stat tables from current state to --time. Defaults to the current time.',
-                            default=timezone_now().isoformat())
+                            default=datetime_to_string(timezone.now()))
         parser.add_argument('--utc',
-                            action='store_true',
+                            type=bool,
                             help="Interpret --time in UTC.",
                             default=False)
         parser.add_argument('--stat', '-s',
                             type=str,
                             help="CountStat to process. If omitted, all stats are processed.")
-        parser.add_argument('--verbose',
-                            action='store_true',
-                            help="Print timing information to stdout.",
-                            default=False)
+        parser.add_argument('--quiet', '-q',
+                            type=str,
+                            help="Suppress output to stdout.")

     def handle(self, *args, **options):
         # type: (*Any, **Any) -> None

@@ -61,31 +58,18 @@ class Command(BaseCommand):
     def run_update_analytics_counts(self, options):
         # type: (Dict[str, Any]) -> None
         fill_to_time = parse_datetime(options['time'])
         if options['utc']:
-            fill_to_time = fill_to_time.replace(tzinfo=timezone_utc)
-        if fill_to_time.tzinfo is None:
+            fill_to_time = fill_to_time.replace(tzinfo=timezone.utc)
+        if not (is_timezone_aware(fill_to_time)):
             raise ValueError("--time must be timezone aware. Maybe you meant to use the --utc option?")
-        fill_to_time = floor_to_hour(fill_to_time.astimezone(timezone_utc))

-        if options['stat'] is not None:
-            stats = [COUNT_STATS[options['stat']]]
-        else:
-            stats = list(COUNT_STATS.values())

-        logger.info("Starting updating analytics counts through %s" % (fill_to_time,))
-        if options['verbose']:
-            start = time.time()
-            last = start

-        for stat in stats:
-            process_count_stat(stat, fill_to_time)
-            if options['verbose']:
-                print("Updated %s in %.3fs" % (stat.property, time.time() - last))
-                last = time.time()

-        if options['verbose']:
-            print("Finished updating analytics counts through %s in %.3fs" %
-                  (fill_to_time, time.time() - start))
+        logger.info("Starting updating analytics counts through %s" % (fill_to_time,))
+        if options['stat'] is not None:
+            process_count_stat(COUNT_STATS[options['stat']], fill_to_time)
+        else:
+            for stat in COUNT_STATS.values():
+                process_count_stat(stat, fill_to_time)
         logger.info("Finished updating analytics counts through %s" % (fill_to_time,))

View File

@@ -7,8 +7,6 @@ import pytz
 from typing import Any

 from django.core.management.base import BaseCommand
-from django.utils.timezone import now as timezone_now

 from zerver.models import UserProfile, Realm, Stream, Message, get_realm
 from six.moves import range

@@ -22,8 +20,8 @@ class Command(BaseCommand):
     def messages_sent_by(self, user, week):
         # type: (UserProfile, int) -> int
-        start = timezone_now() - datetime.timedelta(days=(week + 1)*7)
-        end = timezone_now() - datetime.timedelta(days=week*7)
+        start = datetime.datetime.now(tz=pytz.utc) - datetime.timedelta(days=(week + 1)*7)
+        end = datetime.datetime.now(tz=pytz.utc) - datetime.timedelta(days=week*7)
         return Message.objects.filter(sender=user, pub_date__gt=start, pub_date__lte=end).count()

     def handle(self, *args, **options):
View File

@@ -1,30 +0,0 @@
# -*- coding: utf-8 -*-
from django.db.backends.postgresql_psycopg2.schema import DatabaseSchemaEditor
from django.db.migrations.state import StateApps
from django.db import migrations
def delete_messages_sent_to_stream_stat(apps, schema_editor):
# type: (StateApps, DatabaseSchemaEditor) -> None
UserCount = apps.get_model('analytics', 'UserCount')
StreamCount = apps.get_model('analytics', 'StreamCount')
RealmCount = apps.get_model('analytics', 'RealmCount')
InstallationCount = apps.get_model('analytics', 'InstallationCount')
FillState = apps.get_model('analytics', 'FillState')
property = 'messages_sent_to_stream:is_bot'
UserCount.objects.filter(property=property).delete()
StreamCount.objects.filter(property=property).delete()
RealmCount.objects.filter(property=property).delete()
InstallationCount.objects.filter(property=property).delete()
FillState.objects.filter(property=property).delete()
class Migration(migrations.Migration):
dependencies = [
('analytics', '0008_add_count_indexes'),
]
operations = [
migrations.RunPython(delete_messages_sent_to_stream_stat),
]

View File

@@ -1,27 +0,0 @@
# -*- coding: utf-8 -*-
from django.db.backends.postgresql_psycopg2.schema import DatabaseSchemaEditor
from django.db.migrations.state import StateApps
from django.db import migrations
def clear_message_sent_by_message_type_values(apps, schema_editor):
# type: (StateApps, DatabaseSchemaEditor) -> None
UserCount = apps.get_model('analytics', 'UserCount')
StreamCount = apps.get_model('analytics', 'StreamCount')
RealmCount = apps.get_model('analytics', 'RealmCount')
InstallationCount = apps.get_model('analytics', 'InstallationCount')
FillState = apps.get_model('analytics', 'FillState')
property = 'messages_sent:message_type:day'
UserCount.objects.filter(property=property).delete()
StreamCount.objects.filter(property=property).delete()
RealmCount.objects.filter(property=property).delete()
InstallationCount.objects.filter(property=property).delete()
FillState.objects.filter(property=property).delete()
class Migration(migrations.Migration):
dependencies = [('analytics', '0009_remove_messages_to_stream_stat')]
operations = [
migrations.RunPython(clear_message_sent_by_message_type_values),
]

View File

@@ -1,29 +0,0 @@
# -*- coding: utf-8 -*-
from django.db.backends.postgresql_psycopg2.schema import DatabaseSchemaEditor
from django.db.migrations.state import StateApps
from django.db import migrations
def clear_analytics_tables(apps, schema_editor):
# type: (StateApps, DatabaseSchemaEditor) -> None
UserCount = apps.get_model('analytics', 'UserCount')
StreamCount = apps.get_model('analytics', 'StreamCount')
RealmCount = apps.get_model('analytics', 'RealmCount')
InstallationCount = apps.get_model('analytics', 'InstallationCount')
FillState = apps.get_model('analytics', 'FillState')
UserCount.objects.all().delete()
StreamCount.objects.all().delete()
RealmCount.objects.all().delete()
InstallationCount.objects.all().delete()
FillState.objects.all().delete()
class Migration(migrations.Migration):
dependencies = [
('analytics', '0010_clear_messages_sent_values'),
]
operations = [
migrations.RunPython(clear_analytics_tables),
]
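All three migrations above (present in 1.6.0, absent in 1.5.1) share one shape: resolve the historical models through apps.get_model, then delete rows for one stat property, or all rows. A hypothetical shared helper, shown only to make the pattern explicit (not present in the tree):

def delete_analytics_rows(apps, property=None):
    # Delete analytics rows for one stat property, or everything if None.
    tables = ['UserCount', 'StreamCount', 'RealmCount', 'InstallationCount', 'FillState']
    for name in tables:
        model = apps.get_model('analytics', name)
        queryset = model.objects.all() if property is None else model.objects.filter(property=property)
        queryset.delete()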

View File

@@ -1,23 +1,24 @@
 from django.db import models
+from django.utils import timezone

 from zerver.models import Realm, UserProfile, Stream, Recipient
 from zerver.lib.str_utils import ModelReprMixin
-from zerver.lib.timestamp import floor_to_day
+from zerver.lib.timestamp import datetime_to_UTC, floor_to_day

 import datetime

 from typing import Optional, Tuple, Union, Dict, Any, Text

 class FillState(ModelReprMixin, models.Model):
     property = models.CharField(max_length=40, unique=True)  # type: Text
     end_time = models.DateTimeField()  # type: datetime.datetime

     # Valid states are {DONE, STARTED}
     DONE = 1
     STARTED = 2
     state = models.PositiveSmallIntegerField()  # type: int

     last_modified = models.DateTimeField(auto_now=True)  # type: datetime.datetime

     def __unicode__(self):
         # type: () -> Text

@@ -28,20 +29,11 @@ class FillState(ModelReprMixin, models.Model):
 def installation_epoch():
     # type: () -> datetime.datetime
     earliest_realm_creation = Realm.objects.aggregate(models.Min('date_created'))['date_created__min']
-    return floor_to_day(earliest_realm_creation)
+    return floor_to_day(datetime_to_UTC(earliest_realm_creation))

-def last_successful_fill(property):
-    # type: (str) -> Optional[datetime.datetime]
-    fillstate = FillState.objects.filter(property=property).first()
-    if fillstate is None:
-        return None
-    if fillstate.state == FillState.DONE:
-        return fillstate.end_time
-    return fillstate.end_time - datetime.timedelta(hours=1)

 # would only ever make entries here by hand
 class Anomaly(ModelReprMixin, models.Model):
     info = models.CharField(max_length=1000)  # type: Text

     def __unicode__(self):
         # type: () -> Text

@@ -51,20 +43,40 @@ class BaseCount(ModelReprMixin, models.Model):
     # Note: When inheriting from BaseCount, you may want to rearrange
     # the order of the columns in the migration to make sure they
     # match how you'd like the table to be arranged.
     property = models.CharField(max_length=32)  # type: Text
-    subgroup = models.CharField(max_length=16, null=True)  # type: Optional[Text]
+    subgroup = models.CharField(max_length=16, null=True)  # type: Text
     end_time = models.DateTimeField()  # type: datetime.datetime
     value = models.BigIntegerField()  # type: int
     anomaly = models.ForeignKey(Anomaly, null=True)  # type: Optional[Anomaly]

     class Meta(object):
         abstract = True

+    @staticmethod
+    def extended_id():
+        # type: () -> Tuple[str, ...]
+        raise NotImplementedError

+    @staticmethod
+    def key_model():
+        # type: () -> models.Model
+        raise NotImplementedError

 class InstallationCount(BaseCount):

     class Meta(object):
         unique_together = ("property", "subgroup", "end_time")

+    @staticmethod
+    def extended_id():
+        # type: () -> Tuple[str, ...]
+        return ()

+    @staticmethod
+    def key_model():
+        # type: () -> models.Model
+        return None

     def __unicode__(self):
         # type: () -> Text
         return u"<InstallationCount: %s %s %s>" % (self.property, self.subgroup, self.value)

@@ -76,6 +88,16 @@ class RealmCount(BaseCount):
         unique_together = ("realm", "property", "subgroup", "end_time")
         index_together = ["property", "end_time"]

+    @staticmethod
+    def extended_id():
+        # type: () -> Tuple[str, ...]
+        return ('realm_id',)

+    @staticmethod
+    def key_model():
+        # type: () -> models.Model
+        return Realm

     def __unicode__(self):
         # type: () -> Text
         return u"<RealmCount: %s %s %s %s>" % (self.realm, self.property, self.subgroup, self.value)

@@ -90,6 +112,16 @@ class UserCount(BaseCount):
         # aggregating from users to realms
         index_together = ["property", "realm", "end_time"]

+    @staticmethod
+    def extended_id():
+        # type: () -> Tuple[str, ...]
+        return ('user_id', 'realm_id')

+    @staticmethod
+    def key_model():
+        # type: () -> models.Model
+        return UserProfile

     def __unicode__(self):
         # type: () -> Text
         return u"<UserCount: %s %s %s %s>" % (self.user, self.property, self.subgroup, self.value)

@@ -104,6 +136,16 @@ class StreamCount(BaseCount):
         # aggregating from streams to realms
         index_together = ["property", "realm", "end_time"]

+    @staticmethod
+    def extended_id():
+        # type: () -> Tuple[str, ...]
+        return ('stream_id', 'realm_id')

+    @staticmethod
+    def key_model():
+        # type: () -> models.Model
+        return Stream

     def __unicode__(self):
         # type: () -> Text
         return u"<StreamCount: %s %s %s %s %s>" % (self.stream, self.property, self.subgroup, self.value, self.id)

View File

@@ -1,47 +1,39 @@
 from __future__ import absolute_import

-from django.apps import apps
 from django.db import models
-from django.db.models import Sum
 from django.test import TestCase
-from django.utils.timezone import now as timezone_now
-from django.utils.timezone import utc as timezone_utc
+from django.utils import timezone

 from analytics.lib.counts import CountStat, COUNT_STATS, process_count_stat, \
-    do_fill_count_stat_at_hour, do_increment_logging_stat, DataCollector, \
-    sql_data_collector, LoggingCountStat, do_aggregate_to_summary_table, \
-    do_drop_all_analytics_tables, DependentCountStat
+    zerver_count_user_by_realm, zerver_count_message_by_user, \
+    zerver_count_message_by_stream, zerver_count_stream_by_realm, \
+    do_fill_count_stat_at_hour, ZerverCountQuery
 from analytics.models import BaseCount, InstallationCount, RealmCount, \
-    UserCount, StreamCount, FillState, Anomaly, installation_epoch, \
-    last_successful_fill
-from zerver.lib.actions import do_create_user, do_deactivate_user, \
-    do_activate_user, do_reactivate_user, update_user_activity_interval
-from zerver.lib.timestamp import floor_to_day
+    UserCount, StreamCount, FillState, installation_epoch
 from zerver.models import Realm, UserProfile, Message, Stream, Recipient, \
-    Huddle, Client, UserActivityInterval, RealmAuditLog, \
-    get_user_profile_by_email, get_client
+    Huddle, Client, get_user_profile_by_email, get_client

 from datetime import datetime, timedelta
-import ujson

 from six.moves import range
-from typing import Any, Dict, List, Optional, Text, Tuple, Type, Union
+from typing import Any, Type, Optional, Text, Tuple, List, Union

 class AnalyticsTestCase(TestCase):
     MINUTE = timedelta(seconds = 60)
     HOUR = MINUTE * 60
     DAY = HOUR * 24
-    TIME_ZERO = datetime(1988, 3, 14).replace(tzinfo=timezone_utc)
+    TIME_ZERO = datetime(1988, 3, 14).replace(tzinfo=timezone.utc)
     TIME_LAST_HOUR = TIME_ZERO - HOUR

     def setUp(self):
         # type: () -> None
         self.default_realm = Realm.objects.create(
-            string_id='realmtest', name='Realm Test', date_created=self.TIME_ZERO - 2*self.DAY)
+            string_id='realmtest', name='Realm Test',
+            domain='test.analytics', date_created=self.TIME_ZERO - 2*self.DAY)
         # used to generate unique names in self.create_*
         self.name_counter = 100
         # used as defaults in self.assertCountEquals
         self.current_property = None  # type: Optional[str]

     # Lightweight creation of users, streams, and messages
     def create_user(self, **kwargs):

@@ -111,7 +103,7 @@ class AnalyticsTestCase(TestCase):
         self.assertEqual(queryset.values_list('value', flat=True)[0], value)

     def assertTableState(self, table, arg_keys, arg_values):
-        # type: (Type[BaseCount], List[str], List[List[Union[int, str, bool, datetime, Realm, UserProfile, Stream]]]) -> None
+        # type: (Type[BaseCount], List[str], List[List[Union[int, str, Realm, UserProfile, Stream]]]) -> None
         """Assert that the state of a *Count table is what it should be.

         Example usage:

@@ -136,10 +128,9 @@ class AnalyticsTestCase(TestCase):
         defaults = {
             'property': self.current_property,
             'subgroup': None,
-            'end_time': self.TIME_ZERO,
-            'value': 1}
+            'end_time': self.TIME_ZERO}
         for values in arg_values:
             kwargs = {}  # type: Dict[str, Any]
             for i in range(len(values)):
                 kwargs[arg_keys[i]] = values[i]
             for key, value in defaults.items():

@@ -156,15 +147,20 @@ class AnalyticsTestCase(TestCase):
         self.assertEqual(table.objects.count(), len(arg_values))

 class TestProcessCountStat(AnalyticsTestCase):
-    def make_dummy_count_stat(self, property):
-        # type: (str) -> CountStat
-        query = """INSERT INTO analytics_realmcount (realm_id, value, property, end_time)
-                   VALUES (%s, 1, '%s', %%%%(time_end)s)""" % (self.default_realm.id, property)
-        return CountStat(property, sql_data_collector(RealmCount, query, None), CountStat.HOUR)
+    def make_dummy_count_stat(self, current_time):
+        # type: (datetime) -> CountStat
+        dummy_query = """INSERT INTO analytics_realmcount (realm_id, property, end_time, value)
+                         VALUES (1, 'test stat', '%(end_time)s', 22)""" % {'end_time': current_time}
+        count_stat = CountStat('test stat', ZerverCountQuery(Recipient, UserCount, dummy_query),
+                               {}, None, CountStat.HOUR, False)
+        return count_stat

-    def assertFillStateEquals(self, stat, end_time, state=FillState.DONE):
-        # type: (CountStat, datetime, int) -> None
-        fill_state = FillState.objects.filter(property=stat.property).first()
+    def assertFillStateEquals(self, end_time, state = FillState.DONE, property = None):
+        # type: (datetime, int, Optional[Text]) -> None
+        count_stat = self.make_dummy_count_stat(end_time)
+        if property is None:
+            property = count_stat.property
+        fill_state = FillState.objects.filter(property=property).first()
         self.assertEqual(fill_state.end_time, end_time)
         self.assertEqual(fill_state.state, state)

@@ -172,131 +168,29 @@ class TestProcessCountStat(AnalyticsTestCase):
         # type: () -> None
         # process new stat
         current_time = installation_epoch() + self.HOUR
-        stat = self.make_dummy_count_stat('test stat')
-        process_count_stat(stat, current_time)
-        self.assertFillStateEquals(stat, current_time)
-        self.assertEqual(InstallationCount.objects.filter(property=stat.property).count(), 1)
+        count_stat = self.make_dummy_count_stat(current_time)
+        property = count_stat.property
+        process_count_stat(count_stat, current_time)
+        self.assertFillStateEquals(current_time)
+        self.assertEqual(InstallationCount.objects.filter(property=property).count(), 1)

         # dirty stat
-        FillState.objects.filter(property=stat.property).update(state=FillState.STARTED)
-        process_count_stat(stat, current_time)
-        self.assertFillStateEquals(stat, current_time)
-        self.assertEqual(InstallationCount.objects.filter(property=stat.property).count(), 1)
+        FillState.objects.filter(property=property).update(state=FillState.STARTED)
+        process_count_stat(count_stat, current_time)
+        self.assertFillStateEquals(current_time)
+        self.assertEqual(InstallationCount.objects.filter(property=property).count(), 1)

         # clean stat, no update
-        process_count_stat(stat, current_time)
-        self.assertFillStateEquals(stat, current_time)
-        self.assertEqual(InstallationCount.objects.filter(property=stat.property).count(), 1)
+        process_count_stat(count_stat, current_time)
+        self.assertFillStateEquals(current_time)
+        self.assertEqual(InstallationCount.objects.filter(property=property).count(), 1)

         # clean stat, with update
         current_time = current_time + self.HOUR
-        stat = self.make_dummy_count_stat('test stat')
-        process_count_stat(stat, current_time)
-        self.assertFillStateEquals(stat, current_time)
-        self.assertEqual(InstallationCount.objects.filter(property=stat.property).count(), 2)
+        count_stat = self.make_dummy_count_stat(current_time)
+        process_count_stat(count_stat, current_time)
+        self.assertFillStateEquals(current_time)
+        self.assertEqual(InstallationCount.objects.filter(property=property).count(), 2)
def test_bad_fill_to_time(self):
# type: () -> None
stat = self.make_dummy_count_stat('test stat')
with self.assertRaises(ValueError):
process_count_stat(stat, installation_epoch() + 65*self.MINUTE)
with self.assertRaises(ValueError):
process_count_stat(stat, installation_epoch().replace(tzinfo=None) + self.HOUR) # type: ignore # https://github.com/python/typeshed/pull/1347
# This tests the LoggingCountStat branch of the code in do_delete_counts_at_hour.
# It is important that do_delete_counts_at_hour not delete any of the collected
# logging data!
def test_process_logging_stat(self):
# type: () -> None
end_time = self.TIME_ZERO
user_stat = LoggingCountStat('user stat', UserCount, CountStat.DAY)
stream_stat = LoggingCountStat('stream stat', StreamCount, CountStat.DAY)
realm_stat = LoggingCountStat('realm stat', RealmCount, CountStat.DAY)
user = self.create_user()
stream = self.create_stream_with_recipient()[0]
realm = self.default_realm
UserCount.objects.create(
user=user, realm=realm, property=user_stat.property, end_time=end_time, value=5)
StreamCount.objects.create(
stream=stream, realm=realm, property=stream_stat.property, end_time=end_time, value=5)
RealmCount.objects.create(
realm=realm, property=realm_stat.property, end_time=end_time, value=5)
# Normal run of process_count_stat
for stat in [user_stat, stream_stat, realm_stat]:
process_count_stat(stat, end_time)
self.assertTableState(UserCount, ['property', 'value'], [[user_stat.property, 5]])
self.assertTableState(StreamCount, ['property', 'value'], [[stream_stat.property, 5]])
self.assertTableState(RealmCount, ['property', 'value'],
[[user_stat.property, 5], [stream_stat.property, 5], [realm_stat.property, 5]])
self.assertTableState(InstallationCount, ['property', 'value'],
[[user_stat.property, 5], [stream_stat.property, 5], [realm_stat.property, 5]])
# Change the logged data and mark FillState as dirty
UserCount.objects.update(value=6)
StreamCount.objects.update(value=6)
RealmCount.objects.filter(property=realm_stat.property).update(value=6)
FillState.objects.update(state=FillState.STARTED)
# Check that the change propagated (and the collected data wasn't deleted)
for stat in [user_stat, stream_stat, realm_stat]:
process_count_stat(stat, end_time)
self.assertTableState(UserCount, ['property', 'value'], [[user_stat.property, 6]])
self.assertTableState(StreamCount, ['property', 'value'], [[stream_stat.property, 6]])
self.assertTableState(RealmCount, ['property', 'value'],
[[user_stat.property, 6], [stream_stat.property, 6], [realm_stat.property, 6]])
self.assertTableState(InstallationCount, ['property', 'value'],
[[user_stat.property, 6], [stream_stat.property, 6], [realm_stat.property, 6]])
def test_process_dependent_stat(self):
# type: () -> None
stat1 = self.make_dummy_count_stat('stat1')
stat2 = self.make_dummy_count_stat('stat2')
query = """INSERT INTO analytics_realmcount (realm_id, value, property, end_time)
VALUES (%s, 1, '%s', %%%%(time_end)s)""" % (self.default_realm.id, 'stat3')
stat3 = DependentCountStat('stat3', sql_data_collector(RealmCount, query, None), CountStat.HOUR,
dependencies=['stat1', 'stat2'])
hour = [installation_epoch() + i*self.HOUR for i in range(5)]
# test when one dependency has been run, and the other hasn't
process_count_stat(stat1, hour[2])
process_count_stat(stat3, hour[1])
self.assertTableState(InstallationCount, ['property', 'end_time'],
[['stat1', hour[1]], ['stat1', hour[2]]])
self.assertFillStateEquals(stat3, hour[0])
# test that we don't fill past the fill_to_time argument, even if
# dependencies have later last_successful_fill
process_count_stat(stat2, hour[3])
process_count_stat(stat3, hour[1])
self.assertTableState(InstallationCount, ['property', 'end_time'],
[['stat1', hour[1]], ['stat1', hour[2]],
['stat2', hour[1]], ['stat2', hour[2]], ['stat2', hour[3]],
['stat3', hour[1]]])
self.assertFillStateEquals(stat3, hour[1])
# test that we don't fill past the dependency last_successful_fill times,
# even if fill_to_time is later
process_count_stat(stat3, hour[4])
self.assertTableState(InstallationCount, ['property', 'end_time'],
[['stat1', hour[1]], ['stat1', hour[2]],
['stat2', hour[1]], ['stat2', hour[2]], ['stat2', hour[3]],
['stat3', hour[1]], ['stat3', hour[2]]])
self.assertFillStateEquals(stat3, hour[2])
# test daily dependent stat with hourly dependencies
query = """INSERT INTO analytics_realmcount (realm_id, value, property, end_time)
VALUES (%s, 1, '%s', %%%%(time_end)s)""" % (self.default_realm.id, 'stat4')
stat4 = DependentCountStat('stat4', sql_data_collector(RealmCount, query, None), CountStat.DAY,
dependencies=['stat1', 'stat2'])
hour24 = installation_epoch() + 24*self.HOUR
hour25 = installation_epoch() + 25*self.HOUR
process_count_stat(stat1, hour25)
process_count_stat(stat2, hour25)
process_count_stat(stat4, hour25)
self.assertEqual(InstallationCount.objects.filter(property='stat4').count(), 1)
self.assertFillStateEquals(stat4, hour24)
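# A sketch of the gating the two scenarios above pin down (a simplified
# illustration, not the code under test in analytics/lib/counts.py): a
# DependentCountStat never fills past the earliest last_successful_fill among
# its dependencies, nor past fill_to_time, with the bound floored to the
# stat's own frequency. Assumes last_successful_fill (analytics.models) and
# floor_to_hour/floor_to_day (zerver.lib.timestamp) are importable here.
def dependent_fill_bound_sketch(stat, fill_to_time):
    # type: (DependentCountStat, datetime) -> Optional[datetime]
    dep_fills = [last_successful_fill(dep) for dep in stat.dependencies]
    if None in dep_fills:
        return None  # some dependency has never run, so don't fill at all
    bound = min(dep_fills + [fill_to_time])
    return floor_to_day(bound) if stat.frequency == CountStat.DAY else floor_to_hour(bound)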
class TestCountStats(AnalyticsTestCase):
    def setUp(self):
@@ -307,7 +201,7 @@ class TestCountStats(AnalyticsTestCase):
        # the queries).
        self.second_realm = Realm.objects.create(
            string_id='second-realm', name='Second Realm',
-           date_created=self.TIME_ZERO-2*self.DAY)
+           domain='second.analytics', date_created=self.TIME_ZERO-2*self.DAY)
        for minutes_ago in [0, 1, 61, 60*24+1]:
            creation_time = self.TIME_ZERO - minutes_ago*self.MINUTE
            user = self.create_user(email='user-%s@second.analytics' % (minutes_ago,),
@@ -323,7 +217,7 @@ class TestCountStats(AnalyticsTestCase):
        # messages_* CountStats
        self.no_message_realm = Realm.objects.create(
            string_id='no-message-realm', name='No Message Realm',
-           date_created=self.TIME_ZERO-2*self.DAY)
+           domain='no.message', date_created=self.TIME_ZERO-2*self.DAY)
        self.create_user(realm=self.no_message_realm)
        self.create_stream_with_recipient(realm=self.no_message_realm)
        # This huddle should not show up anywhere
@@ -429,19 +323,16 @@ class TestCountStats(AnalyticsTestCase):
                               [2, 'private_stream', user2],
                               [2, 'public_stream', user1],
                               [1, 'public_stream', user2],
-                              [1, 'private_message', user1],
-                              [1, 'private_message', user2],
+                              [2, 'private_message', user1],
+                              [2, 'private_message', user2],
                               [1, 'private_message', user3],
-                              [1, 'huddle_message', user1],
-                              [1, 'huddle_message', user2],
                               [1, 'public_stream', self.hourly_user],
                               [1, 'public_stream', self.daily_user]])
        self.assertTableState(RealmCount, ['value', 'subgroup', 'realm'],
-                             [[3, 'private_stream'], [3, 'public_stream'], [3, 'private_message'],
-                              [2, 'huddle_message'], [2, 'public_stream', self.second_realm]])
+                             [[3, 'private_stream'], [3, 'public_stream'], [5, 'private_message'],
+                              [2, 'public_stream', self.second_realm]])
        self.assertTableState(InstallationCount, ['value', 'subgroup'],
-                             [[3, 'private_stream'], [5, 'public_stream'], [3, 'private_message'],
-                              [2, 'huddle_message']])
+                             [[3, 'private_stream'], [5, 'public_stream'], [5, 'private_message']])
        self.assertTableState(StreamCount, [], [])

    def test_messages_sent_to_recipients_with_same_id(self):
@@ -460,8 +351,7 @@ class TestCountStats(AnalyticsTestCase):
        do_fill_count_stat_at_hour(stat, self.TIME_ZERO)

-       self.assertCountEquals(UserCount, 1, subgroup='private_message')
-       self.assertCountEquals(UserCount, 1, subgroup='huddle_message')
+       self.assertCountEquals(UserCount, 2, subgroup='private_message')
        self.assertCountEquals(UserCount, 1, subgroup='public_stream')

    def test_messages_sent_by_client(self):
@@ -487,7 +377,7 @@ class TestCountStats(AnalyticsTestCase):
        do_fill_count_stat_at_hour(stat, self.TIME_ZERO)

        client2_id = str(client2.id)
        website_client_id = str(get_client('website').id)  # default for self.create_message
        self.assertTableState(UserCount, ['value', 'subgroup', 'user'],
                              [[2, website_client_id, user1],
                               [1, client2_id, user1], [2, client2_id, user2],
@@ -502,7 +392,7 @@ class TestCountStats(AnalyticsTestCase):
    def test_messages_sent_to_stream_by_is_bot(self):
        # type: () -> None
-       stat = COUNT_STATS['messages_in_stream:is_bot:day']
+       stat = COUNT_STATS['messages_sent_to_stream:is_bot:hour']
        self.current_property = stat.property

        bot = self.create_user(is_bot=True)
@@ -530,520 +420,9 @@ class TestCountStats(AnalyticsTestCase):
        self.assertTableState(StreamCount, ['value', 'subgroup', 'stream'],
                              [[2, 'false', stream1], [1, 'false', stream2], [2, 'true', stream2],
-                              # "hourly" and "daily" stream, from TestCountStats.setUp
-                              [1, 'false', Stream.objects.get(name='stream 1')],
-                              [1, 'false', Stream.objects.get(name='stream 61')]])
+                              # "hourly" stream, from TestCountStats.setUp
+                              [1, 'false', Stream.objects.get(name='stream 1')]])
        self.assertTableState(RealmCount, ['value', 'subgroup', 'realm'],
-                             [[3, 'false'], [2, 'true'], [2, 'false', self.second_realm]])
-       self.assertTableState(InstallationCount, ['value', 'subgroup'], [[5, 'false'], [2, 'true']])
+                             [[3, 'false'], [2, 'true'], [1, 'false', self.second_realm]])
+       self.assertTableState(InstallationCount, ['value', 'subgroup'], [[4, 'false'], [2, 'true']])
        self.assertTableState(UserCount, [], [])
def create_interval(self, user, start_offset, end_offset):
# type: (UserProfile, timedelta, timedelta) -> None
UserActivityInterval.objects.create(
user_profile=user, start=self.TIME_ZERO-start_offset,
end=self.TIME_ZERO-end_offset)
def test_15day_actives(self):
# type: () -> None
stat = COUNT_STATS['15day_actives::day']
self.current_property = stat.property
_15day = 15*self.DAY - UserActivityInterval.MIN_INTERVAL_LENGTH
# Outside time range, should not appear. Also tests upper boundary.
user1 = self.create_user()
self.create_interval(user1, _15day + self.DAY, _15day + timedelta(seconds=1))
self.create_interval(user1, timedelta(0), -self.HOUR)
# On lower boundary, should appear
user2 = self.create_user()
self.create_interval(user2, _15day + self.DAY, _15day)
# Multiple intervals, including one outside boundary
user3 = self.create_user()
self.create_interval(user3, 20*self.DAY, 19*self.DAY)
self.create_interval(user3, 20*self.HOUR, 19*self.HOUR)
self.create_interval(user3, 20*self.MINUTE, 19*self.MINUTE)
# Intervals crossing boundary
user4 = self.create_user()
self.create_interval(user4, 20*self.DAY, 10*self.DAY)
user5 = self.create_user()
self.create_interval(user5, self.MINUTE, -self.MINUTE)
# Interval subsuming time range
user6 = self.create_user()
self.create_interval(user6, 20*self.DAY, -2*self.DAY)
# Second realm
user7 = self.create_user(realm=self.second_realm)
self.create_interval(user7, 20*self.MINUTE, 19*self.MINUTE)
do_fill_count_stat_at_hour(stat, self.TIME_ZERO)
self.assertTableState(UserCount, ['value', 'user'],
[[1, user2], [1, user3], [1, user4], [1, user5], [1, user6], [1, user7]])
self.assertTableState(RealmCount, ['value', 'realm'],
[[5, self.default_realm], [1, self.second_realm]])
self.assertTableState(InstallationCount, ['value'], [[6]])
self.assertTableState(StreamCount, [], [])
def test_minutes_active(self):
# type: () -> None
stat = COUNT_STATS['minutes_active::day']
self.current_property = stat.property
# Outside time range, should not appear. Also testing for intervals
# starting and ending on boundary
user1 = self.create_user()
self.create_interval(user1, 25*self.HOUR, self.DAY)
self.create_interval(user1, timedelta(0), -self.HOUR)
# Multiple intervals, including one outside boundary
user2 = self.create_user()
self.create_interval(user2, 20*self.DAY, 19*self.DAY)
self.create_interval(user2, 20*self.HOUR, 19*self.HOUR)
self.create_interval(user2, 20*self.MINUTE, 19*self.MINUTE)
# Intervals crossing boundary
user3 = self.create_user()
self.create_interval(user3, 25*self.HOUR, 22*self.HOUR)
self.create_interval(user3, self.MINUTE, -self.MINUTE)
# Interval subsuming time range
user4 = self.create_user()
self.create_interval(user4, 2*self.DAY, -2*self.DAY)
# Less than 60 seconds, should not appear
user5 = self.create_user()
self.create_interval(user5, self.MINUTE, timedelta(seconds=30))
self.create_interval(user5, timedelta(seconds=20), timedelta(seconds=10))
# Second realm
user6 = self.create_user(realm=self.second_realm)
self.create_interval(user6, 20*self.MINUTE, 19*self.MINUTE)
do_fill_count_stat_at_hour(stat, self.TIME_ZERO)
self.assertTableState(UserCount, ['value', 'user'],
[[61, user2], [121, user3], [24*60, user4], [1, user6]])
self.assertTableState(RealmCount, ['value', 'realm'],
[[61 + 121 + 24*60, self.default_realm], [1, self.second_realm]])
self.assertTableState(InstallationCount, ['value'], [[61 + 121 + 24*60 + 1]])
self.assertTableState(StreamCount, [], [])
class TestDoAggregateToSummaryTable(AnalyticsTestCase):
# do_aggregate_to_summary_table is mostly tested by the end-to-end
# nature of the tests in TestCountStats. But we want to highlight one
# feature important for keeping the analytics tables small: if there is
# no relevant data in the table being aggregated, the aggregation table
# doesn't get a row with value 0.
def test_no_aggregated_zeros(self):
# type: () -> None
stat = LoggingCountStat('test stat', UserCount, CountStat.HOUR)
do_aggregate_to_summary_table(stat, self.TIME_ZERO)
self.assertFalse(RealmCount.objects.exists())
self.assertFalse(InstallationCount.objects.exists())
class TestDoIncrementLoggingStat(AnalyticsTestCase):
def test_table_and_id_args(self):
# type: () -> None
# For realms, streams, and users, tests that the new rows are going to
# the appropriate *Count table, and that using a different zerver_object
# results in a new row being created
self.current_property = 'test'
second_realm = Realm.objects.create(string_id='moo', name='moo')
stat = LoggingCountStat('test', RealmCount, CountStat.DAY)
do_increment_logging_stat(self.default_realm, stat, None, self.TIME_ZERO)
do_increment_logging_stat(second_realm, stat, None, self.TIME_ZERO)
self.assertTableState(RealmCount, ['realm'], [[self.default_realm], [second_realm]])
user1 = self.create_user()
user2 = self.create_user()
stat = LoggingCountStat('test', UserCount, CountStat.DAY)
do_increment_logging_stat(user1, stat, None, self.TIME_ZERO)
do_increment_logging_stat(user2, stat, None, self.TIME_ZERO)
self.assertTableState(UserCount, ['user'], [[user1], [user2]])
stream1 = self.create_stream_with_recipient()[0]
stream2 = self.create_stream_with_recipient()[0]
stat = LoggingCountStat('test', StreamCount, CountStat.DAY)
do_increment_logging_stat(stream1, stat, None, self.TIME_ZERO)
do_increment_logging_stat(stream2, stat, None, self.TIME_ZERO)
self.assertTableState(StreamCount, ['stream'], [[stream1], [stream2]])
def test_frequency(self):
# type: () -> None
times = [self.TIME_ZERO - self.MINUTE*i for i in [0, 1, 61, 24*60+1]]
stat = LoggingCountStat('day test', RealmCount, CountStat.DAY)
for time_ in times:
do_increment_logging_stat(self.default_realm, stat, None, time_)
stat = LoggingCountStat('hour test', RealmCount, CountStat.HOUR)
for time_ in times:
do_increment_logging_stat(self.default_realm, stat, None, time_)
self.assertTableState(RealmCount, ['value', 'property', 'end_time'],
[[3, 'day test', self.TIME_ZERO],
[1, 'day test', self.TIME_ZERO - self.DAY],
[2, 'hour test', self.TIME_ZERO],
[1, 'hour test', self.TIME_LAST_HOUR],
[1, 'hour test', self.TIME_ZERO - self.DAY]])
def test_get_or_create(self):
# type: () -> None
stat = LoggingCountStat('test', RealmCount, CountStat.HOUR)
# All these should trigger the create part of get_or_create.
# property is tested in test_frequency, and id_args are tested in test_table_and_id_args,
# so this only tests a new subgroup and end_time
do_increment_logging_stat(self.default_realm, stat, 'subgroup1', self.TIME_ZERO)
do_increment_logging_stat(self.default_realm, stat, 'subgroup2', self.TIME_ZERO)
do_increment_logging_stat(self.default_realm, stat, 'subgroup1', self.TIME_LAST_HOUR)
self.current_property = 'test'
self.assertTableState(RealmCount, ['value', 'subgroup', 'end_time'],
[[1, 'subgroup1', self.TIME_ZERO], [1, 'subgroup2', self.TIME_ZERO],
[1, 'subgroup1', self.TIME_LAST_HOUR]])
# This should trigger the get part of get_or_create
do_increment_logging_stat(self.default_realm, stat, 'subgroup1', self.TIME_ZERO)
self.assertTableState(RealmCount, ['value', 'subgroup', 'end_time'],
[[2, 'subgroup1', self.TIME_ZERO], [1, 'subgroup2', self.TIME_ZERO],
[1, 'subgroup1', self.TIME_LAST_HOUR]])
def test_increment(self):
# type: () -> None
stat = LoggingCountStat('test', RealmCount, CountStat.DAY)
self.current_property = 'test'
do_increment_logging_stat(self.default_realm, stat, None, self.TIME_ZERO, increment=-1)
self.assertTableState(RealmCount, ['value'], [[-1]])
do_increment_logging_stat(self.default_realm, stat, None, self.TIME_ZERO, increment=3)
self.assertTableState(RealmCount, ['value'], [[2]])
do_increment_logging_stat(self.default_realm, stat, None, self.TIME_ZERO)
self.assertTableState(RealmCount, ['value'], [[3]])
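# Sketch of the bucketing the three tests above describe (an illustration,
# not the implementation): an event at time t is logged into the row whose
# end_time is t rounded up to the stat's frequency boundary, and repeated
# events get_or_create that row and add their increment to its value.
# Assumes ceiling_to_hour/ceiling_to_day from zerver.lib.timestamp.
def bucket_end_time_sketch(stat, event_time):
    # type: (CountStat, datetime) -> datetime
    if stat.frequency == CountStat.DAY:
        return ceiling_to_day(event_time)
    return ceiling_to_hour(event_time)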
class TestLoggingCountStats(AnalyticsTestCase):
def test_aggregation(self):
# type: () -> None
stat = LoggingCountStat('realm test', RealmCount, CountStat.DAY)
do_increment_logging_stat(self.default_realm, stat, None, self.TIME_ZERO)
process_count_stat(stat, self.TIME_ZERO)
user = self.create_user()
stat = LoggingCountStat('user test', UserCount, CountStat.DAY)
do_increment_logging_stat(user, stat, None, self.TIME_ZERO)
process_count_stat(stat, self.TIME_ZERO)
stream = self.create_stream_with_recipient()[0]
stat = LoggingCountStat('stream test', StreamCount, CountStat.DAY)
do_increment_logging_stat(stream, stat, None, self.TIME_ZERO)
process_count_stat(stat, self.TIME_ZERO)
self.assertTableState(InstallationCount, ['property', 'value'],
[['realm test', 1], ['user test', 1], ['stream test', 1]])
self.assertTableState(RealmCount, ['property', 'value'],
[['realm test', 1], ['user test', 1], ['stream test', 1]])
self.assertTableState(UserCount, ['property', 'value'], [['user test', 1]])
self.assertTableState(StreamCount, ['property', 'value'], [['stream test', 1]])
def test_active_users_log_by_is_bot(self):
# type: () -> None
property = 'active_users_log:is_bot:day'
user = do_create_user('email', 'password', self.default_realm, 'full_name', 'short_name')
self.assertEqual(1, RealmCount.objects.filter(property=property, subgroup=False)
.aggregate(Sum('value'))['value__sum'])
do_deactivate_user(user)
self.assertEqual(0, RealmCount.objects.filter(property=property, subgroup=False)
.aggregate(Sum('value'))['value__sum'])
do_activate_user(user)
self.assertEqual(1, RealmCount.objects.filter(property=property, subgroup=False)
.aggregate(Sum('value'))['value__sum'])
do_deactivate_user(user)
self.assertEqual(0, RealmCount.objects.filter(property=property, subgroup=False)
.aggregate(Sum('value'))['value__sum'])
do_reactivate_user(user)
self.assertEqual(1, RealmCount.objects.filter(property=property, subgroup=False)
.aggregate(Sum('value'))['value__sum'])
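# What makes the counts above move (a sketch of the wiring as of this era of
# the codebase; the actual calls live in zerver/lib/actions.py, not in this
# file): do_create_user, do_activate_user, and do_reactivate_user effectively
# run
#     do_increment_logging_stat(user.realm,
#                               COUNT_STATS['active_users_log:is_bot:day'],
#                               user.is_bot, event_time)
# while do_deactivate_user makes the same call with increment=-1, so the
# day's RealmCount row holds the net number of active users.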
class TestDeleteStats(AnalyticsTestCase):
def test_do_drop_all_analytics_tables(self):
# type: () -> None
user = self.create_user()
stream = self.create_stream_with_recipient()[0]
count_args = {'property': 'test', 'end_time': self.TIME_ZERO, 'value': 10}
UserCount.objects.create(user=user, realm=user.realm, **count_args)
StreamCount.objects.create(stream=stream, realm=stream.realm, **count_args)
RealmCount.objects.create(realm=user.realm, **count_args)
InstallationCount.objects.create(**count_args)
FillState.objects.create(property='test', end_time=self.TIME_ZERO, state=FillState.DONE)
Anomaly.objects.create(info='test anomaly')
analytics = apps.get_app_config('analytics')
for table in list(analytics.models.values()):
self.assertTrue(table.objects.exists())
do_drop_all_analytics_tables()
for table in list(analytics.models.values()):
self.assertFalse(table.objects.exists())
class TestActiveUsersAudit(AnalyticsTestCase):
def setUp(self):
# type: () -> None
super(TestActiveUsersAudit, self).setUp()
self.user = self.create_user()
self.stat = COUNT_STATS['active_users_audit:is_bot:day']
self.current_property = self.stat.property
def add_event(self, event_type, days_offset, user=None):
# type: (str, float, Optional[UserProfile]) -> None
hours_offset = int(24*days_offset)
if user is None:
user = self.user
RealmAuditLog.objects.create(
realm=user.realm, modified_user=user, event_type=event_type,
event_time=self.TIME_ZERO - hours_offset*self.HOUR)
def test_user_deactivated_in_future(self):
# type: () -> None
self.add_event('user_created', 1)
self.add_event('user_deactivated', 0)
do_fill_count_stat_at_hour(self.stat, self.TIME_ZERO)
self.assertTableState(UserCount, ['subgroup'], [['false']])
def test_user_reactivated_in_future(self):
# type: () -> None
self.add_event('user_deactivated', 1)
self.add_event('user_reactivated', 0)
do_fill_count_stat_at_hour(self.stat, self.TIME_ZERO)
self.assertTableState(UserCount, [], [])
def test_user_active_then_deactivated_same_day(self):
# type: () -> None
self.add_event('user_created', 1)
self.add_event('user_deactivated', .5)
do_fill_count_stat_at_hour(self.stat, self.TIME_ZERO)
self.assertTableState(UserCount, [], [])
def test_user_unactive_then_activated_same_day(self):
# type: () -> None
self.add_event('user_deactivated', 1)
self.add_event('user_reactivated', .5)
do_fill_count_stat_at_hour(self.stat, self.TIME_ZERO)
self.assertTableState(UserCount, ['subgroup'], [['false']])
# Arguably these next two tests are duplicates of the _in_future tests, but are
# a guard against future refactorings where they may no longer be duplicates
def test_user_active_then_deactivated_with_day_gap(self):
# type: () -> None
self.add_event('user_created', 2)
self.add_event('user_deactivated', 1)
process_count_stat(self.stat, self.TIME_ZERO)
self.assertTableState(UserCount, ['subgroup', 'end_time'],
[['false', self.TIME_ZERO - self.DAY]])
def test_user_deactivated_then_reactivated_with_day_gap(self):
# type: () -> None
self.add_event('user_deactivated', 2)
self.add_event('user_reactivated', 1)
process_count_stat(self.stat, self.TIME_ZERO)
self.assertTableState(UserCount, ['subgroup'], [['false']])
def test_event_types(self):
# type: () -> None
self.add_event('user_created', 4)
self.add_event('user_deactivated', 3)
self.add_event('user_activated', 2)
self.add_event('user_reactivated', 1)
for i in range(4):
do_fill_count_stat_at_hour(self.stat, self.TIME_ZERO - i*self.DAY)
self.assertTableState(UserCount, ['subgroup', 'end_time'],
[['false', self.TIME_ZERO - i*self.DAY] for i in [3, 1, 0]])
# Also tests that aggregation to RealmCount and InstallationCount is
# being done, and that we're storing the user correctly in UserCount
def test_multiple_users_realms_and_bots(self):
# type: () -> None
user1 = self.create_user()
user2 = self.create_user()
second_realm = Realm.objects.create(string_id='moo', name='moo')
user3 = self.create_user(realm=second_realm)
user4 = self.create_user(realm=second_realm, is_bot=True)
for user in [user1, user2, user3, user4]:
self.add_event('user_created', 1, user=user)
do_fill_count_stat_at_hour(self.stat, self.TIME_ZERO)
self.assertTableState(UserCount, ['subgroup', 'user'],
[['false', user1], ['false', user2], ['false', user3], ['true', user4]])
self.assertTableState(RealmCount, ['value', 'subgroup', 'realm'],
[[2, 'false', self.default_realm], [1, 'false', second_realm],
[1, 'true', second_realm]])
self.assertTableState(InstallationCount, ['value', 'subgroup'], [[3, 'false'], [1, 'true']])
self.assertTableState(StreamCount, [], [])
# Not that interesting a test if you look at the SQL query at hand, but
# almost all other CountStats have a start_date, so this guards against a
# refactoring that adds one here.
# Also tests the slightly more end-to-end process_count_stat rather than
# do_fill_count_stat_at_hour. E.g. if one changes self.stat.frequency to
# CountStat.HOUR from CountStat.DAY, this will fail, while many of the
# tests above will not.
def test_update_from_two_days_ago(self):
# type: () -> None
self.add_event('user_created', 2)
process_count_stat(self.stat, self.TIME_ZERO)
self.assertTableState(UserCount, ['subgroup', 'end_time'],
[['false', self.TIME_ZERO], ['false', self.TIME_ZERO-self.DAY]])
# A user with no relevant activity could happen e.g. for a system bot that
# doesn't go through do_create_user. We mainly just want to make sure
# that situation doesn't throw an error.
def test_empty_realm_or_user_with_no_relevant_activity(self):
# type: () -> None
self.add_event('unrelated', 1)
self.create_user() # also test a user with no RealmAuditLog entries
Realm.objects.create(string_id='moo', name='moo')
do_fill_count_stat_at_hour(self.stat, self.TIME_ZERO)
self.assertTableState(UserCount, [], [])
def test_max_audit_entry_is_unrelated(self):
# type: () -> None
self.add_event('user_created', 1)
self.add_event('unrelated', .5)
do_fill_count_stat_at_hour(self.stat, self.TIME_ZERO)
self.assertTableState(UserCount, ['subgroup'], [['false']])
# Simultaneous related audit entries should not be allowed, so we don't test for that.
def test_simultaneous_unrelated_audit_entry(self):
# type: () -> None
self.add_event('user_created', 1)
self.add_event('unrelated', 1)
do_fill_count_stat_at_hour(self.stat, self.TIME_ZERO)
self.assertTableState(UserCount, ['subgroup'], [['false']])
def test_simultaneous_max_audit_entries_of_different_users(self):
# type: () -> None
user1 = self.create_user()
user2 = self.create_user()
user3 = self.create_user()
self.add_event('user_created', .5, user=user1)
self.add_event('user_created', .5, user=user2)
self.add_event('user_created', 1, user=user3)
self.add_event('user_deactivated', .5, user=user3)
do_fill_count_stat_at_hour(self.stat, self.TIME_ZERO)
self.assertTableState(UserCount, ['user', 'subgroup'],
[[user1, 'false'], [user2, 'false']])
def test_end_to_end_with_actions_dot_py(self):
# type: () -> None
user1 = do_create_user('email1', 'password', self.default_realm, 'full_name', 'short_name')
user2 = do_create_user('email2', 'password', self.default_realm, 'full_name', 'short_name')
user3 = do_create_user('email3', 'password', self.default_realm, 'full_name', 'short_name')
user4 = do_create_user('email4', 'password', self.default_realm, 'full_name', 'short_name')
do_deactivate_user(user2)
do_activate_user(user3)
do_reactivate_user(user4)
end_time = floor_to_day(timezone_now()) + self.DAY
do_fill_count_stat_at_hour(self.stat, end_time)
for user in [user1, user3, user4]:
self.assertTrue(UserCount.objects.filter(
user=user, property=self.current_property, subgroup='false',
end_time=end_time, value=1).exists())
self.assertFalse(UserCount.objects.filter(user=user2).exists())
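# The invariant all of the above circles around (an informal sketch; the
# stat itself computes this in SQL): at a given end_time, a user is counted
# iff their latest relevant RealmAuditLog event strictly before end_time is
# a creation/activation/reactivation rather than a deactivation.
def is_audit_active_sketch(user, end_time):
    # type: (UserProfile, datetime) -> bool
    last_event = RealmAuditLog.objects.filter(
        modified_user=user, event_time__lt=end_time,
        event_type__in=['user_created', 'user_activated',
                        'user_reactivated', 'user_deactivated'],
    ).order_by('event_time').last()
    return last_event is not None and last_event.event_type != 'user_deactivated'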
class TestRealmActiveHumans(AnalyticsTestCase):
def setUp(self):
# type: () -> None
super(TestRealmActiveHumans, self).setUp()
self.stat = COUNT_STATS['realm_active_humans::day']
self.current_property = self.stat.property
def mark_audit_active(self, user, end_time=None):
# type: (UserProfile, Optional[datetime]) -> None
if end_time is None:
end_time = self.TIME_ZERO
UserCount.objects.create(
user=user, realm=user.realm, property='active_users_audit:is_bot:day',
subgroup=ujson.dumps(user.is_bot), end_time=end_time, value=1)
def mark_15day_active(self, user, end_time=None):
# type: (UserProfile, Optional[datetime]) -> None
if end_time is None:
end_time = self.TIME_ZERO
UserCount.objects.create(
user=user, realm=user.realm, property='15day_actives::day',
end_time=end_time, value=1)
def test_basic_boolean_logic(self):
# type: () -> None
user = self.create_user()
self.mark_audit_active(user, end_time=self.TIME_ZERO - self.DAY)
self.mark_15day_active(user, end_time=self.TIME_ZERO)
self.mark_audit_active(user, end_time=self.TIME_ZERO + self.DAY)
self.mark_15day_active(user, end_time=self.TIME_ZERO + self.DAY)
for i in [-1, 0, 1]:
do_fill_count_stat_at_hour(self.stat, self.TIME_ZERO + i*self.DAY)
self.assertTableState(RealmCount, ['value', 'end_time'], [[1, self.TIME_ZERO + self.DAY]])
def test_bots_not_counted(self):
# type: () -> None
bot = self.create_user(is_bot=True)
self.mark_audit_active(bot)
self.mark_15day_active(bot)
do_fill_count_stat_at_hour(self.stat, self.TIME_ZERO)
self.assertTableState(RealmCount, [], [])
def test_multiple_users_realms_and_times(self):
# type: () -> None
user1 = self.create_user()
user2 = self.create_user()
second_realm = Realm.objects.create(string_id='second', name='second')
user3 = self.create_user(realm=second_realm)
user4 = self.create_user(realm=second_realm)
user5 = self.create_user(realm=second_realm)
for user in [user1, user2, user3, user4, user5]:
self.mark_audit_active(user)
self.mark_15day_active(user)
for user in [user1, user3, user4]:
self.mark_audit_active(user, end_time=self.TIME_ZERO - self.DAY)
self.mark_15day_active(user, end_time=self.TIME_ZERO - self.DAY)
for i in [-1, 0, 1]:
do_fill_count_stat_at_hour(self.stat, self.TIME_ZERO + i*self.DAY)
self.assertTableState(RealmCount, ['value', 'realm', 'end_time'],
[[2, self.default_realm, self.TIME_ZERO],
[3, second_realm, self.TIME_ZERO],
[1, self.default_realm, self.TIME_ZERO - self.DAY],
[2, second_realm, self.TIME_ZERO - self.DAY]])
# Check that adding spurious entries doesn't make a difference
self.mark_audit_active(user1, end_time=self.TIME_ZERO + self.DAY)
self.mark_15day_active(user2, end_time=self.TIME_ZERO + self.DAY)
self.mark_15day_active(user2, end_time=self.TIME_ZERO - self.DAY)
self.create_user()
third_realm = Realm.objects.create(string_id='third', name='third')
self.create_user(realm=third_realm)
RealmCount.objects.all().delete()
for i in [-1, 0, 1]:
do_fill_count_stat_at_hour(self.stat, self.TIME_ZERO + i*self.DAY)
self.assertTableState(RealmCount, ['value', 'realm', 'end_time'],
[[2, self.default_realm, self.TIME_ZERO],
[3, second_realm, self.TIME_ZERO],
[1, self.default_realm, self.TIME_ZERO - self.DAY],
[2, second_realm, self.TIME_ZERO - self.DAY]])
def test_end_to_end(self):
# type: () -> None
user1 = do_create_user('email1', 'password', self.default_realm, 'full_name', 'short_name')
user2 = do_create_user('email2', 'password', self.default_realm, 'full_name', 'short_name')
do_create_user('email3', 'password', self.default_realm, 'full_name', 'short_name')
time_zero = floor_to_day(timezone_now()) + self.DAY
update_user_activity_interval(user1, time_zero)
update_user_activity_interval(user2, time_zero)
do_deactivate_user(user2)
for property in ['active_users_audit:is_bot:day', '15day_actives::day',
'realm_active_humans::day']:
FillState.objects.create(property=property, state=FillState.DONE, end_time=time_zero)
process_count_stat(COUNT_STATS[property], time_zero+self.DAY)
self.assertEqual(RealmCount.objects.filter(
property='realm_active_humans::day', end_time=time_zero+self.DAY, value=1).count(), 1)
self.assertEqual(RealmCount.objects.filter(property='realm_active_humans::day').count(), 1)
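# Conceptually, realm_active_humans::day intersects the two kinds of UserCount
# rows the helpers above create (a set-based sketch; the real stat does this
# in SQL, grouped by realm):
def realm_active_humans_sketch(realm, end_time):
    # type: (Realm, datetime) -> int
    audit_active = set(UserCount.objects.filter(
        realm=realm, property='active_users_audit:is_bot:day',
        subgroup='false', end_time=end_time).values_list('user_id', flat=True))
    recently_active = set(UserCount.objects.filter(
        realm=realm, property='15day_actives::day',
        end_time=end_time).values_list('user_id', flat=True))
    return len(audit_active & recently_active)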

View File

@@ -1,31 +0,0 @@
from zerver.lib.test_classes import ZulipTestCase
from analytics.lib.counts import CountStat
from analytics.lib.fixtures import generate_time_series_data
# A very light test suite; the code being tested is not run in production.
class TestFixtures(ZulipTestCase):
def test_deterministic_settings(self):
# type: () -> None
# test basic business_hour / non_business_hour calculation
# test we get an array of the right length with frequency=CountStat.DAY
data = generate_time_series_data(
days=7, business_hours_base=20, non_business_hours_base=15, spikiness=0)
self.assertEqual(data, [400, 400, 400, 400, 400, 360, 360])
data = generate_time_series_data(
days=1, business_hours_base=2000, non_business_hours_base=1500,
growth=2, spikiness=0, frequency=CountStat.HOUR)
# test we get an array of the right length with frequency=CountStat.HOUR
self.assertEqual(len(data), 24)
# test that growth doesn't affect the first data point
self.assertEqual(data[0], 2000)
# test that the last data point is growth times what it otherwise would be
self.assertEqual(data[-1], 1500*2)
# test autocorrelation == 1, since that's the easiest value to test
data = generate_time_series_data(
days=1, business_hours_base=2000, non_business_hours_base=2000,
autocorrelation=1, frequency=CountStat.HOUR)
self.assertEqual(data[0], data[1])
self.assertEqual(data[0], data[-1])
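# For reference, the deterministic expectations above are consistent with the
# generator treating 8 hours of each weekday as business hours (an assumption
# about the fixture code, not something asserted directly here):
#     weekday: 8*20 + 16*15 = 160 + 240 = 400
#     weekend: 24*15 = 360
# giving [400, 400, 400, 400, 400, 360, 360] for days=7.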

View File

@@ -1,283 +1,18 @@
-from __future__ import absolute_import
-from django.utils.timezone import get_fixed_timezone, utc
+from django.utils.timezone import get_fixed_timezone
 from zerver.lib.test_classes import ZulipTestCase
-from zerver.lib.timestamp import ceiling_to_hour, ceiling_to_day, \
-    datetime_to_timestamp
-from zerver.models import Realm, UserProfile, Client, get_realm, \
-    get_user_profile_by_email
-from analytics.lib.counts import CountStat, COUNT_STATS
+from analytics.lib.counts import CountStat
 from analytics.lib.time_utils import time_range
-from analytics.models import RealmCount, UserCount, BaseCount, \
-    FillState, last_successful_fill
-from analytics.views import stats, get_chart_data, sort_by_totals, \
-    sort_client_labels, rewrite_client_arrays
+from analytics.views import rewrite_client_arrays
 from datetime import datetime, timedelta
-import mock
-import ujson
-from six.moves import range
-from typing import List, Dict, Optional
class TestStatsEndpoint(ZulipTestCase):
def test_stats(self):
# type: () -> None
self.user = get_user_profile_by_email('hamlet@zulip.com')
self.login(self.user.email)
result = self.client_get('/stats')
self.assertEqual(result.status_code, 200)
# Check that we get something back
self.assert_in_response("Zulip Analytics for", result)
class TestGetChartData(ZulipTestCase):
def setUp(self):
# type: () -> None
self.realm = get_realm('zulip')
self.user = get_user_profile_by_email('hamlet@zulip.com')
self.login(self.user.email)
self.end_times_hour = [ceiling_to_hour(self.realm.date_created) + timedelta(hours=i)
for i in range(4)]
self.end_times_day = [ceiling_to_day(self.realm.date_created) + timedelta(days=i)
for i in range(4)]
def data(self, i):
# type: (int) -> List[int]
return [0, 0, i, 0]
def insert_data(self, stat, realm_subgroups, user_subgroups):
# type: (CountStat, List[Optional[str]], List[str]) -> None
if stat.frequency == CountStat.HOUR:
insert_time = self.end_times_hour[2]
fill_time = self.end_times_hour[-1]
if stat.frequency == CountStat.DAY:
insert_time = self.end_times_day[2]
fill_time = self.end_times_day[-1]
RealmCount.objects.bulk_create([
RealmCount(property=stat.property, subgroup=subgroup, end_time=insert_time,
value=100+i, realm=self.realm)
for i, subgroup in enumerate(realm_subgroups)])
UserCount.objects.bulk_create([
UserCount(property=stat.property, subgroup=subgroup, end_time=insert_time,
value=200+i, realm=self.realm, user=self.user)
for i, subgroup in enumerate(user_subgroups)])
FillState.objects.create(property=stat.property, end_time=fill_time, state=FillState.DONE)
def test_number_of_humans(self):
# type: () -> None
stat = COUNT_STATS['realm_active_humans::day']
self.insert_data(stat, [None], [])
result = self.client_get('/json/analytics/chart_data',
{'chart_name': 'number_of_humans'})
self.assert_json_success(result)
data = ujson.loads(result.content)
self.assertEqual(data, {
'msg': '',
'end_times': [datetime_to_timestamp(dt) for dt in self.end_times_day],
'frequency': CountStat.DAY,
'realm': {'human': self.data(100)},
'display_order': None,
'result': 'success',
})
def test_messages_sent_over_time(self):
# type: () -> None
stat = COUNT_STATS['messages_sent:is_bot:hour']
self.insert_data(stat, ['true', 'false'], ['false'])
result = self.client_get('/json/analytics/chart_data',
{'chart_name': 'messages_sent_over_time'})
self.assert_json_success(result)
data = ujson.loads(result.content)
self.assertEqual(data, {
'msg': '',
'end_times': [datetime_to_timestamp(dt) for dt in self.end_times_hour],
'frequency': CountStat.HOUR,
'realm': {'bot': self.data(100), 'human': self.data(101)},
'user': {'bot': self.data(0), 'human': self.data(200)},
'display_order': None,
'result': 'success',
})
def test_messages_sent_by_message_type(self):
# type: () -> None
stat = COUNT_STATS['messages_sent:message_type:day']
self.insert_data(stat, ['public_stream', 'private_message'],
['public_stream', 'private_stream'])
result = self.client_get('/json/analytics/chart_data',
{'chart_name': 'messages_sent_by_message_type'})
self.assert_json_success(result)
data = ujson.loads(result.content)
self.assertEqual(data, {
'msg': '',
'end_times': [datetime_to_timestamp(dt) for dt in self.end_times_day],
'frequency': CountStat.DAY,
'realm': {'Public streams': self.data(100), 'Private streams': self.data(0),
'Private messages': self.data(101), 'Group private messages': self.data(0)},
'user': {'Public streams': self.data(200), 'Private streams': self.data(201),
'Private messages': self.data(0), 'Group private messages': self.data(0)},
'display_order': ['Private messages', 'Public streams', 'Private streams', 'Group private messages'],
'result': 'success',
})
def test_messages_sent_by_client(self):
# type: () -> None
stat = COUNT_STATS['messages_sent:client:day']
client1 = Client.objects.create(name='client 1')
client2 = Client.objects.create(name='client 2')
client3 = Client.objects.create(name='client 3')
client4 = Client.objects.create(name='client 4')
self.insert_data(stat, [client4.id, client3.id, client2.id],
[client3.id, client1.id])
result = self.client_get('/json/analytics/chart_data',
{'chart_name': 'messages_sent_by_client'})
self.assert_json_success(result)
data = ujson.loads(result.content)
self.assertEqual(data, {
'msg': '',
'end_times': [datetime_to_timestamp(dt) for dt in self.end_times_day],
'frequency': CountStat.DAY,
'realm': {'client 4': self.data(100), 'client 3': self.data(101),
'client 2': self.data(102)},
'user': {'client 3': self.data(200), 'client 1': self.data(201)},
'display_order': ['client 1', 'client 2', 'client 3', 'client 4'],
'result': 'success',
})
def test_include_empty_subgroups(self):
# type: () -> None
FillState.objects.create(
property='realm_active_humans::day', end_time=self.end_times_day[0], state=FillState.DONE)
result = self.client_get('/json/analytics/chart_data',
{'chart_name': 'number_of_humans'})
self.assert_json_success(result)
data = ujson.loads(result.content)
self.assertEqual(data['realm'], {'human': [0]})
self.assertFalse('user' in data)
FillState.objects.create(
property='messages_sent:is_bot:hour', end_time=self.end_times_hour[0], state=FillState.DONE)
result = self.client_get('/json/analytics/chart_data',
{'chart_name': 'messages_sent_over_time'})
self.assert_json_success(result)
data = ujson.loads(result.content)
self.assertEqual(data['realm'], {'human': [0], 'bot': [0]})
self.assertEqual(data['user'], {'human': [0], 'bot': [0]})
FillState.objects.create(
property='messages_sent:message_type:day', end_time=self.end_times_day[0], state=FillState.DONE)
result = self.client_get('/json/analytics/chart_data',
{'chart_name': 'messages_sent_by_message_type'})
self.assert_json_success(result)
data = ujson.loads(result.content)
self.assertEqual(data['realm'], {
'Public streams': [0], 'Private streams': [0], 'Private messages': [0], 'Group private messages': [0]})
self.assertEqual(data['user'], {
'Public streams': [0], 'Private streams': [0], 'Private messages': [0], 'Group private messages': [0]})
FillState.objects.create(
property='messages_sent:client:day', end_time=self.end_times_day[0], state=FillState.DONE)
result = self.client_get('/json/analytics/chart_data',
{'chart_name': 'messages_sent_by_client'})
self.assert_json_success(result)
data = ujson.loads(result.content)
self.assertEqual(data['realm'], {})
self.assertEqual(data['user'], {})
def test_start_and_end(self):
# type: () -> None
stat = COUNT_STATS['realm_active_humans::day']
self.insert_data(stat, [None], [])
end_time_timestamps = [datetime_to_timestamp(dt) for dt in self.end_times_day]
# valid start and end
result = self.client_get('/json/analytics/chart_data',
{'chart_name': 'number_of_humans',
'start': end_time_timestamps[1],
'end': end_time_timestamps[2]})
self.assert_json_success(result)
data = ujson.loads(result.content)
self.assertEqual(data['end_times'], end_time_timestamps[1:3])
self.assertEqual(data['realm'], {'human': [0, 100]})
# start later than end
result = self.client_get('/json/analytics/chart_data',
{'chart_name': 'number_of_humans',
'start': end_time_timestamps[2],
'end': end_time_timestamps[1]})
self.assert_json_error_contains(result, 'Start time is later than')
def test_min_length(self):
# type: () -> None
stat = COUNT_STATS['realm_active_humans::day']
self.insert_data(stat, [None], [])
# test min_length is too short to change anything
result = self.client_get('/json/analytics/chart_data',
{'chart_name': 'number_of_humans',
'min_length': 2})
self.assert_json_success(result)
data = ujson.loads(result.content)
self.assertEqual(data['end_times'], [datetime_to_timestamp(dt) for dt in self.end_times_day])
self.assertEqual(data['realm'], {'human': self.data(100)})
# test min_length larger than filled data
result = self.client_get('/json/analytics/chart_data',
{'chart_name': 'number_of_humans',
'min_length': 5})
self.assert_json_success(result)
data = ujson.loads(result.content)
end_times = [ceiling_to_day(self.realm.date_created) + timedelta(days=i) for i in range(-1, 4)]
self.assertEqual(data['end_times'], [datetime_to_timestamp(dt) for dt in end_times])
self.assertEqual(data['realm'], {'human': [0]+self.data(100)})
def test_non_existent_chart(self):
# type: () -> None
result = self.client_get('/json/analytics/chart_data',
{'chart_name': 'does_not_exist'})
self.assert_json_error_contains(result, 'Unknown chart name')
def test_analytics_not_running(self):
# type: () -> None
# try to get data for a valid chart, but before we've put anything in the database
# (e.g. before update_analytics_counts has been run)
with mock.patch('logging.warning'):
result = self.client_get('/json/analytics/chart_data',
{'chart_name': 'number_of_humans'})
self.assert_json_error_contains(result, 'No analytics data available')
class TestGetChartDataHelpers(ZulipTestCase):
# last_successful_fill is in analytics/models.py, but get_chart_data is
# the only function that uses it at the moment
def test_last_successful_fill(self):
# type: () -> None
self.assertIsNone(last_successful_fill('non-existent'))
a_time = datetime(2016, 3, 14, 19).replace(tzinfo=utc)
one_hour_before = datetime(2016, 3, 14, 18).replace(tzinfo=utc)
fillstate = FillState.objects.create(property='property', end_time=a_time,
state=FillState.DONE)
self.assertEqual(last_successful_fill('property'), a_time)
fillstate.state = FillState.STARTED
fillstate.save()
self.assertEqual(last_successful_fill('property'), one_hour_before)
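# A sketch consistent with the two assertions above (not necessarily the
# exact implementation in analytics/models.py): DONE means the hour ending
# at end_time was filled; STARTED means that hour is still in progress, so
# the last *successful* fill is one hour earlier.
def last_successful_fill_sketch(property):
    # type: (str) -> Optional[datetime]
    fillstate = FillState.objects.filter(property=property).first()
    if fillstate is None:
        return None
    if fillstate.state == FillState.DONE:
        return fillstate.end_time
    return fillstate.end_time - timedelta(hours=1)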
def test_sort_by_totals(self):
# type: () -> None
empty = [] # type: List[int]
value_arrays = {'c': [0, 1], 'a': [9], 'b': [1, 1, 1], 'd': empty}
self.assertEqual(sort_by_totals(value_arrays), ['a', 'b', 'c', 'd'])
def test_sort_client_labels(self):
# type: () -> None
data = {'realm': {'a': [16], 'c': [15], 'b': [14], 'e': [13], 'd': [12], 'h': [11]},
'user': {'a': [6], 'b': [5], 'd': [4], 'e': [3], 'f': [2], 'g': [1]}}
self.assertEqual(sort_client_labels(data), ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'])
class TestTimeRange(ZulipTestCase):
    def test_time_range(self):
        # type: () -> None
        HOUR = timedelta(hours=1)
        DAY = timedelta(days=1)
        TZINFO = get_fixed_timezone(-100)  # 100 minutes west of UTC

        # Using 22:59 so that converting to UTC and applying floor_to_{hour,day} do not commute
        a_time = datetime(2016, 3, 14, 22, 59).replace(tzinfo=TZINFO)
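# A sketch of the contract being exercised here, inferred from the call
# sites and assertions in this file rather than copied from
# analytics/lib/time_utils.py: time_range returns end-times spaced by the
# CountStat frequency, covering [start, end] and padded backwards until at
# least min_length entries exist.
def time_range_sketch(start, end, frequency, min_length):
    # type: (datetime, datetime, str, Optional[int]) -> List[datetime]
    step = timedelta(days=1) if frequency == CountStat.DAY else timedelta(hours=1)
    times = [end]
    while times[-1] - step >= start or (min_length is not None and len(times) < min_length):
        times.append(times[-1] - step)
    return list(reversed(times))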

View File

@@ -1,49 +1,46 @@
 from __future__ import absolute_import, division

-from django.conf import settings
 from django.core import urlresolvers
 from django.db import connection
 from django.db.models import Sum
 from django.db.models.query import QuerySet
 from django.http import HttpResponseNotFound, HttpRequest, HttpResponse
 from django.template import RequestContext, loader
-from django.utils.timezone import now as timezone_now
+from django.utils import timezone
 from django.utils.translation import ugettext as _
-from django.shortcuts import render

 from jinja2 import Markup as mark_safe

 from analytics.lib.counts import CountStat, process_count_stat, COUNT_STATS
 from analytics.lib.time_utils import time_range
 from analytics.models import BaseCount, InstallationCount, RealmCount, \
-    UserCount, StreamCount, last_successful_fill
+    UserCount, StreamCount
-from zerver.decorator import has_request_variables, REQ, require_server_admin, \
+from zerver.decorator import has_request_variables, REQ, zulip_internal, \
     zulip_login_required, to_non_negative_int, to_utc_datetime
 from zerver.lib.request import JsonableError
 from zerver.lib.response import json_success
 from zerver.lib.timestamp import ceiling_to_hour, ceiling_to_day, timestamp_to_datetime
 from zerver.models import Realm, UserProfile, UserActivity, \
     UserActivityInterval, Client
+from zproject.jinja2 import render_to_response

 from collections import defaultdict
 from datetime import datetime, timedelta
 import itertools
 import json
-import logging
 import pytz
 import re
 import time

 from six.moves import filter, map, range, zip
-from typing import Any, Callable, Dict, List, Optional, Set, Text, \
-    Tuple, Type, Union
+from typing import Any, Dict, List, Tuple, Optional, Sequence, Callable, Type, \
+    Union, Text

 @zulip_login_required
 def stats(request):
     # type: (HttpRequest) -> HttpResponse
-    return render(request,
-                  'analytics/stats.html',
-                  context=dict(realm_name = request.user.realm.name))
+    return render_to_response('analytics/stats.html',
+                              context=dict(realm_name = request.user.realm.name))

 @has_request_variables
 def get_chart_data(request, user_profile, chart_name=REQ(),
@@ -51,94 +48,54 @@ def get_chart_data(request, user_profile, chart_name=REQ(),
                   start=REQ(converter=to_utc_datetime, default=None),
                   end=REQ(converter=to_utc_datetime, default=None)):
    # type: (HttpRequest, UserProfile, Text, Optional[int], Optional[datetime], Optional[datetime]) -> HttpResponse
+   realm = user_profile.realm
+   # These are implicitly relying on realm.date_created and timezone.now being in UTC.
+   if start is None:
+       start = realm.date_created
+   if end is None:
+       end = timezone.now()
+   if start > end:
+       raise JsonableError(_("Start time is later than end time. Start: %(start)s, End: %(end)s") %
+                           {'start': start, 'end': end})
    if chart_name == 'number_of_humans':
-       stat = COUNT_STATS['realm_active_humans::day']
+       stat = COUNT_STATS['active_users:is_bot:day']
        tables = [RealmCount]
-       subgroup_to_label = {None: 'human'}  # type: Dict[Optional[str], str]
-       labels_sort_function = None
+       subgroups = ['false', 'true']
+       labels = ['human', 'bot']
        include_empty_subgroups = True
    elif chart_name == 'messages_sent_over_time':
        stat = COUNT_STATS['messages_sent:is_bot:hour']
-       tables = [RealmCount, UserCount]
-       subgroup_to_label = {'false': 'human', 'true': 'bot'}
-       labels_sort_function = None
+       tables = [RealmCount]
+       subgroups = ['false', 'true']
+       labels = ['human', 'bot']
        include_empty_subgroups = True
    elif chart_name == 'messages_sent_by_message_type':
        stat = COUNT_STATS['messages_sent:message_type:day']
        tables = [RealmCount, UserCount]
-       subgroup_to_label = {'public_stream': 'Public streams',
-                            'private_stream': 'Private streams',
-                            'private_message': 'Private messages',
-                            'huddle_message': 'Group private messages'}
-       labels_sort_function = lambda data: sort_by_totals(data['realm'])
+       subgroups = ['public_stream', 'private_stream', 'private_message']
+       labels = None
        include_empty_subgroups = True
    elif chart_name == 'messages_sent_by_client':
        stat = COUNT_STATS['messages_sent:client:day']
        tables = [RealmCount, UserCount]
-       # Note that the labels are further re-written by client_label_map
-       subgroup_to_label = {str(id): name for id, name in Client.objects.values_list('id', 'name')}
-       labels_sort_function = sort_client_labels
+       subgroups = [str(x) for x in Client.objects.values_list('id', flat=True).order_by('id')]
+       labels = list(Client.objects.values_list('name', flat=True).order_by('id'))
        include_empty_subgroups = False
    else:
        raise JsonableError(_("Unknown chart name: %s") % (chart_name,))

-   # Most likely someone using our API endpoint. The /stats page does not
-   # pass a start or end in its requests.
-   if start is not None and end is not None and start > end:
-       raise JsonableError(_("Start time is later than end time. Start: %(start)s, End: %(end)s") %
-                           {'start': start, 'end': end})

-   realm = user_profile.realm
-   if start is None:
-       start = realm.date_created
-   if end is None:
-       end = last_successful_fill(stat.property)
-   if end is None or start > end:
-       logging.warning("User from realm %s attempted to access /stats, but the computed "
-                       "start time: %s (creation time of realm) is later than the computed "
-                       "end time: %s (last successful analytics update). Is the "
-                       "analytics cron job running?" % (realm.string_id, start, end))
-       raise JsonableError(_("No analytics data available. Please contact your server administrator."))

    end_times = time_range(start, end, stat.frequency, min_length)
-   data = {'end_times': end_times, 'frequency': stat.frequency}
+   data = {'end_times': end_times, 'frequency': stat.frequency, 'interval': stat.interval}
    for table in tables:
        if table == RealmCount:
            data['realm'] = get_time_series_by_subgroup(
-               stat, RealmCount, realm.id, end_times, subgroup_to_label, include_empty_subgroups)
+               stat, RealmCount, realm.id, end_times, subgroups, labels, include_empty_subgroups)
        if table == UserCount:
            data['user'] = get_time_series_by_subgroup(
-               stat, UserCount, user_profile.id, end_times, subgroup_to_label, include_empty_subgroups)
+               stat, UserCount, user_profile.id, end_times, subgroups, labels, include_empty_subgroups)
-   if labels_sort_function is not None:
-       data['display_order'] = labels_sort_function(data)
-   else:
-       data['display_order'] = None
    return json_success(data=data)
def sort_by_totals(value_arrays):
# type: (Dict[str, List[int]]) -> List[str]
totals = [(sum(values), label) for label, values in value_arrays.items()]
totals.sort(reverse=True)
return [label for total, label in totals]
# For any given user, we want to show a fixed set of clients in the chart,
# regardless of the time aggregation or whether we're looking at realm or
# user data. This fixed set ideally includes the clients most important in
# understanding the realm's traffic and the user's traffic. This function
# tries to rank the clients so that taking the first N elements of the
# sorted list has a reasonable chance of doing so.
def sort_client_labels(data):
# type: (Dict[str, Dict[str, List[int]]]) -> List[str]
realm_order = sort_by_totals(data['realm'])
user_order = sort_by_totals(data['user'])
label_sort_values = {} # type: Dict[str, float]
for i, label in enumerate(realm_order):
label_sort_values[label] = i
for i, label in enumerate(user_order):
label_sort_values[label] = min(i-.1, label_sort_values.get(label, i))
return [label for label, sort_value in sorted(label_sort_values.items(),
key=lambda x: x[1])]
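# Worked example of the ranking above, using the data from
# TestGetChartDataHelpers.test_sort_client_labels earlier in this diff:
# realm order is [a, c, b, e, d, h] and user order is [a, b, d, e, f, g];
# each user label's sort value is min(i - .1, its realm-order index, if any),
# so 'b' lands at 0.9 (user index 1 - .1), just ahead of 'c' at 1 (realm
# index 1), producing [a, b, c, d, e, f, g, h].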
def table_filtered_to_id(table, key_id):
    # type: (Type[BaseCount], int) -> QuerySet
    if table == RealmCount:
@@ -150,7 +107,7 @@ def table_filtered_to_id(table, key_id):
    elif table == InstallationCount:
        return InstallationCount.objects.all()
    else:
-       raise AssertionError("Unknown table: %s" % (table,))
+       raise ValueError("Unknown table: %s" % (table,))

def client_label_map(name):
    # type: (str) -> str
@@ -172,7 +129,7 @@ def client_label_map(name):
def rewrite_client_arrays(value_arrays):
    # type: (Dict[str, List[int]]) -> Dict[str, List[int]]
    mapped_arrays = {}  # type: Dict[str, List[int]]
    for label, array in value_arrays.items():
        mapped_label = client_label_map(label)
        if mapped_label in mapped_arrays:
@@ -182,15 +139,20 @@ def rewrite_client_arrays(value_arrays):
            mapped_arrays[mapped_label] = [value_arrays[label][i] for i in range(0, len(array))]
    return mapped_arrays

-def get_time_series_by_subgroup(stat, table, key_id, end_times, subgroup_to_label, include_empty_subgroups):
-   # type: (CountStat, Type[BaseCount], int, List[datetime], Dict[Optional[str], str], bool) -> Dict[str, List[int]]
+def get_time_series_by_subgroup(stat, table, key_id, end_times, subgroups, labels, include_empty_subgroups):
+   # type: (CountStat, Type[BaseCount], Optional[int], List[datetime], List[str], Optional[List[str]], bool) -> Dict[str, List[int]]
+   if labels is None:
+       labels = subgroups
+   if len(subgroups) != len(labels):
+       raise ValueError("subgroups and labels have lengths %s and %s, which are different." %
+                        (len(subgroups), len(labels)))
    queryset = table_filtered_to_id(table, key_id).filter(property=stat.property) \
               .values_list('subgroup', 'end_time', 'value')
    value_dicts = defaultdict(lambda: defaultdict(int))  # type: Dict[Optional[str], Dict[datetime, int]]
    for subgroup, end_time, value in queryset:
        value_dicts[subgroup][end_time] = value
    value_arrays = {}
-   for subgroup, label in subgroup_to_label.items():
+   for subgroup, label in zip(subgroups, labels):
        if (subgroup in value_dicts) or include_empty_subgroups:
            value_arrays[label] = [value_dicts[subgroup][end_time] for end_time in end_times]
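# Worked example with hypothetical rows: given subgroup_to_label
# {'false': 'human', 'true': 'bot'} and end_times [t1, t2], stored rows
# (subgroup='false', end_time=t2, value=5) and (subgroup='true', end_time=t1,
# value=2) yield {'human': [0, 5], 'bot': [2, 0]}; the nested defaultdict
# supplies 0 for any (subgroup, end_time) cell without a row.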
@@ -261,7 +223,7 @@ def get_realm_day_counts():
    rows = dictfetchall(cursor)
    cursor.close()

    counts = defaultdict(dict)  # type: Dict[str, Dict[int, int]]
    for row in rows:
        counts[row['string_id']][row['age']] = row['cnt']
@@ -741,12 +703,12 @@ def ad_hoc_queries():
    return pages

-@require_server_admin
+@zulip_internal
@has_request_variables
def get_activity(request):
    # type: (HttpRequest) -> HttpResponse
    duration_content, realm_minutes = user_activity_intervals()  # type: Tuple[mark_safe, Dict[str, float]]
    counts_content = realm_summary_table(realm_minutes)  # type: str
    data = [
        ('Counts', counts_content),
        ('Durations', duration_content),
@@ -756,10 +718,10 @@ def get_activity(request):
    title = 'Activity'

-   return render(
-       request,
-       'analytics/activity.html',
-       context=dict(data=data, title=title, is_home=True),
-   )
+   return render_to_response(
+       'analytics/activity.html',
+       dict(data=data, title=title, is_home=True),
+       request=request
+   )
def get_user_activity_records_for_realm(realm, is_bot): def get_user_activity_records_for_realm(realm, is_bot):
@@ -828,7 +790,7 @@ def get_user_activity_summary(records):
#: We could use something like: #: We could use something like:
# `Union[Dict[str, Dict[str, int]], Dict[str, Dict[str, datetime]]]` # `Union[Dict[str, Dict[str, int]], Dict[str, Dict[str, datetime]]]`
#: but that would require this long `Union` to carry on throughout inner functions. #: but that would require this long `Union` to carry on throughout inner functions.
summary = {} # type: Dict[str, Dict[str, Any]] summary = {} # type: Dict[str, Dict[str, Any]]
def update(action, record): def update(action, record):
# type: (str, QuerySet) -> None # type: (str, QuerySet) -> None
@@ -991,7 +953,7 @@ def realm_user_summary_table(all_records, admin_emails):
def is_recent(val): def is_recent(val):
# type: (Optional[datetime]) -> bool # type: (Optional[datetime]) -> bool
age = timezone_now() - val age = datetime.now(val.tzinfo) - val
return age.total_seconds() < 5 * 60 return age.total_seconds() < 5 * 60
rows = [] rows = []
@@ -1013,7 +975,7 @@ def realm_user_summary_table(all_records, admin_emails):
rows.append(row) rows.append(row)
def by_used_time(row): def by_used_time(row):
# type: (Dict[str, Any]) -> str # type: (Dict[str, Sequence[str]]) -> str
return row['cells'][3] return row['cells'][3]
rows = sorted(rows, key=by_used_time, reverse=True) rows = sorted(rows, key=by_used_time, reverse=True)
@@ -1035,11 +997,11 @@ def realm_user_summary_table(all_records, admin_emails):
content = make_table(title, cols, rows, has_row_class=True) content = make_table(title, cols, rows, has_row_class=True)
return user_records, content return user_records, content
@require_server_admin @zulip_internal
def get_realm_activity(request, realm_str): def get_realm_activity(request, realm_str):
# type: (HttpRequest, str) -> HttpResponse # type: (HttpRequest, str) -> HttpResponse
data = [] # type: List[Tuple[str, str]] data = [] # type: List[Tuple[str, str]]
all_user_records = {} # type: Dict[str, Any] all_user_records = {} # type: Dict[str, Any]
try: try:
admins = Realm.objects.get(string_id=realm_str).get_admin_users() admins = Realm.objects.get(string_id=realm_str).get_admin_users()
@@ -1068,18 +1030,18 @@ def get_realm_activity(request, realm_str):
realm_link += '&target=stats.gauges.staging.users.active.%s.0_16hr' % (realm_str,) realm_link += '&target=stats.gauges.staging.users.active.%s.0_16hr' % (realm_str,)
title = realm_str title = realm_str
return render( return render_to_response(
request,
'analytics/activity.html', 'analytics/activity.html',
context=dict(data=data, realm_link=realm_link, title=title), dict(data=data, realm_link=realm_link, title=title),
request=request
) )
@require_server_admin @zulip_internal
def get_user_activity(request, email): def get_user_activity(request, email):
# type: (HttpRequest, str) -> HttpResponse # type: (HttpRequest, str) -> HttpResponse
records = get_user_activity_records_for_email(email) records = get_user_activity_records_for_email(email)
data = [] # type: List[Tuple[str, str]] data = [] # type: List[Tuple[str, str]]
user_summary = get_user_activity_summary(records) user_summary = get_user_activity_summary(records)
content = user_activity_summary_table(user_summary) content = user_activity_summary_table(user_summary)
@@ -1089,8 +1051,8 @@ def get_user_activity(request, email):
data += [('Info', content)] data += [('Info', content)]
title = email title = email
return render( return render_to_response(
request,
'analytics/activity.html', 'analytics/activity.html',
context=dict(data=data, title=title), dict(data=data, title=title),
request=request
) )
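Note what this file-level change is: Zulip 1.6.0 moved these views from Django's older render_to_response() helper to the render() shortcut, and since this compare runs from 1.6.0 back to 1.5.1, the hunks read in reverse. A minimal sketch of the newer call shape, reusing the template name and context keys from the hunks above (the view below is a standalone illustration, not the actual analytics view):

from django.shortcuts import render

def activity_view(request):
    # data has the same shape as in get_activity() above:
    # a list of (section title, rendered HTML) pairs.
    data = [('Counts', '<p>placeholder</p>')]
    return render(
        request,
        'analytics/activity.html',
        context=dict(data=data, title='Activity', is_home=True),
    )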

View File

@@ -44,12 +44,7 @@ Alternatively, you may explicitly use "--user", "--api-key", and
`--site` in our examples, which is especially useful when testing. If
you are running several bots which share a home directory, we
recommend using `--config` to specify the path to the `zuliprc` file
-for a specific bot. Finally, you can control the defaults for all of
-these variables using the environment variables `ZULIP_CONFIG`,
-`ZULIP_API_KEY`, `ZULIP_EMAIL`, `ZULIP_SITE`, `ZULIP_CERT`,
-`ZULIP_CERT_KEY`, and `ZULIP_CERT_BUNDLE`. Command-line options take
-precedence over environment variables, which take precedence over the
-config files.
+for a specific bot.
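For reference, the removed paragraph describes the usual precedence chain (command line, then environment, then config file). A minimal sketch of a `zuliprc` and an override, with placeholder credentials and hostnames:

[api]
email=my-bot@example.com
key=0123456789abcdef0123456789abcdef
site=https://zulip.example.com

Running, say, `zulip-send --config ~/bots/my-bot.zuliprc --site https://staging.example.com ...` would then override only the server URL, since command-line options win.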
The command line equivalents for other configuration options are: The command line equivalents for other configuration options are:

View File

@@ -78,7 +78,7 @@ def main(argv=None):
parser.add_option('-m', '--message',
help='Specifies the message to send, prevents interactive prompting.')
-group = optparse.OptionGroup(parser, 'Stream parameters') # type: ignore # https://github.com/python/typeshed/pull/1248
+group = optparse.OptionGroup(parser, 'Stream parameters')
group.add_option('-s', '--stream',
dest='stream',
action='store',
@@ -122,7 +122,7 @@ def main(argv=None):
if not do_send_message(client, message_data):
return 1
-return 0
if __name__ == '__main__':
sys.exit(main())

View File

@@ -1,130 +0,0 @@
# See readme.md for instructions on running this code.
from __future__ import absolute_import
from __future__ import division
import copy
import importlib
import sys
from math import log10, floor
import utils
import re
def is_float(value):
try:
float(value)
return True
except ValueError:
return False
# Rounds the number 'x' to 'digits' significant digits.
# A normal 'round()' would round the number to an absolute amount of
# fractional decimals, e.g. 0.00045 would become 0.0.
# 'round_to()' rounds only the digits that are not 0.
# 0.00045 would then become 0.0005.
def round_to(x, digits):
return round(x, digits-int(floor(log10(abs(x)))))
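# Example, hand-checked against the formula above:
#   round_to(123.456, 2) == round(123.456, 2 - 2) == 123.0
# so 'digits' effectively counts positions after the leading
# significant digit.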
class ConverterHandler(object):
'''
This plugin allows users to make conversions between various units,
e.g. Celsius to Fahrenheit, or kilobytes to gigabytes.
It looks for messages of the format
'@mention-bot <number> <unit_from> <unit_to>'
The message '@mention-bot help' posts a short description of how to use
the plugin, along with a list of all supported units.
'''
def usage(self):
return '''
This plugin allows users to make conversions between
various units, e.g. Celsius to Fahrenheit,
or kilobytes to gigabytes. It looks for messages of
the format '@mention-bot <number> <unit_from> <unit_to>'
The message '@mention-bot help' posts a short description of
how to use the plugin, along with a list of
all supported units.
'''
def handle_message(self, message, client, state_handler):
bot_response = get_bot_converter_response(message, client)
client.send_reply(message, bot_response)
def get_bot_converter_response(message, client):
content = message['content']
words = content.lower().split()
convert_indexes = [i for i, word in enumerate(words) if word == "@convert"]
convert_indexes = [-1] + convert_indexes
results = []
for convert_index in convert_indexes:
if (convert_index + 1) < len(words) and words[convert_index + 1] == 'help':
results.append(utils.HELP_MESSAGE)
continue
if (convert_index + 3) < len(words):
number = words[convert_index + 1]
unit_from = utils.ALIASES.get(words[convert_index + 2], words[convert_index + 2])
unit_to = utils.ALIASES.get(words[convert_index + 3], words[convert_index + 3])
exponent = 0
if not is_float(number):
results.append(number + ' is not a valid number. ' + utils.QUICK_HELP)
continue
number = float(number)
number_res = copy.copy(number)
for key, exp in utils.PREFIXES.items():
if unit_from.startswith(key):
exponent += exp
unit_from = unit_from[len(key):]
if unit_to.startswith(key):
exponent -= exp
unit_to = unit_to[len(key):]
uf_to_std = utils.UNITS.get(unit_from, False)
ut_to_std = utils.UNITS.get(unit_to, False)
if uf_to_std is False:
results.append(unit_from + ' is not a valid unit. ' + utils.QUICK_HELP)
if ut_to_std is False:
results.append(unit_to + ' is not a valid unit.' + utils.QUICK_HELP)
if uf_to_std is False or ut_to_std is False:
continue
base_unit = uf_to_std[2]
if uf_to_std[2] != ut_to_std[2]:
unit_from = unit_from.capitalize() if uf_to_std[2] == 'kelvin' else unit_from
results.append(unit_to.capitalize() + ' and ' + unit_from +
' are not from the same category. ' + utils.QUICK_HELP)
continue
# perform the conversion between the units
number_res *= uf_to_std[1]
number_res += uf_to_std[0]
number_res -= ut_to_std[0]
number_res /= ut_to_std[1]
if base_unit == 'bit':
number_res *= 1024 ** (exponent // 3)
else:
number_res *= 10 ** exponent
number_res = round_to(number_res, 7)
results.append('{} {} = {} {}'.format(number,
words[convert_index + 2],
number_res,
words[convert_index + 3]))
else:
results.append('Too few arguments given. ' + utils.QUICK_HELP)
new_content = ''
for idx, result in enumerate(results, 1):
new_content += ((str(idx) + '. conversion: ') if len(results) > 1 else '') + result + '\n'
return new_content
handler_class = ConverterHandler
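The arithmetic core of the handler above is easy to exercise on its own. A minimal sketch, with a small stand-in for `utils.UNITS` (mapping unit -> (additive offset, multiplicative factor, base unit)); the table values here are the standard Celsius/Fahrenheit constants, but the dict itself is an illustrative assumption, not the bot's real data:

# Stand-in for utils.UNITS: unit -> (offset, factor, base unit).
UNITS = {
    'celsius': (273.15, 1.0, 'kelvin'),
    'fahrenheit': (255.372222, 0.555556, 'kelvin'),
    'kelvin': (0.0, 1.0, 'kelvin'),
}

def convert(number, unit_from, unit_to):
    offset_f, factor_f, base_f = UNITS[unit_from]
    offset_t, factor_t, base_t = UNITS[unit_to]
    assert base_f == base_t, 'units must share a base unit'
    value = number * factor_f + offset_f    # into the base unit
    return (value - offset_t) / factor_t    # out of the base unit

print(convert(12.0, 'celsius', 'fahrenheit'))  # ~53.6; compare "12.0 celsius = 53.600054 fahrenheit" in the test below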

View File

@@ -1,30 +0,0 @@
#!/usr/bin/env python
from __future__ import absolute_import
from __future__ import print_function
import os
import sys
our_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, os.path.normpath(os.path.join(our_dir)))
# For dev setups, we can find the API in the repo itself.
if os.path.exists(os.path.join(our_dir, '..')):
sys.path.insert(0, '..')
from bots_test_lib import BotTestCase
class TestConverterBot(BotTestCase):
bot_name = "converter"
def test_bot(self):
expected = {
"": ('Too few arguments given. Enter `@convert help` '
'for help on using the converter.\n'),
"foo bar": ('Too few arguments given. Enter `@convert help` '
'for help on using the converter.\n'),
"2 m cm": "2.0 m = 200.0 cm\n",
"12.0 celsius fahrenheit": "12.0 celsius = 53.600054 fahrenheit\n",
"0.002 kilometer millimile": "0.002 kilometer = 1.2427424 millimile\n",
"3 megabyte kilobit": "3.0 megabyte = 24576.0 kilobit\n",
}
self.check_expected_responses(expected)

View File

@@ -1,28 +0,0 @@
#!/usr/bin/env python
from __future__ import absolute_import
from __future__ import print_function
import os
import sys
our_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, os.path.normpath(os.path.join(our_dir)))
# For dev setups, we can find the API in the repo itself.
if os.path.exists(os.path.join(our_dir, '..')):
sys.path.insert(0, '..')
from bots_test_lib import BotTestCase
class TestDefineBot(BotTestCase):
bot_name = "define"
def test_bot(self):
expected = {
"": 'Please enter a word to define.',
"foo": "**foo**:\nDefinition not available.",
"cat": ("**cat**:\n\n* (**noun**) a small domesticated carnivorous mammal "
"with soft fur, a short snout, and retractile claws. It is widely "
"kept as a pet or for catching mice, and many breeds have been "
"developed.\n&nbsp;&nbsp;their pet cat\n\n"),
}
self.check_expected_responses(expected)

View File

@@ -1,27 +0,0 @@
#!/usr/bin/env python
from __future__ import absolute_import
from __future__ import print_function
import os
import sys
our_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, os.path.normpath(os.path.join(our_dir)))
# For dev setups, we can find the API in the repo itself.
if os.path.exists(os.path.join(our_dir, '..')):
sys.path.insert(0, '..')
from bots_test_lib import BotTestCase
class TestEncryptBot(BotTestCase):
bot_name = "encrypt"
def test_bot(self):
expected = {
"": "Encrypted/Decrypted text: ",
"Let\'s Do It": "Encrypted/Decrypted text: Yrg\'f Qb Vg",
"me&mom together..!!": "Encrypted/Decrypted text: zr&zbz gbtrgure..!!",
"foo bar": "Encrypted/Decrypted text: sbb one",
"Please encrypt this": "Encrypted/Decrypted text: Cyrnfr rapelcg guvf",
}
self.check_expected_responses(expected)

View File

@@ -1,39 +0,0 @@
#!/usr/bin/env python
from __future__ import absolute_import
from __future__ import print_function
import os
import sys
our_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, os.path.normpath(os.path.join(our_dir)))
# For dev setups, we can find the API in the repo itself.
if os.path.exists(os.path.join(our_dir, '..')):
sys.path.insert(0, '..')
from bots_test_lib import BotTestCase
class TestFollowUpBot(BotTestCase):
bot_name = "followup"
def test_bot(self):
expected_send_reply = {
"": 'Please specify the message you want to send to followup stream after @mention-bot'
}
self.check_expected_responses(expected_send_reply, expected_method='send_reply')
expected_send_message = {
"foo": {
'type': 'stream',
'to': 'followup',
'subject': 'foo_sender@zulip.com',
'content': 'from foo_sender@zulip.com: foo',
},
"I have completed my task": {
'type': 'stream',
'to': 'followup',
'subject': 'foo_sender@zulip.com',
'content': 'from foo_sender@zulip.com: I have completed my task',
},
}
self.check_expected_responses(expected_send_message, expected_method='send_message')

View File

@@ -1,61 +0,0 @@
#!/usr/bin/env python
from __future__ import absolute_import
from __future__ import print_function
import os
import sys
import json
our_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, os.path.normpath(os.path.join(our_dir)))
# For dev setups, we can find the API in the repo itself.
if os.path.exists(os.path.join(our_dir, '..')):
sys.path.insert(0, '..')
from bots_test_lib import BotTestCase
from bots.giphy import giphy
def get_http_response_json(gif_url):
response_json = {
'meta': {
'status': 200
},
'data': {
'images': {
'original': {
'url': gif_url
}
}
}
}
return response_json
def get_bot_response(gif_url):
return ('[Click to enlarge](%s)'
'[](/static/images/interactive-bot/giphy/powered-by-giphy.png)'
% (gif_url))
def get_http_request(keyword):
return {
'api_url': giphy.GIPHY_TRANSLATE_API,
'params': {
's': keyword,
'api_key': giphy.get_giphy_api_key_from_config()
}
}
class TestGiphyBot(BotTestCase):
bot_name = "giphy"
def test_bot(self):
# This message calls `send_reply` function of BotHandlerApi
keyword = "Hello"
gif_url = "https://media4.giphy.com/media/3o6ZtpxSZbQRRnwCKQ/giphy.gif"
expectations = {
keyword: get_bot_response(gif_url)
}
self.check_expected_responses(
expectations=expectations,
http_request=get_http_request(keyword),
http_response=get_http_response_json(gif_url)
)

View File

@@ -1,93 +0,0 @@
# See readme.md for instructions on running this code.
from __future__ import print_function
import logging
import http.client
from six.moves.urllib.request import urlopen
# Uses the Google search engine bindings
# pip install --upgrade google
from google import search
def get_google_result(search_keywords):
help_message = "To use this bot, start messages with @mentioned-bot, \
followed by what you want to search for. If \
found, Zulip will return the first search result \
on Google.\
\
An example message that could be sent is:\
'@mentioned-bot zulip' or \
'@mentioned-bot how to create a chatbot'."
if search_keywords == 'help':
return help_message
elif search_keywords == '' or search_keywords is None:
return help_message
else:
try:
urls = search(search_keywords, stop=20)
urlopen('http://216.58.192.142', timeout=1)
except http.client.RemoteDisconnected as er:
logging.exception(er)
return 'Error: No internet connection. {}.'.format(er)
except Exception as e:
logging.exception(e)
return 'Error: Search failed. {}.'.format(e)
try:
url = next(urls)
except AttributeError as a_err:
# google.search query failed and urls is of object
# 'NoneType'
logging.exception(a_err)
return "Error: Google search failed with a NoneType result. {}.".format(a_err)
except TypeError as t_err:
# google.search query failed and returned None
# This technically should not happen but the prior
# error check assumed this behavior
logging.exception(t_err)
return "Error: Google search function failed. {}.".format(t_err)
except Exception as e:
logging.exception(e)
return 'Error: Search failed. {}.'.format(e)
return 'Success: {}'.format(url)
class GoogleSearchHandler(object):
'''
This plugin allows users to enter a search
term in Zulip and get the top URL sent back
to the context (stream or private) in which
it was called. It looks for messages starting
with @mentioned-bot.
'''
def usage(self):
return '''
This plugin will allow users to search
for a given search term on Google from
Zulip. Use '@mentioned-bot help' to get
more information on the bot usage. Users
should preface messages with
@mentioned-bot.
'''
def handle_message(self, message, client, state_handler):
original_content = message['content']
result = get_google_result(original_content)
client.send_reply(message, result)
handler_class = GoogleSearchHandler
def test():
try:
urlopen('http://216.58.192.142', timeout=1)
print('Success')
return True
except http.client.RemoteDisconnected as e:
print('Error: {}'.format(e))
return False
if __name__ == '__main__':
test()

View File

@@ -1,23 +0,0 @@
# Google Search bot
This bot allows users to do Google search queries and have the bot
respond with the first search result. It is by default set to the
highest safe-search setting.
## Usage
Run this bot as described
[here](http://zulip.readthedocs.io/en/latest/bots-guide.html#how-to-deploy-a-bot).
Use this bot with the following command
`@mentioned-bot <search terms>`
This will return the first link found by Google for `<search terms>`
and print the resulting URL.
If no `<search terms>` are entered, a help message is printed instead.
If there was an error in the process of running the search (socket
errors, Google search function failed, or general failures), an error
message is returned.

View File

@@ -1,18 +0,0 @@
# See readme.md for instructions on running this code.
class HelloWorldHandler(object):
def usage(self):
return '''
This is a boilerplate bot that responds to a user query with
"beep boop", which is robot for "Hello World".
This bot can be used as a template for other, more
sophisticated, bots.
'''
def handle_message(self, message, client, state_handler):
content = 'beep boop'
client.send_reply(message, content)
handler_class = HelloWorldHandler

View File

@@ -1,4 +0,0 @@
Simple Zulip bot that will respond to any query with a "beep boop".
The helloworld bot is a boilerplate bot that can be used as a
template for more sophisticated/evolved Zulip bots.

View File

@@ -1,23 +0,0 @@
#!/usr/bin/env python
from __future__ import absolute_import
from __future__ import print_function
import os
import sys
from six.moves import zip
our_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, os.path.normpath(os.path.join(our_dir)))
# For dev setups, we can find the API in the repo itself.
if os.path.exists(os.path.join(our_dir, '..')):
sys.path.insert(0, '..')
from bots_test_lib import BotTestCase
class TestHelloWorldBot(BotTestCase):
bot_name = "helloworld"
def test_bot(self):
txt = "beep boop"
messages = ["", "foo", "Hi, my name is abc"]
self.check_expected_responses(dict(list(zip(messages, len(messages)*[txt]))))

View File

@@ -1,18 +0,0 @@
# See readme.md for instructions on running this code.
class HelpHandler(object):
def usage(self):
return '''
This plugin will give info about Zulip to
any user that types a message saying "help".
This is example code; ideally, you would flesh
this out for more useful help pertaining to
your Zulip instance.
'''
def handle_message(self, message, client, state_handler):
help_content = "Info on Zulip can be found here:\nhttps://github.com/zulip/zulip"
client.send_reply(message, help_content)
handler_class = HelpHandler

View File

@@ -1,23 +0,0 @@
#!/usr/bin/env python
from __future__ import absolute_import
from __future__ import print_function
import os
import sys
from six.moves import zip
our_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, os.path.normpath(os.path.join(our_dir)))
# For dev setups, we can find the API in the repo itself.
if os.path.exists(os.path.join(our_dir, '..')):
sys.path.insert(0, '..')
from bots_test_lib import BotTestCase
class TestHelpBot(BotTestCase):
bot_name = "help"
def test_bot(self):
txt = "Info on Zulip can be found here:\nhttps://github.com/zulip/zulip"
messages = ["", "help", "Hi, my name is abc"]
self.check_expected_responses(dict(list(zip(messages, len(messages)*[txt]))))

View File

@@ -1,30 +0,0 @@
# See readme.md for instructions on running this code.
class IncrementorHandler(object):
def __init__(self):
self.number = 0
self.message_id = None
def usage(self):
return '''
This is a boilerplate bot that makes use of the
update_message function. For the first @-mention, it initially
replies with one message containing a `1`. Every time the bot
is @-mentioned, this number will be incremented in the same message.
'''
def handle_message(self, message, client, state_handler):
self.number += 1
if self.message_id is None:
result = client.send_reply(message, str(self.number))
self.message_id = result['id']
else:
client.update_message(dict(
message_id=self.message_id,
content=str(self.number),
))
handler_class = IncrementorHandler

View File

@@ -1,6 +0,0 @@
# Incrementor bot
This is a boilerplate bot that makes use of the
update_message function. For the first @-mention, it initially
replies with one message containing a `1`. Every time the bot
is @-mentioned, this number will be incremented in the same message.
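The send-then-edit pattern described above takes just two client calls. A minimal, self-contained sketch with a stub client standing in for the real BotHandlerApi (the stub and its printouts are illustrative assumptions):

class FakeClient(object):
    # Stub exposing the two calls the handler relies on.
    def send_reply(self, message, content):
        print('new message: %s' % content)
        return {'id': 42}

    def update_message(self, update):
        print('edit #%d -> %s' % (update['message_id'], update['content']))

client = FakeClient()
result = client.send_reply({'type': 'stream'}, '1')  # first @-mention
client.update_message(dict(message_id=result['id'], content='2'))  # later mentions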

View File

@@ -1,33 +0,0 @@
#!/usr/bin/env python
from __future__ import absolute_import
from __future__ import print_function
import os
import sys
our_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, os.path.normpath(os.path.join(our_dir)))
# For dev setups, we can find the API in the repo itself.
if os.path.exists(os.path.join(our_dir, '..')):
sys.path.insert(0, '..')
from bots_test_lib import BotTestCase
class TestThesaurusBot(BotTestCase):
bot_name = "thesaurus"
def test_bot(self):
expected = {
"synonym good": "great, satisfying, exceptional, positive, acceptable",
"synonym nice": "cordial, kind, good, okay, fair",
"synonym foo": "bar, thud, X, baz, corge",
"antonym dirty": "ordered, sterile, spotless, moral, clean",
"antonym bar": "loss, whole, advantage, aid, failure",
"": ("To use this bot, start messages with either "
"@mention-bot synonym (to get the synonyms of a given word) "
"or @mention-bot antonym (to get the antonyms of a given word). "
"Phrases are not accepted so only use single words "
"to search. For example you could search '@mention-bot synonym hello' "
"or '@mention-bot antonym goodbye'."),
}
self.check_expected_responses(expected)

View File

@@ -1,71 +0,0 @@
# See zulip/api/bots/readme.md for instructions on running this code.
from __future__ import print_function
import sys
import logging
try:
from PyDictionary import PyDictionary as Dictionary
except ImportError:
logging.error("Dependency Missing!")
sys.exit(0)
# Uses Python's PyDictionary module
# pip install PyDictionary
def get_clean_response(m, method):
try:
response = method(m)
except Exception as e:
logging.exception(e)
return e
if isinstance(response, str):
return response
elif isinstance(response, list):
return ', '.join(response)
else:
return "Sorry, no result found! Please check the word."
def get_thesaurus_result(original_content):
help_message = ("To use this bot, start messages with either "
"@mention-bot synonym (to get the synonyms of a given word) "
"or @mention-bot antonym (to get the antonyms of a given word). "
"Phrases are not accepted so only use single words "
"to search. For example you could search '@mention-bot synonym hello' "
"or '@mention-bot antonym goodbye'.")
query = original_content.strip().split(' ', 1)
if len(query) < 2:
return help_message
else:
search_keyword = query[1]
if original_content.startswith('synonym'):
result = get_clean_response(search_keyword, method = Dictionary.synonym)
elif original_content.startswith('antonym'):
result = get_clean_response(search_keyword, method = Dictionary.antonym)
else:
result = help_message
return result
class ThesaurusHandler(object):
'''
This plugin allows users to enter a word in Zulip
and get synonyms and antonyms for that word sent
back to the context (stream or private) in which
it was sent. It looks for messages starting with
'@mention-bot synonym' or '@mention-bot antonym'.
'''
def usage(self):
return '''
This plugin will allow users to get both synonyms
and antonyms for a given word from zulip. To use this
plugin, users need to install the PyDictionary module
using 'pip install PyDictionary'. Use '@mention-bot synonym help' or
'@mention-bot antonym help' for more usage information. Users should
preface messages with @mention-bot synonym or @mention-bot antonym.
'''
def handle_message(self, message, client, state_handler):
original_content = message['content'].strip()
new_content = get_thesaurus_result(original_content)
client.send_reply(message, new_content)
handler_class = ThesaurusHandler

View File

@@ -1,44 +0,0 @@
# Virtual fs bot
This bot allows users to store information in a virtual file system,
for a given stream or private chat.
## Usage
Run this bot as described
[here](http://zulip.readthedocs.io/en/latest/bots-guide.html#how-to-deploy-a-bot).
Use this bot with any of the following commands:
`@fs mkdir` : create a directory
`@fs ls` : list a directory
`@fs cd` : change directory
`@fs pwd` : show current path
`@fs write` : write text
`@fs read` : read text
`@fs rm` : remove a file
`@fs rmdir` : remove a directory
where `fs` may be the name of the bot you registered in the zulip system.
### Usage examples
`@fs ls` - Initially shows nothing (with a warning)
`@fs pwd` - Show which directory we are in: we start in /
`@fs mkdir foo` - Make directory foo
`@fs ls` - Show that foo is now created
`@fs cd foo` - Change into foo (and do a pwd, automatically)
`@fs write test hello world` - Write "hello world" to the file 'test'
`@fs read test` - Check the text was written
`@fs ls` - Show that the new file exists
`@fs rm test` - Remove that file
`@fs cd /` - Change back to root directory
`@fs rmdir foo` - Remove foo
## Notes
* In a stream, the bot must be mentioned; in a private chat, the bot
will assume every message is a command and so does not require this,
though doing so will still work.
* Use commands like `@fs help write` for more details on a command.

View File

@@ -1,56 +0,0 @@
#!/usr/bin/env python
from __future__ import absolute_import
from __future__ import print_function
import os
import sys
our_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, os.path.normpath(os.path.join(our_dir)))
# For dev setups, we can find the API in the repo itself.
if os.path.exists(os.path.join(our_dir, '..')):
sys.path.insert(0, '..')
from bots_test_lib import BotTestCase
class TestVirtualFsBot(BotTestCase):
bot_name = "virtual_fs"
def test_bot(self):
expected = {
"cd /home": "foo_sender@zulip.com:\nERROR: invalid path",
"mkdir home": "foo_sender@zulip.com:\ndirectory created",
"pwd": "foo_sender@zulip.com:\n/",
"help": ('foo_sender@zulip.com:\n\nThis bot implements a virtual file system for a stream.\n'
'The locations of text are persisted for the lifetime of the bot\n'
'running, and if you rename a stream, you will lose the info.\n'
'Example commands:\n\n```\n'
'@mention-bot sample_conversation: sample conversation with the bot\n'
'@mention-bot mkdir: create a directory\n'
'@mention-bot ls: list a directory\n'
'@mention-bot cd: change directory\n'
'@mention-bot pwd: show current path\n'
'@mention-bot write: write text\n'
'@mention-bot read: read text\n'
'@mention-bot rm: remove a file\n'
'@mention-bot rmdir: remove a directory\n'
'```\n'
'Use commands like `@mention-bot help write` for more details on specific\ncommands.\n'),
"help ls": "foo_sender@zulip.com:\nsyntax: ls <optional_path>",
"": ('foo_sender@zulip.com:\n\nThis bot implements a virtual file system for a stream.\n'
'The locations of text are persisted for the lifetime of the bot\n'
'running, and if you rename a stream, you will lose the info.\n'
'Example commands:\n\n```\n'
'@mention-bot sample_conversation: sample conversation with the bot\n'
'@mention-bot mkdir: create a directory\n'
'@mention-bot ls: list a directory\n'
'@mention-bot cd: change directory\n'
'@mention-bot pwd: show current path\n'
'@mention-bot write: write text\n'
'@mention-bot read: read text\n'
'@mention-bot rm: remove a file\n'
'@mention-bot rmdir: remove a directory\n'
'```\n'
'Use commands like `@mention-bot help write` for more details on specific\ncommands.\n'),
}
self.check_expected_responses(expected)

View File

@@ -1,2 +0,0 @@
[weather-config]
key=XXXXXXXX

Binary file not shown.


Binary file not shown.


View File

@@ -1,16 +0,0 @@
# WeatherBot
* This is a bot that sends weather information to a selected stream on
request.
* Weather information is fetched from the OpenWeatherMap API. The bot
posts the weather information to the stream from which the user sent
the message. If the user enters a city that does not exist, the bot
replies with a "Sorry, city not found" message.
* Before using this bot, you have to generate an OpenWeatherMap API
key and replace the dummy value in .weather_config.
![Example Usage](assets/screen1.png)
![Wrong City](assets/screen2.png)

View File

@@ -1,75 +0,0 @@
# See readme.md for instructions on running this code.
from __future__ import print_function
import requests
import json
import os
import sys
from six.moves.configparser import SafeConfigParser
class WeatherHandler(object):
def __init__(self):
self.directory = os.path.dirname(os.path.realpath(__file__)) + '/'
self.config_name = '.weather_config'
self.response_pattern = 'Weather in {}, {}:\n{} F / {} C\n{}'
if not os.path.exists(self.directory + self.config_name):
print('Weather bot config file not found, please set it up in the {} file in the main bot directory'
'\n\nUsing format:\n\n[weather-config]\nkey=<OpenWeatherMap API key here>\n\n'.format(self.config_name))
sys.exit(1)
super(WeatherHandler, self).__init__()
def usage(self):
return '''
This plugin will give info about weather in a specified city
'''
def handle_message(self, message, client, state_handler):
help_content = '''
This bot returns weather info for a specified city.
You specify the city in the following format:
city, state/country
The state/country parameter is optional (useful when there are many cities with the same name).
For example:
@**Weather Bot** Portland
@**Weather Bot** Portland, Me
'''.strip()
if (message['content'] == 'help') or (message['content'] == ''):
response = help_content
else:
url = 'http://api.openweathermap.org/data/2.5/weather?q=' + message['content'] + '&APPID='
r = requests.get(url + get_weather_api_key_from_config(self.directory, self.config_name))
if "city not found" in r.text:
response = "Sorry, city not found"
else:
response = format_response(r.text, message['content'], self.response_pattern)
client.send_reply(message, response)
def format_response(text, city, response_pattern):
j = json.loads(text)
city = j['name']
country = j['sys']['country']
fahrenheit = to_fahrenheit(j['main']['temp'])
celsius = to_celsius(j['main']['temp'])
description = j['weather'][0]['description'].title()
return response_pattern.format(city, country, fahrenheit, celsius, description)
def to_celsius(temp_kelvin):
return int(temp_kelvin) - 273.15
def to_fahrenheit(temp_kelvin):
return int(temp_kelvin) * 9 / 5 - 459.67
def get_weather_api_key_from_config(directory, config_name):
config = SafeConfigParser()
with open(directory + config_name, 'r') as config_file:
config.readfp(config_file)
return config.get("weather-config", "key")
handler_class = WeatherHandler
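As a quick sanity check of the two temperature helpers above (hand-computed values; note that int() truncates any fractional Kelvin first, and under Python 2 the division here is integer division, which happens to agree for the input below):

def to_celsius(temp_kelvin):
    return int(temp_kelvin) - 273.15

def to_fahrenheit(temp_kelvin):
    return int(temp_kelvin) * 9 / 5 - 459.67

print(to_celsius(300))     # ~26.85
print(to_fahrenheit(300))  # ~80.33 (2700 / 5 divides evenly, so Python 2 agrees)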

View File

@@ -1,31 +0,0 @@
#!/usr/bin/env python
from __future__ import absolute_import
from __future__ import print_function
import os
import sys
our_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, os.path.normpath(os.path.join(our_dir)))
# For dev setups, we can find the API in the repo itself.
if os.path.exists(os.path.join(our_dir, '..')):
sys.path.insert(0, '..')
from bots_test_lib import BotTestCase
class TestWikipediaBot(BotTestCase):
bot_name = "wikipedia"
def test_bot(self):
expected = {
"": 'Please enter your message after @mention-bot',
"sssssss kkkkk": ('I am sorry. The search term you provided '
'is not found :slightly_frowning_face:'),
"foo": ('For search term "foo", '
'https://en.wikipedia.org/wiki/Foobar'),
"123": ('For search term "123", '
'https://en.wikipedia.org/wiki/123'),
"laugh": ('For search term "laugh", '
'https://en.wikipedia.org/wiki/Laughter'),
}
self.check_expected_responses(expected)

View File

@@ -1,43 +0,0 @@
#!/usr/bin/env python
from __future__ import absolute_import
from __future__ import print_function
import mock
import os
import sys
our_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, os.path.normpath(os.path.join(our_dir)))
# For dev setups, we can find the API in the repo itself.
if os.path.exists(os.path.join(our_dir, '..')):
sys.path.insert(0, '..')
from bots_test_lib import BotTestCase
class TestXkcdBot(BotTestCase):
bot_name = "xkcd"
@mock.patch('logging.exception')
def test_bot(self, mock_logging_exception):
help_txt = "xkcd bot supports these commands:"
err_txt = "xkcd bot only supports these commands:"
commands = '''
* `@xkcd help` to show this help message.
* `@xkcd latest` to fetch the latest comic strip from xkcd.
* `@xkcd random` to fetch a random comic strip from xkcd.
* `@xkcd <comic id>` to fetch a comic strip based on `<comic id>` e.g `@xkcd 1234`.'''
invalid_id_txt = "Sorry, there is likely no xkcd comic strip with id: #"
expected = {
"": err_txt+commands,
"help": help_txt+commands,
"x": err_txt+commands,
"0": invalid_id_txt + "0",
"1": ("#1: **Barrel - Part 1**\n[Don't we all.]"
"(https://imgs.xkcd.com/comics/barrel_cropped_(1).jpg)"),
"1800": ("#1800: **Chess Notation**\n"
"[I've decided to score all my conversations "
"using chess win-loss notation. (??)]"
"(https://imgs.xkcd.com/comics/chess_notation.png)"),
"999999999": invalid_id_txt + "999999999",
}
self.check_expected_responses(expected)

View File

@@ -1,136 +0,0 @@
# See readme.md for instructions on running this code.
from __future__ import print_function
import os
import logging
import ssl
import sys
try:
import requests
except ImportError as e:
logging.error("Dependency missing!!\n{}".format(e))
sys.exit(0)
HELP_MESSAGE = '''
This bot allows users to translate a sentence into
'Yoda speak'.
Users should preface messages with '@mention-bot'.
Before running this, make sure to get a Mashape Api token.
Instructions are in the 'readme.md' file.
Store it in the 'yoda.config' file.
The 'yoda.config' file should be located at '~/yoda.config'.
Example input:
@mention-bot You will learn how to speak like me someday.
'''
class ApiKeyError(Exception):
'''raise this when there is an error with the Mashape Api Key'''
class YodaSpeakHandler(object):
'''
This bot will allow users to translate a sentence into 'Yoda speak'.
It looks for messages starting with '@mention-bot'.
'''
def usage(self):
return '''
This bot will allow users to translate a sentence into
'Yoda speak'.
Users should preface messages with '@mention-bot'.
Before running this, make sure to get a Mashape Api token.
Instructions are in the 'readme.md' file.
Store it in the 'yoda.config' file.
The 'yoda.config' file should be located at '~/yoda.config'.
Example input:
@mention-bot You will learn how to speak like me someday.
'''
def handle_message(self, message, client, state_handler):
handle_input(message, client)
handler_class = YodaSpeakHandler
def send_to_yoda_api(sentence, api_key):
# function for sending sentence to api
response = requests.get("https://yoda.p.mashape.com/yoda?sentence=" + sentence,
headers={
"X-Mashape-Key": api_key,
"Accept": "text/plain"
}
)
if response.status_code == 200:
return response.text
if response.status_code == 403:
raise ApiKeyError
else:
error_message = response.json()['message']  # assumes a JSON error body
logging.error(error_message)
error_code = response.status_code
error_message = error_message + ' Error code: ' + str(error_code) +\
' Did you follow the instructions in the `readme.md` file?'
return error_message
def format_input(original_content):
# gets rid of whitespace around the edges, so that they aren't a problem in the future
message_content = original_content.strip()
# replaces all spaces with '+' to be in the format the api requires
sentence = message_content.replace(' ', '+')
return sentence
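# e.g. format_input('  How are you?  ') -> 'How+are+you?':
# surrounding whitespace is stripped, inner spaces become '+'.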
def handle_input(message, client):
original_content = message['content']
if is_help(original_content):
client.send_reply(message, HELP_MESSAGE)
else:
sentence = format_input(original_content)
try:
reply_message = send_to_yoda_api(sentence, get_api_key())
except (ssl.SSLError, TypeError):
reply_message = 'The service is temporarily unavailable, please try again.'
logging.error(reply_message)
except ApiKeyError:
reply_message = 'Invalid Api Key. Did you follow the instructions in the ' \
'`readme.md` file?'
logging.error(reply_message)
client.send_reply(message, reply_message)
def get_api_key():
# function for getting Mashape api key
home = os.path.expanduser('~')
with open(home + '/yoda.config') as api_key_file:
api_key = api_key_file.read().strip()
return api_key
def send_message(client, message, stream, subject):
# function for sending a message
client.send_message(dict(
type='stream',
to=stream,
subject=subject,
content=message
))
def is_help(original_content):
# gets rid of whitespace around the edges, so that they aren't a problem in the future
message_content = original_content.strip()
if message_content == 'help':
return True
else:
return False

Binary file not shown.


View File

@@ -1,11 +0,0 @@
# Youtube bot
Youtube bot is a Zulip bot that fetches the first video from the
YouTube search results for a specified term. To use the Youtube bot,
simply call it with `@mention-bot` followed by a search term, like this:
```
@mention-bot <search term>
```
![example usage](assets/screen.png)

View File

@@ -1,31 +0,0 @@
# See readme.md for instructions on running this bot.
import requests
from bs4 import BeautifulSoup
class YoutubeHandler(object):
def usage(self):
return '''
This bot will return the first Youtube search result for the given query.
'''
def handle_message(self, message, client, state_handler):
help_content = '''
To use the Youtube Bot, send `@mention-bot <search terms>`
Example:
@mention-bot funny cats
'''.strip()
if message['content'] == '':
client.send_reply(message, help_content)
else:
text_to_search = message['content']
url = "https://www.youtube.com/results?search_query=" + text_to_search
r = requests.get(url)
soup = BeautifulSoup(r.text, 'lxml')
video_id = soup.find(attrs={'class': 'yt-uix-tile-link'})
try:
link = 'https://www.youtube.com' + video_id['href']
client.send_reply(message, link)
except TypeError:
client.send_reply(message, 'No video found for specified search terms')
handler_class = YoutubeHandler

View File

@@ -1,177 +0,0 @@
from __future__ import print_function
import logging
import os
import signal
import sys
import time
import re
if False:
from mypy_extensions import NoReturn
from typing import Any, Optional, List, Dict
from types import ModuleType
our_dir = os.path.dirname(os.path.abspath(__file__))
# For dev setups, we can find the API in the repo itself.
if os.path.exists(os.path.join(our_dir, '../zulip')):
sys.path.insert(0, os.path.join(our_dir, '../'))
from zulip import Client
def exit_gracefully(signum, frame):
# type: (int, Optional[Any]) -> None
sys.exit(0)
class RateLimit(object):
def __init__(self, message_limit, interval_limit):
# type: (int, int) -> None
self.message_limit = message_limit
self.interval_limit = interval_limit
self.message_list = [] # type: List[float]
self.error_message = ('-----> !*!*!*MESSAGE RATE LIMIT REACHED, EXITING*!*!*! <-----\n'
'Is your bot trapped in an infinite loop by reacting to its own messages?')
def is_legal(self):
# type: () -> bool
self.message_list.append(time.time())
if len(self.message_list) > self.message_limit:
self.message_list.pop(0)
time_diff = self.message_list[-1] - self.message_list[0]
return time_diff >= self.interval_limit
else:
return True
def show_error_and_exit(self):
# type: () -> NoReturn
logging.error(self.error_message)
sys.exit(1)
class BotHandlerApi(object):
def __init__(self, client):
# type: (Client) -> None
# Only expose a subset of our Client's functionality
user_profile = client.get_profile()
self._rate_limit = RateLimit(20, 5)
self._client = client
try:
self.full_name = user_profile['full_name']
self.email = user_profile['email']
except KeyError:
logging.error('Cannot fetch user profile, make sure you have set'
' up the zuliprc file correctly.')
sys.exit(1)
def send_message(self, message):
# type: (Dict[str, Any]) -> Dict[str, Any]
if self._rate_limit.is_legal():
return self._client.send_message(message)
else:
self._rate_limit.show_error_and_exit()
def update_message(self, message):
# type: (Dict[str, Any]) -> Dict[str, Any]
if self._rate_limit.is_legal():
return self._client.update_message(message)
else:
self._rate_limit.show_error_and_exit()
def send_reply(self, message, response):
# type: (Dict[str, Any], str) -> Dict[str, Any]
if message['type'] == 'private':
return self.send_message(dict(
type='private',
to=[x['email'] for x in message['display_recipient'] if self.email != x['email']],
content=response,
))
else:
return self.send_message(dict(
type='stream',
to=message['display_recipient'],
subject=message['subject'],
content=response,
))
class StateHandler(object):
def __init__(self):
# type: () -> None
self.state = None # type: Any
def set_state(self, state):
# type: (Any) -> None
self.state = state
def get_state(self):
# type: () -> Any
return self.state
def run_message_handler_for_bot(lib_module, quiet, config_file):
# type: (Any, bool, str) -> Any
#
# lib_module is of type Any, since it can contain any bot's
# handler class. Eventually, we want bot's handler classes to
# inherit from a common prototype specifying the handle_message
# function.
#
# Make sure you set up your ~/.zuliprc
client = Client(config_file=config_file)
restricted_client = BotHandlerApi(client)
message_handler = lib_module.handler_class()
state_handler = StateHandler()
if not quiet:
print(message_handler.usage())
def extract_query_without_mention(message, client):
# type: (Dict[str, Any], BotHandlerApi) -> str
"""
If the bot is the first @mention in the message, then this function returns
the message with the bot's @mention removed. Otherwise, it returns None.
"""
bot_mention = r'^@(\*\*{0}\*\*)'.format(client.full_name)
start_with_mention = re.compile(bot_mention).match(message['content'])
if start_with_mention is None:
return None
query_without_mention = message['content'][len(start_with_mention.group()):]
return query_without_mention.lstrip()
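# e.g. with full_name == 'converter', a message whose content is
# '@**converter** 2 m cm' yields '2 m cm'; if the @-mention is not
# the very first token, the function returns None.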
def is_private(message, client):
# type: (Dict[str, Any], BotHandlerApi) -> bool
# bot will not reply if the sender name is the same as the bot name
# to prevent infinite loop
if message['type'] == 'private':
return client.full_name != message['sender_full_name']
return False
def handle_message(message):
# type: (Dict[str, Any]) -> None
logging.info('waiting for next message')
# is_mentioned is true if the bot is mentioned at ANY position (not necessarily
# the first @mention in the message).
is_mentioned = message['is_mentioned']
is_private_message = is_private(message, restricted_client)
# Strip at-mention botname from the message
if is_mentioned:
# message['content'] will be None when the bot's @-mention is not at the beginning.
# In that case, the message shall not be handled.
message['content'] = extract_query_without_mention(message=message, client=restricted_client)
if message['content'] is None:
return
if is_private_message or is_mentioned:
message_handler.handle_message(
message=message,
client=restricted_client,
state_handler=state_handler
)
signal.signal(signal.SIGINT, exit_gracefully)
logging.info('starting message handling...')
client.call_on_each_message(handle_message)
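The RateLimit class above is a sliding window: it keeps the timestamps of the last message_limit sends and refuses once that many arrive within interval_limit seconds. A standalone sketch of the same logic (the 3-per-second limits below are shrunk purely to make the behavior visible):

import time

class RateLimit(object):
    # Sliding window: at most message_limit sends per interval_limit seconds.
    def __init__(self, message_limit, interval_limit):
        self.message_limit = message_limit
        self.interval_limit = interval_limit
        self.message_list = []

    def is_legal(self):
        self.message_list.append(time.time())
        if len(self.message_list) > self.message_limit:
            self.message_list.pop(0)
            # The oldest retained send must already be interval_limit old.
            return self.message_list[-1] - self.message_list[0] >= self.interval_limit
        return True

limiter = RateLimit(message_limit=3, interval_limit=1)
print([limiter.is_legal() for _ in range(5)])  # [True, True, True, False, False]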

View File

@@ -1,130 +0,0 @@
#!/usr/bin/env python
from __future__ import absolute_import
from __future__ import print_function
import os
import sys
import unittest
import logging
import requests
import mock
from mock import MagicMock, patch
from run import get_lib_module
from bot_lib import StateHandler
from bots_api import bot_lib
from six.moves import zip
from unittest import TestCase
from typing import List, Dict, Any, Optional
from types import ModuleType
current_dir = os.path.dirname(os.path.abspath(__file__))
class BotTestCase(TestCase):
bot_name = '' # type: str
def check_expected_responses(self, expectations, expected_method='send_reply',
email="foo_sender@zulip.com", recipient="foo", subject="foo",
type="all", http_request=None, http_response=None):
# type: (Dict[str, Any], str, str, str, str, str, Dict[str, Any], Dict[str, Any]) -> None
# To test send_message, Any would be a Dict type,
# to test send_reply, Any would be a str type.
if type not in ["private", "stream", "all"]:
logging.exception("check_expected_response expects type to be 'private', 'stream' or 'all'")
for m, r in expectations.items():
if type != "stream":
self.mock_test(
messages={'content': m, 'type': "private", 'display_recipient': recipient,
'sender_email': email}, bot_response=r, expected_method=expected_method,
http_request=http_request, http_response=http_response)
if type != "private":
self.mock_test(
messages={'content': m, 'type': "stream", 'display_recipient': recipient,
'subject': subject, 'sender_email': email}, bot_response=r,
expected_method=expected_method, http_request=http_request, http_response=http_response)
def mock_test(self, messages, bot_response, expected_method,
http_request=None, http_response=None):
# type: (Dict[str, str], Any, str, Dict[str, Any], Dict[str, Any]) -> None
if expected_method == "send_message":
# Since send_message function uses bot_response of type Dict, no
# further changes required.
self.assert_bot_output(messages=[messages], bot_response=[bot_response], expected_method=expected_method,
http_request=http_request, http_response=http_response)
else:
# Since send_reply function uses bot_response of type str, we
# do convert the str type to a Dict type to have the same assert_bot_output function.
bot_response_type_dict = {'content': bot_response}
self.assert_bot_output(messages=[messages], bot_response=[bot_response_type_dict], expected_method=expected_method,
http_request=http_request, http_response=http_response)
def get_bot_message_handler(self):
# type: () -> Any
# message_handler is of type 'Any', since it can contain any bot's
# handler class. Eventually, we want bot's handler classes to
# inherit from a common prototype specifying the handle_message
# function.
bot_module = os.path.join(current_dir, "bots",
self.bot_name, self.bot_name + ".py")
message_handler = self.bot_to_run(bot_module)
return message_handler
def call_request(self, message_handler, message, expected_method,
MockClass, response):
# type: (Any, Dict[str, Any], str, Any, Optional[Dict[str, Any]]) -> None
# Send message to the concerned bot
message_handler.handle_message(message, MockClass(), StateHandler())
# Check if the bot is sending a message via `send_message` function.
# Where response is a dictionary here.
instance = MockClass.return_value
if expected_method == "send_message":
instance.send_message.assert_called_with(response)
else:
instance.send_reply.assert_called_with(message, response['content'])
def assert_bot_output(self, messages, bot_response, expected_method,
http_request=None, http_response=None):
# type: (List[Dict[str, Any]], List[Dict[str, str]], str, Optional[Dict[str, Any]], Optional[Dict[str, Any]]) -> None
message_handler = self.get_bot_message_handler()
# Mocking BotHandlerApi
with patch('bots_api.bot_lib.BotHandlerApi') as MockClass:
for (message, response) in zip(messages, bot_response):
# If no mock http_request/http_response is provided,
# just call the request normally (potentially using
# the Internet).
if http_response is None:
assert http_request is None
self.call_request(message_handler, message, expected_method,
MockClass, response)
continue
# Otherwise, we mock requests, and verify that the bot
# made the correct HTTP request to the third-party API
# (and provide the correct third-party API response).
# This allows us to test things that would require the
# Internet without it.
assert http_request is not None
with patch('requests.get') as mock_get:
mock_result = mock.MagicMock()
mock_result.json.return_value = http_response
mock_result.ok.return_value = True
mock_get.return_value = mock_result
self.call_request(message_handler, message, expected_method,
MockClass, response)
# Check if the bot is sending the correct http_request corresponding
# to the given http_response.
if http_request is not None:
mock_get.assert_called_with(http_request['api_url'],
params=http_request['params'])
def bot_to_run(self, bot_module):
# type: (str) -> Any
# Returning Any, same argument as in get_bot_message_handler function.
lib_module = get_lib_module(bot_module)
message_handler = lib_module.handler_class()
return message_handler
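Using the harness above takes very little code; every deleted test_*.py file earlier in this compare follows the same shape. A minimal sketch (the bot name and expected replies are illustrative, assuming a bots/echo/echo.py handler existed):

from bots_test_lib import BotTestCase

class TestEchoBot(BotTestCase):
    bot_name = "echo"  # hypothetical bot under bots/echo/echo.py

    def test_bot(self):
        # message content -> text expected via send_reply()
        expected = {
            "hello": "hello",
            "": "Please say something.",
        }
        self.check_expected_responses(expected)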

View File

@@ -1,44 +0,0 @@
#!/usr/bin/env python
from __future__ import absolute_import
from __future__ import print_function
import argparse
import os
import sys
import unittest
from unittest import TestCase
def dir_join(dir1, dir2):
# type: (str, str) -> str
return os.path.abspath(os.path.join(dir1, dir2))
if __name__ == '__main__':
description = 'Script to run test_<bot>.py files in bots/<bot> directories'
parser = argparse.ArgumentParser(description=description)
parser.add_argument('--bot',
nargs=1,
type=str,
action='store',
help='test specified single bot')
args = parser.parse_args()
bots_dir = os.path.dirname(os.path.abspath(__file__))
root_dir = dir_join(bots_dir, '..')
bots_test_dir = dir_join(bots_dir, '../bots')
sys.path.insert(0, root_dir)
sys.path.insert(0, bots_test_dir)
# mypy doesn't recognize the TestLoader attribute, even though the code
# is executable
loader = unittest.TestLoader() # type: ignore
if args.bot is not None:
bots_test_dir = dir_join(bots_test_dir, args.bot[0])
suite = loader.discover(start_dir=bots_test_dir, top_level_dir=root_dir)
runner = unittest.TextTestRunner(verbosity=2)
# same issue as for TestLoader
result = runner.run(suite) # type: ignore
if result.errors or result.failures:
raise Exception('Test failed!')

View File

@@ -32,7 +32,7 @@ from typing import IO
import zulip
class StringIO(_StringIO):
name = '' # https://github.com/python/typeshed/issues/598
usage = """upload-file --user=<user's email address> --api-key=<user's api key> [options]
@@ -51,7 +51,7 @@ parser.add_option_group(zulip.generate_option_group(parser))
client = zulip.init_from_options(options)
file = None # type: IO
if options.file_path:
file = open(options.file_path, 'rb')
else:

View File

@@ -23,12 +23,34 @@
### REQUIRED CONFIGURATION ###
-# Change these values to your Slack credentials.
-SLACK_TOKEN = 'slack_token'
+# Change these values to your Asana credentials.
+ASANA_API_KEY = "0123456789abcdef0123456789abcdef"
-# Change these values to the credentials for your Slack bot.
-ZULIP_USER = 'user-email@zulip.com'
-ZULIP_API_KEY = 'user-email_api_key'
+# Change these values to the credentials for your Asana bot.
+ZULIP_USER = "asana-bot@example.com"
+ZULIP_API_KEY = "0123456789abcdef0123456789abcdef"
+# The Zulip stream that will receive Asana task updates.
+ZULIP_STREAM_NAME = "asana"
+### OPTIONAL CONFIGURATION ###
+# Set to None for logging to stdout when testing, and to a file for
+# logging in production.
+#LOG_FILE = "/var/tmp/zulip_asana.log"
+LOG_FILE = None
+# This file is used to resume this mirror in case the script shuts down.
+# It is required and needs to be writeable.
+RESUME_FILE = "/var/tmp/zulip_asana.state"
+# When initially started, how many hours of messages to include.
+ASANA_INITIAL_HISTORY_HOURS = 1
# Set this to your Zulip API server URI
-ZULIP_SITE = 'https://zulip.example.com'
+ZULIP_SITE = "https://zulip.example.com"
+# If properly installed, the Zulip API should be in your import
+# path, but if not, set a custom path below
+ZULIP_API_PATH = None

View File

@@ -0,0 +1,306 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Asana integration for Zulip
#
# Copyright © 2014 Zulip, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# The "zulip_asana_mirror" script is run continuously, possibly on a work computer
# or preferably on a server.
#
# When restarted, it will attempt to pick up where it left off.
#
# python-dateutil is a dependency for this script.
from __future__ import print_function
import base64
from datetime import datetime, timedelta
from typing import List, Dict, Optional, Any, Tuple
import json
import logging
import os
import time
from six.moves import urllib
from six.moves.urllib import request as urllib_request
import sys
try:
import dateutil.parser
from dateutil.tz import gettz
except ImportError as e:
print(e, file=sys.stderr)
print("Please install the python-dateutil package.", file=sys.stderr)
exit(1)
sys.path.insert(0, os.path.dirname(__file__))
import zulip_asana_config as config
VERSION = "0.9"
if config.ZULIP_API_PATH is not None:
sys.path.append(config.ZULIP_API_PATH)
import zulip
if config.LOG_FILE:
logging.basicConfig(filename=config.LOG_FILE, level=logging.WARNING)
else:
logging.basicConfig(level=logging.INFO)
client = zulip.Client(email=config.ZULIP_USER, api_key=config.ZULIP_API_KEY,
site=config.ZULIP_SITE, client="ZulipAsana/" + VERSION)
def fetch_from_asana(path):
# type: (str) -> Optional[Dict[str, Any]]
"""
Request a resource through the Asana API, authenticating using
HTTP basic auth.
"""
auth = base64.encodestring(b'%s:' % (config.ASANA_API_KEY,))
headers = {"Authorization": "Basic %s" % auth}
url = "https://app.asana.com/api/1.0" + path
request = urllib_request.Request(url, None, headers) # type: ignore
result = urllib_request.urlopen(request) # type: ignore
return json.load(result)
def send_zulip(topic, content):
# type: (str, str) -> Dict[str, str]
"""
Send a message to Zulip using the configured stream and bot credentials.
"""
message = {"type": "stream",
"sender": config.ZULIP_USER,
"to": config.ZULIP_STREAM_NAME,
"subject": topic,
"content": content,
}
return client.send_message(message)
def datestring_to_datetime(datestring):
# type: (str) -> datetime
"""
Given an ISO 8601 datestring, return the corresponding datetime object.
"""
return dateutil.parser.parse(datestring).replace(
tzinfo=gettz('Z'))
class TaskDict(dict):
"""
A helper class to turn a dictionary with task information into an
object where each of the keys is an attribute for easy access.
"""
def __getattr__(self, field):
# type: (TaskDict, str) -> Any
return self.get(field)
def format_topic(task, projects):
# type: (TaskDict, Dict[str, str]) -> str
"""
Return a string that will be the Zulip message topic for this task.
"""
# Tasks can be associated with multiple projects, but in practice they seem
# to mostly be associated with one.
project_name = projects[task.projects[0]["id"]]
return "%s: %s" % (project_name, task.name)
def format_assignee(task, users):
# type: (TaskDict, Dict[str, str]) -> str
"""
Return a string describing the task's assignee.
"""
if task.assignee:
assignee_name = users[task.assignee["id"]]
assignee_info = "**Assigned to**: %s (%s)" % (
assignee_name, task.assignee_status)
else:
assignee_info = "**Status**: Unassigned"
return assignee_info
def format_due_date(task):
# type: (TaskDict) -> str
"""
Return a string describing the task's due date.
"""
if task.due_on:
due_date_info = "**Due on**: %s" % (task.due_on,)
else:
due_date_info = "**Due date**: None"
return due_date_info
def format_task_creation_event(task, projects, users):
# type: (TaskDict, Dict[str, str], Dict[str, str]) -> Tuple[str, str]
"""
Format the topic and content for a newly-created task.
"""
topic = format_topic(task, projects)
assignee_info = format_assignee(task, users)
due_date_info = format_due_date(task)
content = """Task **%s** created:
~~~ quote
%s
~~~
%s
%s
""" % (task.name, task.notes, assignee_info, due_date_info)
return topic, content
def format_task_completion_event(task, projects, users):
# type: (TaskDict, Dict[str, str], Dict[str, str]) -> Tuple[str, str]
"""
Format the topic and content for a completed task.
"""
topic = format_topic(task, projects)
assignee_info = format_assignee(task, users)
due_date_info = format_due_date(task)
content = """Task **%s** completed. :white_check_mark:
%s
%s
""" % (task.name, assignee_info, due_date_info)
return topic, content

def since():
    # type: () -> datetime
    """
    Return a newness threshold for task events to be processed.
    """
    # If we have a record of the last event processed and it is recent, use it,
    # else process everything from ASANA_INITIAL_HISTORY_HOURS ago.
    def default_since():
        # type: () -> datetime
        return datetime.utcnow() - timedelta(
            hours=config.ASANA_INITIAL_HISTORY_HOURS)

    if os.path.exists(config.RESUME_FILE):
        try:
            with open(config.RESUME_FILE, "r") as f:
                datestring = f.readline().strip()
            timestamp = float(datestring)
            max_timestamp_processed = datetime.fromtimestamp(timestamp)
            logging.info("Reading from resume file: " + datestring)
        except (ValueError, IOError) as e:
            logging.warn("Could not open resume file: " + str(e))
            max_timestamp_processed = default_since()
    else:
        logging.info("No resume file, processing an initial history.")
        max_timestamp_processed = default_since()

    # Even if we can read a timestamp from RESUME_FILE, if it is old don't use
    # it.
    return max(max_timestamp_processed, default_since())
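The resume file holds a single "seconds.microseconds" Unix timestamp. The script writes it with strftime's non-portable "%s" format (see below); a portable round-trip, for reference:

import time
from datetime import datetime

ts = datetime(2017, 2, 7, 11, 24, 57)
stamp = "%.6f" % (time.mktime(ts.timetuple()),)  # e.g. "1486466697.000000"
datetime.fromtimestamp(float(stamp)) == ts       # True (local time both ways)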

def process_new_events():
    # type: () -> None
    """
    Forward new Asana task events to Zulip.
    """
    # In task queries, Asana only exposes IDs for projects and users, so we need
    # to look up the mappings.
    projects = dict((elt["id"], elt["name"]) for elt in
                    fetch_from_asana("/projects")["data"])
    users = dict((elt["id"], elt["name"]) for elt in
                 fetch_from_asana("/users")["data"])

    cutoff = since()
    max_timestamp_processed = cutoff
    time_operations = (("created_at", format_task_creation_event),
                       ("completed_at", format_task_completion_event))
    task_fields = ["assignee", "assignee_status", "created_at", "completed_at",
                   "modified_at", "due_on", "name", "notes", "projects"]

    # First, gather all of the tasks that need processing. We'll
    # process them in order.
    new_events = []
    for project_id in projects:
        project_url = "/projects/%d/tasks?opt_fields=%s" % (
            project_id, ",".join(task_fields))
        tasks = fetch_from_asana(project_url)["data"]
        for task in tasks:
            task = TaskDict(task)
            for time_field, operation in time_operations:
                if task[time_field]:
                    operation_time = datestring_to_datetime(task[time_field])
                    if operation_time > cutoff:
                        new_events.append((operation_time, time_field, operation, task))
    new_events.sort()

    now = datetime.utcnow()
    for operation_time, time_field, operation, task in new_events:
        # Unfortunately, creating an Asana task is not an atomic operation. If
        # the task was just created, or is missing basic information, it is
        # probably because the task is still being filled out -- wait until the
        # next round to process it.
        if (time_field == "created_at") and \
                (now - operation_time < timedelta(seconds=30)):
            # The task was just created, give the user some time to fill out
            # more information.
            return
        if (time_field == "created_at") and (not task.name) and \
                (now - operation_time < timedelta(seconds=60)):
            # The task is still unnamed; give the user up to a full minute
            # to name it before processing it.
            return

        topic, content = operation(task, projects, users)
        logging.info("Sending Zulip for " + topic)
        result = send_zulip(topic, content)

        # If the Zulip wasn't sent successfully, don't update the
        # max timestamp processed, so the task has another chance to
        # be forwarded. Exit, giving temporary issues time to
        # resolve.
        if not result.get("result"):
            logging.warn("Malformed result, exiting:")
            logging.warn(str(result))
            return
        if result["result"] != "success":
            logging.warn(result["msg"])
            return

        if operation_time > max_timestamp_processed:
            max_timestamp_processed = operation_time

    if max_timestamp_processed > cutoff:
        # Note: "%s" (seconds since the epoch) is a glibc extension to
        # strftime, so this expects a Linux-like platform.
        max_datestring = max_timestamp_processed.strftime("%s.%f")
        logging.info("Updating resume file: " + max_datestring)
        with open(config.RESUME_FILE, 'w') as f:
            f.write(max_datestring)

while True:
    try:
        process_new_events()
        time.sleep(5)
    except KeyboardInterrupt:
        logging.info("Shutting down...")
        logging.info("Set LOG_FILE to log to a file instead of stdout.")
        break
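The script reads its settings from a companion config module (imported as config above); a minimal sketch with placeholder values, covering just the attributes referenced in this file:

# Config module for the Asana mirror -- placeholder values.
ASANA_API_KEY = "0123456789abcdef"
ASANA_INITIAL_HISTORY_HOURS = 24
ZULIP_USER = "asana-bot@example.com"
ZULIP_STREAM_NAME = "asana"
RESUME_FILE = "/var/tmp/zulip_asana.state"
LOG_FILE = None  # set a path to log to a file instead of stdout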

View File

@@ -1,7 +1,6 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
#
# Copyright © 2012-2014 Zulip, Inc. # Copyright © 2014 Zulip, Inc.
# #
# Permission is hereby granted, free of charge, to any person obtaining a copy # Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal # of this software and associated documentation files (the "Software"), to deal
@@ -21,24 +20,32 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE. # THE SOFTWARE.
from __future__ import print_function
import sys
from os import path
import optparse
usage = """get-presence --email=<email address> [options] # Change these values to configure authentication for basecamp account
BASECAMP_ACCOUNT_ID = "12345678"
BASECAMP_USERNAME = "foo@example.com"
BASECAMP_PASSWORD = "p455w0rd"
Get presence data for another user. # This script will mirror this many hours of history on the first run.
""" # On subsequent runs this value is ignored.
BASECAMP_INITIAL_HISTORY_HOURS = 0
sys.path.append(path.join(path.dirname(__file__), '..')) # Change these values to configure Zulip authentication for the plugin
import zulip ZULIP_USER = "basecamp-bot@example.com"
ZULIP_API_KEY = "0123456789abcdef0123456789abcdef"
ZULIP_STREAM_NAME = "basecamp"
parser = optparse.OptionParser(usage=usage) ## If properly installed, the Zulip API should be in your import
parser.add_option_group(zulip.generate_option_group(parser)) ## path, but if not, set a custom path below
parser.add_option('--email') ZULIP_API_PATH = None
(options, args) = parser.parse_args()
client = zulip.init_from_options(options) # Set this to your Zulip API server URI
ZULIP_SITE = "https://zulip.example.com"
print(client.get_presence(options.email)) # If you wish to log to a file rather than stdout/stderr,
# please fill this out your desired path
LOG_FILE = None
# This file is used to resume this mirror in case the script shuts down.
# It is required and needs to be writeable.
RESUME_FILE = "/var/tmp/zulip_basecamp.state"

View File

@@ -0,0 +1,186 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Zulip mirror of Basecamp activity
# Copyright © 2014 Zulip, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# The "basecamp-mirror.py" script is run continuously, possibly on a work computer
# or preferably on a server.
# You may need to install the python-requests library.
from __future__ import absolute_import
import requests
import logging
import time
import re
import sys
import os
from datetime import datetime, timedelta

sys.path.insert(0, os.path.dirname(__file__))
import zulip_basecamp_config as config
VERSION = "0.9"

if config.ZULIP_API_PATH is not None:
    sys.path.append(config.ZULIP_API_PATH)
import zulip
from six.moves.html_parser import HTMLParser
from typing import Any, Dict
import six

client = zulip.Client(
    email=config.ZULIP_USER,
    site=config.ZULIP_SITE,
    api_key=config.ZULIP_API_KEY,
    client="ZulipBasecamp/" + VERSION)
user_agent = "Basecamp To Zulip Mirroring script (zulip-devel@googlegroups.com)"
htmlParser = HTMLParser()

# Find some form of JSON loader/dumper, with a preference order for speed.
json_implementations = ['ujson', 'cjson', 'simplejson', 'json']
while len(json_implementations):
    try:
        json = __import__(json_implementations.pop(0))
        break
    except ImportError:
        continue

# Check that the files this script needs can be written.
def check_permissions():
    # type: () -> None
    # Check that the log file can be written (append so we don't truncate it).
    if config.LOG_FILE:
        try:
            open(config.LOG_FILE, "a").close()
        except IOError as e:
            sys.stderr.write("Could not open up log for writing:")
            sys.stderr.write(str(e))
    # Check that the resume file can be written (this creates it if it doesn't exist).
    try:
        open(config.RESUME_FILE, "a+").close()
    except IOError as e:
        sys.stderr.write("Could not open up the file %s for reading and writing" % (config.RESUME_FILE,))
        sys.stderr.write(str(e))

# Build the message dict for sending a message with the Zulip API.
def build_message(event):
    # type: (Dict[str, Any]) -> Dict[str, Any]
    if not ('bucket' in event and 'creator' in event and 'html_url' in event):
        logging.error("Perhaps the Basecamp API changed behavior? "
                      "This event doesn't have the expected format:\n%s" % (event,))
        return None
    # Truncate the topic to at most 60 characters.
    topic = event['bucket']['name']
    if len(topic) > 60:
        topic = topic[0:57] + "..."
    # Get the action and target values, stripping any HTML tags.
    action = htmlParser.unescape(re.sub(r"<[^<>]+>", "", event.get('action', '')))
    target = htmlParser.unescape(event.get('target', ''))
    # Some events have "excerpts", which we blockquote.
    excerpt = htmlParser.unescape(event.get('excerpt', ''))
    if excerpt.strip() == "":
        message = '**%s** %s [%s](%s).' % (event['creator']['name'], action, target, event['html_url'])
    else:
        message = '**%s** %s [%s](%s).\n> %s' % (event['creator']['name'], action, target, event['html_url'], excerpt)
    # Assemble the message data dict.
    message_data = {
        "type": "stream",
        "to": config.ZULIP_STREAM_NAME,
        "subject": topic,
        "content": message,
    }
    return message_data
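A worked example with a made-up event payload (field names match what build_message reads):

event = {
    "bucket": {"name": "Launch checklist"},
    "creator": {"name": "Jane Doe"},
    "action": "commented on",
    "target": "Deploy plan",
    "html_url": "https://basecamp.com/12345678/projects/1/messages/2",
    "excerpt": "Looks good to me.",
}
build_message(event)["content"]
# '**Jane Doe** commented on [Deploy plan](https://basecamp.com/...).\n> Looks good to me.'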

# The main run loop for this mirror script.
def run_mirror():
    # type: () -> None
    # We should have the right (write) permissions on the resume file, as seen
    # in check_permissions, but it may still be empty or corrupted.
    try:
        with open(config.RESUME_FILE) as f:
            since = f.read()  # type: Any
        since = re.search(r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}.\d{3}-\d{2}:\d{2}", since)
        assert since, "resume file does not meet expected format"
        since = since.group(0)
    except (AssertionError, IOError) as e:
        logging.warn("Could not open resume file: %s" % (e,))
        since = (datetime.utcnow() - timedelta(hours=config.BASECAMP_INITIAL_HISTORY_HOURS)).isoformat() + "-00:00"
    try:
        # We use an exponential backoff approach when we get 429 (Too Many Requests).
        sleepInterval = 1
        events = []  # ensure defined even if the first response has an unexpected status
        while True:
            time.sleep(sleepInterval)
            response = requests.get("https://basecamp.com/%s/api/v1/events.json" % (config.BASECAMP_ACCOUNT_ID,),
                                    params={'since': since},
                                    auth=(config.BASECAMP_USERNAME, config.BASECAMP_PASSWORD),
                                    headers={"User-Agent": user_agent})
            if response.status_code == 200:
                sleepInterval = 1
                events = json.loads(response.text)
                if len(events):
                    logging.info("Got event(s): %s" % (response.text,))
            if response.status_code >= 500:
                logging.error(str(response.status_code))
                continue
            if response.status_code == 429:
                # Exponential backoff.
                sleepInterval *= 2
                logging.error(str(response.status_code))
                continue
            if response.status_code == 400:
                logging.error("Something went wrong. Basecamp must be unhappy for this reason: %s" % (response.text,))
                sys.exit(-1)
            if response.status_code == 401:
                logging.error("Bad authorization from Basecamp. Please check your Basecamp login credentials")
                sys.exit(-1)
            if len(events):
                since = events[0]['created_at']
            for event in reversed(events):
                message_data = build_message(event)
                if not message_data:
                    continue
                zulip_api_result = client.send_message(message_data)
                if zulip_api_result['result'] == "success":
                    logging.info("sent zulip with id: %s" % (zulip_api_result['id'],))
                else:
                    logging.warn("%s %s" % (zulip_api_result['result'], zulip_api_result['msg']))
                # Update 'since' each time in case we get KeyboardInterrupted.
                since = event['created_at']
                # Avoid hitting the rate limit.
                time.sleep(0.2)
    except KeyboardInterrupt:
        logging.info("Shutting down, please hold")
        with open(config.RESUME_FILE, 'w') as f:
            f.write(since)
        logging.info("Done!")

if __name__ == "__main__":
    if not isinstance(config.RESUME_FILE, six.string_types):
        sys.stderr.write("RESUME_FILE path not given; refusing to continue")
        sys.exit(1)
    check_permissions()
    if config.LOG_FILE:
        logging.basicConfig(filename=config.LOG_FILE, level=logging.INFO)
    else:
        logging.basicConfig(level=logging.INFO)
    run_mirror()

View File

@@ -33,7 +33,6 @@ from __future__ import print_function
from __future__ import absolute_import from __future__ import absolute_import
import requests import requests
import logging import logging
import pytz
import time import time
import sys import sys
import os import os
@@ -78,7 +77,7 @@ def make_api_call(path):
# type: (str) -> Optional[List[Dict[str, Any]]] # type: (str) -> Optional[List[Dict[str, Any]]]
response = requests.get("https://api3.codebasehq.com/%s" % (path,), response = requests.get("https://api3.codebasehq.com/%s" % (path,),
auth=(config.CODEBASE_API_USERNAME, config.CODEBASE_API_KEY), auth=(config.CODEBASE_API_USERNAME, config.CODEBASE_API_KEY),
params={'raw': 'True'}, params={'raw': True},
headers = {"User-Agent": user_agent, headers = {"User-Agent": user_agent,
"Content-Type": "application/json", "Content-Type": "application/json",
"Accept": "application/json"}) "Accept": "application/json"})
@@ -270,7 +269,7 @@ def run_mirror():
# in check_permissions, but it may still be empty or corrupted # in check_permissions, but it may still be empty or corrupted
def default_since(): def default_since():
# type: () -> datetime # type: () -> datetime
return datetime.now(tz=pytz.utc) - timedelta(hours=config.CODEBASE_INITIAL_HISTORY_HOURS) return datetime.utcnow() - timedelta(hours=config.CODEBASE_INITIAL_HISTORY_HOURS)
try: try:
with open(config.RESUME_FILE) as f: with open(config.RESUME_FILE) as f:
@@ -278,7 +277,7 @@ def run_mirror():
if timestamp == '': if timestamp == '':
since = default_since() since = default_since()
else: else:
since = datetime.fromtimestamp(float(timestamp), tz=pytz.utc) since = datetime.fromtimestamp(float(timestamp))
except (ValueError, IOError) as e: except (ValueError, IOError) as e:
logging.warn("Could not open resume file: %s" % (str(e))) logging.warn("Could not open resume file: %s" % (str(e)))
since = default_since() since = default_since()
@@ -291,7 +290,7 @@ def run_mirror():
sleepInterval = 1 sleepInterval = 1
for event in events: for event in events:
timestamp = event.get('event', {}).get('timestamp', '') timestamp = event.get('event', {}).get('timestamp', '')
event_date = dateutil.parser.parse(timestamp) event_date = dateutil.parser.parse(timestamp).replace(tzinfo=None)
if event_date > since: if event_date > since:
handle_event(event) handle_event(event)
since = event_date since = event_date

View File

@@ -1,16 +1,11 @@
#!/usr/bin/env python #!/usr/bin/env python
#
# This script depends on python-dateutil and python-pytz for properly handling
# times and time zones of calendar events.
from __future__ import print_function from __future__ import print_function
import datetime import datetime
import dateutil.parser
import httplib2 import httplib2
import itertools import itertools
import logging import logging
import optparse import optparse
import os import os
import pytz
from six.moves import urllib from six.moves import urllib
import sys import sys
import time import time
@@ -107,45 +102,29 @@ def get_credentials():
logging.error("Run the get-google-credentials script from this directory first.") logging.error("Run the get-google-credentials script from this directory first.")
def populate_events(): def get_events():
# type: () -> Optional[None] # type: () -> Iterable[Tuple[int, datetime.datetime, str]]
global events
credentials = get_credentials() credentials = get_credentials()
creds = credentials.authorize(httplib2.Http()) creds = credentials.authorize(httplib2.Http())
service = discovery.build('calendar', 'v3', http=creds) service = discovery.build('calendar', 'v3', http=creds)
now = datetime.datetime.now(pytz.utc).isoformat() now = datetime.datetime.utcnow().isoformat() + 'Z' # 'Z' indicates UTC time
feed = service.events().list(calendarId=options.calendarID, timeMin=now, maxResults=5, feed = service.events().list(calendarId=options.calendarID, timeMin=now, maxResults=5,
singleEvents=True, orderBy='startTime').execute() singleEvents=True, orderBy='startTime').execute()
events = []
for event in feed["items"]: for event in feed["items"]:
try: try:
start = dateutil.parser.parse(event["start"]["dateTime"]) start = event["start"]["dateTime"]
# According to the API documentation, a time zone offset is required
# for start.dateTime unless a time zone is explicitly specified in
# start.timeZone.
if start.tzinfo is None:
event_timezone = pytz.timezone(event["start"]["timeZone"])
# pytz timezones include an extra localize method that's not part
# of the tzinfo base class.
start = event_timezone.localize(start) # type: ignore
except KeyError: except KeyError:
# All-day events can have only a date. start = event["start"]["date"]
start_naive = dateutil.parser.parse(event["start"]["date"]) start = start[:19]
# All-day events can have only a date
# All-day events don't have a time zone offset; instead, we use the fmt = '%Y-%m-%dT%H:%M:%S' if 'T' in start else '%Y-%m-%d'
# time zone of the calendar. start = datetime.datetime.strptime(start, fmt)
calendar_timezone = pytz.timezone(feed["timeZone"])
# pytz timezones include an extra localize method that's not part
# of the tzinfo base class.
start = calendar_timezone.localize(start_naive) # type: ignore
try: try:
events.append((event["id"], start, event["summary"])) yield (event["id"], start, event["summary"])
except KeyError: except KeyError:
events.append((event["id"], start, "(No Title)")) yield (event["id"], start, "(No Title)")
def send_reminders(): def send_reminders():
@@ -154,7 +133,7 @@ def send_reminders():
messages = [] messages = []
keys = set() keys = set()
now = datetime.datetime.now(tz=pytz.utc) now = datetime.datetime.now()
for id, start, summary in events: for id, start, summary in events:
dt = start - now dt = start - now
@@ -193,8 +172,8 @@ for i in itertools.count():
# We check reminders every minute, but only # We check reminders every minute, but only
# download the calendar every 10 minutes. # download the calendar every 10 minutes.
if not i % 10: if not i % 10:
populate_events() events = list(get_events())
send_reminders() send_reminders()
except Exception: except:
logging.exception("Couldn't download Google calendar and/or couldn't post to Zulip.") logging.exception("Couldn't download Google calendar and/or couldn't post to Zulip.")
time.sleep(60) time.sleep(60)
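The branch removed above attaches a zone to a naive datetime with pytz's localize; a minimal sketch of that pattern (the zone is an arbitrary example):

import dateutil.parser
import pytz

start_naive = dateutil.parser.parse("2017-02-07")      # all-day events carry only a date
calendar_timezone = pytz.timezone("America/New_York")  # example zone
start = calendar_timezone.localize(start_naive)        # now timezone-aware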

View File

@@ -87,7 +87,7 @@ def format_commit_lines(web_url, repo, base, tip):
return "\n".join(summary for summary in commit_summaries) return "\n".join(summary for summary in commit_summaries)
def send_zulip(email, api_key, site, stream, subject, content): def send_zulip(email, api_key, site, stream, subject, content):
# type: (str, str, str, str, str, Text) -> None # type: (str, str, str, str, str, Text) -> str
""" """
Send a message to Zulip using the provided credentials, which should be for Send a message to Zulip using the provided credentials, which should be for
a bot in most cases. a bot in most cases.
@@ -114,7 +114,7 @@ def get_config(ui, item):
return None return None
def hook(ui, repo, **kwargs): def hook(ui, repo, **kwargs):
# type: (ui, repo, **Text) -> None # type: (ui, repo, Optional[Text]) -> None
""" """
Invoked by configuring a [hook] entry in .hg/hgrc. Invoked by configuring a [hook] entry in .hg/hgrc.
""" """

View File

@@ -25,7 +25,6 @@
import os import os
import subprocess import subprocess
import sys import sys
from typing import Dict
sys.path.insert(0, os.path.dirname(__file__)) sys.path.insert(0, os.path.dirname(__file__))
import zulip_openshift_config as config import zulip_openshift_config as config

View File

@@ -21,7 +21,7 @@
# THE SOFTWARE. # THE SOFTWARE.
# https://github.com/python/mypy/issues/1141 # https://github.com/python/mypy/issues/1141
from typing import Dict, Text from typing import Text
# Change these values to configure authentication for the plugin # Change these values to configure authentication for the plugin
ZULIP_USER = 'openshift-bot@example.com' ZULIP_USER = 'openshift-bot@example.com'
@@ -69,7 +69,7 @@ def format_deployment_message(
## If properly installed, the Zulip API should be in your import ## If properly installed, the Zulip API should be in your import
## path, but if not, set a custom path below ## path, but if not, set a custom path below
ZULIP_API_PATH = None # type: str ZULIP_API_PATH = None # type: str
# Set this to your Zulip server's API URI # Set this to your Zulip server's API URI
ZULIP_SITE = 'https://zulip.example.com' ZULIP_SITE = 'https://zulip.example.com'

View File

@@ -31,7 +31,6 @@ from six.moves.html_parser import HTMLParser
import logging import logging
import optparse import optparse
import os import os
import re
import sys import sys
import time import time
from six.moves import urllib from six.moves import urllib
@@ -39,9 +38,9 @@ from typing import Dict, List, Tuple, Any
import feedparser import feedparser
import zulip import zulip
VERSION = "0.9" # type: str VERSION = "0.9" # type: str
RSS_DATA_DIR = os.path.expanduser(os.path.join('~', '.cache', 'zulip-rss')) # type: str RSS_DATA_DIR = os.path.expanduser(os.path.join('~', '.cache', 'zulip-rss')) # type: str
OLDNESS_THRESHOLD = 30 # type: int OLDNESS_THRESHOLD = 30 # type: int
usage = """Usage: Send summaries of RSS entries for your favorite feeds to Zulip. usage = """Usage: Send summaries of RSS entries for your favorite feeds to Zulip.
@@ -67,7 +66,7 @@ stream every 5 minutes is:
*/5 * * * * /usr/local/share/zulip/integrations/rss/rss-bot""" */5 * * * * /usr/local/share/zulip/integrations/rss/rss-bot"""
parser = optparse.OptionParser(usage) # type: optparse.OptionParser parser = optparse.OptionParser(usage) # type: optparse.OptionParser
parser.add_option('--stream', parser.add_option('--stream',
dest='stream', dest='stream',
help='The stream to which to send RSS messages.', help='The stream to which to send RSS messages.',
@@ -83,18 +82,8 @@ parser.add_option('--feed-file',
help='The file containing a list of RSS feed URLs to follow, one URL per line', help='The file containing a list of RSS feed URLs to follow, one URL per line',
default=os.path.join(RSS_DATA_DIR, "rss-feeds"), default=os.path.join(RSS_DATA_DIR, "rss-feeds"),
action='store') action='store')
parser.add_option('--unwrap',
dest='unwrap',
action='store_true',
help='Convert word-wrapped paragraphs into single lines',
default=False)
parser.add_option('--math',
dest='math',
action='store_true',
help='Convert $ to $$ (for KaTeX processing)',
default=False)
parser.add_option_group(zulip.generate_option_group(parser)) parser.add_option_group(zulip.generate_option_group(parser))
(opts, args) = parser.parse_args() # type: Tuple[Any, List[str]] (opts, args) = parser.parse_args() # type: Tuple[Any, List[str]]
def mkdir_p(path): def mkdir_p(path):
# type: (str) -> None # type: (str) -> None
@@ -114,15 +103,15 @@ except OSError:
print("Unable to store RSS data at %s." % (opts.data_dir,), file=sys.stderr) print("Unable to store RSS data at %s." % (opts.data_dir,), file=sys.stderr)
exit(1) exit(1)
log_file = os.path.join(opts.data_dir, "rss-bot.log") # type: str log_file = os.path.join(opts.data_dir, "rss-bot.log") # type: str
log_format = "%(asctime)s: %(message)s" # type: str log_format = "%(asctime)s: %(message)s" # type: str
logging.basicConfig(format=log_format) logging.basicConfig(format=log_format)
formatter = logging.Formatter(log_format) # type: logging.Formatter formatter = logging.Formatter(log_format) # type: logging.Formatter
file_handler = logging.FileHandler(log_file) # type: logging.FileHandler file_handler = logging.FileHandler(log_file) # type: logging.FileHandler
file_handler.setFormatter(formatter) file_handler.setFormatter(formatter)
logger = logging.getLogger(__name__) # type: logging.Logger logger = logging.getLogger(__name__) # type: logging.Logger
logger.setLevel(logging.DEBUG) logger.setLevel(logging.DEBUG)
logger.addHandler(file_handler) logger.addHandler(file_handler)
@@ -136,7 +125,7 @@ class MLStripper(HTMLParser):
def __init__(self): def __init__(self):
# type: () -> None # type: () -> None
self.reset() self.reset()
self.fed = [] # type: List[str] self.fed = [] # type: List[str]
def handle_data(self, data): def handle_data(self, data):
# type: (str) -> None # type: (str) -> None
@@ -158,12 +147,6 @@ def compute_entry_hash(entry):
entry_id = entry.get("id", entry.get("link")) entry_id = entry.get("id", entry.get("link"))
return hashlib.md5(entry_id + str(entry_time)).hexdigest() return hashlib.md5(entry_id + str(entry_time)).hexdigest()
def unwrap_text(body):
# type: (str) -> str
# Replace \n by space if it is preceded and followed by a non-\n.
# Example: '\na\nb\nc\n\nd\n' -> '\na b c\n\nd\n'
return re.sub('(?<=[^\n])\n(?=[^\n])', ' ', body)
def elide_subject(subject): def elide_subject(subject):
# type: (str) -> str # type: (str) -> str
MAX_TOPIC_LENGTH = 60 MAX_TOPIC_LENGTH = 60
@@ -173,53 +156,45 @@ def elide_subject(subject):
def send_zulip(entry, feed_name): def send_zulip(entry, feed_name):
# type: (Any, str) -> Dict[str, Any] # type: (Any, str) -> Dict[str, Any]
body = entry.summary # type: str
if opts.unwrap:
body = unwrap_text(body)
content = "**[%s](%s)**\n%s\n%s" % (entry.title, content = "**[%s](%s)**\n%s\n%s" % (entry.title,
entry.link, entry.link,
strip_tags(body), strip_tags(entry.summary),
entry.link) # type: str entry.link) # type: str
if opts.math:
content = content.replace('$', '$$')
message = {"type": "stream", message = {"type": "stream",
"sender": opts.zulip_email, "sender": opts.zulip_email,
"to": opts.stream, "to": opts.stream,
"subject": elide_subject(feed_name), "subject": elide_subject(feed_name),
"content": content, "content": content,
} # type: Dict[str, str] } # type: Dict[str, str]
return client.send_message(message) return client.send_message(message)
try: try:
with open(opts.feed_file, "r") as f: with open(opts.feed_file, "r") as f:
feed_urls = [feed.strip() for feed in f.readlines()] # type: List[str] feed_urls = [feed.strip() for feed in f.readlines()] # type: List[str]
except IOError: except IOError:
log_error_and_exit("Unable to read feed file at %s." % (opts.feed_file,)) log_error_and_exit("Unable to read feed file at %s." % (opts.feed_file,))
client = zulip.Client(email=opts.zulip_email, api_key=opts.zulip_api_key, client = zulip.Client(email=opts.zulip_email, api_key=opts.zulip_api_key,
site=opts.zulip_site, client="ZulipRSS/" + VERSION) # type: zulip.Client site=opts.zulip_site, client="ZulipRSS/" + VERSION) # type: zulip.Client
first_message = True # type: bool first_message = True # type: bool
for feed_url in feed_urls: for feed_url in feed_urls:
feed_file = os.path.join(opts.data_dir, urllib.parse.urlparse(feed_url).netloc) # Type: str feed_file = os.path.join(opts.data_dir, urllib.parse.urlparse(feed_url).netloc) # Type: str
try: try:
with open(feed_file, "r") as f: with open(feed_file, "r") as f:
old_feed_hashes = dict((line.strip(), True) for line in f.readlines()) # type: Dict[str, bool] old_feed_hashes = dict((line.strip(), True) for line in f.readlines()) # type: Dict[str, bool]
except IOError: except IOError:
old_feed_hashes = {} old_feed_hashes = {}
new_hashes = [] # type: List[str] new_hashes = [] # type: List[str]
data = feedparser.parse(feed_url) # type: feedparser.parse data = feedparser.parse(feed_url) # type: feedparser.parse
for entry in data.entries: for entry in data.entries:
entry_hash = compute_entry_hash(entry) # type: str entry_hash = compute_entry_hash(entry) # type: str
# An entry has either been published or updated. # An entry has either been published or updated.
entry_time = entry.get("published_parsed", entry.get("updated_parsed")) # type: Tuple[int, int] entry_time = entry.get("published_parsed", entry.get("updated_parsed")) # type: Tuple[int, int]
if entry_time is not None and (time.time() - calendar.timegm(entry_time)) > OLDNESS_THRESHOLD * 60 * 60 * 24: if entry_time is not None and (time.time() - calendar.timegm(entry_time)) > OLDNESS_THRESHOLD * 60 * 60 * 24:
# As a safeguard against misbehaving feeds, don't try to process # As a safeguard against misbehaving feeds, don't try to process
# entries older than some threshold. # entries older than some threshold.
@@ -232,9 +207,9 @@ for feed_url in feed_urls:
# entries in reverse chronological order. # entries in reverse chronological order.
break break
feed_name = data.feed.title or feed_url # type: str feed_name = data.feed.title or feed_url # type: str
response = send_zulip(entry, feed_name) # type: Dict[str, Any] response = send_zulip(entry, feed_name) # type: Dict[str, Any]
if response["result"] != "success": if response["result"] != "success":
logger.error("Error processing %s" % (feed_url,)) logger.error("Error processing %s" % (feed_url,))
logger.error(str(response)) logger.error(str(response))
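The unwrap_text helper deleted above joins word-wrapped lines while preserving paragraph breaks; a quick illustration:

import re

def unwrap_text(body):
    # type: (str) -> str
    # Replace a single newline with a space when it sits between two
    # non-newline characters; blank-line paragraph breaks are untouched.
    return re.sub('(?<=[^\n])\n(?=[^\n])', ' ', body)

unwrap_text('\na\nb\nc\n\nd\n')  # -> '\na b c\n\nd\n'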

View File

@@ -1,127 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
#
# slacker is a dependency for this script.
#
from __future__ import absolute_import
from __future__ import print_function

import sys
import string
import random
from six.moves import range
from typing import List, Dict

import zulip
from slacker import Slacker, Response, Error as SlackError

import zulip_slack_config as config

client = zulip.Client(email=config.ZULIP_USER, api_key=config.ZULIP_API_KEY, site=config.ZULIP_SITE)


class FromSlackImporter(object):
    def __init__(self, slack_token, get_archived_channels=True):
        # type: (str, bool) -> None
        self.slack = Slacker(slack_token)
        self.get_archived_channels = get_archived_channels

        self._check_slack_token()

    def get_slack_users_email(self):
        # type: () -> Dict[str, Dict[str, str]]
        r = self.slack.users.list()
        self._check_if_response_is_successful(r)
        results_dict = {}
        for user in r.body['members']:
            if user['profile'].get('email') and user.get('deleted') is False:
                results_dict[user['id']] = {'email': user['profile']['email'], 'name': user['profile']['real_name']}
        return results_dict

    def get_slack_public_channels_names(self):
        # type: () -> List[Dict[str, str]]
        r = self.slack.channels.list()
        self._check_if_response_is_successful(r)
        return [{'name': channel['name'], 'members': channel['members']} for channel in r.body['channels']]

    def get_slack_private_channels_names(self):
        # type: () -> List[str]
        r = self.slack.groups.list()
        self._check_if_response_is_successful(r)
        return [
            channel['name'] for channel in r.body['groups']
            if not channel['is_archived'] or self.get_archived_channels
        ]

    def _check_slack_token(self):
        # type: () -> None
        try:
            r = self.slack.api.test()
            self._check_if_response_is_successful(r)
        except SlackError as e:
            print(e)
            sys.exit(1)
        except Exception as e:
            print(e)
            sys.exit(1)

    def _check_if_response_is_successful(self, response):
        # type: (Response) -> None
        if not response.successful:
            print(response.error)
            sys.exit(1)


def _generate_random_password(size=10):
    # type: (int) -> str
    return ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(size))
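random.choice is fine for throwaway import passwords, but for anything persistent a CSPRNG is preferable; a sketch using the standard library (Python 3.6+):

import secrets
import string

def generate_password(size=10):
    # type: (int) -> str
    # secrets draws from the OS CSPRNG, unlike the `random` module.
    alphabet = string.ascii_letters + string.digits
    return ''.join(secrets.choice(alphabet) for _ in range(size))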

def get_and_add_users(slack_importer):
    # type: (Slacker) -> Dict[str, Dict[str, str]]
    users = slack_importer.get_slack_users_email()
    added_users = {}
    print('######### IMPORTING USERS STARTED #########\n')
    for user_id, user in users.items():
        r = client.create_user({
            'email': user['email'],
            'full_name': user['name'],
            'short_name': user['name']
        })
        if not r.get('msg'):
            added_users[user_id] = user
            print(u"{} -> {}\nCreated\n".format(user['name'], user['email']))
        else:
            print(u"{} -> {}\n{}\n".format(user['name'], user['email'], r.get('msg')))
    print('######### IMPORTING USERS FINISHED #########\n')
    return added_users

def create_streams_and_add_subscribers(slack_importer, added_users):
    # type: (Slacker, Dict[str, Dict[str, str]]) -> None
    channels_list = slack_importer.get_slack_public_channels_names()
    print('######### IMPORTING STREAMS STARTED #########\n')
    for stream in channels_list:
        subscribed_users = [added_users[member]['email'] for member in stream['members'] if member in added_users.keys()]
        if subscribed_users:
            r = client.add_subscriptions([{"name": stream['name']}], principals=subscribed_users)
            if not r.get('msg'):
                print(u"{} -> created\n".format(stream['name']))
            else:
                print(u"{} -> {}\n".format(stream['name'], r.get('msg')))
        else:
            print(u"{} -> wasn't created\nNo subscribers\n".format(stream['name']))
    print('######### IMPORTING STREAMS FINISHED #########\n')


def main():
    # type: () -> None
    importer = FromSlackImporter(config.SLACK_TOKEN)
    added_users = get_and_add_users(importer)
    create_streams_and_add_subscribers(importer, added_users)


if __name__ == '__main__':
    main()
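The importer expects a zulip_slack_config module alongside it; a minimal sketch with placeholder values (attribute names taken from the script above):

# zulip_slack_config.py -- placeholder values
SLACK_TOKEN = "xoxp-not-a-real-token"
ZULIP_USER = "slack-import-bot@example.com"
ZULIP_API_KEY = "0123456789abcdef0123456789abcdef"
ZULIP_SITE = "https://zulip.example.com"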

View File

@@ -57,7 +57,7 @@ path, rev = sys.argv[1:] # type: Tuple[Text, Text]
# since its a local path, prepend "file://" # since its a local path, prepend "file://"
path = "file://" + path path = "file://" + path
entry = svn.log(path, revision_end=pysvn.Revision(pysvn.opt_revision_kind.number, rev))[0] # type: Dict[Text, Any] entry = svn.log(path, revision_end=pysvn.Revision(pysvn.opt_revision_kind.number, rev))[0] # type: Dict[Text, Union[Text, pysvn.Revision, List[Dict[Text, pysvn.Revision]]]]
message = "**{0}** committed revision r{1} to `{2}`.\n\n> {3}".format( message = "**{0}** committed revision r{1} to `{2}`.\n\n> {3}".format(
entry['author'], entry['author'],
rev, rev,

View File

@@ -43,7 +43,7 @@ import zulip_trac_config as config
VERSION = "0.9" VERSION = "0.9"
if False: if False:
from typing import Any, Dict from typing import Any
if config.ZULIP_API_PATH is not None: if config.ZULIP_API_PATH is not None:
sys.path.append(config.ZULIP_API_PATH) sys.path.append(config.ZULIP_API_PATH)

View File

@@ -1,3 +0,0 @@
#!/bin/sh
krb_user_id=1051
env KRB5CCNAME=/tmp/krb5cc_"$krb_user_id".tmp kinit -k -t /home/zulip/tabbott.extra.keytab tabbott/extra@ATHENA.MIT.EDU; mv /tmp/krb5cc_"$krb_user_id".tmp /tmp/krb5cc_"$krb_user_id"

View File

@@ -3,7 +3,7 @@
from __future__ import print_function from __future__ import print_function
if False: if False:
from typing import Any, Dict, Generator, List, Tuple from typing import Any, Generator, List, Tuple
import os import os
import sys import sys
@@ -34,19 +34,18 @@ package_info = dict(
author='Zulip Open Source Project', author='Zulip Open Source Project',
author_email='zulip-devel@googlegroups.com', author_email='zulip-devel@googlegroups.com',
classifiers=[ classifiers=[
'Development Status :: 4 - Beta', 'Development Status :: 3 - Alpha',
'Environment :: Web Environment', 'Environment :: Web Environment',
'Intended Audience :: Developers', 'Intended Audience :: Developers',
'License :: OSI Approved :: MIT License', 'License :: OSI Approved :: MIT License',
'Topic :: Communications :: Chat', 'Topic :: Communications :: Chat',
], ],
url='https://www.zulip.org/', url='https://www.zulip.org/dist/api/',
packages=['zulip'], packages=['zulip'],
data_files=[('share/zulip/examples', data_files=[('share/zulip/examples',
["examples/zuliprc", ["examples/zuliprc",
"examples/create-user", "examples/create-user",
"examples/edit-message", "examples/edit-message",
"examples/get-presence",
"examples/get-public-streams", "examples/get-public-streams",
"examples/list-members", "examples/list-members",
"examples/list-subscriptions", "examples/list-subscriptions",
@@ -57,12 +56,8 @@ package_info = dict(
"examples/subscribe", "examples/subscribe",
"examples/unsubscribe", "examples/unsubscribe",
])] + list(recur_expand('share/zulip', 'integrations/')), ])] + list(recur_expand('share/zulip', 'integrations/')),
entry_points={ scripts=["bin/zulip-send"],
'console_scripts': [ ) # type: Dict[str, Any]
'zulip-send=zulip.send:main',
],
},
) # type: Dict[str, Any]
setuptools_info = dict( setuptools_info = dict(
install_requires=['requests>=0.12.1', install_requires=['requests>=0.12.1',

Some files were not shown because too many files have changed in this diff.