Move management commands to the analytics app.

Move commands related to stats collection and reporting from
zilencer to analytics.  To do this, we had to make "analytics"
officially an app.

(imported from commit 63ef6c68d1b1ebb5043ee4aca999aa209e7f494d)
This commit is contained in:
Steve Howell
2013-11-06 14:20:51 -05:00
parent 9a0faf9ca8
commit 64fb17f9c2
11 changed files with 1 additions and 0 deletions

View File

View File

@@ -0,0 +1,57 @@
from __future__ import absolute_import
from django.core.management.base import BaseCommand
from zerver.models import UserPresence, UserActivity
from zerver.lib.utils import statsd, statsd_key
from datetime import datetime, timedelta
from collections import defaultdict
class Command(BaseCommand):
help = """Sends active user statistics to statsd.
Run as a cron job that runs every 10 minutes."""
def handle(self, *args, **options):
# Get list of all active users in the last 1 week
cutoff = datetime.now() - timedelta(minutes=30, hours=168)
users = UserPresence.objects.select_related().filter(timestamp__gt=cutoff)
# Calculate 10min, 2hrs, 12hrs, 1day, 2 business days (TODO business days), 1 week bucket of stats
hour_buckets = [0.16, 2, 12, 24, 48, 168]
user_info = defaultdict(dict)
for last_presence in users:
if last_presence.status == UserPresence.IDLE:
known_active = last_presence.timestamp - timedelta(minutes=30)
else:
known_active = last_presence.timestamp
for bucket in hour_buckets:
if not bucket in user_info[last_presence.user_profile.realm.domain]:
user_info[last_presence.user_profile.realm.domain][bucket] = []
if datetime.now(known_active.tzinfo) - known_active < timedelta(hours=bucket):
user_info[last_presence.user_profile.realm.domain][bucket].append(last_presence.user_profile.email)
for realm, buckets in user_info.items():
print("Realm %s" % realm)
for hr, users in sorted(buckets.items()):
print("\tUsers for %s: %s" % (hr, len(users)))
statsd.gauge("users.active.%s.%shr" % (statsd_key(realm, True), statsd_key(hr, True)), len(users))
# Also do stats for how many users have been reading the app.
users_reading = UserActivity.objects.select_related().filter(query="/json/update_message_flags")
user_info = defaultdict(dict)
for activity in users_reading:
for bucket in hour_buckets:
if not bucket in user_info[activity.user_profile.realm.domain]:
user_info[activity.user_profile.realm.domain][bucket] = []
if datetime.now(activity.last_visit.tzinfo) - activity.last_visit < timedelta(hours=bucket):
user_info[activity.user_profile.realm.domain][bucket].append(activity.user_profile.email)
for realm, buckets in user_info.items():
print("Realm %s" % realm)
for hr, users in sorted(buckets.items()):
print("\tUsers reading for %s: %s" % (hr, len(users)))
statsd.gauge("users.reading.%s.%shr" % (statsd_key(realm, True), statsd_key(hr, True)), len(users))

View File

@@ -0,0 +1,81 @@
from __future__ import absolute_import
from optparse import make_option
from django.core.management.base import BaseCommand
from zerver.models import Recipient, Message
from zerver.lib.timestamp import timestamp_to_datetime
import datetime
import time
import logging
def compute_stats(log_level):
logger = logging.getLogger()
logger.setLevel(log_level)
one_week_ago = timestamp_to_datetime(time.time()) - datetime.timedelta(weeks=1)
mit_query = Message.objects.filter(sender__realm__domain="mit.edu",
recipient__type=Recipient.STREAM,
pub_date__gt=one_week_ago)
for bot_sender_start in ["imap.", "rcmd.", "sys."]:
mit_query = mit_query.exclude(sender__email__startswith=(bot_sender_start))
# Filtering for "/" covers tabbott/extra@ and all the daemon/foo bots.
mit_query = mit_query.exclude(sender__email__contains=("/"))
mit_query = mit_query.exclude(sender__email__contains=("aim.com"))
mit_query = mit_query.exclude(
sender__email__in=["rss@mit.edu", "bash@mit.edu", "apache@mit.edu",
"bitcoin@mit.edu", "lp@mit.edu", "clocks@mit.edu",
"root@mit.edu", "nagios@mit.edu",
"www-data|local-realm@mit.edu"])
user_counts = {}
for m in mit_query.select_related("sending_client", "sender"):
email = m.sender.email
user_counts.setdefault(email, {})
user_counts[email].setdefault(m.sending_client.name, 0)
user_counts[email][m.sending_client.name] += 1
total_counts = {}
total_user_counts = {}
for email, counts in user_counts.items():
total_user_counts.setdefault(email, 0)
for client_name, count in counts.items():
total_counts.setdefault(client_name, 0)
total_counts[client_name] += count
total_user_counts[email] += count
logging.debug("%40s | %10s | %s" % ("User", "Messages", "Percentage Zulip"))
top_percents = {}
for size in [10, 25, 50, 100, 200, len(total_user_counts.keys())]:
top_percents[size] = 0
for i, email in enumerate(sorted(total_user_counts.keys(),
key=lambda x: -total_user_counts[x])):
percent_zulip = round(100 - (user_counts[email].get("zephyr_mirror", 0)) * 100. /
total_user_counts[email], 1)
for size in top_percents.keys():
top_percents.setdefault(size, 0)
if i < size:
top_percents[size] += (percent_zulip * 1.0 / size)
logging.debug("%40s | %10s | %s%%" % (email, total_user_counts[email],
percent_zulip))
logging.info("")
for size in sorted(top_percents.keys()):
logging.info("Top %6s | %s%%" % (size, round(top_percents[size], 1)))
grand_total = sum(total_counts.values())
print grand_total
logging.info("%15s | %s" % ("Client", "Percentage"))
for client in total_counts.keys():
logging.info("%15s | %s%%" % (client, round(100. * total_counts[client] / grand_total, 1)))
class Command(BaseCommand):
option_list = BaseCommand.option_list + \
(make_option('--verbose', default=False, action='store_true'),)
help = "Compute statistics on MIT Zephyr usage."
def handle(self, *args, **options):
level = logging.INFO
if options["verbose"]:
level = logging.DEBUG
compute_stats(level)

View File

@@ -0,0 +1,57 @@
from __future__ import absolute_import
from zerver.lib.statistics import seconds_usage_between
from optparse import make_option
from django.core.management.base import BaseCommand
from zerver.models import UserProfile
import datetime
from django.utils.timezone import utc
def analyze_activity(options):
day_start = datetime.datetime.strptime(options["date"], "%Y-%m-%d").replace(tzinfo=utc)
day_end = day_start + datetime.timedelta(days=options["duration"])
user_profile_query = UserProfile.objects.all()
if options["realm"]:
user_profile_query = user_profile_query.filter(realm__domain=options["realm"])
print "Per-user online duration:\n"
total_duration = datetime.timedelta(0)
for user_profile in user_profile_query:
duration = seconds_usage_between(user_profile, day_start, day_end)
if duration == datetime.timedelta(0):
continue
total_duration += duration
print "%-*s%s" % (37, user_profile.email, duration, )
print "\nTotal Duration: %s" % (total_duration,)
print "\nTotal Duration in minutes: %s" % (total_duration.total_seconds() / 60.,)
print "Total Duration amortized to a month: %s" % (total_duration.total_seconds() * 30. / 60.,)
class Command(BaseCommand):
help = """Report analytics of user activity on a per-user and realm basis.
This command aggregates user activity data that is collected by each user using Zulip. It attempts
to approximate how much each user has been using Zulip per day, measured by recording each 15 minute
period where some activity has occurred (mouse move or keyboard activity).
It will correctly not count server-initiated reloads in the activity statistics.
The duration flag can be used to control how many days to show usage duration for
Usage: python manage.py analyze_user_activity [--realm=zulip.com] [--date=2013-09-10] [--duration=1]
By default, if no date is selected 2013-09-10 is used. If no realm is provided, information
is shown for all realms"""
option_list = BaseCommand.option_list + (
make_option('--realm', action='store'),
make_option('--date', action='store', default="2013-09-06"),
make_option('--duration', action='store', default=1, type=int, help="How many days to show usage information for"),
)
def handle(self, *args, **options):
analyze_activity(options)

View File

@@ -0,0 +1,69 @@
from __future__ import absolute_import
from django.core.management.base import BaseCommand
from django.db.models import Count
from zerver.models import UserActivity, UserProfile, Realm, \
get_realm, get_user_profile_by_email
import datetime
class Command(BaseCommand):
help = """Report rough client activity globally, for a realm, or for a user
Usage examples:
python manage.py client_activity
python manage.py client_activity zulip.com
python manage.py client_activity jesstess@zulip.com"""
def compute_activity(self, user_activity_objects):
# Report data from the past week.
#
# This is a rough report of client activity because we inconsistently
# register activity from various clients; think of it as telling you
# approximately how many people from a group have used a particular
# client recently. For example, this might be useful to get a sense of
# how popular different versions of a desktop client are.
#
# Importantly, this does NOT tell you anything about the relative
# volumes of requests from clients.
threshold = datetime.datetime.now() - datetime.timedelta(days=7)
client_counts = user_activity_objects.filter(
last_visit__gt=threshold).values("client__name").annotate(
count=Count('client__name'))
total = 0
counts = []
for client_type in client_counts:
count = client_type["count"]
client = client_type["client__name"]
total += count
counts.append((count, client))
counts.sort()
for count in counts:
print "%25s %15d" % (count[1], count[0])
print "Total:", total
def handle(self, *args, **options):
if len(args) == 0:
# Report global activity.
self.compute_activity(UserActivity.objects.all())
elif len(args) == 1:
try:
# Report activity for a user.
user_profile = get_user_profile_by_email(args[0])
self.compute_activity(UserActivity.objects.filter(
user_profile=user_profile))
except UserProfile.DoesNotExist:
try:
# Report activity for a realm.
realm = get_realm(args[0])
self.compute_activity(UserActivity.objects.filter(
user_profile__realm=realm))
except Realm.DoesNotExist:
print "Unknown user or domain %s" % (args[0],)
exit(1)

View File

@@ -0,0 +1,148 @@
from __future__ import absolute_import
import datetime
import pytz
from django.core.management.base import BaseCommand
from django.db.models import Count
from zerver.models import UserProfile, Realm, Stream, Message, Recipient, UserActivity, \
Subscription, UserMessage
MOBILE_CLIENT_LIST = ["Android", "iPhone"]
HUMAN_CLIENT_LIST = MOBILE_CLIENT_LIST + ["website"]
human_messages = Message.objects.filter(sending_client__name__in=HUMAN_CLIENT_LIST)
class Command(BaseCommand):
help = "Generate statistics on realm activity."
def active_users(self, realm):
# Has been active (on the website, for now) in the last 7 days.
activity_cutoff = datetime.datetime.now(tz=pytz.utc) - datetime.timedelta(days=7)
return [activity.user_profile for activity in \
UserActivity.objects.filter(user_profile__realm=realm,
user_profile__is_active=True,
last_visit__gt=activity_cutoff,
query="/json/update_pointer",
client__name="website")]
def messages_sent_by(self, user, days_ago):
sent_time_cutoff = datetime.datetime.now(tz=pytz.utc) - datetime.timedelta(days=days_ago)
return human_messages.filter(sender=user, pub_date__gt=sent_time_cutoff).count()
def total_messages(self, realm, days_ago):
sent_time_cutoff = datetime.datetime.now(tz=pytz.utc) - datetime.timedelta(days=days_ago)
return Message.objects.filter(sender__realm=realm, pub_date__gt=sent_time_cutoff).count()
def human_messages(self, realm, days_ago):
sent_time_cutoff = datetime.datetime.now(tz=pytz.utc) - datetime.timedelta(days=days_ago)
return human_messages.filter(sender__realm=realm, pub_date__gt=sent_time_cutoff).count()
def api_messages(self, realm, days_ago):
return (self.total_messages(realm, days_ago) - self.human_messages(realm, days_ago))
def stream_messages(self, realm, days_ago):
sent_time_cutoff = datetime.datetime.now(tz=pytz.utc) - datetime.timedelta(days=days_ago)
return human_messages.filter(sender__realm=realm, pub_date__gt=sent_time_cutoff,
recipient__type=Recipient.STREAM).count()
def private_messages(self, realm, days_ago):
sent_time_cutoff = datetime.datetime.now(tz=pytz.utc) - datetime.timedelta(days=days_ago)
return human_messages.filter(sender__realm=realm, pub_date__gt=sent_time_cutoff).exclude(
recipient__type=Recipient.STREAM).exclude(recipient__type=Recipient.HUDDLE).count()
def group_private_messages(self, realm, days_ago):
sent_time_cutoff = datetime.datetime.now(tz=pytz.utc) - datetime.timedelta(days=days_ago)
return human_messages.filter(sender__realm=realm, pub_date__gt=sent_time_cutoff).exclude(
recipient__type=Recipient.STREAM).exclude(recipient__type=Recipient.PERSONAL).count()
def report_percentage(self, numerator, denominator, text):
if not denominator:
fraction = 0.0
else:
fraction = numerator / float(denominator)
print "%.2f%% of" % (fraction * 100,), text
def handle(self, *args, **options):
if args:
try:
realms = [Realm.objects.get(domain=domain) for domain in args]
except Realm.DoesNotExist, e:
print e
exit(1)
else:
realms = Realm.objects.all()
for realm in realms:
print realm.domain
user_profiles = UserProfile.objects.filter(realm=realm, is_active=True)
active_users = self.active_users(realm)
num_active = len(active_users)
print "%d active users (%d total)" % (num_active, len(user_profiles))
streams = Stream.objects.filter(realm=realm).extra(
tables=['zerver_subscription', 'zerver_recipient'],
where=['zerver_subscription.recipient_id = zerver_recipient.id',
'zerver_recipient.type = 2',
'zerver_recipient.type_id = zerver_stream.id',
'zerver_subscription.active = true']).annotate(count=Count("name"))
print "%d streams" % (streams.count(),)
for days_ago in (1, 7, 30):
print "In last %d days, users sent:" % (days_ago,)
sender_quantities = [self.messages_sent_by(user, days_ago) for user in user_profiles]
for quantity in sorted(sender_quantities, reverse=True):
print quantity,
print ""
print "%d stream messages" % (self.stream_messages(realm, days_ago),)
print "%d one-on-one private messages" % (self.private_messages(realm, days_ago),)
print "%d messages sent via the API" % (self.api_messages(realm, days_ago),)
print "%d group private messages" % (self.group_private_messages(realm, days_ago),)
num_notifications_enabled = len(filter(lambda x: x.enable_desktop_notifications == True,
active_users))
self.report_percentage(num_notifications_enabled, num_active,
"active users have desktop notifications enabled")
num_enter_sends = len(filter(lambda x: x.enter_sends, active_users))
self.report_percentage(num_enter_sends, num_active,
"active users have enter-sends")
all_message_count = human_messages.filter(sender__realm=realm).count()
multi_paragraph_message_count = human_messages.filter(
sender__realm=realm, content__contains="\n\n").count()
self.report_percentage(multi_paragraph_message_count, all_message_count,
"all messages are multi-paragraph")
# Starred messages
starrers = UserMessage.objects.filter(user_profile__in=user_profiles,
flags=UserMessage.flags.starred).values(
"user_profile").annotate(count=Count("user_profile"))
print "%d users have starred %d messages" % (
len(starrers), sum([elt["count"] for elt in starrers]))
active_user_subs = Subscription.objects.filter(
user_profile__in=user_profiles, active=True)
# Streams not in home view
non_home_view = active_user_subs.filter(in_home_view=False).values(
"user_profile").annotate(count=Count("user_profile"))
print "%d users have %d streams not in home view" % (
len(non_home_view), sum([elt["count"] for elt in non_home_view]))
# Code block markup
markup_messages = human_messages.filter(
sender__realm=realm, content__contains="~~~").values(
"sender").annotate(count=Count("sender"))
print "%d users have used code block markup on %s messages" % (
len(markup_messages), sum([elt["count"] for elt in markup_messages]))
# Notifications for stream messages
notifications = active_user_subs.filter(notifications=True).values(
"user_profile").annotate(count=Count("user_profile"))
print "%d users receive desktop notifications for %d streams" % (
len(notifications), sum([elt["count"] for elt in notifications]))
print ""

View File

@@ -0,0 +1,24 @@
from __future__ import absolute_import
from django.core.management.base import BaseCommand
from django.conf import settings
class Command(BaseCommand):
help = """Send some stats to statsd.
Usage: python manage.py send_stats [incr|decr|timing|timer|gauge] name val"""
def handle(self, *args, **options):
if len(args) != 3:
print "Usage: python manage.py send_stats [incr|decr|timing|timer|gauge] name val"
exit(1)
operation = args[0]
name = args[1]
val = args[2]
if settings.USING_STATSD:
from statsd import statsd
func = getattr(statsd, operation)
func(name, val)

View File

@@ -0,0 +1,36 @@
from __future__ import absolute_import
from django.core.management.base import BaseCommand
from django.db.models import Q
from zerver.models import Realm, Stream, Message, Subscription, Recipient
class Command(BaseCommand):
help = "Generate statistics on the streams for a realm."
def handle(self, *args, **options):
if args:
try:
realms = [Realm.objects.get(domain=domain) for domain in args]
except Realm.DoesNotExist, e:
print e
exit(1)
else:
realms = Realm.objects.all()
for realm in realms:
print realm.domain
print "------------"
print "%25s %15s %10s" % ("stream", "subscribers", "messages")
streams = Stream.objects.filter(realm=realm).exclude(Q(name__istartswith="tutorial-"))
invite_only_count = 0
for stream in streams:
if stream.invite_only:
invite_only_count += 1
continue
print "%25s" % (stream.name,),
recipient = Recipient.objects.filter(type=Recipient.STREAM, type_id=stream.id)
print "%10d" % (len(Subscription.objects.filter(recipient=recipient, active=True)),),
num_messages = len(Message.objects.filter(recipient=recipient))
print "%12d" % (num_messages,)
print "%d invite-only streams" % (invite_only_count,)
print ""

View File

@@ -0,0 +1,37 @@
from __future__ import absolute_import
import datetime
import pytz
from django.core.management.base import BaseCommand
from zerver.models import UserProfile, Realm, Stream, Message
class Command(BaseCommand):
help = "Generate statistics on user activity."
def messages_sent_by(self, user, week):
start = datetime.datetime.now(tz=pytz.utc) - datetime.timedelta(days=(week + 1)*7)
end = datetime.datetime.now(tz=pytz.utc) - datetime.timedelta(days=week*7)
return Message.objects.filter(sender=user, pub_date__gt=start, pub_date__lte=end).count()
def handle(self, *args, **options):
if args:
try:
realms = [Realm.objects.get(domain=domain) for domain in args]
except Realm.DoesNotExist, e:
print e
exit(1)
else:
realms = Realm.objects.all()
for realm in realms:
print realm.domain
user_profiles = UserProfile.objects.filter(realm=realm, is_active=True)
print "%d users" % (len(user_profiles),)
print "%d streams" % (len(Stream.objects.filter(realm=realm)),)
for user_profile in user_profiles:
print "%35s" % (user_profile.email,),
for week in range(10):
print "%5d" % (self.messages_sent_by(user_profile, week)),
print ""