NOTE(review): reflowed from a copy whose line breaks and indentation had been
collapsed. Leading whitespace and blank lines on context lines are
reconstructed; apply with `git apply --ignore-whitespace` and confirm.

diff --git a/analytics/management/commands/active_user_stats_by_day.py b/analytics/management/commands/active_user_stats_by_day.py
new file mode 100644
index 0000000000..37f12468a5
--- /dev/null
+++ b/analytics/management/commands/active_user_stats_by_day.py
@@ -0,0 +1,24 @@
+from __future__ import absolute_import
+
+import datetime
+import pytz
+
+from optparse import make_option
+from django.core.management.base import BaseCommand
+from zerver.lib.statistics import activity_averages_during_day
+
+class Command(BaseCommand):
+    help = "Generate statistics on user activity for a given day."
+
+    option_list = BaseCommand.option_list + \
+        (make_option('--date', default=None, action='store',
+                     help="Day to query in format 2013-12-05. Default is yesterday"),)
+
+    def handle(self, *args, **options):
+        if options["date"] is None:
+            date = datetime.datetime.now() - datetime.timedelta(days=1)
+        else:
+            date = datetime.datetime.strptime(options["date"], "%Y-%m-%d")
+        print "Activity data for", date
+        print activity_averages_during_day(date)
+        print "Please note that the total registered user count is a total for today"
diff --git a/zerver/lib/statistics.py b/zerver/lib/statistics.py
index 4a516f1bab..abdacad4ad 100644
--- a/zerver/lib/statistics.py
+++ b/zerver/lib/statistics.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 from __future__ import absolute_import
 
-from zerver.models import UserProfile, UserActivity, UserActivityInterval
+from zerver.models import UserProfile, UserActivity, UserActivityInterval, Message
 
 from django.utils.timezone import utc
 
@@ -19,22 +19,43 @@ def median(data):
         after = size//2
         return (data[before] + data[after]) / 2.0
 
-def active_users_to_measure():
+# Base query for messages that count as a "send": anything sent by a client
+# whose name contains "mirror" or "API" is left out.
+users_who_sent_query = Message.objects.select_related("sender") \
+        .exclude(sending_client__name__contains="mirror") \
+        .exclude(sending_client__name__contains="API")
+
+def active_users():
     # Return a list of active users we want to count towards various
     # statistics. This eliminates bots, @zulip.com, @customer29.invalid and customer3.invalid
-    exclude_realms = ["zulip.com", "customer29.invalid", "customer3.invalid"]
+    exclude_realms = ["zulip.com", "customer29.invalid", "customer3.invalid",
+                      "ios_appreview.zulip.com", "wdaher.com", "customer30.invalid"]
     return UserProfile.objects.filter(is_bot=False, is_active=True) \
                               .exclude(realm__domain__in=exclude_realms) \
                               .select_related()
 
-# Return a set of users who have done some activity in the given timespan--that is,
-# we have a UserActivity row for them. This counts pointer moves, flag updates, etc.
-def users_active_between(begin, end):
-    activities = UserActivity.objects.filter(last_visit__gt=begin, last_visit__lt=end)
-    active = set([a.user_profile for a in activities])
+# Return the set of ids of users who sent a message (per users_who_sent_query)
+# strictly between begin and end.
+def users_who_sent_between(begin, end):
+    sender_objs = users_who_sent_query.filter(pub_date__gt=begin, pub_date__lt=end) \
+        .values("sender__id")
+    return set(s["sender__id"] for s in sender_objs)
 
-    interesting_users = set(active_users_to_measure())
-    return active.intersection(interesting_users)
+# Return the set of ids of users who have ever sent a message (per
+# users_who_sent_query).
+def users_who_sent_ever():
+    return set(s["sender__id"] for s in users_who_sent_query.values("sender__id"))
+
+# Return the list of active users (see active_users) who have ever sent a message.
+def active_users_to_measure():
+    senders = users_who_sent_ever()
+    return [u for u in active_users() if u.id in senders]
+
+# Return the list of active users (see active_users) who sent a message
+# between begin and end.
+def active_users_who_sent_between(begin, end):
+    senders = users_who_sent_between(begin, end)
+    return [u for u in active_users() if u.id in senders]
 
 # Return the amount of Zulip usage for this user between the two
 # given dates
@@ -51,7 +72,7 @@ def seconds_usage_between(user_profile, begin, end):
 def seconds_active_during_day(day):
     begin_day = day.replace(hour=0, minute=0, second=0, microsecond=0, tzinfo=utc)
     end_day = day.replace(hour=23, minute=59, second=59, microsecond=0, tzinfo=utc)
-    active_users = users_active_between(begin_day, end_day)
+    active_users = active_users_to_measure()
 
     # Exclude Friday CUSTOMER4 activity numbers
     if day.weekday() == 4:
@@ -59,25 +80,63 @@ def seconds_active_during_day(day):
 
     return [seconds_usage_between(user, begin_day, end_day).total_seconds() for user in active_users]
 
-def calculate_stats(data):
+# Return the users to measure who have a UserActivityInterval overlapping the
+# given day but did not send a message (per users_who_sent_between) that day.
+def users_active_nosend_during_day(day):
+    begin_day = day.replace(hour=0, minute=0, second=0, microsecond=0, tzinfo=utc)
+    end_day = day.replace(hour=23, minute=59, second=59, microsecond=0, tzinfo=utc)
+    active_users = active_users_to_measure()
+    today_senders = users_who_sent_between(begin_day, end_day)
+
+    today_users = []
+    for user_profile in active_users:
+        intervals = UserActivityInterval.objects.filter(user_profile=user_profile,
+                                                        end__gte=begin_day,
+                                                        start__lte=end_day)
+        # exists() asks the database whether any row matches instead of
+        # fetching every interval just to count them.
+        if intervals.exists():
+            today_users.append(user_profile)
+    return [u for u in today_users if u.id not in today_senders]
+
+# Summarize per-user activity seconds: data holds one number of seconds per
+# data point, all_users is the population reported as 'total users'.
+def calculate_stats(data, all_users):
     if len(data) == 0:
-        return 0, 0
+        return {"# data points": 0}
 
-    mean_data = sum(data) / len(data)
-    median_data = median(data)
+    # Only data points above one second count as an active user; the mean and
+    # the median are both taken over that same population.
+    active_data = [x for x in data if x > 1]
+    active_user_count = len(active_data)
+    if active_user_count == 0:
+        # Nobody was active: report counts only rather than dividing by zero
+        # or taking the median of an empty list.
+        return {'active users': 0,
+                'total users': len(all_users),
+                '# data points': len(data)}
+
+    mean_data = sum(active_data) / active_user_count
+    median_data = median(active_data)
 
-    return {'mean': str(timedelta(seconds=mean_data)), 'median': str(timedelta(seconds=median_data)), '# data points': len(data)}
+    return {'active users': active_user_count,
+            'total users': len(all_users),
+            'mean': str(timedelta(seconds=mean_data)),
+            'median': str(timedelta(seconds=median_data)),
+            '# data points': len(data)}
 
 # Return an info dict {mean: , median} containing the mean/median seconds users were active on a given day
 def activity_averages_during_day(day):
+    users_to_measure = active_users_to_measure()
     seconds_active = seconds_active_during_day(day)
-    return calculate_stats(seconds_active)
+    return calculate_stats(seconds_active, all_users=users_to_measure)
 
 # Returns an info dict {mean: , median} with engagement numbers for all users according
 # to active_users_to_measure. This will ignore weekends, and ignore users.customer4.invalid
 # on Fridays
 def activity_averages_between(begin, end, by_day=True):
     seconds_active = {}
+    users_to_measure = active_users_to_measure()
 
     for i in range((end - begin).days):
         day = begin + timedelta(days=i)
@@ -88,6 +147,8 @@ def activity_averages_between(begin, end, by_day=True):
         seconds_active[day] = seconds_active_during_day(day)
 
     if by_day:
-        return dict((day, calculate_stats(values)) for day, values in seconds_active.iteritems())
+        return dict((day, calculate_stats(values, all_users=users_to_measure))
+                    for day, values in seconds_active.iteritems())
     else:
-        return calculate_stats(list(chain.from_iterable(seconds_active.values())))
+        return calculate_stats(list(chain.from_iterable(seconds_active.values())),
+                               all_users=users_to_measure)