mirror of
				https://github.com/zulip/zulip.git
				synced 2025-10-30 19:43:47 +00:00 
			
		
		
		
	Add tool to query our usage stats as of a given date.
This contains the various fixes that were needed in order to get accurate statistics. Most notably, the users_active_between function in the previous version of zerver/lib/statistics.py was broken for end dates in the past, because it used the UserActivity table to get its data -- so in fact it was really querying "users last active between". This commit isn't super clean, but I figure we're probably better off having our latest code for historical usage data in git so it doesn't bitrot and anyone can improve on it. (imported from commit 24ff2f24a22e5bdc004ea8043d8da12deb97ff2f)
This commit is contained in:
		
							
								
								
									
										24
									
								
								analytics/management/commands/active_user_stats_by_day.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										24
									
								
								analytics/management/commands/active_user_stats_by_day.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,24 @@ | ||||
| from __future__ import absolute_import | ||||
|  | ||||
| import datetime | ||||
| import pytz | ||||
|  | ||||
| from optparse import make_option | ||||
| from django.core.management.base import BaseCommand | ||||
| from zerver.lib.statistics import activity_averages_during_day | ||||
|  | ||||
| class Command(BaseCommand): | ||||
|     help = "Generate statistics on user activity for a given day." | ||||
|  | ||||
|     option_list = BaseCommand.option_list + \ | ||||
|         (make_option('--date', default=None, action='store', | ||||
|                      help="Day to query in format 2013-12-05.  Default is yesterday"),) | ||||
|  | ||||
|     def handle(self, *args, **options): | ||||
|         if options["date"] is None: | ||||
|             date = datetime.datetime.now() - datetime.timedelta(days=1) | ||||
|         else: | ||||
|             date = datetime.datetime.strptime(options["date"], "%Y-%m-%d") | ||||
|         print "Activity data for", date | ||||
|         print activity_averages_during_day(date) | ||||
|         print "Please note that the total registered user count is a total for today" | ||||
| @@ -1,7 +1,7 @@ | ||||
| # -*- coding: utf-8 -*- | ||||
| from __future__ import absolute_import | ||||
|  | ||||
| from zerver.models import UserProfile, UserActivity, UserActivityInterval | ||||
| from zerver.models import UserProfile, UserActivity, UserActivityInterval, Message | ||||
|  | ||||
| from django.utils.timezone import utc | ||||
|  | ||||
| @@ -19,22 +19,34 @@ def median(data): | ||||
|         after = size//2 | ||||
|         return (data[before] + data[after]) / 2.0 | ||||
|  | ||||
| def active_users_to_measure(): | ||||
| users_who_sent_query = Message.objects.select_related("sender") \ | ||||
|         .exclude(sending_client__name__contains="mirror") \ | ||||
|         .exclude(sending_client__name__contains="API") | ||||
|  | ||||
| def active_users(): | ||||
|     # Return a list of active users we want to count towards various | ||||
|     # statistics. This eliminates bots, @zulip.com, @customer29.invalid and customer3.invalid | ||||
|     exclude_realms = ["zulip.com", "customer29.invalid", "customer3.invalid"] | ||||
|     exclude_realms = ["zulip.com", "customer29.invalid", "customer3.invalid", | ||||
|                       "ios_appreview.zulip.com", "wdaher.com", "customer30.invalid"] | ||||
|     return UserProfile.objects.filter(is_bot=False, is_active=True) \ | ||||
|                               .exclude(realm__domain__in=exclude_realms) \ | ||||
|                               .select_related() | ||||
|  | ||||
| # Return a set of users who have done some activity in the given timespan--that is, | ||||
| # we have a UserActivity row for them. This counts pointer moves, flag updates, etc. | ||||
| def users_active_between(begin, end): | ||||
|     activities = UserActivity.objects.filter(last_visit__gt=begin, last_visit__lt=end) | ||||
|     active = set([a.user_profile for a in activities]) | ||||
| def users_who_sent_between(begin, end): | ||||
|     sender_objs = users_who_sent_query.filter(pub_date__gt=begin, pub_date__lt=end) \ | ||||
|         .values("sender__id") | ||||
|     return set(s["sender__id"] for s in sender_objs) | ||||
|  | ||||
|     interesting_users = set(active_users_to_measure()) | ||||
|     return active.intersection(interesting_users) | ||||
| def users_who_sent_ever(): | ||||
|     return set(s["sender__id"] for s in users_who_sent_query.values("sender__id")) | ||||
|  | ||||
| def active_users_to_measure(): | ||||
|     senders = users_who_sent_ever() | ||||
|     return [u for u in active_users() if u.id in senders] | ||||
|  | ||||
| def active_users_who_sent_between(begin, end): | ||||
|     senders = users_who_sent_between(begin, end) | ||||
|     return [u for u in active_users() if u.id in senders] | ||||
|  | ||||
| # Return the amount of Zulip usage for this user between the two | ||||
| # given dates | ||||
| @@ -51,7 +63,7 @@ def seconds_usage_between(user_profile, begin, end): | ||||
| def seconds_active_during_day(day): | ||||
|     begin_day = day.replace(hour=0, minute=0, second=0, microsecond=0, tzinfo=utc) | ||||
|     end_day = day.replace(hour=23, minute=59, second=59, microsecond=0, tzinfo=utc) | ||||
|     active_users = users_active_between(begin_day, end_day) | ||||
|     active_users = active_users_to_measure() | ||||
|  | ||||
|     # Exclude Friday CUSTOMER4 activity numbers | ||||
|     if day.weekday() == 4: | ||||
| @@ -59,25 +71,47 @@ def seconds_active_during_day(day): | ||||
|  | ||||
|     return [seconds_usage_between(user, begin_day, end_day).total_seconds() for user in active_users] | ||||
|  | ||||
| def calculate_stats(data): | ||||
| def users_active_nosend_during_day(day): | ||||
|     begin_day = day.replace(hour=0, minute=0, second=0, microsecond=0, tzinfo=utc) | ||||
|     end_day = day.replace(hour=23, minute=59, second=59, microsecond=0, tzinfo=utc) | ||||
|     active_users = active_users_to_measure() | ||||
|     today_senders = users_who_sent_between(begin_day, end_day) | ||||
|  | ||||
|     today_users = [] | ||||
|     for user_profile in active_users: | ||||
|         intervals = UserActivityInterval.objects.filter(user_profile=user_profile, | ||||
|                                                         end__gte=begin_day, | ||||
|                                                         start__lte=end_day) | ||||
|         if len(intervals) != 0: | ||||
|             today_users.append(user_profile) | ||||
|     return [u for u in today_users if not u.id in today_senders] | ||||
|  | ||||
| def calculate_stats(data, all_users): | ||||
|     if len(data) == 0: | ||||
|         return 0, 0 | ||||
|         return {"# data points": 0} | ||||
|  | ||||
|     mean_data = sum(data) / len(data) | ||||
|     median_data = median(data) | ||||
|     active_user_count = len([x for x in data if x > 1]) | ||||
|     mean_data = sum(data) / active_user_count | ||||
|     median_data = median([x for x in data if x > 1]) | ||||
|  | ||||
|     return {'mean': str(timedelta(seconds=mean_data)), 'median': str(timedelta(seconds=median_data)), '# data points': len(data)} | ||||
|     return {'active users': active_user_count, | ||||
|             'total users': len(all_users), | ||||
|             'mean': str(timedelta(seconds=mean_data)), | ||||
|             'median': str(timedelta(seconds=median_data)), | ||||
|             '# data points': len(data)} | ||||
|  | ||||
| # Return an info dict {mean: , median} containing the mean/median seconds users were active on a given day | ||||
| def activity_averages_during_day(day): | ||||
|     users_to_measure = active_users_to_measure() | ||||
|     seconds_active = seconds_active_during_day(day) | ||||
|     return calculate_stats(seconds_active) | ||||
|     return calculate_stats(seconds_active, all_users=users_to_measure) | ||||
|  | ||||
| # Returns an info dict {mean: , median} with engagement numbers for all users according | ||||
| # to active_users_to_measure. This will ignore weekends, and ignore users.customer4.invalid | ||||
| # on Fridays | ||||
| def activity_averages_between(begin, end, by_day=True): | ||||
|     seconds_active = {} | ||||
|     users_to_measure = active_users_to_measure() | ||||
|     for i in range((end - begin).days): | ||||
|         day = begin + timedelta(days=i) | ||||
|  | ||||
| @@ -88,6 +122,8 @@ def activity_averages_between(begin, end, by_day=True): | ||||
|         seconds_active[day] = seconds_active_during_day(day) | ||||
|  | ||||
|     if by_day: | ||||
|         return dict((day, calculate_stats(values)) for day, values in seconds_active.iteritems()) | ||||
|         return dict((day, calculate_stats(values, all_users=users_to_measure)) | ||||
|                     for day, values in seconds_active.iteritems()) | ||||
|     else: | ||||
|         return calculate_stats(list(chain.from_iterable(seconds_active.values()))) | ||||
|         return calculate_stats(list(chain.from_iterable(seconds_active.values())), | ||||
|                                all_users=users_to_measure) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user