mirror of
				https://github.com/zulip/zulip.git
				synced 2025-10-26 17:43:58 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			80 lines
		
	
	
		
			2.6 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			80 lines
		
	
	
		
			2.6 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| from __future__ import print_function
 | |
| from typing import Any, Dict, List
 | |
| # This is hacky code to analyze data on our support stream.  The main
 | |
| # reusable bits are get_recent_messages and get_words.
 | |
| 
 | |
| import zulip
 | |
| import re
 | |
| import collections
 | |
| 
 | |
def get_recent_messages(client, narrow, count=100):
    # type: (Any, str, int) -> List[Dict[str, Any]]
    """Fetch up to `count` messages matching `narrow`, looking backwards.

    `narrow` is a whitespace-separated string of `operator:operand` terms,
    e.g. 'stream:support'.  Each term is turned into an
    [operator, operand] pair before being sent to the server.

    `client` must provide a `do_api_query(request, url, method=...)` method.

    Returns the list under the 'messages' key of the response, or an empty
    list when the response carries no such key.
    """
    # Split on the first colon only, so that an operand which itself
    # contains a colon (e.g. 'topic:10:30') stays a single operand rather
    # than producing a malformed three-element term.
    narrow_terms = [term.split(':', 1) for term in narrow.split()]
    request = {
        'narrow': narrow_terms,
        'num_before': count,
        'num_after': 0,
        # Presumably chosen to be larger than any real message id, so that
        # num_before counts back from the newest message -- confirm.
        'anchor': 1000000000,
        'apply_markdown': False,
    }
    response = client.do_api_query(request, zulip.API_VERSTRING + 'messages', method='GET')
    if 'messages' not in response:
        return []
    return response['messages']
 | |
| 
 | |
def get_words(content):
    # type: (str) -> List[str]
    """Split `content` into lower-cased words.

    A word is, in order of preference:
      * a run of two or more capitals not followed by a lower-case letter
        (so 'HTTPServer' yields 'http' and 'server');
      * a capitalized word immediately followed by another capital
        (so 'CamelCase' yields 'camel' and 'case');
      * any run of word characters, apostrophes and hyphens
        (so "can't" and 're-do' each stay in one piece).

    No stemming or plural stripping is applied.
    """
    # Raw string: the pattern contains regex escapes (\w, \-) that are
    # invalid escape sequences in an ordinary string literal.
    pattern = r"[A-Z]{2,}(?![a-z])|[A-Z][a-z]+(?=[A-Z])|['\w\-]+"
    return [word.lower() for word in re.findall(pattern, content)]
 | |
| 
 | |
def analyze_messages(msgs, word_count, email_count):
    # type: (List[Dict[str, Any]], Dict[str, int], Dict[str, int]) -> None
    """Tally request words and requester emails from ticket messages.

    Each message's 'content' is searched for the shape
    'ticket Zxxxx ... email: <addr> ... ~~~<request text>'.  Messages that
    do not match are skipped.  For a match, every word of the request text
    (as split by get_words) is counted in `word_count`, and the email is
    counted in `email_count`.

    Both counters are updated in place with `+= 1`, so they must supply a
    default of 0 for unseen keys (e.g. collections.defaultdict(int)).
    """
    # Compiled once, outside the loop.  re.S lets '.*' span the newlines
    # between the ticket header, the email line and the request body.
    ticket_re = re.compile(r'ticket (Z....).*email: (\S+).*~~~(.*)', re.S)

    for msg in msgs:
        match = ticket_re.search(msg['content'])
        if not match:
            continue
        # The ticket id is captured by the pattern but not used in the tallies.
        _ticket, email, request_text = match.groups()
        for word in get_words(request_text):
            word_count[word] += 1
        email_count[email] += 1
 | |
| 
 | |
def generate_support_stats(narrow='stream:support', count=2000, min_count=10,
                           min_length=5, show_words=True, show_emails=False):
    # type: (str, int, int, int, bool, bool) -> None
    """Print word (and optionally email) frequencies for recent messages.

    Fetches up to `count` messages matching `narrow` with a default
    zulip.Client(), tallies them with analyze_messages, and prints:

      * when `show_words` is true: every word seen at least `min_count`
        times and at least `min_length` characters long, most frequent
        first, as 'word count';
      * when `show_emails` is true: every requester email, most frequent
        first, as 'email count'.

    Called with no arguments, it prints only the word report for the
    'stream:support' narrow.
    """
    client = zulip.Client()
    msgs = get_recent_messages(client, narrow, count)

    word_count = collections.defaultdict(int)  # type: Dict[str, int]
    email_count = collections.defaultdict(int)  # type: Dict[str, int]
    analyze_messages(msgs, word_count, email_count)

    if show_words:
        frequent_words = [
            word for word, seen in word_count.items()
            if seen >= min_count and len(word) >= min_length
        ]
        for word in sorted(frequent_words, key=lambda w: word_count[w], reverse=True):
            print(word, word_count[word])

    if show_emails:
        for email in sorted(email_count, key=lambda e: email_count[e], reverse=True):
            print(email, email_count[email])
 | |
| 
 | |
# Only run the report when executed as a script, so that importing this
# module for its helper functions does not trigger a network query.
if __name__ == '__main__':
    generate_support_stats()
 |