Files
zulip/tools/fetch-contributor-data
Anders Kaseorg 365fe0b3d5 python: Sort imports with isort.
Fixes #2665.

Regenerated by tabbott with `lint --fix` after a rebase and change in
parameters.

Note from tabbott: In a few cases, this converts technical debt in the
form of unsorted imports into different technical debt in the form of
our largest files having very long, ugly import sequences at the
start.  I expect this change will increase pressure for us to split
those files, which isn't a bad thing.

Signed-off-by: Anders Kaseorg <anders@zulip.com>
2020-06-11 16:45:32 -07:00

150 lines
5.6 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Fetch contributors data from GitHub using their API, convert it to structured
JSON data for the /team page contributors section.
"""
import os
import sys

# Make the repository root importable before pulling in project modules.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
from scripts.lib.setup_path import setup_path

# Activate the project's virtualenv; must run before any third-party imports.
setup_path()

import argparse
import logging
from datetime import date
from random import randrange
from time import sleep
from typing import Any, Dict, List, Optional, Union

from typing_extensions import TypedDict

# Django settings module must be set before django.setup() is called.
os.environ['DJANGO_SETTINGS_MODULE'] = 'zproject.settings'
import django

django.setup()

import json

import requests
from django.conf import settings

# Commits recorded in more than one repository (e.g. after a repo split);
# used later to subtract duplicates from the server commit totals.
duplicate_commits_file = os.path.join(os.path.dirname(__file__), 'duplicate_commits.json')

parser = argparse.ArgumentParser()
parser.add_argument('--max-retries', type=int, default=10,
                    help='Number of times to retry fetching data from Github')
args = parser.parse_args()
class ContributorsJSON(TypedDict):
    """Shape of the JSON document written out for the /team page."""

    # ISO date string for when the data was generated (str(date.today())).
    date: str
    # One entry per contributor: 'name', 'avatar', plus per-repo commit counts.
    contrib: List[Dict[str, Union[str, int]]]


logger = logging.getLogger('zulip.fetch_contributors_json')
def fetch_contributors(repo_link: str) -> Optional[List[Dict[str, Dict[str, Any]]]]:
    """Fetch one repository's contributor stats from the GitHub API.

    Returns the decoded JSON body on HTTP 200, and None for any other
    status (the caller treats None as "retry later" — GitHub returns 202
    while it computes stats).
    """
    response: requests.Response = requests.get(
        repo_link, verify=os.environ.get('CUSTOM_CA_CERTIFICATES'))
    if response.status_code == 200:
        return response.json()
    return None
def write_to_disk(json_data: "ContributorsJSON", out_file: str) -> None:
    """Write json_data to out_file as pretty-printed, key-sorted JSON.

    Logs a warning and exits with status 1 on any OS-level failure.
    The annotation is a string forward reference purely so the function
    does not require ContributorsJSON to exist at definition time; the
    runtime behavior is unchanged.
    """
    try:
        # Previously only f.write() was inside the try block, so an
        # OSError raised by open() itself (missing directory, permission
        # denied, read-only filesystem) escaped the intended
        # log-and-exit handling.  Guard the open as well.
        with open(out_file, 'w') as f:
            f.write(f"{json.dumps(json_data, indent=2, sort_keys=True)}\n")
    except OSError as e:
        logger.warning(e)
        sys.exit(1)
def update_contributor_data_file() -> None:
    """
    Get contributors data from Github and insert them into a temporary
    dictionary. Retry fetching each repository if responded with non HTTP 200
    status.
    """
    # This dictionary should hold all repositories that should be included in
    # the total count, including those that should *not* have tabs on the team
    # page (e.g. if they are deprecated).
    repositories = {
        'server': 'https://api.github.com/repos/zulip/zulip/stats/contributors',
        'desktop': 'https://api.github.com/repos/zulip/zulip-desktop/stats/contributors',
        'mobile': 'https://api.github.com/repos/zulip/zulip-mobile/stats/contributors',
        'python-zulip-api': 'https://api.github.com/repos/zulip/python-zulip-api/stats/contributors',
        'zulip-js': 'https://api.github.com/repos/zulip/zulip-js/stats/contributors',
        'zulipbot': 'https://api.github.com/repos/zulip/zulipbot/stats/contributors',
        'terminal': 'https://api.github.com/repos/zulip/zulip-terminal/stats/contributors',
        'zulip-ios-legacy': 'https://api.github.com/repos/zulip/zulip-ios-legacy/stats/contributors',
        'zulip-android': 'https://api.github.com/repos/zulip/zulip-android/stats/contributors',
    }

    data: ContributorsJSON = dict(date=str(date.today()), contrib=[])
    # Accumulator keyed by GitHub username; each value maps 'avatar' plus one
    # repo-name key per repository the user has commits in.
    contribs_list: Dict[str, Dict[str, Union[str, int]]] = {}
    retry_attempts = 0

    # Keep looping until every repository has been fetched successfully;
    # each pass retries only the repositories that are still pending.
    while True:
        repos_done = []
        for name, link in repositories.items():
            contribs = fetch_contributors(link)
            if contribs:
                repos_done.append(name)
                for contrib in contribs:
                    assert contrib is not None  # TODO: To improve/clarify

                    author = contrib.get('author')
                    if author is None:
                        # This happens for users who've deleted their GitHub account.
                        continue

                    username = author.get('login')
                    assert username is not None  # TODO: To improve/clarify

                    avatar = author.get('avatar_url')
                    assert avatar is not None  # TODO: To improve/clarify

                    total = contrib.get('total')
                    assert total is not None  # TODO: To improve/clarify

                    # NOTE: the repo name itself is used as the dict key, so a
                    # user contributing to several repos accumulates one
                    # commit-count entry per repo in the same record.
                    contrib_data = {
                        'avatar': avatar,
                        name: total,
                    }
                    if username in contribs_list:
                        contribs_list[username].update(contrib_data)
                    else:
                        contribs_list[username] = contrib_data
                # A success resets the backoff for the remaining repositories.
                retry_attempts = 0
            else:
                retry_attempts += 1
                if retry_attempts > args.max_retries:
                    logger.warning("Failed retries fetching contributors data from Github.")
                    sys.exit(1)

                # Randomized exponential backoff, capped at 64 seconds.
                sleep_time = randrange(0, min(64, 2**retry_attempts))
                sleep(sleep_time)

        # Drop completed repositories so the next pass only retries failures.
        for repo in repos_done:
            del repositories[repo]
        if not repositories:
            break

    # remove duplicate contributions count
    # find commits at the time of split and subtract from zulip-server
    with open(duplicate_commits_file) as f:
        duplicate_commits = json.load(f)
        for committer in duplicate_commits:
            if committer in contribs_list and contribs_list[committer].get('server'):
                total_commits = contribs_list[committer]['server']
                assert isinstance(total_commits, int)
                duplicate_commits_count = duplicate_commits[committer]
                original_commits = total_commits - duplicate_commits_count
                contribs_list[committer]['server'] = original_commits

    # Flatten the username-keyed accumulator into the output list.
    for contributor_name, contributor_data in contribs_list.items():
        contributor_data['name'] = contributor_name
        data['contrib'].append(contributor_data)

    write_to_disk(data, settings.CONTRIBUTOR_DATA_FILE_PATH)
# Only fetch and write data when run as a script, so the module can be
# imported without side effects.
if __name__ == "__main__":
    update_contributor_data_file()