Files
zulip/tools/fetch-contributor-data
Vishnu KS 3ec64b6092 team: Include users without an associated GitHub profile.
Including anon=1 in API requests will retrieve all contributors
of the repo. If there is no associated GitHub account present for
the commits, then the email and name of the author mentioned in
the commit messages are returned.
2020-07-24 10:51:47 -07:00

150 lines
5.6 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Fetch contributor data from GitHub using its API and convert it to structured
JSON data for the /team page contributors section.
"""
import os
import sys
# Put the parent of this script's directory at the front of sys.path so the
# `scripts.lib.setup_path` import on the next line can be resolved.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
from scripts.lib.setup_path import setup_path
# NOTE(review): presumably adjusts the import path so the project's
# third-party dependencies below resolve -- confirm in scripts/lib/setup_path.
setup_path()
import argparse
import logging
from datetime import date
from random import randrange
from time import sleep
from typing import Dict, List, Optional, Union
from typing_extensions import TypedDict
# The settings module must be named before django.setup() is called; the
# Django/Zulip imports further down are deliberately placed after setup.
os.environ['DJANGO_SETTINGS_MODULE'] = 'zproject.settings'
import django
django.setup()
import json
import requests
from django.conf import settings
from zerver.lib.avatar_hash import gravatar_hash
from zproject.config import get_secret
# JSON file stored next to this script; update_contributor_data_file() reads it
# to subtract duplicated commit counts from the 'zulip' repository totals.
duplicate_commits_file = os.path.join(os.path.dirname(__file__), 'duplicate_commits.json')
# Command-line interface. Note that the arguments are parsed at module level,
# i.e. as soon as this file is executed or imported; the parsed result is
# exposed as the module-level name `args`.
parser = argparse.ArgumentParser()
parser.add_argument('--max-retries', type=int, default=10,
                    help='Number of times to retry fetching data from Github')
args = parser.parse_args()
class ContributorsJSON(TypedDict):
    """Top-level shape of the JSON document written to the contributor data file."""

    # Generation date, produced with str(date.today()).
    date: str
    # One dict per contributor, mixing string metadata with integer
    # per-repository contribution counts.
    contributors: List[Dict[str, Union[int, str]]]
class Contributor(TypedDict):
    """One entry of the GitHub contributors listing, as returned by fetch_contributors().

    NOTE(review): entries for commit authors without a GitHub account appear to
    carry `email`/`name` rather than `login`/`avatar_url`; consumers in this
    file read these fields with .get() -- confirm against the GitHub API docs.
    """

    # Avatar image URL reported by the API, if any.
    avatar_url: Optional[str]
    # Contribution count for the repository that was queried.
    contributions: int
    # GitHub username, if the entry is tied to an account.
    login: Optional[str]
    # Author email and display name, if reported.
    email: Optional[str]
    name: Optional[str]
# Module-level logger; used to report that fetching contributor data gave up.
logger = logging.getLogger('zulip.fetch_contributors_json')
def fetch_contributors(repo_name: str, max_retries: int) -> List[Contributor]:
    """Fetch all contributor entries for one repository of the zulip organization.

    Requests the paginated GitHub ``contributors`` endpoint (with ``anon=1``)
    page by page, starting at page 1, until a page comes back empty.

    Args:
        repo_name: Name of the repository under ``github.com/zulip``.
        max_retries: How many consecutive non-200 responses are tolerated
            before giving up. The counter is reset after every successful page.

    Returns:
        The concatenated JSON entries of every page that was fetched.

    Raises:
        SystemExit: with status 1, after logging a warning, once more than
            ``max_retries`` consecutive requests have failed.
    """
    contributors: List[Contributor] = []
    retry_attempts = 0
    page_index = 1

    api_link = f"https://api.github.com/repos/zulip/{repo_name}/contributors?anon=1"
    # Optional CA bundle path, handed straight to requests' `verify` argument.
    certificates = os.environ.get('CUSTOM_CA_CERTIFICATES')

    # Authenticate only when a personal access token is configured.
    headers: Dict[str, str] = {}
    personal_access_token = get_secret('github_personal_access_token')
    if personal_access_token is not None:
        headers = {"Authorization": f"token {personal_access_token}"}

    while True:
        response: requests.Response = requests.get(
            f"{api_link}&page={page_index}", verify=certificates, headers=headers)
        if response.status_code == 200:
            data = response.json()
            if not data:
                # An empty page marks the end of the listing.
                return contributors
            contributors.extend(data)
            retry_attempts = 0
            page_index += 1
        else:
            retry_attempts += 1
            # Honor the caller-supplied limit rather than the module-level
            # command-line arguments, so the parameter actually takes effect.
            if retry_attempts > max_retries:
                logger.warning("Failed retries fetching contributors data from Github.")
                sys.exit(1)

            # Randomized exponential backoff: a whole number of seconds drawn
            # from [0, min(64, 2**retry_attempts)).
            sleep_time = randrange(0, min(64, 2**retry_attempts))
            sleep(sleep_time)
def write_to_disk(json_data: ContributorsJSON, out_file: str) -> None:
    """Serialize ``json_data`` to ``out_file`` as indented, key-sorted JSON.

    The file is opened for writing (replacing any previous content) and the
    JSON text is followed by a single trailing newline.
    """
    with open(out_file, 'w') as out:
        # print() appends the final newline after the serialized document.
        print(json.dumps(json_data, indent=2, sort_keys=True), file=out)
def update_contributor_data_file() -> None:
    """Regenerate the contributor data file from GitHub's contributor listings.

    Contributors of every repository listed below are fetched and merged into
    one record per person, keyed by GitHub login or, when there is no login,
    by email. Each record maps repository names to contribution counts and
    carries the metadata (avatar, email, github_username, name) that was
    available the first time the person was seen. Commit counts listed in the
    duplicate-commits file are then subtracted from the 'zulip' totals, and the
    result is written to settings.CONTRIBUTOR_DATA_FILE_PATH.
    """
    # This list should hold all repositories that should be included in
    # the total count, including those that should *not* have tabs on the team
    # page (e.g. if they are deprecated).
    repo_names = [
        'zulip',
        'zulip-desktop',
        'zulip-mobile',
        'python-zulip-api',
        'zulip-js',
        'zulipbot',
        'zulip-terminal',
        'zulip-ios-legacy',
        'zulip-android',
    ]

    data: ContributorsJSON = dict(date=str(date.today()), contributors=[])
    records: Dict[str, Dict[str, Union[str, int]]] = {}

    for repo_name in repo_names:
        for entry in fetch_contributors(repo_name, args.max_retries):
            username = entry.get('login') or entry.get('email')
            assert username is not None

            if username in records:
                # Known contributor: only add this repository's count.
                records[username][repo_name] = entry['contributions']
                continue

            # First sighting: create the record and copy over the metadata.
            record: Dict[str, Union[str, int]] = {repo_name: entry['contributions']}
            records[username] = record

            avatar_url = entry.get('avatar_url')
            if avatar_url is not None:
                record['avatar'] = avatar_url

            email = entry.get('email')
            if email is not None:
                record["email"] = email
                # A Gravatar identicon derived from the email replaces any
                # avatar URL stored just above.
                hash_key = gravatar_hash(email)
                record['avatar'] = f"https://secure.gravatar.com/avatar/{hash_key}?d=identicon"

            for source_key, target_key in (('login', 'github_username'), ('name', 'name')):
                value = entry.get(source_key)
                if value is not None:
                    record[target_key] = value

    # remove duplicate contributions count
    # find commits at the time of split and subtract from zulip-server
    with open(duplicate_commits_file) as f:
        duplicate_commits = json.load(f)

    for committer in duplicate_commits:
        record = records.get(committer)
        if record is None or not record.get('zulip'):
            continue
        total_commits = record['zulip']
        assert isinstance(total_commits, int)
        record['zulip'] = total_commits - duplicate_commits[committer]

    data['contributors'] = list(records.values())
    write_to_disk(data, settings.CONTRIBUTOR_DATA_FILE_PATH)
# Regenerate the contributor data file when this file is run as a script.
if __name__ == "__main__":
    update_contributor_data_file()