mirror of
				https://github.com/zulip/zulip.git
				synced 2025-11-03 21:43:21 +00:00 
			
		
		
		
	Fixes #2665. Regenerated by tabbott with `lint --fix` after a rebase and change in parameters. Note from tabbott: In a few cases, this converts technical debt in the form of unsorted imports into different technical debt in the form of our largest files having very long, ugly import sequences at the start. I expect this change will increase pressure for us to split those files, which isn't a bad thing. Signed-off-by: Anders Kaseorg <anders@zulip.com>
		
			
				
	
	
		
			150 lines
		
	
	
		
			5.6 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			150 lines
		
	
	
		
			5.6 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
	
	
#!/usr/bin/env python3
"""
Fetch contributors data from GitHub using their API, convert it to structured
JSON data for the /team page contributors section.
"""
import os
import sys

# Put the parent of this script's directory at the front of sys.path so the
# `scripts` package imported on the next line can be resolved.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
from scripts.lib.setup_path import setup_path

# NOTE(review): setup_path() is defined elsewhere; presumably it adjusts the
# import path for the project's environment -- confirm in
# scripts/lib/setup_path.  It is deliberately called before the remaining
# imports below, so do not reorder this section.
setup_path()

import argparse
import logging
from datetime import date
from random import randrange
from time import sleep
from typing import Any, Dict, List, Optional, Union

from typing_extensions import TypedDict

# The settings module must be named in the environment before django.setup()
# runs; `django.conf.settings` is only imported after setup has completed.
os.environ['DJANGO_SETTINGS_MODULE'] = 'zproject.settings'
import django

django.setup()

import json

import requests
from django.conf import settings
 | 
						|
 | 
						|
# JSON file located next to this script.  update_contributor_data_file()
# loads it and subtracts the per-committer counts it contains from each
# committer's 'server' total.
duplicate_commits_file = os.path.join(os.path.dirname(__file__), 'duplicate_commits.json')

# Command-line options.  Parsing happens at module level, so `args` is
# populated as soon as this file is executed (or imported).
parser = argparse.ArgumentParser()
parser.add_argument('--max-retries', type=int, default=10,
                    help='Number of times to retry fetching data from Github')
args = parser.parse_args()
 | 
						|
 | 
						|
# Shape of the document that write_to_disk() serializes for the /team page.
class ContributorsJSON(TypedDict):
    # Generation date, stored as str(date.today()).
    date: str
    # One dict per contributor.  As built in update_contributor_data_file(),
    # each holds 'name' and 'avatar' strings plus one integer commit total
    # per repository key (e.g. 'server').
    contrib: List[Dict[str, Union[str, int]]]
 | 
						|
 | 
						|
# Module-level logger; used to emit a warning before the script exits with
# status 1 on a write failure or after exhausting fetch retries.
logger = logging.getLogger('zulip.fetch_contributors_json')
 | 
						|
 | 
						|
def fetch_contributors(repo_link: str, timeout: float = 30.0) -> Optional[List[Dict[str, Dict[str, Any]]]]:
    """
    Fetch the contributor statistics found at `repo_link`.

    The value of the CUSTOM_CA_CERTIFICATES environment variable (None when
    it is unset) is forwarded as the `verify` argument of the request.

    `timeout` is the number of seconds to wait on the server before giving
    up; without it a stalled connection would block the script forever.

    Returns the decoded JSON body when the response status is 200.  Returns
    None for any other status, and also when the request itself fails
    (connection error, timeout, ...), so that the caller's retry/backoff
    logic handles transient network problems instead of the script crashing.
    """
    try:
        r: requests.Response = requests.get(
            repo_link,
            verify=os.environ.get('CUSTOM_CA_CERTIFICATES'),
            timeout=timeout,
        )
    except requests.RequestException as e:
        # Treat transport-level failures like a non-200 response: report
        # them and let the caller decide whether to retry.
        logger.warning("Error fetching %s: %s", repo_link, e)
        return None
    return r.json() if r.status_code == 200 else None
 | 
						|
 | 
						|
def write_to_disk(json_data: ContributorsJSON, out_file: str) -> None:
    """
    Write `json_data` to `out_file` as JSON (2-space indent, sorted keys)
    followed by a trailing newline.

    If the file cannot be opened or written (OSError), the error is logged
    as a warning and the process exits with status 1.
    """
    # Serialize before opening the file: opening with 'w' truncates it, so a
    # serialization error must not be able to leave an empty file behind.
    serialized = json.dumps(json_data, indent=2, sort_keys=True)
    try:
        # The `with` sits inside the `try` so that failures of open() itself
        # (missing directory, permissions, ...) are handled as well, not only
        # failures of write().
        with open(out_file, 'w') as f:
            f.write(f"{serialized}\n")
    except OSError as e:
        logger.warning(e)
        sys.exit(1)
 | 
						|
 | 
						|
def update_contributor_data_file() -> None:
    """
    Collect per-repository commit totals for every contributor from Github
    and write the combined result to settings.CONTRIBUTOR_DATA_FILE_PATH.

    A repository whose fetch yields no data stays pending and is tried again
    on the next pass.  After each failed fetch the script sleeps for a random
    whole number of seconds below min(64, 2**failures), where `failures`
    counts the failed fetches since the last successful one.  Once that count
    exceeds --max-retries a warning is logged and the script exits with
    status 1.
    """

    # Every repository that counts towards the totals belongs in this
    # mapping, even those that should *not* get a tab on the team page
    # (e.g. deprecated ones).
    repositories = {
        'server': 'https://api.github.com/repos/zulip/zulip/stats/contributors',
        'desktop': 'https://api.github.com/repos/zulip/zulip-desktop/stats/contributors',
        'mobile': 'https://api.github.com/repos/zulip/zulip-mobile/stats/contributors',
        'python-zulip-api': 'https://api.github.com/repos/zulip/python-zulip-api/stats/contributors',
        'zulip-js': 'https://api.github.com/repos/zulip/zulip-js/stats/contributors',
        'zulipbot': 'https://api.github.com/repos/zulip/zulipbot/stats/contributors',
        'terminal': 'https://api.github.com/repos/zulip/zulip-terminal/stats/contributors',
        'zulip-ios-legacy': 'https://api.github.com/repos/zulip/zulip-ios-legacy/stats/contributors',
        'zulip-android': 'https://api.github.com/repos/zulip/zulip-android/stats/contributors',
    }

    data: ContributorsJSON = dict(date=str(date.today()), contrib=[])
    contribs_list: Dict[str, Dict[str, Union[str, int]]] = {}
    retry_attempts = 0

    # Sweep over the pending repositories until every one has been fetched.
    while repositories:
        # Walk a snapshot so finished repositories can be dropped on the spot.
        for name, link in list(repositories.items()):
            contribs = fetch_contributors(link)

            if not contribs:
                retry_attempts += 1
                if retry_attempts > args.max_retries:
                    logger.warning("Failed retries fetching contributors data from Github.")
                    sys.exit(1)

                # Randomized, exponentially growing pause before moving on.
                sleep(randrange(0, min(64, 2**retry_attempts)))
                continue

            for contrib in contribs:
                # The assertions below are sanity checks on the response
                # shape (marked "to improve/clarify" by the original author).
                assert contrib is not None

                author = contrib.get('author')
                if author is None:
                    # Users who have deleted their GitHub account have no
                    # author record; skip them.
                    continue

                username = author.get('login')
                assert username is not None

                avatar = author.get('avatar_url')
                assert avatar is not None
                total = contrib.get('total')
                assert total is not None

                # Merge this repository's total into the contributor's entry,
                # creating the entry on first sight.
                contribs_list.setdefault(username, {}).update({
                    'avatar': avatar,
                    name: total,
                })

            del repositories[name]
            retry_attempts = 0

    # Remove duplicated contribution counts: commits that existed at the
    # time of a repository split are subtracted from the zulip-server total.
    with open(duplicate_commits_file) as f:
        duplicate_commits = json.load(f)

    for committer in duplicate_commits:
        entry = contribs_list.get(committer)
        if entry is None or not entry.get('server'):
            continue

        total_commits = entry['server']
        assert isinstance(total_commits, int)
        entry['server'] = total_commits - duplicate_commits[committer]

    # Flatten the username -> entry mapping into the output list, recording
    # the username inside each entry.
    for username, entry in contribs_list.items():
        entry['name'] = username
        data['contrib'].append(entry)

    write_to_disk(data, settings.CONTRIBUTOR_DATA_FILE_PATH)
 | 
						|
 | 
						|
# Regenerate the contributors data file only when run as a script.
if __name__ == "__main__":
    update_contributor_data_file()
 |