Files
zulip/tools/total-contributions
Anders Kaseorg df001db1a9 black: Reformat with Black 23.
Black 23 enforces some slightly more specific rules about empty line
counts and redundant parenthesis removal, but the result is still
compatible with Black 22.

(This does not actually upgrade our Python environment to Black 23
yet.)

Signed-off-by: Anders Kaseorg <anders@zulip.com>
2023-02-02 10:40:13 -08:00

215 lines
7.1 KiB
Python
Executable File

#!/usr/bin/env python3
import argparse
import os
import pathlib
import subprocess
import sys
from collections import defaultdict
from typing import Dict, List
# Running total of commits that add_log() skipped because the committer
# name ends in "[bot]"; printed in the final summary.
bot_commits = 0
# Directory two levels above this script file (the checkout root when the
# script lives in its tools/ directory).
ZULIP_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# Make that directory the process working directory: find_path() resolves
# repositories relative to the parent of the working directory, and git
# commands issued without an explicit cwd run here.
os.chdir(ZULIP_PATH)
def add_log(committer_dict: Dict[str, int], input: List[str]) -> None:
    """Fold `git shortlog -s` lines into per-committer totals.

    Each entry of `input` is a tab-separated "<count><TAB><name>" line.
    The count is added to `committer_dict[name]`, so the mapping must
    already hold the name or supply a default (the script passes a
    defaultdict(int)). Entries whose name ends in "[bot]" are not
    attributed to anyone; their count goes to the module-level
    `bot_commits` tally instead.
    """
    global bot_commits
    for line in input:
        fields = line.split("\t")
        name = fields[1]
        count = int(fields[0])
        if name.endswith("[bot]"):
            # Exclude dependabot[bot] and other GitHub bots.
            bot_commits += count
        else:
            committer_dict[name] += count
def retrieve_log(repo: str, lower_version: str, upper_version: str) -> List[str]:
    """Return the `git shortlog -s` lines for a commit range in `repo`.

    `repo` is a repository directory name resolved with find_path().
    The range normally is `lower_version..upper_version`. Either bound
    may be the empty string, which is what find_last_commit_before_time()
    yields when the repository has no commit before the requested date:

    * Empty `upper_version`: nothing existed yet at the upper bound, so
      an empty list is returned without running git.
    * Empty `lower_version`: the whole history reachable from
      `upper_version` is summarized. Building "..<upper>" would be wrong
      here, because git fills in an omitted side of ".." with HEAD and
      the range would silently become HEAD..<upper>.

    Raises subprocess.CalledProcessError if git exits with an error.
    """
    if not upper_version:
        return []
    if lower_version:
        revisions = lower_version + ".." + upper_version
    else:
        revisions = upper_version
    return subprocess.check_output(
        ["git", "shortlog", "-s", revisions],
        cwd=find_path(repo),
        text=True,
    ).splitlines()
def find_path(repository: str) -> str:
    """Return the path of `repository` inside the parent of the working directory.

    The current working directory is resolved to an absolute path, and
    `repository` is joined onto that directory's parent.
    """
    working_dir = pathlib.Path().resolve()
    checkout_parent = working_dir.parents[0]
    return str(checkout_parent.joinpath(repository))
def find_last_commit_before_time(repository: str, branch: str, time: str) -> str:
    """Return the newest commit on `branch` of `repository` dated before `time`.

    Runs `git rev-list -1 --before=<time> <branch> --` in the checkout
    located by find_path() and returns its output with surrounding
    whitespace stripped. The result is therefore the empty string
    whenever git prints nothing.
    """
    rev_list_command = ["git", "rev-list", "-1", f"--before={time}", branch, "--"]
    raw_output = subprocess.check_output(
        rev_list_command,
        cwd=find_path(repository),
        text=True,
    )
    return raw_output.strip()
# Command-line interface. The description is printed verbatim
# (RawTextHelpFormatter), so its line breaks and blank lines are part of
# the --help output.
parser = argparse.ArgumentParser(
    prog="python3 total-contributions",
    formatter_class=argparse.RawTextHelpFormatter,
    description="""\
Aggregates the total commit contributions to Zulip that should be
attributed to the time window between the two provided
zulip/zulip versions (tags or branches).

The attribution algorithm used by this tool attributes all changes for
a Zulip project between:

* The last release of the target project before the first zulip/zulip version.
* The last release of the target project before the last zulip/zulip version.

This algorithm has the key property that the totals for a given contributor of
2.1.0..4.0 will equal the sum of 2.1.0..3.0 and 3.0..4.0.

Its main downside is that contributions to projects other than
zulip/zulip in the last few weeks before a zulip/zulip release will be
delayed (i.e. counted in the total for the next zulip/zulip release).

Expects all Zulip repositories to be checked out side by side in the
directory containing this zulip/zulip checkout; the tool itself can be
run from any directory. Repositories missing from that directory are
cloned into it.

# Changes between two major releases.
total-contributions 4.0 5.0

# Changes between a release and the current main branch.
total-contributions 4.0 main
total-contributions 2.1.0
""",
)
parser.add_argument(
    "version",
    metavar="version",
    nargs="*",
    # TODO: Ideally, we'd replace "1.3.0" with "First commit", to
    # simplify including contributions before the 1.3.0 release.
    default=["1.3.0", "main"],
    help="Git tag or branch in zulip/zulip specifying one end of the commit range to use.",
)
parser.add_argument(
    "-a",
    "--ascending",
    action="store_true",
    help="Sort contributors based on number of commits(ascending order)",
)
args = parser.parse_args()

# nargs="*" accepts any number of positionals, so the upper limit is
# enforced by hand; parser.error() prints the usage message and exits.
if len(args.version) > 2:
    parser.error("Expects 0 to 2 version number(s)")

# A single version is the lower bound; the upper bound then defaults to "main".
lower_zulip_version = args.version[0]
upper_zulip_version = "main" if len(args.version) == 1 else args.version[1]
subprocess.check_call(["git", "fetch"], cwd=find_path("zulip"))


def _commit_date(revision: str) -> str:
    """Return the first whitespace-separated field of `revision`'s committer date.

    Runs `git log -1 --format=%ci` in the process working directory with
    git's stderr discarded. Raises subprocess.CalledProcessError when git
    fails, e.g. because the revision does not exist.
    """
    return subprocess.check_output(
        ["git", "log", "-1", "--format=%ci", revision],
        stderr=subprocess.DEVNULL,
        text=True,
    ).split()[0]


# Extract git version and time. It's important that we use the commit
# date (%ci), not the author date (%ai), since while those are often
# near identical for release commits, if we pass a branch like `main`,
# it's possible the latest commit on the branch might have a months
# old author date if the last pull request merged was started at that
# time.
try:
    lower_time = _commit_date(lower_zulip_version)
    upper_time = _commit_date(upper_zulip_version)
except subprocess.CalledProcessError:
    # This is a failure: report it on stderr and exit non-zero so that
    # callers and shell pipelines can detect it.
    print("Specified version(s) don't exist", file=sys.stderr)
    sys.exit(1)

print(
    f"Commit range {lower_zulip_version}..{upper_zulip_version} corresponds to {lower_time} to {upper_time}"
)
# Commit totals per contributor name, accumulated across every repository.
out_dict: Dict[str, int] = defaultdict(int)

# Tally zulip/zulip itself over exactly the requested range. The checkout
# was already fetched before the version dates were resolved, so no
# second fetch is needed here.
repo_log = retrieve_log("zulip", lower_zulip_version, upper_zulip_version)
# Each shortlog line is "<count><TAB><name>"; the per-author counts add up
# to the number of commits in the range, so no separate `git log` call is
# needed to count them.
commit_count = sum(int(line.split("\t")[0]) for line in repo_log)
print(
    f"{commit_count} commits from zulip/zulip: {lower_zulip_version[0:12]}..{upper_zulip_version[0:12]}"
)
add_log(out_dict, repo_log)
# TODO: We should migrate the last couple repositories to use the
# `main` default branch name and then simplify this.
for full_repository, branch in [
    ("zulip/zulip-mobile", "main"),
    ("zulip/zulip-desktop", "main"),
    ("zulip/docker-zulip", "main"),
    ("zulip/python-zulip-api", "main"),
    ("zulip/zulip-terminal", "main"),
    ("zulip/zulint", "main"),
    ("zulip/github-actions-zulip", "main"),
    ("zulip/zulip-js", "main"),
    ("zulip/zulip-archive", "master"),
    ("zulip/zulipbot", "main"),
    ("zulip/zulip-zapier", "master"),
]:
    repository = os.path.basename(full_repository)
    # The working directory never changes inside this loop, so the
    # checkout path can be resolved once per repository.
    repository_path = find_path(repository)

    if os.path.exists(repository_path):
        # Update the checkout for the project in question.
        subprocess.check_call(
            ["git", "pull", "--rebase", "-q"],
            cwd=repository_path,
        )
    else:
        # No checkout yet: clone it next to the other repositories.
        subprocess.check_call(
            ["git", "clone", f"git@github.com:{full_repository}.git"],
            cwd=os.path.dirname(repository_path),
        )

    # NOTE(review): `-a` is git fetch's short option for --append, not
    # --all; confirm which one was intended.
    subprocess.check_call(["git", "fetch", "-a"], cwd=repository_path)

    # Map the zulip/zulip time window onto this repository's branch:
    # the last commit before each end of the window bounds the range.
    lower_repo_version = find_last_commit_before_time(repository, branch, lower_time)
    upper_repo_version = find_last_commit_before_time(repository, branch, upper_time)

    repo_log = retrieve_log(repository, lower_repo_version, upper_repo_version)
    # Each shortlog line is "<count><TAB><name>". Summing the counts gives
    # the number of commits in the range, so the printed figure always
    # matches what is attributed below and no extra `git log` is needed.
    commit_count = sum(int(line.split("\t")[0]) for line in repo_log)
    print(
        f"{commit_count} commits from {full_repository}: {lower_repo_version[0:12]}..{upper_repo_version[0:12]}"
    )
    add_log(out_dict, repo_log)
# Print one "<count><TAB><name>" line per contributor, ordered by commit
# count: largest first by default, smallest first with --ascending.
ranked_contributors = sorted(
    out_dict.items(), key=lambda item: item[1], reverse=not args.ascending
)
grand_total = 0
for name, count in ranked_contributors:
    print(f"{count}\t{name}")
    grand_total += count

# Closing summary: bot commits left out of the totals, then the overall
# figures for the requested zulip/zulip range.
print(f"Excluded {bot_commits} commits authored by bots.")
summary = (
    f"{grand_total} total commits by {len(out_dict)} contributors between "
    f"{lower_zulip_version} and {upper_zulip_version}."
)
print(summary)