mirror of
https://github.com/zulip/zulip.git
synced 2025-10-23 04:52:12 +00:00
When using a start date before the first commit to a repo, we should include the repo's entire history (up to the end date) in our totals. Instead, we were using a range like "..{upper_version}", which in Git revision-range syntax means the start of the range is HEAD -- so the range was empty. Fix that by leaving out the ".." when we want no left endpoint.
231 lines
7.3 KiB
Python
Executable File
231 lines
7.3 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
import argparse
|
|
import os
|
|
import pathlib
|
|
import subprocess
|
|
import sys
|
|
from collections import defaultdict
|
|
from typing import Dict, List
|
|
|
|
# Running total of commits attributed to "[bot]" accounts.  add_log()
# increments it instead of crediting those commits to a contributor.
bot_commits = 0

# ZULIP_PATH is the parent of the directory that contains this script.
# Switching into it up front means git commands issued without an
# explicit cwd, and the cwd-relative lookup in find_path(), do not
# depend on where the script was launched from.
_this_file = os.path.abspath(__file__)
ZULIP_PATH = os.path.dirname(os.path.dirname(_this_file))
os.chdir(ZULIP_PATH)
|
|
|
|
|
|
def add_log(committer_dict: Dict[str, int], input: List[str]) -> None:
    """Fold `git shortlog -s` output lines into per-committer totals.

    Each entry of `input` is expected to have the form
    "<count><TAB><name>"; leading whitespace before the count is
    tolerated because int() ignores it.

    Args:
        committer_dict: Mapping of committer name to commit count; it is
            updated in place with `+=`, so it must supply a starting value
            for unseen names (e.g. a defaultdict(int)).
        input: Summary lines as produced by retrieve_log().

    Raises:
        ValueError: If a line has no tab separator or its count field is
            not an integer.

    Commits by committers whose name ends in "[bot]" are added to the
    module-level `bot_commits` counter rather than to `committer_dict`.
    """
    global bot_commits

    for dataset in input:
        # Split only on the first tab so that a name which itself
        # contains a tab is kept whole instead of being truncated.
        count_field, committer_name = dataset.split("\t", 1)
        commit_count = int(count_field)

        if committer_name.endswith("[bot]"):
            # Exclude dependabot[bot] and other GitHub bots.
            bot_commits += commit_count
            continue

        committer_dict[committer_name] += commit_count
|
|
|
|
|
|
def retrieve_log(repo: str, revisions: str) -> List[str]:
    """Return the `git shortlog -s` summary lines for a revision range.

    Args:
        repo: Repository directory name, resolved with find_path().
        revisions: A git revision or revision range (e.g. "A..B").

    Returns:
        The command's output split into lines.

    Raises:
        subprocess.CalledProcessError: If git exits with a non-zero status.
    """
    return subprocess.check_output(
        # The trailing "--" marks `revisions` as a revision rather than a
        # path, matching how find_last_commit_before_time() invokes git.
        ["git", "shortlog", "-s", revisions, "--"],
        cwd=find_path(repo),
        text=True,
    ).splitlines()
|
|
|
|
|
|
def find_path(repository: str) -> str:
    """Return the path of `repository` as a sibling of the working directory.

    The result is the parent of the resolved current working directory
    joined with `repository`, converted to a string.
    """
    working_dir = pathlib.Path().resolve()
    checkout_root = working_dir.parents[0]
    return str(checkout_root.joinpath(repository))
|
|
|
|
|
|
def process_repo(
    *,
    out_dict: Dict[str, int],
    repo_short: str,
    repo_full: str,
    lower_version: str,
    upper_version: str,
) -> None:
    """Count one repository's commits in a range and add them to `out_dict`.

    Prints a one-line summary ("<n> commits from <repo_full>: <range>") and
    then merges the per-committer counts into `out_dict` via add_log().

    Args:
        out_dict: Committer name -> commit count mapping, updated in place.
        repo_short: Repository directory name, resolved with find_path().
        repo_full: Repository name used only in the printed summary.
        lower_version: Revision forming the left endpoint of the range.  An
            empty string means "no left endpoint": everything reachable
            from `upper_version` is counted.
        upper_version: Revision forming the right endpoint of the range.  An
            empty string means there is nothing to count.

    Raises:
        subprocess.CalledProcessError: If a git command fails.
    """
    if not upper_version:
        # find_last_commit_before_time() yields an empty string when no
        # commit precedes the cutoff.  Passing "" to git as a revision is
        # an error, so report an empty range instead of crashing.
        print(f"0 commits from {repo_full}: (no commits in range)")
        return

    if not lower_version:
        # A range written as "..X" would mean "HEAD..X"; pass the bare
        # revision so the whole history up to it is included.
        revisions = upper_version
        revisions_display = f"(start)..{upper_version[0:12]}"
    else:
        revisions = f"{lower_version}..{upper_version}"
        revisions_display = f"{lower_version[0:12]}..{upper_version[0:12]}"

    commit_count = len(
        subprocess.check_output(
            # "--" keeps git from treating the revision as a path, as in
            # find_last_commit_before_time().
            ["git", "log", "--pretty=oneline", revisions, "--"],
            cwd=find_path(repo_short),
            text=True,
        ).splitlines()
    )
    repo_log = retrieve_log(repo_short, revisions)
    print(f"{commit_count} commits from {repo_full}: {revisions_display}")
    add_log(out_dict, repo_log)
|
|
|
|
|
|
def find_last_commit_before_time(repository: str, branch: str, time: str) -> str:
    """Return the newest commit on `branch` that git's `--before` filter
    accepts for `time`, in the given repository.

    The repository directory is resolved with find_path().  The result is
    the stripped output of `git rev-list -1`, which is an empty string
    when the command prints nothing.
    """
    command = ["git", "rev-list", "-1", f"--before={time}", branch, "--"]
    raw_output = subprocess.check_output(
        command,
        cwd=find_path(repository),
        text=True,
    )
    return raw_output.strip()
|
|
|
|
|
|
# Command-line interface: up to two positional zulip/zulip versions plus
# an optional flag selecting ascending sort order for the report.
parser = argparse.ArgumentParser(
    prog="python3 total-contributions",
    # Keep the description's line breaks and example commands as written.
    formatter_class=argparse.RawTextHelpFormatter,
    description="""\
Aggregates the total commit contributions to Zulip that should be
attributed to the time window between the two provided
zulip/zulip versions (tags or branches).

The attribution algorithm used by this tool attributes all changes for
a Zulip project between:

* The last release of the target project before the first zulip/zulip version.
* The last release of the target project before the last zulip/zulip version.

This algorithm has the key property that the totals for a given contributor of
2.1.0..4.0 will equal the sum of 2.1.0..3.0 and 3.0..4.0.

Its main downside is that contributions to projects other than
zulip/zulip in the last few weeks before a zulip/zulip release will be
delayed (i.e. counted in the total for the next zulip/zulip release).

Expects that all Zulip repositories are checked out side by side in the
parent directory of this zulip/zulip checkout (missing ones are cloned
there); the tool itself can be run from any directory.

# Changes between two major releases.
total-contributions 4.0 5.0

# Changes between a release and the current main branch.
total-contributions 4.0 main
total-contributions 2.1.0
""",
)
parser.add_argument(
    "version",
    metavar="version",
    # Zero, one or two versions are accepted; the upper bound is enforced
    # after parsing because nargs="*" itself places no limit.
    nargs="*",
    # TODO: Ideally, we'd replace "1.3.0" with "First commit", to
    # simplify including contributions before the 1.3.0 release.
    default=["1.3.0", "main"],
    help="Git tag or branch in zulip/zulip specifying one end of the commit range to use.",
)

parser.add_argument(
    "-a",
    "--ascending",
    action="store_true",
    help="Sort contributors based on number of commits (ascending order)",
)
|
|
|
|
args = parser.parse_args()

# The positional argument accepts any number of values, so the upper
# bound has to be checked by hand.
if len(args.version) > 2:
    parser.error("Expects 0 to 2 version number(s)")

# The first version is the start of the range; when only one version was
# given, the range runs up to the `main` branch.
lower_zulip_version = args.version[0]
upper_zulip_version = "main" if len(args.version) == 1 else args.version[1]
|
|
|
|
# Refresh the zulip/zulip checkout before looking up the versions.
subprocess.check_call(["git", "fetch"], cwd=find_path("zulip"))

# Extract git version and time. It's important that we use the commit
# date (%ci), not the author date (%ai), since while those are often
# near identical for release commits, if we pass a branch like `main`,
# it's possible the latest commit on the branch might have a months
# old author date if the last pull request merged was started at that
# time.
#
# Only the first whitespace-separated field of the output is kept.
# NOTE(review): that drops the time of day and UTC offset from the
# cutoff later passed to `git rev-list --before`; confirm that a
# date-only cutoff is what is intended.
try:
    lower_time = subprocess.check_output(
        ["git", "log", "-1", "--format=%ci", lower_zulip_version],
        stderr=subprocess.DEVNULL,
        text=True,
    ).split()[0]
    upper_time = subprocess.check_output(
        ["git", "log", "-1", "--format=%ci", upper_zulip_version],
        stderr=subprocess.DEVNULL,
        text=True,
    ).split()[0]
except subprocess.CalledProcessError:
    # An unknown version is a failure: report it on stderr and exit with
    # a non-zero status so that callers and shell pipelines can tell.
    print("Specified version(s) don't exist", file=sys.stderr)
    sys.exit(1)

print(
    f"Commit range {lower_zulip_version}..{upper_zulip_version} corresponds to {lower_time} to {upper_time}"
)
|
|
|
|
# Committer name -> commit count, accumulated across every repository.
out_dict: Dict[str, int] = defaultdict(int)

# zulip/zulip was already fetched earlier in this script, before the
# commit dates were looked up, so it is not fetched a second time here.
# Its range is given directly by the versions from the command line.
process_repo(
    out_dict=out_dict,
    repo_short="zulip",
    repo_full="zulip/zulip",
    lower_version=lower_zulip_version,
    upper_version=upper_zulip_version,
)
|
|
|
|
# TODO: We should migrate the last couple repositories to use the
# `main` default branch name and then simplify this.
for full_repository, branch in [
    ("zulip/zulip-mobile", "main"),
    ("zulip/zulip-desktop", "main"),
    ("zulip/docker-zulip", "main"),
    ("zulip/python-zulip-api", "main"),
    ("zulip/zulip-terminal", "main"),
    ("zulip/zulint", "main"),
    ("zulip/github-actions-zulip", "main"),
    ("zulip/zulip-js", "main"),
    ("zulip/zulip-archive", "master"),
    ("zulip/zulipbot", "main"),
    ("zulip/zulip-zapier", "master"),
]:
    repository = os.path.basename(full_repository)
    repo_path = find_path(repository)

    if not os.path.exists(repo_path):
        # No local checkout yet: clone it next to the other repositories.
        subprocess.check_call(
            ["git", "clone", f"git@github.com:{full_repository}.git"],
            cwd=os.path.dirname(repo_path),
        )
    else:
        # Update the checkout for the project in question.
        subprocess.check_call(
            ["git", "pull", "--rebase", "-q"],
            cwd=repo_path,
        )

    # NOTE(review): `-a` is git fetch's short form of `--append`; if
    # `--all` was intended, this call should be revisited.
    subprocess.check_call(["git", "fetch", "-a"], cwd=repo_path)

    # Map the zulip/zulip time window onto this repository's history.
    lower_repo_version = find_last_commit_before_time(repository, branch, lower_time)
    upper_repo_version = find_last_commit_before_time(repository, branch, upper_time)
    process_repo(
        out_dict=out_dict,
        repo_short=repository,
        repo_full=full_repository,
        lower_version=lower_repo_version,
        upper_version=upper_repo_version,
    )
|
|
|
|
# Report one line per contributor, ordered by commit count: largest
# first by default, smallest first when --ascending was given.
ranked_contributors = sorted(
    out_dict.items(),
    key=lambda item: item[1],
    reverse=not args.ascending,
)
for committer_name, commit_count in ranked_contributors:
    print(f"{commit_count}\t{committer_name}")

# Bot commits were never added to out_dict, so they are not in this sum.
grand_total = sum(out_dict.values())

print(f"Excluded {bot_commits} commits authored by bots.")
print(
    f"{grand_total} total commits by {len(out_dict)} contributors between "
    f"{lower_zulip_version} and {upper_zulip_version}."
)
|