xref: /aosp_15_r20/external/executorch/build/pick_doc_commits.py (revision 523fa7a60841cd1ecfb9cc4201f1ca8b03ed023a)
1#!/usr/bin/env python3
2# Copyright (c) Meta Platforms, Inc. and affiliates.
3# All rights reserved.
4#
5# This source code is licensed under the BSD-style license found in the
6# LICENSE file in the root directory of this source tree.
7
8"""Helps find commits to cherrypick into a release branch.
9
10Usage:
11  pick_doc_commits.py --main=origin/main --release=origin/release/5.5
12
13It will find commits on the main branch that are not on the release branch, and
14filter them down to the docs-only commits that should be cherrypicked. It will
15also print the commits that were filtered out.
16
This tool will not actually modify the git repo; it will only print the commands
to run.
19
20Must be run from inside the repo, ideally after a recent `git pull`. Does not
21care which branch is currently checked out.
22"""
23
24import argparse
25import datetime
26import re
27import subprocess
28import sys
29import textwrap
30from typing import List
31
32
# The script will print extra info when this is > 0, and more at higher levels.
# Controlled by the --verbose flag: main() assigns the parsed count to it, and
# debug_log(), run_git() and is_doc_only_commit() read it.
verbosity: int = 0
36
37
def debug_log(message: str):
    """Writes `message` to stderr, prefixed with "VERBOSE: ", when verbose.

    Does nothing unless the module-level `verbosity` is greater than zero.

    Args:
        message: The text to log. A trailing newline is added.
    """
    if verbosity <= 0:
        return
    print(f"VERBOSE: {message}", file=sys.stderr)
43
44
def run_git(command: List[str]) -> List[str]:
    """Runs a git command and returns its stdout as a list of lines.

    Prints the command and its output to debug_log() if verbosity is greater
    than 1.

    Args:
        command: The args to pass to `git`, without the leading `git` itself.
    Returns:
        A list of the non-empty lines printed to stdout, with leading and
        trailing whitespace (including newlines) stripped.
    Raises:
        Exception: git could not be launched, or it exited with a non-zero
            status (in which case the message includes git's stderr).
    """
    # Human-readable form of the command, used in logs and error messages.
    command_str = "git " + " ".join(command)
    if verbosity > 1:  # Higher verbosity required
        debug_log(f"Running command: '{command_str}'")

    # Only the launch itself is guarded, so that the non-zero-exit error raised
    # below is not caught and re-wrapped with a duplicated message prefix.
    try:
        result = subprocess.run(["git", *command], capture_output=True, text=True)
    except (OSError, ValueError) as e:
        raise Exception(f"Error running command '{command_str}': {e}") from e

    if result.returncode != 0:
        raise Exception(f"Error running command '{command_str}':\n{result.stderr}")

    # Remove empty and whitespace-only lines, and strip the rest.
    lines = [line.strip() for line in result.stdout.split("\n") if line.strip()]

    if verbosity > 1:
        debug_log("-----BEGIN GIT OUTPUT-----")
        for line in lines:
            debug_log(line)
        debug_log("-----END GIT OUTPUT-----")
    return lines
77
78
class Commit:
    """Pairs a git commit hash with its one-line summary message."""

    def __init__(self, hash: str, message: str = ""):
        """Builds a Commit from a hash and, optionally, its summary.

        Args:
            hash: The hexadecimal hash of the commit. Surrounding whitespace
                is stripped.
            message: The one-line summary of the commit. When empty, the
                summary is looked up by running `git log` on the hash.
        """
        self.hash = hash.strip()
        if message:
            self.message = message.strip()
            return
        # No summary supplied: ask git. Normally this yields a single line,
        # but it may yield none, so join whatever came back.
        summary_lines = run_git(["log", "-1", "--pretty=%s", self.hash])
        self.message = " ".join(summary_lines).strip()

    @staticmethod
    def from_line(line: str) -> "Commit":
        """Parses a string of the form '<hash> [<message>]' into a Commit."""
        fields = []
        for piece in line.split(" ", maxsplit=1):
            piece = piece.strip()
            if piece:
                fields.append(piece)
        assert len(fields) >= 1, f"Expected at least one part in line '{line}'"
        if len(fields) > 1:
            return Commit(hash=fields[0], message=fields[1])
        return Commit(hash=fields[0], message="")

    def __repr__(self):
        short_hash = self.hash[:8]
        return f"Commit('{short_hash}', '{self.message}')"

    def __str__(self):
        short_hash = self.hash[:8]
        return f"{short_hash} {self.message}"
111
112
def is_doc_only_commit(commit: Commit) -> bool:
    """Reports whether every file touched by `commit` is a documentation file.

    A documentation file is anything under `docs/`, or any `.md` / `.rst` file
    anywhere in the repo. When the commit touches other files and verbosity is
    greater than zero, one example of such a file is sent to debug_log().

    Args:
        commit: The commit to inspect; its hash is passed to `git diff-tree`.
    Returns:
        True if no non-documentation file was touched, False otherwise.
    """

    def is_doc_file(path: str) -> bool:
        """Returns true if the path is considered to be a "documentation file"."""
        # Everything under docs, regardless of the file type.
        if path.startswith("docs/"):
            return True
        # Any markdown or RST file in the repo.
        return path.endswith((".md", ".rst"))

    # The first output line is the full hash; the remaining lines are the files
    # modified by the commit, relative to the root of the repo.
    output = run_git(["diff-tree", "--name-only", "-r", commit.hash])
    touched_files = frozenset(output[1:])
    non_doc_files = frozenset(
        path for path in touched_files if not is_doc_file(path)
    )

    if not non_doc_files:
        return True

    if verbosity > 0:
        debug_log(
            f"{repr(commit)} touches {len(non_doc_files)} non-doc files, "
            + f"like '{sorted(non_doc_files)[0]}'."
        )
    return False
141
142
def print_wrapped(text: str, width: int = 80) -> None:
    """Prints `text` to stdout, wrapped to fit within `width` columns.

    Every line after the first is indented by four spaces; all lines, including
    the first, are wrapped at `width - 4` characters so the indented lines fit.
    Hyphenated words are not split across lines.

    Args:
        text: The text to print.
        width: The maximum total line width, including the indent.
    """
    continuation_separator = "\n    "
    wrapped_lines = textwrap.wrap(text, width=width - 4, break_on_hyphens=False)
    print(*wrapped_lines, sep=continuation_separator)
149
150
def parse_args() -> argparse.Namespace:
    """Parses the command-line arguments from sys.argv.

    Returns:
        A namespace with the attributes:
            main: Name of the source branch; defaults to "origin/main".
            release: Name of the destination branch. Required.
            verbose: Number of times -v/--verbose was given; defaults to 0.
    """
    parser = argparse.ArgumentParser(
        description="Prints differences between git branches."
    )
    parser.add_argument(
        "--main",
        default="origin/main",
        type=str,
        help="The name of the main (source) branch to pick commits from.",
    )
    parser.add_argument(
        "--release",
        type=str,
        # There is no sensible default. Without this, omitting the flag would
        # pass None to `git cherry` and fail later with an obscure error.
        required=True,
        help="The name of the release (destination) branch to pick commits onto, "
        + "ideally with the 'origin/' prefix",
    )
    parser.add_argument(
        "-v",
        "--verbose",
        action="count",
        default=0,
        help="Log extra output. Specify more times (-vv) for more output.",
    )
    return parser.parse_args()
175
176
def _print_commit_section(heading: str, commits: "List[Commit]") -> None:
    """Prints a wrapped heading, one '- ' bullet per commit, and a blank line.

    Prints '- <none>' in place of the bullets when `commits` is empty.
    """
    print_wrapped(heading)
    if not commits:
        print("- <none>")
    for commit in commits:
        print(f"- {commit}")
    print("")


def main():
    """Prints which commits on the main branch to cherry-pick into the release.

    Lists, in order: commits already cherry-picked into the release branch,
    commits skipped because they touch non-documentation files, and the
    remaining doc-only commits. If any doc-only commits remain, also prints the
    git commands to cherry-pick them. Nothing in the repo is modified.
    """
    global verbosity

    args = parse_args()
    main_branch = args.main
    release_branch = args.release
    verbosity = args.verbose

    # Returns a list of hashes that are on the main branch but not the release
    # branch. Each hash is preceded by `+ ` if the commit has not been cherry
    # picked onto the release branch, or `- ` if it has.
    cherry_lines = run_git(["cherry", release_branch, main_branch])
    already_picked_commits = []
    candidate_commits = []
    for line in cherry_lines:
        commit = Commit.from_line(line[2:])
        if line.startswith("+ "):
            candidate_commits.append(commit)
        elif line.startswith("- "):
            already_picked_commits.append(commit)
    # Each section is printed from its own list, so that '- <none>' appears
    # whenever that particular section is empty.
    _print_commit_section(
        f"Commits on '{main_branch}' that have already been cherry-picked into '{release_branch}':",
        already_picked_commits,
    )

    # Filter out and print the commits that touch non-documentation files.
    doc_only_commits = []
    non_doc_commits = []
    for commit in candidate_commits:
        if is_doc_only_commit(commit):
            doc_only_commits.append(commit)
        else:
            non_doc_commits.append(commit)
    _print_commit_section(
        f"Will not pick these commits on '{main_branch}' that touch non-documentation files:",
        non_doc_commits,
    )

    # Print the commits to cherry-pick.
    _print_commit_section(
        f"Remaining '{main_branch}' commits that touch only documentation files; "
        + f"will be cherry-picked into '{release_branch}':",
        doc_only_commits,
    )

    # Print instructions for cherry-picking the commits.
    if doc_only_commits:
        # Recommend a unique branch name, using a UTC timestamp as the suffix.
        # datetime.utcnow() is deprecated, so use a timezone-aware "now".
        suffix = datetime.datetime.now(datetime.timezone.utc).strftime("%Y%m%d%H%M")
        branch_name = "cherrypick-" + release_branch.replace("/", "-") + "-" + suffix

        print("Cherry pick by running the commands:")
        print("```")
        print(f"git checkout {release_branch}")
        print(
            # Split lines with backslashes to make long lists more legible but
            # still copy-pasteable.
            "git cherry-pick \\\n  "
            + " \\\n  ".join([commit.hash for commit in doc_only_commits])
        )
        print(f"git checkout -b {branch_name}")
        print("```")
        print("")
        print("To verify that this worked, re-run this script with the arguments:")
        print("```")
        print(f"--main={main_branch} --release={branch_name}")
        print("```")
        print("It should show no doc-only commits to cherry-pick.")
        print("")
        print(f"Then, push {branch_name} to GitHub:")
        print("```")
        print(f"git push --set-upstream origin {branch_name}")
        print("```")
        print("")
        print_wrapped(
            "When creating the PR, remember to set the 'into' branch to be "
            # Remove "origin/" if present since it won't appear in the GitHub
            # UI.
            + f"'{re.sub('^origin/', '', release_branch)}'."
        )
    else:
        print_wrapped(
            "It looks like there are no doc-only commits "
            + f"on '{main_branch}' to cherry-pick into '{release_branch}'."
        )
268
269
# Run the tool only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
272