#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""Helps find commits to cherrypick into a release branch.

Usage:
  pick_doc_commits.py --main=origin/main --release=origin/release/5.5

It will find commits on the main branch that are not on the release branch, and
filter them down to the docs-only commits that should be cherrypicked. It will
also print the commits that were filtered out.

This tool will not actually modify the git repo, it will only print the commands
to run.

Must be run from inside the repo, ideally after a recent `git pull`. Does not
care which branch is currently checked out.
"""

import argparse
import datetime
import re
import subprocess
import sys
import textwrap
from typing import List, Optional


# The script will print extra info when this is > 0, and more at higher levels.
# Controlled by the --verbose flag.
verbosity = 0


def debug_log(message: str):
    """Prints a message to stderr if verbosity is greater than zero."""
    if verbosity > 0:
        sys.stderr.write(f"VERBOSE: {message}\n")


def run_git(command: List[str]) -> List[str]:
    """Runs a git command and returns its stdout as a list of lines.

    Prints the command and its output to debug_log() if verbosity is greater
    than 1.

    Args:
        command: The args to pass to `git`, without the leading `git` itself.
    Returns:
        A list of the non-empty lines printed to stdout, stripped of leading
        and trailing whitespace.
    Raises:
        Exception: git could not be started, or it exited with a non-zero
            status (in which case the message includes git's stderr).
    """
    if verbosity > 1:  # Higher verbosity required
        debug_log("Running command: 'git " + " ".join(command) + "'")

    # Only the process launch is guarded, so that the non-zero-exit error
    # raised below is not caught and wrapped a second time.
    try:
        result = subprocess.run(["git", *command], capture_output=True, text=True)
    except (OSError, ValueError) as e:
        raise Exception(f"Error running command '{command}': {e}") from e

    if result.returncode != 0:
        raise Exception(f"Error running command '{command}':\n{result.stderr}")

    # Remove empty and whitespace-only lines.
    lines = [line.strip() for line in result.stdout.split("\n") if line.strip()]
    if verbosity > 1:
        debug_log("-----BEGIN GIT OUTPUT-----")
        for line in lines:
            debug_log(line)
        debug_log("-----END GIT OUTPUT-----")
    return lines


class Commit:
    """A git commit hash and its one-line message."""

    def __init__(self, hash: str, message: str = ""):
        """Creates a new Commit with the given hash.

        Args:
            hash: The hexadecimal hash of the commit.
            message: The one-line summary of the commit. If empty, this method
                will ask git for the commit message.
        """
        self.hash = hash.strip()
        if not message:
            # Ask git for the commit message.
            lines = run_git(["log", "-1", "--pretty=%s", self.hash])
            # Should just be one line, but could be zero.
            message = " ".join(lines)
        self.message = message.strip()

    @staticmethod
    def from_line(line: str) -> "Commit":
        """Creates a Commit from a string of the form '<hash> [<message>]'.

        Raises:
            ValueError: The line is empty or contains only whitespace.
        """
        parts = [part.strip() for part in line.split(" ", maxsplit=1) if part.strip()]
        if not parts:
            # Raise instead of assert so the check survives `python -O`.
            raise ValueError(f"Expected at least one part in line '{line}'")
        return Commit(hash=parts[0], message=parts[1] if len(parts) > 1 else "")

    def __repr__(self):
        return f"Commit('{self.hash[:8]}', '{self.message}')"

    def __str__(self):
        return f"{self.hash[:8]} {self.message}"


def is_doc_only_commit(commit: Commit) -> bool:
    """Returns True if the commit only touched "documentation files".

    A commit for which git reports no modified files is also treated as
    doc-only, since it touches no non-documentation file.
    """

    def is_doc_file(path: str) -> bool:
        """Returns true if the path is considered to be a "documentation file"."""
        return (
            # Everything under docs, regardless of the file type.
            path.startswith("docs/")
            # Any markdown or RST file in the repo.
            or path.endswith((".md", ".rst"))
        )

    # The first line is the full hash, and the rest are the files modified by
    # the commit, relative to the root of the repo.
    lines = run_git(["diff-tree", "--name-only", "-r", commit.hash])
    all_files = frozenset(lines[1:])
    doc_files = frozenset(filter(is_doc_file, all_files))
    non_doc_files = all_files - doc_files
    is_doc_only = not non_doc_files

    if verbosity > 0 and not is_doc_only:
        debug_log(
            f"{repr(commit)} touches {len(non_doc_files)} non-doc files, "
            + f"like '{sorted(non_doc_files)[0]}'."
        )

    return is_doc_only


def print_wrapped(text: str, width: int = 80) -> None:
    """Print text wrapped to fit within the given width.

    Indents additional lines by four spaces.
    """
    print("\n    ".join(textwrap.wrap(text, width=width - 4, break_on_hyphens=False)))


def _print_commit_list(commits: List[Commit]) -> None:
    """Prints one '- <commit>' bullet per commit, or '- <none>' if empty."""
    if not commits:
        print("- <none>")
    for commit in commits:
        print(f"- {commit}")


def parse_args(argv: Optional[List[str]] = None) -> argparse.Namespace:
    """Parses the command-line flags.

    Args:
        argv: The arguments to parse, without the program name. If None,
            argparse reads them from sys.argv.
    Returns:
        A namespace with the `main`, `release`, and `verbose` attributes.
    """
    parser = argparse.ArgumentParser(
        description="Prints differences between git branches."
    )
    parser.add_argument(
        "--main",
        default="origin/main",
        type=str,
        help="The name of the main (source) branch to pick commits from.",
    )
    parser.add_argument(
        "--release",
        type=str,
        # Without a release branch there is nothing to compare against, so
        # fail with a usage message rather than passing None to git.
        required=True,
        help="The name of the release (destination) branch to pick commits onto, "
        + "ideally with the 'origin/' prefix",
    )
    parser.add_argument(
        "-v",
        "--verbose",
        action="count",
        default=0,
        help="Log extra output. Specify more times (-vv) for more output.",
    )
    return parser.parse_args(argv)


def main():
    """Prints the doc-only commits to cherry-pick and the commands to do so."""
    args = parse_args()
    main_branch = args.main
    release_branch = args.release

    global verbosity
    verbosity = args.verbose

    # Returns a list of hashes that are on the main branch but not the release
    # branch. Each hash is preceded by `+ ` if the commit has not been cherry
    # picked onto the release branch, or `- ` if it has.
    cherry_lines = run_git(["cherry", release_branch, main_branch])
    print_wrapped(
        f"Commits on '{main_branch}' that have already been cherry-picked into '{release_branch}':"
    )
    candidate_commits: List[Commit] = []
    picked_commits: List[Commit] = []
    for line in cherry_lines:
        marker, rest = line[:2], line[2:]
        # Only build a Commit (which may invoke git) for recognized lines.
        if marker == "+ ":
            candidate_commits.append(Commit.from_line(rest))
        elif marker == "- ":
            picked_commits.append(Commit.from_line(rest))
    _print_commit_list(picked_commits)
    print("")

    # Filter out and print the commits that touch non-documentation files.
    print_wrapped(
        f"Will not pick these commits on '{main_branch}' that touch non-documentation files:"
    )
    doc_only_commits: List[Commit] = []
    skipped_commits: List[Commit] = []
    for commit in candidate_commits:
        if is_doc_only_commit(commit):
            doc_only_commits.append(commit)
        else:
            skipped_commits.append(commit)
    _print_commit_list(skipped_commits)
    print("")

    # Print the commits to cherry-pick.
    print_wrapped(
        f"Remaining '{main_branch}' commits that touch only documentation files; "
        + f"will be cherry-picked into '{release_branch}':"
    )
    _print_commit_list(doc_only_commits)
    print("")

    if not doc_only_commits:
        print_wrapped(
            "It looks like there are no doc-only commits "
            + f"on '{main_branch}' to cherry-pick into '{release_branch}'."
        )
        return

    # Print instructions for cherry-picking the commits.
    # Recommend a unique branch name, based on the current UTC time.
    suffix = datetime.datetime.now(datetime.timezone.utc).strftime("%Y%m%d%H%M")
    branch_name = "cherrypick-" + release_branch.replace("/", "-") + "-" + suffix

    print("Cherry pick by running the commands:")
    print("```")
    print(f"git checkout {release_branch}")
    print(
        # Split lines with backslashes to make long lists more legible but
        # still copy-pasteable.
        "git cherry-pick \\\n  "
        + " \\\n  ".join([commit.hash for commit in doc_only_commits])
    )
    print(f"git checkout -b {branch_name}")
    print("```")
    print("")
    print("To verify that this worked, re-run this script with the arguments:")
    print("```")
    print(f"--main={main_branch} --release={branch_name}")
    print("```")
    print("It should show no doc-only commits to cherry-pick.")
    print("")
    print(f"Then, push {branch_name} to GitHub:")
    print("```")
    print(f"git push --set-upstream origin {branch_name}")
    print("```")
    print("")
    print_wrapped(
        "When creating the PR, remember to set the 'into' branch to be "
        # Remove "origin/" if present since it won't appear in the GitHub
        # UI.
        + f"'{re.sub('^origin/', '', release_branch)}'."
    )


if __name__ == "__main__":
    main()