1#!/usr/bin/env python3 2# Copyright 2019 The ChromiumOS Authors 3# Use of this source code is governed by a BSD-style license that can be 4# found in the LICENSE file. 5 6"""Maps LLVM git SHAs to synthetic revision numbers and back. 7 8Revision numbers are all of the form '(branch_name, r1234)'. As a shorthand, 9r1234 is parsed as '(main, 1234)'. 10""" 11 12import argparse 13from pathlib import Path 14import re 15import subprocess 16import sys 17from typing import IO, Iterable, List, NamedTuple, Optional, Tuple, Union 18 19 20MAIN_BRANCH = "main" 21 22# Note that after base_llvm_sha, we reach The Wild West(TM) of commits. 23# So reasonable input that could break us includes: 24# 25# Revert foo 26# 27# This reverts foo, which had the commit message: 28# 29# bar 30# llvm-svn: 375505 31# 32# While saddening, this is something we should probably try to handle 33# reasonably. 34base_llvm_revision = 375505 35base_llvm_sha = "186155b89c2d2a2f62337081e3ca15f676c9434b" 36 37# Known pairs of [revision, SHA] in ascending order. 38# The first element is the first non-`llvm-svn` commit that exists. Later ones 39# are functional nops, but speed this script up immensely, since `git` can take 40# quite a while to walk >100K commits. 41known_llvm_rev_sha_pairs = ( 42 (base_llvm_revision, base_llvm_sha), 43 (425000, "af870e11aed7a5c475ae41a72e3015c4c88597d1"), 44 (450000, "906ebd5830e6053b50c52bf098e3586b567e8499"), 45 (475000, "530d14a99611a71f8f3eb811920fd7b5c4d4e1f8"), 46 (500000, "173855f9b0bdfe45d71272596b510650bfc1ca33"), 47) 48 49# Represents an LLVM git checkout: 50# - |dir| is the directory of the LLVM checkout 51# - |remote| is the name of the LLVM remote. Generally it's "origin". 52LLVMConfig = NamedTuple( 53 "LLVMConfig", (("remote", str), ("dir", Union[Path, str])) 54) 55 56 57class Rev(NamedTuple("Rev", (("branch", str), ("number", int)))): 58 """Represents a LLVM 'revision', a shorthand identifies a LLVM commit.""" 59 60 @staticmethod 61 def parse(rev: str) -> "Rev": 62 """Parses a Rev from the given string. 63 64 Raises a ValueError on a failed parse. 65 """ 66 # Revs are parsed into (${branch_name}, r${commits_since_base_commit}) 67 # pairs. 68 # 69 # We support r${commits_since_base_commit} as shorthand for 70 # (main, r${commits_since_base_commit}). 71 if rev.startswith("r"): 72 branch_name = MAIN_BRANCH 73 rev_string = rev[1:] 74 else: 75 match = re.match(r"\((.+), r(\d+)\)", rev) 76 if not match: 77 raise ValueError("%r isn't a valid revision" % rev) 78 79 branch_name, rev_string = match.groups() 80 81 return Rev(branch=branch_name, number=int(rev_string)) 82 83 def __str__(self) -> str: 84 branch_name, number = self 85 if branch_name == MAIN_BRANCH: 86 return "r%d" % number 87 return "(%s, r%d)" % (branch_name, number) 88 89 90def is_git_sha(xs: str) -> bool: 91 """Returns whether the given string looks like a valid git commit SHA.""" 92 return ( 93 len(xs) > 6 94 and len(xs) <= 40 95 and all(x.isdigit() or "a" <= x.lower() <= "f" for x in xs) 96 ) 97 98 99def check_output(command: List[str], cwd: Union[Path, str]) -> str: 100 """Shorthand for subprocess.check_output. Auto-decodes any stdout.""" 101 result = subprocess.run( 102 command, 103 cwd=cwd, 104 check=True, 105 stdin=subprocess.DEVNULL, 106 stdout=subprocess.PIPE, 107 encoding="utf-8", 108 ) 109 return result.stdout 110 111 112def translate_prebase_sha_to_rev_number( 113 llvm_config: LLVMConfig, sha: str 114) -> int: 115 """Translates a sha to a revision number (e.g., "llvm-svn: 1234"). 116 117 This function assumes that the given SHA is an ancestor of |base_llvm_sha|. 118 """ 119 commit_message = check_output( 120 ["git", "log", "-n1", "--format=%B", sha, "--"], 121 cwd=llvm_config.dir, 122 ) 123 last_line = commit_message.strip().splitlines()[-1] 124 svn_match = re.match(r"^llvm-svn: (\d+)$", last_line) 125 126 if not svn_match: 127 raise ValueError( 128 f"No llvm-svn line found for {sha}, which... shouldn't happen?" 129 ) 130 131 return int(svn_match.group(1)) 132 133 134def translate_sha_to_rev(llvm_config: LLVMConfig, sha_or_ref: str) -> Rev: 135 """Translates a sha or git ref to a Rev.""" 136 137 if is_git_sha(sha_or_ref): 138 sha = sha_or_ref 139 else: 140 sha = check_output( 141 ["git", "rev-parse", "--revs-only", sha_or_ref, "--"], 142 cwd=llvm_config.dir, 143 ) 144 sha = sha.strip() 145 146 for base_rev, base_sha in reversed(known_llvm_rev_sha_pairs): 147 merge_base = check_output( 148 ["git", "merge-base", base_sha, sha, "--"], 149 cwd=llvm_config.dir, 150 ) 151 merge_base = merge_base.strip() 152 if merge_base == base_sha: 153 result = check_output( 154 [ 155 "git", 156 "rev-list", 157 "--count", 158 "--first-parent", 159 f"{base_sha}..{sha}", 160 "--", 161 ], 162 cwd=llvm_config.dir, 163 ) 164 count = int(result.strip()) 165 return Rev(branch=MAIN_BRANCH, number=count + base_rev) 166 167 # Otherwise, either: 168 # - |merge_base| is |sha| (we have a guaranteed llvm-svn number on |sha|) 169 # - |merge_base| is neither (we have a guaranteed llvm-svn number on 170 # |merge_base|, but not |sha|) 171 merge_base_number = translate_prebase_sha_to_rev_number( 172 llvm_config, merge_base 173 ) 174 if merge_base == sha: 175 return Rev(branch=MAIN_BRANCH, number=merge_base_number) 176 177 distance_from_base = check_output( 178 [ 179 "git", 180 "rev-list", 181 "--count", 182 "--first-parent", 183 f"{merge_base}..{sha}", 184 "--", 185 ], 186 cwd=llvm_config.dir, 187 ) 188 189 revision_number = merge_base_number + int(distance_from_base.strip()) 190 branches_containing = check_output( 191 ["git", "branch", "-r", "--contains", sha], 192 cwd=llvm_config.dir, 193 ) 194 195 candidates = [] 196 197 prefix = llvm_config.remote + "/" 198 for branch in branches_containing.splitlines(): 199 branch = branch.strip() 200 if branch.startswith(prefix): 201 candidates.append(branch[len(prefix) :]) 202 203 if not candidates: 204 raise ValueError( 205 f"No viable branches found from {llvm_config.remote} with {sha}" 206 ) 207 208 # It seems that some `origin/release/.*` branches have 209 # `origin/upstream/release/.*` equivalents, which is... awkward to deal 210 # with. Prefer the latter, since that seems to have newer commits than the 211 # former. Technically n^2, but len(elements) should be like, tens in the 212 # worst case. 213 candidates = [x for x in candidates if f"upstream/{x}" not in candidates] 214 if len(candidates) != 1: 215 raise ValueError( 216 f"Ambiguity: multiple branches from {llvm_config.remote} have " 217 f"{sha}: {sorted(candidates)}" 218 ) 219 220 return Rev(branch=candidates[0], number=revision_number) 221 222 223def parse_git_commit_messages( 224 stream: Union[Iterable[str], IO[str]], separator: str 225) -> Iterable[Tuple[str, str]]: 226 """Parses a stream of git log messages. 227 228 These are expected to be in the format: 229 230 40 character sha 231 commit 232 message 233 body 234 separator 235 40 character sha 236 commit 237 message 238 body 239 separator 240 """ 241 242 lines = iter(stream) 243 while True: 244 # Looks like a potential bug in pylint? crbug.com/1041148 245 # pylint: disable=stop-iteration-return 246 sha = next(lines, None) 247 if sha is None: 248 return 249 250 sha = sha.strip() 251 assert is_git_sha(sha), f"Invalid git SHA: {sha}" 252 253 message = [] 254 for line in lines: 255 if line.strip() == separator: 256 break 257 message.append(line) 258 259 yield sha, "".join(message) 260 261 262def translate_prebase_rev_to_sha(llvm_config: LLVMConfig, rev: Rev) -> str: 263 """Translates a Rev to a SHA. 264 265 This function assumes that the given rev refers to a commit that's an 266 ancestor of |base_llvm_sha|. 267 """ 268 # Because reverts may include reverted commit messages, we can't just |-n1| 269 # and pick that. 270 separator = ">!" * 80 271 looking_for = f"llvm-svn: {rev.number}" 272 273 git_command = [ 274 "git", 275 "log", 276 "--grep", 277 f"^{looking_for}$", 278 f"--format=%H%n%B{separator}", 279 base_llvm_sha, 280 ] 281 282 with subprocess.Popen( 283 git_command, 284 cwd=llvm_config.dir, 285 stdin=subprocess.DEVNULL, 286 stdout=subprocess.PIPE, 287 encoding="utf-8", 288 ) as subp: 289 assert subp.stdout is not None 290 for sha, message in parse_git_commit_messages(subp.stdout, separator): 291 last_line = message.splitlines()[-1] 292 if last_line.strip() == looking_for: 293 subp.terminate() 294 return sha 295 if subp.wait() != 0: 296 raise subprocess.CalledProcessError(subp.returncode, git_command) 297 298 raise ValueError(f"No commit with revision {rev} found") 299 300 301def translate_rev_to_sha_from_baseline( 302 llvm_config: LLVMConfig, 303 parent_sha: str, 304 parent_rev: int, 305 child_sha: str, 306 child_rev: Optional[int], 307 want_rev: int, 308 branch_name: str, 309) -> str: 310 """Translates a revision number between a parent & child to a SHA. 311 312 Args: 313 llvm_config: LLVM config to use. 314 parent_sha: SHA of the parent that the revision number is a child of. 315 parent_rev: Revision number of `parent_sha`. 316 child_sha: A child of `parent_sha` to find a rev on. 317 child_rev: Optional note of what the child's revision number is. 318 want_rev: The desired revision number between child and parent. 319 branch_name: Name of the branch to refer to if a ValueError is raised. 320 321 Raises: 322 ValueError if the given child isn't far enough away from the parent to 323 find `want_rev`. 324 """ 325 # As a convenience, have a fast path for want_rev < parent_rev. In 326 # particular, branches can hit this case. 327 if want_rev < parent_rev: 328 baseline_git_sha = parent_sha 329 commits_behind_baseline = parent_rev - want_rev 330 else: 331 if child_rev is None: 332 commits_between_parent_and_child = check_output( 333 [ 334 "git", 335 "rev-list", 336 "--count", 337 "--first-parent", 338 f"{parent_sha}..{child_sha}", 339 "--", 340 ], 341 cwd=llvm_config.dir, 342 ) 343 child_rev = parent_rev + int( 344 commits_between_parent_and_child.strip() 345 ) 346 if child_rev < want_rev: 347 raise ValueError( 348 "Revision {want_rev} is past " 349 f"{llvm_config.remote}/{branch_name}. Try updating your tree?" 350 ) 351 baseline_git_sha = child_sha 352 commits_behind_baseline = child_rev - want_rev 353 354 if not commits_behind_baseline: 355 return baseline_git_sha 356 357 result = check_output( 358 [ 359 "git", 360 "rev-parse", 361 "--revs-only", 362 f"{baseline_git_sha}~{commits_behind_baseline}", 363 ], 364 cwd=llvm_config.dir, 365 ) 366 return result.strip() 367 368 369def translate_rev_to_sha(llvm_config: LLVMConfig, rev: Rev) -> str: 370 """Translates a Rev to a SHA. 371 372 Raises a ValueError if the given Rev doesn't exist in the given config. 373 """ 374 branch, number = rev 375 376 branch_tip = check_output( 377 ["git", "rev-parse", "--revs-only", f"{llvm_config.remote}/{branch}"], 378 cwd=llvm_config.dir, 379 ).strip() 380 381 if branch != MAIN_BRANCH: 382 main_merge_point = check_output( 383 [ 384 "git", 385 "merge-base", 386 f"{llvm_config.remote}/{MAIN_BRANCH}", 387 branch_tip, 388 ], 389 cwd=llvm_config.dir, 390 ) 391 main_merge_point = main_merge_point.strip() 392 main_rev = translate_sha_to_rev(llvm_config, main_merge_point) 393 return translate_rev_to_sha_from_baseline( 394 llvm_config, 395 parent_sha=main_merge_point, 396 parent_rev=main_rev.number, 397 child_sha=branch_tip, 398 child_rev=None, 399 want_rev=number, 400 branch_name=branch, 401 ) 402 403 if number < base_llvm_revision: 404 return translate_prebase_rev_to_sha(llvm_config, rev) 405 406 # Technically this could be a binary search, but the list has fewer than 10 407 # elems, and won't grow fast. Linear is easier. 408 last_cached_rev = None 409 last_cached_sha = branch_tip 410 for cached_rev, cached_sha in reversed(known_llvm_rev_sha_pairs): 411 if cached_rev == number: 412 return cached_sha 413 414 if cached_rev < number: 415 return translate_rev_to_sha_from_baseline( 416 llvm_config, 417 parent_sha=cached_sha, 418 parent_rev=cached_rev, 419 child_sha=last_cached_sha, 420 child_rev=last_cached_rev, 421 want_rev=number, 422 branch_name=branch, 423 ) 424 425 last_cached_rev = cached_rev 426 last_cached_sha = cached_sha 427 428 # This is only hit if `number >= base_llvm_revision` _and_ there's no 429 # coverage for `number` in `known_llvm_rev_sha_pairs`, which contains 430 # `base_llvm_revision`. 431 assert False, "Couldn't find a base SHA for a rev on main?" 432 433 434def find_root_llvm_dir(root_dir: str = ".") -> str: 435 """Finds the root of an LLVM directory starting at |root_dir|. 436 437 Raises a subprocess.CalledProcessError if no git directory is found. 438 """ 439 result = check_output( 440 ["git", "rev-parse", "--show-toplevel"], 441 cwd=root_dir, 442 ) 443 return result.strip() 444 445 446def main(argv: List[str]) -> None: 447 parser = argparse.ArgumentParser(description=__doc__) 448 parser.add_argument( 449 "--llvm_dir", 450 help="LLVM directory to consult for git history, etc. Autodetected " 451 "if cwd is inside of an LLVM tree", 452 ) 453 parser.add_argument( 454 "--upstream", 455 default="origin", 456 help="LLVM upstream's remote name. Defaults to %(default)s.", 457 ) 458 sha_or_rev = parser.add_mutually_exclusive_group(required=True) 459 sha_or_rev.add_argument( 460 "--sha", help="A git SHA (or ref) to convert to a rev" 461 ) 462 sha_or_rev.add_argument("--rev", help="A rev to convert into a sha") 463 opts = parser.parse_args(argv) 464 465 llvm_dir = opts.llvm_dir 466 if llvm_dir is None: 467 try: 468 llvm_dir = find_root_llvm_dir() 469 except subprocess.CalledProcessError: 470 parser.error( 471 "Couldn't autodetect an LLVM tree; please use --llvm_dir" 472 ) 473 474 config = LLVMConfig( 475 remote=opts.upstream, 476 dir=opts.llvm_dir or find_root_llvm_dir(), 477 ) 478 479 if opts.sha: 480 rev = translate_sha_to_rev(config, opts.sha) 481 print(rev) 482 else: 483 sha = translate_rev_to_sha(config, Rev.parse(opts.rev)) 484 print(sha) 485 486 487if __name__ == "__main__": 488 main(sys.argv[1:]) 489