#!/usr/bin/env python3
# Copyright 2022 The ChromiumOS Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

# pylint: disable=line-too-long

"""Handle most aspects of creating and benchmarking PGO profiles for Rust.

This is meant to be done at Rust uprev time. Ultimately profdata files need
to be placed at

gs://chromeos-localmirror/distfiles/rust-pgo-{rust_version}-frontend{s}.profdata.xz
and
gs://chromeos-localmirror/distfiles/rust-pgo-{rust_version}-llvm{s}.profdata.xz

Here {s} is an optional suffix to distinguish between profdata files on the same
Rust version.

The intended flow is that you first get the new Rust version in a shape so that
it builds, for instance modifying or adding patches as necessary. Note that you
may need to generate manifests for dev-lang/rust and dev-lang/rust-host before
the profdata files are created, which will cause the `ebuild manifest` command
to fail. One way to handle this is to temporarily delete the lines of the
variable SRC_URI in cros-rustc.eclass which refer to profdata files.

After you have a new working Rust version, you can run the following.

```
$ ./pgo_rust.py generate         # generate profdata files
$ ./pgo_rust.py benchmark-pgo    # benchmark with PGO
$ ./pgo_rust.py benchmark-nopgo  # benchmark without PGO
$ ./pgo_rust.py upload-profdata  # upload profdata to localmirror
```

The benchmark steps aren't strictly necessary, but are recommended and will
upload benchmark data to

gs://chromeos-toolchain-artifacts/rust-pgo/benchmarks/{rust_version}/

Currently by default ripgrep 13.0.0 is used as both the crate to build using an
instrumented Rust while generating profdata, and the crate to build to
benchmark Rust. You may wish to experiment with other crates for either role.
In that case upload your crate to

gs://chromeos-toolchain-artifacts/rust-pgo/crates/{name}-{version}.tar.xz

and use `--crate-name` and `--crate-version` to indicate which crate to build
to generate profdata (or which crate's generated profdata to use), and
`--bench-crate-name` and `--bench-crate-version` to indicate which crate to
build in benchmarks.

Notes on various local and GS locations follow.

Note that currently we need to keep separate profdata files for the LLVM and
frontend components of Rust. This is because LLVM profdata is instrumented by
the system LLVM, but Rust's profdata is instrumented by its own LLVM, which
may use a different version of the profdata format.

profdata files accessed by ebuilds must be stored in

gs://chromeos-localmirror/distfiles

Specifically, they go to

gs://chromeos-localmirror/distfiles/rust-pgo-{rust_version}-llvm{s}.profdata.xz

gs://chromeos-localmirror/distfiles/
  rust-pgo-{rust_version}-frontend{s}.profdata.xz

But we can store other data elsewhere, like gs://chromeos-toolchain-artifacts.

GS locations:

{GS_BASE}/crates/ - store crates we may use for generating profiles or
benchmarking PGO optimized Rust compilers

{GS_BASE}/benchmarks/{rust_version}/nopgo{s}/
  {bench_crate_name}-{bench_crate_version}-{triple}

{GS_BASE}/benchmarks/{rust_version}/{crate_name}-{crate_version}{s}/
  {bench_crate_name}-{bench_crate_version}-{triple}

Local locations:

{LOCAL_BASE}/crates/

{LOCAL_BASE}/llvm-profraw/

{LOCAL_BASE}/frontend-profraw/

{LOCAL_BASE}/profdata/{crate_name}-{crate_version}/llvm.profdata

{LOCAL_BASE}/profdata/{crate_name}-{crate_version}/frontend.profdata

{LOCAL_BASE}/benchmarks/nopgo/
  {bench_crate_name}-{bench_crate_version}-{triple}

{LOCAL_BASE}/benchmarks/{crate_name}-{crate_version}/
  {bench_crate_name}-{bench_crate_version}-{triple}

/mnt/host/source/src/third_party/chromiumos-overlay/dev-lang/rust/files/
  llvm.profdata and frontend.profdata - `benchmark-pgo` copies the generated
  profdata here before building Rust with the `*_use_local` USE flags
"""

import argparse
import contextlib
import logging
import os
from pathlib import Path
from pathlib import PurePosixPath
import re
import shutil
import subprocess
import sys
from typing import cast, Iterator, List, Mapping, Optional, Union


TARGET_TRIPLES = [
    "x86_64-cros-linux-gnu",
    "x86_64-pc-linux-gnu",
    "armv7a-cros-linux-gnueabihf",
    "aarch64-cros-linux-gnu",
]

LOCAL_BASE = Path("/tmp/rust-pgo")

# GS paths are kept without the "gs:/" prefix; it is prepended at the point of
# use so that PurePosixPath does not collapse the double slash in "gs://".
GS_BASE = PurePosixPath("/chromeos-toolchain-artifacts/rust-pgo")

GS_DISTFILES = PurePosixPath("/chromeos-localmirror/distfiles")

CRATE_NAME = "ripgrep"

CRATE_VERSION = "13.0.0"


@contextlib.contextmanager
def chdir(new_directory: Path) -> Iterator[None]:
    """Temporarily switch the working directory to `new_directory`.

    The previous working directory is restored on exit, even if the body of
    the `with` statement raises.
    """
    initial_directory = Path.cwd()
    os.chdir(new_directory)
    try:
        yield
    finally:
        os.chdir(initial_directory)


def _log_process_output(
    result: "subprocess.CompletedProcess[str]", indent: int
) -> None:
    """Logs the captured stdout and stderr of `result`, indented."""
    stdout = result.stdout
    stderr = result.stderr
    if indent != 0:
        prefix = " " * indent
        stdout = re.sub("^", prefix, stdout, flags=re.MULTILINE)
        stderr = re.sub("^", prefix, stderr, flags=re.MULTILINE)

    logging.info("STDOUT:")
    logging.info(stdout)
    logging.info("STDERR:")
    logging.info(stderr)


def run(
    args: List,
    *,
    indent: int = 4,
    env: Optional[Mapping[str, str]] = None,
    capture_stdout: bool = False,
    message: bool = True,
) -> Optional[str]:
    """Runs a command, logging what it does.

    Args:
        args: The command and its arguments; each item is converted with str().
        indent: Number of spaces by which logged output is indented.
        env: Extra environment variables, layered on top of os.environ.
        capture_stdout: If True, return the command's stdout instead of
            logging its output.
        message: Whether to log "Running ..."/"Ran ..." lines.

    Returns:
        The command's (unindented) stdout if `capture_stdout` is set,
        otherwise None.

    Raises:
        subprocess.CalledProcessError: The command exited with nonzero status.
    """
    args = [str(arg) for arg in args]

    if env is None:
        new_env: Mapping[str, str] = os.environ
    else:
        new_env = {**os.environ, **env}

    if message:
        if env is None:
            logging.info("Running %s", args)
        else:
            logging.info("Running %s in environment %s", args, env)

    result = subprocess.run(
        args,
        env=new_env,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        encoding="utf-8",
        check=False,
    )

    # Output is normally only logged when it is not being handed back to the
    # caller, but on failure always log it: otherwise the diagnostics of a
    # failing captured command would be lost.
    if not capture_stdout or result.returncode != 0:
        _log_process_output(result, indent)

    result.check_returncode()

    if message:
        if env is None:
            logging.info("Ran %s\n", args)
        else:
            logging.info("Ran %s in environment %s\n", args, env)

    return result.stdout if capture_stdout else None


def get_command_output(args: List, **kwargs) -> str:
    """Runs a command and returns its stdout as a string.

    Keyword arguments are forwarded to run().
    """
    return cast(str, run(args, capture_stdout=True, **kwargs))


def get_rust_version() -> str:
    """Returns the X.Y.Z version reported by `rustc --version`.

    Raises:
        RuntimeError: No version number was found in rustc's output.
    """
    s = get_command_output(["rustc", "--version"])
    m = re.search(r"\d+\.\d+\.\d+", s)
    if m is None:
        raise RuntimeError(f"Could not find a Rust version in {s!r}")
    return m.group(0)


def download_unpack_crate(*, crate_name: str, crate_version: str):
    """Downloads a crate tarball from GS_BASE and unpacks it locally.

    Any previously unpacked copy of the crate is removed first.
    """
    filename_no_extension = f"{crate_name}-{crate_version}"
    gs_path = GS_BASE / "crates" / f"{filename_no_extension}.tar.xz"
    local_path = LOCAL_BASE / "crates"
    shutil.rmtree(local_path / filename_no_extension, ignore_errors=True)
    with chdir(local_path):
        run(["gsutil", "cp", f"gs:/{gs_path}", "."])
        run(["tar", "xaf", f"{filename_no_extension}.tar.xz"])


def build_crate(
    *,
    crate_name: str,
    crate_version: str,
    target_triple: str,
    time_file: Optional[Union[str, Path]] = None,
):
    """Does a clean release build of an unpacked crate for `target_triple`.

    A `.cargo/config.toml` is written into the crate directory which points
    cargo at the crate's vendored sources, selects the `{triple}-clang`
    linker, and enables thin LTO.

    Args:
        crate_name: Name of the crate to build.
        crate_version: Version of the crate to build.
        target_triple: Target triple to pass to cargo.
        time_file: If given, the build is run under /usr/bin/time and the
            timing report is written to this file.
    """
    local_path = LOCAL_BASE / "crates" / f"{crate_name}-{crate_version}"
    with chdir(local_path):
        Path(".cargo").mkdir(exist_ok=True)
        with open(".cargo/config.toml", "w", encoding="utf-8") as f:
            f.write(
                "\n".join(
                    (
                        "[source.crates-io]",
                        'replace-with = "vendored-sources"',
                        "",
                        "[source.vendored-sources]",
                        'directory = "vendor"',
                        "",
                        f"[target.{target_triple}]",
                        f'linker = "{target_triple}-clang"',
                        "",
                        "[target.'cfg(all())']",
                        "rustflags = [",
                        '    "-Clto=thin",',
                        '    "-Cembed-bitcode=yes",',
                        "]",
                    )
                )
            )

        run(["cargo", "clean"])

        cargo_cmd = ["cargo", "build", "--release", "--target", target_triple]

        if time_file is None:
            run(cargo_cmd)
        else:
            time_cmd = [
                "/usr/bin/time",
                f"--output={time_file}",
                "--format=wall time (s) %e\nuser time (s) %U\nmax RSS %M\n",
            ]
            run(time_cmd + cargo_cmd)


def build_rust(
    *,
    generate_frontend_profile: bool = False,
    generate_llvm_profile: bool = False,
    use_frontend_profile: bool = False,
    use_llvm_profile: bool = False,
):
    """Emerges dev-lang/rust-host and the cross Rust packages.

    The keyword arguments select which PGO-related USE flags are enabled.
    A build can use profile data or generate it, but not both, and can
    generate at most one of the frontend and LLVM profiles.
    """
    if use_frontend_profile or use_llvm_profile:
        assert not generate_frontend_profile and not generate_llvm_profile, (
            "Can't build a compiler to both use profile information "
            "and generate it"
        )

    assert (
        not generate_frontend_profile or not generate_llvm_profile
    ), "Can't generate both frontend and LLVM profile information"

    use = "-rust_profile_frontend_use -rust_profile_llvm_use "
    if generate_frontend_profile:
        use += "rust_profile_frontend_generate "
    if generate_llvm_profile:
        use += "rust_profile_llvm_generate "
    if use_frontend_profile:
        use += "rust_profile_frontend_use_local "
    if use_llvm_profile:
        use += "rust_profile_llvm_use_local "

    # Append to any USE flags already present in the caller's environment.
    env_use = os.getenv("USE", "").rstrip()
    use = (env_use + " " + use).strip()
    rust_cross_packages = [
        f"cross-{x}/rust" for x in TARGET_TRIPLES if "-pc-linux-" not in x
    ]

    # -E to preserve environment variables like USE, FEATURES, etc.
    run(
        [
            "sudo",
            "-E",
            "emerge",
            "-j",
            "dev-lang/rust-host",
        ]
        + rust_cross_packages,
        env={"USE": use},
    )


def merge_profdata(llvm_or_frontend, *, source_directory: Path, dest: Path):
    """Merges all `*.profraw` files in `source_directory` into `dest`.

    Args:
        llvm_or_frontend: Either "llvm" or "frontend"; selects which
            `llvm-profdata` binary is used.
        source_directory: Directory containing the profraw files.
        dest: Path of the profdata file to write; its parent directory is
            created if needed.

    Raises:
        FileNotFoundError: `source_directory` contains no profraw files.
    """
    assert llvm_or_frontend in ("llvm", "frontend")

    # The two `llvm-profdata` programs come from different LLVM versions, and
    # may support different versions of the profdata format, so make sure to
    # use the right one.
    llvm_profdata = (
        "/usr/bin/llvm-profdata"
        if llvm_or_frontend == "llvm"
        else "/usr/libexec/rust/llvm-profdata"
    )

    dest.parent.mkdir(parents=True, exist_ok=True)

    # Sorted so that the logged command line is reproducible between runs.
    files = sorted(source_directory.glob("*.profraw"))
    if not files:
        raise FileNotFoundError(f"No profraw files found in {source_directory}")
    run([llvm_profdata, "merge", f"--output={dest}"] + files)


def do_upload_profdata(*, source: Path, dest: PurePosixPath):
    """Compresses `source` with xz and uploads the result, world-readable."""
    new_path = source.parent / (source.name + ".xz")
    run(["xz", "--keep", "--compress", "--force", source])
    upload_file(source=new_path, dest=dest, public_read=True)


def upload_file(
    *, source: Path, dest: PurePosixPath, public_read: bool = False
):
    """Copies `source` to `gs:/{dest}` with gsutil.

    If `public_read` is set, the upload uses the `public-read` canned ACL.
    """
    if public_read:
        run(["gsutil", "cp", "-a", "public-read", source, f"gs:/{dest}"])
    else:
        run(["gsutil", "cp", source, f"gs:/{dest}"])


def maybe_download_crate(*, crate_name: str, crate_version: str):
    """Downloads a crate if its download directory does not already exist."""
    directory = LOCAL_BASE / "crates" / f"{crate_name}-{crate_version}"
    if directory.is_dir():
        logging.info("Crate already downloaded")
    else:
        logging.info("Downloading crate")
        download_unpack_crate(
            crate_name=crate_name, crate_version=crate_version
        )


def _profdata_directory(args) -> Path:
    """Returns the local directory holding profdata for the profiled crate."""
    return LOCAL_BASE / "profdata" / f"{args.crate_name}-{args.crate_version}"


def _generate_profile(kind: str, label: str, args) -> None:
    """Builds instrumented Rust, builds the crate with it, merges profraw.

    Args:
        kind: "llvm" or "frontend"; which component to instrument.
        label: How `kind` is spelled in log messages.
        args: Parsed command line arguments (crate_name, crate_version).
    """
    profraw_dir = LOCAL_BASE / f"{kind}-profraw"

    logging.info("Building Rust instrumented for %s", kind)
    build_rust(
        generate_llvm_profile=(kind == "llvm"),
        generate_frontend_profile=(kind == "frontend"),
    )

    profraw_dir.mkdir(parents=True, exist_ok=True)
    for triple in TARGET_TRIPLES:
        logging.info(
            "Building crate with %s instrumentation, for triple %s",
            label,
            triple,
        )
        build_crate(
            crate_name=args.crate_name,
            crate_version=args.crate_version,
            target_triple=triple,
        )

    logging.info("Merging %s profile data", label)
    merge_profdata(
        kind,
        source_directory=profraw_dir,
        dest=_profdata_directory(args) / f"{kind}.profdata",
    )


def generate(args):
    """Implements the `generate` subcommand: creates both profdata files."""
    maybe_download_crate(
        crate_name=args.crate_name, crate_version=args.crate_version
    )

    # Start from a clean slate so stale profraw files don't get merged in.
    shutil.rmtree(LOCAL_BASE / "llvm-profraw", ignore_errors=True)
    shutil.rmtree(LOCAL_BASE / "frontend-profraw", ignore_errors=True)

    _generate_profile("llvm", "LLVM", args)
    _generate_profile("frontend", "frontend", args)


def _time_crate_builds(args, time_directory: Path) -> List[Path]:
    """Builds the benchmark crate for every triple, timing each build.

    Returns:
        The timing files written by this run, one per target triple.
    """
    time_directory.mkdir(parents=True, exist_ok=True)
    time_files = []
    for triple in TARGET_TRIPLES:
        time_file = (
            time_directory
            / f"{args.bench_crate_name}-{args.bench_crate_version}-{triple}"
        )
        build_crate(
            crate_name=args.bench_crate_name,
            crate_version=args.bench_crate_version,
            target_triple=triple,
            time_file=time_file,
        )
        time_files.append(time_file)
    return time_files


def _upload_benchmarks(time_files: List[Path], directory_name: str) -> None:
    """Uploads timing files to the GS benchmark directory for this Rust.

    Only the files produced by the current run are uploaded; the local timing
    directory is not keyed by Rust version, so it may also contain stale
    results which must not be filed under the current version.
    """
    rust_version = get_rust_version()
    dest_directory = GS_BASE / "benchmarks" / rust_version / directory_name
    logging.info("Uploading benchmark data")
    for time_file in time_files:
        upload_file(source=time_file, dest=dest_directory / time_file.name)


def benchmark_nopgo(args):
    """Implements `benchmark-nopgo`: benchmarks a Rust built without PGO."""
    maybe_download_crate(
        crate_name=args.bench_crate_name, crate_version=args.bench_crate_version
    )

    logging.info("Building Rust, no PGO")
    build_rust()

    logging.info("Benchmarking crate build with no PGO")
    time_files = _time_crate_builds(args, LOCAL_BASE / "benchmarks" / "nopgo")

    _upload_benchmarks(time_files, f"nopgo{args.suffix}")


def benchmark_pgo(args):
    """Implements `benchmark-pgo`: benchmarks a Rust built with PGO."""
    maybe_download_crate(
        crate_name=args.bench_crate_name, crate_version=args.bench_crate_version
    )

    files_dir = Path(
        "/mnt/host/source/src/third_party/chromiumos-overlay",
        "dev-lang/rust/files",
    )

    logging.info("Copying profile data to be used in building Rust")
    profdata_dir = _profdata_directory(args)
    for profdata_name in ("llvm.profdata", "frontend.profdata"):
        run(["cp", profdata_dir / profdata_name, files_dir])

    logging.info("Building Rust with PGO")
    build_rust(use_llvm_profile=True, use_frontend_profile=True)

    profile_name = f"{args.crate_name}-{args.crate_version}"
    logging.info("Benchmarking crate built with PGO")
    time_files = _time_crate_builds(
        args, LOCAL_BASE / "benchmarks" / profile_name
    )

    _upload_benchmarks(time_files, f"{profile_name}{args.suffix}")


def upload_profdata(args):
    """Implements `upload-profdata`: uploads both profdata files."""
    directory = _profdata_directory(args)
    rust_version = get_rust_version()

    for kind, label in (("llvm", "LLVM"), ("frontend", "frontend")):
        logging.info("Uploading %s profdata", label)
        do_upload_profdata(
            source=directory / f"{kind}.profdata",
            dest=(
                GS_DISTFILES
                / f"rust-pgo-{rust_version}-{kind}{args.suffix}.profdata.xz"
            ),
        )


def _add_crate_arguments(
    parser: argparse.ArgumentParser, *, name_help: str, version_help: str
) -> None:
    """Adds --crate-name and --crate-version to `parser`."""
    parser.add_argument("--crate-name", default=CRATE_NAME, help=name_help)
    parser.add_argument(
        "--crate-version", default=CRATE_VERSION, help=version_help
    )


def _add_bench_crate_arguments(parser: argparse.ArgumentParser) -> None:
    """Adds --bench-crate-name and --bench-crate-version to `parser`."""
    parser.add_argument(
        "--bench-crate-name",
        default=CRATE_NAME,
        help="Name of the crate whose build to benchmark",
    )
    parser.add_argument(
        "--bench-crate-version",
        default=CRATE_VERSION,
        help="Version of the crate whose build to benchmark",
    )


def _add_suffix_argument(parser: argparse.ArgumentParser) -> None:
    """Adds --suffix to `parser`."""
    parser.add_argument(
        "--suffix",
        default="",
        help="Suffix to distinguish benchmarks and profdata with identical "
        "rustc versions",
    )


def main(argv: List[str]) -> int:
    """Parses `argv` and runs the selected subcommand.

    Args:
        argv: Full argument vector, including the program name at index 0.

    Returns:
        The process exit status (0 on success).
    """
    logging.basicConfig(
        stream=sys.stdout, level=logging.NOTSET, format="%(message)s"
    )

    parser = argparse.ArgumentParser(
        prog=argv[0],
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    subparsers = parser.add_subparsers(dest="command", help="")
    subparsers.required = True

    parser_generate = subparsers.add_parser(
        "generate",
        help="Generate LLVM and frontend profdata files by building "
        "instrumented Rust compilers, and using them to build the "
        "indicated crate (downloading the crate if necessary).",
    )
    parser_generate.set_defaults(func=generate)
    _add_crate_arguments(
        parser_generate,
        name_help="Name of the crate to build",
        version_help="Version of the crate to build",
    )

    parser_benchmark_nopgo = subparsers.add_parser(
        "benchmark-nopgo",
        help="Build the Rust compiler without PGO, benchmark "
        "the build of the indicated crate, and upload "
        "the benchmark data.",
    )
    parser_benchmark_nopgo.set_defaults(func=benchmark_nopgo)
    _add_bench_crate_arguments(parser_benchmark_nopgo)
    _add_suffix_argument(parser_benchmark_nopgo)

    parser_benchmark_pgo = subparsers.add_parser(
        "benchmark-pgo",
        help="Build the Rust compiler using PGO with the indicated "
        "profdata files, benchmark the build of the indicated crate, "
        "and upload the benchmark data.",
    )
    parser_benchmark_pgo.set_defaults(func=benchmark_pgo)
    _add_bench_crate_arguments(parser_benchmark_pgo)
    _add_crate_arguments(
        parser_benchmark_pgo,
        name_help="Name of the crate whose profile to use",
        version_help="Version of the crate whose profile to use",
    )
    _add_suffix_argument(parser_benchmark_pgo)

    parser_upload_profdata = subparsers.add_parser(
        "upload-profdata", help="Upload the profdata files"
    )
    parser_upload_profdata.set_defaults(func=upload_profdata)
    _add_crate_arguments(
        parser_upload_profdata,
        name_help="Name of the crate whose profile to use",
        version_help="Version of the crate whose profile to use",
    )
    _add_suffix_argument(parser_upload_profdata)

    args = parser.parse_args(argv[1:])

    (LOCAL_BASE / "crates").mkdir(parents=True, exist_ok=True)
    (LOCAL_BASE / "llvm-profraw").mkdir(parents=True, exist_ok=True)
    (LOCAL_BASE / "frontend-profraw").mkdir(parents=True, exist_ok=True)
    (LOCAL_BASE / "benchmarks").mkdir(parents=True, exist_ok=True)

    args.func(args)

    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))