xref: /aosp_15_r20/external/toolchain-utils/pgo_tools_rust/pgo_rust.py (revision 760c253c1ed00ce9abd48f8546f08516e57485fe)
1#!/usr/bin/env python3
2# Copyright 2022 The ChromiumOS Authors
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6# pylint: disable=line-too-long
7
8"""Handle most aspects of creating and benchmarking PGO profiles for Rust.
9
10This is meant to be done at Rust uprev time. Ultimately profdata files need
11to be placed at
12
gs://chromeos-localmirror/distfiles/rust-pgo-{rust_version}-frontend{s}.profdata.xz
and
gs://chromeos-localmirror/distfiles/rust-pgo-{rust_version}-llvm{s}.profdata.xz
16
17Here {s} is an optional suffix to distinguish between profdata files on the same
18Rust version.
19
The intended flow is that you first get the new Rust version in a shape so that
it builds, for instance modifying or adding patches as necessary. Note that you
may need to generate manifests for dev-lang/rust and dev-lang/rust-host before
the profdata files are created, in which case the `ebuild manifest` command
will fail. One way to handle this is to temporarily delete the lines of the
variable SRC_URI in cros-rustc.eclass which refer to profdata files.
26
27After you have a new working Rust version, you can run the following.
28
29```
30$ ./pgo_rust.py generate         # generate profdata files
31$ ./pgo_rust.py benchmark-pgo    # benchmark with PGO
32$ ./pgo_rust.py benchmark-nopgo  # benchmark without PGO
33$ ./pgo_rust.py upload-profdata  # upload profdata to localmirror
34```
35
36The benchmark steps aren't strictly necessary, but are recommended and will
37upload benchmark data to
38
39gs://chromeos-toolchain-artifacts/rust-pgo/benchmarks/{rust_version}/
40
41Currently by default ripgrep 13.0.0 is used as both the crate to build using an
42instrumented Rust while generating profdata, and the crate to build to
43benchmark Rust. You may wish to experiment with other crates for either role.
44In that case upload your crate to
45
46gs://chromeos-toolchain-artifacts/rust-pgo/crates/{name}-{version}.tar.xz
47
48and use `--crate-name` and `--crate-version` to indicate which crate to build
49to generate profdata (or which crate's generated profdata to use), and
50`--bench-crate-name` to indicate which crate to build in benchmarks.
51
52Notes on various local and GS locations follow.
53
54Note that currently we need to keep separate profdata files for the LLVM and
55frontend components of Rust. This is because LLVM profdata is instrumented by
56the system LLVM, but Rust's profdata is instrumented by its own LLVM, which
57may have separate profdata.
58
59profdata files accessed by ebuilds must be stored in
60
61gs://chromeos-localmirror/distfiles
62
63Specifically, they go to
64
gs://chromeos-localmirror/distfiles/rust-pgo-{rust_version}-llvm{s}.profdata.xz

gs://chromeos-localmirror/distfiles/
  rust-pgo-{rust_version}-frontend{s}.profdata.xz
69
70But we can store other data elsewhere, like gs://chromeos-toolchain-artifacts.
71
72GS locations:
73
74{GS_BASE}/crates/ - store crates we may use for generating profiles or
75benchmarking PGO optimized Rust compilers
76
77{GS_BASE}/benchmarks/{rust_version}/nopgo/
78  {bench_crate_name}-{bench_crate_version}-{triple}
79
80{GS_BASE}/benchmarks/{rust_version}/{crate_name}-{crate_version}/
81  {bench_crate_name}-{bench_crate_version}-{triple}
82
83Local locations:
84
85{LOCAL_BASE}/crates/
86
87{LOCAL_BASE}/llvm-profraw/
88
89{LOCAL_BASE}/frontend-profraw/
90
91{LOCAL_BASE}/profdata/{crate_name}-{crate_version}/llvm.profdata
92
93{LOCAL_BASE}/profdata/{crate_name}-{crate_version}/frontend.profdata
94
{LOCAL_BASE}/benchmarks/nopgo/
  {bench_crate_name}-{bench_crate_version}-{triple}

{LOCAL_BASE}/benchmarks/{crate_name}-{crate_version}/
  {bench_crate_name}-{bench_crate_version}-{triple}
100
101{LOCAL_BASE}/llvm.profdata     - must go here to be used by Rust ebuild
102{LOCAL_BASE}/frontend.profdata - must go here to be used by Rust ebuild
103"""
104
105import argparse
106import contextlib
107import logging
108import os
109from pathlib import Path
110from pathlib import PurePosixPath
111import re
112import shutil
113import subprocess
114import sys
115from typing import cast, List, Mapping, Optional
116
117
# Target triples the profiling/benchmark crate is built for. Every triple
# except the "-pc-linux-" host one also gets a `cross-{triple}/rust` package
# emerged by build_rust().
TARGET_TRIPLES = [
    "x86_64-cros-linux-gnu",
    "x86_64-pc-linux-gnu",
    "armv7a-cros-linux-gnueabihf",
    "aarch64-cros-linux-gnu",
]

# Root of everything this script stores on the local filesystem.
LOCAL_BASE = Path("/tmp/rust-pgo")

# GS location (bucket + prefix) for crates and benchmark data. Callers prepend
# "gs:/" to the path to form the full gs:// URL.
GS_BASE = PurePosixPath("/chromeos-toolchain-artifacts/rust-pgo")

# GS location the compressed profdata files are uploaded to; also combined
# with a "gs:/" prefix when used.
GS_DISTFILES = PurePosixPath("/chromeos-localmirror/distfiles")

# Default crate name for the --crate-name and --bench-crate-name options.
CRATE_NAME = "ripgrep"

# Default crate version for the --crate-version and --bench-crate-version
# options.
CRATE_VERSION = "13.0.0"
134
135
@contextlib.contextmanager
def chdir(new_directory: Path):
    """Temporarily make `new_directory` the current working directory.

    The working directory that was active on entry is restored when the
    `with` block exits, whether it exits normally or through an exception.
    """
    saved_cwd = Path.cwd()
    os.chdir(new_directory)
    try:
        yield
    finally:
        # Always switch back, even if the body raised.
        os.chdir(saved_cwd)
144
145
def run(
    args: List,
    *,
    indent: int = 4,
    env: Optional[Mapping[str, str]] = None,
    capture_stdout: bool = False,
    message: bool = True,
) -> Optional[str]:
    """Run a command to completion, logging what was run and its output.

    Args:
        args: The command and its arguments. Every element is converted with
            `str()`, so `Path` objects may be passed directly.
        indent: Number of spaces prepended to each line of the command's
            output when it is logged. 0 disables indentation.
        env: Extra environment variables layered on top of `os.environ` for
            the child process. None means the current environment is used
            unchanged.
        capture_stdout: If True, return the command's (unindented) stdout
            instead of logging it.
        message: If True, log a "Running ..." line before and a "Ran ..."
            line after the command.

    Returns:
        The command's stdout if `capture_stdout` is True, otherwise None.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status. The command's output is always logged before raising,
            including when `capture_stdout` is True, so that the failure can
            be diagnosed.
    """
    args = [str(arg) for arg in args]

    if env is None:
        new_env: Mapping[str, str] = os.environ
    else:
        new_env = os.environ.copy()
        new_env.update(env)

    if message:
        if env is None:
            logging.info("Running %s", args)
        else:
            logging.info("Running %s in environment %s", args, env)

    result = subprocess.run(
        args,
        env=new_env,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        encoding="utf-8",
        check=False,
    )

    def indented(text: str) -> str:
        """Prefix every line of `text` with `indent` spaces."""
        if indent == 0:
            return text
        return re.sub("^", " " * indent, text, flags=re.MULTILINE)

    if capture_stdout:
        ret = result.stdout
        # Captured output is normally not logged, but on failure it is the
        # only diagnostic available (stderr would otherwise be dropped).
        log_level = logging.ERROR if result.returncode != 0 else None
    else:
        ret = None
        log_level = logging.INFO

    if log_level is not None:
        logging.log(log_level, "STDOUT:")
        logging.log(log_level, indented(result.stdout))
        logging.log(log_level, "STDERR:")
        logging.log(log_level, indented(result.stderr))

    result.check_returncode()

    if message:
        if env is None:
            logging.info("Ran %s\n", args)
        else:
            logging.info("Ran %s in environment %s\n", args, env)

    return ret
201
202
def get_command_output(args: List, **kwargs) -> str:
    """Run a command and return its stdout as a string.

    Only stdout is returned; stderr is not part of the result. Extra keyword
    arguments are forwarded to `run()`.
    """
    output = run(args, capture_stdout=True, **kwargs)
    # run() returns a str whenever capture_stdout is True; narrow the
    # Optional for type checkers.
    return cast(str, output)
206
207
def get_rust_version() -> str:
    """Return the `X.Y.Z` version number reported by `rustc --version`.

    Fails an assertion (showing the raw output) if no dotted three-part
    version number can be found in the command's output.
    """
    version_output = get_command_output(["rustc", "--version"])
    match = re.search(r"\d+\.\d+\.\d+", version_output)
    assert match is not None, repr(version_output)
    return match.group(0)
213
214
def download_unpack_crate(*, crate_name: str, crate_version: str):
    """Fetch a crate tarball from GS and unpack it under LOCAL_BASE/crates.

    Any previously unpacked `{crate_name}-{crate_version}` directory is
    removed first. The tarball is copied with `gsutil` from
    `{GS_BASE}/crates/` and extracted with `tar` inside the local crates
    directory.
    """
    crate_id = f"{crate_name}-{crate_version}"
    tarball = f"{crate_id}.tar.xz"
    crates_dir = LOCAL_BASE / "crates"
    remote_tarball = GS_BASE / "crates" / tarball

    # Start from a clean slate; a missing directory is not an error.
    shutil.rmtree(crates_dir / crate_id, ignore_errors=True)

    with chdir(crates_dir):
        run(["gsutil", "cp", f"gs:/{remote_tarball}", "."])
        run(["tar", "xaf", tarball])
225
226
def build_crate(
    *,
    crate_name: str,
    crate_version: str,
    target_triple: str,
    time_file: Optional[str] = None,
):
    """Do a clean release build of an unpacked crate for one target triple.

    A `.cargo/config.toml` is (re)written in the crate directory so that
    cargo uses the crate's `vendor` directory as its source, links with
    `{target_triple}-clang`, and builds with thin LTO and embedded bitcode.

    Args:
        crate_name: Name of the crate under LOCAL_BASE/crates.
        crate_version: Version of the crate under LOCAL_BASE/crates.
        target_triple: Triple passed to `cargo build --target`.
        time_file: If given, the build is run under `/usr/bin/time` and the
            wall time, user time and max RSS are written to this file. The
            value is interpolated into a string, so path objects work too.
    """
    cargo_config = "\n".join(
        [
            "[source.crates-io]",
            'replace-with = "vendored-sources"',
            "",
            "[source.vendored-sources]",
            'directory = "vendor"',
            "",
            f"[target.{target_triple}]",
            f'linker = "{target_triple}-clang"',
            "",
            "[target.'cfg(all())']",
            "rustflags = [",
            '    "-Clto=thin",',
            '    "-Cembed-bitcode=yes",',
            "]",
        ]
    )

    crate_dir = LOCAL_BASE / "crates" / f"{crate_name}-{crate_version}"
    with chdir(crate_dir):
        config_dir = Path(".cargo")
        config_dir.mkdir(exist_ok=True)
        (config_dir / "config.toml").write_text(cargo_config, encoding="utf-8")

        # Force a full rebuild so every invocation does the same work.
        run(["cargo", "clean"])

        command = ["cargo", "build", "--release", "--target", target_triple]
        if time_file is not None:
            command = [
                "/usr/bin/time",
                f"--output={time_file}",
                "--format=wall time (s) %e\nuser time (s) %U\nmax RSS %M\n",
            ] + command
        run(command)
272
273
def build_rust(
    *,
    generate_frontend_profile: bool = False,
    generate_llvm_profile: bool = False,
    use_frontend_profile: bool = False,
    use_llvm_profile: bool = False,
):
    """Emerge dev-lang/rust-host and the cross Rust packages.

    The keyword arguments select which profile-related USE flags are added.
    Using profiles and generating them are mutually exclusive, and only one
    of the frontend/LLVM profiles can be generated per build; violating
    either rule fails an assertion before anything is built.

    Any USE value already present in the environment is kept and placed in
    front of the flags added here.
    """
    if use_frontend_profile or use_llvm_profile:
        assert not (generate_frontend_profile or generate_llvm_profile), (
            "Can't build a compiler to both use profile information "
            "and generate it"
        )

    assert not (
        generate_frontend_profile and generate_llvm_profile
    ), "Can't generate both frontend and LLVM profile information"

    optional_flags = (
        (generate_frontend_profile, "rust_profile_frontend_generate"),
        (generate_llvm_profile, "rust_profile_llvm_generate"),
        (use_frontend_profile, "rust_profile_frontend_use_local"),
        (use_llvm_profile, "rust_profile_llvm_use_local"),
    )
    use_flags = ["-rust_profile_frontend_use", "-rust_profile_llvm_use"]
    use_flags.extend(flag for enabled, flag in optional_flags if enabled)

    inherited_use = os.getenv("USE", "").rstrip()
    use = (inherited_use + " " + " ".join(use_flags)).strip()

    # The host ("-pc-linux-") triple is covered by dev-lang/rust-host; every
    # other triple has its own cross package.
    cross_packages = [
        f"cross-{triple}/rust"
        for triple in TARGET_TRIPLES
        if "-pc-linux-" not in triple
    ]

    # -E to preserve environment variables like USE, FEATURES, etc.
    emerge_command = ["sudo", "-E", "emerge", "-j", "dev-lang/rust-host"]
    run(emerge_command + cross_packages, env={"USE": use})
318    )
319
320
def merge_profdata(llvm_or_frontend, *, source_directory: Path, dest: Path):
    """Merge all `*.profraw` files in a directory into one profdata file.

    Args:
        llvm_or_frontend: Either "llvm" or "frontend"; selects which
            `llvm-profdata` binary performs the merge.
        source_directory: Directory searched (non-recursively) for
            `*.profraw` files.
        dest: Output profdata file. Its parent directory is created if
            needed.

    Fails an assertion if `llvm_or_frontend` is not one of the two accepted
    values or if `source_directory` contains no profraw files.
    """
    assert llvm_or_frontend in ("llvm", "frontend")

    # The two `llvm-profdata` programs come from different LLVM versions, and
    # may support different versions of the profdata format, so make sure to
    # use the right one.
    profdata_tools = {
        "llvm": "/usr/bin/llvm-profdata",
        "frontend": "/usr/libexec/rust/llvm-profdata",
    }
    tool = profdata_tools[llvm_or_frontend]

    dest.parent.mkdir(parents=True, exist_ok=True)

    profraw_files = [*source_directory.glob("*.profraw")]
    assert profraw_files, f"No profraw files found in {source_directory}"
    run([tool, "merge", f"--output={dest}", *profraw_files])
338
339
def do_upload_profdata(*, source: Path, dest: PurePosixPath):
    """Compress a profdata file with xz and upload it as public-read.

    The original `source` file is kept; the compressed copy is written next
    to it as `{source}.xz` (overwriting an existing one) and that copy is
    what gets uploaded to `dest`.
    """
    run(["xz", "--keep", "--compress", "--force", source])
    compressed = source.parent / f"{source.name}.xz"
    upload_file(source=compressed, dest=dest, public_read=True)
344
345
def upload_file(
    *, source: Path, dest: PurePosixPath, public_read: bool = False
):
    """Copy a local file to GS with `gsutil cp`.

    Args:
        source: Local file to upload.
        dest: Destination path; "gs:/" is prepended to form the URL.
        public_read: If True, the copy is made with `-a public-read`.
    """
    acl_args = ["-a", "public-read"] if public_read else []
    run(["gsutil", "cp", *acl_args, source, f"gs:/{dest}"])
353
354
def maybe_download_crate(*, crate_name: str, crate_version: str):
    """Download and unpack a crate unless its directory is already present.

    Only the existence of LOCAL_BASE/crates/{crate_name}-{crate_version} is
    checked; its contents are not validated.
    """
    crate_dir = LOCAL_BASE / "crates" / f"{crate_name}-{crate_version}"
    if crate_dir.is_dir():
        logging.info("Crate already downloaded")
        return

    logging.info("Downloading crate")
    download_unpack_crate(crate_name=crate_name, crate_version=crate_version)
365
366
def generate(args):
    """Handle the `generate` subcommand: produce LLVM and frontend profdata.

    For each of the two profile kinds (LLVM first, then frontend) this builds
    an instrumented Rust, builds the selected crate for every target triple,
    and merges the `*.profraw` files found in the corresponding
    LOCAL_BASE/{llvm,frontend}-profraw directory into
    LOCAL_BASE/profdata/{crate_name}-{crate_version}/{llvm,frontend}.profdata.

    Both profraw directories are wiped up front so stale data is not merged.
    NOTE(review): presumably the instrumented compilers are configured
    elsewhere to write their profraw output into those directories; that is
    not visible in this file.
    """
    crate = {
        "crate_name": args.crate_name,
        "crate_version": args.crate_version,
    }
    maybe_download_crate(**crate)

    profdata_dir = (
        LOCAL_BASE / "profdata" / f"{args.crate_name}-{args.crate_version}"
    )
    llvm_dir = LOCAL_BASE / "llvm-profraw"
    frontend_dir = LOCAL_BASE / "frontend-profraw"
    for stale_dir in (llvm_dir, frontend_dir):
        shutil.rmtree(stale_dir, ignore_errors=True)

    # Phase 1: LLVM profile.
    logging.info("Building Rust instrumented for llvm")
    build_rust(generate_llvm_profile=True)

    llvm_dir.mkdir(parents=True, exist_ok=True)
    for triple in TARGET_TRIPLES:
        logging.info(
            "Building crate with LLVM instrumentation, for triple %s", triple
        )
        build_crate(target_triple=triple, **crate)

    logging.info("Merging LLVM profile data")
    merge_profdata(
        "llvm",
        source_directory=llvm_dir,
        dest=profdata_dir / "llvm.profdata",
    )

    # Phase 2: frontend profile.
    logging.info("Building Rust instrumented for frontend")
    build_rust(generate_frontend_profile=True)

    frontend_dir.mkdir(parents=True, exist_ok=True)
    for triple in TARGET_TRIPLES:
        logging.info(
            "Building crate with frontend instrumentation, for triple %s",
            triple,
        )
        build_crate(target_triple=triple, **crate)

    logging.info("Merging frontend profile data")
    merge_profdata(
        "frontend",
        source_directory=frontend_dir,
        dest=profdata_dir / "frontend.profdata",
    )
429
430
def benchmark_nopgo(args):
    """Handle `benchmark-nopgo`: time crate builds with a non-PGO Rust.

    Builds Rust without any profile flags, then builds the benchmark crate
    once per target triple under `/usr/bin/time`, writing each result to
    LOCAL_BASE/benchmarks/nopgo/{bench_crate}-{triple}. Every file found in
    that directory afterwards is uploaded to
    {GS_BASE}/benchmarks/{rust_version}/nopgo{suffix}/.
    """
    bench_crate = f"{args.bench_crate_name}-{args.bench_crate_version}"
    maybe_download_crate(
        crate_name=args.bench_crate_name, crate_version=args.bench_crate_version
    )

    logging.info("Building Rust, no PGO")
    build_rust()

    time_directory = LOCAL_BASE / "benchmarks" / "nopgo"
    logging.info("Benchmarking crate build with no PGO")
    time_directory.mkdir(parents=True, exist_ok=True)
    for triple in TARGET_TRIPLES:
        build_crate(
            crate_name=args.bench_crate_name,
            crate_version=args.bench_crate_version,
            target_triple=triple,
            time_file=time_directory / f"{bench_crate}-{triple}",
        )

    dest_directory = (
        GS_BASE / "benchmarks" / get_rust_version() / f"nopgo{args.suffix}"
    )
    logging.info("Uploading benchmark data")
    for timing_file in time_directory.iterdir():
        upload_file(source=timing_file, dest=dest_directory / timing_file.name)
461        )
462
463
def benchmark_pgo(args):
    """Handle `benchmark-pgo`: time crate builds with a PGO-optimized Rust.

    Copies the locally generated llvm.profdata and frontend.profdata for the
    profile crate into the dev-lang/rust `files` directory of
    chromiumos-overlay, builds Rust with both local profiles enabled, then
    builds the benchmark crate once per target triple under `/usr/bin/time`.
    Results go to LOCAL_BASE/benchmarks/{crate_name}-{crate_version}/ and
    every file in that directory is uploaded to
    {GS_BASE}/benchmarks/{rust_version}/{crate_name}-{crate_version}{suffix}/.
    """
    profile_crate = f"{args.crate_name}-{args.crate_version}"
    bench_crate = f"{args.bench_crate_name}-{args.bench_crate_version}"

    maybe_download_crate(
        crate_name=args.bench_crate_name, crate_version=args.bench_crate_version
    )

    files_dir = Path(
        "/mnt/host/source/src/third_party/chromiumos-overlay",
        "dev-lang/rust/files",
    )
    profdata_dir = LOCAL_BASE / "profdata" / profile_crate

    logging.info("Copying profile data to be used in building Rust")
    for profdata_name in ("llvm.profdata", "frontend.profdata"):
        run(["cp", profdata_dir / profdata_name, files_dir])

    logging.info("Building Rust with PGO")
    build_rust(use_llvm_profile=True, use_frontend_profile=True)

    time_directory = LOCAL_BASE / "benchmarks" / profile_crate
    time_directory.mkdir(parents=True, exist_ok=True)
    logging.info("Benchmarking crate built with PGO")
    for triple in TARGET_TRIPLES:
        build_crate(
            crate_name=args.bench_crate_name,
            crate_version=args.bench_crate_version,
            target_triple=triple,
            time_file=time_directory / f"{bench_crate}-{triple}",
        )

    dest_directory = (
        GS_BASE
        / "benchmarks"
        / get_rust_version()
        / f"{profile_crate}{args.suffix}"
    )
    logging.info("Uploading benchmark data")
    for timing_file in time_directory.iterdir():
        upload_file(source=timing_file, dest=dest_directory / timing_file.name)
531
532
def upload_profdata(args):
    """Handle `upload-profdata`: publish both profdata files to distfiles.

    The llvm.profdata and frontend.profdata files found under
    LOCAL_BASE/profdata/{crate_name}-{crate_version}/ are compressed and
    uploaded (LLVM first) to
    {GS_DISTFILES}/rust-pgo-{rust_version}-{llvm,frontend}{suffix}.profdata.xz.
    """
    rust_version = get_rust_version()
    profdata_dir = (
        LOCAL_BASE / "profdata" / f"{args.crate_name}-{args.crate_version}"
    )

    uploads = (
        ("Uploading LLVM profdata", "llvm"),
        ("Uploading frontend profdata", "frontend"),
    )
    for log_message, component in uploads:
        logging.info(log_message)
        remote_name = (
            f"rust-pgo-{rust_version}-{component}{args.suffix}.profdata.xz"
        )
        do_upload_profdata(
            source=profdata_dir / f"{component}.profdata",
            dest=GS_DISTFILES / remote_name,
        )
556
557
def main(argv: List[str]) -> int:
    """Parse the command line and dispatch to the selected subcommand.

    Args:
        argv: Full argument vector; `argv[0]` is used as the program name and
            the remaining elements are parsed.

    Returns:
        0 once the subcommand handler has returned. Argument errors exit
        through argparse instead of returning.
    """
    logging.basicConfig(
        stream=sys.stdout, level=logging.NOTSET, format="%(message)s"
    )

    def add_bench_crate_options(subparser):
        """Add the options selecting the crate whose build is timed."""
        subparser.add_argument(
            "--bench-crate-name",
            default=CRATE_NAME,
            help="Name of the crate whose build to benchmark",
        )
        subparser.add_argument(
            "--bench-crate-version",
            default=CRATE_VERSION,
            help="Version of the crate whose benchmark to build",
        )

    def add_profile_crate_options(subparser):
        """Add the options selecting the crate whose profile is used."""
        subparser.add_argument(
            "--crate-name",
            default=CRATE_NAME,
            help="Name of the crate whose profile to use",
        )
        subparser.add_argument(
            "--crate-version",
            default=CRATE_VERSION,
            help="Version of the crate whose profile to use",
        )

    def add_suffix_option(subparser):
        """Add the --suffix option shared by several subcommands."""
        subparser.add_argument(
            "--suffix",
            default="",
            help="Suffix to distinguish benchmarks and profdata with identical "
            "rustc versions",
        )

    parser = argparse.ArgumentParser(
        prog=argv[0],
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    subparsers = parser.add_subparsers(dest="command", help="")
    # A subcommand must always be given.
    subparsers.required = True

    # generate
    generate_parser = subparsers.add_parser(
        "generate",
        help="Generate LLVM and frontend profdata files by building "
        "instrumented Rust compilers, and using them to build the "
        "indicated crate (downloading the crate if necessary).",
    )
    generate_parser.set_defaults(func=generate)
    generate_parser.add_argument(
        "--crate-name", default=CRATE_NAME, help="Name of the crate to build"
    )
    generate_parser.add_argument(
        "--crate-version",
        default=CRATE_VERSION,
        help="Version of the crate to build",
    )

    # benchmark-nopgo
    nopgo_parser = subparsers.add_parser(
        "benchmark-nopgo",
        help="Build the Rust compiler without PGO, benchmark "
        "the build of the indicated crate, and upload "
        "the benchmark data.",
    )
    nopgo_parser.set_defaults(func=benchmark_nopgo)
    add_bench_crate_options(nopgo_parser)
    add_suffix_option(nopgo_parser)

    # benchmark-pgo
    pgo_parser = subparsers.add_parser(
        "benchmark-pgo",
        help="Build the Rust compiler using PGO with the indicated "
        "profdata files, benchmark the build of the indicated crate, "
        "and upload the benchmark data.",
    )
    pgo_parser.set_defaults(func=benchmark_pgo)
    add_bench_crate_options(pgo_parser)
    add_profile_crate_options(pgo_parser)
    add_suffix_option(pgo_parser)

    # upload-profdata
    upload_parser = subparsers.add_parser(
        "upload-profdata", help="Upload the profdata files"
    )
    upload_parser.set_defaults(func=upload_profdata)
    add_profile_crate_options(upload_parser)
    add_suffix_option(upload_parser)

    args = parser.parse_args(argv[1:])

    # Make sure the local working directories exist before any handler runs.
    for subdirectory in (
        "crates",
        "llvm-profraw",
        "frontend-profraw",
        "benchmarks",
    ):
        (LOCAL_BASE / subdirectory).mkdir(parents=True, exist_ok=True)

    args.func(args)

    return 0
676
677
# Script entry point: hand the full argv to main() (which uses argv[0] as the
# program name) and use its return value as the process exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv))
680