xref: /aosp_15_r20/external/toolchain-utils/pgo_tools/generate_pgo_profile.py (revision 760c253c1ed00ce9abd48f8546f08516e57485fe)
1*760c253cSXin Li#!/usr/bin/env python3
2*760c253cSXin Li# Copyright 2023 The ChromiumOS Authors
3*760c253cSXin Li# Use of this source code is governed by a BSD-style license that can be
4*760c253cSXin Li# found in the LICENSE file.
5*760c253cSXin Li
"""Generates a PGO profile for LLVM.

**This script is meant to be run from inside of the chroot.**

Note that this script has a few (perhaps surprising) side-effects:
1. The first time this is run in a chroot, it will pack up your existing llvm
   and save it as a binpkg.
2. This script clobbers your llvm installation. If the script is run to
   completion, your old installation will be restored. If it does not, it may
   not be.
"""
17*760c253cSXin Li
18*760c253cSXin Liimport argparse
19*760c253cSXin Liimport dataclasses
20*760c253cSXin Liimport logging
21*760c253cSXin Liimport os
22*760c253cSXin Lifrom pathlib import Path
23*760c253cSXin Liimport shlex
24*760c253cSXin Liimport shutil
25*760c253cSXin Liimport subprocess
26*760c253cSXin Liimport sys
27*760c253cSXin Liimport tempfile
28*760c253cSXin Liimport textwrap
29*760c253cSXin Lifrom typing import Dict, FrozenSet, List, Optional
30*760c253cSXin Li
31*760c253cSXin Liimport pgo_tools
32*760c253cSXin Li
33*760c253cSXin Li
# This script runs `quickpkg` on LLVM. The path of the resulting binpkg is
# recorded in this stamp file so later steps (and later runs) can find it.
SAVED_LLVM_BINPKG_STAMP = Path("/tmp/generate_pgo_profile_old_llvm.txt")

# Triple to build with when not trying to get backend coverage.
HOST_TRIPLE = "x86_64-pc-linux-gnu"

# Triples we want coverage for. The host triple is listed first.
IMPORTANT_TRIPLES = (
    HOST_TRIPLE,
    "x86_64-cros-linux-gnu",
    "armv7a-cros-linux-gnueabihf",
    "aarch64-cros-linux-gnu",
)

# Packages required under `cross-<triple>/` for every non-host triple.
_CROSS_LIB_PACKAGES = ("glibc", "libcxx", "llvm-libunwind", "linux-headers")

# Set of all of the cross-* libraries we need (the host triple needs none).
ALL_NEEDED_CROSS_LIBS = frozenset(
    f"cross-{triple}/{package}"
    for triple in IMPORTANT_TRIPLES
    if triple != HOST_TRIPLE
    for package in _CROSS_LIB_PACKAGES
)
56*760c253cSXin Li
57*760c253cSXin Li
def ensure_llvm_binpkg_exists() -> bool:
    """Makes sure an LLVM binpkg is available to fall back on.

    A binpkg recorded by an earlier run is reused when both the stamp file
    and the package it names still exist; otherwise a new one is packed and
    the stamp file is (re)written with its path.

    Returns:
        True if this call packed a new binpkg, False if an existing one was
        found.
    """
    stamp = SAVED_LLVM_BINPKG_STAMP
    # The stamp alone isn't enough: portage treats the package as a cache
    # artifact, so it can _technically_ be GC'ed at any time.
    if stamp.exists() and Path(stamp.read_text(encoding="utf-8")).exists():
        return False

    fresh_pkg = pgo_tools.quickpkg_llvm()
    stamp.write_text(str(fresh_pkg), encoding="utf-8")
    return True
75*760c253cSXin Li
76*760c253cSXin Li
def restore_llvm_binpkg():
    """Installs the binpkg created by ensure_llvm_binpkg_exists.

    The package path is read from the stamp file at SAVED_LLVM_BINPKG_STAMP.

    Raises:
        AssertionError: if the binpkg named by the stamp file does not exist.
    """
    logging.info("Restoring non-PGO'ed LLVM installation")
    pkg = Path(SAVED_LLVM_BINPKG_STAMP.read_text(encoding="utf-8"))
    # Raise explicitly instead of using `assert`: assert statements are
    # removed when Python runs with -O, and this check must never be skipped.
    # The exception type is kept as AssertionError for compatibility.
    if not pkg.exists():
        raise AssertionError(
            f"Non-PGO'ed binpkg at {pkg} does not exist. Can't restore"
        )
    pgo_tools.run(pgo_tools.generate_quickpkg_restoration_command(pkg))
85*760c253cSXin Li
86*760c253cSXin Li
def find_missing_cross_libs() -> FrozenSet[str]:
    """Returns cross-* libraries that need to be installed for workloads.

    Lists installed `cross-*/*` packages with `equery` and returns the members
    of ALL_NEEDED_CROSS_LIBS that are not among them.

    Raises:
        subprocess.CalledProcessError: if equery fails with any exit code
            other than 0 or 3.
    """
    query = pgo_tools.run(
        ["equery", "l", "--format=$cp", "cross-*/*"],
        check=False,
        stdout=subprocess.PIPE,
    )

    # equery exits with code 3 when nothing matched, i.e. nothing is
    # installed yet and everything is missing.
    if query.returncode == 3:
        return ALL_NEEDED_CROSS_LIBS

    # Any other non-zero exit code is a genuine failure.
    query.check_returncode()
    installed = {line.strip() for line in query.stdout.splitlines()}
    return ALL_NEEDED_CROSS_LIBS.difference(installed)
102*760c253cSXin Li
103*760c253cSXin Li
def ensure_cross_libs_are_installed():
    """Ensures that we have cross-* libs for all `IMPORTANT_TRIPLES`.

    Does nothing beyond logging when no library is missing; otherwise emerges
    the missing packages in sorted order.
    """
    missing = find_missing_cross_libs()
    if not missing:
        logging.info("All cross-compiler libraries are already installed")
        return

    # Sort so the log line and the emerge command line are stable.
    to_install = sorted(missing)
    logging.info("Installing cross-compiler libs: %s", to_install)
    pgo_tools.run(["sudo", "emerge", "-j", "-G", *to_install])
116*760c253cSXin Li
117*760c253cSXin Li
def emerge_pgo_generate_llvm():
    """Emerges a sys-devel/llvm with PGO instrumentation enabled.

    The forced USE flags and FEATURES are appended to whatever `USE` and
    `FEATURES` already hold in this process's environment.
    """
    forced_use = "".join(
        (
            "llvm_pgo_generate -llvm_pgo_use",
            # Turn ThinLTO off, since doing so results in way faster builds.
            # This is assumed to be OK, since:
            #   - ThinLTO should have no significant impact on where Clang
            #     puts instrprof counters.
            #   - In practice, both "PGO generated with ThinLTO enabled," and
            #     "PGO generated without ThinLTO enabled," were benchmarked,
            #     and the performance difference between the two was in the
            #     noise.
            " -thinlto",
            # Turn ccache off, since if there are valid ccache artifacts from
            # prior runs of this script, ccache will lead to us not getting
            # profdata from those.
            " -wrapper_ccache",
        )
    )
    inherited_use = os.environ.get("USE", "")
    use = f"{inherited_use} {forced_use}".strip()

    # Use FEATURES=ccache since it's not much of a CPU time penalty, and if a
    # user runs this script repeatedly, they'll appreciate it. :)
    forced_features = "ccache"
    inherited_features = os.environ.get("FEATURES", "")
    features = f"{inherited_features} {forced_features}".strip()

    logging.info("Building LLVM with USE=%s", shlex.quote(use))
    emerge_command = [
        "sudo",
        f"FEATURES={features}",
        f"USE={use}",
        "emerge",
        "sys-devel/llvm",
    ]
    pgo_tools.run(emerge_command)
151*760c253cSXin Li
152*760c253cSXin Li
def build_profiling_env(profile_dir: Path) -> Dict[str, str]:
    """Returns environment variables that direct profraw output to a directory.

    Args:
        profile_dir: directory the profile file pattern should point into.

    Returns:
        A new dict the caller is free to modify. The `%m` in the file name is
        passed through verbatim; presumably the LLVM profiling runtime
        expands it.
    """
    env: Dict[str, str] = {"LLVM_PROFILE_OUTPUT_FORMAT": "profraw"}
    env["LLVM_PROFILE_FILE"] = str(profile_dir / "profile-%m.profraw")
    return env
159*760c253cSXin Li
160*760c253cSXin Li
def ensure_clang_invocations_generate_profiles(clang_bin: str, tmpdir: Path):
    """Raises an exception if clang doesn't generate profraw files.

    Runs `clang_bin --help` with the profiling environment pointed at a
    scratch directory, then checks that something was written there.

    Args:
        clang_bin: the path to a clang binary.
        tmpdir: a place where this function can put temporary files.

    Raises:
        ValueError: if the scratch directory is still empty after the run.
            The scratch directory is left in place in that case.
    """
    scratch_dir = tmpdir / "ensure_profiles_generated"
    scratch_dir.mkdir(parents=True)
    pgo_tools.run(
        [clang_bin, "--help"],
        extra_env=build_profiling_env(scratch_dir),
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    # Path objects are always truthy, so any() is True exactly when the
    # directory has at least one entry.
    if not any(scratch_dir.iterdir()):
        raise ValueError(
            f"The clang binary at {clang_bin} generated no profile"
        )
    shutil.rmtree(scratch_dir)
182*760c253cSXin Li
183*760c253cSXin Li
def write_unified_cmake_file(
    into_dir: Path, absl_subdir: Path, gtest_subdir: Path
):
    """Writes a top-level CMakeLists.txt that pulls in gtest and absl.

    Args:
        into_dir: directory in which to create `CMakeLists.txt`.
        absl_subdir: path to the absl sources, as passed to
            `add_subdirectory`.
        gtest_subdir: path to the gtest sources, as passed to
            `add_subdirectory`. It is added before absl.
    """
    cmake_text = textwrap.dedent(
        f"""\
        cmake_minimum_required(VERSION 3.10)

        project(generate_pgo)

        add_subdirectory({gtest_subdir})
        add_subdirectory({absl_subdir})"""
    )
    cmake_lists = into_dir / "CMakeLists.txt"
    cmake_lists.write_text(cmake_text, encoding="utf-8")
199*760c253cSXin Li
200*760c253cSXin Li
def fetch_workloads_into(target_dir: Path):
    """Fetches PGO generation workloads into `target_dir`.

    Downloads the absl and gtest tarballs with `gsutil`, unpacks each into
    its own subdirectory, and writes a CMakeLists.txt at the top of
    `target_dir` that builds both.

    Args:
        target_dir: directory to create and populate. It must not already
            exist.
    """
    # The workload here is absl and gtest. The reasoning behind that selection
    # was essentially a mix of:
    # - absl is reasonably-written and self-contained
    # - gtest is needed if tests are to be built; in order to have absl do much
    #   of any linking, gtest is necessary.
    #
    # Use the version of absl that's bundled with ChromeOS at the time of
    # writing.
    target_dir.mkdir(parents=True)

    def download_and_unpack(url: str, dest: Path):
        # The archive lands next to the extraction dirs, named after the URL.
        archive = target_dir / os.path.basename(url)
        pgo_tools.run(["gsutil", "cp", url, archive])
        dest.mkdir()
        pgo_tools.run(["tar", "xaf", archive], cwd=dest)

    absl_root = target_dir / "absl"
    gtest_root = target_dir / "gtest"
    downloads = (
        (
            "gs://chromeos-localmirror/distfiles/"
            "abseil-cpp-a86bb8a97e38bc1361289a786410c0eb5824099c.tar.bz2",
            absl_root,
        ),
        (
            "gs://chromeos-mirror/gentoo/distfiles/"
            "gtest-1b18723e874b256c1e39378c6774a90701d70f7a.tar.gz",
            gtest_root,
        ),
    )
    for url, dest in downloads:
        download_and_unpack(url, dest)

    # Each tarball is expected to unpack to exactly one top-level directory.
    absl_src = read_exactly_one_dirent(absl_root)
    gtest_src = read_exactly_one_dirent(gtest_root)
    write_unified_cmake_file(
        into_dir=target_dir,
        absl_subdir=absl_src.relative_to(target_dir),
        gtest_subdir=gtest_src.relative_to(target_dir),
    )
251*760c253cSXin Li
252*760c253cSXin Li
@dataclasses.dataclass(frozen=True)
class WorkloadRunner:
    """Runs benchmark workloads.

    Each run configures the workload tree with cmake and builds it with
    ninja, using the clang for a given triple, with the profiling
    environment set for both steps.
    """

    # Directory the profiling environment points profraw output at.
    profraw_dir: Path
    # Source directory passed to cmake.
    target_dir: Path
    # Build directory; deleted and recreated at the start of every run.
    out_dir: Path

    def run(
        self,
        triple: str,
        extra_cflags: Optional[str] = None,
        sysroot: Optional[str] = None,
    ):
        """Configures and builds the workload with `<triple>-clang`.

        Args:
            triple: target triple; selects `<triple>-clang` and
                `<triple>-clang++` as the compilers.
            extra_cflags: if set, used as both CMAKE_C_FLAGS and
                CMAKE_CXX_FLAGS.
            sysroot: if set, exported as `SYSROOT` to cmake and ninja.
        """
        logging.info(
            "Running workload for triple %s, extra cflags %r",
            triple,
            extra_cflags,
        )

        # Always start from an empty build directory.
        build_dir = self.out_dir
        if build_dir.exists():
            shutil.rmtree(build_dir)
        build_dir.mkdir(parents=True)

        env = build_profiling_env(self.profraw_dir)
        if sysroot:
            env["SYSROOT"] = sysroot

        clang = triple + "-clang"
        configure: pgo_tools.Command = [
            "cmake",
            "-G",
            "Ninja",
            "-DCMAKE_BUILD_TYPE=RelWithDebInfo",
            f"-DCMAKE_C_COMPILER={clang}",
            f"-DCMAKE_CXX_COMPILER={clang}++",
            "-DABSL_BUILD_TESTING=ON",
            "-DABSL_USE_EXTERNAL_GOOGLETEST=ON",
            "-DABSL_USE_GOOGLETEST_HEAD=OFF",
            "-DABSL_FIND_GOOGLETEST=OFF",
        ]
        if extra_cflags:
            configure.extend(
                (
                    f"-DCMAKE_C_FLAGS={extra_cflags}",
                    f"-DCMAKE_CXX_FLAGS={extra_cflags}",
                )
            )
        configure.append(self.target_dir)

        # Configure first, then build; both share the same env and cwd.
        for command in (configure, ["ninja", "-v", "all"]):
            pgo_tools.run(
                command,
                extra_env=env,
                cwd=build_dir,
            )
312*760c253cSXin Li
313*760c253cSXin Li
def read_exactly_one_dirent(directory: Path) -> Path:
    """Returns the single Path under the given directory. Raises otherwise.

    Raises:
        ValueError: if the directory is empty or holds more than one entry.
    """
    seen = []
    for entry in directory.iterdir():
        seen.append(entry)
        # A second entry already settles the answer; stop scanning.
        if len(seen) > 1:
            break
    if len(seen) == 1:
        return seen[0]
    raise ValueError(f"Expected exactly one entry under {directory}")
322*760c253cSXin Li
323*760c253cSXin Li
def run_workloads(target_dir: Path) -> Path:
    """Runs all of our workloads in target_dir.

    Args:
        target_dir: a directory that already had `fetch_workloads_into` called
            on it.

    Returns:
        A directory in which profraw files from running the workloads are
        saved.
    """
    profiles = target_dir / "profiles"
    profiles.mkdir()

    runner = WorkloadRunner(
        profraw_dir=profiles,
        target_dir=target_dir,
        out_dir=target_dir / "out",
    )

    # Run the workload once per triple. Only non-host triples get a sysroot.
    for triple in IMPORTANT_TRIPLES:
        if triple == HOST_TRIPLE:
            sysroot = None
        else:
            sysroot = f"/usr/{triple}"
        runner.run(triple, sysroot=sysroot)

    # Add a run of ThinLTO, so any ThinLTO-specific lld bits get exercised.
    # Also, since CrOS uses -Os often, exercise that.
    runner.run(HOST_TRIPLE, extra_cflags="-flto=thin -Os")
    return profiles
355*760c253cSXin Li
356*760c253cSXin Li
def convert_profraw_to_pgo_profile(profraw_dir: Path) -> Path:
    """Creates a PGO profile from the profraw profiles in profraw_dir.

    Args:
        profraw_dir: directory holding `profile-*profraw` files.

    Returns:
        Path of the merged profile, `merged.prof` inside `profraw_dir`.

    Raises:
        ValueError: if no matching profraw file is found.
    """
    profraws = list(profraw_dir.glob("profile-*profraw"))
    if not profraws:
        raise ValueError("No profraw files generated?")

    logging.info(
        "Creating a PGO profile from %d profraw files", len(profraws)
    )
    merged = profraw_dir / "merged.prof"
    pgo_tools.run(
        [
            "llvm-profdata",
            "merge",
            "--instr",
            f"--output={merged}",
            *profraws,
        ]
    )
    return merged
375*760c253cSXin Li
376*760c253cSXin Li
def _parse_args(argv: List[str]) -> argparse.Namespace:
    """Parses this script's command-line flags out of `argv`."""
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--output",
        required=True,
        type=Path,
        help="Where to put the PGO profile",
    )
    parser.add_argument(
        "--use-old-binpkg",
        action="store_true",
        help="""
        This script saves your initial LLVM installation as a binpkg, so it may
        restore that installation later in the build. Passing --use-old-binpkg
        allows this script to use a binpkg from a prior invocation of this
        script.
        """,
    )
    return parser.parse_args(argv)


def main(argv: List[str]):
    """Builds an instrumented LLVM, runs workloads, and writes a PGO profile.

    On success the temporary working directory is removed and the profile is
    copied to `--output`. On any failure the temporary directory is kept for
    debugging and the exception propagates.

    Args:
        argv: command-line arguments, without the program name.
    """
    logging.basicConfig(
        format=">> %(asctime)s: %(levelname)s: %(filename)s:%(lineno)d: "
        "%(message)s",
        level=logging.DEBUG,
    )
    opts = _parse_args(argv)

    pgo_tools.exit_if_not_in_chroot()

    created_binpkg = ensure_llvm_binpkg_exists()
    # Reusing a binpkg left by an earlier run requires explicit opt-in.
    if not (created_binpkg or opts.use_old_binpkg):
        sys.exit(
            textwrap.dedent(
                f"""\
                A LLVM binpkg packed by a previous run of this script is
                available. If you intend this run to be another attempt at the
                previous run, please pass --use-old-binpkg (so the old LLVM
                binpkg is used as our 'baseline'). If you don't, please remove
                the file referring to it at {SAVED_LLVM_BINPKG_STAMP}.
                """
            )
        )

    logging.info("Ensuring `cross-` libraries are installed")
    ensure_cross_libs_are_installed()
    tempdir = Path(tempfile.mkdtemp(prefix="generate_llvm_pgo_profile_"))
    try:
        workloads_path = tempdir / "workloads"
        logging.info("Fetching workloads")
        fetch_workloads_into(workloads_path)

        # If our binpkg is not fresh, we may be operating with a weird LLVM
        # (e.g., a PGO'ed one ;) ). Ensure we always start with that binpkg as
        # our baseline.
        if not created_binpkg:
            restore_llvm_binpkg()

        logging.info("Building PGO instrumented LLVM")
        emerge_pgo_generate_llvm()

        logging.info("Ensuring instrumented compilers generate profiles")
        for triple in IMPORTANT_TRIPLES:
            ensure_clang_invocations_generate_profiles(
                triple + "-clang", tempdir
            )

        logging.info("Running workloads")
        profraw_dir = run_workloads(workloads_path)

        # This is a subtle but critical step. The LLVM we're currently working
        # with was built by the LLVM represented _by our binpkg_, which may be
        # a radically different version of LLVM than what was installed (e.g.,
        # it could be from our bootstrap SDK, which could be many months old).
        #
        # If our current LLVM's llvm-profdata is used to interpret the profraw
        # files:
        # 1. The profile generated will be for our new version of clang, and
        #    may therefore be too new for the older version that we still have
        #    to support.
        # 2. There may be silent incompatibilities, as the stability guarantees
        #    of profraw files are not immediately apparent.
        #
        # So restore the binpkg *before* merging the profraw files.
        logging.info("Restoring LLVM's binpkg")
        restore_llvm_binpkg()
        shutil.copyfile(
            convert_profraw_to_pgo_profile(profraw_dir), opts.output
        )
    except BaseException:
        # Leave the tempdir, as it might help people debug.
        logging.info("NOTE: Tempdir will remain at %s", tempdir)
        raise
    else:
        logging.info("Removing now-obsolete tempdir")
        shutil.rmtree(tempdir)
        logging.info("PGO profile is available at %s.", opts.output)


if __name__ == "__main__":
    main(sys.argv[1:])
478