xref: /aosp_15_r20/external/toolchain-utils/afdo_redaction/remove_indirect_calls.py (revision 760c253c1ed00ce9abd48f8546f08516e57485fe)
1*760c253cSXin Li#!/usr/bin/env python3
2*760c253cSXin Li# -*- coding: utf-8 -*-
3*760c253cSXin Li# Copyright 2019 The ChromiumOS Authors
4*760c253cSXin Li# Use of this source code is governed by a BSD-style license that can be
5*760c253cSXin Li# found in the LICENSE file.
6*760c253cSXin Li
7*760c253cSXin Li"""Script to remove all indirect call targets from textual AFDO profiles.
8*760c253cSXin Li
9*760c253cSXin LiIndirect call samples can cause code to appear 'live' when it otherwise
10*760c253cSXin Liwouldn't be. This resurrection can happen either by the way of profile-based
11*760c253cSXin Lispeculative devirtualization, or because of imprecision in LLVM's liveness
12*760c253cSXin Licalculations when performing LTO.
13*760c253cSXin Li
14*760c253cSXin LiThis generally isn't a problem when an AFDO profile is applied to the binary it
15*760c253cSXin Liwas collected on. However, because we e.g., build NaCl from the same set of
16*760c253cSXin Liobjects as Chrome, this can become problematic, and lead to NaCl doubling in
17*760c253cSXin Lisize (or worse). See crbug.com/1005023 and crbug.com/916130.
18*760c253cSXin Li"""
19*760c253cSXin Li
20*760c253cSXin Li
21*760c253cSXin Liimport argparse
22*760c253cSXin Liimport re
23*760c253cSXin Li
24*760c253cSXin Li
25*760c253cSXin Lidef _remove_indirect_call_targets(lines):
26*760c253cSXin Li    # Lines with indirect call targets look like:
27*760c253cSXin Li    #   1.1: 1234 foo:111 bar:122
28*760c253cSXin Li    #
29*760c253cSXin Li    # Where 1.1 is the line info/discriminator, 1234 is the total number of
30*760c253cSXin Li    # samples seen for that line/discriminator, foo:111 is "111 of the calls here
31*760c253cSXin Li    # went to foo," and bar:122 is "122 of the calls here went to bar."
32*760c253cSXin Li    call_target_re = re.compile(
33*760c253cSXin Li        r"""
34*760c253cSXin Li      ^\s+                    # Top-level lines are function records.
35*760c253cSXin Li      \d+(?:\.\d+)?:          # Line info/discriminator
36*760c253cSXin Li      \s+
37*760c253cSXin Li      \d+                     # Total sample count
38*760c253cSXin Li      \s+
39*760c253cSXin Li      ((?:[^\s:]+:\d+\s*)+)   # Indirect call target(s)
40*760c253cSXin Li      $
41*760c253cSXin Li  """,
42*760c253cSXin Li        re.VERBOSE,
43*760c253cSXin Li    )
44*760c253cSXin Li    for line in lines:
45*760c253cSXin Li        line = line.rstrip()
46*760c253cSXin Li
47*760c253cSXin Li        match = call_target_re.match(line)
48*760c253cSXin Li        if not match:
49*760c253cSXin Li            yield line + "\n"
50*760c253cSXin Li            continue
51*760c253cSXin Li
52*760c253cSXin Li        group_start, group_end = match.span(1)
53*760c253cSXin Li        assert group_end == len(line)
54*760c253cSXin Li        yield line[:group_start].rstrip() + "\n"
55*760c253cSXin Li
56*760c253cSXin Li
def run(input_stream, output_stream):
    """Copy a textual AFDO profile, dropping its indirect call targets.

    Args:
        input_stream: An iterable of profile lines (e.g. an open text file).
        output_stream: An object with a `write(str)` method; it receives
            each processed line in order.
    """
    emit = output_stream.write
    for redacted_line in _remove_indirect_call_targets(input_stream):
        emit(redacted_line)
60*760c253cSXin Li
61*760c253cSXin Li
def main():
    """Command-line entry point: redact --input and write it to --output.

    Both flags default to the process's standard streams (via /dev/stdin
    and /dev/stdout), so the script can be used as a filter in a pipeline.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--input",
        default="/dev/stdin",
        help="File to read from. Defaults to stdin.",
    )
    parser.add_argument(
        "--output",
        default="/dev/stdout",
        help="File to write to. Defaults to stdout.",
    )
    args = parser.parse_args()

    # Pin the encoding instead of inheriting the locale's default, so the
    # result does not depend on the environment the tool runs in.
    # "surrogateescape" lets any bytes that are not valid UTF-8 pass through
    # unchanged rather than aborting the whole run.
    text_mode = {"encoding": "utf-8", "errors": "surrogateescape"}
    with open(args.input, **text_mode) as input_stream:
        with open(args.output, "w", **text_mode) as output_stream:
            run(input_stream, output_stream)
82*760c253cSXin Li
83*760c253cSXin Li
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
86