xref: /aosp_15_r20/external/toolchain-utils/afdo_redaction/redact_profile.py (revision 760c253c1ed00ce9abd48f8546f08516e57485fe)
1*760c253cSXin Li#!/usr/bin/env python3
2*760c253cSXin Li# -*- coding: utf-8 -*-
3*760c253cSXin Li# Copyright 2018 The ChromiumOS Authors
4*760c253cSXin Li# Use of this source code is governed by a BSD-style license that can be
5*760c253cSXin Li# found in the LICENSE file.
6*760c253cSXin Li
7*760c253cSXin Li"""Script to redact apparent ICF'ed symbolsfrom textual AFDO profiles.
8*760c253cSXin Li
9*760c253cSXin LiAFDO sampling and ICF have an unfortunate interaction that causes a huge
10*760c253cSXin Liinflation in sample counts. Essentially, if you have N functions ICF'ed to the
11*760c253cSXin Lisame location, one AFDO sample in any of those N functions will count as one
12*760c253cSXin Lisample in *each* of those N functions.
13*760c253cSXin Li
14*760c253cSXin LiIn practice, there are a few forms of function bodies that are very heavily
15*760c253cSXin LiICF'ed (e.g. `ret`, `xor %eax, %eax; ret`, destructors for widely-used types
16*760c253cSXin Lilike std::map...). Recording 28,000 samples across all N thousand logical
17*760c253cSXin Lifunctions that point to the same body really hurts our AFDO numbers, given that
18*760c253cSXin Liour actual sample count across all of Chrome is something around 10,000,000.
19*760c253cSXin Li(No, really, these are actual numbers. In practice, at the time of writing,
20*760c253cSXin Lithis script eliminates >90% of our AFDO samples by count. Sometimes as high as
21*760c253cSXin Li98%.)
22*760c253cSXin Li
23*760c253cSXin LiIt reads a textual AFDO profile from stdin, and prints a 'fixed' version of it
24*760c253cSXin Lito stdout. A summary of what the script actually did is printed to stderr.
25*760c253cSXin Li"""
26*760c253cSXin Li
27*760c253cSXin Li
28*760c253cSXin Liimport collections
29*760c253cSXin Liimport re
30*760c253cSXin Liimport sys
31*760c253cSXin Li
32*760c253cSXin Li
33*760c253cSXin Lidef _count_samples(samples):
34*760c253cSXin Li    """Count the total number of samples in a function."""
35*760c253cSXin Li    line_re = re.compile(r"^(\s*)\d+(?:\.\d+)?: (\d+)\s*$")
36*760c253cSXin Li
37*760c253cSXin Li    top_level_samples = 0
38*760c253cSXin Li    all_samples = 0
39*760c253cSXin Li    for line in samples:
40*760c253cSXin Li        m = line_re.match(line)
41*760c253cSXin Li        if not m:
42*760c253cSXin Li            continue
43*760c253cSXin Li
44*760c253cSXin Li        spaces, n = m.groups()
45*760c253cSXin Li        n = int(n)
46*760c253cSXin Li        all_samples += n
47*760c253cSXin Li        if len(spaces) == 1:
48*760c253cSXin Li            top_level_samples += n
49*760c253cSXin Li
50*760c253cSXin Li    return top_level_samples, all_samples
51*760c253cSXin Li
52*760c253cSXin Li
53*760c253cSXin Li# A ProfileRecord is a set of samples for a top-level symbol in a textual AFDO
54*760c253cSXin Li# profile. function_line is the top line of said function, and `samples` is
55*760c253cSXin Li# a list of all of the sample lines below that.
56*760c253cSXin Li#
57*760c253cSXin Li# We rely on the format of these strings in some places in this script. For
58*760c253cSXin Li# reference, a full function sample will look something like:
59*760c253cSXin Li#
60*760c253cSXin Li# _ZNK5blink10PaintLayer19GetCompositingStateEv:4530:185
61*760c253cSXin Li#  6: 83
62*760c253cSXin Li#  15: 126
63*760c253cSXin Li#  62832: 126
64*760c253cSXin Li#  6: _ZNK5blink10PaintLayer14GroupedMappingEv:2349
65*760c253cSXin Li#   1: 206
66*760c253cSXin Li#   1: _ZNK5blink10PaintLayer14GroupedMappersEv:2060
67*760c253cSXin Li#    1: 206
68*760c253cSXin Li#  11: _ZNK5blink10PaintLayer25GetCompositedLayerMappingEv:800
69*760c253cSXin Li#   2.1: 80
70*760c253cSXin Li#
71*760c253cSXin Li#
72*760c253cSXin Li# In that case, function_line is
73*760c253cSXin Li# '_ZNK5blink10PaintLayer19GetCompositingStateEv:4530:185', and samples will be
74*760c253cSXin Li# every line below that.
75*760c253cSXin Li#
76*760c253cSXin Li# Function lines look like;
77*760c253cSXin Li# function_symbol:entry_count:dont_care
78*760c253cSXin Li#
79*760c253cSXin Li# And samples look like one of:
80*760c253cSXin Li#  arbitrary_number: sample_count
81*760c253cSXin Li#  arbitrary_number: inlined_function_symbol:inlined_entry_count
82*760c253cSXin LiProfileRecord = collections.namedtuple(
83*760c253cSXin Li    "ProfileRecord", ["function_line", "samples"]
84*760c253cSXin Li)
85*760c253cSXin Li
86*760c253cSXin Li
87*760c253cSXin Lidef _normalize_samples(samples):
88*760c253cSXin Li    """Normalizes the samples in the given function body.
89*760c253cSXin Li
90*760c253cSXin Li    Normalization just means that we redact inlined function names. This is
91*760c253cSXin Li    done so that a bit of templating doesn't make two function bodies look
92*760c253cSXin Li    distinct. Namely:
93*760c253cSXin Li
94*760c253cSXin Li    template <typename T>
95*760c253cSXin Li    __attribute__((noinline))
96*760c253cSXin Li    int getNumber() { return 1; }
97*760c253cSXin Li
98*760c253cSXin Li    template <typename T>
99*760c253cSXin Li    __attribute__((noinline))
100*760c253cSXin Li    int getNumberIndirectly() { return getNumber<T>(); }
101*760c253cSXin Li
102*760c253cSXin Li    int main() {
103*760c253cSXin Li      return getNumber<int>() + getNumber<float>();
104*760c253cSXin Li    }
105*760c253cSXin Li
106*760c253cSXin Li    If the profile has the mangled name for getNumber<float> in
107*760c253cSXin Li    getNumberIndirectly<float> (and similar for <int>), we'll consider them to
108*760c253cSXin Li    be distinct when they're not.
109*760c253cSXin Li    """
110*760c253cSXin Li
111*760c253cSXin Li    # I'm not actually sure if this ends up being an issue in practice, but it's
112*760c253cSXin Li    # simple enough to guard against.
113*760c253cSXin Li    inlined_re = re.compile(r"(^\s*\d+): [^:]+:(\s*\d+)\s*$")
114*760c253cSXin Li    result = []
115*760c253cSXin Li    for s in samples:
116*760c253cSXin Li        m = inlined_re.match(s)
117*760c253cSXin Li        if m:
118*760c253cSXin Li            result.append("%s: __REDACTED__:%s" % m.groups())
119*760c253cSXin Li        else:
120*760c253cSXin Li            result.append(s)
121*760c253cSXin Li    return tuple(result)
122*760c253cSXin Li
123*760c253cSXin Li
124*760c253cSXin Lidef _read_textual_afdo_profile(stream):
125*760c253cSXin Li    """Parses an AFDO profile from a line stream into ProfileRecords."""
126*760c253cSXin Li    # ProfileRecords are actually nested, due to inlining. For the purpose of
127*760c253cSXin Li    # this script, that doesn't matter.
128*760c253cSXin Li    lines = (line.rstrip() for line in stream)
129*760c253cSXin Li    function_line = None
130*760c253cSXin Li    samples = []
131*760c253cSXin Li    for line in lines:
132*760c253cSXin Li        if not line:
133*760c253cSXin Li            continue
134*760c253cSXin Li
135*760c253cSXin Li        if line[0].isspace():
136*760c253cSXin Li            assert (
137*760c253cSXin Li                function_line is not None
138*760c253cSXin Li            ), "sample exists outside of a function?"
139*760c253cSXin Li            samples.append(line)
140*760c253cSXin Li            continue
141*760c253cSXin Li
142*760c253cSXin Li        if function_line is not None:
143*760c253cSXin Li            yield ProfileRecord(
144*760c253cSXin Li                function_line=function_line, samples=tuple(samples)
145*760c253cSXin Li            )
146*760c253cSXin Li        function_line = line
147*760c253cSXin Li        samples = []
148*760c253cSXin Li
149*760c253cSXin Li    if function_line is not None:
150*760c253cSXin Li        yield ProfileRecord(function_line=function_line, samples=tuple(samples))
151*760c253cSXin Li
152*760c253cSXin Li
153*760c253cSXin Li# The default of 100 is arbitrarily selected, but it does make the overwhelming
154*760c253cSXin Li# majority of obvious sample duplication disappear.
155*760c253cSXin Li#
156*760c253cSXin Li# We experimented shortly with an nm-powered version of this script (rather
157*760c253cSXin Li# than structural matching, we'd see which functions mapped to the same literal
158*760c253cSXin Li# address). Running this with a high value (100) for max_repeats produced
159*760c253cSXin Li# results basically indistinguishable from nm, so ...
160*760c253cSXin Li#
161*760c253cSXin Li# Non-nm based approaches are superior because they don't require any prior
162*760c253cSXin Li# build artifacts; just an AFDO profile.
163*760c253cSXin Lidef dedup_records(profile_records, summary_file, max_repeats=100):
164*760c253cSXin Li    """Removes heavily duplicated records from profile_records.
165*760c253cSXin Li
166*760c253cSXin Li    profile_records is expected to be an iterable of ProfileRecord.
167*760c253cSXin Li    max_repeats ia how many functions must share identical bodies for us to
168*760c253cSXin Li      consider it 'heavily duplicated' and remove the results.
169*760c253cSXin Li    """
170*760c253cSXin Li
171*760c253cSXin Li    # Build a mapping of function structure -> list of functions with identical
172*760c253cSXin Li    # structure and sample counts
173*760c253cSXin Li    counts = collections.defaultdict(list)
174*760c253cSXin Li    for record in profile_records:
175*760c253cSXin Li        counts[_normalize_samples(record.samples)].append(record)
176*760c253cSXin Li
177*760c253cSXin Li    # Be sure that we didn't see any duplicate functions, since that's bad...
178*760c253cSXin Li    total_functions_recorded = sum(len(records) for records in counts.values())
179*760c253cSXin Li
180*760c253cSXin Li    unique_function_names = {
181*760c253cSXin Li        record.function_line.split(":")[0]
182*760c253cSXin Li        for records in counts.values()
183*760c253cSXin Li        for record in records
184*760c253cSXin Li    }
185*760c253cSXin Li
186*760c253cSXin Li    assert (
187*760c253cSXin Li        len(unique_function_names) == total_functions_recorded
188*760c253cSXin Li    ), "duplicate function names?"
189*760c253cSXin Li
190*760c253cSXin Li    num_kept = 0
191*760c253cSXin Li    num_samples_kept = 0
192*760c253cSXin Li    num_top_samples_kept = 0
193*760c253cSXin Li    num_total = 0
194*760c253cSXin Li    num_samples_total = 0
195*760c253cSXin Li    num_top_samples_total = 0
196*760c253cSXin Li
197*760c253cSXin Li    for normalized_samples, records in counts.items():
198*760c253cSXin Li        top_sample_count, all_sample_count = _count_samples(normalized_samples)
199*760c253cSXin Li        top_sample_count *= len(records)
200*760c253cSXin Li        all_sample_count *= len(records)
201*760c253cSXin Li
202*760c253cSXin Li        num_total += len(records)
203*760c253cSXin Li        num_samples_total += all_sample_count
204*760c253cSXin Li        num_top_samples_total += top_sample_count
205*760c253cSXin Li
206*760c253cSXin Li        if len(records) >= max_repeats:
207*760c253cSXin Li            continue
208*760c253cSXin Li
209*760c253cSXin Li        num_kept += len(records)
210*760c253cSXin Li        num_samples_kept += all_sample_count
211*760c253cSXin Li        num_top_samples_kept += top_sample_count
212*760c253cSXin Li        for record in records:
213*760c253cSXin Li            yield record
214*760c253cSXin Li
215*760c253cSXin Li    print(
216*760c253cSXin Li        "Retained {:,}/{:,} functions".format(num_kept, num_total),
217*760c253cSXin Li        file=summary_file,
218*760c253cSXin Li    )
219*760c253cSXin Li    print(
220*760c253cSXin Li        "Retained {:,}/{:,} samples, total".format(
221*760c253cSXin Li            num_samples_kept, num_samples_total
222*760c253cSXin Li        ),
223*760c253cSXin Li        file=summary_file,
224*760c253cSXin Li    )
225*760c253cSXin Li    print(
226*760c253cSXin Li        "Retained {:,}/{:,} top-level samples".format(
227*760c253cSXin Li            num_top_samples_kept, num_top_samples_total
228*760c253cSXin Li        ),
229*760c253cSXin Li        file=summary_file,
230*760c253cSXin Li    )
231*760c253cSXin Li
232*760c253cSXin Li
233*760c253cSXin Lidef run(profile_input_file, summary_output_file, profile_output_file):
234*760c253cSXin Li    profile_records = _read_textual_afdo_profile(profile_input_file)
235*760c253cSXin Li
236*760c253cSXin Li    # Sort this so we get deterministic output. AFDO doesn't care what order it's
237*760c253cSXin Li    # in.
238*760c253cSXin Li    deduped = sorted(
239*760c253cSXin Li        dedup_records(profile_records, summary_output_file),
240*760c253cSXin Li        key=lambda r: r.function_line,
241*760c253cSXin Li    )
242*760c253cSXin Li    for function_line, samples in deduped:
243*760c253cSXin Li        print(function_line, file=profile_output_file)
244*760c253cSXin Li        print("\n".join(samples), file=profile_output_file)
245*760c253cSXin Li
246*760c253cSXin Li
247*760c253cSXin Lidef _main():
248*760c253cSXin Li    run(
249*760c253cSXin Li        profile_input_file=sys.stdin,
250*760c253cSXin Li        summary_output_file=sys.stderr,
251*760c253cSXin Li        profile_output_file=sys.stdout,
252*760c253cSXin Li    )
253*760c253cSXin Li
254*760c253cSXin Li
255*760c253cSXin Liif __name__ == "__main__":
256*760c253cSXin Li    _main()
257