toolchain-utils/afdo_redaction/redact_profile.py

*760c253cSXin Li#!/usr/bin/env python3
*760c253cSXin Li# -*- coding: utf-8 -*-
*760c253cSXin Li# Copyright 2018 The ChromiumOS Authors
*760c253cSXin Li# Use of this source code is governed by a BSD-style license that can be
*760c253cSXin Li# found in the LICENSE file.
*760c253cSXin Li
"""Script to redact apparent ICF'ed symbols from textual AFDO profiles.

AFDO sampling and ICF have an unfortunate interaction that causes a huge
inflation in sample counts. Essentially, if you have N functions ICF'ed to the
same location, one AFDO sample in any of those N functions will count as one
sample in *each* of those N functions.

In practice, there are a few forms of function bodies that are very heavily
ICF'ed (e.g. `ret`, `xor %eax, %eax; ret`, destructors for widely-used types
like std::map...). Recording 28,000 samples across all N thousand logical
functions that point to the same body really hurts our AFDO numbers, given that
our actual sample count across all of Chrome is something around 10,000,000.
(No, really, these are actual numbers. In practice, at the time of writing,
this script eliminates >90% of our AFDO samples by count. Sometimes as high as
98%.)

It reads a textual AFDO profile from stdin, and prints a 'fixed' version of it
to stdout. A summary of what the script actually did is printed to stderr.
"""
*760c253cSXin Li
*760c253cSXin Li
*760c253cSXin Liimport collections
*760c253cSXin Liimport re
*760c253cSXin Liimport sys
*760c253cSXin Li
*760c253cSXin Li
*760c253cSXin Lidef _count_samples(samples):
*760c253cSXin Li    """Count the total number of samples in a function."""
*760c253cSXin Li    line_re = re.compile(r"^(\s*)\d+(?:\.\d+)?: (\d+)\s*$")
*760c253cSXin Li
*760c253cSXin Li    top_level_samples = 0
*760c253cSXin Li    all_samples = 0
*760c253cSXin Li    for line in samples:
*760c253cSXin Li        m = line_re.match(line)
*760c253cSXin Li        if not m:
*760c253cSXin Li            continue
*760c253cSXin Li
*760c253cSXin Li        spaces, n = m.groups()
*760c253cSXin Li        n = int(n)
*760c253cSXin Li        all_samples += n
*760c253cSXin Li        if len(spaces) == 1:
*760c253cSXin Li            top_level_samples += n
*760c253cSXin Li
*760c253cSXin Li    return top_level_samples, all_samples
*760c253cSXin Li
*760c253cSXin Li
# ProfileRecord holds everything recorded for one top-level symbol of a
# textual AFDO profile: `function_line` is the symbol's header line, and
# `samples` contains every sample line that follows it.
#
# Parts of this script depend on the exact textual layout. A complete record
# looks something like this:
#
# _ZNK5blink10PaintLayer19GetCompositingStateEv:4530:185
#  6: 83
#  15: 126
#  62832: 126
#  6: _ZNK5blink10PaintLayer14GroupedMappingEv:2349
#   1: 206
#   1: _ZNK5blink10PaintLayer14GroupedMappersEv:2060
#    1: 206
#  11: _ZNK5blink10PaintLayer25GetCompositedLayerMappingEv:800
#   2.1: 80
#
#
# Here function_line is
# '_ZNK5blink10PaintLayer19GetCompositingStateEv:4530:185', and samples is
# made up of all of the lines underneath it.
#
# Function lines look like:
# function_symbol:entry_count:dont_care
#
# And each sample line takes one of these forms:
#  arbitrary_number: sample_count
#  arbitrary_number: inlined_function_symbol:inlined_entry_count
ProfileRecord = collections.namedtuple(
    "ProfileRecord", "function_line samples"
)
*760c253cSXin Li
*760c253cSXin Li
*760c253cSXin Lidef _normalize_samples(samples):
*760c253cSXin Li    """Normalizes the samples in the given function body.
*760c253cSXin Li
*760c253cSXin Li    Normalization just means that we redact inlined function names. This is
*760c253cSXin Li    done so that a bit of templating doesn't make two function bodies look
*760c253cSXin Li    distinct. Namely:
*760c253cSXin Li
*760c253cSXin Li    template <typename T>
*760c253cSXin Li    __attribute__((noinline))
*760c253cSXin Li    int getNumber() { return 1; }
*760c253cSXin Li
*760c253cSXin Li    template <typename T>
*760c253cSXin Li    __attribute__((noinline))
*760c253cSXin Li    int getNumberIndirectly() { return getNumber<T>(); }
*760c253cSXin Li
*760c253cSXin Li    int main() {
*760c253cSXin Li      return getNumber<int>() + getNumber<float>();
*760c253cSXin Li    }
*760c253cSXin Li
*760c253cSXin Li    If the profile has the mangled name for getNumber<float> in
*760c253cSXin Li    getNumberIndirectly<float> (and similar for <int>), we'll consider them to
*760c253cSXin Li    be distinct when they're not.
*760c253cSXin Li    """
*760c253cSXin Li
*760c253cSXin Li    # I'm not actually sure if this ends up being an issue in practice, but it's
*760c253cSXin Li    # simple enough to guard against.
*760c253cSXin Li    inlined_re = re.compile(r"(^\s*\d+): [^:]+:(\s*\d+)\s*$")
*760c253cSXin Li    result = []
*760c253cSXin Li    for s in samples:
*760c253cSXin Li        m = inlined_re.match(s)
*760c253cSXin Li        if m:
*760c253cSXin Li            result.append("%s: __REDACTED__:%s" % m.groups())
*760c253cSXin Li        else:
*760c253cSXin Li            result.append(s)
*760c253cSXin Li    return tuple(result)
*760c253cSXin Li
*760c253cSXin Li
*760c253cSXin Lidef _read_textual_afdo_profile(stream):
*760c253cSXin Li    """Parses an AFDO profile from a line stream into ProfileRecords."""
*760c253cSXin Li    # ProfileRecords are actually nested, due to inlining. For the purpose of
*760c253cSXin Li    # this script, that doesn't matter.
*760c253cSXin Li    lines = (line.rstrip() for line in stream)
*760c253cSXin Li    function_line = None
*760c253cSXin Li    samples = []
*760c253cSXin Li    for line in lines:
*760c253cSXin Li        if not line:
*760c253cSXin Li            continue
*760c253cSXin Li
*760c253cSXin Li        if line[0].isspace():
*760c253cSXin Li            assert (
*760c253cSXin Li                function_line is not None
*760c253cSXin Li            ), "sample exists outside of a function?"
*760c253cSXin Li            samples.append(line)
*760c253cSXin Li            continue
*760c253cSXin Li
*760c253cSXin Li        if function_line is not None:
*760c253cSXin Li            yield ProfileRecord(
*760c253cSXin Li                function_line=function_line, samples=tuple(samples)
*760c253cSXin Li            )
*760c253cSXin Li        function_line = line
*760c253cSXin Li        samples = []
*760c253cSXin Li
*760c253cSXin Li    if function_line is not None:
*760c253cSXin Li        yield ProfileRecord(function_line=function_line, samples=tuple(samples))
*760c253cSXin Li
*760c253cSXin Li
# The default max_repeats of 100 was picked arbitrarily, but it is enough to
# make the overwhelming majority of obvious sample duplication go away.
#
# An nm-powered variant of this script was tried briefly: instead of matching
# on structure, it checked which functions mapped to the same literal address.
# Running this script with a high max_repeats (100) gave results that were
# basically indistinguishable from the nm-based ones, so ...
#
# Approaches that don't need nm are preferable since they require no prior
# build artifacts; an AFDO profile is all that's needed.
def dedup_records(profile_records, summary_file, max_repeats=100):
    """Yield the records whose bodies are not heavily duplicated.

    Records are grouped by their normalized sample lines. Any group with at
    least `max_repeats` members is dropped in its entirety; the members of all
    other groups are yielded.

    Args:
        profile_records: iterable of ProfileRecord.
        summary_file: file-like object that receives a three-line summary
            (functions, total samples, top-level samples retained) once all
            records have been yielded.
        max_repeats: how many functions must share an identical body for that
            body to be considered 'heavily duplicated' and removed.

    Yields:
        The retained ProfileRecords.

    Raises:
        AssertionError: if two records share the same function name.
    """
    # normalized body -> every record that has exactly that body.
    groups = collections.defaultdict(list)
    for rec in profile_records:
        groups[_normalize_samples(rec.samples)].append(rec)

    # Seeing the same function name twice means the input is suspect.
    all_records = [rec for group in groups.values() for rec in group]
    names = {rec.function_line.split(":")[0] for rec in all_records}
    assert len(names) == len(all_records), "duplicate function names?"

    funcs_seen = funcs_kept = 0
    samples_seen = samples_kept = 0
    top_seen = top_kept = 0

    for body, group in groups.items():
        group_size = len(group)
        # Every member has the same body, so count once and scale up.
        top_each, all_each = _count_samples(body)
        group_top = top_each * group_size
        group_all = all_each * group_size

        funcs_seen += group_size
        samples_seen += group_all
        top_seen += group_top

        if group_size < max_repeats:
            funcs_kept += group_size
            samples_kept += group_all
            top_kept += group_top
            yield from group

    summary_lines = (
        "Retained {:,}/{:,} functions".format(funcs_kept, funcs_seen),
        "Retained {:,}/{:,} samples, total".format(samples_kept, samples_seen),
        "Retained {:,}/{:,} top-level samples".format(top_kept, top_seen),
    )
    for text in summary_lines:
        print(text, file=summary_file)
*760c253cSXin Li
*760c253cSXin Li
def run(profile_input_file, summary_output_file, profile_output_file):
    """Read a textual AFDO profile, drop duplicated bodies, and write it out.

    Args:
        profile_input_file: iterable of lines holding the input profile.
        summary_output_file: file-like object that receives the summary
            printed by dedup_records.
        profile_output_file: file-like object that receives the retained
            records.
    """
    retained = dedup_records(
        _read_textual_afdo_profile(profile_input_file), summary_output_file
    )

    # AFDO doesn't care about record order; sorting just keeps the output
    # deterministic.
    ordered = sorted(retained, key=lambda rec: rec.function_line)
    for rec in ordered:
        print(rec.function_line, file=profile_output_file)
        print("\n".join(rec.samples), file=profile_output_file)
*760c253cSXin Li
*760c253cSXin Li
def _main():
    """Script entry point: profile from stdin to stdout, summary to stderr."""
    # Argument order is (profile input, summary output, profile output).
    run(sys.stdin, sys.stderr, sys.stdout)


if __name__ == "__main__":
    _main()