1#!/usr/bin/env python3 2# -*- coding: utf-8 -*- 3# Copyright 2018 The ChromiumOS Authors 4# Use of this source code is governed by a BSD-style license that can be 5# found in the LICENSE file. 6 7"""Script to redact apparent ICF'ed symbolsfrom textual AFDO profiles. 8 9AFDO sampling and ICF have an unfortunate interaction that causes a huge 10inflation in sample counts. Essentially, if you have N functions ICF'ed to the 11same location, one AFDO sample in any of those N functions will count as one 12sample in *each* of those N functions. 13 14In practice, there are a few forms of function bodies that are very heavily 15ICF'ed (e.g. `ret`, `xor %eax, %eax; ret`, destructors for widely-used types 16like std::map...). Recording 28,000 samples across all N thousand logical 17functions that point to the same body really hurts our AFDO numbers, given that 18our actual sample count across all of Chrome is something around 10,000,000. 19(No, really, these are actual numbers. In practice, at the time of writing, 20this script eliminates >90% of our AFDO samples by count. Sometimes as high as 2198%.) 22 23It reads a textual AFDO profile from stdin, and prints a 'fixed' version of it 24to stdout. A summary of what the script actually did is printed to stderr. 25""" 26 27 28import collections 29import re 30import sys 31 32 33def _count_samples(samples): 34 """Count the total number of samples in a function.""" 35 line_re = re.compile(r"^(\s*)\d+(?:\.\d+)?: (\d+)\s*$") 36 37 top_level_samples = 0 38 all_samples = 0 39 for line in samples: 40 m = line_re.match(line) 41 if not m: 42 continue 43 44 spaces, n = m.groups() 45 n = int(n) 46 all_samples += n 47 if len(spaces) == 1: 48 top_level_samples += n 49 50 return top_level_samples, all_samples 51 52 53# A ProfileRecord is a set of samples for a top-level symbol in a textual AFDO 54# profile. function_line is the top line of said function, and `samples` is 55# a list of all of the sample lines below that. 56# 57# We rely on the format of these strings in some places in this script. For 58# reference, a full function sample will look something like: 59# 60# _ZNK5blink10PaintLayer19GetCompositingStateEv:4530:185 61# 6: 83 62# 15: 126 63# 62832: 126 64# 6: _ZNK5blink10PaintLayer14GroupedMappingEv:2349 65# 1: 206 66# 1: _ZNK5blink10PaintLayer14GroupedMappersEv:2060 67# 1: 206 68# 11: _ZNK5blink10PaintLayer25GetCompositedLayerMappingEv:800 69# 2.1: 80 70# 71# 72# In that case, function_line is 73# '_ZNK5blink10PaintLayer19GetCompositingStateEv:4530:185', and samples will be 74# every line below that. 75# 76# Function lines look like; 77# function_symbol:entry_count:dont_care 78# 79# And samples look like one of: 80# arbitrary_number: sample_count 81# arbitrary_number: inlined_function_symbol:inlined_entry_count 82ProfileRecord = collections.namedtuple( 83 "ProfileRecord", ["function_line", "samples"] 84) 85 86 87def _normalize_samples(samples): 88 """Normalizes the samples in the given function body. 89 90 Normalization just means that we redact inlined function names. This is 91 done so that a bit of templating doesn't make two function bodies look 92 distinct. Namely: 93 94 template <typename T> 95 __attribute__((noinline)) 96 int getNumber() { return 1; } 97 98 template <typename T> 99 __attribute__((noinline)) 100 int getNumberIndirectly() { return getNumber<T>(); } 101 102 int main() { 103 return getNumber<int>() + getNumber<float>(); 104 } 105 106 If the profile has the mangled name for getNumber<float> in 107 getNumberIndirectly<float> (and similar for <int>), we'll consider them to 108 be distinct when they're not. 109 """ 110 111 # I'm not actually sure if this ends up being an issue in practice, but it's 112 # simple enough to guard against. 113 inlined_re = re.compile(r"(^\s*\d+): [^:]+:(\s*\d+)\s*$") 114 result = [] 115 for s in samples: 116 m = inlined_re.match(s) 117 if m: 118 result.append("%s: __REDACTED__:%s" % m.groups()) 119 else: 120 result.append(s) 121 return tuple(result) 122 123 124def _read_textual_afdo_profile(stream): 125 """Parses an AFDO profile from a line stream into ProfileRecords.""" 126 # ProfileRecords are actually nested, due to inlining. For the purpose of 127 # this script, that doesn't matter. 128 lines = (line.rstrip() for line in stream) 129 function_line = None 130 samples = [] 131 for line in lines: 132 if not line: 133 continue 134 135 if line[0].isspace(): 136 assert ( 137 function_line is not None 138 ), "sample exists outside of a function?" 139 samples.append(line) 140 continue 141 142 if function_line is not None: 143 yield ProfileRecord( 144 function_line=function_line, samples=tuple(samples) 145 ) 146 function_line = line 147 samples = [] 148 149 if function_line is not None: 150 yield ProfileRecord(function_line=function_line, samples=tuple(samples)) 151 152 153# The default of 100 is arbitrarily selected, but it does make the overwhelming 154# majority of obvious sample duplication disappear. 155# 156# We experimented shortly with an nm-powered version of this script (rather 157# than structural matching, we'd see which functions mapped to the same literal 158# address). Running this with a high value (100) for max_repeats produced 159# results basically indistinguishable from nm, so ... 160# 161# Non-nm based approaches are superior because they don't require any prior 162# build artifacts; just an AFDO profile. 163def dedup_records(profile_records, summary_file, max_repeats=100): 164 """Removes heavily duplicated records from profile_records. 165 166 profile_records is expected to be an iterable of ProfileRecord. 167 max_repeats ia how many functions must share identical bodies for us to 168 consider it 'heavily duplicated' and remove the results. 169 """ 170 171 # Build a mapping of function structure -> list of functions with identical 172 # structure and sample counts 173 counts = collections.defaultdict(list) 174 for record in profile_records: 175 counts[_normalize_samples(record.samples)].append(record) 176 177 # Be sure that we didn't see any duplicate functions, since that's bad... 178 total_functions_recorded = sum(len(records) for records in counts.values()) 179 180 unique_function_names = { 181 record.function_line.split(":")[0] 182 for records in counts.values() 183 for record in records 184 } 185 186 assert ( 187 len(unique_function_names) == total_functions_recorded 188 ), "duplicate function names?" 189 190 num_kept = 0 191 num_samples_kept = 0 192 num_top_samples_kept = 0 193 num_total = 0 194 num_samples_total = 0 195 num_top_samples_total = 0 196 197 for normalized_samples, records in counts.items(): 198 top_sample_count, all_sample_count = _count_samples(normalized_samples) 199 top_sample_count *= len(records) 200 all_sample_count *= len(records) 201 202 num_total += len(records) 203 num_samples_total += all_sample_count 204 num_top_samples_total += top_sample_count 205 206 if len(records) >= max_repeats: 207 continue 208 209 num_kept += len(records) 210 num_samples_kept += all_sample_count 211 num_top_samples_kept += top_sample_count 212 for record in records: 213 yield record 214 215 print( 216 "Retained {:,}/{:,} functions".format(num_kept, num_total), 217 file=summary_file, 218 ) 219 print( 220 "Retained {:,}/{:,} samples, total".format( 221 num_samples_kept, num_samples_total 222 ), 223 file=summary_file, 224 ) 225 print( 226 "Retained {:,}/{:,} top-level samples".format( 227 num_top_samples_kept, num_top_samples_total 228 ), 229 file=summary_file, 230 ) 231 232 233def run(profile_input_file, summary_output_file, profile_output_file): 234 profile_records = _read_textual_afdo_profile(profile_input_file) 235 236 # Sort this so we get deterministic output. AFDO doesn't care what order it's 237 # in. 238 deduped = sorted( 239 dedup_records(profile_records, summary_output_file), 240 key=lambda r: r.function_line, 241 ) 242 for function_line, samples in deduped: 243 print(function_line, file=profile_output_file) 244 print("\n".join(samples), file=profile_output_file) 245 246 247def _main(): 248 run( 249 profile_input_file=sys.stdin, 250 summary_output_file=sys.stderr, 251 profile_output_file=sys.stdout, 252 ) 253 254 255if __name__ == "__main__": 256 _main() 257