1*760c253cSXin Li#!/usr/bin/env python3 2*760c253cSXin Li# -*- coding: utf-8 -*- 3*760c253cSXin Li# Copyright 2018 The ChromiumOS Authors 4*760c253cSXin Li# Use of this source code is governed by a BSD-style license that can be 5*760c253cSXin Li# found in the LICENSE file. 6*760c253cSXin Li 7*760c253cSXin Li"""Script to redact apparent ICF'ed symbolsfrom textual AFDO profiles. 8*760c253cSXin Li 9*760c253cSXin LiAFDO sampling and ICF have an unfortunate interaction that causes a huge 10*760c253cSXin Liinflation in sample counts. Essentially, if you have N functions ICF'ed to the 11*760c253cSXin Lisame location, one AFDO sample in any of those N functions will count as one 12*760c253cSXin Lisample in *each* of those N functions. 13*760c253cSXin Li 14*760c253cSXin LiIn practice, there are a few forms of function bodies that are very heavily 15*760c253cSXin LiICF'ed (e.g. `ret`, `xor %eax, %eax; ret`, destructors for widely-used types 16*760c253cSXin Lilike std::map...). Recording 28,000 samples across all N thousand logical 17*760c253cSXin Lifunctions that point to the same body really hurts our AFDO numbers, given that 18*760c253cSXin Liour actual sample count across all of Chrome is something around 10,000,000. 19*760c253cSXin Li(No, really, these are actual numbers. In practice, at the time of writing, 20*760c253cSXin Lithis script eliminates >90% of our AFDO samples by count. Sometimes as high as 21*760c253cSXin Li98%.) 22*760c253cSXin Li 23*760c253cSXin LiIt reads a textual AFDO profile from stdin, and prints a 'fixed' version of it 24*760c253cSXin Lito stdout. A summary of what the script actually did is printed to stderr. 25*760c253cSXin Li""" 26*760c253cSXin Li 27*760c253cSXin Li 28*760c253cSXin Liimport collections 29*760c253cSXin Liimport re 30*760c253cSXin Liimport sys 31*760c253cSXin Li 32*760c253cSXin Li 33*760c253cSXin Lidef _count_samples(samples): 34*760c253cSXin Li """Count the total number of samples in a function.""" 35*760c253cSXin Li line_re = re.compile(r"^(\s*)\d+(?:\.\d+)?: (\d+)\s*$") 36*760c253cSXin Li 37*760c253cSXin Li top_level_samples = 0 38*760c253cSXin Li all_samples = 0 39*760c253cSXin Li for line in samples: 40*760c253cSXin Li m = line_re.match(line) 41*760c253cSXin Li if not m: 42*760c253cSXin Li continue 43*760c253cSXin Li 44*760c253cSXin Li spaces, n = m.groups() 45*760c253cSXin Li n = int(n) 46*760c253cSXin Li all_samples += n 47*760c253cSXin Li if len(spaces) == 1: 48*760c253cSXin Li top_level_samples += n 49*760c253cSXin Li 50*760c253cSXin Li return top_level_samples, all_samples 51*760c253cSXin Li 52*760c253cSXin Li 53*760c253cSXin Li# A ProfileRecord is a set of samples for a top-level symbol in a textual AFDO 54*760c253cSXin Li# profile. function_line is the top line of said function, and `samples` is 55*760c253cSXin Li# a list of all of the sample lines below that. 56*760c253cSXin Li# 57*760c253cSXin Li# We rely on the format of these strings in some places in this script. For 58*760c253cSXin Li# reference, a full function sample will look something like: 59*760c253cSXin Li# 60*760c253cSXin Li# _ZNK5blink10PaintLayer19GetCompositingStateEv:4530:185 61*760c253cSXin Li# 6: 83 62*760c253cSXin Li# 15: 126 63*760c253cSXin Li# 62832: 126 64*760c253cSXin Li# 6: _ZNK5blink10PaintLayer14GroupedMappingEv:2349 65*760c253cSXin Li# 1: 206 66*760c253cSXin Li# 1: _ZNK5blink10PaintLayer14GroupedMappersEv:2060 67*760c253cSXin Li# 1: 206 68*760c253cSXin Li# 11: _ZNK5blink10PaintLayer25GetCompositedLayerMappingEv:800 69*760c253cSXin Li# 2.1: 80 70*760c253cSXin Li# 71*760c253cSXin Li# 72*760c253cSXin Li# In that case, function_line is 73*760c253cSXin Li# '_ZNK5blink10PaintLayer19GetCompositingStateEv:4530:185', and samples will be 74*760c253cSXin Li# every line below that. 75*760c253cSXin Li# 76*760c253cSXin Li# Function lines look like; 77*760c253cSXin Li# function_symbol:entry_count:dont_care 78*760c253cSXin Li# 79*760c253cSXin Li# And samples look like one of: 80*760c253cSXin Li# arbitrary_number: sample_count 81*760c253cSXin Li# arbitrary_number: inlined_function_symbol:inlined_entry_count 82*760c253cSXin LiProfileRecord = collections.namedtuple( 83*760c253cSXin Li "ProfileRecord", ["function_line", "samples"] 84*760c253cSXin Li) 85*760c253cSXin Li 86*760c253cSXin Li 87*760c253cSXin Lidef _normalize_samples(samples): 88*760c253cSXin Li """Normalizes the samples in the given function body. 89*760c253cSXin Li 90*760c253cSXin Li Normalization just means that we redact inlined function names. This is 91*760c253cSXin Li done so that a bit of templating doesn't make two function bodies look 92*760c253cSXin Li distinct. Namely: 93*760c253cSXin Li 94*760c253cSXin Li template <typename T> 95*760c253cSXin Li __attribute__((noinline)) 96*760c253cSXin Li int getNumber() { return 1; } 97*760c253cSXin Li 98*760c253cSXin Li template <typename T> 99*760c253cSXin Li __attribute__((noinline)) 100*760c253cSXin Li int getNumberIndirectly() { return getNumber<T>(); } 101*760c253cSXin Li 102*760c253cSXin Li int main() { 103*760c253cSXin Li return getNumber<int>() + getNumber<float>(); 104*760c253cSXin Li } 105*760c253cSXin Li 106*760c253cSXin Li If the profile has the mangled name for getNumber<float> in 107*760c253cSXin Li getNumberIndirectly<float> (and similar for <int>), we'll consider them to 108*760c253cSXin Li be distinct when they're not. 109*760c253cSXin Li """ 110*760c253cSXin Li 111*760c253cSXin Li # I'm not actually sure if this ends up being an issue in practice, but it's 112*760c253cSXin Li # simple enough to guard against. 113*760c253cSXin Li inlined_re = re.compile(r"(^\s*\d+): [^:]+:(\s*\d+)\s*$") 114*760c253cSXin Li result = [] 115*760c253cSXin Li for s in samples: 116*760c253cSXin Li m = inlined_re.match(s) 117*760c253cSXin Li if m: 118*760c253cSXin Li result.append("%s: __REDACTED__:%s" % m.groups()) 119*760c253cSXin Li else: 120*760c253cSXin Li result.append(s) 121*760c253cSXin Li return tuple(result) 122*760c253cSXin Li 123*760c253cSXin Li 124*760c253cSXin Lidef _read_textual_afdo_profile(stream): 125*760c253cSXin Li """Parses an AFDO profile from a line stream into ProfileRecords.""" 126*760c253cSXin Li # ProfileRecords are actually nested, due to inlining. For the purpose of 127*760c253cSXin Li # this script, that doesn't matter. 128*760c253cSXin Li lines = (line.rstrip() for line in stream) 129*760c253cSXin Li function_line = None 130*760c253cSXin Li samples = [] 131*760c253cSXin Li for line in lines: 132*760c253cSXin Li if not line: 133*760c253cSXin Li continue 134*760c253cSXin Li 135*760c253cSXin Li if line[0].isspace(): 136*760c253cSXin Li assert ( 137*760c253cSXin Li function_line is not None 138*760c253cSXin Li ), "sample exists outside of a function?" 139*760c253cSXin Li samples.append(line) 140*760c253cSXin Li continue 141*760c253cSXin Li 142*760c253cSXin Li if function_line is not None: 143*760c253cSXin Li yield ProfileRecord( 144*760c253cSXin Li function_line=function_line, samples=tuple(samples) 145*760c253cSXin Li ) 146*760c253cSXin Li function_line = line 147*760c253cSXin Li samples = [] 148*760c253cSXin Li 149*760c253cSXin Li if function_line is not None: 150*760c253cSXin Li yield ProfileRecord(function_line=function_line, samples=tuple(samples)) 151*760c253cSXin Li 152*760c253cSXin Li 153*760c253cSXin Li# The default of 100 is arbitrarily selected, but it does make the overwhelming 154*760c253cSXin Li# majority of obvious sample duplication disappear. 155*760c253cSXin Li# 156*760c253cSXin Li# We experimented shortly with an nm-powered version of this script (rather 157*760c253cSXin Li# than structural matching, we'd see which functions mapped to the same literal 158*760c253cSXin Li# address). Running this with a high value (100) for max_repeats produced 159*760c253cSXin Li# results basically indistinguishable from nm, so ... 160*760c253cSXin Li# 161*760c253cSXin Li# Non-nm based approaches are superior because they don't require any prior 162*760c253cSXin Li# build artifacts; just an AFDO profile. 163*760c253cSXin Lidef dedup_records(profile_records, summary_file, max_repeats=100): 164*760c253cSXin Li """Removes heavily duplicated records from profile_records. 165*760c253cSXin Li 166*760c253cSXin Li profile_records is expected to be an iterable of ProfileRecord. 167*760c253cSXin Li max_repeats ia how many functions must share identical bodies for us to 168*760c253cSXin Li consider it 'heavily duplicated' and remove the results. 169*760c253cSXin Li """ 170*760c253cSXin Li 171*760c253cSXin Li # Build a mapping of function structure -> list of functions with identical 172*760c253cSXin Li # structure and sample counts 173*760c253cSXin Li counts = collections.defaultdict(list) 174*760c253cSXin Li for record in profile_records: 175*760c253cSXin Li counts[_normalize_samples(record.samples)].append(record) 176*760c253cSXin Li 177*760c253cSXin Li # Be sure that we didn't see any duplicate functions, since that's bad... 178*760c253cSXin Li total_functions_recorded = sum(len(records) for records in counts.values()) 179*760c253cSXin Li 180*760c253cSXin Li unique_function_names = { 181*760c253cSXin Li record.function_line.split(":")[0] 182*760c253cSXin Li for records in counts.values() 183*760c253cSXin Li for record in records 184*760c253cSXin Li } 185*760c253cSXin Li 186*760c253cSXin Li assert ( 187*760c253cSXin Li len(unique_function_names) == total_functions_recorded 188*760c253cSXin Li ), "duplicate function names?" 189*760c253cSXin Li 190*760c253cSXin Li num_kept = 0 191*760c253cSXin Li num_samples_kept = 0 192*760c253cSXin Li num_top_samples_kept = 0 193*760c253cSXin Li num_total = 0 194*760c253cSXin Li num_samples_total = 0 195*760c253cSXin Li num_top_samples_total = 0 196*760c253cSXin Li 197*760c253cSXin Li for normalized_samples, records in counts.items(): 198*760c253cSXin Li top_sample_count, all_sample_count = _count_samples(normalized_samples) 199*760c253cSXin Li top_sample_count *= len(records) 200*760c253cSXin Li all_sample_count *= len(records) 201*760c253cSXin Li 202*760c253cSXin Li num_total += len(records) 203*760c253cSXin Li num_samples_total += all_sample_count 204*760c253cSXin Li num_top_samples_total += top_sample_count 205*760c253cSXin Li 206*760c253cSXin Li if len(records) >= max_repeats: 207*760c253cSXin Li continue 208*760c253cSXin Li 209*760c253cSXin Li num_kept += len(records) 210*760c253cSXin Li num_samples_kept += all_sample_count 211*760c253cSXin Li num_top_samples_kept += top_sample_count 212*760c253cSXin Li for record in records: 213*760c253cSXin Li yield record 214*760c253cSXin Li 215*760c253cSXin Li print( 216*760c253cSXin Li "Retained {:,}/{:,} functions".format(num_kept, num_total), 217*760c253cSXin Li file=summary_file, 218*760c253cSXin Li ) 219*760c253cSXin Li print( 220*760c253cSXin Li "Retained {:,}/{:,} samples, total".format( 221*760c253cSXin Li num_samples_kept, num_samples_total 222*760c253cSXin Li ), 223*760c253cSXin Li file=summary_file, 224*760c253cSXin Li ) 225*760c253cSXin Li print( 226*760c253cSXin Li "Retained {:,}/{:,} top-level samples".format( 227*760c253cSXin Li num_top_samples_kept, num_top_samples_total 228*760c253cSXin Li ), 229*760c253cSXin Li file=summary_file, 230*760c253cSXin Li ) 231*760c253cSXin Li 232*760c253cSXin Li 233*760c253cSXin Lidef run(profile_input_file, summary_output_file, profile_output_file): 234*760c253cSXin Li profile_records = _read_textual_afdo_profile(profile_input_file) 235*760c253cSXin Li 236*760c253cSXin Li # Sort this so we get deterministic output. AFDO doesn't care what order it's 237*760c253cSXin Li # in. 238*760c253cSXin Li deduped = sorted( 239*760c253cSXin Li dedup_records(profile_records, summary_output_file), 240*760c253cSXin Li key=lambda r: r.function_line, 241*760c253cSXin Li ) 242*760c253cSXin Li for function_line, samples in deduped: 243*760c253cSXin Li print(function_line, file=profile_output_file) 244*760c253cSXin Li print("\n".join(samples), file=profile_output_file) 245*760c253cSXin Li 246*760c253cSXin Li 247*760c253cSXin Lidef _main(): 248*760c253cSXin Li run( 249*760c253cSXin Li profile_input_file=sys.stdin, 250*760c253cSXin Li summary_output_file=sys.stderr, 251*760c253cSXin Li profile_output_file=sys.stdout, 252*760c253cSXin Li ) 253*760c253cSXin Li 254*760c253cSXin Li 255*760c253cSXin Liif __name__ == "__main__": 256*760c253cSXin Li _main() 257