#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright 2019 The ChromiumOS Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Script to remove all indirect call targets from textual AFDO profiles.

Indirect call samples can cause code to appear 'live' when it otherwise
wouldn't be. This resurrection can happen either by the way of profile-based
speculative devirtualization, or because of imprecision in LLVM's liveness
calculations when performing LTO.

This generally isn't a problem when an AFDO profile is applied to the binary it
was collected on. However, because we e.g., build NaCl from the same set of
objects as Chrome, this can become problematic, and lead to NaCl doubling in
size (or worse). See crbug.com/1005023 and crbug.com/916130.
"""


import argparse
import re
from typing import Iterable, Iterator, TextIO


# Lines with indirect call targets look like:
#   1.1: 1234 foo:111 bar:122
#
# Where 1.1 is the line info/discriminator, 1234 is the total number of
# samples seen for that line/discriminator, foo:111 is "111 of the calls here
# went to foo," and bar:122 is "122 of the calls here went to bar."
_CALL_TARGET_RE = re.compile(
    r"""
    ^\s+                    # Top-level lines are function records.
    \d+(?:\.\d+)?:          # Line info/discriminator
    \s+
    \d+                     # Total sample count
    \s+
    ((?:[^\s:]+:\d+\s*)+)   # Indirect call target(s)
    $
    """,
    re.VERBOSE,
)


def _remove_indirect_call_targets(lines: Iterable[str]) -> Iterator[str]:
    """Yield each profile line with any indirect call targets removed.

    Args:
        lines: Lines of a textual AFDO profile, with or without line endings.

    Yields:
        One newline-terminated line per input line. Trailing whitespace is
        always stripped. Lines that carry indirect call targets are cut back
        to their line info and total sample count; every other line is passed
        through otherwise unchanged.
    """
    for raw_line in lines:
        line = raw_line.rstrip()

        match = _CALL_TARGET_RE.match(line)
        if not match:
            yield line + "\n"
            continue

        group_start, group_end = match.span(1)
        # The pattern is anchored with `$` and the line was already stripped,
        # so the call-target group necessarily runs to the end of the line.
        assert group_end == len(line)
        # Drop the targets and the whitespace separating them from the count.
        yield line[:group_start].rstrip() + "\n"


def run(input_stream: Iterable[str], output_stream: TextIO) -> None:
    """Copy a textual AFDO profile, dropping indirect call targets.

    Args:
        input_stream: Iterable of profile lines (e.g. an open text file).
        output_stream: Object with a `write(str)` method that receives the
            filtered profile.
    """
    for line in _remove_indirect_call_targets(input_stream):
        output_stream.write(line)


def main() -> None:
    """Parse command-line flags and filter --input into --output.

    --input and --output must name different files: the output is opened in
    truncating mode before any input has been read.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--input",
        default="/dev/stdin",
        help="File to read from. Defaults to stdin.",
    )
    parser.add_argument(
        "--output",
        default="/dev/stdout",
        help="File to write to. Defaults to stdout.",
    )
    args = parser.parse_args()

    # Pin the encoding so results do not depend on the caller's locale.
    with open(args.input, encoding="utf-8") as input_stream:
        with open(args.output, "w", encoding="utf-8") as output_stream:
            run(input_stream, output_stream)


if __name__ == "__main__":
    main()