#!/usr/bin/env python3
# Copyright 2020 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Reports binary size metrics for LaCrOS build artifacts.

More information at //docs/speed/binary_size/metrics.md.
"""

import argparse
import collections
import contextlib
import json
import logging
import os
import subprocess
import sys
import tempfile

SRC_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))
sys.path.insert(0, os.path.join(SRC_DIR, 'build', 'util'))
from lib.results import result_sink
from lib.results import result_types


@contextlib.contextmanager
def _SysPath(path):
  """Library import context that temporarily appends |path| to |sys.path|."""
  if path and path not in sys.path:
    sys.path.insert(0, path)
  else:
    path = None  # Indicates that |sys.path| is not modified.
  try:
    yield
  finally:
    if path:
      sys.path.pop(0)


DIR_SOURCE_ROOT = os.environ.get(
    'CHECKOUT_SOURCE_ROOT',
    os.path.abspath(
        os.path.join(os.path.dirname(__file__), os.pardir, os.pardir)))

BUILD_COMMON_PATH = os.path.join(DIR_SOURCE_ROOT, 'build', 'util', 'lib',
                                 'common')

TRACING_PATH = os.path.join(DIR_SOURCE_ROOT, 'third_party', 'catapult',
                            'tracing')

EU_STRIP_PATH = os.path.join(DIR_SOURCE_ROOT, 'buildtools', 'third_party',
                             'eu-strip', 'bin', 'eu-strip')

with _SysPath(BUILD_COMMON_PATH):
  import perf_tests_results_helper  # pylint: disable=import-error

with _SysPath(TRACING_PATH):
  from tracing.value import convert_chart_json  # pylint: disable=import-error

_BASE_CHART = {
    'format_version': '0.1',
    'benchmark_name': 'resource_sizes',
    'trace_rerun_options': [],
    'charts': {}
}

# Counter keys for the per-file measurements made by
# _get_catagorized_filesizes().
_KEY_RAW = 'raw'
_KEY_GZIPPED = 'gzipped'
_KEY_STRIPPED = 'stripped'
_KEY_STRIPPED_GZIPPED = 'stripped_then_gzipped'


class _Group:
  """A group of build artifacts whose file sizes are summed and tracked.

  Build artifacts for size tracking fall under these categories:
  * File: A single file.
  * Group: A collection of files.
  * Dir: All files under a directory.

  Attributes:
    paths: A list of files or directories to be tracked together.
    title: The display name of the group.
    track_stripped: Whether to also track summed stripped ELF sizes.
    track_compressed: Whether to also track summed compressed sizes.
  """

  def __init__(self, paths, title, track_stripped=False,
               track_compressed=False):
    self.paths = paths
    self.title = title
    self.track_stripped = track_stripped
    self.track_compressed = track_compressed

  def __eq__(self, other):
    """Value equality over all attributes, so list.remove() finds groups."""
    if not isinstance(other, _Group):
      return NotImplemented
    # Compare attribute tuples with boolean semantics (the original used
    # bitwise '&', which happened to work on bools but is not idiomatic).
    return (self.paths, self.title, self.track_stripped,
            self.track_compressed) == (other.paths, other.title,
                                       other.track_stripped,
                                       other.track_compressed)


# Common artifacts in official builder lacros-arm32 and lacros64 in
# src-internal. The artifacts can be found in
# chromium/src-internal/testing/buildbot/archive/lacros64.json and
# chromium/src-internal/testing/buildbot/archive/lacros-arm32.json
# chromium/src-internal/testing/buildbot/archive/lacros-arm64.json
_TRACKED_GROUPS = [
    _Group(paths=['chrome'],
           title='File: chrome',
           track_stripped=True,
           track_compressed=True),
    _Group(paths=['chrome_crashpad_handler'],
           title='File: chrome_crashpad_handler'),
    _Group(paths=['icudtl.dat'], title='File: icudtl.dat'),
    _Group(paths=['icudtl.dat.hash'], title='File: icudtl.dat.hash'),
    _Group(paths=['libEGL.so'], title='File: libEGL.so'),
    _Group(paths=['libGLESv2.so'], title='File: libGLESv2.so'),
    _Group(paths=['nacl_helper'], title='File: nacl_helper'),
    _Group(paths=['resources.pak'], title='File: resources.pak'),
    _Group(paths=[
        'chrome_100_percent.pak', 'chrome_200_percent.pak',
        'headless_lib_data.pak', 'headless_lib_strings.pak'
    ],
           title='Group: Other PAKs'),
    _Group(paths=['snapshot_blob.bin'], title='Group: Misc'),
    _Group(paths=['locales/'], title='Dir: locales'),
    _Group(paths=['resources/accessibility/'],
           title='Dir: resources/accessibility'),
    _Group(paths=['WidevineCdm/'], title='Dir: WidevineCdm'),
]


def _visit_paths(base_dir, paths):
  """Itemizes files specified by a list of paths.

  Args:
    base_dir: Base directory for all elements in |paths|.
    paths: A list of filenames or directory names to specify files whose sizes
      to be counted. Directories are recursed. There's no de-duping effort.
      Non-existing files or directories are ignored (with warning message).

  Yields:
    Full paths of the specified files, one at a time.
  """
  for path in paths:
    full_path = os.path.join(base_dir, path)
    if os.path.exists(full_path):
      if os.path.isdir(full_path):
        for dirpath, _, filenames in os.walk(full_path):
          for filename in filenames:
            yield os.path.join(dirpath, filename)
      else:  # Assume is file.
        yield full_path
    else:
      logging.critical('Not found: %s', path)


def _is_probably_elf(filename):
  """Heuristically decides whether |filename| is ELF via magic signature."""
  with open(filename, 'rb') as fh:
    # The file is opened in binary mode, so compare against bytes: the
    # original str literal '\x7FELF' could never equal fh.read(4) under
    # Python 3, which silently disabled ELF stripping.
    return fh.read(4) == b'\x7fELF'


def _is_unstrippable_elf(filename):
  """Identifies known-unstrippable ELF files to denoise the system."""
  return filename.endswith('.nexe') or filename.endswith('libwidevinecdm.so')


def _get_filesize(filename):
  """Returns the size of a file, or 0 if file is not found."""
  try:
    return os.path.getsize(filename)
  except OSError:
    logging.critical('Failed to get size: %s', filename)
    return 0


def _get_gzipped_filesize(filename):
  """Returns the gzipped size of a file, or 0 if file is not found."""
  BUFFER_SIZE = 65536
  if not os.path.isfile(filename):
    return 0
  try:
    # Call gzip externally instead of using gzip package since it's > 2x faster.
    cmd = ['gzip', '-c', filename]
    # Use Popen as a context manager so the child is always waited on,
    # avoiding zombie processes.
    with subprocess.Popen(cmd, stdout=subprocess.PIPE) as p:
      # Manually counting bytes instead of using len(p.communicate()[0]) to
      # avoid buffering the entire compressed data (can be ~100 MB).
      ret = 0
      while True:
        chunk = len(p.stdout.read(BUFFER_SIZE))
        if chunk == 0:
          break
        ret += chunk
    return ret
  except OSError:
    logging.critical('Failed to get gzipped size: %s', filename)
    return 0


def _get_catagorized_filesizes(filename):
  """Measures |filename| sizes under various transforms.

  Returns: A Counter (keyed by _KEY_* constants) that stores measured sizes.
  """
  sizes = collections.Counter()
  sizes[_KEY_RAW] = _get_filesize(filename)
  sizes[_KEY_GZIPPED] = _get_gzipped_filesize(filename)

  # Pre-assign values for non-ELF, or in case of failure for ELF.
  sizes[_KEY_STRIPPED] = sizes[_KEY_RAW]
  sizes[_KEY_STRIPPED_GZIPPED] = sizes[_KEY_GZIPPED]

  if _is_probably_elf(filename) and not _is_unstrippable_elf(filename):
    # Create the temp file before entering the try block so the finally
    # clause can always unlink it (previously a mkstemp() failure would
    # have raised NameError on |temp_file| in the cleanup).
    fd, temp_file = tempfile.mkstemp()
    os.close(fd)
    try:
      cmd = [EU_STRIP_PATH, filename, '-o', temp_file]
      subprocess.check_output(cmd)
      sizes[_KEY_STRIPPED] = _get_filesize(temp_file)
      sizes[_KEY_STRIPPED_GZIPPED] = _get_gzipped_filesize(temp_file)
      if sizes[_KEY_STRIPPED] > sizes[_KEY_RAW]:
        # This weird case has been observed for libwidevinecdm.so.
        logging.critical('Stripping made things worse for %s', filename)
    except subprocess.CalledProcessError:
      logging.critical('Failed to strip file: %s', filename)
    finally:
      os.unlink(temp_file)
  return sizes


def _dump_chart_json(output_dir, chartjson):
  """Writes chart histogram to JSON files.

  Output files:
    results-chart.json contains the chart JSON.
    perf_results.json contains histogram JSON for Catapult.

  Args:
    output_dir: Directory to place the JSON files.
    chartjson: Source JSON data for output files.
  """
  results_path = os.path.join(output_dir, 'results-chart.json')
  logging.critical('Dumping chartjson to %s', results_path)
  with open(results_path, 'w') as json_file:
    json.dump(chartjson, json_file, indent=2)

  # We would ideally generate a histogram set directly instead of generating
  # chartjson then converting. However, perf_tests_results_helper is in
  # //build, which doesn't seem to have any precedent for depending on
  # anything in Catapult. This can probably be fixed, but since this doesn't
  # need to be super fast or anything, converting is a good enough solution
  # for the time being.
  histogram_result = convert_chart_json.ConvertChartJson(results_path)
  if histogram_result.returncode != 0:
    raise Exception('chartjson conversion failed with error: ' +
                    histogram_result.stdout)

  histogram_path = os.path.join(output_dir, 'perf_results.json')
  logging.critical('Dumping histograms to %s', histogram_path)
  with open(histogram_path, 'wb') as json_file:
    json_file.write(histogram_result.stdout)


def _run_resource_sizes(args):
  """Main flow to extract and output size data."""
  chartjson = _BASE_CHART.copy()
  chartjson.update({
      'benchmark_description':
      ('LaCrOS %s resource size information.' % args.arch)
  })
  report_func = perf_tests_results_helper.ReportPerfResult
  total_sizes = collections.Counter()

  def report_sizes(sizes, title, track_stripped, track_compressed):
    # Emits one chart entry per requested transform of |sizes|.
    report_func(chart_data=chartjson,
                graph_title=title,
                trace_title='size',
                value=sizes[_KEY_RAW],
                units='bytes')

    if track_stripped:
      report_func(chart_data=chartjson,
                  graph_title=title + ' (Stripped)',
                  trace_title='size',
                  value=sizes[_KEY_STRIPPED],
                  units='bytes')

    if track_compressed:
      report_func(chart_data=chartjson,
                  graph_title=title + ' (Gzipped)',
                  trace_title='size',
                  value=sizes[_KEY_GZIPPED],
                  units='bytes')

    if track_stripped and track_compressed:
      report_func(chart_data=chartjson,
                  graph_title=title + ' (Stripped, Gzipped)',
                  trace_title='size',
                  value=sizes[_KEY_STRIPPED_GZIPPED],
                  units='bytes')

  tracked_groups = _TRACKED_GROUPS.copy()
  # Architecture amd64 requires artifact nacl_irt_x86_64.nexe.
  if args.arch == 'amd64':
    tracked_groups.append(
        _Group(paths=['nacl_irt_x86_64.nexe'],
               title='File: nacl_irt_x86_64.nexe'))
  # Architecture arm32 requires artifact nacl_irt_arm.nexe.
  elif args.arch == 'arm32':
    tracked_groups.append(
        _Group(paths=['nacl_irt_arm.nexe'], title='File: nacl_irt_arm.nexe'))
    tracked_groups.append(
        _Group(paths=['nacl_helper_bootstrap'],
               title='File: nacl_helper_bootstrap'))
  # TODO(https://crbug.com/1356761): remove the following part once nacl files
  # are available.
  elif args.arch == 'arm64':
    # Relies on _Group.__eq__ for value-based removal.
    tracked_groups.remove(
        _Group(paths=['nacl_helper'], title='File: nacl_helper'))
  for g in tracked_groups:
    sizes = sum(
        map(_get_catagorized_filesizes, _visit_paths(args.out_dir, g.paths)),
        collections.Counter())
    report_sizes(sizes, g.title, g.track_stripped, g.track_compressed)

    # Total compressed size is summed over individual compressed sizes, instead
    # of concatenating first, then compress everything. This is done for
    # simplicity. It also gives a conservative size estimate (assuming file
    # metadata and overheads are negligible).
    total_sizes += sizes

  report_sizes(total_sizes, 'Total', True, True)

  _dump_chart_json(args.output_dir, chartjson)


def main():
  """Parses arguments and runs high level flows."""
  argparser = argparse.ArgumentParser(description='Writes LaCrOS size metrics.')

  argparser.add_argument('--chromium-output-directory',
                         dest='out_dir',
                         required=True,
                         type=os.path.realpath,
                         help='Location of the build artifacts.')
  argparser.add_argument('--arch',
                         required=True,
                         type=str,
                         help='The architecture of lacros, valid values: amd64,'
                         ' arm32, arm64')

  output_group = argparser.add_mutually_exclusive_group()

  output_group.add_argument('--output-dir',
                            default='.',
                            help='Directory to save chartjson to.')

  # Accepted to conform to the isolated script interface, but ignored.
  argparser.add_argument('--isolated-script-test-filter',
                         help=argparse.SUPPRESS)
  argparser.add_argument('--isolated-script-test-perf-output',
                         type=os.path.realpath,
                         help=argparse.SUPPRESS)

  output_group.add_argument(
      '--isolated-script-test-output',
      type=os.path.realpath,
      help='File to which results will be written in the simplified JSON '
      'output format.')

  args = argparser.parse_args()

  isolated_script_output = {'valid': False, 'failures': []}
  if args.isolated_script_test_output:
    test_name = 'lacros_resource_sizes'
    args.output_dir = os.path.join(
        os.path.dirname(args.isolated_script_test_output), test_name)
    # exist_ok avoids a race between the existence check and creation.
    os.makedirs(args.output_dir, exist_ok=True)

  try:
    _run_resource_sizes(args)
    isolated_script_output = {'valid': True, 'failures': []}
  finally:
    if args.isolated_script_test_output:
      results_path = os.path.join(args.output_dir, 'test_results.json')
      with open(results_path, 'w') as output_file:
        json.dump(isolated_script_output, output_file)
      with open(args.isolated_script_test_output, 'w') as output_file:
        json.dump(isolated_script_output, output_file)
      result_sink_client = result_sink.TryInitClient()
      if result_sink_client:
        status = result_types.PASS
        if not isolated_script_output['valid']:
          status = result_types.UNKNOWN
        elif isolated_script_output['failures']:
          status = result_types.FAIL
        result_sink_client.Post(test_name, status, None, None, None)


if __name__ == '__main__':
  main()