xref: /aosp_15_r20/external/cronet/build/lacros/lacros_resource_sizes.py (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1#!/usr/bin/env python3
2# Copyright 2020 The Chromium Authors
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5"""Reports binary size metrics for LaCrOS build artifacts.
6
7More information at //docs/speed/binary_size/metrics.md.
8"""
9
import argparse
import collections
import contextlib
import copy
import json
import logging
import os
import subprocess
import sys
import tempfile
19SRC_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))
20sys.path.insert(0, os.path.join(SRC_DIR, 'build', 'util'))
21from lib.results import result_sink
22from lib.results import result_types
23
24
@contextlib.contextmanager
def _SysPath(path):
  """Library import context that temporarily prepends |path| to |sys.path|.

  Args:
    path: Directory to make importable. If it is empty/None, or is already
      present in |sys.path|, then |sys.path| is left untouched.

  Yields:
    None. While the `with` body runs, |path| is the first |sys.path| entry
    (unless the body itself modifies |sys.path|).
  """
  inserted = bool(path) and path not in sys.path
  if inserted:
    sys.path.insert(0, path)
  try:
    yield
  finally:
    if inserted:
      # Remove by value rather than popping index 0: code run inside the
      # context (e.g. a module being imported) may itself have pushed entries
      # onto the front of |sys.path|, in which case index 0 is no longer ours.
      try:
        sys.path.remove(path)
      except ValueError:
        pass  # Already removed by the code that ran inside the context.
37
38
# Root of the source checkout: taken from the CHECKOUT_SOURCE_ROOT environment
# variable when it is set, otherwise two directories above this file.
DIR_SOURCE_ROOT = os.environ.get(
    'CHECKOUT_SOURCE_ROOT',
    os.path.abspath(
        os.path.join(os.path.dirname(__file__), os.pardir, os.pardir)))

# Directory put on |sys.path| below in order to import
# perf_tests_results_helper.
BUILD_COMMON_PATH = os.path.join(DIR_SOURCE_ROOT, 'build', 'util', 'lib',
                                 'common')

# Directory put on |sys.path| below in order to import
# tracing.value.convert_chart_json.
TRACING_PATH = os.path.join(DIR_SOURCE_ROOT, 'third_party', 'catapult',
                            'tracing')

# Executable that _get_catagorized_filesizes() runs to write a stripped copy
# of an ELF file.
EU_STRIP_PATH = os.path.join(DIR_SOURCE_ROOT, 'buildtools', 'third_party',
                             'eu-strip', 'bin', 'eu-strip')

# These imports live outside the regular import block because they only
# resolve while the corresponding directory is on |sys.path|.
with _SysPath(BUILD_COMMON_PATH):
  import perf_tests_results_helper  # pylint: disable=import-error

with _SysPath(TRACING_PATH):
  from tracing.value import convert_chart_json  # pylint: disable=import-error

# Template for the chart JSON document. _run_resource_sizes() copies it, adds
# a 'benchmark_description' entry and passes the copy to the reporting helper.
_BASE_CHART = {
    'format_version': '0.1',
    'benchmark_name': 'resource_sizes',
    'trace_rerun_options': [],
    'charts': {}
}

# Keys of the size Counter produced by _get_catagorized_filesizes().
_KEY_RAW = 'raw'  # Size of the file as found on disk.
_KEY_GZIPPED = 'gzipped'  # Size of the gzip-compressed file.
_KEY_STRIPPED = 'stripped'  # Size after stripping (raw size if not stripped).
# Size after stripping and then gzip-compressing.
_KEY_STRIPPED_GZIPPED = 'stripped_then_gzipped'
70
71
class _Group:
  """A group of build artifacts whose file sizes are summed and tracked.

  Build artifacts for size tracking fall under these categories:
  * File: A single file.
  * Group: A collection of files.
  * Dir: All files under a directory.

  Instances compare equal when all four attributes are equal, which is what
  allows an entry to be removed from a list of groups by value. Because
  equality is based on mutable attributes, instances are not hashable.

  Attributes:
    paths: A list of files or directories to be tracked together.
    title: The display name of the group.
    track_stripped: Whether to also track summed stripped ELF sizes.
    track_compressed: Whether to also track summed compressed sizes.
  """

  def __init__(self, paths, title, track_stripped=False,
               track_compressed=False):
    self.paths = paths
    self.title = title
    self.track_stripped = track_stripped
    self.track_compressed = track_compressed

  def __eq__(self, other):
    """Returns whether |other| is a _Group with identical attributes."""
    if not isinstance(other, _Group):
      # Let Python try the reflected comparison; `group == other` still
      # evaluates to False when |other| does not know about _Group either.
      return NotImplemented
    return (self.paths == other.paths and self.title == other.title
            and self.track_stripped == other.track_stripped
            and self.track_compressed == other.track_compressed)

  # Defining __eq__ already makes the class unhashable in Python 3; spell it
  # out so that the intent is visible.
  __hash__ = None

  def __repr__(self):
    """Returns a constructor-like representation, for logs and errors."""
    return ('_Group(paths=%r, title=%r, track_stripped=%r, '
            'track_compressed=%r)' % (self.paths, self.title,
                                      self.track_stripped,
                                      self.track_compressed))
101
# Common artifacts in the official builders lacros-arm32 and lacros64 in
# src-internal. The artifacts can be found in:
#   chromium/src-internal/testing/buildbot/archive/lacros64.json
#   chromium/src-internal/testing/buildbot/archive/lacros-arm32.json
#   chromium/src-internal/testing/buildbot/archive/lacros-arm64.json
#
# Paths are joined onto the build output directory by _visit_paths().
# _run_resource_sizes() copies this list and adjusts the copy per architecture
# before measuring.
_TRACKED_GROUPS = [
    # The only entry that also reports stripped and gzipped sizes.
    _Group(paths=['chrome'],
           title='File: chrome',
           track_stripped=True,
           track_compressed=True),
    _Group(paths=['chrome_crashpad_handler'],
           title='File: chrome_crashpad_handler'),
    _Group(paths=['icudtl.dat'], title='File: icudtl.dat'),
    _Group(paths=['icudtl.dat.hash'], title='File: icudtl.dat.hash'),
    _Group(paths=['libEGL.so'], title='File: libEGL.so'),
    _Group(paths=['libGLESv2.so'], title='File: libGLESv2.so'),
    # Removed again by _run_resource_sizes() when the architecture is arm64.
    _Group(paths=['nacl_helper'], title='File: nacl_helper'),
    _Group(paths=['resources.pak'], title='File: resources.pak'),
    _Group(paths=[
        'chrome_100_percent.pak', 'chrome_200_percent.pak',
        'headless_lib_data.pak', 'headless_lib_strings.pak'
    ],
           title='Group: Other PAKs'),
    _Group(paths=['snapshot_blob.bin'], title='Group: Misc'),
    # Directory entries: every file below the directory is counted.
    _Group(paths=['locales/'], title='Dir: locales'),
    _Group(paths=['resources/accessibility/'],
           title='Dir: resources/accessibility'),
    _Group(paths=['WidevineCdm/'], title='Dir: WidevineCdm'),
]
131
132
def _visit_paths(base_dir, paths):
  """Enumerates the files designated by a list of paths.

  Args:
    base_dir: Directory that every element of |paths| is joined onto.
    paths: File or directory names, relative to |base_dir|. A directory
      contributes every file found beneath it, recursively. Entries are not
      de-duplicated. An entry that does not exist is logged and skipped.

  Yields:
    The path of each file found, prefixed with |base_dir|.
  """
  for rel_path in paths:
    target = os.path.join(base_dir, rel_path)
    if not os.path.exists(target):
      logging.critical('Not found: %s', rel_path)
      continue
    if not os.path.isdir(target):
      # Anything that exists and is not a directory is treated as a file.
      yield target
      continue
    for root, _, names in os.walk(target):
      yield from (os.path.join(root, name) for name in names)
153
154
def _is_probably_elf(filename):
  """Heuristically decides whether |filename| is ELF via magic signature.

  Args:
    filename: Path of the file to inspect.

  Returns:
    True if the first four bytes of the file are the ELF magic number.
  """
  with open(filename, 'rb') as fh:
    # The file is read in binary mode, so the signature has to be a bytes
    # literal: in Python 3 a bytes object never compares equal to a str.
    return fh.read(4) == b'\x7fELF'
159
160
def _is_unstrippable_elf(filename):
  """Tells whether |filename| is an ELF file that should not be stripped.

  Files are recognized purely by name suffix; skipping them keeps known
  problem cases from adding noise to the results.
  """
  unstrippable_suffixes = ('.nexe', 'libwidevinecdm.so')
  return filename.endswith(unstrippable_suffixes)
164
165
def _get_filesize(filename):
  """Looks up the size of |filename| in bytes.

  Returns:
    The file size, or 0 (after logging the failure) if the size cannot be
    read, e.g. because the file does not exist.
  """
  try:
    size = os.path.getsize(filename)
  except OSError:
    logging.critical('Failed to get size: %s', filename)
    size = 0
  return size
173
174
def _get_gzipped_filesize(filename):
  """Returns the gzipped size of a file, or 0 if file is not found.

  Args:
    filename: Path of the file to compress.

  Returns:
    The number of bytes that `gzip -c` writes for |filename|, or 0 if
    |filename| is not a regular file or gzip could not be run.
  """
  BUFFER_SIZE = 65536
  if not os.path.isfile(filename):
    return 0
  try:
    # Call gzip externally instead of using gzip package since it's > 2x faster.
    cmd = ['gzip', '-c', filename]
    # The context manager closes the pipe and waits for gzip to exit, so that
    # neither a file descriptor nor a zombie process is left behind per file.
    with subprocess.Popen(cmd, stdout=subprocess.PIPE) as proc:
      # Manually counting bytes instead of using len(p.communicate()[0]) to
      # avoid buffering the entire compressed data (can be ~100 MB).
      total = 0
      for chunk in iter(lambda: proc.stdout.read(BUFFER_SIZE), b''):
        total += len(chunk)
    if proc.returncode != 0:
      # The byte count is still returned, as before; only surface the fact
      # that it may not describe a complete compressed stream.
      logging.critical('gzip exited with code %d for: %s', proc.returncode,
                       filename)
    return total
  except OSError:
    logging.critical('Failed to get gzipped size: %s', filename)
  return 0
196
197
def _get_catagorized_filesizes(filename):
  """Measures |filename| sizes under various transforms.

  Args:
    filename: Path of the file to measure.

  Returns: A Counter (keyed by _KEY_* constants) that stores measured sizes.
    The stripped sizes equal the unstripped ones for non-ELF files, for
    known-unstrippable ELF files, and when stripping fails.
  """
  sizes = collections.Counter()
  sizes[_KEY_RAW] = _get_filesize(filename)
  sizes[_KEY_GZIPPED] = _get_gzipped_filesize(filename)

  # Pre-assign values for non-ELF, or in case of failure for ELF.
  sizes[_KEY_STRIPPED] = sizes[_KEY_RAW]
  sizes[_KEY_STRIPPED_GZIPPED] = sizes[_KEY_GZIPPED]

  if _is_probably_elf(filename) and not _is_unstrippable_elf(filename):
    # Create the scratch file before entering the try block, so that the
    # cleanup in `finally` never refers to an unbound name if mkstemp() fails.
    fd, temp_file = tempfile.mkstemp()
    os.close(fd)
    try:
      cmd = [EU_STRIP_PATH, filename, '-o', temp_file]
      subprocess.check_output(cmd)
      sizes[_KEY_STRIPPED] = _get_filesize(temp_file)
      sizes[_KEY_STRIPPED_GZIPPED] = _get_gzipped_filesize(temp_file)
      if sizes[_KEY_STRIPPED] > sizes[_KEY_RAW]:
        # This weird case has been observed for libwidevinecdm.so.
        logging.critical('Stripping made things worse for %s', filename)
    except (subprocess.CalledProcessError, OSError):
      # CalledProcessError: the strip tool ran and reported failure.
      # OSError: the strip tool could not be launched at all (e.g. missing or
      # not executable). Either way fall back to the pre-assigned sizes.
      logging.critical('Failed to strip file: %s', filename)
    finally:
      os.unlink(temp_file)
  return sizes
227
228
def _dump_chart_json(output_dir, chartjson):
  """Writes chart histogram to JSON files.

  Output files:
    results-chart.json contains the chart JSON.
    perf_results.json contains histogram JSON for Catapult.

  Args:
    output_dir: Directory to place the JSON files.
    chartjson: Source JSON data for output files.

  Raises:
    Exception: If converting the chart JSON to histograms fails.
  """
  results_path = os.path.join(output_dir, 'results-chart.json')
  logging.critical('Dumping chartjson to %s', results_path)
  with open(results_path, 'w') as json_file:
    json.dump(chartjson, json_file, indent=2)

  # We would ideally generate a histogram set directly instead of generating
  # chartjson then converting. However, perf_tests_results_helper is in
  # //build, which doesn't seem to have any precedent for depending on
  # anything in Catapult. This can probably be fixed, but since this doesn't
  # need to be super fast or anything, converting is a good enough solution
  # for the time being.
  histogram_result = convert_chart_json.ConvertChartJson(results_path)
  if histogram_result.returncode != 0:
    # |stdout| is written to a binary-mode file below, so it is presumably
    # bytes; concatenating bytes to a str would raise TypeError and hide the
    # real conversion error. Decode first, tolerating undecodable output.
    error_output = histogram_result.stdout
    if isinstance(error_output, bytes):
      error_output = error_output.decode('utf-8', errors='replace')
    raise Exception('chartjson conversion failed with error: ' +
                    error_output)

  histogram_path = os.path.join(output_dir, 'perf_results.json')
  logging.critical('Dumping histograms to %s', histogram_path)
  with open(histogram_path, 'wb') as json_file:
    json_file.write(histogram_result.stdout)
260
261
def _run_resource_sizes(args):
  """Main flow to extract and output size data.

  Measures every tracked group for the requested architecture, reports each
  group plus a grand total into a chart JSON document, and writes the result
  files via _dump_chart_json().

  Args:
    args: Parsed command line namespace. Reads |arch| (selects the
      architecture specific artifacts), |out_dir| (where the build artifacts
      are looked up) and |output_dir| (where the JSON files are written).
  """
  # Deep-copy the template: a shallow copy would share the nested 'charts'
  # dict and 'trace_rerun_options' list with the module-level _BASE_CHART, so
  # anything recorded into them for this run would leak into the template.
  chartjson = copy.deepcopy(_BASE_CHART)
  chartjson.update({
      'benchmark_description':
      ('LaCrOS %s resource size information.' % args.arch)
  })
  report_func = perf_tests_results_helper.ReportPerfResult
  total_sizes = collections.Counter()

  def report_sizes(sizes, title, track_stripped, track_compressed):
    """Reports the raw size of |title| plus the opted-in variants."""
    report_func(chart_data=chartjson,
                graph_title=title,
                trace_title='size',
                value=sizes[_KEY_RAW],
                units='bytes')

    if track_stripped:
      report_func(chart_data=chartjson,
                  graph_title=title + ' (Stripped)',
                  trace_title='size',
                  value=sizes[_KEY_STRIPPED],
                  units='bytes')

    if track_compressed:
      report_func(chart_data=chartjson,
                  graph_title=title + ' (Gzipped)',
                  trace_title='size',
                  value=sizes[_KEY_GZIPPED],
                  units='bytes')

    if track_stripped and track_compressed:
      report_func(chart_data=chartjson,
                  graph_title=title + ' (Stripped, Gzipped)',
                  trace_title='size',
                  value=sizes[_KEY_STRIPPED_GZIPPED],
                  units='bytes')

  # Work on a copy so that the per-architecture adjustments below do not
  # modify the module-level list.
  tracked_groups = _TRACKED_GROUPS.copy()
  # Architecture amd64 requires artifact nacl_irt_x86_64.nexe.
  if args.arch == 'amd64':
    tracked_groups.append(
        _Group(paths=['nacl_irt_x86_64.nexe'],
               title='File: nacl_irt_x86_64.nexe'))
  # Architecture arm32 requires artifact nacl_irt_arm.nexe.
  elif args.arch == 'arm32':
    tracked_groups.append(
        _Group(paths=['nacl_irt_arm.nexe'], title='File: nacl_irt_arm.nexe'))
    tracked_groups.append(
        _Group(paths=['nacl_helper_bootstrap'],
               title='File: nacl_helper_bootstrap'))
  # TODO(https://crbug.com/1356761): remove the following part once nacl files
  # are available.
  elif args.arch == 'arm64':
    # Removal is by value and relies on _Group.__eq__.
    tracked_groups.remove(
        _Group(paths=['nacl_helper'], title='File: nacl_helper'))
  for g in tracked_groups:
    # Sum the per-file Counters of every file that belongs to the group.
    sizes = sum(
        map(_get_catagorized_filesizes, _visit_paths(args.out_dir, g.paths)),
        collections.Counter())
    report_sizes(sizes, g.title, g.track_stripped, g.track_compressed)

    # Total compressed size is summed over individual compressed sizes, instead
    # of concatenating first, then compress everything. This is done for
    # simplicity. It also gives a conservative size estimate (assuming file
    # metadata and overheads are negligible).
    total_sizes += sizes

  report_sizes(total_sizes, 'Total', True, True)

  _dump_chart_json(args.output_dir, chartjson)
333
334
def main():
  """Parses arguments and runs high level flows.

  Measures the build artifacts, writes the chart/histogram JSON files and,
  when --isolated-script-test-output is given, also writes the simplified
  JSON result (both to that file and to test_results.json in the output
  directory). If a ResultSink client is available, the outcome is posted to
  it as well.
  """
  argparser = argparse.ArgumentParser(description='Writes LaCrOS size metrics.')

  argparser.add_argument('--chromium-output-directory',
                         dest='out_dir',
                         required=True,
                         type=os.path.realpath,
                         help='Location of the build artifacts.')
  argparser.add_argument('--arch',
                         required=True,
                         type=str,
                         help='The architecture of lacros, valid values: amd64,'
                         ' arm32, arm64')

  output_group = argparser.add_mutually_exclusive_group()

  output_group.add_argument('--output-dir',
                            default='.',
                            help='Directory to save chartjson to.')

  # Accepted to conform to the isolated script interface, but ignored.
  argparser.add_argument('--isolated-script-test-filter',
                         help=argparse.SUPPRESS)
  argparser.add_argument('--isolated-script-test-perf-output',
                         type=os.path.realpath,
                         help=argparse.SUPPRESS)

  output_group.add_argument(
      '--isolated-script-test-output',
      type=os.path.realpath,
      help='File to which results will be written in the simplified JSON '
      'output format.')

  args = argparser.parse_args()

  # Bound unconditionally: the name is needed for the isolated-script output
  # directory AND for the ResultSink post at the end, which also runs when
  # --isolated-script-test-output is not given.
  test_name = 'lacros_resource_sizes'
  isolated_script_output = {'valid': False, 'failures': []}
  if args.isolated_script_test_output:
    # Results go into a directory named after the test, next to the
    # requested output file.
    args.output_dir = os.path.join(
        os.path.dirname(args.isolated_script_test_output), test_name)
    os.makedirs(args.output_dir, exist_ok=True)

  try:
    _run_resource_sizes(args)
    isolated_script_output = {'valid': True, 'failures': []}
  finally:
    # Written on failure too, in which case 'valid' is still False.
    if args.isolated_script_test_output:
      results_path = os.path.join(args.output_dir, 'test_results.json')
      with open(results_path, 'w') as output_file:
        json.dump(isolated_script_output, output_file)
      with open(args.isolated_script_test_output, 'w') as output_file:
        json.dump(isolated_script_output, output_file)
  # NOTE(review): an exception from _run_resource_sizes() propagates out of
  # the try/finally above, so the code below only runs after a successful
  # measurement; the UNKNOWN and FAIL branches are currently not reachable.
  result_sink_client = result_sink.TryInitClient()
  if result_sink_client:
    status = result_types.PASS
    if not isolated_script_output['valid']:
      status = result_types.UNKNOWN
    elif isolated_script_output['failures']:
      status = result_types.FAIL
    result_sink_client.Post(test_name, status, None, None, None)
397
398
# Run the tool only when this file is executed as a script, not on import.
if __name__ == '__main__':
  main()
401