xref: /aosp_15_r20/external/cronet/testing/merge_scripts/code_coverage/merge_js_lib.py (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1# Copyright 2020 The Chromium Authors
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4"""Functions to merge multiple JavaScript coverage files into one"""
5
6import base64
7import logging
8import json
9import os
10import sys
11
12_HERE_PATH = os.path.dirname(__file__)
13_THIRD_PARTY_PATH = os.path.normpath(
14    os.path.join(_HERE_PATH, '..', '..', '..', 'third_party'))
15_SRC_PATH = os.path.normpath(os.path.join(_HERE_PATH, '..', '..', '..'))
16sys.path.append(os.path.join(_THIRD_PARTY_PATH, 'node'))
17sys.path.append(os.path.join(_THIRD_PARTY_PATH, 'js_code_coverage'))
18import node
19import coverage_modules
20
21logging.basicConfig(format='[%(asctime)s %(levelname)s] %(message)s',
22                    level=logging.DEBUG)
23
24_PREFIXES_TO_CHECK = ['//', 'import ', '/*', '*']
25
26
27def _parse_json_file(path):
28    """Opens file and parses data into JSON
29
30  Args:
31    path (str): The path to a JSON file to parse.
32  """
33    with open(path, 'r') as json_file:
34        # Some JSON files erroroneously end with double curly brace, prefer to
35        # strip it out instead of throwing an error message.
36        json_string = json_file.read()
37        if json_string[0] == '{' and json_string[-2:] == '}}':
38            logging.warning(
39                'Found additional trailing curly brace for path: %s', path)
40            return json.loads(json_string[:-1])
41        return json.loads(json_string)
42
43
44def _get_paths_with_suffix(input_dir, suffix):
45    """Gets all JSON files in the input directory.
46
47  Args:
48    input_dir (str): The path to recursively search for
49        JSON files.
50
51  Returns:
52    A list of absolute file paths.
53  """
54    paths = []
55    for dir_path, _sub_dirs, file_names in os.walk(input_dir):
56        paths.extend([
57            os.path.join(dir_path, fn) for fn in file_names
58            if fn.endswith(suffix)
59        ])
60    return paths
61
62
def write_parsed_scripts(task_output_dir, source_dir=_SRC_PATH):
    """Extract parsed script contents and write back to original folder
  structure.

  Args:
    task_output_dir (str): The output directory for the sharded task. This will
        contain the raw JavaScript v8 parsed files that are identified by
        their ".js.json" suffix.
    source_dir (str): Root directory the sourcemap paths are made relative to.
        Defaults to the chromium/src checkout root.

  Returns:
    The absolute file path to the raw parsed scripts or None if no parsed
    scripts were identified (or any of the raw data contains invalid JSON).
  """
    _SOURCEMAPPING_DATA_URL_PREFIX = 'data:application/json;base64,'

    scripts = _get_paths_with_suffix(task_output_dir, '.js.json')
    output_dir = os.path.join(task_output_dir, 'parsed_scripts')

    # The original file is extracted from the inline sourcemaps, this
    # information is not available from the coverage data. So we have to
    # maintain a URL to path map to ensure the coverage data knows the original
    # source location.
    url_to_path_map = {}

    if not scripts:
        return None

    for file_path in scripts:
        try:
            script_data = _parse_json_file(file_path)
        except ValueError as e:
            logging.error('Failed to parse %s: %s', file_path, e)
            return None

        if any(key not in script_data
               for key in ('url', 'text', 'sourceMapURL')):
            logging.info('File %s is missing key url, text or sourceMapURL',
                         file_path)
            continue

        # TODO(crbug/1373753): For now we exclude any sourcemaps that are 0
        # length and also that don't begin with a data URL designation.
        # An empty string never startswith the prefix, so one check covers
        # both conditions.
        source_map_url = script_data['sourceMapURL']
        if not source_map_url.startswith(_SOURCEMAPPING_DATA_URL_PREFIX):
            continue

        # Slice off the data URL prefix rather than str.replace, which could
        # also rewrite spurious later occurrences inside the payload.
        decoded_sourcemap = base64.b64decode(
            source_map_url[len(_SOURCEMAPPING_DATA_URL_PREFIX):])
        json_sourcemap = json.loads(decoded_sourcemap)
        if not json_sourcemap['sources']:
            logging.warning('File %s has a valid sourcemap with no sources',
                            file_path)
            continue

        for source in json_sourcemap['sources']:
            # "sourceRoot" is optional in the Source Map spec; fall back to ''
            # so sources without it resolve as-is instead of raising KeyError.
            source_path = os.path.relpath(
                os.path.normpath(
                    os.path.join(json_sourcemap.get('sourceRoot', ''),
                                 source)), source_dir)
            source_directory = os.path.join(output_dir,
                                            os.path.dirname(source_path))
            # exist_ok avoids the check-then-create race of the previous
            # os.path.exists guard.
            os.makedirs(source_directory, exist_ok=True)

            with open(os.path.join(output_dir, source_path), 'wb') as f:
                f.write(script_data['text'].encode('utf8'))

            # Only write the first instance of the sources to the map.
            # Sourcemaps require stability in their indexing as the mapping
            # derived are based on the index location of the file in the
            # "sources" and "sourcesContent" fields. Therefore the first index
            # of the "sources" field will be the first file that was encountered
            # during source map generation, i.e. this should be the actual
            # chromium/src original file.
            if script_data['url'] not in url_to_path_map:
                url_to_path_map[script_data['url']] = source_path

    if not url_to_path_map:
        return None

    with open(os.path.join(output_dir, 'parsed_scripts.json'),
              'w+',
              encoding='utf-8') as f:
        json.dump(url_to_path_map, f)

    return output_dir
152
def should_exclude(line_contents):
    """Whether we exclude the line from coverage map."""
    stripped = line_contents.strip()
    # Blank lines carry no coverage information.
    if not stripped:
        return True

    # Comments and import statements are likewise uninteresting.
    return any(
        stripped.startswith(prefix) for prefix in _PREFIXES_TO_CHECK)
166
def exclude_uninteresting_lines(coverage_file_path):
    """Removes lines from Istanbul coverage reports that correspond to lines in
  the source file that are empty. These lines provide no additional coverage
  information and in fact inflate the coverage metrics.

  Args:
    coverage_file_path (str): The path to the merged coverage.json file.
  """
    with open(coverage_file_path, 'r+') as coverage_file:
        coverage = json.load(coverage_file)

        for source_path, istanbul_coverage in coverage.items():
            with open(source_path) as source_file:
                source_lines = source_file.readlines()

            # Materialize the keys so entries can be deleted mid-iteration.
            for statement_key in list(istanbul_coverage['statementMap']):
                statement = istanbul_coverage['statementMap'][statement_key]
                line_number = statement['start']['line']

                # Each statement entry is expected to span exactly one line.
                assert line_number == statement['end']['line']

                if should_exclude(source_lines[line_number - 1]):
                    # 'statementMap' maps statement keys to source lines and
                    # 's' holds the matching invocation counts; drop both so
                    # the line no longer contributes to coverage metrics.
                    del istanbul_coverage['statementMap'][statement_key]
                    del istanbul_coverage['s'][statement_key]

        # Overwrite the current coverage file with new contents.
        coverage_file.seek(0)
        coverage_file.truncate()
        json.dump(coverage, coverage_file)
209
210
def remap_paths_to_relative(coverage_file_path, chromium_src_dir, build_dir):
    """Remap paths to be relative to the chromium_src_dir.

  Entries under |build_dir| or outside |chromium_src_dir| are dropped; all
  remaining absolute keys (and their "path" fields) are rewritten relative to
  |chromium_src_dir| with forward slashes. The file is rewritten in place.

  Args:
    coverage_file_path (str): The path to the merged coverage.json file.
    chromium_src_dir (str): The absolute location to chromium/src.
    build_dir (str): The absolute path to the output dir in chromium/src.
  """
    with open(coverage_file_path, 'r+') as f:
        coverage_json = json.load(f)
        excluded_paths = 0
        remapped_paths = 0

        # Materialize the keys since entries are deleted while iterating.
        for key in list(coverage_json.keys()):
            # Generated files in the build dir have no checked-in source to
            # remap to.
            if key.startswith(build_dir):
                del coverage_json[key]
                excluded_paths += 1
                continue

            # Anything outside the chromium/src checkout is likewise dropped.
            if not key.startswith(chromium_src_dir):
                del coverage_json[key]
                excluded_paths += 1
                continue

            relative_src_path = os.path.relpath(key, chromium_src_dir).replace(
                '\\', '/')
            value = coverage_json.pop(key)
            value['path'] = relative_src_path
            coverage_json[relative_src_path] = value
            remapped_paths += 1

        # Lazy %-style args so formatting happens only if the record is
        # actually emitted (and malformed counts can't raise at format time).
        logging.info('Remapped %s paths', remapped_paths)
        logging.info('Excluded %s paths', excluded_paths)

        # Overwrite the current coverage file with new contents.
        f.seek(0)
        f.truncate()
        json.dump(coverage_json, f)
251
252
def get_raw_coverage_dirs(task_output_dir):
    """Returns a set of directories containing raw v8 coverage.

  Args:
    task_output_dir (str): The output directory for the sharded task. This will
        contain the raw JavaScript v8 coverage files that are identified by
        their ".cov.json" suffix.

  Returns:
    A set of directory paths that hold at least one ".cov.json" file.
  """
    coverage_directories = set()
    for dir_path, _sub_dirs, file_names in os.walk(task_output_dir):
        for name in file_names:
            if name.endswith('.cov.json'):
                coverage_directories.add(dir_path)
                # One match is enough to record this directory; no need to
                # scan the remaining files (the old `continue` kept looping).
                break

    return coverage_directories
269
270
def convert_raw_coverage_to_istanbul(raw_coverage_dirs, source_dir,
                                     task_output_dir):
    """Calls the node helper script convert_to_istanbul.js

  Args:
    raw_coverage_dirs (list): Directory that contains raw v8 code coverage.
    source_dir (str): Root directory containing the instrumented source.
    task_output_dir (str): Directory passed via --output-dir, where the
        helper script writes the converted Istanbul reports.

  Raises:
    RuntimeError: If the underlying node command fails.
  """
    # Each raw coverage directory is splatted into a separate
    # --raw-coverage-dirs argument.
    stdout = node.RunNode([
        os.path.join(_HERE_PATH, 'convert_to_istanbul.js'),
        '--source-dir',
        source_dir,
        '--output-dir',
        task_output_dir,
        '--raw-coverage-dirs',
        *raw_coverage_dirs,
    ])
    logging.info(stdout)
292
293
def merge_istanbul_reports(istanbul_coverage_dir, source_dir, output_file):
    """Merges all disparate istanbul reports into a single report.

  Args:
    istanbul_coverage_dir (str): Directory containing separate coverage files.
    source_dir (str): Directory containing instrumented source code.
    output_file (str): File path to output merged coverage.

  Raises:
    RuntimeError: If the underlying node command fails.
  """
    # Delegate the merge to nyc, run through the bundled node binary.
    merge_command = [
        coverage_modules.PathToNyc(),
        'merge',
        istanbul_coverage_dir,
        output_file,
        '--cwd',
        source_dir,
    ]
    return node.RunNode(merge_command)
313
314
def generate_coverage_reports(coverage_file_dir, output_dir):
    """Generate a LCOV report.

  Args:
    coverage_file_dir (str): Directory containing the coverage.json file.
    output_dir (str): Directory to output the reports.
  """
    # Have nyc render an lcov report from the merged coverage data.
    report_command = [
        coverage_modules.PathToNyc(),
        'report',
        '--temp-dir',
        coverage_file_dir,
        '--reporter',
        'lcov',
        '--report-dir',
        output_dir,
        '--exclude-after-remap',
        'false',
    ]
    return node.RunNode(report_command)
334