1# Copyright 2020 The Chromium Authors 2# Use of this source code is governed by a BSD-style license that can be 3# found in the LICENSE file. 4"""Functions to merge multiple JavaScript coverage files into one""" 5 6import base64 7import logging 8import json 9import os 10import sys 11 12_HERE_PATH = os.path.dirname(__file__) 13_THIRD_PARTY_PATH = os.path.normpath( 14 os.path.join(_HERE_PATH, '..', '..', '..', 'third_party')) 15_SRC_PATH = os.path.normpath(os.path.join(_HERE_PATH, '..', '..', '..')) 16sys.path.append(os.path.join(_THIRD_PARTY_PATH, 'node')) 17sys.path.append(os.path.join(_THIRD_PARTY_PATH, 'js_code_coverage')) 18import node 19import coverage_modules 20 21logging.basicConfig(format='[%(asctime)s %(levelname)s] %(message)s', 22 level=logging.DEBUG) 23 24_PREFIXES_TO_CHECK = ['//', 'import ', '/*', '*'] 25 26 27def _parse_json_file(path): 28 """Opens file and parses data into JSON 29 30 Args: 31 path (str): The path to a JSON file to parse. 32 """ 33 with open(path, 'r') as json_file: 34 # Some JSON files erroroneously end with double curly brace, prefer to 35 # strip it out instead of throwing an error message. 36 json_string = json_file.read() 37 if json_string[0] == '{' and json_string[-2:] == '}}': 38 logging.warning( 39 'Found additional trailing curly brace for path: %s', path) 40 return json.loads(json_string[:-1]) 41 return json.loads(json_string) 42 43 44def _get_paths_with_suffix(input_dir, suffix): 45 """Gets all JSON files in the input directory. 46 47 Args: 48 input_dir (str): The path to recursively search for 49 JSON files. 50 51 Returns: 52 A list of absolute file paths. 53 """ 54 paths = [] 55 for dir_path, _sub_dirs, file_names in os.walk(input_dir): 56 paths.extend([ 57 os.path.join(dir_path, fn) for fn in file_names 58 if fn.endswith(suffix) 59 ]) 60 return paths 61 62 63def write_parsed_scripts(task_output_dir, source_dir=_SRC_PATH): 64 """Extract parsed script contents and write back to original folder 65 structure. 66 67 Args: 68 task_output_dir (str): The output directory for the sharded task. This will 69 contain the raw JavaScript v8 parsed files that are identified by 70 their ".js.json" suffix. 71 72 Returns: 73 The absolute file path to the raw parsed scripts or None if no parsed 74 scripts were identified (or any of the raw data contains invalid JSON). 75 """ 76 _SOURCEMAPPING_DATA_URL_PREFIX = 'data:application/json;base64,' 77 78 scripts = _get_paths_with_suffix(task_output_dir, '.js.json') 79 output_dir = os.path.join(task_output_dir, 'parsed_scripts') 80 81 # The original file is extracted from the inline sourcemaps, this 82 # information is not available from the coverage data. So we have to 83 # maintain a URL to path map to ensure the coverage data knows the original 84 # source location. 85 url_to_path_map = {} 86 87 if not scripts: 88 return None 89 90 for file_path in scripts: 91 script_data = None 92 try: 93 script_data = _parse_json_file(file_path) 94 except ValueError as e: 95 logging.error('Failed to parse %s: %s', file_path, e) 96 return None 97 98 if any(key not in script_data 99 for key in ('url', 'text', 'sourceMapURL')): 100 logging.info('File %s is missing key url, text or sourceMapURL', 101 file_path) 102 continue 103 104 # TODO(crbug/1373753): For now we exclude any sourcemaps that are 0 105 # length and also that don't begin with a data URL designation. 106 if len(script_data['sourceMapURL']) == 0 or not script_data[ 107 'sourceMapURL'].startswith(_SOURCEMAPPING_DATA_URL_PREFIX): 108 continue 109 110 decoded_sourcemap = base64.b64decode( 111 script_data['sourceMapURL'].replace(_SOURCEMAPPING_DATA_URL_PREFIX, 112 '')) 113 json_sourcemap = json.loads(decoded_sourcemap) 114 if len(json_sourcemap['sources']) == 0: 115 logging.warning('File %s has a valid sourcemap with no sources', 116 file_path) 117 continue 118 119 for source_idx in range(len(json_sourcemap['sources'])): 120 source_path = os.path.relpath( 121 os.path.normpath( 122 os.path.join(json_sourcemap['sourceRoot'], 123 json_sourcemap['sources'][source_idx])), 124 source_dir) 125 source_directory = os.path.join(output_dir, 126 os.path.dirname(source_path)) 127 if not os.path.exists(source_directory): 128 os.makedirs(source_directory) 129 130 with open(os.path.join(output_dir, source_path), 'wb') as f: 131 f.write(script_data['text'].encode('utf8')) 132 133 # Only write the first instance of the sources to the map. 134 # Sourcemaps require stability in their indexing as the mapping 135 # derived are based on the index location of the file in the 136 # "sources" and "sourcesContent" fields. Therefore the first index 137 # of the "sources" field will be the first file that was encountered 138 # during source map generation, i.e. this should be the actual 139 # chromium/src original file. 140 if script_data['url'] not in url_to_path_map: 141 url_to_path_map[script_data['url']] = source_path 142 143 if not url_to_path_map: 144 return None 145 146 with open(os.path.join(output_dir, 'parsed_scripts.json'), 147 'w+', 148 encoding='utf-8') as f: 149 json.dump(url_to_path_map, f) 150 151 return output_dir 152 153def should_exclude(line_contents): 154 """Whether we exclude the line from coverage map.""" 155 line_contents = line_contents.strip() 156 # Exclude empty lines. 157 if line_contents == '': 158 return True 159 160 # Exclude comments and imports. 161 for prefix in _PREFIXES_TO_CHECK: 162 if line_contents.startswith(prefix): 163 return True 164 165 return False 166 167def exclude_uninteresting_lines(coverage_file_path): 168 """Removes lines from Istanbul coverage reports that correspond to lines in 169 the source file that are empty. These lines provide no additional coverage 170 information and in fact inflate the coverage metrics. 171 172 Args: 173 coverage_file_path (str): The path to the merged coverage.json file. 174 """ 175 with open(coverage_file_path, 'r+') as f: 176 coverage = json.load(f) 177 178 def exclude_line(coverage_map, key): 179 """Exclude an individual line from the coverage map. This relies on 180 the key 'statementMap' which maintains a map of statements to lines 181 as well as the key 's' which contains the invocation counts of each 182 line. 183 """ 184 del coverage_map['statementMap'][key] 185 del coverage_map['s'][key] 186 187 for file_path in coverage: 188 istanbul_coverage = coverage[file_path] 189 lines = [] 190 with open(file_path) as fd: 191 lines = fd.readlines() 192 193 # Force list of the keys to allow removal of items whilst iterating. 194 for key in list(istanbul_coverage['statementMap']): 195 statement_map = istanbul_coverage['statementMap'][key] 196 line_num = statement_map['start']['line'] 197 198 assert statement_map['start']['line'] == statement_map['end'][ 199 'line'] 200 201 if should_exclude(lines[line_num - 1]): 202 exclude_line(istanbul_coverage, key) 203 continue 204 205 # Overwrite the current coverage file with new contents. 206 f.seek(0) 207 f.truncate() 208 json.dump(coverage, f) 209 210 211def remap_paths_to_relative(coverage_file_path, chromium_src_dir, build_dir): 212 """Remap paths to be relative to the chromium_src_dir. 213 214 Args: 215 coverage_file_path (str): The path to the merged coverage.json file. 216 chromium_src_dir (str): The absolute location to chromium/src. 217 build_dir (str): The absolute path to the output dir in chromium/src. 218 """ 219 with open(coverage_file_path, 'r+') as f: 220 coverage_json = json.load(f) 221 excluded_paths = 0 222 remapped_paths = 0 223 224 for key in list(coverage_json.keys()): 225 226 if key.startswith(build_dir): 227 del coverage_json[key] 228 excluded_paths += 1 229 continue 230 231 if not key.startswith(chromium_src_dir): 232 del coverage_json[key] 233 excluded_paths += 1 234 continue 235 236 relative_src_path = os.path.relpath(key, chromium_src_dir).replace( 237 '\\', '/') 238 value = coverage_json[key] 239 value['path'] = relative_src_path 240 coverage_json[relative_src_path] = value 241 del coverage_json[key] 242 remapped_paths += 1 243 244 logging.info('Remapped %s paths' % (remapped_paths)) 245 logging.info('Excluded %s paths' % (excluded_paths)) 246 247 # Overwrite the current coverage file with new contents. 248 f.seek(0) 249 f.truncate() 250 json.dump(coverage_json, f) 251 252 253def get_raw_coverage_dirs(task_output_dir): 254 """Returns a list of directories containing raw v8 coverage. 255 256 Args: 257 task_output_dir (str): The output directory for the sharded task. This will 258 contain the raw JavaScript v8 coverage files that are identified by 259 their ".cov.json" suffix. 260 """ 261 coverage_directories = set() 262 for dir_path, _sub_dirs, file_names in os.walk(task_output_dir): 263 for name in file_names: 264 if name.endswith('.cov.json'): 265 coverage_directories.add(dir_path) 266 continue 267 268 return coverage_directories 269 270 271def convert_raw_coverage_to_istanbul(raw_coverage_dirs, source_dir, 272 task_output_dir): 273 """Calls the node helper script convert_to_istanbul.js 274 275 Args: 276 raw_coverage_dirs (list): Directory that contains raw v8 code coverage. 277 source_dir (str): Root directory containing the instrumented source. 278 279 Raises: 280 RuntimeError: If the underlying node command fails. 281 """ 282 stdout = node.RunNode([ 283 os.path.join(_HERE_PATH, 'convert_to_istanbul.js'), 284 '--source-dir', 285 source_dir, 286 '--output-dir', 287 task_output_dir, 288 '--raw-coverage-dirs', 289 *raw_coverage_dirs, 290 ]) 291 logging.info(stdout) 292 293 294def merge_istanbul_reports(istanbul_coverage_dir, source_dir, output_file): 295 """Merges all disparate istanbul reports into a single report. 296 297 Args: 298 istanbul_coverage_dir (str): Directory containing separate coverage files. 299 source_dir (str): Directory containing instrumented source code. 300 output_file (str): File path to output merged coverage. 301 302 Raises: 303 RuntimeError: If the underlying node command fails. 304 """ 305 return node.RunNode([ 306 coverage_modules.PathToNyc(), 307 'merge', 308 istanbul_coverage_dir, 309 output_file, 310 '--cwd', 311 source_dir, 312 ]) 313 314 315def generate_coverage_reports(coverage_file_dir, output_dir): 316 """Generate a LCOV report. 317 318 Args: 319 coverage_file_dir (str): Directory containing the coverage.json file. 320 output_dir (str): Directory to output the reports. 321 """ 322 return node.RunNode([ 323 coverage_modules.PathToNyc(), 324 'report', 325 '--temp-dir', 326 coverage_file_dir, 327 '--reporter', 328 'lcov', 329 '--report-dir', 330 output_dir, 331 '--exclude-after-remap', 332 'false', 333 ]) 334