# Copyright 2013 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.


import difflib
import hashlib
import itertools
import json
import os
import sys
import zipfile

from util import build_utils
import action_helpers  # build_utils adds //build to sys.path.
import print_python_deps

# When set and a difference is detected, a diff of what changed is printed.
PRINT_EXPLANATIONS = int(os.environ.get('PRINT_BUILD_EXPLANATIONS', 0))

# An escape hatch that causes all targets to be rebuilt.
_FORCE_REBUILD = int(os.environ.get('FORCE_REBUILD', 0))


def CallAndWriteDepfileIfStale(on_stale_md5,
                               options,
                               record_path=None,
                               input_paths=None,
                               input_strings=None,
                               output_paths=None,
                               force=False,
                               pass_changes=False,
                               track_subpaths_allowlist=None,
                               depfile_deps=None):
  """Wraps CallAndRecordIfStale() and writes a depfile if applicable.

  Depfiles are automatically added to output_paths when present in the |options|
  argument. They are then created after |on_stale_md5| is called.

  By default, only python dependencies are added to the depfile. If there are
  other input paths that are not captured by GN deps, then they should be listed
  in depfile_deps. It's important to write paths to the depfile that are already
  captured by GN deps since GN args can cause GN deps to change, and such
  changes are not immediately reflected in depfiles (http://crbug.com/589311).
  """
  if not output_paths:
    raise Exception('At least one output_path must be specified.')
  input_paths = list(input_paths or [])
  input_strings = list(input_strings or [])
  output_paths = list(output_paths or [])

  # The running script and everything it transitively imports are implicit
  # inputs: a change to any of them should trigger a rebuild.
  input_paths += print_python_deps.ComputePythonDependencies()

  CallAndRecordIfStale(
      on_stale_md5,
      record_path=record_path,
      input_paths=input_paths,
      input_strings=input_strings,
      output_paths=output_paths,
      force=force,
      pass_changes=pass_changes,
      track_subpaths_allowlist=track_subpaths_allowlist)

  # Write depfile even when inputs have not changed to ensure build correctness
  # on bots that build with & without patch, and the patch changes the depfile
  # location.
  if hasattr(options, 'depfile') and options.depfile:
    action_helpers.write_depfile(options.depfile, output_paths[0], depfile_deps)


def CallAndRecordIfStale(function,
                         record_path=None,
                         input_paths=None,
                         input_strings=None,
                         output_paths=None,
                         force=False,
                         pass_changes=False,
                         track_subpaths_allowlist=None):
  """Calls function if outputs are stale.

  Outputs are considered stale if:
  - any output_paths are missing, or
  - the contents of any file within input_paths has changed, or
  - the contents of input_strings has changed.

  To debug which files are out-of-date, set the environment variable:
    PRINT_BUILD_EXPLANATIONS=1

  Args:
    function: The function to call.
    record_path: Path to record metadata.
      Defaults to output_paths[0] + '.md5.stamp'
    input_paths: List of paths to calculate an md5 sum on.
    input_strings: List of strings to record verbatim.
    output_paths: List of output paths.
    force: Whether to treat outputs as missing regardless of whether they
      actually are.
    pass_changes: Whether to pass a Changes instance to |function|.
    track_subpaths_allowlist: Relevant only when pass_changes=True. List of .zip
      files from |input_paths| to make subpath information available for.
  """
  assert record_path or output_paths
  input_paths = input_paths or []
  input_strings = input_strings or []
  output_paths = output_paths or []
  record_path = record_path or output_paths[0] + '.md5.stamp'

  assert record_path.endswith('.stamp'), (
      'record paths must end in \'.stamp\' so that they are easy to find '
      'and delete')

  new_metadata = _Metadata(track_entries=pass_changes or PRINT_EXPLANATIONS)
  new_metadata.AddStrings(input_strings)

  zip_allowlist = set(track_subpaths_allowlist or [])
  for path in input_paths:
    # It's faster to md5 an entire zip file than it is to just locate & hash
    # its central directory (which is what this used to do).
    if path in zip_allowlist:
      entries = _ExtractZipEntries(path)
      new_metadata.AddZipFile(path, entries)
    else:
      new_metadata.AddFile(path, _ComputeTagForPath(path))

  old_metadata = None
  force = force or _FORCE_REBUILD
  missing_outputs = [x for x in output_paths if force or not os.path.exists(x)]
  too_new = []
  # When outputs are missing, don't bother gathering change information.
  if not missing_outputs and os.path.exists(record_path):
    record_mtime = os.path.getmtime(record_path)
    # Outputs newer than the change information must have been modified outside
    # of the build, and should be considered stale.
    too_new = [x for x in output_paths if os.path.getmtime(x) > record_mtime]
    if not too_new:
      with open(record_path, 'r') as jsonfile:
        try:
          old_metadata = _Metadata.FromFile(jsonfile)
        except Exception:
          # Not yet using new file format, or the stamp is corrupt; treat the
          # target as stale rather than crashing. (Deliberately narrower than
          # a bare except so KeyboardInterrupt/SystemExit still propagate.)
          pass

  changes = Changes(old_metadata, new_metadata, force, missing_outputs, too_new)
  if not changes.HasChanges():
    return

  if PRINT_EXPLANATIONS:
    print('=' * 80)
    print('Target is stale: %s' % record_path)
    print(changes.DescribeDifference())
    print('=' * 80)

  args = (changes,) if pass_changes else ()
  function(*args)

  with open(record_path, 'w') as f:
    new_metadata.ToFile(f)


class Changes:
  """Provides an API for querying what changed between runs."""

  def __init__(self, old_metadata, new_metadata, force, missing_outputs,
               too_new):
    self.old_metadata = old_metadata
    self.new_metadata = new_metadata
    self.force = force
    self.missing_outputs = missing_outputs
    self.too_new = too_new

  def _GetOldTag(self, path, subpath=None):
    return self.old_metadata and self.old_metadata.GetTag(path, subpath)

  def HasChanges(self):
    """Returns whether any changes exist."""
    # HasStringChanges() returns True when old_metadata is None, so the
    # short-circuit guarantees old_metadata is non-None in the second clause.
    return (self.HasStringChanges()
            or self.old_metadata.FilesMd5() != self.new_metadata.FilesMd5())

  def HasStringChanges(self):
    """Returns whether string metadata changed."""
    return (self.force or not self.old_metadata
            or self.old_metadata.StringsMd5() != self.new_metadata.StringsMd5())

  def AddedOrModifiedOnly(self):
    """Returns whether the only changes were from added or modified (sub)files.

    No missing outputs, no removed paths/subpaths.
    """
    if self.HasStringChanges():
      return False
    if any(self.IterRemovedPaths()):
      return False
    for path in self.IterModifiedPaths():
      if any(self.IterRemovedSubpaths(path)):
        return False
    return True

  def IterAllPaths(self):
    """Generator for paths."""
    return self.new_metadata.IterPaths()

  def IterAllSubpaths(self, path):
    """Generator for subpaths."""
    return self.new_metadata.IterSubpaths(path)

  def IterAddedPaths(self):
    """Generator for paths that were added."""
    for path in self.new_metadata.IterPaths():
      if self._GetOldTag(path) is None:
        yield path

  def IterAddedSubpaths(self, path):
    """Generator for paths that were added within the given zip file."""
    for subpath in self.new_metadata.IterSubpaths(path):
      if self._GetOldTag(path, subpath) is None:
        yield subpath

  def IterRemovedPaths(self):
    """Generator for paths that were removed."""
    if self.old_metadata:
      for path in self.old_metadata.IterPaths():
        if self.new_metadata.GetTag(path) is None:
          yield path

  def IterRemovedSubpaths(self, path):
    """Generator for paths that were removed within the given zip file."""
    if self.old_metadata:
      for subpath in self.old_metadata.IterSubpaths(path):
        if self.new_metadata.GetTag(path, subpath) is None:
          yield subpath

  def IterModifiedPaths(self):
    """Generator for paths whose contents have changed."""
    for path in self.new_metadata.IterPaths():
      old_tag = self._GetOldTag(path)
      new_tag = self.new_metadata.GetTag(path)
      if old_tag is not None and old_tag != new_tag:
        yield path

  def IterModifiedSubpaths(self, path):
    """Generator for paths within a zip file whose contents have changed."""
    for subpath in self.new_metadata.IterSubpaths(path):
      old_tag = self._GetOldTag(path, subpath)
      new_tag = self.new_metadata.GetTag(path, subpath)
      if old_tag is not None and old_tag != new_tag:
        yield subpath

  def IterChangedPaths(self):
    """Generator for all changed paths (added/removed/modified)."""
    return itertools.chain(self.IterRemovedPaths(),
                           self.IterModifiedPaths(),
                           self.IterAddedPaths())

  def IterChangedSubpaths(self, path):
    """Generator for paths within a zip that were added/removed/modified."""
    return itertools.chain(self.IterRemovedSubpaths(path),
                           self.IterModifiedSubpaths(path),
                           self.IterAddedSubpaths(path))

  def DescribeDifference(self):
    """Returns a human-readable description of what changed."""
    if self.force:
      return 'force=True'
    if self.missing_outputs:
      return 'Outputs do not exist:\n  ' + '\n  '.join(self.missing_outputs)
    if self.too_new:
      return 'Outputs newer than stamp file:\n  ' + '\n  '.join(self.too_new)
    if self.old_metadata is None:
      return 'Previous stamp file not found.'

    if self.old_metadata.StringsMd5() != self.new_metadata.StringsMd5():
      ndiff = difflib.ndiff(self.old_metadata.GetStrings(),
                            self.new_metadata.GetStrings())
      changed = [s for s in ndiff if not s.startswith(' ')]
      return 'Input strings changed:\n  ' + '\n  '.join(changed)

    if self.old_metadata.FilesMd5() == self.new_metadata.FilesMd5():
      return "There's no difference."

    lines = []
    lines.extend('Added: ' + p for p in self.IterAddedPaths())
    lines.extend('Removed: ' + p for p in self.IterRemovedPaths())
    for path in self.IterModifiedPaths():
      lines.append('Modified: ' + path)
      lines.extend('  -> Subpath added: ' + p
                   for p in self.IterAddedSubpaths(path))
      lines.extend('  -> Subpath removed: ' + p
                   for p in self.IterRemovedSubpaths(path))
      lines.extend('  -> Subpath modified: ' + p
                   for p in self.IterModifiedSubpaths(path))
    if lines:
      return 'Input files changed:\n  ' + '\n  '.join(lines)
    return 'I have no idea what changed (there is a bug).'
class _Metadata:
  """Data model for tracking change metadata.

  Args:
    track_entries: Enables per-file change tracking. Slower, but required for
      Changes functionality.
  """
  # Schema:
  # {
  #   "files-md5": "VALUE",
  #   "strings-md5": "VALUE",
  #   "input-files": [
  #     {
  #       "path": "path.jar",
  #       "tag": "{MD5 of entries}",
  #       "entries": [
  #         { "path": "org/chromium/base/Foo.class", "tag": "{CRC32}" }, ...
  #       ]
  #     }, {
  #       "path": "path.txt",
  #       "tag": "{MD5}",
  #     }
  #   ],
  #   "input-strings": ["a", "b", ...],
  # }
  def __init__(self, track_entries=False):
    self._track_entries = track_entries
    # Aggregate md5s are computed lazily on first query.
    self._files_md5 = None
    self._strings_md5 = None
    self._files = []
    self._strings = []
    # (path, subpath) -> entry dict. Built lazily by _GetEntry().
    self._file_map = None

  @classmethod
  def FromFile(cls, fileobj):
    """Returns a _Metadata initialized from a file object."""
    instance = cls()
    data = json.load(fileobj)
    instance._files_md5 = data['files-md5']
    instance._strings_md5 = data['strings-md5']
    instance._files = data.get('input-files', [])
    instance._strings = data.get('input-strings', [])
    return instance

  def ToFile(self, fileobj):
    """Serializes metadata to the given file object."""
    data = {
        'files-md5': self.FilesMd5(),
        'strings-md5': self.StringsMd5(),
    }
    if self._track_entries:
      data['input-files'] = sorted(self._files, key=lambda e: e['path'])
      data['input-strings'] = self._strings

    json.dump(data, fileobj, indent=2)

  def _AssertNotQueried(self):
    # Once an aggregate md5 has been computed, adding more inputs would make
    # it stale, so mutation is forbidden after the first query.
    assert self._files_md5 is None
    assert self._strings_md5 is None
    assert self._file_map is None

  def AddStrings(self, values):
    """Records the given input strings (each coerced via str())."""
    self._AssertNotQueried()
    self._strings.extend(str(v) for v in values)

  def AddFile(self, path, tag):
    """Adds metadata for a non-zip file.

    Args:
      path: Path to the file.
      tag: A short string representative of the file contents.
    """
    self._AssertNotQueried()
    self._files.append({
        'path': path,
        'tag': tag,
    })

  def AddZipFile(self, path, entries):
    """Adds metadata for a zip file.

    Args:
      path: Path to the file.
      entries: List of (subpath, tag) tuples for entries within the zip.
    """
    self._AssertNotQueried()
    # The zip's own tag digests all subpaths followed by all subpath tags.
    subpaths = (e[0] for e in entries)
    subtags = (e[1] for e in entries)
    outer_tag = _ComputeInlineMd5(itertools.chain(subpaths, subtags))
    self._files.append({
        'path': path,
        'tag': outer_tag,
        'entries': [{'path': s, 'tag': t} for s, t in entries],
    })

  def GetStrings(self):
    """Returns the list of input strings."""
    return self._strings

  def FilesMd5(self):
    """Lazily computes and returns the aggregate md5 of input files."""
    if self._files_md5 is None:
      # Omit paths from md5 since temporary files have random names.
      tags = (self.GetTag(p) for p in sorted(self.IterPaths()))
      self._files_md5 = _ComputeInlineMd5(tags)
    return self._files_md5

  def StringsMd5(self):
    """Lazily computes and returns the aggregate md5 of input strings."""
    if self._strings_md5 is None:
      self._strings_md5 = _ComputeInlineMd5(self._strings)
    return self._strings_md5

  def _GetEntry(self, path, subpath=None):
    """Returns the JSON entry for the given path / subpath, or None."""
    if self._file_map is None:
      # Build the lookup table once, on first query.
      self._file_map = {}
      for outer in self._files:
        self._file_map[(outer['path'], None)] = outer
        for inner in outer.get('entries', ()):
          self._file_map[(outer['path'], inner['path'])] = inner
    return self._file_map.get((path, subpath))

  def GetTag(self, path, subpath=None):
    """Returns the tag for the given path / subpath (None if not found)."""
    entry = self._GetEntry(path, subpath)
    return entry and entry['tag']

  def IterPaths(self):
    """Returns a generator for all top-level paths."""
    return (e['path'] for e in self._files)

  def IterSubpaths(self, path):
    """Returns a generator for all subpaths in the given zip.

    If the given path is not a zip file or doesn't exist, returns an empty
    iterable.
    """
    entry = self._GetEntry(path)
    if not entry:
      return ()
    return (e['path'] for e in entry.get('entries', []))


def _ComputeTagForPath(path):
  """Returns a content tag for |path|: an md5, or the mtime for large files."""
  stat = os.stat(path)
  if stat.st_size > 1 * 1024 * 1024:
    # Fallback to mtime for large files so that md5_check does not take too
    # long to run.
    return stat.st_mtime
  hasher = hashlib.md5()
  with open(path, 'rb') as f:
    hasher.update(f.read())
  return hasher.hexdigest()


def _ComputeInlineMd5(iterable):
  """Computes the md5 of the concatenated parameters."""
  hasher = hashlib.md5()
  for item in iterable:
    hasher.update(str(item).encode('ascii'))
  return hasher.hexdigest()


def _ExtractZipEntries(path):
  """Returns a list of (path, CRC32) of all files within |path|."""
  with zipfile.ZipFile(path) as zip_file:
    # A CRC of 0 means a directory or empty file; skip those.
    return [(info.filename, info.CRC + info.compress_type)
            for info in zip_file.infolist() if info.CRC]