#!/usr/bin/env python3
#
# Copyright 2018 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Create an Android application bundle from one or more bundle modules."""

import argparse
import concurrent.futures
import json
import logging
import os
import posixpath
import shutil
import sys
from xml.etree import ElementTree
import zipfile

sys.path.append(
    os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir)))
from pylib.utils import dexdump

import bundletool
from util import build_utils
from util import manifest_utils
from util import resource_utils
import action_helpers  # build_utils adds //build to sys.path.
import zip_helpers


# Location of language-based assets in bundle modules.
_LOCALES_SUBDIR = 'assets/locales/'

# The fallback locale should always have its .pak file included in
# the base apk, i.e. not use language-based asset targeting. This ensures
# that Chrome won't crash on startup if its bundle is installed on a device
# with an unsupported system locale (e.g. fur-rIT).
_FALLBACK_LOCALE = 'en-US'

# List of split dimensions recognized by this tool.
_ALL_SPLIT_DIMENSIONS = [ 'ABI', 'SCREEN_DENSITY', 'LANGUAGE' ]

# Due to historical reasons, certain languages identified by Chromium with a
# 3-letters ISO 639-2 code, are mapped to a nearly equivalent 2-letters
# ISO 639-1 code instead (due to the fact that older Android releases only
# supported the latter when matching resources).
#
# This map describes the same conversion as for Java resources.
_SHORTEN_LANGUAGE_CODE_MAP = {
  'fil': 'tl',  # Filipino to Tagalog.
}

# A list of extensions corresponding to files that should never be compressed
# in the bundle. This used to be handled by bundletool automatically until
# release 0.8.0, which required that this be passed to the BundleConfig
# file instead.
#
# This is the original list, which was taken from aapt2, with 'webp' added to
# it (which curiously was missing from the list).
#
# NOTE(review): 'git' below looks like a typo for 'gif'. It is left unchanged
# because altering it changes which files are stored compressed in the
# resulting bundle -- confirm against the aapt2 list before fixing.
_UNCOMPRESSED_FILE_EXTS = [
    '3g2', '3gp', '3gpp', '3gpp2', 'aac', 'amr', 'awb', 'git', 'imy', 'jet',
    'jpeg', 'jpg', 'm4a', 'm4v', 'mid', 'midi', 'mkv', 'mp2', 'mp3', 'mp4',
    'mpeg', 'mpg', 'ogg', 'png', 'rtttl', 'smf', 'wav', 'webm', 'webp', 'wmv',
    'xmf'
]

_COMPONENT_TYPES = ('activity', 'provider', 'receiver', 'service')
_DEDUPE_ENTRY_TYPES = _COMPONENT_TYPES + ('activity-alias', 'meta-data')

_ROTATION_METADATA_KEY = 'com.google.play.apps.signing/RotationConfig.textproto'

_ALLOWLISTED_NON_BASE_SERVICES = {
    # Only on API level 33+ which is past the fix for b/169196314.
    'androidx.pdf.service.PdfDocumentServiceImpl',
    'androidx.pdf.service.PdfDocumentService',
    # These need to be burned down.
    'androidx.room.MultiInstanceInvalidationService',
    'com.google.apps.tiktok.concurrent.AndroidFuturesService',
    'com.google.apps.tiktok.concurrent.InternalForegroundService',
}
_ALLOWLISTED_NON_BASE_PROVIDERS = {
    # These need to be burned down.
    ('com.google.android.libraries.sharing.sharekit.provider.'
     'ShareKitContentProvider')
}


def _ParseArgs(args):
  """Parse and validate the command line.

  Per-module flags (--module-name, --uncompressed-assets, --rtxt-in-paths,
  --pathmap-in-paths) are matched up positionally with the entries of
  --module-zips, then all of them are re-ordered so that 'base' comes first
  and the remaining modules follow in name order.

  Args:
    args: List of command-line argument strings (without the program name).
  Returns:
    The argparse namespace, with GN-lists expanded to Python lists and
    |uncompressed_assets| converted to a set of zip paths.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument('--out-bundle', required=True,
                      help='Output bundle zip archive.')
  parser.add_argument('--module-zips', required=True,
                      help='GN-list of module zip archives.')
  parser.add_argument(
      '--pathmap-in-paths',
      action='append',
      help='List of module pathmap files.')
  parser.add_argument(
      '--module-name',
      action='append',
      dest='module_names',
      help='List of module names.')
  parser.add_argument(
      '--pathmap-out-path', help='Path to combined pathmap file for bundle.')
  parser.add_argument(
      '--rtxt-in-paths', action='append', help='GN-list of module R.txt files.')
  parser.add_argument(
      '--rtxt-out-path', help='Path to combined R.txt file for bundle.')
  parser.add_argument('--uncompressed-assets', action='append',
                      help='GN-list of uncompressed assets.')
  parser.add_argument('--compress-dex',
                      action='store_true',
                      help='Compress .dex files')
  parser.add_argument('--split-dimensions',
                      help="GN-list of split dimensions to support.")
  parser.add_argument(
      '--base-module-rtxt-path',
      help='Optional path to the base module\'s R.txt file, only used with '
      'language split dimension.')
  parser.add_argument(
      '--base-allowlist-rtxt-path',
      help='Optional path to an R.txt file, string resources '
      'listed there _and_ in --base-module-rtxt-path will '
      'be kept in the base bundle module, even if language'
      ' splitting is enabled.')
  parser.add_argument('--rotation-config',
                      help='Path to a RotationConfig.textproto')
  parser.add_argument('--warnings-as-errors',
                      action='store_true',
                      help='Treat all warnings as errors.')

  parser.add_argument(
      '--validate-services',
      action='store_true',
      help='Check if services are in base module if isolatedSplits is enabled.')

  options = parser.parse_args(args)
  options.module_zips = action_helpers.parse_gn_list(options.module_zips)

  # argparse leaves action='append' options as None when the flag is never
  # passed; normalize so the checks below report a usage error instead of
  # raising TypeError.
  options.module_names = options.module_names or []

  if not options.module_zips:
    parser.error('The module zip list cannot be empty.')
  if len(options.module_zips) != len(options.module_names):
    parser.error('# module zips != # names.')
  if 'base' not in options.module_names:
    parser.error('Missing base module.')

  # zip() below stops at the shortest input, which would silently drop
  # modules if one of the per-module flags was passed too few times.
  num_modules = len(options.module_names)
  for flag, values in (('--uncompressed-assets', options.uncompressed_assets),
                       ('--rtxt-in-paths', options.rtxt_in_paths),
                       ('--pathmap-in-paths', options.pathmap_in_paths)):
    if values is None or len(values) < num_modules:
      parser.error('%s must be passed once per module (expected %d, got %d).' %
                   (flag, num_modules, len(values or [])))

  # Sort modules for more stable outputs.
  per_module_values = list(
      zip(options.module_names, options.module_zips,
          options.uncompressed_assets, options.rtxt_in_paths,
          options.pathmap_in_paths))
  # 'base' sorts first (False < True), the rest alphabetically.
  per_module_values.sort(key=lambda x: (x[0] != 'base', x[0]))
  options.module_names = [x[0] for x in per_module_values]
  options.module_zips = [x[1] for x in per_module_values]
  options.uncompressed_assets = [x[2] for x in per_module_values]
  options.rtxt_in_paths = [x[3] for x in per_module_values]
  options.pathmap_in_paths = [x[4] for x in per_module_values]

  options.rtxt_in_paths = action_helpers.parse_gn_list(options.rtxt_in_paths)
  options.pathmap_in_paths = action_helpers.parse_gn_list(
      options.pathmap_in_paths)

  # Merge all uncompressed assets into a set.
  uncompressed_list = []
  for entry in action_helpers.parse_gn_list(options.uncompressed_assets):
    # Each entry has the following format: 'zipPath' or 'srcPath:zipPath'
    pos = entry.find(':')
    if pos >= 0:
      uncompressed_list.append(entry[pos + 1:])
    else:
      uncompressed_list.append(entry)

  options.uncompressed_assets = set(uncompressed_list)

  # Check that all split dimensions are valid
  if options.split_dimensions:
    options.split_dimensions = action_helpers.parse_gn_list(
        options.split_dimensions)
    for dim in options.split_dimensions:
      if dim.upper() not in _ALL_SPLIT_DIMENSIONS:
        parser.error('Invalid split dimension "%s" (expected one of: %s)' % (
            dim, ', '.join(x.lower() for x in _ALL_SPLIT_DIMENSIONS)))

  # As a special case, --base-allowlist-rtxt-path can be empty to indicate
  # that the module doesn't need such an allowlist. That's because it is easier
  # to check this condition here than through GN rules :-(
  if options.base_allowlist_rtxt_path == '':
    options.base_module_rtxt_path = None

  # Check --base-module-rtxt-path and --base-allowlist-rtxt-path usage.
  if options.base_module_rtxt_path:
    if not options.base_allowlist_rtxt_path:
      parser.error(
          '--base-module-rtxt-path requires --base-allowlist-rtxt-path')
    # Dimensions are validated case-insensitively above, so compare the same
    # way here; also tolerate --split-dimensions being absent or empty.
    requested_dims = {d.upper() for d in (options.split_dimensions or [])}
    if 'LANGUAGE' not in requested_dims:
      parser.error('--base-module-rtxt-path is only valid with '
                   'language-based splits.')

  return options


def _MakeSplitDimension(value, enabled):
  """Return dict modelling a BundleConfig splitDimension entry."""
  return {'value': value, 'negate': not enabled}


def _GenerateBundleConfigJson(uncompressed_assets, compress_dex,
                              split_dimensions, base_master_resource_ids):
  """Generate the JSON text of a BundleConfig.

  Args:
    uncompressed_assets: A list or set of file paths under assets/ that always
      be stored uncompressed.
    compress_dex: Boolean, whether to compress .dex.
    split_dimensions: list of split dimensions.
    base_master_resource_ids: Optional list of 32-bit resource IDs to keep
      inside the base module, even when split dimensions are enabled.
  Returns:
    A JSON-formatted string that can be written to a .pb.json config file.
  """
  # Compute splitsConfig list. Each item is a dictionary that can have
  # the following keys:
  #    'value': One of ['LANGUAGE', 'SCREEN_DENSITY', 'ABI']
  #    'negate': Boolean, True to indicate that the bundle should *not* be
  #              split along this dimension.
  split_dimensions = [ _MakeSplitDimension(dim, dim in split_dimensions)
                       for dim in _ALL_SPLIT_DIMENSIONS ]

  # Locale-specific pak files stored in bundle splits need not be compressed.
  uncompressed_globs = [
      'assets/locales#lang_*/*.pak', 'assets/fallback-locales/*.pak'
  ]
  # normpath to allow for ../ prefix.
  uncompressed_globs.extend(
      posixpath.normpath('assets/' + x) for x in uncompressed_assets)
  # NOTE: Use '**' instead of '*' to work through directories!
  uncompressed_globs.extend('**.' + ext for ext in _UNCOMPRESSED_FILE_EXTS)
  if not compress_dex:
    # Explicit glob required only when using bundletool to create .apks files.
    # Play Store looks for and respects "uncompressDexFiles" set below.
    # b/176198991
    # This is added as a placeholder entry in order to have no effect unless
    # processed with app_bundle_utils.GenerateBundleApks().
    uncompressed_globs.append('classesX.dex')

  data = {
      'optimizations': {
          'splitsConfig': {
              'splitDimension': split_dimensions,
          },
          'uncompressNativeLibraries': {
              'enabled': True,
              'alignment': 'PAGE_ALIGNMENT_16K'
          },
          'uncompressDexFiles': {
              'enabled': True,  # Applies only for P+.
          }
      },
      'compression': {
          'uncompressedGlob': sorted(uncompressed_globs),
      },
  }

  if base_master_resource_ids:
    data['master_resources'] = {
        'resource_ids': list(base_master_resource_ids),
    }

  return json.dumps(data, indent=2)


def _RewriteLanguageAssetPath(src_path):
  """Rewrite the destination path of a locale asset for language-based splits.

  Should only be used when generating bundles with language-based splits.
  This will rewrite paths that look like locales/<locale>.pak into
  locales#<language>/<locale>.pak, where <language> is the language code
  from the locale.

  Args:
    src_path: Path of the entry inside the module zip.
  Returns:
    The new path as a string. Paths that are not .pak files under
    assets/locales/ are returned unchanged.
  """
  if not src_path.startswith(_LOCALES_SUBDIR) or not src_path.endswith('.pak'):
    # Must be a plain string like every other return path: the caller uses
    # the result directly as the destination zip path.
    return src_path

  locale = src_path[len(_LOCALES_SUBDIR):-4]
  android_locale = resource_utils.ToAndroidLocaleName(locale)

  # The locale format is <lang>-<region> or <lang> or BCP-47 (e.g b+sr+Latn).
  # Extract the language.
  pos = android_locale.find('-')
  if android_locale.startswith('b+'):
    # If locale is in BCP-47 the language is the second tag (e.g. b+sr+Latn)
    android_language = android_locale.split('+')[1]
  elif pos >= 0:
    android_language = android_locale[:pos]
  else:
    android_language = android_locale

  if locale == _FALLBACK_LOCALE:
    # Fallback locale .pak files must be placed in a different directory
    # to ensure they are always stored in the base module.
    result_path = 'assets/fallback-locales/%s.pak' % locale
  else:
    # Other language .pak files go into a language-specific asset directory
    # that bundletool will store in separate split APKs.
    result_path = 'assets/locales#lang_%s/%s.pak' % (android_language, locale)

  return result_path


def _SplitModuleForAssetTargeting(src_module_zip, tmp_dir, split_dimensions):
  """Splits assets in a module if needed.

  Args:
    src_module_zip: input zip module path.
    tmp_dir: Path to temporary directory, where the new output module might
      be written to.
    split_dimensions: list of split dimensions.

  Returns:
    If the module doesn't need asset targeting, doesn't do anything and
    returns src_module_zip. Otherwise, create a new module zip archive under
    tmp_dir with the same file name, but which contains assets paths targeting
    the proper dimensions.
  """
  split_language = 'LANGUAGE' in split_dimensions
  if not split_language:
    # Nothing to target, so return original module path.
    return src_module_zip

  with zipfile.ZipFile(src_module_zip, 'r') as src_zip:
    # A set keeps the per-entry membership test below O(1).
    language_files = {
        f for f in src_zip.namelist() if f.startswith(_LOCALES_SUBDIR)
    }

    if not language_files:
      # No language-based assets to split in this module.
      return src_module_zip

    tmp_zip = os.path.join(tmp_dir, os.path.basename(src_module_zip))
    with zipfile.ZipFile(tmp_zip, 'w') as dst_zip:
      for info in src_zip.infolist():
        src_path = info.filename
        # Preserve each entry's stored/compressed state in the copy.
        is_compressed = info.compress_type != zipfile.ZIP_STORED

        dst_path = src_path
        if src_path in language_files:
          dst_path = _RewriteLanguageAssetPath(src_path)

        zip_helpers.add_to_zip_hermetic(dst_zip,
                                        dst_path,
                                        data=src_zip.read(src_path),
                                        compress=is_compressed)

    return tmp_zip


def _GenerateBaseResourcesAllowList(base_module_rtxt_path,
                                    base_allowlist_rtxt_path):
  """Generate an allowlist of base master resource ids.

  Args:
    base_module_rtxt_path: Path to base module R.txt file.
    base_allowlist_rtxt_path: Path to base allowlist R.txt file.
  Returns:
    list of resource ids.
  """
  ids_map = resource_utils.GenerateStringResourcesAllowList(
      base_module_rtxt_path, base_allowlist_rtxt_path)
  return list(ids_map.keys())


def _ConcatTextFiles(in_paths, out_path):
  """Concatenate the contents of multiple text files into one.

  Each file's contents is preceded by a line containing the original filename.
  Input paths that do not exist are skipped.

  Args:
    in_paths: List of input file paths.
    out_path: Path to output file.
  """
  with open(out_path, 'w') as out_file:
    for in_path in in_paths:
      if not os.path.exists(in_path):
        continue
      with open(in_path, 'r') as in_file:
        out_file.write('-- Contents of {}\n'.format(os.path.basename(in_path)))
        out_file.write(in_file.read())


def _LoadPathmap(pathmap_path):
  """Load the pathmap of obfuscated resource paths.

  Blank lines and lines starting with '--' are ignored; every other line is
  expected to have the form '<original> -> <renamed>'.

  Returns: A dict mapping from obfuscated paths to original paths or an
           empty dict if passed a None |pathmap_path|.
  """
  if pathmap_path is None:
    return {}

  pathmap = {}
  with open(pathmap_path, 'r') as f:
    for line in f:
      line = line.strip()
      if line.startswith('--') or line == '':
        continue
      original, renamed = line.split(' -> ')
      pathmap[renamed] = original
  return pathmap


def _WriteBundlePathmap(module_pathmap_paths, module_names,
                        bundle_pathmap_path):
  """Combine the contents of module pathmaps into a bundle pathmap.

  This rebases the resource paths inside the module pathmap before adding them
  to the bundle pathmap. So res/a.xml inside the base module pathmap would be
  base/res/a.xml in the bundle pathmap.

  Args:
    module_pathmap_paths: List of module pathmap file paths; entries that do
      not exist on disk are skipped.
    module_names: List of module names, parallel to |module_pathmap_paths|.
    bundle_pathmap_path: Path of the combined pathmap file to write.
  """
  with open(bundle_pathmap_path, 'w') as bundle_pathmap_file:
    for module_pathmap_path, module_name in zip(module_pathmap_paths,
                                                module_names):
      if not os.path.exists(module_pathmap_path):
        continue
      module_pathmap = _LoadPathmap(module_pathmap_path)
      for short_path, long_path in module_pathmap.items():
        rebased_long_path = '{}/{}'.format(module_name, long_path)
        rebased_short_path = '{}/{}'.format(module_name, short_path)
        line = '{} -> {}\n'.format(rebased_long_path, rebased_short_path)
        bundle_pathmap_file.write(line)


def _GetManifestForModule(bundle_path, module_name):
  """Return the parsed manifest of |module_name| as dumped by bundletool.

  On a parse failure the raw bundletool output is written to stderr and the
  ElementTree.ParseError is re-raised.
  """
  data = bundletool.RunBundleTool(
      ['dump', 'manifest', '--bundle', bundle_path, '--module', module_name])
  try:
    return ElementTree.fromstring(data)
  except ElementTree.ParseError:
    sys.stderr.write('Failed to parse:\n')
    sys.stderr.write(data)
    raise


def _GetComponentNames(manifest, tag_name):
  """Return the android:name of each <application>/<tag_name> element.

  An element without an android:name attribute contributes None.
  """
  android_name = '{%s}name' % manifest_utils.ANDROID_NAMESPACE
  return [
      s.attrib.get(android_name)
      for s in manifest.iterfind(f'application/{tag_name}')
  ]


def _ClassesFromZip(module_zip):
  """Return the set of fully-qualified class names reported by dexdump."""
  classes = set()
  for package in dexdump.Dump(module_zip):
    for java_package, package_dict in package.items():
      # Classes in the default (empty) package get no leading dot.
      java_package += '.' if java_package else ''
      classes.update(java_package + c for c in package_dict['classes'])
  return classes


def _ValidateSplits(bundle_path, module_zips):
  """Run consistency checks on the manifests and dex of a built bundle.

  Args:
    bundle_path: Path to the bundle produced by bundletool.
    module_zips: List of module zip paths; module names are derived from the
      file names, and one of them must be named base.zip.
  Returns:
    A list of human-readable error strings; empty when all checks pass.
  """
  logging.info('Reading manifests and running dexdump')
  base_zip = next(p for p in module_zips if os.path.basename(p) == 'base.zip')
  module_names = sorted(os.path.basename(p)[:-len('.zip')] for p in module_zips)
  # Using threads makes these steps go from 7s -> 1s on my machine.
  with concurrent.futures.ThreadPoolExecutor() as executor:
    # Create list of classes from the base module's dex.
    classes_future = executor.submit(_ClassesFromZip, base_zip)

    # Create xmltrees of all module manifests.
    manifest_futures = [
        executor.submit(_GetManifestForModule, bundle_path, n)
        for n in module_names
    ]
    manifests_by_name = dict(
        zip(module_names, (f.result() for f in manifest_futures)))
    base_classes = classes_future.result()

  # Collect service names from all split manifests.
  logging.info('Performing checks')
  errors = []

  # Ensure there are no components defined in multiple splits.
  splits_by_component = {}
  for module_name, cur_manifest in manifests_by_name.items():
    for kind in _DEDUPE_ENTRY_TYPES:
      for component in _GetComponentNames(cur_manifest, kind):
        # The first module seen declaring a component is recorded as owner.
        owner_module_name = splits_by_component.setdefault((kind, component),
                                                           module_name)
        # Allow services that exist only to keep <meta-data> out of
        # ApplicationInfo.
        if (owner_module_name != module_name
            and not component.endswith('HolderService')):
          errors.append(f'The {kind} "{component}" appeared in both '
                        f'{owner_module_name} and {module_name}.')

  # Ensure components defined in base manifest exist in base dex.
  for (kind, component), module_name in splits_by_component.items():
    if module_name == 'base' and kind in _COMPONENT_TYPES:
      if component not in base_classes:
        errors.append(f"{component} is defined in the base manifest, "
                      f"but the class does not exist in the base splits' dex")

  # Remaining checks apply only when isolatedSplits="true".
  isolated_splits = manifests_by_name['base'].get(
      f'{{{manifest_utils.ANDROID_NAMESPACE}}}isolatedSplits')
  if isolated_splits != 'true':
    return errors

  # Ensure all providers are present in base module. We enforce this because
  # providers are loaded early in startup, and keeping them in the base module
  # gives more time for the chrome split to load.
  for module_name, cur_manifest in manifests_by_name.items():
    if module_name == 'base':
      continue
    provider_names = _GetComponentNames(cur_manifest, 'provider')
    for p in provider_names:
      if p not in _ALLOWLISTED_NON_BASE_PROVIDERS:
        errors.append(f'Provider {p} should be declared in the base manifest,'
                      f' but is in "{module_name}" module. For details, see '
                      'https://chromium.googlesource.com/chromium/src/+/main/'
                      'docs/android_isolated_splits.md#contentproviders')

  # Ensure all services are present in base module because service classes are
  # not found if they are not present in the base module. b/169196314
  # It is fine if they are defined in split manifests though.
  for module_name, cur_manifest in manifests_by_name.items():
    for service_name in _GetComponentNames(cur_manifest, 'service'):
      if (service_name not in base_classes
          and service_name not in _ALLOWLISTED_NON_BASE_SERVICES):
        errors.append(f'Service {service_name} should be declared in the base'
                      f' manifest, but is in "{module_name}" module. For'
                      ' details, see b/169196314.')

  return errors


def main(args):
  """Build the bundle described by |args| and write the requested outputs.

  Exits with status 1 if --validate-services is set and validation fails.
  """
  build_utils.InitLogging('AAB_DEBUG')
  args = build_utils.ExpandFileArgs(args)
  options = _ParseArgs(args)

  split_dimensions = []
  if options.split_dimensions:
    split_dimensions = [x.upper() for x in options.split_dimensions]

  with build_utils.TempDir() as tmp_dir:
    logging.info('Splitting locale assets')
    module_zips = [
        _SplitModuleForAssetTargeting(module, tmp_dir, split_dimensions)
        for module in options.module_zips
    ]

    base_master_resource_ids = None
    if options.base_module_rtxt_path:
      logging.info('Creating R.txt allowlist')
      base_master_resource_ids = _GenerateBaseResourcesAllowList(
          options.base_module_rtxt_path, options.base_allowlist_rtxt_path)

    logging.info('Creating BundleConfig.pb.json')
    bundle_config = _GenerateBundleConfigJson(options.uncompressed_assets,
                                              options.compress_dex,
                                              split_dimensions,
                                              base_master_resource_ids)

    tmp_bundle = os.path.join(tmp_dir, 'tmp_bundle')

    # Important: bundletool requires that the bundle config file is
    # named with a .pb.json extension.
    tmp_bundle_config = tmp_bundle + '.BundleConfig.pb.json'

    with open(tmp_bundle_config, 'w') as f:
      f.write(bundle_config)

    logging.info('Running bundletool')
    cmd_args = build_utils.JavaCmd() + [
        '-jar',
        bundletool.BUNDLETOOL_JAR_PATH,
        'build-bundle',
        '--modules=' + ','.join(module_zips),
        '--output=' + tmp_bundle,
        '--config=' + tmp_bundle_config,
    ]

    if options.rotation_config:
      cmd_args += [
          f'--metadata-file={_ROTATION_METADATA_KEY}:{options.rotation_config}'
      ]

    build_utils.CheckOutput(
        cmd_args,
        print_stdout=True,
        print_stderr=True,
        stderr_filter=build_utils.FilterReflectiveAccessJavaWarnings,
        fail_on_output=options.warnings_as_errors)

    if options.validate_services:
      # TODO(crbug.com/40148088): This step takes 0.4s locally for bundles with
      # isolated splits disabled and 2s for bundles with isolated splits
      # enabled. Consider making this run in parallel or move into a separate
      # step before enabling isolated splits by default.
      logging.info('Validating isolated split manifests')
      errors = _ValidateSplits(tmp_bundle, module_zips)
      if errors:
        sys.stderr.write('Bundle failed sanity checks:\n ')
        sys.stderr.write('\n '.join(errors))
        sys.stderr.write('\n')
        sys.exit(1)

    # Only publish the bundle once it has been built (and validated).
    logging.info('Writing final output artifacts')
    shutil.move(tmp_bundle, options.out_bundle)

  if options.rtxt_out_path:
    _ConcatTextFiles(options.rtxt_in_paths, options.rtxt_out_path)

  if options.pathmap_out_path:
    _WriteBundlePathmap(options.pathmap_in_paths, options.module_names,
                        options.pathmap_out_path)


if __name__ == '__main__':
  main(sys.argv[1:])