# Copyright 2019 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Contains common helpers for working with Android manifests."""

import hashlib
import os
import re
import shlex
import sys
import xml.dom.minidom as minidom
from xml.etree import ElementTree

from util import build_utils
import action_helpers  # build_utils adds //build to sys.path.

ANDROID_NAMESPACE = 'http://schemas.android.com/apk/res/android'
TOOLS_NAMESPACE = 'http://schemas.android.com/tools'
DIST_NAMESPACE = 'http://schemas.android.com/apk/distribution'
EMPTY_ANDROID_MANIFEST_PATH = os.path.abspath(
    os.path.join(os.path.dirname(__file__), '..', '..', 'AndroidManifest.xml'))
# When normalizing for expectation matching, wrap these tags when they are long
# or else they become very hard to read.
_WRAP_CANDIDATES = (
    '<manifest',
    '<application',
    '<activity',
    '<provider',
    '<receiver',
    '<service',
)
# Don't wrap lines shorter than this.
_WRAP_LINE_LENGTH = 100

_xml_namespace_initialized = False


def _RegisterElementTreeNamespaces():
  """Registers the android/tools/dist prefixes with ElementTree (once)."""
  global _xml_namespace_initialized
  if _xml_namespace_initialized:
    return
  _xml_namespace_initialized = True
  ElementTree.register_namespace('android', ANDROID_NAMESPACE)
  ElementTree.register_namespace('tools', TOOLS_NAMESPACE)
  ElementTree.register_namespace('dist', DIST_NAMESPACE)


def NamespacedGet(node, key):
  """Returns the value of the android:|key| attribute, or None if unset."""
  return node.get('{%s}%s' % (ANDROID_NAMESPACE, key))


def NamespacedSet(node, key, value):
  """Sets the android:|key| attribute of |node| to |value|."""
  node.set('{%s}%s' % (ANDROID_NAMESPACE, key), value)


def ParseManifest(path):
  """Parses an AndroidManifest.xml using ElementTree.

  Registers the 'android', 'tools' and 'dist' namespace prefixes with
  ElementTree and creates the <application> node if it is missing.

  Returns tuple of:
    doc: Root xml document.
    manifest_node: the <manifest> node.
    app_node: the <application> node.
  """
  _RegisterElementTreeNamespaces()
  doc = ElementTree.parse(path)
  # ElementTree.find does not work if the required tag is the root.
  if doc.getroot().tag == 'manifest':
    manifest_node = doc.getroot()
  else:
    manifest_node = doc.find('manifest')
  assert manifest_node is not None, 'Manifest is none for path ' + path

  # Search relative to <manifest> (not the document root) so that an existing
  # <application> is found even when <manifest> is not the root element;
  # otherwise a duplicate <application> would be appended below.
  app_node = manifest_node.find('application')
  if app_node is None:
    app_node = ElementTree.SubElement(manifest_node, 'application')

  return doc, manifest_node, app_node


def SaveManifest(doc, path):
  """Serializes |doc| as UTF-8 and writes it to |path|.

  The write goes through action_helpers.atomic_output().
  """
  with action_helpers.atomic_output(path) as f:
    f.write(ElementTree.tostring(doc.getroot(), encoding='UTF-8'))


def GetPackage(manifest_node):
  """Returns the 'package' attribute of |manifest_node|, or None if unset."""
  return manifest_node.get('package')


def SetUsesSdk(manifest_node,
               target_sdk_version,
               min_sdk_version,
               max_sdk_version=None):
  """Sets SDK version attributes on <uses-sdk>, creating the node if needed.

  Args:
    manifest_node: The <manifest> element.
    target_sdk_version: Value for android:targetSdkVersion.
    min_sdk_version: Value for android:minSdkVersion.
    max_sdk_version: Value for android:maxSdkVersion. Only set when truthy.
  """
  uses_sdk_node = manifest_node.find('./uses-sdk')
  if uses_sdk_node is None:
    uses_sdk_node = ElementTree.SubElement(manifest_node, 'uses-sdk')
  NamespacedSet(uses_sdk_node, 'targetSdkVersion', target_sdk_version)
  NamespacedSet(uses_sdk_node, 'minSdkVersion', min_sdk_version)
  if max_sdk_version:
    NamespacedSet(uses_sdk_node, 'maxSdkVersion', max_sdk_version)


def SetTargetApiIfUnset(manifest_node, target_sdk_version):
  """Sets android:targetSdkVersion on <uses-sdk> only if it is not yet set.

  Creates the <uses-sdk> node if it is missing.

  Returns:
    True if the attribute was unset (and has now been set), False otherwise.
  """
  uses_sdk_node = manifest_node.find('./uses-sdk')
  if uses_sdk_node is None:
    uses_sdk_node = ElementTree.SubElement(manifest_node, 'uses-sdk')
  curr_target_sdk_version = NamespacedGet(uses_sdk_node, 'targetSdkVersion')
  if curr_target_sdk_version is None:
    NamespacedSet(uses_sdk_node, 'targetSdkVersion', target_sdk_version)
  return curr_target_sdk_version is None


def _SortAndStripElementTree(root):
  """Sorts attributes and child nodes in place and drops whitespace text.

  Whitespace-only text is removed from every descendant of |root|.
  """

  # Sort alphabetically with two exceptions:
  # 1) Put <application> node last (since it's giant).
  # 2) Put android:name before other attributes.
  def element_sort_key(node):
    if node.tag == 'application':
      return 'z'
    ret = ElementTree.tostring(node)
    # ElementTree.tostring inserts namespace attributes for any that are needed
    # for the node or any of its descendants. Remove them so as to prevent a
    # change to a child that adds/removes a namespace usage from changing sort
    # order.
    return re.sub(r' xmlns:.*?".*?"', '', ret.decode('utf8'))

  name_attr = '{%s}name' % ANDROID_NAMESPACE

  def attribute_sort_key(tup):
    return ('', '') if tup[0] == name_attr else tup

  def helper(node):
    for child in node:
      if child.text and child.text.isspace():
        child.text = None
      helper(child)

    # Sort attributes (requires Python 3.8+).
    node.attrib = dict(sorted(node.attrib.items(), key=attribute_sort_key))

    # Sort nodes
    node[:] = sorted(node, key=element_sort_key)

  helper(root)


def _SplitElement(line):
  """Parses a one-line xml node into ('<tag', ['a="b"', ...], '/>')."""

  # Shlex splits nicely, but removes quotes. Need to put them back.
  def restore_quotes(value):
    return value.replace('=', '="', 1) + '"'

  # Simplify restore_quotes by separating />.
  assert line.endswith('>'), line
  end_tag = '>'
  if line.endswith('/>'):
    end_tag = '/>'
  line = line[:-len(end_tag)]

  # Use shlex to avoid having to re-encode &quot;, etc.
  parts = shlex.split(line)
  start_tag = parts[0]
  attrs = parts[1:]

  return start_tag, [restore_quotes(x) for x in attrs], end_tag


def _CreateNodeHash(lines):
  """Computes a hash (md5) for the first XML node found in |lines|.

  Args:
    lines: List of strings containing pretty-printed XML.

  Returns:
    The first 8 hex digits of the md5 of the node (including children), or of
    the node's android:name line when one appears within its start tag.

  Raises:
    AssertionError: If no later line marks the end of the node.
  """
  target_indent = lines[0].find('<')
  tag_closed = False
  for i, l in enumerate(lines[1:]):
    cur_indent = l.find('<')
    if cur_indent != -1 and cur_indent <= target_indent:
      # |i| indexes lines[1:], so this keeps everything before the line |l|.
      tag_lines = lines[:i + 1]
      break
    if not tag_closed and 'android:name="' in l:
      # To reduce noise of node tags changing, use android:name as the
      # basis of the hash since they are usually unique.
      tag_lines = [l]
      break
    tag_closed = tag_closed or '>' in l
  else:
    # Raised explicitly (rather than "assert False") so that the check is not
    # stripped under -O, which would leave |tag_lines| unbound.
    raise AssertionError('Did not find end of node:\n' + '\n'.join(lines))

  # Insecure and truncated hash as it only needs to be unique vs. its neighbors.
  return hashlib.md5(('\n'.join(tag_lines)).encode('utf8')).hexdigest()[:8]


def _IsSelfClosing(lines):
  """Given pretty-printed xml, returns whether first node is self-closing."""
  for l in lines:
    idx = l.find('>')
    if idx != -1:
      return l[idx - 1] == '/'
  raise RuntimeError('Did not find end of tag:\n%s' % '\n'.join(lines))


def _AddDiffTags(lines):
  """Appends ' # DIFF-ANCHOR: <hash>' to node start/end lines, in place."""
  # When multiple identical tags appear sequentially, XML diffs can look like:
  # +   </tag>
  # +   <tag>
  # rather than:
  # +   <tag>
  # +   </tag>
  # To reduce confusion, add hashes to tags.
  # This also ensures changed tags show up with outer <tag> elements rather than
  # showing only changed attributes.
  hash_stack = []
  for i, l in enumerate(lines):
    stripped = l.lstrip()
    # Ignore non-indented tags and lines that are not the start/end of a node.
    if l[0] != ' ' or stripped[0] != '<':
      continue
    # Ignore self-closing nodes that fit on one line.
    if l[-2:] == '/>':
      continue
    # Ignore nodes that are opened and closed on the same line (e.g.
    # <foo>text</foo>). There is no separate closing-tag line to pop their
    # hash, so pushing one would mis-pair every subsequent anchor.
    if stripped[1] != '/' and '</' in stripped:
      continue
    # Ignore <application> since diff tag changes with basically any change.
    if stripped.lstrip('</').startswith('application'):
      continue

    # Check for the closing tag (</foo>).
    if stripped[1] != '/':
      cur_hash = _CreateNodeHash(lines[i:])
      if not _IsSelfClosing(lines[i:]):
        hash_stack.append(cur_hash)
    else:
      cur_hash = hash_stack.pop()
    lines[i] += ' # DIFF-ANCHOR: {}'.format(cur_hash)
  assert not hash_stack, 'hash_stack was not empty:\n' + '\n'.join(hash_stack)


def NormalizeManifest(manifest_contents, version_code_offset,
                      library_version_offset):
  """Returns a normalized, diff-friendly rendering of a manifest.

  Args:
    manifest_contents: The manifest XML, as accepted by
        ElementTree.fromstring().
    version_code_offset: Subtracted from android:versionCode (both are passed
        through int()). When falsy, versionCode is left untouched.
    library_version_offset: Subtracted from android:version of
        <uses-static-library> / <static-library> nodes. When falsy, those
        versions are left untouched.

  Returns:
    The normalized manifest as a string ending with a newline.
  """
  _RegisterElementTreeNamespaces()
  # This also strips comments and sorts node attributes alphabetically.
  root = ElementTree.fromstring(manifest_contents)
  package = GetPackage(root)

  app_node = root.find('application')
  if app_node is not None:
    # android:debuggable is added when !is_official_build. Strip it out to avoid
    # expectation diffs caused by not adding is_official_build. Play store
    # blocks uploading apps with it set, so there's no risk of it slipping in.
    debuggable_name = '{%s}debuggable' % ANDROID_NAMESPACE
    if debuggable_name in app_node.attrib:
      del app_node.attrib[debuggable_name]

    version_code = NamespacedGet(root, 'versionCode')
    if version_code and version_code_offset:
      version_code = int(version_code) - int(version_code_offset)
      NamespacedSet(root, 'versionCode', f'OFFSET={version_code}')
    version_name = NamespacedGet(root, 'versionName')
    if version_name:
      version_name = re.sub(r'\d+', '#', version_name)
      NamespacedSet(root, 'versionName', version_name)

    # Trichrome's static library version number is updated daily. To avoid
    # frequent manifest check failures, we remove the exact version number
    # during normalization.
    for node in app_node:
      if node.tag in ['uses-static-library', 'static-library']:
        version = NamespacedGet(node, 'version')
        if version and library_version_offset:
          version = int(version) - int(library_version_offset)
          NamespacedSet(node, 'version', f'OFFSET={version}')

  # We also remove the exact package name (except the one at the root level)
  # to avoid noise during manifest comparison.
  def blur_package_name(node):
    for key in node.keys():
      node.set(key, node.get(key).replace(package, '$PACKAGE'))

    for child in node:
      blur_package_name(child)

  # We only blur the package names of non-root nodes because they generate a lot
  # of diffs when doing manifest checks for upstream targets. We still want to
  # have 1 piece of package name not blurred just in case the package name is
  # mistakenly changed.
  # Skip blurring when there is no package name: str.replace() raises
  # TypeError for None, and an empty string would match between every
  # character.
  if package:
    for child in root:
      blur_package_name(child)

  _SortAndStripElementTree(root)

  # Fix up whitespace/indentation.
  dom = minidom.parseString(ElementTree.tostring(root))
  out_lines = []
  for l in dom.toprettyxml(indent=' ').splitlines():
    if not l or l.isspace():
      continue
    if len(l) > _WRAP_LINE_LENGTH and any(x in l for x in _WRAP_CANDIDATES):
      indent = ' ' * l.find('<')
      start_tag, attrs, end_tag = _SplitElement(l)
      out_lines.append('{}{}'.format(indent, start_tag))
      for attribute in attrs:
        out_lines.append('{} {}'.format(indent, attribute))
      out_lines[-1] += '>'
      # Heuristic: Do not allow multi-line tags to be self-closing since these
      # can generally be allowed to have nested elements. When diffing, it adds
      # noise if the base file is self-closing and the non-base file is not
      # self-closing.
      if end_tag == '/>':
        out_lines.append('{}{}>'.format(indent, start_tag.replace('<', '</')))
    else:
      out_lines.append(l)

  # Make output more diff-friendly.
  _AddDiffTags(out_lines)

  return '\n'.join(out_lines) + '\n'