xref: /aosp_15_r20/external/cronet/build/android/gyp/util/manifest_utils.py (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1# Copyright 2019 The Chromium Authors
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5"""Contains common helpers for working with Android manifests."""
6
7import hashlib
8import os
9import re
10import shlex
11import sys
12import xml.dom.minidom as minidom
13from xml.etree import ElementTree
14
15from util import build_utils
16import action_helpers  # build_utils adds //build to sys.path.
17
# XML namespace URIs that appear in Android manifests. They are registered with
# ElementTree under the prefixes 'android', 'tools' and 'dist' respectively.
ANDROID_NAMESPACE = 'http://schemas.android.com/apk/res/android'
TOOLS_NAMESPACE = 'http://schemas.android.com/tools'
DIST_NAMESPACE = 'http://schemas.android.com/apk/distribution'
# Absolute path of the AndroidManifest.xml that sits two directories above the
# directory containing this file.
EMPTY_ANDROID_MANIFEST_PATH = os.path.abspath(
    os.path.join(os.path.dirname(__file__), '..', '..', 'AndroidManifest.xml'))
# When normalizing for expectation matching, wrap these tags when they are long
# or else they become very hard to read.
_WRAP_CANDIDATES = (
    '<manifest',
    '<application',
    '<activity',
    '<provider',
    '<receiver',
    '<service',
)
# Only lines strictly longer than this many characters are wrapped.
_WRAP_LINE_LENGTH = 100

# Flipped to True the first time _RegisterElementTreeNamespaces() runs so that
# the namespace prefixes are registered only once per process.
_xml_namespace_initialized = False
37
38
def _RegisterElementTreeNamespaces():
  """Registers the android/tools/dist prefixes with ElementTree, once.

  Later calls are no-ops; the module-level flag records that registration has
  already been done.
  """
  global _xml_namespace_initialized
  if not _xml_namespace_initialized:
    _xml_namespace_initialized = True
    prefix_to_uri = (
        ('android', ANDROID_NAMESPACE),
        ('tools', TOOLS_NAMESPACE),
        ('dist', DIST_NAMESPACE),
    )
    for prefix, uri in prefix_to_uri:
      ElementTree.register_namespace(prefix, uri)
47
48
def NamespacedGet(node, key):
  """Returns the value of the android:|key| attribute of |node|, or None."""
  qualified_name = f'{{{ANDROID_NAMESPACE}}}{key}'
  return node.get(qualified_name)
51
52
def NamespacedSet(node, key, value):
  """Sets the android:|key| attribute of |node| to |value|."""
  qualified_name = f'{{{ANDROID_NAMESPACE}}}{key}'
  node.set(qualified_name, value)
55
56
def ParseManifest(path):
  """Loads the AndroidManifest.xml at |path| with ElementTree.

  Registers the 'android', 'tools' and 'dist' namespace prefixes before
  parsing, and appends an empty <application> element to the manifest node
  when the document does not already have one.

  Returns tuple of:
    doc: The parsed ElementTree document.
    manifest_node: the <manifest> node.
    app_node: the <application> node.
  """
  _RegisterElementTreeNamespaces()
  doc = ElementTree.parse(path)
  root = doc.getroot()
  # find() only looks below the root, so the root itself is checked first.
  manifest_node = root if root.tag == 'manifest' else doc.find('manifest')
  assert manifest_node is not None, 'Manifest is none for path ' + path

  app_node = doc.find('application')
  if app_node is not None:
    return doc, manifest_node, app_node
  created_app_node = ElementTree.SubElement(manifest_node, 'application')
  return doc, manifest_node, created_app_node
82
83
def SaveManifest(doc, path):
  """Serializes the root of |doc| as UTF-8 XML and writes it to |path|.

  The file is written through action_helpers.atomic_output().
  """
  with action_helpers.atomic_output(path) as out_file:
    serialized = ElementTree.tostring(doc.getroot(), encoding='UTF-8')
    out_file.write(serialized)
87
88
def GetPackage(manifest_node):
  """Returns the 'package' attribute of |manifest_node|, or None if absent."""
  return manifest_node.attrib.get('package')
91
92
def SetUsesSdk(manifest_node,
               target_sdk_version,
               min_sdk_version,
               max_sdk_version=None):
  """Writes SDK version attributes onto the manifest's <uses-sdk> element.

  A <uses-sdk> child is created under |manifest_node| if there is none.
  android:targetSdkVersion and android:minSdkVersion are always set;
  android:maxSdkVersion is set only when |max_sdk_version| is truthy.
  """
  sdk_node = manifest_node.find('./uses-sdk')
  if sdk_node is None:
    sdk_node = ElementTree.SubElement(manifest_node, 'uses-sdk')

  versions = [
      ('targetSdkVersion', target_sdk_version),
      ('minSdkVersion', min_sdk_version),
  ]
  if max_sdk_version:
    versions.append(('maxSdkVersion', max_sdk_version))
  for attr_name, attr_value in versions:
    NamespacedSet(sdk_node, attr_name, attr_value)
104
105
def SetTargetApiIfUnset(manifest_node, target_sdk_version):
  """Sets android:targetSdkVersion on <uses-sdk> unless it is already present.

  A <uses-sdk> child is created under |manifest_node| if there is none.

  Returns:
    True if the attribute was missing and has now been set, False if an
    existing value was left untouched.
  """
  sdk_node = manifest_node.find('./uses-sdk')
  if sdk_node is None:
    sdk_node = ElementTree.SubElement(manifest_node, 'uses-sdk')
  if NamespacedGet(sdk_node, 'targetSdkVersion') is not None:
    return False
  NamespacedSet(sdk_node, 'targetSdkVersion', target_sdk_version)
  return True
114
115
def _SortAndStripElementTree(root):
  """Sorts attributes and child elements of |root| recursively, in place.

  Whitespace-only text of every descendant element is cleared as well.

  Ordering is alphabetical with two exceptions:
  1) The <application> node goes last (since it's giant).
  2) android:name goes before all other attributes.
  """
  android_name = '{%s}name' % ANDROID_NAMESPACE

  def attr_key(item):
    # The ('', '') key sorts ahead of every real (name, value) pair.
    if item[0] == android_name:
      return ('', '')
    return item

  def node_key(element):
    # Serialized elements start with '<', which sorts before 'z'.
    if element.tag == 'application':
      return 'z'
    serialized = ElementTree.tostring(element).decode('utf8')
    # tostring() emits xmlns declarations for every namespace used by the
    # element or its descendants. Drop them so that a child starting or
    # stopping to use a namespace does not move its ancestors around.
    return re.sub(r' xmlns:.*?".*?"', '', serialized)

  def visit(element):
    for child in element:
      if child.text and child.text.isspace():
        child.text = None
      visit(child)

    # Relies on dicts preserving insertion order (requires Python 3.8+).
    element.attrib = dict(sorted(element.attrib.items(), key=attr_key))
    # Children are reordered only after each of them has been processed.
    element[:] = sorted(element, key=node_key)

  visit(root)
148
149
150def _SplitElement(line):
151  """Parses a one-line xml node into ('<tag', ['a="b"', ...]], '/>')."""
152
153  # Shlex splits nicely, but removes quotes. Need to put them back.
154  def restore_quotes(value):
155    return value.replace('=', '="', 1) + '"'
156
157  # Simplify restore_quotes by separating />.
158  assert line.endswith('>'), line
159  end_tag = '>'
160  if line.endswith('/>'):
161    end_tag = '/>'
162  line = line[:-len(end_tag)]
163
164  # Use shlex to avoid having to re-encode &quot;, etc.
165  parts = shlex.split(line)
166  start_tag = parts[0]
167  attrs = parts[1:]
168
169  return start_tag, [restore_quotes(x) for x in attrs], end_tag
170
171
172def _CreateNodeHash(lines):
173  """Computes a hash (md5) for the first XML node found in |lines|.
174
175  Args:
176    lines: List of strings containing pretty-printed XML.
177
178  Returns:
179    Positive 32-bit integer hash of the node (including children).
180  """
181  target_indent = lines[0].find('<')
182  tag_closed = False
183  for i, l in enumerate(lines[1:]):
184    cur_indent = l.find('<')
185    if cur_indent != -1 and cur_indent <= target_indent:
186      tag_lines = lines[:i + 1]
187      break
188    if not tag_closed and 'android:name="' in l:
189      # To reduce noise of node tags changing, use android:name as the
190      # basis the hash since they usually unique.
191      tag_lines = [l]
192      break
193    tag_closed = tag_closed or '>' in l
194  else:
195    assert False, 'Did not find end of node:\n' + '\n'.join(lines)
196
197  # Insecure and truncated hash as it only needs to be unique vs. its neighbors.
198  return hashlib.md5(('\n'.join(tag_lines)).encode('utf8')).hexdigest()[:8]
199
200
201def _IsSelfClosing(lines):
202  """Given pretty-printed xml, returns whether first node is self-closing."""
203  for l in lines:
204    idx = l.find('>')
205    if idx != -1:
206      return l[idx - 1] == '/'
207  raise RuntimeError('Did not find end of tag:\n%s' % '\n'.join(lines))
208
209
def _AddDiffTags(lines):
  """Appends '  # DIFF-ANCHOR: <hash>' markers to tag lines, in place.

  When multiple identical tags appear sequentially, XML diffs can look like:
    +  </tag>
    +  <tag>
  rather than:
    +  <tag>
    +  </tag>
  To reduce confusion, hashes are added to the opening and closing lines of
  tags. This also ensures changed tags show up with outer <tag> elements
  rather than showing only changed attributes.

  Args:
    lines: List of non-empty pretty-printed XML lines; modified in place.
  """
  open_hashes = []
  for index, line in enumerate(lines):
    body = line.lstrip()
    # Only indented lines that start or end a node are of interest.
    is_indented_tag = line[0] == ' ' and body[0] == '<'
    if not is_indented_tag:
      continue
    # Self-closing nodes that fit on one line get no anchor.
    if line[-2:] == '/>':
      continue
    # Skip <application> since its hash changes with basically any change.
    if body.lstrip('</').startswith('application'):
      continue

    if body[1] == '/':
      # Closing tag (</foo>): reuse the hash of the matching opening tag.
      anchor = open_hashes.pop()
    else:
      remaining = lines[index:]
      anchor = _CreateNodeHash(remaining)
      # Only nodes with a separate closing line need to remember their hash.
      if not _IsSelfClosing(remaining):
        open_hashes.append(anchor)
    lines[index] += f'  # DIFF-ANCHOR: {anchor}'
  assert not open_hashes, 'hash_stack was not empty:\n' + '\n'.join(open_hashes)
242
243
def NormalizeManifest(manifest_contents, version_code_offset,
                      library_version_offset):
  """Returns a normalized, diff-friendly rendering of an Android manifest.

  Volatile details are replaced so the result can be compared against a
  checked-in expectation: android:debuggable is dropped from <application>,
  version numbers are rewritten, the package name is blurred in all non-root
  nodes, attributes and elements are sorted, long tags are wrapped one
  attribute per line and DIFF-ANCHOR markers are appended.

  Args:
    manifest_contents: XML text of the manifest.
    version_code_offset: Value subtracted from android:versionCode; the result
      is written as 'OFFSET=<n>'. versionCode is left alone when this is falsy.
    library_version_offset: Value subtracted from android:version of
      <uses-static-library>/<static-library> children of <application>, written
      as 'OFFSET=<n>'. The versions are left alone when this is falsy.

  Returns:
    The normalized manifest as a string ending with a newline.
  """
  _RegisterElementTreeNamespaces()
  # Parsing with ElementTree drops XML comments.
  root = ElementTree.fromstring(manifest_contents)
  package = GetPackage(root)

  app_node = root.find('application')
  # Note that version normalization only happens when <application> exists.
  if app_node is not None:
    # android:debuggable is added when !is_official_build. Strip it out to avoid
    # expectation diffs caused by not adding is_official_build. Play store
    # blocks uploading apps with it set, so there's no risk of it slipping in.
    debuggable_name = '{%s}debuggable' % ANDROID_NAMESPACE
    if debuggable_name in app_node.attrib:
      del app_node.attrib[debuggable_name]

    version_code = NamespacedGet(root, 'versionCode')
    if version_code and version_code_offset:
      version_code = int(version_code) - int(version_code_offset)
      NamespacedSet(root, 'versionCode', f'OFFSET={version_code}')
    version_name = NamespacedGet(root, 'versionName')
    if version_name:
      # Every run of digits becomes a single '#'.
      version_name = re.sub(r'\d+', '#', version_name)
      NamespacedSet(root, 'versionName', version_name)

    # Trichrome's static library version number is updated daily. To avoid
    # frequent manifest check failures, we remove the exact version number
    # during normalization.
    for node in app_node:
      if node.tag in ['uses-static-library', 'static-library']:
        version = NamespacedGet(node, 'version')
        if version and library_version_offset:
          version = int(version) - int(library_version_offset)
          NamespacedSet(node, 'version', f'OFFSET={version}')

  # We also remove the exact package name (except the one at the root level)
  # to avoid noise during manifest comparison.
  def blur_package_name(node):
    for key in node.keys():
      node.set(key, node.get(key).replace(package, '$PACKAGE'))

    for child in node:
      blur_package_name(child)

  # We only blur the package names of non-root nodes because they generate a lot
  # of diffs when doing manifest checks for upstream targets. We still want to
  # have 1 piece of package name not blurred just in case the package name is
  # mistakenly changed.
  # Nothing can be blurred without a package name: str.replace() raises
  # TypeError for None and would insert '$PACKAGE' between every character for
  # an empty string.
  if package:
    for child in root:
      blur_package_name(child)

  _SortAndStripElementTree(root)

  # Fix up whitespace/indentation.
  dom = minidom.parseString(ElementTree.tostring(root))
  out_lines = []
  for l in dom.toprettyxml(indent='  ').splitlines():
    if not l or l.isspace():
      continue
    if len(l) > _WRAP_LINE_LENGTH and any(x in l for x in _WRAP_CANDIDATES):
      # Put the tag name and each attribute on its own line.
      indent = ' ' * l.find('<')
      start_tag, attrs, end_tag = _SplitElement(l)
      out_lines.append('{}{}'.format(indent, start_tag))
      for attribute in attrs:
        out_lines.append('{}    {}'.format(indent, attribute))
      out_lines[-1] += '>'
      # Heuristic: Do not allow multi-line tags to be self-closing since these
      # can generally be allowed to have nested elements. When diffing, it adds
      # noise if the base file is self-closing and the non-base file is not
      # self-closing.
      if end_tag == '/>':
        out_lines.append('{}{}>'.format(indent, start_tag.replace('<', '</')))
    else:
      out_lines.append(l)

  # Make output more diff-friendly.
  _AddDiffTags(out_lines)

  return '\n'.join(out_lines) + '\n'
323