xref: /aosp_15_r20/external/cronet/build/android/pylib/utils/dexdump.py (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1# Copyright 2016 The Chromium Authors
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import os
6import re
7import shutil
8import sys
9import tempfile
10from xml.etree import ElementTree
11from collections import namedtuple
12from typing import Dict
13
14from devil.utils import cmd_helper
15from pylib import constants
16
17sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'gyp'))
18from util import build_utils
19
20DEXDUMP_PATH = os.path.join(constants.ANDROID_SDK_TOOLS, 'dexdump')
21
22
# Layout of one annotations dict. Annotations.classAnnotations is a single
# dict of this form; Annotations.methodsAnnotations maps each method name to
# a dict of this form.
#   {
#     'empty-annotation-class-name': None,
#     'annotation-class-name': {
#       'fieldA': 'primitive-value',
#       'fieldB': [ 'array-item-1', 'array-item-2', ... ],
#       'fieldC': {  # CURRENTLY UNSUPPORTED.
#         /* Object value */
#         'field': 'primitive-value',
#         'field': [ 'array-item-1', 'array-item-2', ... ],
#         'field': { /* Object value */ }
#       }
#     }
#   }
Annotations = namedtuple('Annotations', 'classAnnotations methodsAnnotations')

# Matches one "foo=..." item out of a space-separated run of them. The value
# part may itself contain spaces: it extends up to the next " key=" or to the
# end of the string, whichever comes first.
_ANNOTATION_VALUE_MATCHER = re.compile(r'\w+=.*?(?:$|(?= \w+=))')
42
43
def Dump(apk_path):
  """Dumps class and method information from an APK via dexdump.

  Every file in the APK matching '*classes*.dex' is extracted into a
  temporary directory, dumped with dexdump and parsed on its own. The
  temporary directory is removed again before returning, also on failure.

  Args:
    apk_path: An absolute path to an APK file to dump.
  Returns:
    A list with one entry per dex file found in the APK. Each entry is a
    dict in the following format:
      {
        <package_name>: {
          'classes': {
            <class_name>: {
              'methods': [<method_1>, <method_2>],
              'superclass': <string>,
              'is_abstract': <boolean>,
              'annotations': <Annotations>
            }
          }
        }
      }
  """
  # Characters that must not end up in the text handed to the XML parser.
  # Compiled once here rather than once per dex file.
  bad_xml_chars = re.compile(
      u'[\x00-\x08\x0b-\x0c\x0e-\x1f\x7f-\x84\x86-\x9f' +
      u'\ud800-\udfff\ufdd0-\ufddf\ufffe-\uffff]')

  # Create the directory before entering the try block: if mkdtemp() itself
  # fails there is nothing to clean up, and the finally clause must not
  # replace the real error with an UnboundLocalError on |dexfile_dir|.
  dexfile_dir = tempfile.mkdtemp()
  try:
    parsed_dex_files = []
    for dex_file in build_utils.ExtractAll(apk_path,
                                           dexfile_dir,
                                           pattern='*classes*.dex'):
      output_xml = cmd_helper.GetCmdOutput(
          [DEXDUMP_PATH, '-a', '-j', '-l', 'xml', dex_file])

      # Dexdump doesn't escape its XML output very well, so replace every
      # forbidden character with U+FFFD before parsing.
      # NOTE(review): this assumes GetCmdOutput returns already-decoded text
      # (str), since a str pattern is applied to it -- confirm.
      clean_xml = bad_xml_chars.sub(u'\ufffd', output_xml)

      # Constructors are referenced as "<init>" in the annotation text, which
      # would make ElementTree fail to parse the xml because it looks like an
      # opening tag without a matching closing tag.
      clean_xml = clean_xml.replace('<init>', 'constructor')

      annotations = _ParseAnnotations(clean_xml)

      # The text is handed to etree as utf-8 encoded bytes so that etree can
      # work out the encoding itself from the XML declaration.
      parsed_dex_files.append(
          _ParseRootNode(ElementTree.fromstring(clean_xml.encode('utf-8')),
                         annotations))
    return parsed_dex_files
  finally:
    shutil.rmtree(dexfile_dir)
96
97
def _ParseAnnotationValues(values_str):
  """Turns the "key=value ..." tail of an annotation line into a dict.

  Values written as "{ a b c }" become a list of the whitespace-separated
  items; every other value is kept as the raw string.

  Example:
    'key1={ A B } key2=4104 key3=null'
      -> {'key1': ['A', 'B'], 'key2': '4104', 'key3': 'null'}

  Args:
    values_str: The text following the annotation name; may be empty.
  Returns:
    None when |values_str| is empty, otherwise a dict mapping each key to a
    string or a list of strings.
  """
  if not values_str:
    return None

  def _Decode(raw):
    # TODO: support for dicts if ever needed.
    is_array = raw.startswith('{ ') and raw.endswith(' }')
    return raw[2:-2].split() if is_array else raw

  parsed = {}
  for item in _ANNOTATION_VALUE_MATCHER.findall(values_str):
    # Every match contains at least one '=' (the pattern requires it), so the
    # split point always exists; only the first '=' separates key from value.
    name, _, raw_value = item.partition('=')
    parsed[name] = _Decode(raw_value)
  return parsed
111
112
def _ParseAnnotations(dexRaw: str) -> Dict[int, Annotations]:
  """Parses the annotation blocks of a dexdump dump, keyed by class index.

  Annotations are written to the dex dump as human readable blocks of text.
  The only prescription is that they appear before the class in the xml
  output; they are not required to be nested within the package the way the
  classes are. It is therefore simpler to collect all the annotations first
  and associate them back to the classes afterwards.

  Example:
  Class #12 annotations:
  Annotations on class
    VISIBILITY_RUNTIME Ldalvik/annotation/EnclosingClass; value=...
  Annotations on method #512 'example'
    VISIBILITY_SYSTEM Ldalvik/annotation/Signature; value=...
    VISIBILITY_RUNTIME Landroidx/test/filters/SmallTest;
    VISIBILITY_RUNTIME Lorg/chromium/base/test/util/Feature; value={ Cronet }
    VISIBILITY_RUNTIME LFoo; key1={ A B } key2=4104 key3=null

  Only VISIBILITY_RUNTIME annotations attached to a class or to a method are
  kept. Annotation names are reduced to their simple class name
  (Lorg/chromium/base/test/util/Feature; -> Feature, LFoo; -> Foo).

  Args:
    dexRaw: The text output of dexdump.
  Returns:
    A dict mapping the class index N of each "Class #N annotations:" block to
    an Annotations tuple. In it, classAnnotations maps annotation name to its
    parsed values (see _ParseAnnotationValues) and methodsAnnotations maps
    method name to such a dict.
  """
  # Header line of a per-class block; the group captures the class index.
  # Eg: Class #12 annotations: -> 12
  classHeaderMatcher = re.compile(r'^Class #([0-9]+).*annotations:$')
  # The method name sits between the quotes of the annotations line.
  # Eg: Annotations on method #512 'example'  -> example
  methodMatcher = re.compile(r"'([^']*)")
  # A runtime annotation line. Group 1 is the simple class name: everything
  # after the leading 'L' and the last slash (if any) up to the semicolon.
  # Group 2 is the remaining "key=value" text, possibly empty.
  # Eg: Ldalvik/annotation/Signature; -> Signature
  # Eg: LFoo; key1=1                  -> Foo, 'key1=1'
  annotationMatcher = re.compile(
      r'^\s*VISIBILITY_RUNTIME\s+L(?:[^/;]*/)*([^/;]+); ?(.*)$')

  annotations = {}
  # Annotations tuple of the class block being read, or None between blocks.
  currentAnnotationsForClass = None
  # Dict that annotation lines are currently added to, or None while lines
  # have to be ignored.
  currentAnnotationsBlock = None

  # This loop does four things
  # 1. It looks for a line telling us we are describing annotations for
  #  a new class
  # 2. It looks for a line telling us if the annotations we find will be
  #  for the class or for any of its methods; we will keep reference to
  #  this
  # 3. It adds the annotations to whatever we are holding reference to
  # 4. It looks for a line to see if we should start looking for a
  #  new class again
  for line in dexRaw.splitlines():
    # Step 1
    # A header always starts a new class, even when the previous block was
    # not closed by an empty line; otherwise the annotations that follow
    # would be attributed to the previous class.
    headerMatch = classHeaderMatcher.match(line)
    if headerMatch:
      currentAnnotationsForClass = Annotations(classAnnotations={},
                                               methodsAnnotations={})
      currentAnnotationsBlock = None
      annotations[int(headerMatch.group(1))] = currentAnnotationsForClass
      continue

    # Outside of a class block there is nothing to collect.
    if currentAnnotationsForClass is None:
      continue

    # Step 2
    # If we find a descriptor indicating we are tracking annotations
    # for the class or its methods, we'll keep a reference of this
    # block for when we start finding annotation references
    if line.startswith('Annotations on class'):
      currentAnnotationsBlock = currentAnnotationsForClass.classAnnotations
    elif line.startswith('Annotations on method'):
      methodMatch = methodMatcher.search(line)
      if methodMatch:
        # A later block for a method of the same name replaces the earlier
        # one, as entries are keyed by name only.
        currentAnnotationsBlock = {}
        currentAnnotationsForClass.methodsAnnotations[methodMatch.group(
            1)] = currentAnnotationsBlock
      else:
        # No quoted method name to attach the annotations to: ignore them.
        currentAnnotationsBlock = None

    # If we match against any other type of annotations
    # we will ignore them
    elif line.startswith('Annotations on'):
      currentAnnotationsBlock = None

    # Step 4
    # Empty lines indicate that the annotation descriptions are complete
    # and we should look for new classes
    elif not line.strip():
      currentAnnotationsForClass = None
      currentAnnotationsBlock = None

    # Step 3
    # We are only adding runtime annotations as those are the types
    # that will affect if we should run tests or not (where this is
    # being used)
    elif currentAnnotationsBlock is not None:
      annotationMatch = annotationMatcher.match(line)
      if annotationMatch:
        annotationName, annotationValuesStr = annotationMatch.groups()
        # Annotations without values map to None, the others to a dict of
        # their parsed values.
        currentAnnotationsBlock[annotationName] = _ParseAnnotationValues(
            annotationValuesStr)

  return annotations
212
213
def _ParseRootNode(root, annotations: Dict[int, Annotations]):
  """Builds the per-package result dict from the root of the dexdump XML.

  Only <package> elements directly below |root| are looked at. The XML is
  expected to look as follows (this is a subset of the information contained
  within dexdump output):

  <api>
    <package name="foo.bar">
      <class name="Class" extends="foo.bar.SuperClass">
        <field name="Field">
        </field>
        <constructor name="Method">
          <parameter name="Param" type="int">
          </parameter>
        </constructor>
        <method name="Method">
          <parameter name="Param" type="int">
          </parameter>
        </method>
      </class>
    </package>
  </api>

  Args:
    root: The root element of the parsed XML.
    annotations: Annotations keyed by class index; handed on to
      _ParsePackageNode.
  Returns:
    A dict mapping each package name to the {'classes': {...}} dict parsed
    for it. When a package name shows up more than once, the classes of the
    later occurrences are merged into the first entry.
  """
  packages = {}

  # Annotations are referenced by class order, so a running class index is
  # threaded through all packages in document order.
  next_class_index = 0

  for node in root:
    if node.tag != 'package':
      continue
    name = node.attrib['name']
    parsed, next_class_index = _ParsePackageNode(node, next_class_index,
                                                 annotations)
    if name not in packages:
      packages[name] = parsed
    else:
      packages[name]['classes'].update(parsed['classes'])
  return packages
253
254
def _ParsePackageNode(package_node, classCount: int,
                      annotations: Dict[int, Annotations]):
  """Parses the <class> children of a <package> node of the dexdump xml.

  Args:
    package_node: The <package> element to parse.
    classCount: Index to assign to the first <class> child of this package;
      each following <class> child gets the next index.
    annotations: Annotations keyed by class index; handed on to
      _ParseClassNode.
  Returns:
    A tuple in the format:
      (classes: {
        'classes': {
          <class_1>: {
            'methods': [<method_1>, <method_2>],
            'superclass': <string>,
            'is_abstract': <boolean>,
            'annotations': <Annotations or None>
          },
          <class_2>: {
            'methods': [<method_1>, <method_2>],
            'superclass': <string>,
            'is_abstract': <boolean>,
            'annotations': <Annotations or None>
          },
        }
      }, classCount: number)
    where the returned classCount is the incoming one plus the number of
    <class> children seen.
  """
  class_nodes = [node for node in package_node if node.tag == 'class']

  classes = {}
  for offset, node in enumerate(class_nodes):
    classes[node.attrib['name']] = _ParseClassNode(node, classCount + offset,
                                                   annotations)
  return ({'classes': classes}, classCount + len(class_nodes))
285
286
def _ParseClassNode(class_node, classIndex: int,
                    annotations: Dict[int, Annotations]):
  """Parses a <class> node from the dexdump xml output.

  Args:
    class_node: The <class> element to parse.
    classIndex: Index under which the annotations of this class are looked
      up in |annotations|.
    annotations: Annotations keyed by class index.
  Returns:
    A dict in the format:
      {
        'methods': [<method_1>, <method_2>],
        'superclass': <string>,
        'is_abstract': <boolean>,
        'annotations': <Annotations>
      }
    'methods' only lists the <method> children whose visibility is 'public'.
    'annotations' is an Annotations tuple with two empty dicts when there is
    no entry for |classIndex|.
  """
  public_methods = [
      node.attrib['name'] for node in class_node
      if node.tag == 'method' and node.attrib['visibility'] == 'public'
  ]
  superclass = class_node.attrib['extends']
  # The 'abstract' attribute is optional; anything but 'true' means concrete.
  is_abstract = class_node.attrib.get('abstract') == 'true'
  no_annotations = Annotations(classAnnotations={}, methodsAnnotations={})
  return {
      'methods': public_methods,
      'superclass': superclass,
      'is_abstract': is_abstract,
      'annotations': annotations.get(classIndex, no_annotations),
  }
314