# Copyright 2016 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import os
import re
import shutil
import sys
import tempfile
from xml.etree import ElementTree
from collections import namedtuple
from typing import Dict

from devil.utils import cmd_helper
from pylib import constants

sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'gyp'))
from util import build_utils

DEXDUMP_PATH = os.path.join(constants.ANDROID_SDK_TOOLS, 'dexdump')


# Annotations dict format:
#   {
#     'empty-annotation-class-name': None,
#     'annotation-class-name': {
#       'fieldA': 'primitive-value',
#       'fieldB': [ 'array-item-1', 'array-item-2', ... ],
#       'fieldC': {  # CURRENTLY UNSUPPORTED.
#         /* Object value */
#         'field': 'primitive-value',
#         'field': [ 'array-item-1', 'array-item-2', ... ],
#         'field': { /* Object value */ }
#       }
#     }
#   }
Annotations = namedtuple('Annotations',
                         ['classAnnotations', 'methodsAnnotations'])

# Finds each space-separated "foo=..." (where ... can contain spaces).
_ANNOTATION_VALUE_MATCHER = re.compile(r'\w+=.*?(?:$|(?= \w+=))')

# Characters that are replaced before the dexdump output is handed to the XML
# parser (dexdump doesn't escape its XML output very well).
_BAD_XML_CHARS = re.compile(
    u'[\x00-\x08\x0b-\x0c\x0e-\x1f\x7f-\x84\x86-\x9f' +
    u'\ud800-\udfff\ufdd0-\ufddf\ufffe-\uffff]')

# Matches the header of an annotations section and captures the class index.
# Eg: Class #12 annotations: -> 12
_CLASS_ANNOTATIONS_HEADER_MATCHER = re.compile(
    r'^Class #([0-9]+).*annotations:$')

# Captures the method name from between the quotes of the annotations line.
# Eg: Annotations on method #512 'example' -> example
_METHOD_NAME_MATCHER = re.compile(r"'([^']*)")

# Captures the simple name of the annotation class (everything after the last
# slash of the descriptor, or after the leading "L" when the descriptor has no
# package) and the raw values string that follows the semicolon.
# Eg: Ldalvik/annotation/Signature; value=... -> ('Signature', 'value=...')
# Eg: LFoo; key=1 -> ('Foo', 'key=1')
_ANNOTATION_MATCHER = re.compile(r'(?:/|\bL)([^/;]+); ?(.*)')


def Dump(apk_path):
  """Dumps class and method information from a APK into a dict via dexdump.

  Args:
    apk_path: An absolute path to an APK file to dump.
  Returns:
    A list with one entry per dex file extracted from the APK. Each entry is
    a dict in the following format:
      {
        <package_name>: {
          'classes': {
            <class_name>: {
              'methods': [<method_1>, <method_2>],
              'superclass': <string>,
              'is_abstract': <boolean>,
              'annotations': <Annotations>
            }
          }
        }
      }
  """
  # Create the directory before entering the try block so that the cleanup in
  # the finally clause never runs against an unbound name.
  dexfile_dir = tempfile.mkdtemp()
  try:
    parsed_dex_files = []
    for dex_file in build_utils.ExtractAll(apk_path,
                                           dexfile_dir,
                                           pattern='*classes*.dex'):
      output_xml = cmd_helper.GetCmdOutput(
          [DEXDUMP_PATH, '-a', '-j', '-l', 'xml', dex_file])
      # Dexdump doesn't escape its XML output very well; replace the forbidden
      # characters and re-encode the result below (as etree expects a byte
      # string as input so it can figure out the encoding itself from the XML
      # declaration).
      clean_xml = _BAD_XML_CHARS.sub(u'\ufffd', output_xml)

      # Constructors are referenced as "<init>" in our annotations
      # which will result in the ElementTree failing to parse
      # our xml as it won't find a closing tag for this.
      clean_xml = clean_xml.replace('<init>', 'constructor')

      annotations = _ParseAnnotations(clean_xml)

      parsed_dex_files.append(
          _ParseRootNode(ElementTree.fromstring(clean_xml.encode('utf-8')),
                         annotations))
    return parsed_dex_files
  finally:
    shutil.rmtree(dexfile_dir)


def _ParseAnnotationValues(values_str):
  """Parses the "key=value" pairs that follow an annotation descriptor.

  Args:
    values_str: The text after the annotation descriptor, eg:
      'key1={ A B } key2=4104 key3=null'. May be empty.
  Returns:
    None if |values_str| is empty, otherwise a dict mapping each key to
    either a string (primitive value) or a list of strings (for values
    written as '{ item1 item2 }').
  """
  if not values_str:
    return None
  ret = {}
  for key_value in _ANNOTATION_VALUE_MATCHER.findall(values_str):
    key, value_str = key_value.split('=', 1)
    # TODO: support for dicts if ever needed.
    if value_str.startswith('{ ') and value_str.endswith(' }'):
      # Array value: drop the surrounding braces and split the items.
      ret[key] = value_str[2:-2].split()
    else:
      ret[key] = value_str
  return ret


def _ParseAnnotations(dexRaw: str) -> Dict[int, Annotations]:
  """Parses the dexdump text and returns a dict of Annotations keyed by
  class index.

  Annotations are written to the dex dump as human readable blocks of text.
  The only prescription is that they appear before the class in our xml file.
  They are not required to be nested within the package as our classes.
  It is simpler to parse for all the annotations and then associate them
  back to the classes.

  Example:
  Class #12 annotations:
  Annotations on class
    VISIBILITY_RUNTIME Ldalvik/annotation/EnclosingClass; value=...
  Annotations on method #512 'example'
    VISIBILITY_SYSTEM Ldalvik/annotation/Signature; value=...
    VISIBILITY_RUNTIME Landroidx/test/filters/SmallTest;
    VISIBILITY_RUNTIME Lorg/chromium/base/test/util/Feature; value={ Cronet }
    VISIBILITY_RUNTIME LFoo; key1={ A B } key2=4104 key3=null

  Args:
    dexRaw: The (cleaned) text output of dexdump.
  Returns:
    A dict mapping the class index to the Annotations found for that class.
    Lines that do not follow the expected format are skipped.
  """
  annotations = {}
  currentAnnotationsForClass = None
  # The dict that VISIBILITY_RUNTIME lines are currently added to, or None
  # when the annotations being described should be ignored.
  currentAnnotationsBlock = None

  # This loop does four things
  # 1. It looks for a line telling us we are describing annotations for
  #  a new class
  # 2. It looks for a line telling us if the annotations we find will be
  #  for the class or for any of its methods; we will keep reference to
  #  this
  # 3. It adds the annotations to whatever we are holding reference to
  # 4. It looks for a line to see if we should start looking for a
  #  new class again
  for line in dexRaw.splitlines():
    if currentAnnotationsForClass is None:
      # Step 1
      # We keep searching until we find an annotation descriptor
      # This lets us know that we are storing annotations for a new class
      headerMatch = _CLASS_ANNOTATIONS_HEADER_MATCHER.match(line)
      if headerMatch:
        currentAnnotationsForClass = Annotations(classAnnotations={},
                                                 methodsAnnotations={})
        annotations[int(headerMatch.group(1))] = currentAnnotationsForClass
      continue

    # Step 2
    # If we find a descriptor indicating we are tracking annotations
    # for the class or its methods, we'll keep a reference of this
    # block for when we start finding annotation references
    if line.startswith('Annotations on class'):
      currentAnnotationsBlock = currentAnnotationsForClass.classAnnotations
    elif line.startswith('Annotations on method'):
      methodMatch = _METHOD_NAME_MATCHER.search(line)
      if methodMatch:
        currentAnnotationsBlock = {}
        currentAnnotationsForClass.methodsAnnotations[methodMatch.group(
            1)] = currentAnnotationsBlock
      else:
        # No quoted method name to attach the annotations to; ignore them.
        currentAnnotationsBlock = None

    # If we match against any other type of annotations
    # we will ignore them
    elif line.startswith('Annotations on'):
      currentAnnotationsBlock = None

    # Step 3
    # We are only adding runtime annotations as those are the types
    # that will affect if we should run tests or not (where this is
    # being used)
    elif (currentAnnotationsBlock is not None
          and line.strip().startswith('VISIBILITY_RUNTIME')):
      annotationMatch = _ANNOTATION_MATCHER.search(line)
      if annotationMatch:
        annotationName, annotationValuesStr = annotationMatch.groups()
        # Our instrumentation tests expect a mapping of "Annotation: Value".
        # The value is None when the annotation carries no values.
        currentAnnotationsBlock[annotationName] = _ParseAnnotationValues(
            annotationValuesStr)

    # Step 4
    # Empty lines indicate that the annotation descriptions are complete
    # and we should look for new classes
    elif not line.strip():
      currentAnnotationsForClass = None
      currentAnnotationsBlock = None

  return annotations


def _ParseRootNode(root, annotations: Dict[int, Annotations]):
  """Parses the XML output of dexdump. This output is in the following format.

  This is a subset of the information contained within dexdump output.

  <api>
    <package name="foo.bar">
      <class name="Class" extends="foo.bar.SuperClass">
        <field name="Field">
        </field>
        <constructor name="Method">
          <parameter name="Param" type="int">
          </parameter>
        </constructor>
        <method name="Method">
          <parameter name="Param" type="int">
          </parameter>
        </method>
      </class>
    </package>
  </api>

  Args:
    root: The root element of the parsed dexdump XML.
    annotations: Annotations keyed by class index.
  Returns:
    A dict mapping each package name to {'classes': {...}}. Packages that
    appear more than once have their classes merged.
  """
  results = {}

  # Annotations are referenced by the class order
  # To match them, we need to keep track of the class number and
  # match it to the appropriate annotation at that stage
  classCount = 0

  for child in root:
    if child.tag != 'package':
      continue
    package_name = child.attrib['name']
    parsed_node, classCount = _ParsePackageNode(child, classCount,
                                                annotations)
    if package_name in results:
      results[package_name]['classes'].update(parsed_node['classes'])
    else:
      results[package_name] = parsed_node
  return results


def _ParsePackageNode(package_node, classCount: int,
                      annotations: Dict[int, Annotations]):
  """Parses a <package> node from the dexdump xml output.

  Args:
    package_node: The <package> element to parse.
    classCount: The index of the first class in this package.
    annotations: Annotations keyed by class index.
  Returns:
    A tuple in the format:
      (classes: {
        'classes': {
          <class_1>: {
            'methods': [<method_1>, <method_2>],
            'superclass': <string>,
            'is_abstract': <boolean>,
            'annotations': <Annotations>
          },
          <class_2>: {
            'methods': [<method_1>, <method_2>],
            'superclass': <string>,
            'is_abstract': <boolean>,
            'annotations': <Annotations>
          },
        }
      }, classCount: number)
    where the returned classCount is the index following the last class of
    this package.
  """
  classes = {}
  for child in package_node:
    if child.tag == 'class':
      classes[child.attrib['name']] = _ParseClassNode(child, classCount,
                                                      annotations)
      classCount += 1
  return ({'classes': classes}, classCount)


def _ParseClassNode(class_node, classIndex: int,
                    annotations: Dict[int, Annotations]):
  """Parses a <class> node from the dexdump xml output.

  Args:
    class_node: The <class> element to parse.
    classIndex: The index of this class, used to look up its annotations.
    annotations: Annotations keyed by class index.
  Returns:
    A dict in the format:
      {
        'methods': [<method_1>, <method_2>],
        'superclass': <string>,
        'is_abstract': <boolean>,
        'annotations': <Annotations>
      }
    Only public methods are listed. 'annotations' is an empty Annotations
    when none were recorded for |classIndex|.
  """
  methods = [
      child.attrib['name'] for child in class_node
      if child.tag == 'method' and child.attrib['visibility'] == 'public'
  ]
  return {
      'methods':
      methods,
      'superclass':
      class_node.attrib['extends'],
      'is_abstract':
      class_node.attrib.get('abstract') == 'true',
      'annotations':
      annotations.get(classIndex,
                      Annotations(classAnnotations={}, methodsAnnotations={}))
  }