# Copyright 2023 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Regex-based parsing of Java sources and javap output for the JNI generator."""
import dataclasses
import os
import re
from typing import List
from typing import Optional

import java_types

# Regex fragment matching zero or more Java modifier keywords, each one
# followed by whitespace. It is embedded into larger method-matching patterns.
_MODIFIER_KEYWORDS = (r'(?:(?:' + '|'.join([
    'abstract',
    'default',
    'final',
    'native',
    'private',
    'protected',
    'public',
    'static',
    'synchronized',
]) + r')\s+)*')


class ParseError(Exception):
  """Raised when the input cannot be parsed.

  `suffix` is appended to the message by __str__(); parse_java_file() sets it
  so that the error names the file that was being parsed.
  """
  suffix = ''

  def __str__(self):
    return super().__str__() + self.suffix


@dataclasses.dataclass(order=True)
class ParsedNative:
  """A native method declaration found in a Java source file."""
  # Method name. For non-proxy natives the parser drops the "native" prefix.
  name: str
  signature: java_types.JavaSignature
  # Value of @NativeClassQualifiedName("..."); None when the annotation is
  # absent.
  native_class_name: Optional[str]
  # Only detected for non-proxy natives; proxy natives keep the default.
  static: bool = False


@dataclasses.dataclass(order=True)
class ParsedCalledByNative:
  """A Java method or constructor that is callable from native code."""
  # Class declaring the method. A nested class of the outer class when the
  # annotation names one, e.g. @CalledByNative("Inner").
  java_class: java_types.JavaClass
  # Method name, or '<init>' for constructors.
  name: str
  signature: java_types.JavaSignature
  static: bool
  # True for @CalledByNativeUnchecked.
  unchecked: bool = False


@dataclasses.dataclass(order=True)
class ParsedConstantField:
  """A `public static final` field with an integer literal value (javap)."""
  name: str
  # The digits of the literal, kept as a string.
  value: str


@dataclasses.dataclass
class ParsedFile:
  """Everything extracted from one Java source file or one javap dump."""
  filename: str
  type_resolver: java_types.TypeResolver
  # Methods declared inside the @NativeMethods interface.
  proxy_methods: List[ParsedNative]
  # `native` methods declared outside of a @NativeMethods interface.
  non_proxy_methods: List[ParsedNative]
  called_by_natives: List[ParsedCalledByNative]
  constant_fields: List[ParsedConstantField]
  # The @NativeMethods interface, as a nested class of the outer class.
  proxy_interface: Optional[java_types.JavaClass] = None
  # 'public' when the @NativeMethods interface is declared public, else None.
  proxy_visibility: Optional[str] = None
  module_name: Optional[str] = None  # E.g. @NativeMethods("module_name")
  jni_namespace: Optional[str] = None  # E.g. @JNINamespace("content")


@dataclasses.dataclass
class _ParsedProxyNatives:
  """Intermediate result of parsing a @NativeMethods interface."""
  interface_name: str
  # 'public', or None when the interface is not declared public.
  visibility: Optional[str]
  # The @NativeMethods("...") argument, or None when no argument is given.
  module_name: Optional[str]
  methods: List[ParsedNative]


# Match single line comments, multiline comments, character literals, and
# double-quoted strings.
79_COMMENT_REMOVER_REGEX = re.compile( 80 r'//.*?$|/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"', 81 re.DOTALL | re.MULTILINE) 82 83 84def _remove_comments(contents): 85 # We need to support both inline and block comments, and we need to handle 86 # strings that contain '//' or '/*'. 87 def replacer(match): 88 # Replace matches that are comments with nothing; return literals/strings 89 # unchanged. 90 s = match.group(0) 91 if s.startswith('/'): 92 return '' 93 else: 94 return s 95 96 return _COMMENT_REMOVER_REGEX.sub(replacer, contents) 97 98 99# Remove everything between and including <> except at the end of a string, e.g. 100# @JniType("std::vector<int>") 101# This will also break lines with comparison operators, but we don't care. 102_GENERICS_REGEX = re.compile(r'<[^<>\n]*>(?!>*")') 103 104 105def _remove_generics(value): 106 """Strips Java generics from a string.""" 107 while True: 108 ret = _GENERICS_REGEX.sub('', value) 109 if len(ret) == len(value): 110 return ret 111 value = ret 112 113 114_PACKAGE_REGEX = re.compile('^package\s+(\S+?);', flags=re.MULTILINE) 115 116 117def _parse_package(contents): 118 match = _PACKAGE_REGEX.search(contents) 119 if not match: 120 raise ParseError('Unable to find "package" line') 121 return match.group(1) 122 123 124_CLASSES_REGEX = re.compile( 125 r'^(.*?)(?:\b(?:public|protected|private)?\b)\s*' 126 r'(?:\b(?:static|abstract|final|sealed)\s+)*' 127 r'\b(?:class|interface|enum)\s+(\w+?)\b[^"]*?$', 128 flags=re.MULTILINE) 129 130 131# Does not handle doubly-nested classes. 
def _parse_java_classes(contents):
  """Finds the outer class and the classes nested directly inside it.

  The first class/interface/enum declaration found is treated as the outer
  class; every later declaration is recorded as nested in that outer class.

  Returns:
    A tuple of (outer_class, nested_classes).

  Raises:
    ParseError: if there is no package line or no class declaration.
  """
  package = _parse_package(contents).replace('.', '/')
  outer_class = None
  nested_classes = []
  for m in _CLASSES_REGEX.finditer(contents):
    preamble, class_name = m.groups()
    # Ignore annotations like @Foo("contains the words class Bar")
    if preamble.count('"') % 2 != 0:
      continue
    if outer_class is None:
      outer_class = java_types.JavaClass(f'{package}/{class_name}')
    else:
      nested_classes.append(outer_class.make_nested(class_name))

  if outer_class is None:
    raise ParseError('No classes found.')

  return outer_class, nested_classes


_ANNOTATION_REGEX = re.compile(
    r'@(?P<annotation_name>[\w.]+)(?P<annotation_args>\(\s*(?:[^)]+)\s*\))?\s*')
# Only supports ("foo")
_ANNOTATION_ARGS_REGEX = re.compile(
    r'\(\s*"(?P<annotation_value>[^"]*?)"\s*\)\s*')


def _parse_annotations(value):
  """Extracts the annotations contained in |value|.

  Returns:
    A tuple of (annotations, remainder). |annotations| maps each annotation
    name to its single string argument, or to '' when it has no argument or
    an argument that is not of the form ("foo"). |remainder| is the part of
    |value| that follows the last annotation.
  """
  annotations = {}
  last_idx = 0
  for m in _ANNOTATION_REGEX.finditer(value):
    string_value = ''
    if match_args := m.group('annotation_args'):
      if match_arg_value := _ANNOTATION_ARGS_REGEX.match(match_args):
        string_value = match_arg_value.group('annotation_value')
    annotations[m.group('annotation_name')] = string_value
    last_idx = m.end()

  return annotations, value[last_idx:]


def _parse_type(type_resolver, value):
  """Parses a string into a JavaType.

  Annotations are split off first, then each trailing "[]" adds one array
  dimension. What remains is either a primitive name or a class name that is
  resolved through |type_resolver|.
  """
  annotations, value = _parse_annotations(value)
  array_dimensions = 0
  while value.endswith('[]'):
    array_dimensions += 1
    value = value[:-2]

  if value in java_types.PRIMITIVES:
    primitive_name = value
    java_class = None
  else:
    primitive_name = None
    java_class = type_resolver.resolve(value)

  return java_types.JavaType(array_dimensions=array_dimensions,
                             primitive_name=primitive_name,
                             java_class=java_class,
                             annotations=annotations)


_FINAL_REGEX = re.compile(r'\bfinal\s')


def _parse_param_list(type_resolver, value) -> java_types.JavaParamList:
  """Parses the text between a method's parentheses into a JavaParamList.

  Parameters are split on ','. For each one, the last space-separated word is
  the parameter name and everything before it is the (possibly annotated)
  type. `final` is dropped and varargs ("...") are treated as an array.
  """
  if not value or value.isspace():
    return java_types.EMPTY_PARAM_LIST
  params = []
  value = _FINAL_REGEX.sub('', value)
  for param_str in value.split(','):
    param_str = param_str.strip()
    param_str, _, param_name = param_str.rpartition(' ')
    param_str = param_str.rstrip()

    # Handle varargs.
    if param_str.endswith('...'):
      param_str = param_str[:-3] + '[]'

    param_type = _parse_type(type_resolver, param_str)
    params.append(java_types.JavaParam(param_type, param_name))

  return java_types.JavaParamList(params)


_NATIVE_METHODS_INTERFACE_REGEX = re.compile(
    r'@NativeMethods(?:\(\s*"(?P<module_name>\w+)"\s*\))?[\S\s]+?'
    r'(?P<visibility>public)?\s*\binterface\s*'
    r'(?P<interface_name>\w*)\s*{(?P<interface_body>(\s*.*)+?\s*)}')

_PROXY_NATIVE_REGEX = re.compile(r'\s*(.*?)\s+(\w+)\((.*?)\);', flags=re.DOTALL)

_PUBLIC_REGEX = re.compile(r'\bpublic\s')


def _parse_proxy_natives(type_resolver, contents):
  """Parses the @NativeMethods interface, if the file has one.

  Returns:
    A _ParsedProxyNatives with its methods sorted, or None when there is no
    @NativeMethods interface.

  Raises:
    ParseError: if there is more than one @NativeMethods interface, or the
      interface declares no methods.
  """
  matches = list(_NATIVE_METHODS_INTERFACE_REGEX.finditer(contents))
  if not matches:
    return None
  if len(matches) > 1:
    raise ParseError(
        'Multiple @NativeMethod interfaces in one class is not supported.')

  match = matches[0]
  ret = _ParsedProxyNatives(interface_name=match.group('interface_name'),
                            visibility=match.group('visibility'),
                            module_name=match.group('module_name'),
                            methods=[])
  interface_body = match.group('interface_body')

  for m in _PROXY_NATIVE_REGEX.finditer(interface_body):
    # |preamble| is everything before the method name: annotations, an
    # optional `public`, and the return type.
    preamble, name, params_part = m.groups()
    preamble = _PUBLIC_REGEX.sub('', preamble)
    annotations, _ = _parse_annotations(preamble)
    params = _parse_param_list(type_resolver, params_part)
    return_type = _parse_type(type_resolver, preamble)
    signature = java_types.JavaSignature.from_params(return_type, params)
    ret.methods.append(
        ParsedNative(
            name=name,
            signature=signature,
            native_class_name=annotations.get('NativeClassQualifiedName')))
  if not ret.methods:
    raise ParseError('Found no methods within @NativeMethod interface.')
  ret.methods.sort()
  return ret


_NON_PROXY_NATIVES_REGEX = re.compile(
    r'(@NativeClassQualifiedName'
    r'\(\"(?P<native_class_name>\S*?)\"\)\s+)?'
    r'(?P<qualifiers>\w+\s\w+|\w+|\s+)\s*native\s+'
    r'(?P<return_type>\S*)\s+'
    r'(?P<name>native\w+)\((?P<params>.*?)\);', re.DOTALL)


def _parse_non_proxy_natives(type_resolver, contents):
  """Parses `native` methods whose names start with "native".

  Returns:
    A sorted list of ParsedNative. Each name has the leading "native" removed.
  """
  ret = []
  for match in _NON_PROXY_NATIVES_REGEX.finditer(contents):
    # The regex guarantees the "native" prefix. Strip only the prefix so that
    # later occurrences (e.g. in "nativeGetAlternativeName") are preserved.
    name = match.group('name')[len('native'):]
    return_type = _parse_type(type_resolver, match.group('return_type'))
    params = _parse_param_list(type_resolver, match.group('params'))
    signature = java_types.JavaSignature.from_params(return_type, params)
    native_class_name = match.group('native_class_name')
    static = 'static' in match.group('qualifiers')
    ret.append(
        ParsedNative(name=name,
                     signature=signature,
                     native_class_name=native_class_name,
                     static=static))
  ret.sort()
  return ret


# Regex to match a string like "@CalledByNative public void foo(int bar)".
_CALLED_BY_NATIVE_REGEX = re.compile(
    r'@CalledByNative((?P<Unchecked>(?:Unchecked)?|ForTesting))'
    r'(?:\("(?P<annotation_value>.*)"\))?'
    r'(?P<method_annotations>(?:\s*@\w+(?:\(.*?\))?)+)?'
    r'\s+(?P<modifiers>' + _MODIFIER_KEYWORDS + r')' +
    r'(?P<return_type_annotations>(?:\s*@\w+(?:\(.*?\))?)+)?'
    r'\s*(?P<return_type>\S*?)'
    r'\s*(?P<name>\w+)'
    r'\s*\(\s*(?P<params>[^{;]*)\)'
    r'\s*(?:throws\s+[^{;]+)?'
    r'[{;]')


def _parse_called_by_natives(type_resolver, contents):
  """Parses every @CalledByNative-annotated method and constructor.

  Returns:
    A sorted list of ParsedCalledByNative.

  Raises:
    ParseError: if an @CalledByNative annotation is present but the
      declaration after it does not match the expected shape.
  """
  ret = []
  for match in _CALLED_BY_NATIVE_REGEX.finditer(contents):
    return_type_grp = match.group('return_type')
    name = match.group('name')
    if return_type_grp:
      pre_annotations = match.group('method_annotations') or ''
      post_annotations = match.group('return_type_annotations') or ''
      # Combine all the annotations before parsing the return type.
      return_type_str = (f'{pre_annotations} {post_annotations}'
                         f' {return_type_grp}').strip()
      return_type = _parse_type(type_resolver, return_type_str)
    else:
      # No return type means the declaration is a constructor.
      return_type = java_types.VOID
      name = '<init>'

    params = _parse_param_list(type_resolver, match.group('params'))
    signature = java_types.JavaSignature.from_params(return_type, params)
    # @CalledByNative("Inner") names the nested class declaring the method.
    inner_class_name = match.group('annotation_value')
    java_class = type_resolver.java_class
    if inner_class_name:
      java_class = java_class.make_nested(inner_class_name)

    ret.append(
        ParsedCalledByNative(java_class=java_class,
                             name=name,
                             signature=signature,
                             static='static' in match.group('modifiers'),
                             unchecked='Unchecked' in match.group('Unchecked')))

  # Check for any @CalledByNative occurrences that were not matched.
  unmatched_lines = _CALLED_BY_NATIVE_REGEX.sub('', contents).splitlines()
  for i, line in enumerate(unmatched_lines):
    if '@CalledByNative' in line:
      context = '\n'.join(unmatched_lines[i:i + 5])
      raise ParseError('Could not parse @CalledByNative method signature:\n' +
                       context)

  ret.sort()
  return ret


_IMPORT_REGEX = re.compile(r'^import\s+([^\s*]+);', flags=re.MULTILINE)
# Group 1 is the package (everything before the first component that starts
# with an upper-case letter); group 2 is the class name, with nesting if any.
_IMPORT_CLASS_NAME_REGEX = re.compile(r'^(.*?)\.([A-Z].*)')


def _parse_imports(contents):
  """Yields a JavaClass for each class import found in |contents|.

  Nested classes in an import (Outer.Inner) are joined with '$'. Imports that
  have no component starting with an upper-case letter are skipped.
  """
  # Regex skips static imports as well as wildcard imports.
  names = _IMPORT_REGEX.findall(contents)
  for name in names:
    m = _IMPORT_CLASS_NAME_REGEX.match(name)
    if m:
      package, class_name = m.groups()
      yield java_types.JavaClass(
          package.replace('.', '/') + '/' + class_name.replace('.', '$'))


# Raw string: '\(' and '\)' are not valid escapes in a plain string literal.
_JNI_NAMESPACE_REGEX = re.compile(r'@JNINamespace\("(.*?)"\)')


def _parse_jni_namespace(contents):
  """Returns the @JNINamespace("...") argument, or '' when there is none.

  Raises:
    ParseError: if the annotation appears more than once.
  """
  m = _JNI_NAMESPACE_REGEX.findall(contents)
  if not m:
    return ''
  if len(m) > 1:
    raise ParseError('Found multiple @JNINamespace annotations.')
  return m[0]


def _do_parse(filename, *, package_prefix):
  """Reads and parses |filename|; see parse_java_file().

  Raises:
    ParseError: if the file cannot be parsed, or its outer class name does not
      match the file name.
  """
  assert not filename.endswith('.kt'), (
      f'Found {filename}, but Kotlin is not supported by JNI generator.')
  # Decode explicitly as UTF-8 instead of relying on the locale's default
  # encoding, which differs between platforms.
  with open(filename, encoding='utf-8') as f:
    contents = f.read()
  contents = _remove_comments(contents)
  contents = _remove_generics(contents)

  outer_class, nested_classes = _parse_java_classes(contents)

  expected_name = os.path.splitext(os.path.basename(filename))[0]
  if outer_class.name != expected_name:
    raise ParseError(
        f'Found class "{outer_class.name}" but expected "{expected_name}".')

  if package_prefix:
    outer_class = outer_class.make_prefixed(package_prefix)
    nested_classes = [c.make_prefixed(package_prefix) for c in nested_classes]

  type_resolver = java_types.TypeResolver(outer_class)
  for java_class in _parse_imports(contents):
    type_resolver.add_import(java_class)
  for java_class in nested_classes:
    type_resolver.add_nested_class(java_class)

  parsed_proxy_natives = _parse_proxy_natives(type_resolver, contents)
  jni_namespace = _parse_jni_namespace(contents)

  non_proxy_methods = _parse_non_proxy_natives(type_resolver, contents)
  called_by_natives = _parse_called_by_natives(type_resolver, contents)

  ret = ParsedFile(filename=filename,
                   jni_namespace=jni_namespace,
                   type_resolver=type_resolver,
                   proxy_methods=[],
                   non_proxy_methods=non_proxy_methods,
                   called_by_natives=called_by_natives,
                   constant_fields=[])

  if parsed_proxy_natives:
    ret.module_name = parsed_proxy_natives.module_name
    ret.proxy_interface = outer_class.make_nested(
        parsed_proxy_natives.interface_name)
    ret.proxy_visibility = parsed_proxy_natives.visibility
    ret.proxy_methods = parsed_proxy_natives.methods

  return ret


def parse_java_file(filename, *, package_prefix=None):
  """Parses a .java file into a ParsedFile.

  Args:
    filename: Path of the Java source file. Its base name (without extension)
      must equal the name of the outer class.
    package_prefix: If set, applied to the outer and nested classes via
      make_prefixed().

  Raises:
    ParseError: on any parse failure; the message is suffixed with the name of
      the file being parsed.
  """
  try:
    return _do_parse(filename, package_prefix=package_prefix)
  except ParseError as e:
    e.suffix = f' (when parsing {filename})'
    raise


_JAVAP_CLASS_REGEX = re.compile(r'\b(?:class|interface) (\S+)')
_JAVAP_FINAL_FIELD_REGEX = re.compile(
    r'^\s+public static final \S+ (.*?) = (\d+);', flags=re.MULTILINE)
_JAVAP_METHOD_REGEX = re.compile(
    rf'^\s*({_MODIFIER_KEYWORDS}).*?(\S+?)\(.*\n\s+descriptor: (.*)',
    flags=re.MULTILINE)


def parse_javap(filename, contents):
  """Parses javap output into a ParsedFile.

  Every method that is followed by a "descriptor:" line becomes a
  ParsedCalledByNative, and every `public static final` field with an integer
  literal value becomes a ParsedConstantField.

  Args:
    filename: Stored in the returned ParsedFile; the file is not read here.
    contents: The javap output text.

  Raises:
    ParseError: if no class or interface declaration is found.
  """
  contents = _remove_generics(contents)
  match = _JAVAP_CLASS_REGEX.search(contents)
  if not match:
    raise ParseError('Could not find java class in javap output')
  java_class = java_types.JavaClass(match.group(1).replace('.', '/'))
  type_resolver = java_types.TypeResolver(java_class)

  constant_fields = []
  for match in _JAVAP_FINAL_FIELD_REGEX.finditer(contents):
    name, value = match.groups()
    constant_fields.append(ParsedConstantField(name=name, value=value))
  constant_fields.sort()

  called_by_natives = []
  for match in _JAVAP_METHOD_REGEX.finditer(contents):
    modifiers, name, descriptor = match.groups()
    # javap prints constructors using the fully-qualified class name.
    if name == java_class.full_name_with_dots:
      name = '<init>'
    signature = java_types.JavaSignature.from_descriptor(descriptor)

    called_by_natives.append(
        ParsedCalledByNative(java_class=java_class,
                             name=name,
                             signature=signature,
                             static='static' in modifiers))
  called_by_natives.sort()

  # Although javac will not allow multiple methods with no args and different
  # return types, Class.class has just that, and it breaks with our
  # name-mangling logic which assumes this cannot happen.
  if java_class.full_name_with_slashes == 'java/lang/Class':
    called_by_natives = [
        x for x in called_by_natives if 'TypeDescriptor' not in (
            x.signature.return_type.non_array_full_name_with_slashes)
    ]

  return ParsedFile(filename=filename,
                    type_resolver=type_resolver,
                    proxy_methods=[],
                    non_proxy_methods=[],
                    called_by_natives=called_by_natives,
                    constant_fields=constant_fields)