xref: /aosp_15_r20/external/cronet/third_party/jni_zero/parse.py (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1# Copyright 2023 The Chromium Authors
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4import dataclasses
5import os
6import re
7from typing import List
8from typing import Optional
9
10import java_types
11
12_MODIFIER_KEYWORDS = (r'(?:(?:' + '|'.join([
13    'abstract',
14    'default',
15    'final',
16    'native',
17    'private',
18    'protected',
19    'public',
20    'static',
21    'synchronized',
22]) + r')\s+)*')
23
24
class ParseError(Exception):
  """Error raised when an input file cannot be parsed.

  The class-level `suffix` is appended to the message when the exception is
  converted to a string; handlers may overwrite it on an instance to add
  context before re-raising.
  """
  suffix = ''

  def __str__(self):
    message = super().__str__()
    return f'{message}{self.suffix}'
30
31
@dataclasses.dataclass(order=True)
class ParsedNative:
  """A Java native method found by the parser.

  order=True makes instances sortable field by field, in declaration order.
  """
  # Method name. For non-proxy natives this is the Java name with 'native'
  # stripped out.
  name: str
  signature: java_types.JavaSignature
  # Value of @NativeClassQualifiedName("..."), or None when the annotation is
  # absent.
  native_class_name: Optional[str]
  # Only set by the non-proxy parser; proxy natives keep the default.
  static: bool = False
38
39
@dataclasses.dataclass(order=True)
class ParsedCalledByNative:
  """A Java method or constructor that native code may call.

  order=True makes instances sortable field by field, in declaration order.
  """
  # Class declaring the method; a nested class when the annotation carries a
  # value, e.g. @CalledByNative("Inner").
  java_class: java_types.JavaClass
  # Method name, or '<init>' for constructors.
  name: str
  signature: java_types.JavaSignature
  static: bool
  # True for @CalledByNativeUnchecked.
  unchecked: bool = False
47
48
@dataclasses.dataclass(order=True)
class ParsedConstantField:
  """A constant field: its name and its value kept as source text.

  order=True makes instances sortable by (name, value).
  """
  name: str
  value: str
53
54
@dataclasses.dataclass
class ParsedFile:
  """Everything the parser extracted from a single input file."""
  # The filename that was handed to the parser.
  filename: str
  type_resolver: java_types.TypeResolver
  # Methods declared inside the @NativeMethods interface (empty if none).
  proxy_methods: List[ParsedNative]
  # Natives declared directly on the class as `native ... nativeFoo(...)`.
  non_proxy_methods: List[ParsedNative]
  called_by_natives: List[ParsedCalledByNative]
  # Populated by parse_javap(); the .java parser always leaves this empty.
  constant_fields: List[ParsedConstantField]
  # The three proxy_* / module_name fields are only filled in when a
  # @NativeMethods interface was found.
  proxy_interface: Optional[java_types.JavaClass] = None
  proxy_visibility: Optional[str] = None
  module_name: Optional[str] = None  # E.g. @NativeMethods("module_name")
  jni_namespace: Optional[str] = None  # E.g. @JNINamespace("content")
67
68
@dataclasses.dataclass
class _ParsedProxyNatives:
  """Intermediate result of parsing a @NativeMethods interface."""
  # Name of the interface as written in the source.
  interface_name: str
  # 'public' when the interface is declared public, otherwise None.
  visibility: Optional[str]
  # Argument of @NativeMethods("..."), or None when no argument was given.
  module_name: Optional[str]
  # Methods declared in the interface body, sorted.
  methods: List[ParsedNative]
75
76
77# Match single line comments, multiline comments, character literals, and
78# double-quoted strings.
79_COMMENT_REMOVER_REGEX = re.compile(
80    r'//.*?$|/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"',
81    re.DOTALL | re.MULTILINE)
82
83
84def _remove_comments(contents):
85  # We need to support both inline and block comments, and we need to handle
86  # strings that contain '//' or '/*'.
87  def replacer(match):
88    # Replace matches that are comments with nothing; return literals/strings
89    # unchanged.
90    s = match.group(0)
91    if s.startswith('/'):
92      return ''
93    else:
94      return s
95
96  return _COMMENT_REMOVER_REGEX.sub(replacer, contents)
97
98
99# Remove everything between and including <> except at the end of a string, e.g.
100# @JniType("std::vector<int>")
101# This will also break lines with comparison operators, but we don't care.
102_GENERICS_REGEX = re.compile(r'<[^<>\n]*>(?!>*")')
103
104
105def _remove_generics(value):
106  """Strips Java generics from a string."""
107  while True:
108    ret = _GENERICS_REGEX.sub('', value)
109    if len(ret) == len(value):
110      return ret
111    value = ret
112
113
114_PACKAGE_REGEX = re.compile('^package\s+(\S+?);', flags=re.MULTILINE)
115
116
117def _parse_package(contents):
118  match = _PACKAGE_REGEX.search(contents)
119  if not match:
120    raise ParseError('Unable to find "package" line')
121  return match.group(1)
122
123
# Matches a line that declares a class, interface or enum. Group 1 is the text
# before the declaration on that line, group 2 the declared name.
_CLASSES_REGEX = re.compile(
    r'^(.*?)(?:\b(?:public|protected|private)?\b)\s*'
    r'(?:\b(?:static|abstract|final|sealed)\s+)*'
    r'\b(?:class|interface|enum)\s+(\w+?)\b[^"]*?$',
    flags=re.MULTILINE)


def _parse_java_classes(contents):
  """Finds the outer class and its nested classes.

  The first declaration found is taken as the outer class; every later one is
  recorded as directly nested in it. Does not handle doubly-nested classes.

  Returns:
    A tuple of (outer JavaClass, list of nested JavaClass).

  Raises:
    ParseError: If the package line is missing or no class is found.
  """
  package_path = _parse_package(contents).replace('.', '/')
  outer_class = None
  nested_classes = []
  for declaration in _CLASSES_REGEX.finditer(contents):
    preamble, class_name = declaration.groups()
    # An odd number of quotes before the keyword means it sits inside a string,
    # e.g. @Foo("contains the words class Bar").
    if preamble.count('"') % 2:
      continue
    if outer_class is not None:
      nested_classes.append(outer_class.make_nested(class_name))
      continue
    outer_class = java_types.JavaClass(f'{package_path}/{class_name}')

  if outer_class is None:
    raise ParseError('No classes found.')
  return outer_class, nested_classes
150
151
152_ANNOTATION_REGEX = re.compile(
153    r'@(?P<annotation_name>[\w.]+)(?P<annotation_args>\(\s*(?:[^)]+)\s*\))?\s*')
154# Only supports ("foo")
155_ANNOTATION_ARGS_REGEX = re.compile(
156    r'\(\s*"(?P<annotation_value>[^"]*?)"\s*\)\s*')
157
158def _parse_annotations(value):
159  annotations = {}
160  last_idx = 0
161  for m in _ANNOTATION_REGEX.finditer(value):
162    string_value = ''
163    if match_args := m.group('annotation_args'):
164      if match_arg_value := _ANNOTATION_ARGS_REGEX.match(match_args):
165        string_value = match_arg_value.group('annotation_value')
166    annotations[m.group('annotation_name')] = string_value
167    last_idx = m.end()
168
169  return annotations, value[last_idx:]
170
171
def _parse_type(type_resolver, value):
  """Parses a string into a JavaType.

  Leading annotations are split off first, then each trailing '[]' adds one
  array dimension. What remains is either a primitive name or a class name
  that |type_resolver| resolves.
  """
  annotations, type_name = _parse_annotations(value)

  array_dimensions = 0
  while type_name.endswith('[]'):
    type_name = type_name[:-2]
    array_dimensions += 1

  is_primitive = type_name in java_types.PRIMITIVES
  return java_types.JavaType(
      array_dimensions=array_dimensions,
      primitive_name=type_name if is_primitive else None,
      java_class=None if is_primitive else type_resolver.resolve(type_name),
      annotations=annotations)
191
192
# Matches the `final` modifier together with the whitespace character after it.
_FINAL_REGEX = re.compile(r'\bfinal\s')


def _parse_param_list(type_resolver, value) -> java_types.JavaParamList:
  """Parses the text between a method's parentheses into a JavaParamList.

  Empty or whitespace-only input yields java_types.EMPTY_PARAM_LIST.
  """
  if not value or value.isspace():
    return java_types.EMPTY_PARAM_LIST

  def parse_one(declaration):
    # The parameter name is the text after the last space; everything before
    # it is the (possibly annotated) type.
    type_str, _, param_name = declaration.strip().rpartition(' ')
    type_str = type_str.rstrip()
    # Handle varargs: `Foo...` is treated as `Foo[]`.
    if type_str.endswith('...'):
      type_str = type_str[:-3] + '[]'
    return java_types.JavaParam(_parse_type(type_resolver, type_str),
                                param_name)

  declarations = _FINAL_REGEX.sub('', value).split(',')
  return java_types.JavaParamList([parse_one(d) for d in declarations])
214
215
# Matches an @NativeMethods-annotated interface, capturing the optional module
# name, the optional `public` keyword, the interface name and its body.
_NATIVE_METHODS_INTERFACE_REGEX = re.compile(
    r'@NativeMethods(?:\(\s*"(?P<module_name>\w+)"\s*\))?[\S\s]+?'
    r'(?P<visibility>public)?\s*\binterface\s*'
    r'(?P<interface_name>\w*)\s*{(?P<interface_body>(\s*.*)+?\s*)}')

# Matches one method declaration in the interface body. Groups are: the text
# before the name (annotations, modifiers, return type), the name, the params.
_PROXY_NATIVE_REGEX = re.compile(r'\s*(.*?)\s+(\w+)\((.*?)\);', flags=re.DOTALL)

_PUBLIC_REGEX = re.compile(r'\bpublic\s')


def _parse_proxy_natives(type_resolver, contents):
  """Parses the @NativeMethods interface in |contents|, if there is one.

  Returns:
    A _ParsedProxyNatives with its methods sorted, or None when |contents|
    has no @NativeMethods interface.

  Raises:
    ParseError: If there is more than one such interface, or the interface
      declares no methods.
  """
  interfaces = _NATIVE_METHODS_INTERFACE_REGEX.finditer(contents)
  interface = next(interfaces, None)
  if interface is None:
    return None
  if next(interfaces, None) is not None:
    raise ParseError(
        'Multiple @NativeMethod interfaces in one class is not supported.')

  methods = []
  body = interface.group('interface_body')
  for declaration in _PROXY_NATIVE_REGEX.finditer(body):
    preamble, name, params_part = declaration.groups()
    # `public` is dropped so the rest of the preamble can be parsed as an
    # annotated return type.
    preamble = _PUBLIC_REGEX.sub('', preamble)
    annotations, _ = _parse_annotations(preamble)
    params = _parse_param_list(type_resolver, params_part)
    return_type = _parse_type(type_resolver, preamble)
    methods.append(
        ParsedNative(
            name=name,
            signature=java_types.JavaSignature.from_params(return_type, params),
            native_class_name=annotations.get('NativeClassQualifiedName')))

  if not methods:
    raise ParseError('Found no methods within @NativeMethod interface.')

  return _ParsedProxyNatives(interface_name=interface.group('interface_name'),
                             visibility=interface.group('visibility'),
                             module_name=interface.group('module_name'),
                             methods=sorted(methods))
257
258
# Matches a non-proxy native declaration such as
#   @NativeClassQualifiedName("Foo") private static native int nativeBar(...);
# The method name must start with 'native'.
_NON_PROXY_NATIVES_REGEX = re.compile(
    r'(@NativeClassQualifiedName'
    r'\(\"(?P<native_class_name>\S*?)\"\)\s+)?'
    r'(?P<qualifiers>\w+\s\w+|\w+|\s+)\s*native\s+'
    r'(?P<return_type>\S*)\s+'
    r'(?P<name>native\w+)\((?P<params>.*?)\);', re.DOTALL)

_NATIVE_NAME_PREFIX = 'native'


def _parse_non_proxy_natives(type_resolver, contents):
  """Parses `native` method declarations named nativeFoo().

  Args:
    type_resolver: Resolver used for return and parameter types.
    contents: Java source text with comments and generics already removed.

  Returns:
    A sorted list of ParsedNative. Each name has the leading 'native' prefix
    removed.
  """
  ret = []
  for match in _NON_PROXY_NATIVES_REGEX.finditer(contents):
    # The regex guarantees the name starts with the prefix. Strip only that
    # leading occurrence: replacing every 'native' would also corrupt names
    # such as nativeSetAlternativeName ("Alter" + "native" + "Name").
    name = match.group('name')[len(_NATIVE_NAME_PREFIX):]
    return_type = _parse_type(type_resolver, match.group('return_type'))
    params = _parse_param_list(type_resolver, match.group('params'))
    signature = java_types.JavaSignature.from_params(return_type, params)
    native_class_name = match.group('native_class_name')
    static = 'static' in match.group('qualifiers')
    ret.append(
        ParsedNative(name=name,
                     signature=signature,
                     native_class_name=native_class_name,
                     static=static))
  ret.sort()
  return ret
283
284
# Regex to match a string like "@CalledByNative public void foo(int bar)".
# The annotation value uses [^"]* rather than a greedy .* so that it stops at
# its own closing quote even when another annotation with a string argument
# follows on the same line, e.g.
#   @CalledByNative("Inner") @JniType("std::string") String foo() {
_CALLED_BY_NATIVE_REGEX = re.compile(
    r'@CalledByNative((?P<Unchecked>(?:Unchecked)?|ForTesting))'
    r'(?:\("(?P<annotation_value>[^"]*)"\))?'
    r'(?P<method_annotations>(?:\s*@\w+(?:\(.*?\))?)+)?'
    r'\s+(?P<modifiers>' + _MODIFIER_KEYWORDS + r')' +
    r'(?P<return_type_annotations>(?:\s*@\w+(?:\(.*?\))?)+)?'
    r'\s*(?P<return_type>\S*?)'
    r'\s*(?P<name>\w+)'
    r'\s*\(\s*(?P<params>[^{;]*)\)'
    r'\s*(?:throws\s+[^{;]+)?'
    r'[{;]')


def _parse_called_by_natives(type_resolver, contents):
  """Parses all @CalledByNative methods and constructors in |contents|.

  Args:
    type_resolver: Resolver for types; its java_class is the declaring class
      unless the annotation names a nested class.
    contents: Java source text with comments and generics already removed.

  Returns:
    A sorted list of ParsedCalledByNative.

  Raises:
    ParseError: If an @CalledByNative annotation is present but the
      declaration after it could not be matched.
  """
  ret = []
  for match in _CALLED_BY_NATIVE_REGEX.finditer(contents):
    return_type_grp = match.group('return_type')
    name = match.group('name')
    if return_type_grp:
      pre_annotations = match.group('method_annotations') or ''
      post_annotations = match.group('return_type_annotations') or ''
      # Combine all the annotations before parsing the return type.
      return_type_str = str.strip(f'{pre_annotations} {post_annotations}'
                                  f' {return_type_grp}')
      return_type = _parse_type(type_resolver, return_type_str)
    else:
      # No return type means this is a constructor.
      return_type = java_types.VOID
      name = '<init>'

    params = _parse_param_list(type_resolver, match.group('params'))
    signature = java_types.JavaSignature.from_params(return_type, params)
    inner_class_name = match.group('annotation_value')
    java_class = type_resolver.java_class
    if inner_class_name:
      java_class = java_class.make_nested(inner_class_name)

    ret.append(
        ParsedCalledByNative(java_class=java_class,
                             name=name,
                             signature=signature,
                             static='static' in match.group('modifiers'),
                             unchecked='Unchecked' in match.group('Unchecked')))

  # Check for any @CalledByNative occurrences that were not matched.
  unmatched_lines = _CALLED_BY_NATIVE_REGEX.sub('', contents).splitlines()
  for i, line in enumerate(unmatched_lines):
    if '@CalledByNative' in line:
      # Include a few following lines so the failing signature is visible.
      context = '\n'.join(unmatched_lines[i:i + 5])
      raise ParseError('Could not parse @CalledByNative method signature:\n' +
                       context)

  ret.sort()
  return ret
339
340
# Captures the dotted name of an import statement. Names containing whitespace
# or '*' cannot match, which excludes static and wildcard imports.
_IMPORT_REGEX = re.compile(r'^import\s+([^\s*]+);', flags=re.MULTILINE)
# Splits a dotted name at the first component that starts with an upper-case
# letter: group 1 is the package, group 2 the (possibly nested) class name.
_IMPORT_CLASS_NAME_REGEX = re.compile(r'^(.*?)\.([A-Z].*)')


def _parse_imports(contents):
  """Yields a JavaClass for each class imported by |contents|.

  Static and wildcard imports are skipped, as are names with no component
  starting with an upper-case letter.
  """
  for imported_name in _IMPORT_REGEX.findall(contents):
    parts = _IMPORT_CLASS_NAME_REGEX.match(imported_name)
    if not parts:
      continue
    package, class_name = parts.groups()
    package_path = package.replace('.', '/')
    # Nested classes are joined to their outer class with '$'.
    nested_name = class_name.replace('.', '$')
    yield java_types.JavaClass(package_path + '/' + nested_name)
354
355
356_JNI_NAMESPACE_REGEX = re.compile('@JNINamespace\("(.*?)"\)')
357
358
359def _parse_jni_namespace(contents):
360  m = _JNI_NAMESPACE_REGEX.findall(contents)
361  if not m:
362    return ''
363  if len(m) > 1:
364    raise ParseError('Found multiple @JNINamespace annotations.')
365  return m[0]
366
367
def _do_parse(filename, *, package_prefix):
  """Parses a .java file into a ParsedFile.

  Args:
    filename: Path of the Java source file. The outer class name must equal
      the file's base name.
    package_prefix: If truthy, applied to the outer and nested classes via
      make_prefixed().

  Returns:
    A ParsedFile whose constant_fields list is empty.

  Raises:
    ParseError: If the file cannot be parsed or the class name does not match
      the file name.
  """
  assert not filename.endswith('.kt'), (
      f'Found {filename}, but Kotlin is not supported by JNI generator.')
  # Read as UTF-8 explicitly; the default would depend on the machine's locale
  # and could fail on non-ASCII characters in the source.
  with open(filename, encoding='utf-8') as f:
    contents = f.read()
  # Comments must go first so that text inside them is never parsed as code.
  contents = _remove_comments(contents)
  contents = _remove_generics(contents)

  outer_class, nested_classes = _parse_java_classes(contents)

  expected_name = os.path.splitext(os.path.basename(filename))[0]
  if outer_class.name != expected_name:
    raise ParseError(
        f'Found class "{outer_class.name}" but expected "{expected_name}".')

  if package_prefix:
    outer_class = outer_class.make_prefixed(package_prefix)
    nested_classes = [c.make_prefixed(package_prefix) for c in nested_classes]

  type_resolver = java_types.TypeResolver(outer_class)
  for java_class in _parse_imports(contents):
    type_resolver.add_import(java_class)
  for java_class in nested_classes:
    type_resolver.add_nested_class(java_class)

  parsed_proxy_natives = _parse_proxy_natives(type_resolver, contents)
  jni_namespace = _parse_jni_namespace(contents)

  non_proxy_methods = _parse_non_proxy_natives(type_resolver, contents)
  called_by_natives = _parse_called_by_natives(type_resolver, contents)

  ret = ParsedFile(filename=filename,
                   jni_namespace=jni_namespace,
                   type_resolver=type_resolver,
                   proxy_methods=[],
                   non_proxy_methods=non_proxy_methods,
                   called_by_natives=called_by_natives,
                   constant_fields=[])

  # The proxy-related fields keep their defaults when the file has no
  # @NativeMethods interface.
  if parsed_proxy_natives:
    ret.module_name = parsed_proxy_natives.module_name
    ret.proxy_interface = outer_class.make_nested(
        parsed_proxy_natives.interface_name)
    ret.proxy_visibility = parsed_proxy_natives.visibility
    ret.proxy_methods = parsed_proxy_natives.methods

  return ret
415
416
def parse_java_file(filename, *, package_prefix=None):
  """Parses the Java source file at |filename| into a ParsedFile.

  A ParseError raised while parsing is re-raised with the file name appended
  to its message.
  """
  try:
    parsed = _do_parse(filename, package_prefix=package_prefix)
  except ParseError as error:
    error.suffix = f' (when parsing {filename})'
    raise
  return parsed
423
424
# Captures the fully qualified name of the class or interface being described.
_JAVAP_CLASS_REGEX = re.compile(r'\b(?:class|interface) (\S+)')
# Captures (name, value) of `public static final` fields whose value is a run
# of digits.
_JAVAP_FINAL_FIELD_REGEX = re.compile(
    r'^\s+public static final \S+ (.*?) = (\d+);', flags=re.MULTILINE)
# Captures (modifiers, name, descriptor) of a method line together with the
# `descriptor:` line right after it.
_JAVAP_METHOD_REGEX = re.compile(
    rf'^\s*({_MODIFIER_KEYWORDS}).*?(\S+?)\(.*\n\s+descriptor: (.*)',
    flags=re.MULTILINE)


def parse_javap(filename, contents):
  """Builds a ParsedFile from javap output.

  Args:
    filename: Stored as-is on the returned ParsedFile.
    contents: The javap output text; it must contain a `descriptor:` line
      after each method for that method to be picked up.

  Returns:
    A ParsedFile with sorted called_by_natives and constant_fields, and no
    native methods.

  Raises:
    ParseError: If no class or interface declaration is found.
  """
  contents = _remove_generics(contents)
  class_match = _JAVAP_CLASS_REGEX.search(contents)
  if not class_match:
    raise ParseError('Could not find java class in javap output')
  java_class = java_types.JavaClass(class_match.group(1).replace('.', '/'))
  type_resolver = java_types.TypeResolver(java_class)

  constant_fields = sorted(
      ParsedConstantField(name=field_name, value=field_value)
      for field_name, field_value in _JAVAP_FINAL_FIELD_REGEX.findall(contents))

  called_by_natives = []
  for modifiers, name, descriptor in _JAVAP_METHOD_REGEX.findall(contents):
    # A method named after the class itself is a constructor.
    is_constructor = name == java_class.full_name_with_dots
    called_by_natives.append(
        ParsedCalledByNative(
            java_class=java_class,
            name='<init>' if is_constructor else name,
            signature=java_types.JavaSignature.from_descriptor(descriptor),
            static='static' in modifiers))
  called_by_natives.sort()

  # Although javac will not allow multiple methods with no args and different
  # return types, Class.class has just that, and it breaks with our
  # name-mangling logic which assumes this cannot happen.
  if java_class.full_name_with_slashes == 'java/lang/Class':

    def returns_type_descriptor(method):
      return_type = method.signature.return_type
      return 'TypeDescriptor' in return_type.non_array_full_name_with_slashes

    called_by_natives = [
        m for m in called_by_natives if not returns_type_descriptor(m)
    ]

  return ParsedFile(filename=filename,
                    type_resolver=type_resolver,
                    proxy_methods=[],
                    non_proxy_methods=[],
                    called_by_natives=called_by_natives,
                    constant_fields=constant_fields)
476