xref: /aosp_15_r20/build/soong/cc/symbolfile/__init__.py (revision 333d2b3687b3a337dbcca9d65000bca186795e39)
1#
2# Copyright (C) 2016 The Android Open Source Project
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8#      http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15#
16"""Parser for Android's version script information."""
17from __future__ import annotations
18
19from dataclasses import dataclass, field
20import logging
21import re
22from typing import (
23    Dict,
24    Iterable,
25    Iterator,
26    List,
27    Mapping,
28    NewType,
29    Optional,
30    TextIO,
31    Tuple,
32    Union,
33)
34
35
# Maps an API level name to its integer API level. decode_api_level consults
# this for any level that is neither numeric nor "current".
ApiMap = Mapping[str, int]
# Distinct string types so architecture names and tag text are not confused
# with arbitrary strings by the type checker.
Arch = NewType('Arch', str)
Tag = NewType('Tag', str)


# Architecture names recognized as arch tags (see symbol_in_arch).
ALL_ARCHITECTURES = (
    Arch('arm'),
    Arch('arm64'),
    Arch('riscv64'),
    Arch('x86'),
    Arch('x86_64'),
)

# Tags accepted as-is by decode_api_level_tag. Any tag that is neither listed
# here nor an API level tag (see is_api_level_tag) is rejected there.
# TODO: it would be nice to dedupe with 'has_*_tag' property methods
SUPPORTED_TAGS = ALL_ARCHITECTURES + (
    Tag('apex'),
    Tag('llndk'),
    Tag('platform-only'),
    Tag('systemapi'),
    Tag('var'),
    Tag('weak'),
)

# Arbitrary magic number. We use the same one in api-level.h for this purpose.
# This is the level that the "current" API name decodes to.
FUTURE_API_LEVEL = 10000
61
62
def logger() -> logging.Logger:
    """Return the logger named after this module."""
    module_logger = logging.getLogger(__name__)
    return module_logger
66
67
@dataclass
class Tags:
    """Container class for the tags attached to a symbol or version.

    Wraps a tuple of tags and supports membership tests (`tag in tags`) and
    iteration, plus convenience properties for the mode tags.
    """

    # The tags, in the order they were given.
    tags: tuple[Tag, ...] = field(default_factory=tuple)

    @classmethod
    def from_strs(cls, strs: Iterable[str]) -> Tags:
        """Constructs tags from a collection of strings.

        Does not decode API levels.

        Returns an instance of the class this is called on, so subclasses get
        an instance of their own type.
        """
        return cls(tuple(Tag(s) for s in strs))

    def __contains__(self, tag: Union[Tag, str]) -> bool:
        """Returns True if the given tag is one of the contained tags."""
        return tag in self.tags

    def __iter__(self) -> Iterator[Tag]:
        """Iterates over the contained tags in order."""
        yield from self.tags

    @property
    def has_mode_tags(self) -> bool:
        """Returns True if any mode tags (apex, llndk, etc) are set."""
        return self.has_apex_tags or self.has_llndk_tags or self.has_systemapi_tags

    @property
    def has_apex_tags(self) -> bool:
        """Returns True if any APEX tags are set."""
        return 'apex' in self.tags

    @property
    def has_systemapi_tags(self) -> bool:
        """Returns True if any systemapi tags are set."""
        return 'systemapi' in self.tags

    @property
    def has_llndk_tags(self) -> bool:
        """Returns True if any LL-NDK tags are set."""
        return 'llndk' in self.tags

    @property
    def has_platform_only_tags(self) -> bool:
        """Returns True if any platform-only tags are set.

        Note that platform-only is not counted as a mode tag by
        has_mode_tags.
        """
        return 'platform-only' in self.tags
112
113
@dataclass
class Symbol:
    """A symbol definition from a symbol file."""

    # The symbol's name: the text before the ';' on the symbol's line.
    name: str
    # Tags parsed from the '#' comment that follows the symbol on its line.
    tags: Tags
120
121
@dataclass
class Version:
    """One version block parsed out of a symbol file."""

    # Name of the version block (the text before its opening '{').
    name: str
    # Name given after the closing '}' of the block, or None if there is none.
    base: Optional[str]
    # Tags attached to the line that opens the block.
    tags: Tags
    # Symbols collected from the block.
    symbols: List[Symbol]

    @property
    def is_private(self) -> bool:
        """Returns True if this version block is private (platform only).

        A block is private when its name ends in _PRIVATE or _PLATFORM.
        """
        private_suffixes = ('_PRIVATE', '_PLATFORM')
        return self.name.endswith(private_suffixes)
135
136
def get_tags(line: str, api_map: ApiMap) -> Tags:
    """Returns the Tags found in the trailing comment of this line.

    Everything after the first '#' is split on whitespace, and each word is
    passed through decode_api_level_tag so API level names are decoded.
    """
    comment = line.strip().partition('#')[2]
    # str.split() with no separator splits on whitespace runs and never
    # yields empty strings.
    decoded = tuple(
        decode_api_level_tag(Tag(word), api_map) for word in comment.split()
    )
    return Tags(decoded)
144
145
def is_api_level_tag(tag: Tag) -> bool:
    """Returns true if this tag has an API level that may need decoding."""
    # 'introduced-' covers the per-architecture form (introduced-<arch>=...).
    api_level_prefixes = (
        'llndk-deprecated=',
        'introduced=',
        'introduced-',
        'versioned=',
    )
    return tag.startswith(api_level_prefixes)
157
158
def decode_api_level(api: str, api_map: ApiMap) -> int:
    """Decodes the API level argument into the API level number.

    Numeric strings are converted directly. "current" decodes to
    FUTURE_API_LEVEL. Anything else is treated as an API codename (like "O")
    and looked up in api_map, which raises KeyError for unknown names.
    """
    try:
        numeric = int(api)
    except ValueError:
        # Not a plain number; resolve it by name below.
        numeric = None

    if numeric is not None:
        return numeric

    return FUTURE_API_LEVEL if api == "current" else api_map[api]
175
176
def decode_api_level_tag(tag: Tag, api_map: ApiMap) -> Tag:
    """Decodes API level code name in a tag.

    API level tags are returned with their value replaced by the decoded
    number. Any other tag is returned unchanged, provided it is one of
    SUPPORTED_TAGS.

    Raises:
        ParseError: The tag is unsupported, or an unknown version name was
            found in a tag.
    """
    if is_api_level_tag(tag):
        name, value = split_tag(tag)
        try:
            level = decode_api_level(value, api_map)
        except KeyError as ex:
            raise ParseError(f'Unknown version name in tag: {tag}') from ex
        decoded = str(level)
        return Tag(f'{name}={decoded}')

    if tag in SUPPORTED_TAGS:
        return tag

    raise ParseError(f'Unsupported tag: {tag}')
195
196
def split_tag(tag: Tag) -> Tuple[str, str]:
    """Splits a key/value tag at its first '='.

    Raises:
        ValueError: Tag is not a key/value type tag.

    Returns: Tuple of (key, value) of the tag. Both components are strings.
    """
    key, separator, value = tag.partition('=')
    if not separator:
        # partition() yields an empty separator when '=' is absent.
        raise ValueError('Not a key/value tag: ' + tag)
    return key, value
209
210
def get_tag_value(tag: Tag) -> str:
    """Returns the part of a key/value tag that follows the first '='.

    Raises:
        ValueError: Tag is not a key/value type tag.

    Returns: Value part of tag as a string.
    """
    _, value = split_tag(tag)
    return value
220
class Filter:
    """A filter encapsulates a condition that tells whether a version or a
    symbol should be omitted or not
    """

    def __init__(
        self,
        arch: Arch,
        api: int,
        llndk: bool = False,
        apex: bool = False,
        systemapi: bool = False,
        ndk: bool = True,
    ):
        """Stores the target architecture, API level and mode flags.

        llndk, apex and systemapi select which mode-tagged objects are kept.
        ndk controls whether symbols without any mode tag are kept.
        """
        self.arch = arch
        self.api = api
        self.llndk = llndk
        self.apex = apex
        self.systemapi = systemapi
        self.ndk = ndk

    def _symbol_in_arch_api(self, tags: Tags) -> bool:
        """Returns True if the tags rule the object out for this arch or API.

        Despite the name, the result is an "omit" verdict: it is False only
        when the object is present for both self.arch and self.api.
        """
        present = symbol_in_arch(tags, self.arch) and symbol_in_api(
            tags, self.arch, self.api)
        return not present

    def _should_omit_tags(self, tags: Tags) -> bool:
        """Returns True if the tagged object should be omitted.

        This defines the rules shared between version tagging and symbol tagging.
        """
        if not tags.has_mode_tags:
            # No mode tags: default behavior, decided by arch and API alone.
            return self._symbol_in_arch_api(tags)

        # The apex and llndk tags will only exclude APIs from other modes. If in
        # APEX or LLNDK mode and neither tag is provided, we fall back to the
        # default behavior because all NDK symbols are implicitly available to
        # APEX and LLNDK.
        if self.apex and tags.has_apex_tags:
            return False
        if self.systemapi and tags.has_systemapi_tags:
            return False
        if self.llndk and tags.has_llndk_tags:
            return self._symbol_in_arch_api(tags)
        # Tagged for a mode this filter is not in.
        return True

    def should_omit_version(self, version: Version) -> bool:
        """Returns True if the version section should be omitted.

        We want to omit any sections that do not have any symbols we'll have in
        the stub library. Sections that contain entirely future symbols or only
        symbols for certain architectures.
        """
        if version.is_private or version.tags.has_platform_only_tags:
            return True
        return self._should_omit_tags(version.tags)

    def should_omit_symbol(self, symbol: Symbol) -> bool:
        """Returns True if the symbol should be omitted."""
        # Symbols that don't have mode tags are NDK. They are usually
        # included, but have to be omitted if NDK symbols are explicitly
        # filtered-out
        is_ndk_symbol = not symbol.tags.has_mode_tags
        if is_ndk_symbol and not self.ndk:
            return True

        return self._should_omit_tags(symbol.tags)
283
284
def symbol_in_arch(tags: Tags, arch: Arch) -> bool:
    """Returns true if the symbol is present for the given architecture."""
    present = tuple(tags)
    if arch in present:
        return True

    # If there were no arch tags, the symbol is available for all
    # architectures. If there were any arch tags, the symbol is only available
    # for the tagged architectures.
    return not any(tag in ALL_ARCHITECTURES for tag in present)
298
299
def symbol_in_api(tags: Iterable[Tag], arch: Arch, api: int) -> bool:
    """Returns true if the symbol is present for the given API level."""
    all_tags = list(tags)

    # A 'future' tag decides the answer on its own, whatever else is present.
    if 'future' in all_tags:
        return api == FUTURE_API_LEVEL

    # If there is an arch-specific tag, it should override the common one.
    # Within either group the last tag listed wins.
    arch_specific = [
        t for t in all_tags if t.startswith('introduced-' + arch + '=')
    ]
    common = [t for t in all_tags if t.startswith('introduced=')]
    candidates = arch_specific or common

    if not candidates:
        # We found no "introduced" tags, so the symbol has always been
        # available.
        return True

    return api >= int(get_tag_value(candidates[-1]))
320
321
def symbol_versioned_in_api(tags: Iterable[Tag], api: int) -> bool:
    """Returns true if the symbol should be versioned for the given API.

    This models the `versioned=API` tag. This should be a very uncommonly
    needed tag, and is really only needed to fix versioning mistakes that are
    already out in the wild.

    For example, some of libc's __aeabi_* functions were originally placed in
    the private version, but that was incorrect. They are now in LIBC_N, but
    when building against any version prior to N we need the symbol to be
    unversioned (otherwise it won't resolve on M where it is private).
    """
    # Only the first "versioned" tag is considered.
    versioned_tag = next(
        (tag for tag in tags if tag.startswith('versioned=')), None)
    if versioned_tag is None:
        # If there is no "versioned" tag, the symbol has been versioned for
        # as long as it was introduced.
        return True
    return api >= int(get_tag_value(versioned_tag))
340
341
class ParseError(RuntimeError):
    """An error that occurred while parsing a symbol file.

    Raised for malformed input such as an unsupported tag, an unknown API
    level name in a tag, an unterminated block or symbol, an unknown
    visibility label, a wildcard global symbol, or unexpected content at the
    top level of the file.
    """
344
345
class MultiplyDefinedSymbolError(RuntimeError):
    """A symbol name was multiply defined.

    Attributes:
        multiply_defined_symbols: List of the offending symbol names, in the
            order they were supplied.
    """
    def __init__(self, multiply_defined_symbols: Iterable[str]) -> None:
        """Builds the error message from the given symbol names.

        Args:
            multiply_defined_symbols: Names of the symbols that were defined
                more than once. Any iterable is accepted.
        """
        # Materialize once: the names are needed both for the message and for
        # the attribute, and a one-shot iterable would be exhausted by join().
        names = list(multiply_defined_symbols)
        super().__init__(
            'Version script contains multiple definitions for: {}'.format(
                ', '.join(names)))
        self.multiply_defined_symbols = names
353
354
class SymbolFileParser:
    """Parses NDK symbol files.

    The parser reads input_file one line at a time, keeping the line being
    processed in current_line. Blank lines and lines that start with '#' are
    skipped.
    """
    def __init__(self, input_file: TextIO, api_map: ApiMap, filt: Filter) -> None:
        """Sets up a parser over input_file.

        Args:
            input_file: Text stream containing the symbol file.
            api_map: Mapping used to decode API level names found in tags.
            filt: Filter used when checking for multiply defined symbols.
        """
        self.input_file = input_file
        self.api_map = api_map
        self.filter = filt
        self.current_line: Optional[str] = None

    def parse(self) -> List[Version]:
        """Parses the symbol file and returns a list of Version objects.

        Raises:
            ParseError: The file is malformed.
            MultiplyDefinedSymbolError: A symbol that survives filtering is
                defined more than once.
        """
        versions: List[Version] = []
        while self.next_line():
            assert self.current_line is not None
            # Only version blocks are allowed at the top level.
            if '{' not in self.current_line:
                raise ParseError(
                    f'Unexpected contents at top level: {self.current_line}')
            versions.append(self.parse_version())

        self.check_no_duplicate_symbols(versions)
        return versions

    def check_no_duplicate_symbols(self, versions: Iterable[Version]) -> None:
        """Raises errors for multiply defined symbols.

        This situation is the normal case when symbol versioning is actually
        used, but this script doesn't currently handle that. The error message
        will be a not necessarily obvious "error: redefinition of 'foo'" from
        stub.c, so it's better for us to catch this situation and raise a
        better error.

        Versions and symbols that the filter omits are not considered.

        Raises:
            MultiplyDefinedSymbolError: At least one symbol name appears more
                than once. The names are reported in sorted order.
        """
        symbol_names = set()
        multiply_defined_symbols = set()
        for version in versions:
            if self.filter.should_omit_version(version):
                continue

            for symbol in version.symbols:
                if self.filter.should_omit_symbol(symbol):
                    continue

                if symbol.name in symbol_names:
                    multiply_defined_symbols.add(symbol.name)
                symbol_names.add(symbol.name)
        if multiply_defined_symbols:
            raise MultiplyDefinedSymbolError(sorted(multiply_defined_symbols))

    def parse_version(self) -> Version:
        """Parses a single version section and returns a Version object.

        Expects current_line to be the line that opens the section. Only
        symbols in global scope and outside 'extern "C++"' blocks are
        collected.

        Raises:
            ParseError: The section is malformed or the input ends before the
                section is closed.
        """
        assert self.current_line is not None
        name = self.current_line.split('{')[0].strip()
        tags = get_tags(self.current_line, self.api_map)
        symbols: List[Symbol] = []
        global_scope = True
        cpp_symbols = False
        while self.next_line():
            line = self.current_line
            assert line is not None
            if '}' in line:
                # Line is something like '} BASE; # tags'. Both base and tags
                # are optional here.
                base = line.partition('}')[2]
                base = base.partition('#')[0].strip()
                if not base.endswith(';'):
                    raise ParseError(
                        'Unterminated version/export "C++" block (expected ;).')
                if cpp_symbols:
                    # This '}' closes the 'extern "C++"' block, not the
                    # version section itself.
                    cpp_symbols = False
                else:
                    base = base.rstrip(';').rstrip()
                    return Version(name, base or None, tags, symbols)
            elif 'extern "C++" {' in line:
                cpp_symbols = True
            elif not cpp_symbols and ':' in line:
                visibility = line.split(':')[0].strip()
                if visibility == 'local':
                    global_scope = False
                elif visibility == 'global':
                    global_scope = True
                else:
                    raise ParseError('Unknown visiblity label: ' + visibility)
            elif global_scope and not cpp_symbols:
                symbols.append(self.parse_symbol())
            else:
                # We're in a hidden scope or in 'extern "C++"' block. Ignore
                # everything.
                pass
        raise ParseError('Unexpected EOF in version block.')

    def parse_symbol(self) -> Symbol:
        """Parses a single symbol line and returns a Symbol object.

        Raises:
            ParseError: The line has no terminating ';' or contains a
                wildcard.
        """
        assert self.current_line is not None
        if ';' not in self.current_line:
            raise ParseError(
                'Expected ; to terminate symbol: ' + self.current_line)
        if '*' in self.current_line:
            raise ParseError(
                'Wildcard global symbols are not permitted.')
        # Line is now in the format "<symbol-name>; # tags"
        name, _, _ = self.current_line.strip().partition(';')
        # Whitespace between the name and the ';' is not part of the name.
        name = name.strip()
        tags = get_tags(self.current_line, self.api_map)
        return Symbol(name, tags)

    def next_line(self) -> str:
        """Returns the next non-empty non-comment line.

        The returned line is also stored in current_line.

        A return value of '' indicates EOF.
        """
        while True:
            line = self.input_file.readline()
            if not line:
                # readline() returns '' only at EOF; blank lines still carry
                # their newline.
                break
            stripped = line.strip()
            if stripped and not stripped.startswith('#'):
                break
        self.current_line = line
        return self.current_line
471