1"""Check the stable ABI manifest or generate files from it
2
3By default, the tool only checks existing files/libraries.
4Pass --generate to recreate auto-generated files instead.
5
6For actions that take a FILENAME, the filename can be left out to use a default
7(relative to the manifest file, as they appear in the CPython codebase).
8"""
9
10from functools import partial
11from pathlib import Path
12import dataclasses
13import subprocess
14import sysconfig
15import argparse
16import textwrap
17import tomllib
18import difflib
19import shutil
20import pprint
21import sys
22import os
23import os.path
24import io
25import re
26import csv
27
# Sentinel used as the default of the optional-FILENAME command-line options.
# It lets `main` tell "option not given at all" (MISSING) apart from
# "option given without a value" (None).
MISSING = object()

# Names of header files that are excluded.
# NOTE(review): nothing else in this file reads this set -- confirm whether
# it is still needed.
EXCLUDED_HEADERS = {
    "bytes_methods.h",
    "cellobject.h",
    "classobject.h",
    "code.h",
    "compile.h",
    "datetime.h",
    "dtoa.h",
    "frameobject.h",
    "genobject.h",
    "longintrepr.h",
    "parsetok.h",
    "pyatomic.h",
    "pytime.h",
    "token.h",
    "ucnhash.h",
}

# Platform flags, computed once at import time.
MACOS = sys.platform == "darwin"
UNIXY = sys.platform in ("darwin", "linux")  # XXX should this be "not Windows"?
49
50
51# The stable ABI manifest (Misc/stable_abi.toml) exists only to fill the
52# following dataclasses.
53# Feel free to change its syntax (and the `parse_manifest` function)
54# to better serve that purpose (while keeping it human-readable).
55
class Manifest:
    """Collection of `ABIItem`s forming the stable ABI/limited API.

    Items are kept in `contents`, a dict mapping each item's name to the
    item itself.
    """

    def __init__(self):
        self.contents = {}

    def add(self, item):
        """Add `item` to the manifest, keyed by its name.

        Raise ValueError if an item with the same name is already present.
        """
        if item.name in self.contents:
            # We assume that stable ABI items do not share names,
            # even if they're different kinds (e.g. function vs. macro).
            raise ValueError(f'duplicate ABI item {item.name}')
        self.contents[item.name] = item

    def select(self, kinds, *, include_abi_only=True, ifdef=None):
        """Yield selected items of the manifest, sorted by name

        kinds: set of requested kinds, e.g. {'function', 'macro'}
        include_abi_only: if True (default), include all items of the
            stable ABI.
            If False, include only items from the limited API
            (i.e. items people should use today)
        ifdef: set of feature macros (e.g. {'HAVE_FORK', 'MS_WINDOWS'}).
            If None (default), items are not filtered by this. (This is
            different from the empty set, which filters out all such
            conditional items.)
        """
        # Names are unique, so sorting the keys gives the same order as
        # sorting the (name, item) pairs.
        for name in sorted(self.contents):
            item = self.contents[name]
            if item.kind not in kinds:
                continue
            if item.abi_only and not include_abi_only:
                continue
            if (ifdef is not None
                    and item.ifdef is not None
                    and item.ifdef not in ifdef):
                continue
            yield item

    def dump(self):
        """Yield lines to recreate the manifest file (sans comments/newlines)

        Each item produces a `[kind.name]` header followed by one line per
        field whose value differs from the dataclass default. Booleans are
        written as TOML `true`/`false`; other values are written using
        their Python repr, and empty ones are left out.
        """
        for item in self.contents.values():
            yield f"[{item.kind}.{item.name}]"
            for field in dataclasses.fields(item):
                # `name` and `kind` are already part of the header line.
                if field.name in {'name', 'value', 'kind'}:
                    continue
                value = getattr(item, field.name)
                if value == field.default:
                    continue
                if isinstance(value, bool):
                    # Must cover False too: a field whose default is True
                    # (e.g. FeatureMacro.abi_only) would otherwise vanish
                    # from the dump and silently read back as True.
                    yield f"    {field.name} = {'true' if value else 'false'}"
                elif value:
                    yield f"    {field.name} = {value!r}"
107
108
# Registry filled by the `itemclass` decorator: maps a kind name to the
# class used to represent manifest items of that kind.
itemclasses = {}
def itemclass(kind):
    """Return a class decorator that files the class under `kind`

    The decorated class is stored in `itemclasses` and handed back
    unchanged, so the decorator can be stacked to register one class
    for several kinds.
    """
    def register(klass):
        itemclasses.update({kind: klass})
        return klass
    return register
116
@itemclass('function')
@itemclass('macro')
@itemclass('data')
@itemclass('const')
@itemclass('typedef')
@dataclasses.dataclass
class ABIItem:
    """Information on one item (function, macro, struct, etc.)

    Registered as the item class for the 'function', 'macro', 'data',
    'const' and 'typedef' kinds. `parse_manifest` creates instances with
    `name`, `kind`, and the key/value pairs of the item's manifest entry
    as keyword arguments.
    """

    # Name of the item; `Manifest` uses it as the (unique) key.
    name: str
    # Kind the item was declared under in the manifest, e.g. 'function'.
    kind: str
    # Copied to the 'added' column of the documentation data.
    # Presumably the version in which the item was added -- TODO confirm.
    added: str | None = None
    # If true, `Manifest.select(..., include_abi_only=False)` skips the item,
    # i.e. it is part of the stable ABI but not of the limited API.
    abi_only: bool = False
    # Name of the feature macro this item is conditional on, or None if the
    # item is unconditional.
    ifdef: str | None = None
131
@itemclass('feature_macro')
@dataclasses.dataclass(kw_only=True)
class FeatureMacro(ABIItem):
    """A feature macro that other items can be conditional on (via `ifdef`)

    Fields declared here are keyword-only (`kw_only=True`).
    """

    name: str
    # Text used as the 'ifdef_note' of items conditional on this macro in the
    # generated documentation data.
    doc: str
    # A true value makes the python3dll generator include items conditional
    # on this macro. The generated ctypes test also special-cases the
    # string 'maybe', which skips the Windows check for this macro.
    windows: bool | str = False
    # Unlike for other items, the default here is True.
    abi_only: bool = True
139
@itemclass('struct')
@dataclasses.dataclass(kw_only=True)
class Struct(ABIItem):
    """A struct item; adds struct-specific fields to `ABIItem`

    Fields declared here are keyword-only (`kw_only=True`).
    """

    # Written to the 'struct_abi_kind' column of the documentation data.
    # The set of valid values is not visible here -- see the manifest file.
    struct_abi_kind: str
    # Names of the struct's members; each one gets its own 'member' row in
    # the documentation data. None means no members are listed.
    members: list | None = None
146
def parse_manifest(file):
    """Parse the given TOML file to a Manifest

    file: a file object opened in binary mode (as `tomllib.load` requires).

    The top-level tables of the file are item kinds (the keys of
    `itemclasses`); each entry inside them is turned into an instance of
    the class registered for that kind. A kind without any entries may be
    left out of the file.

    Return the populated Manifest.
    Errors raised while creating or adding an item are re-raised with a
    note naming the offending kind and item.
    """

    manifest = Manifest()

    data = tomllib.load(file)

    # `cls` rather than `itemclass`: don't shadow the module-level decorator.
    for kind, cls in itemclasses.items():
        for name, item_data in data.get(kind, {}).items():
            try:
                item = cls(name=name, kind=kind, **item_data)
                manifest.add(item)
            except Exception as exc:
                exc.add_note(f'in {kind} {name}')
                raise

    return manifest
164
165# The tool can run individual "actions".
166# Most actions are "generators", which generate a single file from the
167# manifest. (Checking works by generating a temp file & comparing.)
168# Other actions, like "--unixy-check", don't work on a single file.
169
# All registered file generators, in definition order.
generators = []
def generator(var_name, default_path):
    """Decorates a file generator: function that writes to a file

    var_name: name of the attribute holding the option's value in the
        parsed arguments; the option string is derived from it.
    default_path: file to use when no FILENAME is given.

    The function gets `var_name`, `arg_name` and `default_path` attributes
    and is appended to `generators`.
    """
    option = '--' + var_name.replace('_', '-')

    def register(func):
        func.var_name = var_name
        func.arg_name = option
        func.default_path = default_path
        generators.append(func)
        return func

    return register
180
181
182@generator("python3dll", 'PC/python3dll.c')
def gen_python3dll(manifest, args, outfile):
    """Generate/check the source for the Windows stable ABI library"""
    # NOTE: the docstring doubles as the command-line help text.
    emit = partial(print, file=outfile)
    emit(textwrap.dedent(r"""
        /* Re-export stable Python ABI */

        /* Generated by Tools/scripts/stable_abi.py */

        #ifdef _M_IX86
        #define DECORATE "_"
        #else
        #define DECORATE
        #endif

        #define EXPORT_FUNC(name) \
            __pragma(comment(linker, "/EXPORT:" DECORATE #name "=" PYTHON_DLL_NAME "." #name))
        #define EXPORT_DATA(name) \
            __pragma(comment(linker, "/EXPORT:" DECORATE #name "=" PYTHON_DLL_NAME "." #name ",DATA"))
    """))

    # Only items that are unconditional, or conditional on a feature macro
    # flagged for Windows, are exported.
    windows_macros = {
        macro.name
        for macro in manifest.select({'feature_macro'})
        if macro.windows
    }

    def exported(kind):
        """Return the exported items of one kind, ordered case-insensitively"""
        chosen = manifest.select(
            {kind},
            include_abi_only=True,
            ifdef=windows_macros)
        return sorted(chosen, key=lambda entry: entry.name.lower())

    for entry in exported('function'):
        emit(f'EXPORT_FUNC({entry.name})')

    emit()

    for entry in exported('data'):
        emit(f'EXPORT_DATA({entry.name})')
226
# Maps a manifest item kind to the value written in the 'role' column of
# the documentation data. Kinds missing here are not listed there.
REST_ROLES = dict(
    function='function',
    data='var',
    struct='type',
    macro='macro',
    # const='const',  # all undocumented
    typedef='type',
)
235
236@generator("doc_list", 'Doc/data/stable_abi.dat')
def gen_doc_annotations(manifest, args, outfile):
    """Generate/check the stable ABI list for documentation annotations"""
    # NOTE: the docstring doubles as the command-line help text.
    #
    # The output is CSV with a header row. Columns that a row does not set
    # (or sets to None) come out empty.
    columns = ['role', 'name', 'added', 'ifdef_note', 'struct_abi_kind']
    writer = csv.DictWriter(outfile, columns, lineterminator='\n')
    writer.writeheader()
    for item in manifest.select(REST_ROLES.keys(), include_abi_only=False):
        # Conditional items carry the `doc` text of their feature macro.
        note = manifest.contents[item.ifdef].doc if item.ifdef else None
        record = dict(
            role=REST_ROLES[item.kind],
            name=item.name,
            added=item.added,
            ifdef_note=note,
        )
        if item.kind != 'struct':
            writer.writerow(record)
            continue
        # A struct row is followed by one 'member' row per listed member.
        record['struct_abi_kind'] = item.struct_abi_kind
        member_records = [
            dict(
                role='member',
                name=f'{item.name}.{member_name}',
                added=item.added,
            )
            for member_name in item.members or ()
        ]
        writer.writerows([record, *member_records])
263
264@generator("ctypes_test", 'Lib/test/test_stable_abi_ctypes.py')
265def gen_ctypes_test(manifest, args, outfile):
266    """Generate/check the ctypes-based test for exported symbols"""
267    write = partial(print, file=outfile)
268    write(textwrap.dedent('''
269        # Generated by Tools/scripts/stable_abi.py
270
271        """Test that all symbols of the Stable ABI are accessible using ctypes
272        """
273
274        import sys
275        import unittest
276        from test.support.import_helper import import_module
277        from _testcapi import get_feature_macros
278
279        feature_macros = get_feature_macros()
280        ctypes_test = import_module('ctypes')
281
282        class TestStableABIAvailability(unittest.TestCase):
283            def test_available_symbols(self):
284
285                for symbol_name in SYMBOL_NAMES:
286                    with self.subTest(symbol_name):
287                        ctypes_test.pythonapi[symbol_name]
288
289            def test_feature_macros(self):
290                self.assertEqual(
291                    set(get_feature_macros()), EXPECTED_FEATURE_MACROS)
292
293            # The feature macros for Windows are used in creating the DLL
294            # definition, so they must be known on all platforms.
295            # If we are on Windows, we check that the hardcoded data matches
296            # the reality.
297            @unittest.skipIf(sys.platform != "win32", "Windows specific test")
298            def test_windows_feature_macros(self):
299                for name, value in WINDOWS_FEATURE_MACROS.items():
300                    if value != 'maybe':
301                        with self.subTest(name):
302                            self.assertEqual(feature_macros[name], value)
303
304        SYMBOL_NAMES = (
305    '''))
306    items = manifest.select(
307        {'function', 'data'},
308        include_abi_only=True,
309    )
310    optional_items = {}
311    for item in items:
312        if item.name in (
313                # Some symbols aren't exported on all platforms.
314                # This is a bug: https://bugs.python.org/issue44133
315                'PyModule_Create2', 'PyModule_FromDefAndSpec2',
316            ):
317            continue
318        if item.ifdef:
319            optional_items.setdefault(item.ifdef, []).append(item.name)
320        else:
321            write(f'    "{item.name}",')
322    write(")")
323    for ifdef, names in optional_items.items():
324        write(f"if feature_macros[{ifdef!r}]:")
325        write(f"    SYMBOL_NAMES += (")
326        for name in names:
327            write(f"        {name!r},")
328        write("    )")
329    write("")
330    feature_macros = list(manifest.select({'feature_macro'}))
331    feature_names = sorted(m.name for m in feature_macros)
332    write(f"EXPECTED_FEATURE_MACROS = set({pprint.pformat(feature_names)})")
333
334    windows_feature_macros = {m.name: m.windows for m in feature_macros}
335    write(f"WINDOWS_FEATURE_MACROS = {pprint.pformat(windows_feature_macros)}")
336
337
338@generator("testcapi_feature_macros", 'Modules/_testcapi_feature_macros.inc')
def gen_testcapi_feature_macros(manifest, args, outfile):
    """Generate/check the feature macro list for the _testcapi module"""
    # NOTE: the docstring doubles as the command-line help text, so it is
    # kept to a single line.
    #
    # For every feature macro in the manifest the output contains a C
    # fragment that stores Py_True or Py_False under the macro's name in the
    # dict `result`, depending on whether the macro is defined, and returns
    # NULL (after releasing `result`) if that fails. `res` and `result` are
    # expected to be declared by the C code that includes the output.
    write = partial(print, file=outfile)
    write('// Generated by Tools/scripts/stable_abi.py')
    write()
    write('// Add an entry in dict `result` for each Stable ABI feature macro.')
    write()
    for macro in manifest.select({'feature_macro'}):
        name = macro.name
        write(f'#ifdef {name}')
        write(f'    res = PyDict_SetItemString(result, "{name}", Py_True);')
        write('#else')
        write(f'    res = PyDict_SetItemString(result, "{name}", Py_False);')
        write('#endif')
        write('if (res) {')
        write('    Py_DECREF(result); return NULL;')
        write('}')
        write()
357
358
def generate_or_check(manifest, args, path, func):
    """Generate/check a file with a single generator

    manifest, args: passed through to `func`. `args.generate` selects the
        mode: if true, the file is (re)written when its content is out of
        date; otherwise it is only compared.
    path: location of the file; a `str` or a path object.
    func: the generator, called as `func(manifest, args, outfile)` with an
        in-memory text file to write to.

    Return True if successful; False if a comparison failed (in check mode,
    this includes the file not existing at all).
    """
    # A FILENAME given on the command line may arrive as a plain string.
    path = Path(path)

    outfile = io.StringIO()
    func(manifest, args, outfile)
    generated = outfile.getvalue()
    try:
        existing = path.read_text()
    except FileNotFoundError:
        # Not an error when generating: the file is simply created.
        existing = None

    if generated == existing:
        return True

    if args.generate:
        path.write_text(generated)
        return True

    if existing is None:
        print(f'File {path} does not exist!')
        return False

    print(f'File {path} differs from expected!')
    # Diff from the file on disk to the freshly generated (expected)
    # content, so that each label names the text it belongs to.
    diff = difflib.unified_diff(
        existing.splitlines(), generated.splitlines(),
        str(path), '<expected>',
        lineterm='',
    )
    for line in diff:
        print(line)
    return False
384
385
def do_unixy_check(manifest, args):
    """Check headers & library using "Unixy" tools (GCC/clang, binutils)"""
    # NOTE: the docstring doubles as the command-line help text.
    #
    # Returns True if all checks passed, False otherwise; problems are
    # reported on stderr. `args` is not used. The header paths are relative,
    # so this has to run from the directory that contains "Include".
    okay = True

    # Get all macros first: we'll need feature macros like HAVE_FORK and
    # MS_WINDOWS for everything else
    present_macros = gcc_get_limited_api_macros(['Include/Python.h'])
    feature_macros = {m.name for m in manifest.select({'feature_macro'})}
    feature_macros &= present_macros

    # Check that we have all needed macros
    expected_macros = {item.name for item in manifest.select({'macro'})}
    missing_macros = expected_macros - present_macros
    okay &= _report_unexpected_items(
        missing_macros,
        'Some macros are not defined from "Include/Python.h" '
        + 'with Py_LIMITED_API:')

    expected_symbols = {item.name for item in manifest.select(
        {'function', 'data'}, include_abi_only=True, ifdef=feature_macros,
    )}

    # Check the static library (*.a)
    LIBRARY = sysconfig.get_config_var("LIBRARY")
    if not LIBRARY:
        raise Exception("failed to get LIBRARY variable from sysconfig")
    if os.path.exists(LIBRARY):
        okay &= binutils_check_library(
            manifest, LIBRARY, expected_symbols, dynamic=False)

    # Check the dynamic library (*.so)
    # NOTE(review): this is also scanned with dynamic=False, i.e. without
    # nm's --dynamic option -- confirm that this is intended.
    LDLIBRARY = sysconfig.get_config_var("LDLIBRARY")
    if not LDLIBRARY:
        raise Exception("failed to get LDLIBRARY variable from sysconfig")
    okay &= binutils_check_library(
            manifest, LDLIBRARY, expected_symbols, dynamic=False)

    # Check definitions in the header files
    expected_defs = {item.name for item in manifest.select(
        {'function', 'data'}, include_abi_only=False, ifdef=feature_macros,
    )}
    found_defs = gcc_get_limited_api_definitions(['Include/Python.h'])
    missing_defs = expected_defs - found_defs
    okay &= _report_unexpected_items(
        missing_defs,
        'Some expected declarations were not declared in '
        + '"Include/Python.h" with Py_LIMITED_API:')

    # Some Limited API macros are defined in terms of private symbols.
    # These are not part of Limited API (even though they're defined with
    # Py_LIMITED_API). They must be part of the Stable ABI, though.
    private_symbols = {n for n in expected_symbols if n.startswith('_')}
    extra_defs = found_defs - expected_defs - private_symbols
    okay &= _report_unexpected_items(
        extra_defs,
        'Some extra declarations were found in "Include/Python.h" '
        + 'with Py_LIMITED_API:')

    return okay
447
448
449def _report_unexpected_items(items, msg):
450    """If there are any `items`, report them using "msg" and return false"""
451    if items:
452        print(msg, file=sys.stderr)
453        for item in sorted(items):
454            print(' -', item, file=sys.stderr)
455        return False
456    return True
457
458
def binutils_get_exported_symbols(library, dynamic=False):
    """Retrieve exported symbols using the nm(1) tool from binutils

    library: path of the library file to inspect.
    dynamic: if true, pass --dynamic to nm.

    Yield the symbol names, in the order nm prints them. If nm fails, its
    output is echoed and the process exits with nm's return code. An
    Exception is raised if nm prints nothing.
    """
    command = ["nm", "--no-sort", *(["--dynamic"] if dynamic else []), library]
    result = subprocess.run(
        command, stdout=subprocess.PIPE, universal_newlines=True)
    if result.returncode:
        sys.stdout.write(result.stdout)
        sys.exit(result.returncode)

    output = result.stdout.rstrip()
    if not output:
        raise Exception("command output is empty")

    for row in output.splitlines():
        # A usable row looks like '0000000000001b80 D PyTextIOWrapper_Type'.
        # Rows with fewer than three columns (including empty ones) are
        # skipped.
        columns = row.split(maxsplit=2)
        if len(columns) < 3:
            continue

        symbol = columns[-1]
        # On macOS, a leading underscore is dropped from the name.
        yield symbol[1:] if MACOS and symbol.startswith("_") else symbol
489
490
def binutils_check_library(manifest, library, expected_symbols, dynamic):
    """Check that library exports all expected_symbols

    manifest: not used by this function.
    library: path of the library file, handed to nm(1).
    expected_symbols: set of symbol names that must be present.
    dynamic: passed on to `binutils_get_exported_symbols`.

    Return True if nothing is missing. Otherwise list the missing symbols
    (sorted by name) on stderr and return False.
    """
    available_symbols = set(binutils_get_exported_symbols(library, dynamic))
    missing_symbols = expected_symbols - available_symbols
    if missing_symbols:
        # Sorted, so that the report is stable from run to run.
        print(textwrap.dedent(f"""\
            Some symbols from the limited API are missing from {library}:
                {', '.join(sorted(missing_symbols))}

            This error means that there are some missing symbols among the
            ones exported in the library.
            This normally means that some symbol, function implementation or
            a prototype belonging to a symbol in the limited API has been
            deleted or is missing.
        """), file=sys.stderr)
        return False
    return True
508
509
def gcc_get_limited_api_macros(headers):
    """Get all limited API macros from headers.

    Runs the preprocessor of the compiler Python was built with (the "CC"
    configuration variable) over `headers`, with "-DPy_LIMITED_API" set to
    the version of the running interpreter and "-dM" added so that the
    output consists of macro definitions.

    Return the set of names of all macros defined in that output.

    Requires Python built with a GCC-compatible compiler. (clang might work)
    """
    version = sys.version_info
    api_hexversion = version.major << 24 | version.minor << 16

    compiler = sysconfig.get_config_var("CC").split()
    options = [
        "-DSIZEOF_WCHAR_T=4",  # The actual value is not important
        f"-DPy_LIMITED_API={api_hexversion}",
        # Include paths are relative to the current directory.
        "-I.",
        "-I./Include",
        "-dM",
        "-E",
    ]
    command = compiler + options + [str(header) for header in headers]
    macro_dump = subprocess.check_output(command, text=True)

    return set(re.findall(r"#define (\w+)", macro_dump))
545
546
def gcc_get_limited_api_definitions(headers):
    """Get all limited API definitions from headers.

    Runs the preprocessor of the compiler Python was built with (the "CC"
    configuration variable) over `headers`, with "-DPy_LIMITED_API" set to
    the version of the running interpreter. The export macros are
    redefined to marker names, so the declarations using them can be
    picked out of the preprocessed output.

    Return the set of names found in function and data declarations.

    This function does *NOT* extract the macros defined on the limited API

    Requires Python built with a GCC-compatible compiler. (clang might work)
    """
    version = sys.version_info
    api_hexversion = version.major << 24 | version.minor << 16

    compiler = sysconfig.get_config_var("CC").split()
    options = [
        # Prevent the expansion of the exported macros so we can capture
        # them later
        "-DPyAPI_FUNC=__PyAPI_FUNC",
        "-DPyAPI_DATA=__PyAPI_DATA",
        "-DEXPORT_DATA=__EXPORT_DATA",
        "-D_Py_NO_RETURN=",
        "-DSIZEOF_WCHAR_T=4",  # The actual value is not important
        f"-DPy_LIMITED_API={api_hexversion}",
        "-I.",
        "-I./Include",
        "-E",
    ]
    command = compiler + options + [str(header) for header in headers]
    preprocessed = subprocess.check_output(
        command,
        text=True,
        stderr=subprocess.DEVNULL,
    )

    # One pattern per marker macro; each captures the declared name.
    patterns = (
        r"__PyAPI_DATA\(.*?\)[\s\*\(]*([^);]*)\)?.*;",
        r"__EXPORT_DATA\((.*?)\)",
        r"__PyAPI_FUNC\(.*?\)\s*(.*?)\s*\(",
    )
    definitions = set()
    for pattern in patterns:
        definitions.update(re.findall(pattern, preprocessed))
    return definitions
592
def check_private_names(manifest):
    """Ensure limited API doesn't contain private names

    Names prefixed by an underscore are private by definition.

    Raise ValueError for the first item in `manifest` that has a private
    name but is not marked `abi_only`. Return None if there is none.
    """
    for name, item in manifest.contents.items():
        if name.startswith('_') and not item.abi_only:
            raise ValueError(
                f'`{name}` is private (underscore-prefixed) and should be '
                + 'removed from the stable ABI list or marked `abi_only`')
603
604def check_dump(manifest, filename):
605    """Check that manifest.dump() corresponds to the data.
606
607    Mainly useful when debugging this script.
608    """
609    dumped = tomllib.loads('\n'.join(manifest.dump()))
610    with filename.open('rb') as file:
611        from_file = tomllib.load(file)
612    if dumped != from_file:
613        print(f'Dump differs from loaded data!', file=sys.stderr)
614        diff = difflib.unified_diff(
615            pprint.pformat(dumped).splitlines(),
616            pprint.pformat(from_file).splitlines(),
617            '<dumped>', str(filename),
618            lineterm='',
619        )
620        for line in diff:
621            print(line, file=sys.stderr)
622        return False
623    else:
624        return True
625
def main():
    """Command-line entry point

    Parse the command line, load the manifest and run the requested
    actions: the manifest dump, the file generators/checks, and the
    "Unixy" check.

    Exit through `parser.error` if no action was requested. Raise an
    Exception listing the failed actions if any of them did not succeed.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "file", type=Path, metavar='FILE',
        help="file with the stable abi manifest",
    )
    parser.add_argument(
        "--generate", action='store_true',
        help="generate file(s), rather than just checking them",
    )
    parser.add_argument(
        "--generate-all", action='store_true',
        help="as --generate, but generate all file(s) using default filenames."
            + " (unlike --all, does not run any extra checks)",
    )
    parser.add_argument(
        "-a", "--all", action='store_true',
        help="run all available checks using default filenames",
    )
    parser.add_argument(
        "-l", "--list", action='store_true',
        help="list available generators and their default filenames; then exit",
    )
    parser.add_argument(
        "--dump", action='store_true',
        help="dump the manifest contents (used for debugging the parser)",
    )

    actions_group = parser.add_argument_group('actions')
    for gen in generators:
        # Three states per generator option:
        #   MISSING: option not given;
        #   None: option given without a FILENAME (use the default path);
        #   a Path: option given with an explicit FILENAME.
        # `type=Path` only converts a FILENAME given on the command line.
        # generate_or_check() needs a path object, not a plain string.
        actions_group.add_argument(
            gen.arg_name, dest=gen.var_name,
            type=Path, nargs="?", default=MISSING,
            metavar='FILENAME',
            help=gen.__doc__,
        )
    actions_group.add_argument(
        '--unixy-check', action='store_true',
        help=do_unixy_check.__doc__,
    )
    args = parser.parse_args()

    # Default paths are relative to the parent of the manifest's directory.
    base_path = args.file.parent.parent

    if args.list:
        for gen in generators:
            print(f'{gen.arg_name}: {base_path / gen.default_path}')
        sys.exit(0)

    run_all_generators = args.generate_all

    if args.generate_all:
        args.generate = True

    if args.all:
        run_all_generators = True
        args.unixy_check = True

    try:
        file = args.file.open('rb')
    except FileNotFoundError as err:
        if args.file.suffix == '.txt':
            # Provide a better error message
            suggestion = args.file.with_suffix('.toml')
            raise FileNotFoundError(
                f'{args.file} not found. Did you mean {suggestion} ?') from err
        raise
    with file:
        manifest = parse_manifest(file)

    check_private_names(manifest)

    # Remember results of all actions (as booleans).
    # At the end we'll check that at least one action was run,
    # and also fail if any are false.
    results = {}

    if args.dump:
        for line in manifest.dump():
            print(line)
        results['dump'] = check_dump(manifest, args.file)

    for gen in generators:
        filename = getattr(args, gen.var_name)
        if filename is None or (run_all_generators and filename is MISSING):
            filename = base_path / gen.default_path
        elif filename is MISSING:
            continue

        results[gen.var_name] = generate_or_check(manifest, args, filename, gen)

    if args.unixy_check:
        results['unixy_check'] = do_unixy_check(manifest, args)

    if not results:
        if args.generate:
            parser.error('No file specified. Use --help for usage.')
        parser.error('No check specified. Use --help for usage.')

    failed_results = [name for name, result in results.items() if not result]

    if failed_results:
        raise Exception(f"""
        These checks related to the stable ABI did not succeed:
            {', '.join(failed_results)}

        If you see diffs in the output, files derived from the stable
        ABI manifest were not regenerated.
        Run `make regen-limited-abi` to fix this.

        Otherwise, see the error(s) above.

        The stable ABI manifest is at: {args.file}
        Note that there is a process to follow when modifying it.

        You can read more about the limited API and its contracts at:

        https://docs.python.org/3/c-api/stable.html

        And in PEP 384:

        https://peps.python.org/pep-0384/
        """)
752
753
# Run the command-line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
756