1import contextlib
2import io
3import os.path
4import re
5
# Normalize this module's own path to an absolute one so the directory
# arithmetic below does not depend on the current working directory.
__file__ = os.path.abspath(__file__)
# ROOT is the directory three levels above this file
# (presumably the top of the source checkout -- confirm against the
# script's actual location).
ROOT = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
# Directory holding the headers that the generate_*() functions read and rewrite.
INTERNAL = os.path.join(ROOT, 'Include', 'internal')
9
10
# Names that get_identifiers_and_strings() drops when they show up as the
# argument of _Py_ID(...).  The trailing comments record the C files where
# each name occurs (presumably as a macro parameter rather than a real
# identifier -- confirm against those files).
IGNORED = {
    'ACTION',  # Python/_warnings.c
    'ATTR',  # Python/_warnings.c and Objects/funcobject.c
    'DUNDER',  # Objects/typeobject.c
    'RDUNDER',  # Objects/typeobject.c
    'SPECIAL',  # Objects/weakrefobject.c
}
# Identifiers that are always included in the generated output:
# get_identifiers_and_strings() seeds its result set with this list before
# adding whatever the source scan finds.  Each group is labelled with the C
# macro it comes from (presumably these names are only spelled out through
# those macros, so the _Py_ID(...) scan cannot see them -- confirm).
IDENTIFIERS = [
    # from ADD() Python/_warnings.c
    'default',
    'ignore',

    # from GET_WARNINGS_ATTR() in Python/_warnings.c
    'WarningMessage',
    '_showwarnmsg',
    '_warn_unawaited_coroutine',
    'defaultaction',
    'filters',
    'onceregistry',

    # from WRAP_METHOD() in Objects/weakrefobject.c
    '__bytes__',
    '__reversed__',

    # from COPY_ATTR() in Objects/funcobject.c
    '__module__',
    '__name__',
    '__qualname__',
    '__doc__',
    '__annotations__',

    # from SLOT* in Objects/typeobject.c
    '__abs__',
    '__add__',
    '__and__',
    '__divmod__',
    '__float__',
    '__floordiv__',
    '__getitem__',
    '__iadd__',
    '__iand__',
    '__ifloordiv__',
    '__ilshift__',
    '__imatmul__',
    '__imod__',
    '__imul__',
    '__int__',
    '__invert__',
    '__ior__',
    '__irshift__',
    '__isub__',
    '__itruediv__',
    '__ixor__',
    '__lshift__',
    '__matmul__',
    '__mod__',
    '__mul__',
    '__neg__',
    '__or__',
    '__pos__',
    '__pow__',
    '__radd__',
    '__rand__',
    '__rdivmod__',
    '__rfloordiv__',
    '__rlshift__',
    '__rmatmul__',
    '__rmod__',
    '__rmul__',
    '__ror__',
    '__rpow__',
    '__rrshift__',
    '__rshift__',
    '__rsub__',
    '__rtruediv__',
    '__rxor__',
    '__str__',
    '__sub__',
    '__truediv__',
    '__xor__',
]
92
93
94#######################################
95# helpers
96
def iter_files():
    """Yield the path of every ``.c`` and ``.h`` file under the scanned dirs.

    The scanned directories are the fixed set of sub-directories of ROOT
    listed below; each one is walked recursively with os.walk().
    """
    source_dirs = ('Modules', 'Objects', 'Parser', 'PC', 'Programs', 'Python')
    wanted_suffixes = ('.c', '.h')
    for subdir in source_dirs:
        top = os.path.join(ROOT, subdir)
        for parent, _subdirs, basenames in os.walk(top):
            yield from (
                os.path.join(parent, basename)
                for basename in basenames
                if basename.endswith(wanted_suffixes)
            )
105
106
def iter_global_strings():
    """Scan the files from iter_files() for global string declarations.

    Yields 5-tuples ``(name, string, filename, lineno, line)``:

    * for each ``_Py_ID(name)`` occurrence, ``string`` is None;
    * for each ``_Py_DECLARE_STR(name, "string")`` occurrence, ``string``
      is the text between the double quotes.

    ``lineno`` is 1-based and ``line`` is the full source line.
    """
    id_pattern = re.compile(r'\b_Py_ID\((\w+)\)')
    str_pattern = re.compile(r'\b_Py_DECLARE_STR\((\w+), "(.*?)"\)')
    for path in iter_files():
        try:
            source = open(path, encoding='utf-8')
        except FileNotFoundError:
            # The file must have been a temporary file.
            continue
        with source:
            for lineno, text in enumerate(source, start=1):
                # All _Py_ID() hits on a line are reported before any
                # _Py_DECLARE_STR() hits on that same line.
                for match in id_pattern.finditer(text):
                    yield match.group(1), None, path, lineno, text
                for match in str_pattern.finditer(text):
                    varname, literal = match.group(1, 2)
                    yield varname, literal, path, lineno, text
124
125
def iter_to_marker(lines, marker):
    """Yield items from *lines* until one equals *marker*.

    Trailing whitespace is ignored when comparing against *marker*.  The
    marker line itself is consumed from *lines* but not yielded, so when
    *lines* is an iterator the caller can keep reading from just after
    the marker.  If the marker never appears, every line is yielded.
    """
    for candidate in lines:
        if candidate.rstrip() != marker:
            yield candidate
        else:
            return
131
132
class Printer:
    """Line-oriented writer that tracks indentation and line continuations.

    Attributes:
        level: current indentation depth; each level is four spaces.
        file: the file-like object that lines are written to.
        continuation: stack of booleans; when the top entry is true,
            write() ends each line with a backslash before the newline.
    """

    def __init__(self, file):
        self.level = 0
        self.file = file
        self.continuation = [False]

    @contextlib.contextmanager
    def indent(self):
        """Raise the indentation by one level for the duration of the block."""
        save_level = self.level
        try:
            self.level += 1
            yield
        finally:
            self.level = save_level

    def write(self, arg):
        """Write *arg* as one line at the current indentation.

        In continuation mode the line ends with a backslash, separated
        from *arg* by a space unless *arg* is empty.
        """
        eol = '\n'
        if self.continuation[-1]:
            eol = f' \\{eol}' if arg else f'\\{eol}'
        self.file.writelines(("    "*self.level, arg, eol))

    @contextlib.contextmanager
    def block(self, prefix, suffix="", *, continuation=None):
        """Write ``prefix {``, run the body indented, then write ``}suffix``.

        *continuation* selects the continuation mode for the opening line
        and the body; None inherits the current mode.  The closing line is
        written in the enclosing mode.

        If the body raises, the closing line is not written, but the
        continuation stack is still restored so the printer stays usable.
        """
        if continuation is None:
            continuation = self.continuation[-1]
        self.continuation.append(continuation)
        try:
            self.write(prefix + " {")
            with self.indent():
                yield
        finally:
            # Always undo the push above, mirroring what indent() does
            # for the indentation level.
            self.continuation.pop()
        self.write("}" + suffix)
166
167
@contextlib.contextmanager
def open_for_changes(filename, orig):
    """Like open() but only write to the file if it changed.

    The body of the ``with`` statement receives an in-memory text buffer.
    On normal exit the buffered text is compared with *orig*: if it
    differs, *filename* is overwritten (UTF-8) with the new text;
    otherwise a "not changed" note is printed and the file is left alone.
    """
    buffer = io.StringIO()
    yield buffer
    updated = buffer.getvalue()
    if updated == orig:
        print(f'# not changed: {filename}')
        return
    with open(filename, 'w', encoding='utf-8') as target:
        target.write(updated)
179
180
181#######################################
182# the global objects
183
# Marker lines that delimit the generated region of each target header.
# The generators keep everything before START and after END, and replace
# what lies between them.  Lines are compared with trailing whitespace
# stripped (see iter_to_marker()).
START = '/* The following is auto-generated by Tools/scripts/generate_global_objects.py. */'
END = '/* End auto-generated code */'
186
187
def generate_global_strings(identifiers, strings):
    """Regenerate the marked section of ``pycore_global_strings.h``.

    Args:
        identifiers: iterable of identifier names; emitted in sorted order
            as ``STRUCT_FOR_ID(name)`` entries.
        strings: mapping of string literal -> variable name; emitted as
            ``STRUCT_FOR_STR(name, "literal")`` entries sorted by name.

    The text before the START marker and after the END marker is kept
    as-is.  The file is only rewritten if the resulting text differs.
    """
    filename = os.path.join(INTERNAL, 'pycore_global_strings.h')

    # Read the non-generated part of the file.
    # The file is written back as UTF-8, so read it as UTF-8 too rather
    # than with the locale's default encoding; otherwise the "did it
    # change?" comparison (and any non-ASCII text) depends on the platform.
    with open(filename, encoding='utf-8') as infile:
        orig = infile.read()
    lines = iter(orig.rstrip().splitlines())
    before = '\n'.join(iter_to_marker(lines, START))
    # Skip over the previously generated section.
    for _ in iter_to_marker(lines, END):
        pass
    after = '\n'.join(lines)

    # Generate the file.
    with open_for_changes(filename, orig) as outfile:
        printer = Printer(outfile)
        printer.write(before)
        printer.write(START)
        with printer.block('struct _Py_global_strings', ';'):
            with printer.block('struct', ' literals;'):
                for literal, name in sorted(strings.items(), key=lambda x: x[1]):
                    printer.write(f'STRUCT_FOR_STR({name}, "{literal}")')
            # A bare blank line (no indentation) between the two structs.
            outfile.write('\n')
            with printer.block('struct', ' identifiers;'):
                for name in sorted(identifiers):
                    assert name.isidentifier(), name
                    printer.write(f'STRUCT_FOR_ID({name})')
            with printer.block('struct', ' ascii[128];'):
                printer.write("PyASCIIObject _ascii;")
                printer.write("uint8_t _data[2];")
            with printer.block('struct', ' latin1[128];'):
                printer.write("PyCompactUnicodeObject _latin1;")
                printer.write("uint8_t _data[2];")
        printer.write(END)
        printer.write(after)
222
223
def generate_runtime_init(identifiers, strings):
    """Regenerate the marked section of ``pycore_runtime_init.h``.

    Args:
        identifiers: iterable of identifier names; emitted in sorted order
            as ``INIT_ID(name)`` entries.
        strings: mapping of string literal -> variable name; emitted as
            ``INIT_STR(name, "literal")`` entries sorted by name.

    The small-int range is taken from the ``_PY_NSMALLPOSINTS`` and
    ``_PY_NSMALLNEGINTS`` defines in ``pycore_global_objects.h``.

    Raises:
        NotImplementedError: if either define is missing from that header.
    """
    # First get some info from the declarations.
    nsmallposints = None
    nsmallnegints = None
    declarations = os.path.join(INTERNAL, 'pycore_global_objects.h')
    with open(declarations, encoding='utf-8') as infile:
        for line in infile:
            if line.startswith('#define _PY_NSMALLPOSINTS'):
                nsmallposints = int(line.split()[-1])
            elif line.startswith('#define _PY_NSMALLNEGINTS'):
                nsmallnegints = int(line.split()[-1])
            # Stop as soon as both values are known, regardless of the
            # order in which the two defines appear in the header.
            if nsmallposints is not None and nsmallnegints is not None:
                break
        else:
            raise NotImplementedError
    assert nsmallposints and nsmallnegints

    # Then target the runtime initializer.
    filename = os.path.join(INTERNAL, 'pycore_runtime_init.h')

    # Read the non-generated part of the file.
    # The file is written back as UTF-8, so read it as UTF-8 too rather
    # than with the locale's default encoding.
    with open(filename, encoding='utf-8') as infile:
        orig = infile.read()
    lines = iter(orig.rstrip().splitlines())
    before = '\n'.join(iter_to_marker(lines, START))
    # Skip over the previously generated section.
    for _ in iter_to_marker(lines, END):
        pass
    after = '\n'.join(lines)

    # Generate the file.
    with open_for_changes(filename, orig) as outfile:
        printer = Printer(outfile)
        printer.write(before)
        printer.write(START)
        # The whole initializer is one macro, so every line inside it
        # needs a trailing backslash.
        with printer.block('#define _Py_global_objects_INIT', continuation=True):
            with printer.block('.singletons =', ','):
                # Global int objects.
                with printer.block('.small_ints =', ','):
                    for i in range(-nsmallnegints, nsmallposints):
                        printer.write(f'_PyLong_DIGIT_INIT({i}),')
                printer.write('')
                # Global bytes objects.
                printer.write('.bytes_empty = _PyBytes_SIMPLE_INIT(0, 0),')
                with printer.block('.bytes_characters =', ','):
                    for i in range(256):
                        printer.write(f'_PyBytes_CHAR_INIT({i}),')
                printer.write('')
                # Global strings.
                with printer.block('.strings =', ','):
                    with printer.block('.literals =', ','):
                        for literal, name in sorted(strings.items(), key=lambda x: x[1]):
                            printer.write(f'INIT_STR({name}, "{literal}"),')
                    with printer.block('.identifiers =', ','):
                        for name in sorted(identifiers):
                            assert name.isidentifier(), name
                            printer.write(f'INIT_ID({name}),')
                    with printer.block('.ascii =', ','):
                        for i in range(128):
                            printer.write(f'_PyASCIIObject_INIT("\\x{i:02x}"),')
                    with printer.block('.latin1 =', ','):
                        for i in range(128, 256):
                            printer.write(f'_PyUnicode_LATIN1_INIT("\\x{i:02x}"),')
                printer.write('')
                with printer.block('.tuple_empty =', ','):
                    printer.write('.ob_base = _PyVarObject_IMMORTAL_INIT(&PyTuple_Type, 0)')
        printer.write(END)
        printer.write(after)
289
290
def get_identifiers_and_strings() -> 'tuple[set[str], dict[str, str]]':
    """Collect the global identifiers and string literals to generate.

    Returns:
        A pair ``(identifiers, strings)``:

        * ``identifiers`` is the set of names in IDENTIFIERS plus every
          name found by iter_global_strings() without a string value,
          except those listed in IGNORED;
        * ``strings`` maps each declared string literal to its variable
          name.

    Raises:
        ValueError: if the same string literal is declared under two
            different variable names.
    """
    identifiers = set(IDENTIFIERS)
    strings = {}
    for name, string, *_ in iter_global_strings():
        if string is None:
            if name not in IGNORED:
                identifiers.add(name)
            continue
        # `strings` is keyed by the literal, so conflicts must be looked
        # up by `string` (not by `name`).
        declared = strings.setdefault(string, name)
        if declared != name:
            raise ValueError(
                f'string mismatch for {name!r} '
                f'({string!r} is already declared as {declared!r})'
            )
    return identifiers, strings
304
305
306#######################################
307# the script
308
def main() -> None:
    """Scan the sources, then regenerate both target headers.

    The global strings header is generated first, followed by the runtime
    initializer; both receive the same identifiers and strings.
    """
    collected = get_identifiers_and_strings()
    for generate in (generate_global_strings, generate_runtime_init):
        generate(*collected)
314
315
if __name__ == '__main__':
    # No options are defined; parsing still provides --help and rejects
    # unexpected command-line arguments.
    import argparse
    cli_args = argparse.ArgumentParser().parse_args()
    main(**vars(cli_args))
321