"""Regenerate the auto-generated sections of CPython's internal headers.

The script scans the C sources under ROOT for ``_Py_ID(...)`` and
``_Py_DECLARE_STR(...)`` uses and rewrites the region between the START and
END markers in ``pycore_global_strings.h`` and ``pycore_runtime_init.h``
(both located in INTERNAL).
"""

import contextlib
import io
import os.path
import re

__file__ = os.path.abspath(__file__)
ROOT = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
INTERNAL = os.path.join(ROOT, 'Include', 'internal')


# Names that match the _Py_ID() regex but must not be treated as identifiers.
IGNORED = {
    'ACTION',  # Python/_warnings.c
    'ATTR',  # Python/_warnings.c and Objects/funcobject.c
    'DUNDER',  # Objects/typeobject.c
    'RDUNDER',  # Objects/typeobject.c
    'SPECIAL',  # Objects/weakrefobject.c
}
# Identifiers that are always included, in addition to those found by scanning.
IDENTIFIERS = [
    # from ADD() Python/_warnings.c
    'default',
    'ignore',

    # from GET_WARNINGS_ATTR() in Python/_warnings.c
    'WarningMessage',
    '_showwarnmsg',
    '_warn_unawaited_coroutine',
    'defaultaction',
    'filters',
    'onceregistry',

    # from WRAP_METHOD() in Objects/weakrefobject.c
    '__bytes__',
    '__reversed__',

    # from COPY_ATTR() in Objects/funcobject.c
    '__module__',
    '__name__',
    '__qualname__',
    '__doc__',
    '__annotations__',

    # from SLOT* in Objects/typeobject.c
    '__abs__',
    '__add__',
    '__and__',
    '__divmod__',
    '__float__',
    '__floordiv__',
    '__getitem__',
    '__iadd__',
    '__iand__',
    '__ifloordiv__',
    '__ilshift__',
    '__imatmul__',
    '__imod__',
    '__imul__',
    '__int__',
    '__invert__',
    '__ior__',
    '__irshift__',
    '__isub__',
    '__itruediv__',
    '__ixor__',
    '__lshift__',
    '__matmul__',
    '__mod__',
    '__mul__',
    '__neg__',
    '__or__',
    '__pos__',
    '__pow__',
    '__radd__',
    '__rand__',
    '__rdivmod__',
    '__rfloordiv__',
    '__rlshift__',
    '__rmatmul__',
    '__rmod__',
    '__rmul__',
    '__ror__',
    '__rpow__',
    '__rrshift__',
    '__rshift__',
    '__rsub__',
    '__rtruediv__',
    '__rxor__',
    '__str__',
    '__sub__',
    '__truediv__',
    '__xor__',
]


#######################################
# helpers

def iter_files():
    """Yield the path of every ``.c``/``.h`` file under the scanned source dirs.

    The directories walked are the fixed set of top-level names below,
    resolved relative to ROOT.
    """
    for subdir in ('Modules', 'Objects', 'Parser', 'PC', 'Programs', 'Python'):
        root = os.path.join(ROOT, subdir)
        for dirname, _, files in os.walk(root):
            for basename in files:
                if not basename.endswith(('.c', '.h')):
                    continue
                yield os.path.join(dirname, basename)


def iter_global_strings():
    """Yield one tuple per ``_Py_ID()`` / ``_Py_DECLARE_STR()`` use found.

    Each item is ``(name, string, filename, lno, line)``.  ``string`` is None
    for a ``_Py_ID(name)`` match and the quoted text for a
    ``_Py_DECLARE_STR(name, "text")`` match.  ``lno`` is 1-based.
    """
    id_regex = re.compile(r'\b_Py_ID\((\w+)\)')
    str_regex = re.compile(r'\b_Py_DECLARE_STR\((\w+), "(.*?)"\)')
    for filename in iter_files():
        try:
            infile = open(filename, encoding='utf-8')
        except FileNotFoundError:
            # The file must have been a temporary file.
            continue
        with infile:
            for lno, line in enumerate(infile, 1):
                for m in id_regex.finditer(line):
                    identifier, = m.groups()
                    yield identifier, None, filename, lno, line
                for m in str_regex.finditer(line):
                    varname, string = m.groups()
                    yield varname, string, filename, lno, line


def iter_to_marker(lines, marker):
    """Yield items from *lines* until one equals *marker* (ignoring trailing
    whitespace).

    The marker line itself is consumed but not yielded, so when *lines* is an
    iterator the caller can keep reading from just after the marker.
    """
    for line in lines:
        if line.rstrip() == marker:
            break
        yield line


class Printer:
    """Write indented lines to a file object, optionally with C line
    continuations (a trailing backslash) for use inside a ``#define``."""

    def __init__(self, file):
        self.level = 0
        self.file = file
        # Stack of "emit line continuations?" flags; the top entry applies.
        self.continuation = [False]

    @contextlib.contextmanager
    def indent(self):
        """Increase the indent level by one for the duration of the block."""
        save_level = self.level
        try:
            self.level += 1
            yield
        finally:
            self.level = save_level

    def write(self, arg):
        """Write *arg* as one line at the current indent level.

        When continuation is active the line ends with a backslash, preceded
        by a space unless *arg* is empty.
        """
        eol = '\n'
        if self.continuation[-1]:
            eol = f' \\{eol}' if arg else f'\\{eol}'
        # NOTE(review): the indent unit is reproduced exactly as it appears in
        # the reviewed source; confirm the intended width.
        self.file.writelines((" "*self.level, arg, eol))

    @contextlib.contextmanager
    def block(self, prefix, suffix="", *, continuation=None):
        """Write ``prefix {``, run the body indented, then write ``}suffix``.

        *continuation* sets whether lines inside the block (including the
        opening line) get a trailing backslash; None inherits the current
        setting.  The closing line uses the enclosing block's setting.
        """
        if continuation is None:
            continuation = self.continuation[-1]
        self.continuation.append(continuation)
        try:
            self.write(prefix + " {")
            with self.indent():
                yield
        finally:
            # Always restore the enclosing setting, even if the body raised,
            # so the printer is not left with a stale continuation flag.
            self.continuation.pop()
        self.write("}" + suffix)


@contextlib.contextmanager
def open_for_changes(filename, orig):
    """Like open() but only write to the file if it changed.

    Yields an in-memory text buffer.  On normal exit the buffer is written to
    *filename* if its content differs from *orig*; otherwise a "not changed"
    note is printed.  If the body raises, nothing is written.
    """
    outfile = io.StringIO()
    yield outfile
    text = outfile.getvalue()
    if text != orig:
        with open(filename, 'w', encoding='utf-8') as outfile:
            outfile.write(text)
    else:
        print(f'# not changed: {filename}')


#######################################
# the global objects

START = '/* The following is auto-generated by Tools/scripts/generate_global_objects.py. */'
END = '/* End auto-generated code */'


def _read_around_markers(filename):
    """Return ``(orig, before, after)`` for the file at *filename*.

    *orig* is the full text, *before* is everything ahead of the START marker
    line and *after* is everything following the END marker line.
    """
    # Read with the same encoding open_for_changes() writes with, so the
    # "did it change?" comparison is not affected by the locale default.
    with open(filename, encoding='utf-8') as infile:
        orig = infile.read()
    lines = iter(orig.rstrip().splitlines())
    before = '\n'.join(iter_to_marker(lines, START))
    # Skip the previously generated section.
    for _ in iter_to_marker(lines, END):
        pass
    after = '\n'.join(lines)
    return orig, before, after


def generate_global_strings(identifiers, strings):
    """Regenerate the generated section of ``pycore_global_strings.h``.

    *identifiers* is an iterable of identifier names; *strings* maps literal
    text to its variable name.  Both are emitted in sorted order (literals
    sorted by variable name).
    """
    filename = os.path.join(INTERNAL, 'pycore_global_strings.h')

    # Read the non-generated part of the file.
    orig, before, after = _read_around_markers(filename)

    # Generate the file.
    with open_for_changes(filename, orig) as outfile:
        printer = Printer(outfile)
        printer.write(before)
        printer.write(START)
        with printer.block('struct _Py_global_strings', ';'):
            with printer.block('struct', ' literals;'):
                for literal, name in sorted(strings.items(), key=lambda x: x[1]):
                    printer.write(f'STRUCT_FOR_STR({name}, "{literal}")')
            outfile.write('\n')
            with printer.block('struct', ' identifiers;'):
                for name in sorted(identifiers):
                    assert name.isidentifier(), name
                    printer.write(f'STRUCT_FOR_ID({name})')
            with printer.block('struct', ' ascii[128];'):
                printer.write("PyASCIIObject _ascii;")
                printer.write("uint8_t _data[2];")
            with printer.block('struct', ' latin1[128];'):
                printer.write("PyCompactUnicodeObject _latin1;")
                printer.write("uint8_t _data[2];")
        printer.write(END)
        printer.write(after)


def generate_runtime_init(identifiers, strings):
    """Regenerate the generated section of ``pycore_runtime_init.h``.

    The small-int range is taken from the ``_PY_NSMALLPOSINTS`` and
    ``_PY_NSMALLNEGINTS`` defines in ``pycore_global_objects.h``.  Raises
    NotImplementedError if the ``_PY_NSMALLNEGINTS`` define is never found.
    """
    # First get some info from the declarations.
    nsmallposints = None
    nsmallnegints = None
    declarations = os.path.join(INTERNAL, 'pycore_global_objects.h')
    with open(declarations, encoding='utf-8') as infile:
        for line in infile:
            if line.startswith('#define _PY_NSMALLPOSINTS'):
                nsmallposints = int(line.split()[-1])
            elif line.startswith('#define _PY_NSMALLNEGINTS'):
                nsmallnegints = int(line.split()[-1])
                # Stop scanning once the second define has been read.
                break
        else:
            raise NotImplementedError
    assert nsmallposints and nsmallnegints

    # Then target the runtime initializer.
    filename = os.path.join(INTERNAL, 'pycore_runtime_init.h')

    # Read the non-generated part of the file.
    orig, before, after = _read_around_markers(filename)

    # Generate the file.
    with open_for_changes(filename, orig) as outfile:
        printer = Printer(outfile)
        printer.write(before)
        printer.write(START)
        with printer.block('#define _Py_global_objects_INIT', continuation=True):
            with printer.block('.singletons =', ','):
                # Global int objects.
                with printer.block('.small_ints =', ','):
                    for i in range(-nsmallnegints, nsmallposints):
                        printer.write(f'_PyLong_DIGIT_INIT({i}),')
                printer.write('')
                # Global bytes objects.
                printer.write('.bytes_empty = _PyBytes_SIMPLE_INIT(0, 0),')
                with printer.block('.bytes_characters =', ','):
                    for i in range(256):
                        printer.write(f'_PyBytes_CHAR_INIT({i}),')
                printer.write('')
                # Global strings.
                with printer.block('.strings =', ','):
                    with printer.block('.literals =', ','):
                        for literal, name in sorted(strings.items(), key=lambda x: x[1]):
                            printer.write(f'INIT_STR({name}, "{literal}"),')
                    with printer.block('.identifiers =', ','):
                        for name in sorted(identifiers):
                            assert name.isidentifier(), name
                            printer.write(f'INIT_ID({name}),')
                    with printer.block('.ascii =', ','):
                        for i in range(128):
                            printer.write(f'_PyASCIIObject_INIT("\\x{i:02x}"),')
                    with printer.block('.latin1 =', ','):
                        for i in range(128, 256):
                            printer.write(f'_PyUnicode_LATIN1_INIT("\\x{i:02x}"),')
                printer.write('')
                with printer.block('.tuple_empty =', ','):
                    printer.write('.ob_base = _PyVarObject_IMMORTAL_INIT(&PyTuple_Type, 0)')
        printer.write(END)
        printer.write(after)


def get_identifiers_and_strings(found=None) -> 'tuple[set[str], dict[str, str]]':
    """Collect the identifier names and string literals to generate.

    *found* is an iterable of ``(name, string, ...)`` tuples in the shape
    produced by iter_global_strings(); when None (the default) the source
    tree is scanned.

    Returns ``(identifiers, strings)`` where *identifiers* is IDENTIFIERS plus
    every scanned name not in IGNORED, and *strings* maps each literal text
    to its variable name.

    Raises ValueError if the same literal text is declared under two
    different variable names.
    """
    if found is None:
        found = iter_global_strings()
    identifiers = set(IDENTIFIERS)
    strings = {}
    for name, string, *_ in found:
        if string is None:
            if name not in IGNORED:
                identifiers.add(name)
        else:
            if string not in strings:
                strings[string] = name
            elif name != strings[string]:
                # `strings` is keyed by the literal text, so look the earlier
                # name up by `string` (indexing by `name` raised KeyError).
                raise ValueError(
                    f'name mismatch for string {string!r} '
                    f'({name!r} != {strings[string]!r})')
    return identifiers, strings


#######################################
# the script

def main() -> None:
    """Scan the sources and regenerate both header files."""
    identifiers, strings = get_identifiers_and_strings()

    generate_global_strings(identifiers, strings)
    generate_runtime_init(identifiers, strings)


if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    # No options are defined, so vars(args) is empty; parsing still gives
    # the script --help and rejects unexpected arguments.
    args = parser.parse_args()
    main(**vars(args))