1import os.path
2import re
3
4from c_parser.preprocessor import (
5    get_preprocessor as _get_preprocessor,
6)
7from c_parser import (
8    parse_file as _parse_file,
9    parse_files as _parse_files,
10)
11from . import REPO_ROOT
12
13
# Glob pattern that clean_lines() substitutes for a bare "*" at the very
# start of a line (only when that "*" is followed by a space).
GLOB_ALL = '**/*'
15
16
def _abs(relfile):
    """Return *relfile* joined onto REPO_ROOT with os.path.join()."""
    root = REPO_ROOT
    return os.path.join(root, relfile)
19
20
def clean_lines(text):
    """Clear out comments, blank lines, and leading/trailing whitespace.

    Each surviving line then has a leading "* " rewritten so that it
    starts with GLOB_ALL, and is joined onto REPO_ROOT via _abs().

    A "#" starts a comment only at the beginning of a line or when it
    is preceded by whitespace.  A "#" embedded in a value (for example
    the C token-pasting operator in "sre_ucs2_##F") is kept as-is.

    Returns a list of the resulting strings, in their original order.
    """
    glob_all = f'{GLOB_ALL} '
    cleaned = []
    for line in text.splitlines():
        line = line.strip()
        if not line or line.startswith('#'):
            # Blank line or whole-line comment.
            continue
        # Drop a trailing comment.  Requiring whitespace before the "#"
        # keeps values that legitimately contain "#" from being cut short.
        line = re.split(r'\s#', line, maxsplit=1)[0].rstrip()
        line = re.sub(r'^[*] ', glob_all, line)
        cleaned.append(_abs(line))
    return cleaned
31
32
# NOTE: the string literal below is never assigned or used, so it has no
# effect at runtime; it only records a sample shell invocation.  Its
# --exclude/--macros/--incldirs/--same options appear to correspond to
# EXCLUDED, MACROS, INCL_DIRS and SAME in this module -- TODO confirm.
'''
@begin=sh@
./python ../c-parser/cpython.py
    --exclude '+../c-parser/EXCLUDED'
    --macros '+../c-parser/MACROS'
    --incldirs '+../c-parser/INCL_DIRS'
    --same './Include/cpython/'
    Include/*.h
    Include/internal/*.h
    Modules/**/*.c
    Objects/**/*.c
    Parser/**/*.c
    Python/**/*.c
@end=sh@
'''
48
# Repo-relative filenames/globs, one per line.  clean_lines() drops the
# blank lines and "#" comments and turns each entry into a path under
# REPO_ROOT.  This module does not use EXCLUDED itself; presumably it is
# consumed elsewhere as the list of files to skip -- confirm with callers.
# XXX Handle these.
EXCLUDED = clean_lines('''
# @begin=conf@

# Rather than fixing for this one, we manually make sure it's okay.
Modules/_sha3/kcp/KeccakP-1600-opt64.c

# OSX
#Modules/_ctypes/darwin/*.c
#Modules/_ctypes/libffi_osx/*.c
Modules/_scproxy.c                # SystemConfiguration/SystemConfiguration.h

# Windows
Modules/_winapi.c               # windows.h
Modules/expat/winconfig.h
Modules/overlapped.c            # winsock.h
Python/dynload_win.c            # windows.h
Python/thread_nt.h

# other OS-dependent
Python/dynload_aix.c            # sys/ldr.h
Python/dynload_dl.c             # dl.h
Python/dynload_hpux.c           # dl.h
Python/thread_pthread.h
Python/emscripten_signal.c

# only huge constants (safe but parsing is slow)
Modules/_blake2/impl/blake2-kat.h
Modules/_ssl_data.h
Modules/_ssl_data_31.h
Modules/_ssl_data_300.h
Modules/_ssl_data_111.h
Modules/cjkcodecs/mappings_*.h
Modules/unicodedata_db.h
Modules/unicodename_db.h
Objects/unicodetype_db.h

# generated
Python/deepfreeze/*.c
Python/frozen_modules/*.h
Python/opcode_targets.h
Python/stdlib_module_names.h

# @end=conf@
''')
94
# Further entries appended to EXCLUDED, in the same one-path-per-line
# format (cleaned and made absolute by clean_lines()).  Per the note inside
# the string, these are files the tool should be able to parse.
# XXX Fix the parser.
EXCLUDED += clean_lines('''
# The tool should be able to parse these...

Modules/hashlib.h
Objects/stringlib/codecs.h
Objects/stringlib/count.h
Objects/stringlib/ctype.h
Objects/stringlib/fastsearch.h
Objects/stringlib/find.h
Objects/stringlib/find_max_char.h
Objects/stringlib/partition.h
Objects/stringlib/replace.h
Objects/stringlib/split.h

Modules/_dbmmodule.c
Modules/cjkcodecs/_codecs_*.c
Modules/expat/xmlrole.c
Modules/expat/xmlparse.c
Python/initconfig.c
''')
116
# Tab-separated table of include directories, one "<glob>\t<dirname>" row
# per line.  clean_lines() drops the blank/comment lines and joins each row
# onto REPO_ROOT (so only the leading glob column is affected); the
# trailing [1:] then discards the "glob dirname" header row.
# get_preprocessor() passes these rows, followed by any caller-supplied
# ones, as file_incldirs.
INCL_DIRS = clean_lines('''
# @begin=tsv@

glob	dirname
*	.
*	./Include
*	./Include/internal

Modules/_tkinter.c	/usr/include/tcl8.6
Modules/tkappinit.c	/usr/include/tcl
Modules/_decimal/**/*.c	Modules/_decimal/libmpdec

# @end=tsv@
''')[1:]
131
# Tab-separated table of preprocessor macro definitions, one
# "<glob>\t<name>\t<value>" row per line.  clean_lines() drops the
# blank/comment lines and joins each row onto REPO_ROOT (so only the
# leading glob column is affected); the trailing [1:] then discards the
# "glob name value" header row.  get_preprocessor() passes these rows,
# followed by any caller-supplied ones, as file_macros.
MACROS = clean_lines('''
# @begin=tsv@

glob	name	value

Include/internal/*.h	Py_BUILD_CORE	1
Python/**/*.c	Py_BUILD_CORE	1
Parser/**/*.c	Py_BUILD_CORE	1
Objects/**/*.c	Py_BUILD_CORE	1

Modules/_asynciomodule.c	Py_BUILD_CORE	1
Modules/_collectionsmodule.c	Py_BUILD_CORE	1
Modules/_ctypes/_ctypes.c	Py_BUILD_CORE	1
Modules/_ctypes/cfield.c	Py_BUILD_CORE	1
Modules/_cursesmodule.c	Py_BUILD_CORE	1
Modules/_datetimemodule.c	Py_BUILD_CORE	1
Modules/_functoolsmodule.c	Py_BUILD_CORE	1
Modules/_heapqmodule.c	Py_BUILD_CORE	1
Modules/_io/*.c	Py_BUILD_CORE	1
Modules/_localemodule.c	Py_BUILD_CORE	1
Modules/_operator.c	Py_BUILD_CORE	1
Modules/_posixsubprocess.c	Py_BUILD_CORE	1
Modules/_sre/sre.c	Py_BUILD_CORE	1
Modules/_threadmodule.c	Py_BUILD_CORE	1
Modules/_tracemalloc.c	Py_BUILD_CORE	1
Modules/_weakref.c	Py_BUILD_CORE	1
Modules/_zoneinfo.c	Py_BUILD_CORE	1
Modules/atexitmodule.c	Py_BUILD_CORE	1
Modules/cmathmodule.c	Py_BUILD_CORE	1
Modules/faulthandler.c	Py_BUILD_CORE	1
Modules/gcmodule.c	Py_BUILD_CORE	1
Modules/getpath.c	Py_BUILD_CORE	1
Modules/getpath_noop.c	Py_BUILD_CORE	1
Modules/itertoolsmodule.c	Py_BUILD_CORE	1
Modules/main.c	Py_BUILD_CORE	1
Modules/mathmodule.c	Py_BUILD_CORE	1
Modules/posixmodule.c	Py_BUILD_CORE	1
Modules/sha256module.c	Py_BUILD_CORE	1
Modules/sha512module.c	Py_BUILD_CORE	1
Modules/signalmodule.c	Py_BUILD_CORE	1
Modules/symtablemodule.c	Py_BUILD_CORE	1
Modules/timemodule.c	Py_BUILD_CORE	1
Modules/unicodedata.c	Py_BUILD_CORE	1
Objects/stringlib/codecs.h	Py_BUILD_CORE	1
Objects/stringlib/unicode_format.h	Py_BUILD_CORE	1
Parser/string_parser.h	Py_BUILD_CORE	1
Parser/pegen.h	Py_BUILD_CORE	1
Python/ceval_gil.h	Py_BUILD_CORE	1
Python/condvar.h	Py_BUILD_CORE	1

Modules/_json.c	Py_BUILD_CORE_BUILTIN	1
Modules/_pickle.c	Py_BUILD_CORE_BUILTIN	1
Modules/_testinternalcapi.c	Py_BUILD_CORE_BUILTIN	1

Include/cpython/abstract.h	Py_CPYTHON_ABSTRACTOBJECT_H	1
Include/cpython/bytearrayobject.h	Py_CPYTHON_BYTEARRAYOBJECT_H	1
Include/cpython/bytesobject.h	Py_CPYTHON_BYTESOBJECT_H	1
Include/cpython/ceval.h	Py_CPYTHON_CEVAL_H	1
Include/cpython/code.h	Py_CPYTHON_CODE_H	1
Include/cpython/dictobject.h	Py_CPYTHON_DICTOBJECT_H	1
Include/cpython/fileobject.h	Py_CPYTHON_FILEOBJECT_H	1
Include/cpython/fileutils.h	Py_CPYTHON_FILEUTILS_H	1
Include/cpython/frameobject.h	Py_CPYTHON_FRAMEOBJECT_H	1
Include/cpython/import.h	Py_CPYTHON_IMPORT_H	1
Include/cpython/listobject.h	Py_CPYTHON_LISTOBJECT_H	1
Include/cpython/methodobject.h	Py_CPYTHON_METHODOBJECT_H	1
Include/cpython/object.h	Py_CPYTHON_OBJECT_H	1
Include/cpython/objimpl.h	Py_CPYTHON_OBJIMPL_H	1
Include/cpython/pyerrors.h	Py_CPYTHON_ERRORS_H	1
Include/cpython/pylifecycle.h	Py_CPYTHON_PYLIFECYCLE_H	1
Include/cpython/pymem.h	Py_CPYTHON_PYMEM_H	1
Include/cpython/pystate.h	Py_CPYTHON_PYSTATE_H	1
Include/cpython/sysmodule.h	Py_CPYTHON_SYSMODULE_H	1
Include/cpython/traceback.h	Py_CPYTHON_TRACEBACK_H	1
Include/cpython/tupleobject.h	Py_CPYTHON_TUPLEOBJECT_H	1
Include/cpython/unicodeobject.h	Py_CPYTHON_UNICODEOBJECT_H	1
Include/internal/pycore_code.h	SIZEOF_VOID_P	8

# implied include of pyport.h
Include/**/*.h	PyAPI_DATA(RTYPE)	extern RTYPE
Include/**/*.h	PyAPI_FUNC(RTYPE)	RTYPE
Include/**/*.h	Py_DEPRECATED(VER)	/* */
Include/**/*.h	_Py_NO_RETURN	/* */
Include/**/*.h	PYLONG_BITS_IN_DIGIT	30
Modules/**/*.c	PyMODINIT_FUNC	PyObject*
Objects/unicodeobject.c	PyMODINIT_FUNC	PyObject*
Python/marshal.c	PyMODINIT_FUNC	PyObject*
Python/_warnings.c	PyMODINIT_FUNC	PyObject*
Python/Python-ast.c	PyMODINIT_FUNC	PyObject*
Python/import.c	PyMODINIT_FUNC	PyObject*
Modules/_testcapimodule.c	PyAPI_FUNC(RTYPE)	RTYPE
Python/getargs.c	PyAPI_FUNC(RTYPE)	RTYPE
Objects/stringlib/unicode_format.h	Py_LOCAL_INLINE(type)	static inline type
Include/pymath.h	_Py__has_builtin(x)	0

# implied include of pymacro.h
*/clinic/*.c.h	PyDoc_VAR(name)	static const char name[]
*/clinic/*.c.h	PyDoc_STR(str)	str
*/clinic/*.c.h	PyDoc_STRVAR(name,str)	PyDoc_VAR(name) = PyDoc_STR(str)

# implied include of exports.h
#Modules/_io/bytesio.c	Py_EXPORTED_SYMBOL	/* */

# implied include of object.h
Include/**/*.h	PyObject_HEAD	PyObject ob_base;
Include/**/*.h	PyObject_VAR_HEAD	PyVarObject ob_base;

# implied include of pyconfig.h
Include/**/*.h	SIZEOF_WCHAR_T	4

# implied include of <unistd.h>
Include/**/*.h	_POSIX_THREADS	1
Include/**/*.h	HAVE_PTHREAD_H	1

# from Makefile
Modules/getpath.c	PYTHONPATH	1
Modules/getpath.c	PREFIX	...
Modules/getpath.c	EXEC_PREFIX	...
Modules/getpath.c	VERSION	...
Modules/getpath.c	VPATH	...
Modules/getpath.c	PLATLIBDIR	...

# from Modules/_sha3/sha3module.c
Modules/_sha3/kcp/KeccakP-1600-inplace32BI.c	PLATFORM_BYTE_ORDER	4321  # force big-endian
Modules/_sha3/kcp/*.c	KeccakOpt	64
Modules/_sha3/kcp/*.c	KeccakP200_excluded	1
Modules/_sha3/kcp/*.c	KeccakP400_excluded	1
Modules/_sha3/kcp/*.c	KeccakP800_excluded	1

# See: setup.py
Modules/_decimal/**/*.c	CONFIG_64	1
Modules/_decimal/**/*.c	ASM	1
Modules/expat/xmlparse.c	HAVE_EXPAT_CONFIG_H	1
Modules/expat/xmlparse.c	XML_POOR_ENTROPY	1
Modules/_dbmmodule.c	HAVE_GDBM_DASH_NDBM_H	1

# others
Modules/_sre/sre_lib.h	LOCAL(type)	static inline type
Modules/_sre/sre_lib.h	SRE(F)	sre_ucs2_##F
Objects/stringlib/codecs.h	STRINGLIB_IS_UNICODE	1
Include/internal/pycore_bitutils.h	_Py__has_builtin(B)	0

# @end=tsv@
''')[1:]
276
277# -pthread
278# -Wno-unused-result
279# -Wsign-compare
280# -g
281# -Og
282# -Wall
283# -std=c99
284# -Wextra
285# -Wno-unused-result -Wno-unused-parameter
286# -Wno-missing-field-initializers
287# -Werror=implicit-function-declaration
288
# Single-entry list holding the './Include/cpython/' directory.
# NOTE(review): not referenced elsewhere in this module; presumably meant
# as a "file_same" value for get_preprocessor() -- confirm.
SAME = ['./Include/cpython/']
292
# Maps an absolute filename/glob (under REPO_ROOT) to a pair of integer
# limits.  parse_file() and parse_files() hand this mapping to c_parser as
# "file_maxsizes".
# NOTE(review): the pair is presumably (max text size, max line count) --
# confirm against c_parser.
MAX_SIZES = {
    _abs(relglob): limits
    for relglob, limits in (
        ('Include/**/*.h', (5_000, 500)),
        ('Modules/_ctypes/ctypes.h', (5_000, 500)),
        ('Modules/_datetimemodule.c', (20_000, 300)),
        ('Modules/posixmodule.c', (20_000, 500)),
        ('Modules/termios.c', (10_000, 800)),
        ('Modules/_testcapimodule.c', (20_000, 400)),
        ('Modules/expat/expat.h', (10_000, 400)),
        ('Objects/stringlib/unicode_format.h', (10_000, 400)),
        ('Objects/typeobject.c', (20_000, 200)),
        ('Python/compile.c', (20_000, 500)),
        ('Python/pylifecycle.c', (500_000, 5000)),
        ('Python/pystate.c', (500_000, 5000)),
    )
}
307
308
def get_preprocessor(*,
                     file_macros=None,
                     file_incldirs=None,
                     file_same=None,
                     **kwargs
                     ):
    """Call c_parser's get_preprocessor() with this module's defaults.

    The module-level MACROS and INCL_DIRS always come first; any
    caller-supplied *file_macros* / *file_incldirs* are appended after
    them.  *file_same* and all remaining keyword arguments are passed
    through unchanged.

    Returns whatever c_parser.preprocessor.get_preprocessor() returns.
    """
    # A falsy (None or empty) extra contributes nothing.
    all_macros = (*MACROS, *(file_macros or ()))
    all_incldirs = (*INCL_DIRS, *(file_incldirs or ()))
    return _get_preprocessor(
        file_macros=all_macros,
        file_incldirs=all_incldirs,
        file_same=file_same,
        **kwargs
    )
327
328
def parse_file(filename, *,
               match_kind=None,
               ignore_exc=None,
               log_err=None,
               ):
    """Yield everything c_parser's parse_file() produces for *filename*.

    The preprocessor is built with get_preprocessor() from *ignore_exc*
    and *log_err*, and MAX_SIZES is supplied as the size limits.
    *match_kind* is forwarded as-is.
    """
    preprocessor = get_preprocessor(ignore_exc=ignore_exc, log_err=log_err)
    parsed = _parse_file(
        filename,
        match_kind=match_kind,
        get_file_preprocessor=preprocessor,
        file_maxsizes=MAX_SIZES,
    )
    yield from parsed
344
345
def parse_files(filenames=None, *,
                match_kind=None,
                ignore_exc=None,
                log_err=None,
                get_file_preprocessor=None,
                **file_kwargs
                ):
    """Yield everything c_parser's parse_files() produces for *filenames*.

    When *get_file_preprocessor* is None, one is built with
    get_preprocessor() from *ignore_exc* and *log_err*; otherwise the
    given one is used and those two arguments are not consulted.
    MAX_SIZES is supplied as the size limits, and *match_kind* plus any
    extra keyword arguments are forwarded as-is.
    """
    preprocessor = (
        get_file_preprocessor
        if get_file_preprocessor is not None
        else get_preprocessor(ignore_exc=ignore_exc, log_err=log_err)
    )
    parsed = _parse_files(
        filenames,
        match_kind=match_kind,
        get_file_preprocessor=preprocessor,
        file_maxsizes=MAX_SIZES,
        **file_kwargs
    )
    yield from parsed
365