import itertools
import pathlib
import sys
import sysconfig
import tempfile
import tokenize
from typing import IO, Dict, List, Optional, Set, Tuple

from pegen.c_generator import CParserGenerator
from pegen.grammar import Grammar
from pegen.grammar_parser import GeneratedParser as GrammarParser
from pegen.parser import Parser
from pegen.parser_generator import ParserGenerator
from pegen.python_generator import PythonParserGenerator
from pegen.tokenizer import Tokenizer

MOD_DIR = pathlib.Path(__file__).resolve().parent

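# The three members are: token index -> token name (all tokens), exact token
# string -> token index, and the set of non-exact token names, as produced by
# generate_token_definitions() below.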
TokenDefinitions = Tuple[Dict[int, str], Dict[str, int], Set[str]]


def get_extra_flags(compiler_flags: str, compiler_py_flags_nodist: str) -> List[str]:
    flags = sysconfig.get_config_var(compiler_flags)
    py_flags_nodist = sysconfig.get_config_var(compiler_py_flags_nodist)
    if flags is None or py_flags_nodist is None:
        return []
    return f"{flags} {py_flags_nodist}".split()


def compile_c_extension(
    generated_source_path: str,
    build_dir: Optional[str] = None,
    verbose: bool = False,
    keep_asserts: bool = True,
    disable_optimization: bool = False,
    library_dir: Optional[str] = None,
) -> pathlib.Path:
    """Compile the generated source for a parser generator into an extension module.

    The extension module will be generated in the same directory as the provided path
    of the generated source, with the same basename (plus the extension-module
    metadata in the suffix). For example, for the source mydir/parser.c the generated
    extension on a Darwin system with Python 3.8 will be mydir/parser.cpython-38-darwin.so.

    If *build_dir* is provided, that path will be used as the temporary build directory
    of distutils (useful if you want the intermediate build artifacts to live in a
    temporary directory).

    If *library_dir* is provided, that path will be used as the directory for a
    static library of the common parser sources (useful when building multiple
    extensions that share those sources).
    """
    import distutils.log
    from distutils.core import Distribution, Extension
    from distutils.tests.support import fixup_build_ext  # type: ignore

    from distutils.ccompiler import new_compiler
    from distutils.dep_util import newer_group
    from distutils.sysconfig import customize_compiler

    if verbose:
        distutils.log.set_threshold(distutils.log.DEBUG)

    source_file_path = pathlib.Path(generated_source_path)
    extension_name = source_file_path.stem
    extra_compile_args = get_extra_flags("CFLAGS", "PY_CFLAGS_NODIST")
    extra_compile_args.append("-DPy_BUILD_CORE_MODULE")
    # Define _Py_TEST_PEGEN to not call PyAST_Validate() in Parser/pegen.c
    extra_compile_args.append("-D_Py_TEST_PEGEN")
    extra_link_args = get_extra_flags("LDFLAGS", "PY_LDFLAGS_NODIST")
    if keep_asserts:
        extra_compile_args.append("-UNDEBUG")
    if disable_optimization:
        if sys.platform == 'win32':
            extra_compile_args.append("/Od")
            extra_link_args.append("/LTCG:OFF")
        else:
            extra_compile_args.append("-O0")
            if sysconfig.get_config_var("GNULD") == "yes":
                extra_link_args.append("-fno-lto")

    common_sources = [
        str(MOD_DIR.parent.parent.parent / "Python" / "Python-ast.c"),
        str(MOD_DIR.parent.parent.parent / "Python" / "asdl.c"),
        str(MOD_DIR.parent.parent.parent / "Parser" / "tokenizer.c"),
        str(MOD_DIR.parent.parent.parent / "Parser" / "pegen.c"),
        str(MOD_DIR.parent.parent.parent / "Parser" / "pegen_errors.c"),
        str(MOD_DIR.parent.parent.parent / "Parser" / "action_helpers.c"),
        str(MOD_DIR.parent.parent.parent / "Parser" / "string_parser.c"),
        str(MOD_DIR.parent / "peg_extension" / "peg_extension.c"),
    ]
    include_dirs = [
        str(MOD_DIR.parent.parent.parent / "Include" / "internal"),
        str(MOD_DIR.parent.parent.parent / "Parser"),
    ]
    extension = Extension(
        extension_name,
        sources=[generated_source_path],
        extra_compile_args=extra_compile_args,
        extra_link_args=extra_link_args,
    )
    dist = Distribution({"name": extension_name, "ext_modules": [extension]})
    cmd = dist.get_command_obj("build_ext")
    fixup_build_ext(cmd)
    cmd.build_lib = str(source_file_path.parent)
    cmd.include_dirs = include_dirs
    if build_dir:
        cmd.build_temp = build_dir
    cmd.ensure_finalized()

    # Drive a compiler directly instead of running build_ext, so that the common
    # parser sources can optionally be reused through a static library below.
    compiler = new_compiler()
    customize_compiler(compiler)
    compiler.set_include_dirs(cmd.include_dirs)
    compiler.set_library_dirs(cmd.library_dirs)
    # build static lib
    if library_dir:
        library_filename = compiler.library_filename(extension_name,
                                                     output_dir=library_dir)
        if newer_group(common_sources, library_filename, 'newer'):
            if sys.platform == 'win32':
                pdb = compiler.static_lib_format % (extension_name, '.pdb')
                compile_opts = [f"/Fd{library_dir}\\{pdb}"]
                compile_opts.extend(extra_compile_args)
            else:
                compile_opts = extra_compile_args
            objects = compiler.compile(common_sources,
                                       output_dir=library_dir,
                                       debug=cmd.debug,
                                       extra_postargs=compile_opts)
            compiler.create_static_lib(objects, extension_name,
                                       output_dir=library_dir,
                                       debug=cmd.debug)
        if sys.platform == 'win32':
            compiler.add_library_dir(library_dir)
            extension.libraries = [extension_name]
        elif sys.platform == 'darwin':
            compiler.set_link_objects([
                '-Wl,-force_load', library_filename,
            ])
        else:
            compiler.set_link_objects([
                '-Wl,--whole-archive', library_filename, '-Wl,--no-whole-archive',
            ])
    else:
        extension.sources[0:0] = common_sources

    # Compile the source code to object files.
    ext_path = cmd.get_ext_fullpath(extension_name)
    if newer_group(extension.sources, ext_path, 'newer'):
        objects = compiler.compile(extension.sources,
                                   output_dir=cmd.build_temp,
                                   debug=cmd.debug,
                                   extra_postargs=extra_compile_args)
    else:
        objects = compiler.object_filenames(extension.sources,
                                            output_dir=cmd.build_temp)
    # Now link the object files together into a "shared object"
    compiler.link_shared_object(
        objects, ext_path,
        libraries=cmd.get_libraries(extension),
        extra_postargs=extra_link_args,
        export_symbols=cmd.get_export_symbols(extension),
        debug=cmd.debug,
        build_temp=cmd.build_temp)

    return pathlib.Path(ext_path)
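
# A minimal usage sketch (hypothetical paths): the extension module is written next
# to the generated C source, e.g.
#
#     so_path = compile_c_extension("mydir/parser.c", build_dir="build")
#     # -> pathlib.Path("mydir/parser.cpython-<version>-<platform>.so")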


def build_parser(
    grammar_file: str, verbose_tokenizer: bool = False, verbose_parser: bool = False
) -> Tuple[Grammar, Parser, Tokenizer]:
    with open(grammar_file) as file:
        tokenizer = Tokenizer(tokenize.generate_tokens(file.readline), verbose=verbose_tokenizer)
        parser = GrammarParser(tokenizer, verbose=verbose_parser)
        grammar = parser.start()

        if not grammar:
            raise parser.make_syntax_error(grammar_file)

    return grammar, parser, tokenizer
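
# For example (with a hypothetical grammar path):
#
#     grammar, parser, tokenizer = build_parser("data/python.gram", verbose_parser=True)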


def generate_token_definitions(tokens: IO[str]) -> TokenDefinitions:
    all_tokens = {}
    exact_tokens = {}
    non_exact_tokens = set()
    numbers = itertools.count(0)

    for line in tokens:
        line = line.strip()

        if not line or line.startswith("#"):
            continue

        pieces = line.split()
        index = next(numbers)

        if len(pieces) == 1:
            (token,) = pieces
            non_exact_tokens.add(token)
            all_tokens[index] = token
        elif len(pieces) == 2:
            token, op = pieces
            exact_tokens[op.strip("'")] = index
            all_tokens[index] = token
        else:
            raise ValueError(f"Unexpected line found in Tokens file: {line}")

    return all_tokens, exact_tokens, non_exact_tokens
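
# For illustration, given a (hypothetical) Tokens file containing:
#
#     ENDMARKER
#     NAME
#     LPAR '('
#
# this would return:
#
#     all_tokens       = {0: "ENDMARKER", 1: "NAME", 2: "LPAR"}
#     exact_tokens     = {"(": 2}
#     non_exact_tokens = {"ENDMARKER", "NAME"}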


def build_c_generator(
    grammar: Grammar,
    grammar_file: str,
    tokens_file: str,
    output_file: str,
    compile_extension: bool = False,
    verbose_c_extension: bool = False,
    keep_asserts_in_extension: bool = True,
    skip_actions: bool = False,
) -> ParserGenerator:
    with open(tokens_file, "r") as tok_file:
        all_tokens, exact_tok, non_exact_tok = generate_token_definitions(tok_file)
    with open(output_file, "w") as file:
        gen: ParserGenerator = CParserGenerator(
            grammar, all_tokens, exact_tok, non_exact_tok, file, skip_actions=skip_actions
        )
        gen.generate(grammar_file)

    if compile_extension:
        with tempfile.TemporaryDirectory() as build_dir:
            compile_c_extension(
                output_file,
                build_dir=build_dir,
                verbose=verbose_c_extension,
                keep_asserts=keep_asserts_in_extension,
            )
    return gen


def build_python_generator(
    grammar: Grammar,
    grammar_file: str,
    output_file: str,
    skip_actions: bool = False,
) -> ParserGenerator:
    with open(output_file, "w") as file:
        gen: ParserGenerator = PythonParserGenerator(grammar, file)  # TODO: skip_actions
        gen.generate(grammar_file)
    return gen


def build_c_parser_and_generator(
    grammar_file: str,
    tokens_file: str,
    output_file: str,
    compile_extension: bool = False,
    verbose_tokenizer: bool = False,
    verbose_parser: bool = False,
    verbose_c_extension: bool = False,
    keep_asserts_in_extension: bool = True,
    skip_actions: bool = False,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    """Generate rules, C parser, tokenizer, parser generator for a given grammar

    Args:
        grammar_file (string): Path for the grammar file
        tokens_file (string): Path for the tokens file
        output_file (string): Path for the output file
        compile_extension (bool, optional): Whether to compile the C extension.
          Defaults to False.
        verbose_tokenizer (bool, optional): Whether to display additional output
          when generating the tokenizer. Defaults to False.
        verbose_parser (bool, optional): Whether to display additional output
          when generating the parser. Defaults to False.
        verbose_c_extension (bool, optional): Whether to display additional
          output when compiling the C extension. Defaults to False.
        keep_asserts_in_extension (bool, optional): Whether to keep the assert statements
          when compiling the extension module. Defaults to True.
        skip_actions (bool, optional): Whether to pretend no rule has any actions.
    """
    grammar, parser, tokenizer = build_parser(grammar_file, verbose_tokenizer, verbose_parser)
    gen = build_c_generator(
        grammar,
        grammar_file,
        tokens_file,
        output_file,
        compile_extension,
        verbose_c_extension,
        keep_asserts_in_extension,
        skip_actions=skip_actions,
    )

    return grammar, parser, tokenizer, gen
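
# Typical invocation (hypothetical paths), generating parse.c and compiling it into
# an importable extension module:
#
#     build_c_parser_and_generator(
#         "data/python.gram", "data/Tokens", "parse.c", compile_extension=True
#     )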


def build_python_parser_and_generator(
    grammar_file: str,
    output_file: str,
    verbose_tokenizer: bool = False,
    verbose_parser: bool = False,
    skip_actions: bool = False,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    """Generate rules, Python parser, tokenizer, parser generator for a given grammar

    Args:
        grammar_file (string): Path for the grammar file
        output_file (string): Path for the output file
        verbose_tokenizer (bool, optional): Whether to display additional output
          when generating the tokenizer. Defaults to False.
        verbose_parser (bool, optional): Whether to display additional output
          when generating the parser. Defaults to False.
        skip_actions (bool, optional): Whether to pretend no rule has any actions.
    """
    grammar, parser, tokenizer = build_parser(grammar_file, verbose_tokenizer, verbose_parser)
    gen = build_python_generator(
        grammar,
        grammar_file,
        output_file,
        skip_actions=skip_actions,
    )
    return grammar, parser, tokenizer, gen
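
# The pure-Python counterpart (hypothetical paths):
#
#     build_python_parser_and_generator("data/python.gram", "parse.py")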