1import itertools 2import pathlib 3import sys 4import sysconfig 5import tempfile 6import tokenize 7from typing import IO, Dict, List, Optional, Set, Tuple 8 9from pegen.c_generator import CParserGenerator 10from pegen.grammar import Grammar 11from pegen.grammar_parser import GeneratedParser as GrammarParser 12from pegen.parser import Parser 13from pegen.parser_generator import ParserGenerator 14from pegen.python_generator import PythonParserGenerator 15from pegen.tokenizer import Tokenizer 16 17MOD_DIR = pathlib.Path(__file__).resolve().parent 18 19TokenDefinitions = Tuple[Dict[int, str], Dict[str, int], Set[str]] 20 21 22def get_extra_flags(compiler_flags: str, compiler_py_flags_nodist: str) -> List[str]: 23 flags = sysconfig.get_config_var(compiler_flags) 24 py_flags_nodist = sysconfig.get_config_var(compiler_py_flags_nodist) 25 if flags is None or py_flags_nodist is None: 26 return [] 27 return f"{flags} {py_flags_nodist}".split() 28 29 30def compile_c_extension( 31 generated_source_path: str, 32 build_dir: Optional[str] = None, 33 verbose: bool = False, 34 keep_asserts: bool = True, 35 disable_optimization: bool = False, 36 library_dir: Optional[str] = None, 37) -> str: 38 """Compile the generated source for a parser generator into an extension module. 39 40 The extension module will be generated in the same directory as the provided path 41 for the generated source, with the same basename (in addition to extension module 42 metadata). For example, for the source mydir/parser.c the generated extension 43 in a darwin system with python 3.8 will be mydir/parser.cpython-38-darwin.so. 44 45 If *build_dir* is provided, that path will be used as the temporary build directory 46 of distutils (this is useful in case you want to use a temporary directory). 47 48 If *library_dir* is provided, that path will be used as the directory for a 49 static library of the common parser sources (this is useful in case you are 50 creating multiple extensions). 51 """ 52 import distutils.log 53 from distutils.core import Distribution, Extension 54 from distutils.tests.support import fixup_build_ext # type: ignore 55 56 from distutils.ccompiler import new_compiler 57 from distutils.dep_util import newer_group 58 from distutils.sysconfig import customize_compiler 59 60 if verbose: 61 distutils.log.set_threshold(distutils.log.DEBUG) 62 63 source_file_path = pathlib.Path(generated_source_path) 64 extension_name = source_file_path.stem 65 extra_compile_args = get_extra_flags("CFLAGS", "PY_CFLAGS_NODIST") 66 extra_compile_args.append("-DPy_BUILD_CORE_MODULE") 67 # Define _Py_TEST_PEGEN to not call PyAST_Validate() in Parser/pegen.c 68 extra_compile_args.append("-D_Py_TEST_PEGEN") 69 extra_link_args = get_extra_flags("LDFLAGS", "PY_LDFLAGS_NODIST") 70 if keep_asserts: 71 extra_compile_args.append("-UNDEBUG") 72 if disable_optimization: 73 if sys.platform == 'win32': 74 extra_compile_args.append("/Od") 75 extra_link_args.append("/LTCG:OFF") 76 else: 77 extra_compile_args.append("-O0") 78 if sysconfig.get_config_var("GNULD") == "yes": 79 extra_link_args.append("-fno-lto") 80 81 common_sources = [ 82 str(MOD_DIR.parent.parent.parent / "Python" / "Python-ast.c"), 83 str(MOD_DIR.parent.parent.parent / "Python" / "asdl.c"), 84 str(MOD_DIR.parent.parent.parent / "Parser" / "tokenizer.c"), 85 str(MOD_DIR.parent.parent.parent / "Parser" / "pegen.c"), 86 str(MOD_DIR.parent.parent.parent / "Parser" / "pegen_errors.c"), 87 str(MOD_DIR.parent.parent.parent / "Parser" / "action_helpers.c"), 88 str(MOD_DIR.parent.parent.parent / "Parser" / "string_parser.c"), 89 str(MOD_DIR.parent / "peg_extension" / "peg_extension.c"), 90 ] 91 include_dirs = [ 92 str(MOD_DIR.parent.parent.parent / "Include" / "internal"), 93 str(MOD_DIR.parent.parent.parent / "Parser"), 94 ] 95 extension = Extension( 96 extension_name, 97 sources=[generated_source_path], 98 extra_compile_args=extra_compile_args, 99 extra_link_args=extra_link_args, 100 ) 101 dist = Distribution({"name": extension_name, "ext_modules": [extension]}) 102 cmd = dist.get_command_obj("build_ext") 103 fixup_build_ext(cmd) 104 cmd.build_lib = str(source_file_path.parent) 105 cmd.include_dirs = include_dirs 106 if build_dir: 107 cmd.build_temp = build_dir 108 cmd.ensure_finalized() 109 110 compiler = new_compiler() 111 customize_compiler(compiler) 112 compiler.set_include_dirs(cmd.include_dirs) 113 compiler.set_library_dirs(cmd.library_dirs) 114 # build static lib 115 if library_dir: 116 library_filename = compiler.library_filename(extension_name, 117 output_dir=library_dir) 118 if newer_group(common_sources, library_filename, 'newer'): 119 if sys.platform == 'win32': 120 pdb = compiler.static_lib_format % (extension_name, '.pdb') 121 compile_opts = [f"/Fd{library_dir}\\{pdb}"] 122 compile_opts.extend(extra_compile_args) 123 else: 124 compile_opts = extra_compile_args 125 objects = compiler.compile(common_sources, 126 output_dir=library_dir, 127 debug=cmd.debug, 128 extra_postargs=compile_opts) 129 compiler.create_static_lib(objects, extension_name, 130 output_dir=library_dir, 131 debug=cmd.debug) 132 if sys.platform == 'win32': 133 compiler.add_library_dir(library_dir) 134 extension.libraries = [extension_name] 135 elif sys.platform == 'darwin': 136 compiler.set_link_objects([ 137 '-Wl,-force_load', library_filename, 138 ]) 139 else: 140 compiler.set_link_objects([ 141 '-Wl,--whole-archive', library_filename, '-Wl,--no-whole-archive', 142 ]) 143 else: 144 extension.sources[0:0] = common_sources 145 146 # Compile the source code to object files. 147 ext_path = cmd.get_ext_fullpath(extension_name) 148 if newer_group(extension.sources, ext_path, 'newer'): 149 objects = compiler.compile(extension.sources, 150 output_dir=cmd.build_temp, 151 debug=cmd.debug, 152 extra_postargs=extra_compile_args) 153 else: 154 objects = compiler.object_filenames(extension.sources, 155 output_dir=cmd.build_temp) 156 # Now link the object files together into a "shared object" 157 compiler.link_shared_object( 158 objects, ext_path, 159 libraries=cmd.get_libraries(extension), 160 extra_postargs=extra_link_args, 161 export_symbols=cmd.get_export_symbols(extension), 162 debug=cmd.debug, 163 build_temp=cmd.build_temp) 164 165 return pathlib.Path(ext_path) 166 167 168def build_parser( 169 grammar_file: str, verbose_tokenizer: bool = False, verbose_parser: bool = False 170) -> Tuple[Grammar, Parser, Tokenizer]: 171 with open(grammar_file) as file: 172 tokenizer = Tokenizer(tokenize.generate_tokens(file.readline), verbose=verbose_tokenizer) 173 parser = GrammarParser(tokenizer, verbose=verbose_parser) 174 grammar = parser.start() 175 176 if not grammar: 177 raise parser.make_syntax_error(grammar_file) 178 179 return grammar, parser, tokenizer 180 181 182def generate_token_definitions(tokens: IO[str]) -> TokenDefinitions: 183 all_tokens = {} 184 exact_tokens = {} 185 non_exact_tokens = set() 186 numbers = itertools.count(0) 187 188 for line in tokens: 189 line = line.strip() 190 191 if not line or line.startswith("#"): 192 continue 193 194 pieces = line.split() 195 index = next(numbers) 196 197 if len(pieces) == 1: 198 (token,) = pieces 199 non_exact_tokens.add(token) 200 all_tokens[index] = token 201 elif len(pieces) == 2: 202 token, op = pieces 203 exact_tokens[op.strip("'")] = index 204 all_tokens[index] = token 205 else: 206 raise ValueError(f"Unexpected line found in Tokens file: {line}") 207 208 return all_tokens, exact_tokens, non_exact_tokens 209 210 211def build_c_generator( 212 grammar: Grammar, 213 grammar_file: str, 214 tokens_file: str, 215 output_file: str, 216 compile_extension: bool = False, 217 verbose_c_extension: bool = False, 218 keep_asserts_in_extension: bool = True, 219 skip_actions: bool = False, 220) -> ParserGenerator: 221 with open(tokens_file, "r") as tok_file: 222 all_tokens, exact_tok, non_exact_tok = generate_token_definitions(tok_file) 223 with open(output_file, "w") as file: 224 gen: ParserGenerator = CParserGenerator( 225 grammar, all_tokens, exact_tok, non_exact_tok, file, skip_actions=skip_actions 226 ) 227 gen.generate(grammar_file) 228 229 if compile_extension: 230 with tempfile.TemporaryDirectory() as build_dir: 231 compile_c_extension( 232 output_file, 233 build_dir=build_dir, 234 verbose=verbose_c_extension, 235 keep_asserts=keep_asserts_in_extension, 236 ) 237 return gen 238 239 240def build_python_generator( 241 grammar: Grammar, 242 grammar_file: str, 243 output_file: str, 244 skip_actions: bool = False, 245) -> ParserGenerator: 246 with open(output_file, "w") as file: 247 gen: ParserGenerator = PythonParserGenerator(grammar, file) # TODO: skip_actions 248 gen.generate(grammar_file) 249 return gen 250 251 252def build_c_parser_and_generator( 253 grammar_file: str, 254 tokens_file: str, 255 output_file: str, 256 compile_extension: bool = False, 257 verbose_tokenizer: bool = False, 258 verbose_parser: bool = False, 259 verbose_c_extension: bool = False, 260 keep_asserts_in_extension: bool = True, 261 skip_actions: bool = False, 262) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]: 263 """Generate rules, C parser, tokenizer, parser generator for a given grammar 264 265 Args: 266 grammar_file (string): Path for the grammar file 267 tokens_file (string): Path for the tokens file 268 output_file (string): Path for the output file 269 compile_extension (bool, optional): Whether to compile the C extension. 270 Defaults to False. 271 verbose_tokenizer (bool, optional): Whether to display additional output 272 when generating the tokenizer. Defaults to False. 273 verbose_parser (bool, optional): Whether to display additional output 274 when generating the parser. Defaults to False. 275 verbose_c_extension (bool, optional): Whether to display additional 276 output when compiling the C extension . Defaults to False. 277 keep_asserts_in_extension (bool, optional): Whether to keep the assert statements 278 when compiling the extension module. Defaults to True. 279 skip_actions (bool, optional): Whether to pretend no rule has any actions. 280 """ 281 grammar, parser, tokenizer = build_parser(grammar_file, verbose_tokenizer, verbose_parser) 282 gen = build_c_generator( 283 grammar, 284 grammar_file, 285 tokens_file, 286 output_file, 287 compile_extension, 288 verbose_c_extension, 289 keep_asserts_in_extension, 290 skip_actions=skip_actions, 291 ) 292 293 return grammar, parser, tokenizer, gen 294 295 296def build_python_parser_and_generator( 297 grammar_file: str, 298 output_file: str, 299 verbose_tokenizer: bool = False, 300 verbose_parser: bool = False, 301 skip_actions: bool = False, 302) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]: 303 """Generate rules, python parser, tokenizer, parser generator for a given grammar 304 305 Args: 306 grammar_file (string): Path for the grammar file 307 output_file (string): Path for the output file 308 verbose_tokenizer (bool, optional): Whether to display additional output 309 when generating the tokenizer. Defaults to False. 310 verbose_parser (bool, optional): Whether to display additional output 311 when generating the parser. Defaults to False. 312 skip_actions (bool, optional): Whether to pretend no rule has any actions. 313 """ 314 grammar, parser, tokenizer = build_parser(grammar_file, verbose_tokenizer, verbose_parser) 315 gen = build_python_generator( 316 grammar, 317 grammar_file, 318 output_file, 319 skip_actions=skip_actions, 320 ) 321 return grammar, parser, tokenizer, gen 322