import importlib.util
import io
import os
import pathlib
import sys
import textwrap
import token
import tokenize
from typing import IO, Any, Dict, Final, Optional, Type, cast

from pegen.build import compile_c_extension
from pegen.c_generator import CParserGenerator
from pegen.grammar import Grammar
from pegen.grammar_parser import GeneratedParser as GrammarParser
from pegen.parser import Parser
from pegen.python_generator import PythonParserGenerator
from pegen.tokenizer import Tokenizer

ALL_TOKENS = token.tok_name
EXACT_TOKENS = token.EXACT_TOKEN_TYPES
NON_EXACT_TOKENS = {
    name for index, name in token.tok_name.items() if index not in EXACT_TOKENS.values()
}


def generate_parser(grammar: Grammar) -> Type[Parser]:
    # Generate a parser.
    out = io.StringIO()
    genr = PythonParserGenerator(grammar, out)
    genr.generate("<string>")

    # Load the generated parser class.
    ns: Dict[str, Any] = {}
    exec(out.getvalue(), ns)
    return ns["GeneratedParser"]


def run_parser(file: IO[bytes], parser_class: Type[Parser], *, verbose: bool = False) -> Any:
    # Run a parser on a file (stream).
    tokenizer = Tokenizer(tokenize.generate_tokens(file.readline))  # type: ignore # typeshed issue #3515
    parser = parser_class(tokenizer, verbose=verbose)
    result = parser.start()
    if result is None:
        raise parser.make_syntax_error("invalid syntax")
    return result


def parse_string(
    source: str, parser_class: Type[Parser], *, dedent: bool = True, verbose: bool = False
) -> Any:
    # Run the parser on a string.
    if dedent:
        source = textwrap.dedent(source)
    file = io.StringIO(source)
    return run_parser(file, parser_class, verbose=verbose)  # type: ignore # typeshed issue #3515


def make_parser(source: str) -> Type[Parser]:
    # Combine parse_string() and generate_parser().
    grammar = parse_string(source, GrammarParser)
    return generate_parser(grammar)


def import_file(full_name: str, path: str) -> Any:
    """Import a python module from a path"""

    spec = importlib.util.spec_from_file_location(full_name, path)
    assert spec is not None
    mod = importlib.util.module_from_spec(spec)

    # We assume this is not None and has an exec_module() method.
    # See https://docs.python.org/3/reference/import.html?highlight=exec_module#loading
    loader = cast(Any, spec.loader)
    loader.exec_module(mod)
    return mod


def generate_c_parser_source(grammar: Grammar) -> str:
    out = io.StringIO()
    genr = CParserGenerator(grammar, ALL_TOKENS, EXACT_TOKENS, NON_EXACT_TOKENS, out)
    genr.generate("<string>")
    return out.getvalue()


def generate_parser_c_extension(
    grammar: Grammar, path: pathlib.PurePath, debug: bool = False,
    library_dir: Optional[str] = None,
) -> Any:
    """Generate a parser c extension for the given grammar in the given path

    Returns a module object with a parse_string() method.
    TODO: express that using a Protocol.
    """
    # Make sure that the working directory is empty: reusing non-empty temporary
    # directories when generating extensions can lead to segmentation faults.
    # Check issue #95 (https://github.com/gvanrossum/pegen/issues/95) for more
    # context.
    assert not os.listdir(path)
    source = path / "parse.c"
    with open(source, "w", encoding="utf-8") as file:
        genr = CParserGenerator(
            grammar, ALL_TOKENS, EXACT_TOKENS, NON_EXACT_TOKENS, file, debug=debug
        )
        genr.generate("parse.c")
    compile_c_extension(
        str(source),
        build_dir=str(path),
        # Significant test_peg_generator speedups
        disable_optimization=True,
        library_dir=library_dir,
    )


def print_memstats() -> bool:
    MiB: Final = 2 ** 20
    try:
        import psutil  # type: ignore
    except ImportError:
        return False
    print("Memory stats:")
    process = psutil.Process()
    meminfo = process.memory_info()
    res = {}
    res["rss"] = meminfo.rss / MiB
    res["vms"] = meminfo.vms / MiB
    if sys.platform == "win32":
        res["maxrss"] = meminfo.peak_wset / MiB
    else:
        # See https://stackoverflow.com/questions/938733/total-memory-used-by-python-process
        import resource  # Since it doesn't exist on Windows.

        rusage = resource.getrusage(resource.RUSAGE_SELF)
        if sys.platform == "darwin":
            factor = 1
        else:
            factor = 1024  # Linux
        res["maxrss"] = rusage.ru_maxrss * factor / MiB
    for key, value in res.items():
        print(f"  {key:12.12s}: {value:10.0f} MiB")
    return True
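
# Example usage (an illustrative sketch, not part of the original test helpers):
# build a Python parser class from a small toy PEG grammar with make_parser(),
# then run it over a string with parse_string().  The grammar and input below
# are assumptions chosen for demonstration only.
if __name__ == "__main__":
    demo_parser_class = make_parser(
        """
        start: expr NEWLINE
        expr: term '+' expr | term
        term: NUMBER
        """
    )
    print(parse_string("1 + 2\n", demo_parser_class))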