1import importlib.util
2import io
3import os
4import pathlib
5import sys
6import textwrap
7import token
8import tokenize
9from typing import IO, Any, Dict, Final, Optional, Type, cast
10
11from pegen.build import compile_c_extension
12from pegen.c_generator import CParserGenerator
13from pegen.grammar import Grammar
14from pegen.grammar_parser import GeneratedParser as GrammarParser
15from pegen.parser import Parser
16from pegen.python_generator import PythonParserGenerator
17from pegen.tokenizer import Tokenizer
18
# Token tables passed to CParserGenerator below.
# ALL_TOKENS maps a numeric token type to its name.
ALL_TOKENS = token.tok_name
# EXACT_TOKENS maps an exact token string (e.g. "(") to its numeric type.
EXACT_TOKENS = token.EXACT_TOKEN_TYPES
# Names of all token types whose numeric value is not a value of EXACT_TOKENS.
NON_EXACT_TOKENS = {
    ALL_TOKENS[index] for index in ALL_TOKENS.keys() - set(EXACT_TOKENS.values())
}
24
25
def generate_parser(grammar: Grammar) -> Type[Parser]:
    """Build a Python parser class for *grammar* entirely in memory.

    The parser source is written by PythonParserGenerator into a string
    buffer, executed in a fresh namespace, and the ``GeneratedParser``
    object defined by that source is returned.
    """
    buffer = io.StringIO()
    PythonParserGenerator(grammar, buffer).generate("<string>")

    # Run the generated source and pull the parser class out of its namespace.
    namespace: Dict[str, Any] = {}
    exec(buffer.getvalue(), namespace)
    return namespace["GeneratedParser"]
36
37
def run_parser(file: IO[bytes], parser_class: Type[Parser], *, verbose: bool = False) -> Any:
    """Tokenize the stream *file* and parse it with *parser_class*.

    Returns whatever the parser's ``start()`` method produces. If that is
    ``None``, the exception built by ``parser.make_syntax_error()`` is raised.
    """
    token_stream = tokenize.generate_tokens(file.readline)  # type: ignore # typeshed issue #3515
    parser = parser_class(Tokenizer(token_stream), verbose=verbose)
    tree = parser.start()
    if tree is not None:
        return tree
    raise parser.make_syntax_error("invalid syntax")
46
47
def parse_string(
    source: str, parser_class: Type[Parser], *, dedent: bool = True, verbose: bool = False
) -> Any:
    """Parse the text *source* with *parser_class* and return the result.

    When *dedent* is true, the text is passed through ``textwrap.dedent()``
    first. The text is then wrapped in an in-memory stream and handed to
    run_parser().
    """
    text = textwrap.dedent(source) if dedent else source
    stream = io.StringIO(text)
    return run_parser(stream, parser_class, verbose=verbose)  # type: ignore # typeshed issue #3515
56
57
def make_parser(source: str) -> Type[Parser]:
    """Turn grammar text into a parser class.

    The text is parsed with GrammarParser via parse_string(), and the
    resulting grammar is passed to generate_parser().
    """
    return generate_parser(parse_string(source, GrammarParser))
62
63
def import_file(full_name: str, path: str) -> Any:
    """Import a python module from a path.

    Args:
        full_name: Name given to the module spec (and thus to the module).
        path: Location of the file to load the module from.

    Returns:
        The module object after its code has been executed. This function
        does not itself register the module in ``sys.modules``.

    Raises:
        ImportError: If no module spec or no loader can be determined for
            *path* (for example, a file suffix no import loader handles).
    """
    spec = importlib.util.spec_from_file_location(full_name, path)
    # Raise explicitly instead of asserting: asserts are stripped under -O,
    # and a missing spec/loader would otherwise surface as an AttributeError.
    if spec is None or spec.loader is None:
        raise ImportError(
            f"cannot find a loader for module {full_name!r} at {path!r}",
            name=full_name,
            path=path,
        )
    mod = importlib.util.module_from_spec(spec)

    # The loader is assumed to provide exec_module(); the cast keeps type
    # checkers quiet about that.
    # See https://docs.python.org/3/reference/import.html?highlight=exec_module#loading
    loader = cast(Any, spec.loader)
    loader.exec_module(mod)
    return mod
76
77
def generate_c_parser_source(grammar: Grammar) -> str:
    """Return the C parser source generated for *grammar* as a string."""
    buffer = io.StringIO()
    generator = CParserGenerator(
        grammar, ALL_TOKENS, EXACT_TOKENS, NON_EXACT_TOKENS, buffer
    )
    generator.generate("<string>")
    return buffer.getvalue()
83
84
def generate_parser_c_extension(
    grammar: Grammar, path: pathlib.PurePath, debug: bool = False,
    library_dir: Optional[str] = None,
) -> Any:
    """Generate and compile a parser C extension for *grammar* inside *path*.

    The generated C source is written to ``parse.c`` in *path* and then
    passed to compile_c_extension() with *path* as the build directory and
    optimization disabled. *debug* is forwarded to CParserGenerator and
    *library_dir* to compile_c_extension().

    NOTE(review): nothing is returned (the function falls off the end)
    even though the return annotation is ``Any``; callers presumably load
    the built extension from *path* themselves -- confirm.
    """
    # The working directory must be empty: reusing non-empty temporary
    # directories when generating extensions can lead to segmentation faults.
    # See issue #95 (https://github.com/gvanrossum/pegen/issues/95) for more
    # context.
    assert not os.listdir(path)

    c_source = path.joinpath("parse.c")
    with open(c_source, "w", encoding="utf-8") as out:
        generator = CParserGenerator(
            grammar, ALL_TOKENS, EXACT_TOKENS, NON_EXACT_TOKENS, out, debug=debug
        )
        generator.generate("parse.c")

    build_options = {
        "build_dir": str(path),
        # Significant test_peg_generator speedups
        "disable_optimization": True,
        "library_dir": library_dir,
    }
    compile_c_extension(str(c_source), **build_options)
112
113
def print_memstats() -> bool:
    """Print the current process's memory statistics, in MiB, to stdout.

    Reports ``rss``, ``vms`` and ``maxrss``. Returns False without printing
    anything when psutil cannot be imported, and True otherwise.
    """
    try:
        import psutil  # type: ignore
    except ImportError:
        return False

    MiB: Final = 2 ** 20
    print("Memory stats:")
    meminfo = psutil.Process().memory_info()
    stats = {"rss": meminfo.rss / MiB, "vms": meminfo.vms / MiB}
    if sys.platform == "win32":
        stats["maxrss"] = meminfo.peak_wset / MiB
    else:
        # See https://stackoverflow.com/questions/938733/total-memory-used-by-python-process
        import resource  # Since it doesn't exist on Windows.

        # ru_maxrss is scaled by 1 on macOS and by 1024 elsewhere (Linux)
        # before being converted to MiB.
        scale = 1 if sys.platform == "darwin" else 1024
        peak = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
        stats["maxrss"] = peak * scale / MiB
    for label, amount in stats.items():
        print(f"  {label:12.12s}: {amount:10.0f} MiB")
    return True
141