import ast
import os.path
import re
from dataclasses import dataclass, field
from enum import Enum
from typing import IO, Any, Dict, List, Optional, Set, Text, Tuple

from pegen import grammar
from pegen.grammar import (
    Alt,
    Cut,
    Forced,
    Gather,
    GrammarVisitor,
    Group,
    Leaf,
    Lookahead,
    NamedItem,
    NameLeaf,
    NegativeLookahead,
    Opt,
    PositiveLookahead,
    Repeat0,
    Repeat1,
    Rhs,
    Rule,
    StringLeaf,
)
from pegen.parser_generator import ParserGenerator

EXTENSION_PREFIX = """\
#include "pegen.h"

#if defined(Py_DEBUG) && defined(Py_BUILD_CORE)
# define D(x) if (Py_DebugFlag) x;
#else
# define D(x)
#endif

#ifdef __wasi__
# define MAXSTACK 4000
#else
# define MAXSTACK 6000
#endif

"""


EXTENSION_SUFFIX = """
void *
_PyPegen_parse(Parser *p)
{
    // Initialize keywords
    p->keywords = reserved_keywords;
    p->n_keyword_lists = n_keyword_lists;
    p->soft_keywords = soft_keywords;

    return start_rule(p);
}
"""


class NodeTypes(Enum):
    NAME_TOKEN = 0
    NUMBER_TOKEN = 1
    STRING_TOKEN = 2
    GENERIC_TOKEN = 3
    KEYWORD = 4
    SOFT_KEYWORD = 5
    CUT_OPERATOR = 6


BASE_NODETYPES = {
    "NAME": NodeTypes.NAME_TOKEN,
    "NUMBER": NodeTypes.NUMBER_TOKEN,
    "STRING": NodeTypes.STRING_TOKEN,
    "SOFT_KEYWORD": NodeTypes.SOFT_KEYWORD,
}


@dataclass
class FunctionCall:
    function: str
    arguments: List[Any] = field(default_factory=list)
    assigned_variable: Optional[str] = None
    assigned_variable_type: Optional[str] = None
    return_type: Optional[str] = None
    nodetype: Optional[NodeTypes] = None
    force_true: bool = False
    comment: Optional[str] = None

    def __str__(self) -> str:
        parts = []
        parts.append(self.function)
        if self.arguments:
            parts.append(f"({', '.join(map(str, self.arguments))})")
        if self.force_true:
            parts.append(", !p->error_indicator")
        if self.assigned_variable:
            if self.assigned_variable_type:
                parts = [
                    "(",
                    self.assigned_variable,
                    " = ",
                    "(",
                    self.assigned_variable_type,
                    ")",
                    *parts,
                    ")",
                ]
            else:
                parts = ["(", self.assigned_variable, " = ", *parts, ")"]
        if self.comment:
            parts.append(f" // {self.comment}")
        return "".join(parts)


class CCallMakerVisitor(GrammarVisitor):
    def __init__(
        self,
        parser_generator: ParserGenerator,
        exact_tokens: Dict[str, int],
        non_exact_tokens: Set[str],
    ):
        self.gen = parser_generator
        self.exact_tokens = exact_tokens
        self.non_exact_tokens = non_exact_tokens
        self.cache: Dict[Any, FunctionCall] = {}
        self.cleanup_statements: List[str] = []

    def keyword_helper(self, keyword: str) -> FunctionCall:
        return FunctionCall(
            assigned_variable="_keyword",
            function="_PyPegen_expect_token",
            arguments=["p", self.gen.keywords[keyword]],
            return_type="Token *",
            nodetype=NodeTypes.KEYWORD,
            comment=f"token='{keyword}'",
        )

    def soft_keyword_helper(self, value: str) -> FunctionCall:
        return FunctionCall(
            assigned_variable="_keyword",
            function="_PyPegen_expect_soft_keyword",
            arguments=["p", value],
            return_type="expr_ty",
            nodetype=NodeTypes.SOFT_KEYWORD,
            comment=f"soft_keyword='{value}'",
        )

    def visit_NameLeaf(self, node: NameLeaf) -> FunctionCall:
        name = node.value
        if name in self.non_exact_tokens:
            if name in BASE_NODETYPES:
                return FunctionCall(
                    assigned_variable=f"{name.lower()}_var",
function=f"_PyPegen_{name.lower()}_token", 158 arguments=["p"], 159 nodetype=BASE_NODETYPES[name], 160 return_type="expr_ty", 161 comment=name, 162 ) 163 return FunctionCall( 164 assigned_variable=f"{name.lower()}_var", 165 function=f"_PyPegen_expect_token", 166 arguments=["p", name], 167 nodetype=NodeTypes.GENERIC_TOKEN, 168 return_type="Token *", 169 comment=f"token='{name}'", 170 ) 171 172 type = None 173 rule = self.gen.all_rules.get(name.lower()) 174 if rule is not None: 175 type = "asdl_seq *" if rule.is_loop() or rule.is_gather() else rule.type 176 177 return FunctionCall( 178 assigned_variable=f"{name}_var", 179 function=f"{name}_rule", 180 arguments=["p"], 181 return_type=type, 182 comment=f"{node}", 183 ) 184 185 def visit_StringLeaf(self, node: StringLeaf) -> FunctionCall: 186 val = ast.literal_eval(node.value) 187 if re.match(r"[a-zA-Z_]\w*\Z", val): # This is a keyword 188 if node.value.endswith("'"): 189 return self.keyword_helper(val) 190 else: 191 return self.soft_keyword_helper(node.value) 192 else: 193 assert val in self.exact_tokens, f"{node.value} is not a known literal" 194 type = self.exact_tokens[val] 195 return FunctionCall( 196 assigned_variable="_literal", 197 function=f"_PyPegen_expect_token", 198 arguments=["p", type], 199 nodetype=NodeTypes.GENERIC_TOKEN, 200 return_type="Token *", 201 comment=f"token='{val}'", 202 ) 203 204 def visit_Rhs(self, node: Rhs) -> FunctionCall: 205 if node in self.cache: 206 return self.cache[node] 207 if node.can_be_inlined: 208 self.cache[node] = self.generate_call(node.alts[0].items[0]) 209 else: 210 name = self.gen.artifical_rule_from_rhs(node) 211 self.cache[node] = FunctionCall( 212 assigned_variable=f"{name}_var", 213 function=f"{name}_rule", 214 arguments=["p"], 215 comment=f"{node}", 216 ) 217 return self.cache[node] 218 219 def visit_NamedItem(self, node: NamedItem) -> FunctionCall: 220 call = self.generate_call(node.item) 221 if node.name: 222 call.assigned_variable = node.name 223 if node.type: 224 call.assigned_variable_type = node.type 225 return call 226 227 def lookahead_call_helper(self, node: Lookahead, positive: int) -> FunctionCall: 228 call = self.generate_call(node.node) 229 if call.nodetype == NodeTypes.NAME_TOKEN: 230 return FunctionCall( 231 function=f"_PyPegen_lookahead_with_name", 232 arguments=[positive, call.function, *call.arguments], 233 return_type="int", 234 ) 235 elif call.nodetype == NodeTypes.SOFT_KEYWORD: 236 return FunctionCall( 237 function=f"_PyPegen_lookahead_with_string", 238 arguments=[positive, call.function, *call.arguments], 239 return_type="int", 240 ) 241 elif call.nodetype in {NodeTypes.GENERIC_TOKEN, NodeTypes.KEYWORD}: 242 return FunctionCall( 243 function=f"_PyPegen_lookahead_with_int", 244 arguments=[positive, call.function, *call.arguments], 245 return_type="int", 246 comment=f"token={node.node}", 247 ) 248 else: 249 return FunctionCall( 250 function=f"_PyPegen_lookahead", 251 arguments=[positive, call.function, *call.arguments], 252 return_type="int", 253 ) 254 255 def visit_PositiveLookahead(self, node: PositiveLookahead) -> FunctionCall: 256 return self.lookahead_call_helper(node, 1) 257 258 def visit_NegativeLookahead(self, node: NegativeLookahead) -> FunctionCall: 259 return self.lookahead_call_helper(node, 0) 260 261 def visit_Forced(self, node: Forced) -> FunctionCall: 262 call = self.generate_call(node.node) 263 if isinstance(node.node, Leaf): 264 assert isinstance(node.node, Leaf) 265 val = ast.literal_eval(node.node.value) 266 assert val in self.exact_tokens, 
f"{node.node.value} is not a known literal" 267 type = self.exact_tokens[val] 268 return FunctionCall( 269 assigned_variable="_literal", 270 function=f"_PyPegen_expect_forced_token", 271 arguments=["p", type, f'"{val}"'], 272 nodetype=NodeTypes.GENERIC_TOKEN, 273 return_type="Token *", 274 comment=f"forced_token='{val}'", 275 ) 276 if isinstance(node.node, Group): 277 call = self.visit(node.node.rhs) 278 call.assigned_variable = None 279 call.comment = None 280 return FunctionCall( 281 assigned_variable="_literal", 282 function=f"_PyPegen_expect_forced_result", 283 arguments=["p", str(call), f'"{node.node.rhs!s}"'], 284 return_type="void *", 285 comment=f"forced_token=({node.node.rhs!s})", 286 ) 287 else: 288 raise NotImplementedError(f"Forced tokens don't work with {node.node} nodes") 289 290 def visit_Opt(self, node: Opt) -> FunctionCall: 291 call = self.generate_call(node.node) 292 return FunctionCall( 293 assigned_variable="_opt_var", 294 function=call.function, 295 arguments=call.arguments, 296 force_true=True, 297 comment=f"{node}", 298 ) 299 300 def visit_Repeat0(self, node: Repeat0) -> FunctionCall: 301 if node in self.cache: 302 return self.cache[node] 303 name = self.gen.artificial_rule_from_repeat(node.node, False) 304 self.cache[node] = FunctionCall( 305 assigned_variable=f"{name}_var", 306 function=f"{name}_rule", 307 arguments=["p"], 308 return_type="asdl_seq *", 309 comment=f"{node}", 310 ) 311 return self.cache[node] 312 313 def visit_Repeat1(self, node: Repeat1) -> FunctionCall: 314 if node in self.cache: 315 return self.cache[node] 316 name = self.gen.artificial_rule_from_repeat(node.node, True) 317 self.cache[node] = FunctionCall( 318 assigned_variable=f"{name}_var", 319 function=f"{name}_rule", 320 arguments=["p"], 321 return_type="asdl_seq *", 322 comment=f"{node}", 323 ) 324 return self.cache[node] 325 326 def visit_Gather(self, node: Gather) -> FunctionCall: 327 if node in self.cache: 328 return self.cache[node] 329 name = self.gen.artifical_rule_from_gather(node) 330 self.cache[node] = FunctionCall( 331 assigned_variable=f"{name}_var", 332 function=f"{name}_rule", 333 arguments=["p"], 334 return_type="asdl_seq *", 335 comment=f"{node}", 336 ) 337 return self.cache[node] 338 339 def visit_Group(self, node: Group) -> FunctionCall: 340 return self.generate_call(node.rhs) 341 342 def visit_Cut(self, node: Cut) -> FunctionCall: 343 return FunctionCall( 344 assigned_variable="_cut_var", 345 return_type="int", 346 function="1", 347 nodetype=NodeTypes.CUT_OPERATOR, 348 ) 349 350 def generate_call(self, node: Any) -> FunctionCall: 351 return super().visit(node) 352 353 354class CParserGenerator(ParserGenerator, GrammarVisitor): 355 def __init__( 356 self, 357 grammar: grammar.Grammar, 358 tokens: Dict[int, str], 359 exact_tokens: Dict[str, int], 360 non_exact_tokens: Set[str], 361 file: Optional[IO[Text]], 362 debug: bool = False, 363 skip_actions: bool = False, 364 ): 365 super().__init__(grammar, set(tokens.values()), file) 366 self.callmakervisitor: CCallMakerVisitor = CCallMakerVisitor( 367 self, exact_tokens, non_exact_tokens 368 ) 369 self._varname_counter = 0 370 self.debug = debug 371 self.skip_actions = skip_actions 372 self.cleanup_statements: List[str] = [] 373 374 def add_level(self) -> None: 375 self.print("if (p->level++ == MAXSTACK) {") 376 with self.indent(): 377 self.print("p->error_indicator = 1;") 378 self.print("PyErr_NoMemory();") 379 self.print("}") 380 381 def remove_level(self) -> None: 382 self.print("p->level--;") 383 384 def add_return(self, 
        for stmt in self.cleanup_statements:
            self.print(stmt)
        self.remove_level()
        self.print(f"return {ret_val};")

    def unique_varname(self, name: str = "tmpvar") -> str:
        new_var = name + "_" + str(self._varname_counter)
        self._varname_counter += 1
        return new_var

    def call_with_errorcheck_return(self, call_text: str, returnval: str) -> None:
        error_var = self.unique_varname()
        self.print(f"int {error_var} = {call_text};")
        self.print(f"if ({error_var}) {{")
        with self.indent():
            self.add_return(returnval)
        self.print("}")

    def call_with_errorcheck_goto(self, call_text: str, goto_target: str) -> None:
        error_var = self.unique_varname()
        self.print(f"int {error_var} = {call_text};")
        self.print(f"if ({error_var}) {{")
        with self.indent():
            self.print(f"goto {goto_target};")
        self.print(f"}}")

    def out_of_memory_return(
        self,
        expr: str,
        cleanup_code: Optional[str] = None,
    ) -> None:
        self.print(f"if ({expr}) {{")
        with self.indent():
            if cleanup_code is not None:
                self.print(cleanup_code)
            self.print("p->error_indicator = 1;")
            self.print("PyErr_NoMemory();")
            self.add_return("NULL")
        self.print(f"}}")

    def out_of_memory_goto(self, expr: str, goto_target: str) -> None:
        self.print(f"if ({expr}) {{")
        with self.indent():
            self.print("PyErr_NoMemory();")
            self.print(f"goto {goto_target};")
        self.print(f"}}")

    def generate(self, filename: str) -> None:
        self.collect_rules()
        basename = os.path.basename(filename)
        self.print(f"// @generated by pegen from {basename}")
        header = self.grammar.metas.get("header", EXTENSION_PREFIX)
        if header:
            self.print(header.rstrip("\n"))
        subheader = self.grammar.metas.get("subheader", "")
        if subheader:
            self.print(subheader)
        self._setup_keywords()
        self._setup_soft_keywords()
        for i, (rulename, rule) in enumerate(self.all_rules.items(), 1000):
            comment = " // Left-recursive" if rule.left_recursive else ""
            self.print(f"#define {rulename}_type {i}{comment}")
        self.print()
        for rulename, rule in self.all_rules.items():
            if rule.is_loop() or rule.is_gather():
                type = "asdl_seq *"
            elif rule.type:
                type = rule.type + " "
            else:
                type = "void *"
            self.print(f"static {type}{rulename}_rule(Parser *p);")
        self.print()
        for rulename, rule in list(self.all_rules.items()):
            self.print()
            if rule.left_recursive:
                self.print("// Left-recursive")
            self.visit(rule)
        if self.skip_actions:
            mode = 0
        else:
            mode = int(self.rules["start"].type == "mod_ty") if "start" in self.rules else 1
            if mode == 1 and self.grammar.metas.get("bytecode"):
                mode += 1
        modulename = self.grammar.metas.get("modulename", "parse")
        trailer = self.grammar.metas.get("trailer", EXTENSION_SUFFIX)
        if trailer:
            self.print(trailer.rstrip("\n") % dict(mode=mode, modulename=modulename))

    def _group_keywords_by_length(self) -> Dict[int, List[Tuple[str, int]]]:
        groups: Dict[int, List[Tuple[str, int]]] = {}
        for keyword_str, keyword_type in self.keywords.items():
            length = len(keyword_str)
            if length in groups:
                groups[length].append((keyword_str, keyword_type))
            else:
                groups[length] = [(keyword_str, keyword_type)]
        return groups

    def _setup_keywords(self) -> None:
        n_keyword_lists = (
            len(max(self.keywords.keys(), key=len)) + 1 if len(self.keywords) > 0 else 0
        )
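        # reserved_keywords is indexed by keyword length, so it needs one slot per
        # length up to (and including) the longest keyword.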
self.print(f"static const int n_keyword_lists = {n_keyword_lists};") 488 groups = self._group_keywords_by_length() 489 self.print("static KeywordToken *reserved_keywords[] = {") 490 with self.indent(): 491 num_groups = max(groups) + 1 if groups else 1 492 for keywords_length in range(num_groups): 493 if keywords_length not in groups.keys(): 494 self.print("(KeywordToken[]) {{NULL, -1}},") 495 else: 496 self.print("(KeywordToken[]) {") 497 with self.indent(): 498 for keyword_str, keyword_type in groups[keywords_length]: 499 self.print(f'{{"{keyword_str}", {keyword_type}}},') 500 self.print("{NULL, -1},") 501 self.print("},") 502 self.print("};") 503 504 def _setup_soft_keywords(self) -> None: 505 soft_keywords = sorted(self.soft_keywords) 506 self.print("static char *soft_keywords[] = {") 507 with self.indent(): 508 for keyword in soft_keywords: 509 self.print(f'"{keyword}",') 510 self.print("NULL,") 511 self.print("};") 512 513 def _set_up_token_start_metadata_extraction(self) -> None: 514 self.print("if (p->mark == p->fill && _PyPegen_fill_token(p) < 0) {") 515 with self.indent(): 516 self.print("p->error_indicator = 1;") 517 self.add_return("NULL") 518 self.print("}") 519 self.print("int _start_lineno = p->tokens[_mark]->lineno;") 520 self.print("UNUSED(_start_lineno); // Only used by EXTRA macro") 521 self.print("int _start_col_offset = p->tokens[_mark]->col_offset;") 522 self.print("UNUSED(_start_col_offset); // Only used by EXTRA macro") 523 524 def _set_up_token_end_metadata_extraction(self) -> None: 525 self.print("Token *_token = _PyPegen_get_last_nonnwhitespace_token(p);") 526 self.print("if (_token == NULL) {") 527 with self.indent(): 528 self.add_return("NULL") 529 self.print("}") 530 self.print("int _end_lineno = _token->end_lineno;") 531 self.print("UNUSED(_end_lineno); // Only used by EXTRA macro") 532 self.print("int _end_col_offset = _token->end_col_offset;") 533 self.print("UNUSED(_end_col_offset); // Only used by EXTRA macro") 534 535 def _check_for_errors(self) -> None: 536 self.print("if (p->error_indicator) {") 537 with self.indent(): 538 self.add_return("NULL") 539 self.print("}") 540 541 def _set_up_rule_memoization(self, node: Rule, result_type: str) -> None: 542 self.print("{") 543 with self.indent(): 544 self.add_level() 545 self.print(f"{result_type} _res = NULL;") 546 self.print(f"if (_PyPegen_is_memoized(p, {node.name}_type, &_res)) {{") 547 with self.indent(): 548 self.add_return("_res") 549 self.print("}") 550 self.print("int _mark = p->mark;") 551 self.print("int _resmark = p->mark;") 552 self.print("while (1) {") 553 with self.indent(): 554 self.call_with_errorcheck_return( 555 f"_PyPegen_update_memo(p, _mark, {node.name}_type, _res)", "_res" 556 ) 557 self.print("p->mark = _mark;") 558 self.print(f"void *_raw = {node.name}_raw(p);") 559 self.print("if (p->error_indicator) {") 560 with self.indent(): 561 self.add_return("NULL") 562 self.print("}") 563 self.print("if (_raw == NULL || p->mark <= _resmark)") 564 with self.indent(): 565 self.print("break;") 566 self.print(f"_resmark = p->mark;") 567 self.print("_res = _raw;") 568 self.print("}") 569 self.print(f"p->mark = _resmark;") 570 self.add_return("_res") 571 self.print("}") 572 self.print(f"static {result_type}") 573 self.print(f"{node.name}_raw(Parser *p)") 574 575 def _should_memoize(self, node: Rule) -> bool: 576 return node.memo and not node.left_recursive 577 578 def _handle_default_rule_body(self, node: Rule, rhs: Rhs, result_type: str) -> None: 579 memoize = self._should_memoize(node) 580 581 with 
            self.add_level()
            self._check_for_errors()
            self.print(f"{result_type} _res = NULL;")
            if memoize:
                self.print(f"if (_PyPegen_is_memoized(p, {node.name}_type, &_res)) {{")
                with self.indent():
                    self.add_return("_res")
                self.print("}")
            self.print("int _mark = p->mark;")
            if any(alt.action and "EXTRA" in alt.action for alt in rhs.alts):
                self._set_up_token_start_metadata_extraction()
            self.visit(
                rhs,
                is_loop=False,
                is_gather=node.is_gather(),
                rulename=node.name,
            )
            if self.debug:
                self.print(f'D(fprintf(stderr, "Fail at %d: {node.name}\\n", p->mark));')
            self.print("_res = NULL;")
        self.print(" done:")
        with self.indent():
            if memoize:
                self.print(f"_PyPegen_insert_memo(p, _mark, {node.name}_type, _res);")
            self.add_return("_res")

    def _handle_loop_rule_body(self, node: Rule, rhs: Rhs) -> None:
        memoize = self._should_memoize(node)
        is_repeat1 = node.name.startswith("_loop1")

        with self.indent():
            self.add_level()
            self._check_for_errors()
            self.print("void *_res = NULL;")
            if memoize:
                self.print(f"if (_PyPegen_is_memoized(p, {node.name}_type, &_res)) {{")
                with self.indent():
                    self.add_return("_res")
                self.print("}")
            self.print("int _mark = p->mark;")
            if memoize:
                self.print("int _start_mark = p->mark;")
            self.print("void **_children = PyMem_Malloc(sizeof(void *));")
            self.out_of_memory_return(f"!_children")
            self.print("Py_ssize_t _children_capacity = 1;")
            self.print("Py_ssize_t _n = 0;")
            if any(alt.action and "EXTRA" in alt.action for alt in rhs.alts):
                self._set_up_token_start_metadata_extraction()
            self.visit(
                rhs,
                is_loop=True,
                is_gather=node.is_gather(),
                rulename=node.name,
            )
            if is_repeat1:
                self.print("if (_n == 0 || p->error_indicator) {")
                with self.indent():
                    self.print("PyMem_Free(_children);")
                    self.add_return("NULL")
                self.print("}")
            self.print("asdl_seq *_seq = (asdl_seq*)_Py_asdl_generic_seq_new(_n, p->arena);")
            self.out_of_memory_return(f"!_seq", cleanup_code="PyMem_Free(_children);")
            self.print("for (int i = 0; i < _n; i++) asdl_seq_SET_UNTYPED(_seq, i, _children[i]);")
            self.print("PyMem_Free(_children);")
            if memoize and node.name:
                self.print(f"_PyPegen_insert_memo(p, _start_mark, {node.name}_type, _seq);")
            self.add_return("_seq")

    def visit_Rule(self, node: Rule) -> None:
        is_loop = node.is_loop()
        is_gather = node.is_gather()
        rhs = node.flatten()
        if is_loop or is_gather:
            result_type = "asdl_seq *"
        elif node.type:
            result_type = node.type
        else:
            result_type = "void *"

        for line in str(node).splitlines():
            self.print(f"// {line}")
        if node.left_recursive and node.leader:
            self.print(f"static {result_type} {node.name}_raw(Parser *);")

        self.print(f"static {result_type}")
        self.print(f"{node.name}_rule(Parser *p)")

        if node.left_recursive and node.leader:
            self._set_up_rule_memoization(node, result_type)

        self.print("{")

        if node.name.endswith("without_invalid"):
            with self.indent():
                self.print("int _prev_call_invalid = p->call_invalid_rules;")
                self.print("p->call_invalid_rules = 0;")
                self.cleanup_statements.append("p->call_invalid_rules = _prev_call_invalid;")

        if is_loop:
            self._handle_loop_rule_body(node, rhs)
        else:
            self._handle_default_rule_body(node, rhs, result_type)

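        # The cleanup statement pushed above has already been emitted by add_return()
        # inside the rule body, so drop it before moving on to the next rule.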
node.name.endswith("without_invalid"): 686 self.cleanup_statements.pop() 687 688 self.print("}") 689 690 def visit_NamedItem(self, node: NamedItem) -> None: 691 call = self.callmakervisitor.generate_call(node) 692 if call.assigned_variable: 693 call.assigned_variable = self.dedupe(call.assigned_variable) 694 self.print(call) 695 696 def visit_Rhs( 697 self, node: Rhs, is_loop: bool, is_gather: bool, rulename: Optional[str] 698 ) -> None: 699 if is_loop: 700 assert len(node.alts) == 1 701 for alt in node.alts: 702 self.visit(alt, is_loop=is_loop, is_gather=is_gather, rulename=rulename) 703 704 def join_conditions(self, keyword: str, node: Any) -> None: 705 self.print(f"{keyword} (") 706 with self.indent(): 707 first = True 708 for item in node.items: 709 if first: 710 first = False 711 else: 712 self.print("&&") 713 self.visit(item) 714 self.print(")") 715 716 def emit_action(self, node: Alt, cleanup_code: Optional[str] = None) -> None: 717 self.print(f"_res = {node.action};") 718 719 self.print("if (_res == NULL && PyErr_Occurred()) {") 720 with self.indent(): 721 self.print("p->error_indicator = 1;") 722 if cleanup_code: 723 self.print(cleanup_code) 724 self.add_return("NULL") 725 self.print("}") 726 727 if self.debug: 728 self.print( 729 f'D(fprintf(stderr, "Hit with action [%d-%d]: %s\\n", _mark, p->mark, "{node}"));' 730 ) 731 732 def emit_default_action(self, is_gather: bool, node: Alt) -> None: 733 if len(self.local_variable_names) > 1: 734 if is_gather: 735 assert len(self.local_variable_names) == 2 736 self.print( 737 f"_res = _PyPegen_seq_insert_in_front(p, " 738 f"{self.local_variable_names[0]}, {self.local_variable_names[1]});" 739 ) 740 else: 741 if self.debug: 742 self.print( 743 f'D(fprintf(stderr, "Hit without action [%d:%d]: %s\\n", _mark, p->mark, "{node}"));' 744 ) 745 self.print( 746 f"_res = _PyPegen_dummy_name(p, {', '.join(self.local_variable_names)});" 747 ) 748 else: 749 if self.debug: 750 self.print( 751 f'D(fprintf(stderr, "Hit with default action [%d:%d]: %s\\n", _mark, p->mark, "{node}"));' 752 ) 753 self.print(f"_res = {self.local_variable_names[0]};") 754 755 def emit_dummy_action(self) -> None: 756 self.print("_res = _PyPegen_dummy_name(p);") 757 758 def handle_alt_normal(self, node: Alt, is_gather: bool, rulename: Optional[str]) -> None: 759 self.join_conditions(keyword="if", node=node) 760 self.print("{") 761 # We have parsed successfully all the conditions for the option. 762 with self.indent(): 763 node_str = str(node).replace('"', '\\"') 764 self.print( 765 f'D(fprintf(stderr, "%*c+ {rulename}[%d-%d]: %s succeeded!\\n", p->level, \' \', _mark, p->mark, "{node_str}"));' 766 ) 767 # Prepare to emit the rule action and do so 768 if node.action and "EXTRA" in node.action: 769 self._set_up_token_end_metadata_extraction() 770 if self.skip_actions: 771 self.emit_dummy_action() 772 elif node.action: 773 self.emit_action(node) 774 else: 775 self.emit_default_action(is_gather, node) 776 777 # As the current option has parsed correctly, do not continue with the rest. 778 self.print(f"goto done;") 779 self.print("}") 780 781 def handle_alt_loop(self, node: Alt, is_gather: bool, rulename: Optional[str]) -> None: 782 # Condition of the main body of the alternative 783 self.join_conditions(keyword="while", node=node) 784 self.print("{") 785 # We have parsed successfully one item! 
        with self.indent():
            # Prepare to emit the rule action and do so
            if node.action and "EXTRA" in node.action:
                self._set_up_token_end_metadata_extraction()
            if self.skip_actions:
                self.emit_dummy_action()
            elif node.action:
                self.emit_action(node, cleanup_code="PyMem_Free(_children);")
            else:
                self.emit_default_action(is_gather, node)

            # Add the result of rule to the temporary buffer of children. This buffer
            # will populate later an asdl_seq with all elements to return.
            self.print("if (_n == _children_capacity) {")
            with self.indent():
                self.print("_children_capacity *= 2;")
                self.print(
                    "void **_new_children = PyMem_Realloc(_children, _children_capacity*sizeof(void *));"
                )
                self.out_of_memory_return(f"!_new_children", cleanup_code="PyMem_Free(_children);")
                self.print("_children = _new_children;")
            self.print("}")
            self.print("_children[_n++] = _res;")
            self.print("_mark = p->mark;")
        self.print("}")

    def visit_Alt(
        self, node: Alt, is_loop: bool, is_gather: bool, rulename: Optional[str]
    ) -> None:
        if len(node.items) == 1 and str(node.items[0]).startswith("invalid_"):
            self.print(f"if (p->call_invalid_rules) {{ // {node}")
        else:
            self.print(f"{{ // {node}")
        with self.indent():
            self._check_for_errors()
            node_str = str(node).replace('"', '\\"')
            self.print(
                f'D(fprintf(stderr, "%*c> {rulename}[%d-%d]: %s\\n", p->level, \' \', _mark, p->mark, "{node_str}"));'
            )
            # Prepare variable declarations for the alternative
            vars = self.collect_vars(node)
            for v, var_type in sorted(item for item in vars.items() if item[0] is not None):
                if not var_type:
                    var_type = "void *"
                else:
                    var_type += " "
                if v == "_cut_var":
                    v += " = 0"  # cut_var must be initialized
                self.print(f"{var_type}{v};")
                if v and v.startswith("_opt_var"):
                    self.print(f"UNUSED({v}); // Silence compiler warnings")

            with self.local_variable_context():
                if is_loop:
                    self.handle_alt_loop(node, is_gather, rulename)
                else:
                    self.handle_alt_normal(node, is_gather, rulename)

            self.print("p->mark = _mark;")
            node_str = str(node).replace('"', '\\"')
            self.print(
                f"D(fprintf(stderr, \"%*c%s {rulename}[%d-%d]: %s failed!\\n\", p->level, ' ',\n"
                f' p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "{node_str}"));'
            )
            if "_cut_var" in vars:
                self.print("if (_cut_var) {")
                with self.indent():
                    self.add_return("NULL")
                self.print("}")
        self.print("}")

    def collect_vars(self, node: Alt) -> Dict[Optional[str], Optional[str]]:
        types = {}
        with self.local_variable_context():
            for item in node.items:
                name, type = self.add_var(item)
                types[name] = type
        return types

    def add_var(self, node: NamedItem) -> Tuple[Optional[str], Optional[str]]:
        call = self.callmakervisitor.generate_call(node.item)
        name = node.name if node.name else call.assigned_variable
        if name is not None:
            name = self.dedupe(name)
        return_type = call.return_type if node.type is None else node.type
        return name, return_type
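
# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, not part of the generator). The
# CParserGenerator construction and generate() call below match the API defined
# in this module; the `pegen.build` helper names are assumptions about the
# surrounding package layout and may differ.
#
#     from pegen.build import build_parser, generate_token_definitions
#
#     grammar, _, _ = build_parser("python.gram")   # parse the grammar file
#     with open("Tokens") as tok_file:
#         all_tokens, exact_tokens, non_exact_tokens = generate_token_definitions(tok_file)
#     with open("parser.c", "w") as out:
#         gen = CParserGenerator(
#             grammar, all_tokens, exact_tokens, non_exact_tokens, file=out
#         )
#         gen.generate("python.gram")               # writes the C parser to `out`
# ---------------------------------------------------------------------------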