1*99e0aae7SDavid Rees# Copyright 2019 Google LLC 2*99e0aae7SDavid Rees# 3*99e0aae7SDavid Rees# Licensed under the Apache License, Version 2.0 (the "License"); 4*99e0aae7SDavid Rees# you may not use this file except in compliance with the License. 5*99e0aae7SDavid Rees# You may obtain a copy of the License at 6*99e0aae7SDavid Rees# 7*99e0aae7SDavid Rees# https://www.apache.org/licenses/LICENSE-2.0 8*99e0aae7SDavid Rees# 9*99e0aae7SDavid Rees# Unless required by applicable law or agreed to in writing, software 10*99e0aae7SDavid Rees# distributed under the License is distributed on an "AS IS" BASIS, 11*99e0aae7SDavid Rees# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12*99e0aae7SDavid Rees# See the License for the specific language governing permissions and 13*99e0aae7SDavid Rees# limitations under the License. 14*99e0aae7SDavid Rees 15*99e0aae7SDavid Rees"""module_ir contains code for generating module-level IRs from parse trees. 16*99e0aae7SDavid Rees 17*99e0aae7SDavid ReesThe primary export is build_ir(), which takes a parse tree (as returned by a 18*99e0aae7SDavid Reesparser from lr1.py), and returns a module-level intermediate representation 19*99e0aae7SDavid Rees("module IR"). 20*99e0aae7SDavid Rees 21*99e0aae7SDavid ReesThis module also notably exports PRODUCTIONS and START_SYMBOL, which should be 22*99e0aae7SDavid Reesfed to lr1.Grammar in order to create a parser for the Emboss language. 23*99e0aae7SDavid Rees""" 24*99e0aae7SDavid Rees 25*99e0aae7SDavid Reesimport re 26*99e0aae7SDavid Reesimport sys 27*99e0aae7SDavid Rees 28*99e0aae7SDavid Reesfrom compiler.util import ir_data 29*99e0aae7SDavid Reesfrom compiler.util import ir_data_utils 30*99e0aae7SDavid Reesfrom compiler.util import name_conversion 31*99e0aae7SDavid Reesfrom compiler.util import parser_types 32*99e0aae7SDavid Rees 33*99e0aae7SDavid Rees 34*99e0aae7SDavid Rees# Intermediate types; should not be found in the final IR. 
class _List(object):
  """A list with source location information.

  Attributes:
    list: The wrapped Python list.
    source_location: An ir_data.Location; default-constructed here.
  """
  __slots__ = ('list', 'source_location')

  def __init__(self, l):
    # `l` is wrapped in a 1-tuple so that a tuple argument is reported through
    # the assertion message instead of raising TypeError from %-formatting.
    assert isinstance(l, list), (
        "_List object must wrap list, not '%r'" % (l,))
    self.list = l
    self.source_location = ir_data.Location()


class _ExpressionTail(object):
  """A fragment of an expression with an operator and right-hand side.

  _ExpressionTail is the tail of an expression, consisting of an operator and
  the right-hand argument to the operator; for example, in the expression (6+8),
  the _ExpressionTail would be "+8".

  This is used as a temporary object while converting the right-recursive
  "expression" and "times-expression" productions into left-associative
  Expressions.

  Attributes:
    operator: An ir_data.Word of the operator's name.
    expression: The expression on the right side of the operator.
    source_location: The source location of the operation fragment.
  """
  __slots__ = ('operator', 'expression', 'source_location')

  def __init__(self, operator, expression):
    self.operator = operator
    self.expression = expression
    self.source_location = ir_data.Location()


class _FieldWithType(object):
  """A field with zero or more types defined inline with that field.

  Attributes:
    field: The field itself.
    subtypes: A list of the inline types; empty if none were given.
    source_location: An ir_data.Location; default-constructed here.
  """
  __slots__ = ('field', 'subtypes', 'source_location')

  def __init__(self, field, subtypes=None):
    self.field = field
    self.subtypes = subtypes or []
    self.source_location = ir_data.Location()


def build_ir(parse_tree, used_productions=None):
  r"""Builds a module-level intermediate representation from a valid parse tree.

  The parse tree is precisely dictated by the exact productions in the grammar
  used by the parser, with no semantic information.  _really_build_ir transforms
  this "raw" form into a stable, cooked representation, thereby isolating
  subsequent steps from the exact details of the grammar.

  (Probably incomplete) list of transformations:

  *   ParseResult and Token nodes are replaced with Module, Attribute, Struct,
      Type, etc. objects.

  *   Purely syntactic tokens ('"["', '"struct"', etc.) are discarded.

  *   Repeated elements are transformed from tree form to list form:

          a*
         / \
        b   a*
           / \
          c   a*
             / \
            d   a*

      (where b, c, and d are nodes of type "a") becomes [b, c, d].

  *   The values of numeric constants (Number, etc. tokens) are parsed.

  *   Different classes of names (snake_names, CamelNames, ShoutyNames) are
      folded into a single "Name" type, since they are guaranteed to appear in
      the correct places in the parse tree.


  Arguments:
    parse_tree: A parse tree.  Each leaf node should be a parser_types.Token
      object, and each non-leaf node should have a 'symbol' attribute specifying
      which grammar symbol it represents, and a 'children' attribute containing
      a list of child nodes.  This is the format returned by the parsers
      produced by the lr1 module, when run against tokens from the tokenizer
      module.
    used_productions: If specified, used_productions.add() will be called with
      each production actually used in parsing.  This can be useful when
      developing the grammar and writing tests; in particular, it can be used to
      figure out which productions are *not* used when parsing a particular
      file.

  Returns:
    A module-level intermediate representation (module IR) for an Emboss module
    (source file).  This IR will not have symbols resolved; that must be done on
    a forest of module IRs so that names from other modules can be resolved.
  """

  # TODO(b/140259131): Refactor _really_build_ir to be less recursive/use an
  # explicit stack.
  old_recursion_limit = sys.getrecursionlimit()
  # 16 * 1024 allows ~8000 top-level entities in one module.  The limit is only
  # ever raised: if the caller already runs with a higher limit, lowering it
  # here could cause spurious RecursionErrors.
  sys.setrecursionlimit(max(old_recursion_limit, 16 * 1024))
  try:
    result = _really_build_ir(parse_tree, used_productions)
  finally:
    # Always restore the caller's limit, even if IR construction fails.
    sys.setrecursionlimit(old_recursion_limit)
  return result


def _really_build_ir(parse_tree, used_productions):
  """Real implementation of build_ir().

  Arguments:
    parse_tree: A parse tree node; see build_ir().
    used_productions: A set-like object that receives every production visited,
      or None to discard that information.

  Returns:
    The result of the handler registered for parse_tree.production, applied to
    the recursively-built children; or parse_tree itself for a leaf Token.
  """
  if used_productions is None:
    used_productions = set()
  if not hasattr(parse_tree, 'children'):
    # For leaf nodes, the temporary "IR" is just the token.  Higher-level rules
    # will translate it to a real IR.
    assert isinstance(parse_tree, parser_types.Token), str(parse_tree)
    return parse_tree
  parsed_children = [_really_build_ir(child, used_productions)
                     for child in parse_tree.children]
  used_productions.add(parse_tree.production)
  result = _handlers[parse_tree.production](*parsed_children)
  if parse_tree.source_location is not None:
    if result.source_location:
      # The handler supplied a location; overlay the parse tree's on top of it.
      ir_data_utils.update(result.source_location, parse_tree.source_location)
    else:
      result.source_location = ir_data_utils.copy(parse_tree.source_location)
  return result


# Map of productions to their handlers.
_handlers = {}

_anonymous_name_counter = 0


def _get_anonymous_field_name():
  """Returns a fresh reserved name; each call increments a module counter."""
  global _anonymous_name_counter
  _anonymous_name_counter += 1
  return 'emboss_reserved_anonymous_field_{}'.format(_anonymous_name_counter)


def _handles(production_text):
  """_handles marks a function as the handler for a particular production."""
  production = parser_types.Production.parse(production_text)

  def handles(f):
    _handlers[production] = f
    return f

  return handles


def _make_prelude_import(position):
  """Helper function to construct a synthetic ir_data.Import for the prelude."""
  # The synthetic import is zero-width: it starts and ends at `position`.
  location = parser_types.make_location(position, position)
  return ir_data.Import(
      file_name=ir_data.String(text='', source_location=location),
      local_name=ir_data.Word(text='', source_location=location),
      source_location=location)


def _text_to_operator(text):
  """Converts an operator's textual name to its corresponding enum.

  Raises:
    KeyError: If `text` is not one of the operators listed below.
  """
  operations = {
      '+': ir_data.FunctionMapping.ADDITION,
      '-': ir_data.FunctionMapping.SUBTRACTION,
      '*': ir_data.FunctionMapping.MULTIPLICATION,
      '==': ir_data.FunctionMapping.EQUALITY,
      '!=': ir_data.FunctionMapping.INEQUALITY,
      '&&': ir_data.FunctionMapping.AND,
      '||': ir_data.FunctionMapping.OR,
      '>': ir_data.FunctionMapping.GREATER,
      '>=': ir_data.FunctionMapping.GREATER_OR_EQUAL,
      '<': ir_data.FunctionMapping.LESS,
      '<=': ir_data.FunctionMapping.LESS_OR_EQUAL,
  }
  return operations[text]


def _text_to_function(text):
  """Converts a function's textual name to its corresponding enum.

  Raises:
    KeyError: If `text` is not one of the functions listed below.
  """
  functions = {
      '$max': ir_data.FunctionMapping.MAXIMUM,
      '$present': ir_data.FunctionMapping.PRESENCE,
      '$upper_bound': ir_data.FunctionMapping.UPPER_BOUND,
      '$lower_bound': ir_data.FunctionMapping.LOWER_BOUND,
  }
  return functions[text]


################################################################################
# Grammar & parse tree to IR translation.
#
# From here to (almost) the end of the file are functions which recursively
# build an IR.
# The @_handles annotations indicate the exact grammar
# production(s) handled by each function.  The handler function should take
# exactly one argument for each symbol in the production's RHS.
#
# The actual Emboss grammar is extracted directly from the @_handles
# annotations, so this is also the grammar definition.  For convenience, the
# grammar can be viewed separately in g3doc/grammar.md.
#
# At the end, symbols whose names end in "*", "+", or "?" are extracted from the
# grammar, and appropriate productions are added for zero-or-more, one-or-more,
# or zero-or-one lists, respectively.  (This is analogous to the *, +, and ?
# operators in regex.)  It is necessary for this to happen here (and not in
# lr1.py) because the generated productions must be associated with
# IR-generation functions.


# A module file is a list of documentation, then imports, then top-level
# attributes, then type definitions.  Any section may be missing.
# TODO(bolms): Should Emboss disallow completely empty files?
@_handles('module -> comment-line* doc-line* import-line* attribute-line*'
          ' type-definition*')
def _file(leading_newlines, docs, imports, attributes, type_definitions):
  """Assembles the top-level IR for a module.

  A synthetic, empty-named import (the prelude) is always placed first in
  foreign_import, ahead of the imports that appear in the source.
  """
  del leading_newlines  # Unused.
  # Figure out the best synthetic source_location for the synthesized prelude
  # import.
  if imports.list:
    position = imports.list[0].source_location.start
  elif docs.list:
    position = docs.list[0].source_location.end
  elif attributes.list:
    position = attributes.list[0].source_location.start
  elif type_definitions.list:
    position = type_definitions.list[0].source_location.start
  else:
    position = 1, 1

  # If the source file is completely empty, build_ir won't automatically
  # populate the source_location attribute for the module.
  module_is_empty = not (docs.list or imports.list or attributes.list or
                         type_definitions.list)
  if module_is_empty:
    module_source_location = parser_types.make_location((1, 1), (1, 1))
  else:
    module_source_location = None

  return ir_data.Module(
      documentation=docs.list,
      foreign_import=[_make_prelude_import(position)] + imports.list,
      attribute=attributes.list,
      type=type_definitions.list,
      source_location=module_source_location)


@_handles('import-line ->'
          ' "import" string-constant "as" snake-word Comment? eol')
def _import(import_, file_name, as_, local_name, comment, eol):
  """Builds an ir_data.Import from the file name and local name."""
  del import_, as_, comment, eol  # Unused
  return ir_data.Import(file_name=file_name, local_name=local_name)


@_handles('doc-line -> doc Comment? eol')
def _doc_line(doc, comment, eol):
  """Returns the documentation node, dropping the comment and end of line."""
  del comment, eol  # Unused.
  return doc


@_handles('doc -> Documentation')
def _doc(documentation):
  """Turns a Documentation token into ir_data.Documentation, minus the '-- '."""
  # As a special case, an empty documentation string may omit the trailing
  # space.
  if documentation.text == '--':
    doc_text = '-- '
  else:
    doc_text = documentation.text
  assert doc_text[0:3] == '-- ', (
      "Documentation token '{}' in unknown format.".format(
          documentation.text))
  return ir_data.Documentation(text=doc_text[3:])


# An attribute-line is just an attribute on its own line.
@_handles('attribute-line -> attribute Comment? eol')
def _attribute_line(attr, comment, eol):
  """Returns the attribute, dropping the comment and end of line."""
  del comment, eol  # Unused.
  return attr


# An attribute is [name: value], optionally preceded by a (context) and/or
# $default inside the brackets.
@_handles('attribute -> "[" attribute-context? "$default"?'
          '              snake-word ":" attribute-value "]"')
def _attribute(open_bracket, context_specifier, default_specifier, name, colon,
               attribute_value, close_bracket):
  """Builds an ir_data.Attribute; back_end is set only if a context is given."""
  del open_bracket, colon, close_bracket  # Unused.
  attribute_args = dict(name=name,
                        value=attribute_value,
                        is_default=bool(default_specifier.list))
  if context_specifier.list:
    attribute_args['back_end'] = context_specifier.list[0]
  return ir_data.Attribute(**attribute_args)


@_handles('attribute-context -> "(" snake-word ")"')
def _attribute_context(open_paren, context_name, close_paren):
  """Returns the context name, dropping the surrounding parentheses."""
  del open_paren, close_paren  # Unused.
  return context_name


@_handles('attribute-value -> expression')
def _attribute_value_expression(expression):
  """Wraps an expression in an ir_data.AttributeValue."""
  return ir_data.AttributeValue(expression=expression)


@_handles('attribute-value -> string-constant')
def _attribute_value_string(string):
  """Wraps a string constant in an ir_data.AttributeValue."""
  return ir_data.AttributeValue(string_constant=string)


@_handles('boolean-constant -> BooleanConstant')
def _boolean_constant(boolean):
  """Builds an ir_data.BooleanConstant; only the text 'true' maps to True."""
  return ir_data.BooleanConstant(value=(boolean.text == 'true'))


@_handles('string-constant -> String')
def _string_constant(string):
  """Turns a String token into an ir_data.String, with proper unescaping.

  Arguments:
    string: A String token.

  Returns:
    An ir_data.String with the "text" field set to the unescaped value of
    string.text.
  """
  # TODO(bolms): If/when this logic becomes more complex (e.g., to handle \NNN
  # or \xNN escapes), extract this into a separate module with separate tests.
  text = string.text
  # The length is checked first so that an empty token fails this assertion
  # rather than raising IndexError from the quote checks.
  assert len(text) >= 2
  assert text[0] == '"'
  assert text[-1] == '"'
  escapes = {'\\': '\\', '"': '"', 'n': '\n'}
  result = []
  # The capturing group makes re.split keep each two-character escape sequence
  # as its own element, interleaved with the literal runs between them.
  for substring in re.split(r'(\\.)', text[1:-1]):
    if substring and substring[0] == '\\':
      assert len(substring) == 2
      result.append(escapes[substring[1]])
    else:
      result.append(substring)
  return ir_data.String(text=''.join(result))


# In Emboss, '&&' and '||' may not be mixed without parentheses.  These are all
# fine:
#
#     x && y && z
#     x || y || z
#     (x || y) && z
#     x || (y && z)
#
# These are syntax errors:
#
#     x || y && z
#     x && y || z
#
# This is accomplished by making && and || separate-but-equal in the precedence
# hierarchy.
# Instead of the more traditional:
#
#     logical-expression   -> or-expression
#     or-expression        -> and-expression or-expression-right*
#     or-expression-right  -> '||' and-expression
#     and-expression       -> equality-expression and-expression-right*
#     and-expression-right -> '&&' equality-expression
#
# Or, using yacc-style precedence specifiers:
#
#     %left "||"
#     %left "&&"
#     expression -> expression
#                 | expression '||' expression
#                 | expression '&&' expression
#
# Emboss uses a slightly more complex grammar, in which '&&' and '||' are
# parallel, but unmixable:
#
#     logical-expression   -> and-expression
#                           | or-expression
#                           | equality-expression
#     or-expression        -> equality-expression or-expression-right+
#     or-expression-right  -> '||' equality-expression
#     and-expression       -> equality-expression and-expression-right+
#     and-expression-right -> '&&' equality-expression
#
# In either case, explicit parenthesization is handled elsewhere in the grammar.
@_handles('logical-expression -> and-expression')
@_handles('logical-expression -> or-expression')
@_handles('logical-expression -> comparison-expression')
@_handles('choice-expression -> logical-expression')
@_handles('expression -> choice-expression')
def _expression(expression):
  """Passes a single-child expression production through unchanged."""
  return expression


# The `logical-expression`s here means that ?: can't be chained without
# parentheses.  `x < 0 ? -1 : (x == 0 ? 0 : 1)` is OK, but `x < 0 ? -1 : x == 0
# ? 0 : 1` is not.  Parentheses are also needed in the middle: `x <= 0 ? x < 0 ?
# -1 : 0 : 1` is not syntactically valid.
@_handles('choice-expression -> logical-expression "?" logical-expression'
          '                                        ":" logical-expression')
def _choice_expression(condition, question, if_true, colon, if_false):
  """Builds the IR for a `condition ? if_true : if_false` expression."""
  location = parser_types.make_location(
      condition.source_location.start, if_false.source_location.end)
  # The operator's location spans from the "?" through the ":".
  operator_location = parser_types.make_location(
      question.source_location.start, colon.source_location.end)
  # The function_name is a bit weird, but should suffice for any error messages
  # that might need it.
  return ir_data.Expression(
      function=ir_data.Function(function=ir_data.FunctionMapping.CHOICE,
                                args=[condition, if_true, if_false],
                                function_name=ir_data.Word(
                                    text='?:',
                                    source_location=operator_location),
                                source_location=location))


@_handles('comparison-expression -> additive-expression')
def _no_op_comparative_expression(expression):
  """Passes through an additive expression that has no comparison."""
  return expression


@_handles('comparison-expression ->'
          '    additive-expression inequality-operator additive-expression')
def _comparative_expression(left, operator, right):
  """Builds the IR for a single `left operator right` comparison."""
  location = parser_types.make_location(
      left.source_location.start, right.source_location.end)
  return ir_data.Expression(
      function=ir_data.Function(function=_text_to_operator(operator.text),
                                args=[left, right],
                                function_name=operator,
                                source_location=location))


@_handles('additive-expression -> times-expression additive-expression-right*')
@_handles('times-expression -> negation-expression times-expression-right*')
@_handles('and-expression -> comparison-expression and-expression-right+')
@_handles('or-expression -> comparison-expression or-expression-right+')
def _binary_operator_expression(expression, expression_right):
  """Builds the IR for a chain of equal-precedence left-associative operations.

  _binary_operator_expression transforms a right-recursive list of expression
  tails into a left-associative Expression tree.  For example, given the
  arguments:

      6, (Tail("+", 7), Tail("-", 8), Tail("+", 10))

  _binary_operator_expression produces a structure like:

     Expression(Expression(Expression(6, "+", 7), "-", 8), "+", 10)

  This transformation is necessary because strict LR(1) grammars do not allow
  left recursion.

  Note that this method is used for several productions; each of those
  productions handles a different precedence level, but are identical in form.

  Arguments:
    expression: An ir_data.Expression which is the head of the (expr, operator,
        expr, operator, expr, ...) list.
    expression_right: A list of _ExpressionTails corresponding to the (operator,
        expr, operator, expr, ...) list that comes after expression.

  Returns:
    An ir_data.Expression with the correct recursive structure to represent a
    list of left-associative operations.  If expression_right is empty,
    expression is returned unchanged.
  """
  e = expression
  for right in expression_right.list:
    # Each new node spans from the start of everything accumulated so far to
    # the end of the tail being folded in.
    location = parser_types.make_location(
        e.source_location.start, right.source_location.end)
    e = ir_data.Expression(
        function=ir_data.Function(
            function=_text_to_operator(right.operator.text),
            args=[e, right.expression],
            function_name=right.operator,
            source_location=location),
        source_location=location)
  return e


@_handles('comparison-expression ->'
          '    additive-expression equality-expression-right+')
@_handles('comparison-expression ->'
          '    additive-expression less-expression-right-list')
@_handles('comparison-expression ->'
          '    additive-expression greater-expression-right-list')
def _chained_comparison_expression(expression, expression_right):
  """Builds the IR for a chain of comparisons, like a == b == c.

  Like _binary_operator_expression, _chained_comparison_expression transforms a
  right-recursive list of expression tails into a left-associative Expression
  tree.  Unlike _binary_operator_expression, extra AND nodes are added.  For
  example, the following expression:

      0 <= b <= 64

  must be translated to the conceptually-equivalent expression:

      0 <= b && b <= 64

  (The middle subexpression is duplicated -- this would be a problem in a
  programming language like C where expressions like `x++` have side effects,
  but side effects do not make sense in a data definition language like Emboss.)

  _chained_comparison_expression receives a left-hand head expression and a list
  of tails, like:

      0, (Tail("<=", b), Tail("<=", 64))

  which it translates to a structure like:

      Expression(Expression(0, "<=", b), "&&", Expression(b, "<=", 64))

  The Emboss grammar is constructed such that sequences of "<", "<=", and "=="
  comparisons may be chained, and sequences of ">", ">=", and "==" can be
  chained, but greater and less-than comparisons may not; e.g., "b < 64 > a" is
  not allowed.

  Arguments:
    expression: An ir_data.Expression which is the head of the (expr, operator,
        expr, operator, expr, ...) list.
    expression_right: A list of _ExpressionTails corresponding to the (operator,
        expr, operator, expr, ...) list that comes after expression.

  Returns:
    An ir_data.Expression with the correct recursive structure to represent a
    chain of left-associative comparison operations.  If expression_right is
    empty, expression is returned unchanged.
  """
  tails = expression_right.list
  if not tails:
    # The productions above always supply at least one tail; this mirrors
    # _binary_operator_expression instead of failing with an IndexError.
    return expression

  # operands[i] and operands[i + 1] are the two sides of tails[i].operator.
  operands = [expression] + [tail.expression for tail in tails]
  comparisons = []
  for left, tail, right in zip(operands, tails, operands[1:]):
    location = parser_types.make_location(
        left.source_location.start, right.source_location.end)
    comparisons.append(ir_data.Expression(
        function=ir_data.Function(
            function=_text_to_operator(tail.operator.text),
            args=[left, right],
            function_name=tail.operator,
            source_location=location),
        source_location=location))

  e = comparisons[0]
  for comparison in comparisons[1:]:
    location = parser_types.make_location(
        e.source_location.start, comparison.source_location.end)
    # There is no '&&' token in the source, so the synthesized operator borrows
    # the location of the operand shared by the two neighboring comparisons.
    e = ir_data.Expression(
        function=ir_data.Function(
            function=ir_data.FunctionMapping.AND,
            args=[e, comparison],
            function_name=ir_data.Word(
                text='&&',
                source_location=comparison.function.args[0].source_location),
            source_location=location),
        source_location=location)
  return e
_chained_comparison_expression, above, handles three types of chains: `a == b 598*99e0aae7SDavid Rees# == c`, `a < b <= c`, and `a > b >= c`. 599*99e0aae7SDavid Rees# 600*99e0aae7SDavid Rees# This requires a bit of subtlety in the productions for 601*99e0aae7SDavid Rees# `x-expression-right-list`, because the `==` operator may be freely mixed into 602*99e0aae7SDavid Rees# greater-than or less-than chains, like `a < b == c <= d` or `a > b == c >= d`, 603*99e0aae7SDavid Rees# but greater-than and less-than may not be mixed; i.e., `a < b >= c` is 604*99e0aae7SDavid Rees# disallowed. 605*99e0aae7SDavid Rees# 606*99e0aae7SDavid Rees# In order to keep the grammar unambiguous -- that is, in order to ensure that 607*99e0aae7SDavid Rees# every valid input can only be parsed in exactly one way -- the languages 608*99e0aae7SDavid Rees# defined by `equality-expression-right*`, `greater-expression-right-list`, and 609*99e0aae7SDavid Rees# `less-expression-right-list` cannot overlap. 610*99e0aae7SDavid Rees# 611*99e0aae7SDavid Rees# `equality-expression-right*`, by definition, only contains `== n` elements. 612*99e0aae7SDavid Rees# By forcing `greater-expression-right-list` to contain at least one 613*99e0aae7SDavid Rees# `greater-expression-right`, we can ensure that a chain like `== n == m` cannot 614*99e0aae7SDavid Rees# be parsed as a `greater-expression-right-list`. Similar logic applies in the 615*99e0aae7SDavid Rees# less-than case. 
#
# There is another potential source of ambiguity here: if
# `greater-expression-right-list` were
#
#     greater-expression-right-list ->
#         equality-or-greater-expression-right* greater-expression-right
#         equality-or-greater-expression-right*
#
# then a sequence like '> b > c > d' could be parsed as any of:
#
#     () (> b) ((> c) (> d))
#     ((> b)) (> c) ((> d))
#     ((> b) (> c)) (> d) ()
#
# By using `equality-expression-right*` for the first symbol, only the first
# parse is possible.
@_handles('greater-expression-right-list ->'
          ' equality-expression-right* greater-expression-right'
          ' equality-or-greater-expression-right*')
@_handles('less-expression-right-list ->'
          ' equality-expression-right* less-expression-right'
          ' equality-or-less-expression-right*')
def _chained_comparison_tails(start, middle, end):
  """Flattens (leading tails, one tail, trailing tails) into a single _List."""
  tails = list(start.list)
  tails.append(middle)
  tails.extend(end.list)
  return _List(tails)


@_handles('equality-or-greater-expression-right -> equality-expression-right')
@_handles('equality-or-greater-expression-right -> greater-expression-right')
@_handles('equality-or-less-expression-right -> equality-expression-right')
@_handles('equality-or-less-expression-right -> less-expression-right')
def _equality_or_less_or_greater(right):
  """Passes the single child of a unifying production through unchanged."""
  return right


@_handles('and-expression-right -> and-operator comparison-expression')
@_handles('or-expression-right -> or-operator comparison-expression')
@_handles('additive-expression-right -> additive-operator times-expression')
@_handles('equality-expression-right -> equality-operator additive-expression')
@_handles('greater-expression-right -> greater-operator additive-expression')
@_handles('less-expression-right -> less-operator additive-expression')
@_handles('times-expression-right ->'
          ' multiplicative-operator negation-expression')
def _expression_right_production(operator, expression):
  """Pairs an operator with its right-hand operand as an _ExpressionTail."""
  tail = _ExpressionTail(operator, expression)
  return tail


# This supports a single layer of unary plus/minus, so "+5" and "-value" are
# allowed, but "+-5" or "-+-something" are not.
@_handles('negation-expression -> additive-operator bottom-expression')
def _negation_expression_with_operator(operator, expression):
  """Builds the IR for a unary operator applied to an expression.

  The unary form is represented as the corresponding binary function with a
  constant '0' as its first argument and expression as its second.

  Arguments:
    operator: The parsed operator; its text selects the function via
        _text_to_operator, and its start position anchors the result.
    expression: The ir_data.Expression the operator is applied to.

  Returns:
    An ir_data.Expression holding the resulting ir_data.Function.
  """
  operator_start = operator.source_location.start
  # The implicit zero has no text in the source, so it gets a zero-width
  # location at the start of the operator.
  zero_location = ir_data.Location(start=operator_start, end=operator_start)
  implicit_zero = ir_data.Expression(
      constant=ir_data.NumericConstant(
          value='0',
          source_location=zero_location),
      source_location=zero_location)
  whole_location = ir_data.Location(
      start=operator_start,
      end=expression.source_location.end)
  return ir_data.Expression(
      function=ir_data.Function(
          function=_text_to_operator(operator.text),
          args=[implicit_zero, expression],
          function_name=operator,
          source_location=whole_location))


@_handles('negation-expression -> bottom-expression')
def _negation_expression(expression):
  """Passes through an expression that has no unary operator."""
  return expression


@_handles('bottom-expression -> "(" expression ")"')
def _bottom_expression_parentheses(open_paren, expression, close_paren):
  """Returns the expression found between a pair of parentheses."""
  del open_paren, close_paren  # Unused.
  return expression


@_handles('bottom-expression -> function-name "(" argument-list ")"')
def _bottom_expression_function(function, open_paren, arguments, close_paren):
  """Builds the IR for a function call, like `$max(a, b)`.

  Arguments:
    function: The parsed function name; its text selects the function via
        _text_to_function.
    open_paren: The "(" token (unused).
    arguments: A _List of the argument expressions.
    close_paren: The ")" token, which marks the end of the call.

  Returns:
    An ir_data.Expression holding the ir_data.Function for the call.
  """
  del open_paren  # Unused.
  builtin = _text_to_function(function.text)
  call_location = ir_data.Location(
      start=function.source_location.start,
      end=close_paren.source_location.end)
  call = ir_data.Function(
      function=builtin,
      args=arguments.list,
      function_name=function,
      source_location=call_location)
  return ir_data.Expression(function=call)


@_handles('comma-then-expression -> "," expression')
def _comma_then_expression(comma, expression):
  """Drops the separating comma from a non-initial argument."""
  del comma  # Unused.
  return expression


@_handles('argument-list -> expression comma-then-expression*')
def _argument_list(head, tail):
  """Prepends the first argument to the (reused) list of remaining arguments."""
  tail.list[:0] = [head]
  return tail


@_handles('argument-list ->')
def _empty_argument_list():
  """Returns a fresh, empty _List for a call with no arguments."""
  no_arguments = []
  return _List(no_arguments)


@_handles('bottom-expression -> numeric-constant')
def _bottom_expression_from_numeric_constant(constant):
  """Wraps a numeric constant in an ir_data.Expression."""
  return ir_data.Expression(constant=constant)


@_handles('bottom-expression -> constant-reference')
def _bottom_expression_from_constant_reference(reference):
  """Wraps a constant reference in an ir_data.Expression."""
  return ir_data.Expression(constant_reference=reference)


@_handles('bottom-expression -> builtin-reference')
def _bottom_expression_from_builtin(reference):
  """Wraps a builtin reference in an ir_data.Expression."""
  return ir_data.Expression(builtin_reference=reference)


@_handles('bottom-expression -> boolean-constant')
def _bottom_expression_from_boolean_constant(boolean):
  """Wraps a boolean constant in an ir_data.Expression."""
  return ir_data.Expression(boolean_constant=boolean)


@_handles('bottom-expression -> field-reference')
def _bottom_expression_from_reference(reference):
  """Passes through a field reference, which is already an Expression."""
  return reference


@_handles('field-reference -> snake-reference field-reference-tail*')
def _indirect_field_reference(field_reference, field_references):
  """Builds the IR for a (possibly dotted) field reference, like `a.b.c`.

  Arguments:
    field_reference: The first element of the path.
    field_references: A _List of the remaining path elements, if any.

  Returns:
    An ir_data.Expression holding an ir_data.FieldReference whose path is the
    first element followed by the remaining elements.
  """
  # The reference ends where the tail list ends, if the tail list has an end
  # position; otherwise it ends with the first element.
  if field_references.source_location.HasField('end'):
    last = field_references
  else:
    last = field_reference
  path = [field_reference]
  path.extend(field_references.list)
  reference_location = parser_types.make_location(
      field_reference.source_location.start, last.source_location.end)
  return ir_data.Expression(
      field_reference=ir_data.FieldReference(
          path=path,
          source_location=reference_location))


# If "Type.field" ever becomes syntactically valid, it will be necessary to
# check that enum values are compile-time constants.
@_handles('field-reference-tail -> "." snake-reference')
def _field_reference_tail(dot, reference):
  """Drops the leading dot from a non-initial path element."""
  del dot  # Unused.
  return reference


@_handles('numeric-constant -> Number')
def _numeric_constant(number):
  """Converts a Number token into an ir_data.NumericConstant.

  Underscores in the token text are discarded, and the value is stored as a
  base-10 string regardless of the base used in the source.
  """
  # All types of numeric constant tokenize to the same symbol, because they are
  # interchangeable in source code.
  digits = number.text.replace('_', '')
  base_by_prefix = {'0b': 2, '0x': 16}
  base = base_by_prefix.get(number.text[0:2])
  if base is None:
    n = int(digits, 10)
  else:
    n = int(digits[2:], base)
  return ir_data.NumericConstant(value=str(n))


@_handles('type-definition -> struct')
@_handles('type-definition -> bits')
@_handles('type-definition -> enum')
@_handles('type-definition -> external')
def _type_definition(type_definition):
  """Passes through whichever kind of type definition was parsed."""
  return type_definition


# struct StructureName:
#   ... fields ...
# bits BitName:
#   ... fields ...
@_handles('struct -> "struct" type-name delimited-parameter-definition-list?'
          ' ":" Comment? eol struct-body')
@_handles('bits -> "bits" type-name delimited-parameter-definition-list? ":"'
          ' Comment? eol bits-body')
def _structure(struct, name, parameters, colon, comment, newline, struct_body):
  """Composes the top-level IR for an Emboss structure.

  Fills in the source location, name, and runtime parameters of struct_body,
  and returns struct_body itself.
  """
  del colon, comment, newline  # Unused.
  structure = ir_data_utils.builder(struct_body.structure)
  structure.source_location.start.CopyFrom(struct.source_location.start)
  structure.source_location.end.CopyFrom(struct_body.source_location.end)
  if not struct_body.name:
    struct_body.name = ir_data_utils.copy(name)
  else:
    ir_data_utils.update(struct_body.name, name)
  if parameters.list:
    # The optional parameter list, when present, is the sole element.
    declared = parameters.list[0]
    struct_body.runtime_parameter.extend(declared.list)
  return struct_body


@_handles('delimited-parameter-definition-list ->'
          ' "(" parameter-definition-list ")"')
def _delimited_parameter_definition_list(open_paren, parameters, close_paren):
  """Returns the parameter list found between a pair of parentheses."""
  del open_paren, close_paren  # Unused
  return parameters


@_handles('parameter-definition -> snake-name ":" type')
def _parameter_definition(name, double_colon, parameter_type):
  """Builds an ir_data.RuntimeParameter from a name and its type."""
  del double_colon  # Unused
  return ir_data.RuntimeParameter(
      name=name,
      physical_type_alias=parameter_type)


@_handles('parameter-definition-list-tail -> "," parameter-definition')
def _parameter_definition_list_tail(comma, parameter):
  """Drops the separating comma from a non-initial parameter."""
  del comma  # Unused.
  return parameter


@_handles('parameter-definition-list -> parameter-definition'
          ' parameter-definition-list-tail*')
def _parameter_definition_list(head, tail):
  """Prepends the first parameter to the (reused) list of remaining ones."""
  tail.list[:0] = [head]
  return tail


@_handles('parameter-definition-list ->')
def _empty_parameter_definition_list():
  """Returns a fresh, empty _List for an empty parameter list."""
  no_parameters = []
  return _List(no_parameters)


# The body of a struct: basically, the part after the first line.
@_handles('struct-body -> Indent doc-line* attribute-line*'
          ' type-definition* struct-field-block Dedent')
def _struct_body(indent, docs, attributes, types, fields, dedent):
  """Builds the body of a `struct`, which is addressed in bytes."""
  del indent, dedent  # Unused.
  unit = ir_data.AddressableUnit.BYTE
  return _structure_body(docs, attributes, types, fields, unit)


def _structure_body(docs, attributes, types, fields, addressable_unit):
  """Constructs the body of a structure (bits or struct) definition.

  Arguments:
    docs: A _List of documentation for the structure.
    attributes: A _List of attributes for the structure.
    types: A _List of type definitions nested directly in the structure.
    fields: A _List of entries, each with a `field` and its `subtypes`.
    addressable_unit: The ir_data.AddressableUnit of the structure.

  Returns:
    An ir_data.TypeDefinition whose subtypes are the nested type definitions
    followed by the subtypes contributed by each field, in field order.
  """
  member_fields = []
  subtypes = list(types.list)
  for entry in fields.list:
    member_fields.append(entry.field)
    subtypes.extend(entry.subtypes)
  return ir_data.TypeDefinition(
      structure=ir_data.Structure(field=member_fields),
      documentation=docs.list,
      attribute=attributes.list,
      subtype=subtypes,
      addressable_unit=addressable_unit)


@_handles('struct-field-block ->')
@_handles('bits-field-block ->')
@_handles('anonymous-bits-field-block ->')
def _empty_field_block():
  """Returns a fresh, empty _List for the end of a field block."""
  no_fields = []
  return _List(no_fields)


@_handles('struct-field-block ->'
          ' conditional-struct-field-block struct-field-block')
@_handles('bits-field-block ->'
          ' conditional-bits-field-block bits-field-block')
@_handles('anonymous-bits-field-block -> conditional-anonymous-bits-field-block'
          ' anonymous-bits-field-block')
def _conditional_block_plus_field_block(conditional_block, block):
  """Prepends the fields of a conditional block to block."""
  merged = list(conditional_block.list)
  merged.extend(block.list)
  return _List(merged)


@_handles('struct-field-block ->'
          ' unconditional-struct-field struct-field-block')
@_handles('bits-field-block ->'
          ' unconditional-bits-field bits-field-block')
@_handles('anonymous-bits-field-block ->'
          ' unconditional-anonymous-bits-field anonymous-bits-field-block')
def _unconditional_block_plus_field_block(field, block):
  """Prepends an unconditional field to block.

  The field's existence condition is set to the boolean constant true, located
  at the field itself.
  """
  always = ir_data_utils.builder(field.field).existence_condition
  always.source_location.CopyFrom(field.source_location)
  always.boolean_constant.source_location.CopyFrom(field.source_location)
  always.boolean_constant.value = True
  prefixed = [field]
  prefixed.extend(block.list)
  return _List(prefixed)


# Struct "fields" are regular fields, inline enums, bits, or structs, anonymous
# inline bits, or virtual fields.
@_handles('unconditional-struct-field -> field')
@_handles('unconditional-struct-field -> inline-enum-field-definition')
@_handles('unconditional-struct-field -> inline-bits-field-definition')
@_handles('unconditional-struct-field -> inline-struct-field-definition')
@_handles('unconditional-struct-field -> anonymous-bits-field-definition')
@_handles('unconditional-struct-field -> virtual-field')
# Bits fields are "regular" fields, inline enums or bits, or virtual fields.
#
# Inline structs and anonymous inline bits are not allowed inside of bits:
# anonymous inline bits are pointless, and inline structs do not make sense,
# since a struct cannot be a part of a bits.
#
# Anonymous inline bits may not include virtual fields; instead, the virtual
# field should be a direct part of the enclosing structure.
@_handles('unconditional-anonymous-bits-field -> field')
@_handles('unconditional-anonymous-bits-field -> inline-enum-field-definition')
@_handles('unconditional-anonymous-bits-field -> inline-bits-field-definition')
@_handles('unconditional-bits-field -> unconditional-anonymous-bits-field')
@_handles('unconditional-bits-field -> virtual-field')
def _unconditional_field(field):
  """Handles the unifying grammar production for a struct or bits field."""
  return field


# TODO(bolms): Add 'elif' and 'else' support.
# TODO(bolms): Should nested 'if' blocks be allowed?
@_handles('conditional-struct-field-block ->'
          ' "if" expression ":" Comment? eol'
          ' Indent unconditional-struct-field+ Dedent')
@_handles('conditional-bits-field-block ->'
          ' "if" expression ":" Comment? eol'
          ' Indent unconditional-bits-field+ Dedent')
@_handles('conditional-anonymous-bits-field-block ->'
          ' "if" expression ":" Comment? eol'
          ' Indent unconditional-anonymous-bits-field+ Dedent')
def _conditional_field_block(if_keyword, expression, colon, comment, newline,
                             indent, fields, dedent):
  """Applies an existence_condition to each element of fields.

  Each field receives its own copy of expression, flagged as disjoint from its
  parent's source location; the same fields list is returned.
  """
  del if_keyword, newline, colon, comment, indent, dedent  # Unused.
  for guarded in fields.list:
    existence = ir_data_utils.builder(guarded.field).existence_condition
    existence.CopyFrom(expression)
    existence.source_location.is_disjoint_from_parent = True
  return fields


# The body of a bit field definition: basically, the part after the first line.
@_handles('bits-body -> Indent doc-line* attribute-line*'
          ' type-definition* bits-field-block Dedent')
def _bits_body(indent, docs, attributes, types, fields, dedent):
  """Builds the body of a `bits`, which is addressed in bits."""
  del indent, dedent  # Unused.
  unit = ir_data.AddressableUnit.BIT
  return _structure_body(docs, attributes, types, fields, unit)


# Inline bits (defined as part of a field) are more restricted than standalone
# bits.
@_handles('anonymous-bits-body ->'
          ' Indent attribute-line* anonymous-bits-field-block Dedent')
def _anonymous_bits_body(indent, attributes, fields, dedent):
  """Builds the body of an anonymous `bits`: no docs and no nested types."""
  del indent, dedent  # Unused.
  no_docs = _List([])
  no_types = _List([])
  return _structure_body(no_docs, attributes, no_types, fields,
                         ir_data.AddressableUnit.BIT)


# A field is:
#     range  type  name  (abbr)  [attr: value] [attr2: value] -- doc
#         -- doc
#         -- doc
#         [attr3: value]
#         [attr4: value]
@_handles('field ->'
          ' field-location type snake-name abbreviation? attribute* doc?'
          ' Comment? eol field-body?')
def _field(location, field_type, name, abbreviation, attributes, doc, comment,
           newline, field_body):
  """Constructs an ir_data.Field from the given components.

  Attributes and documentation from the optional indented body are appended
  after those given on the field's first line.  The field ends where the body
  ends, if the body has an end position, and at the end of the first line
  otherwise.

  Returns:
    A _FieldWithType wrapping the new field.
  """
  del comment  # Unused
  field_ir = ir_data.Field(
      location=location,
      type=field_type,
      name=name,
      attribute=attributes.list,
      documentation=doc.list)
  field = ir_data_utils.builder(field_ir)
  if field_body.list:
    extras = field_body.list[0]
    field.attribute.extend(extras.attribute)
    field.documentation.extend(extras.documentation)
  if abbreviation.list:
    field.abbreviation.CopyFrom(abbreviation.list[0])
  field.source_location.start.CopyFrom(location.source_location.start)
  if field_body.source_location.HasField('end'):
    end_source = field_body
  else:
    end_source = newline
  field.source_location.end.CopyFrom(end_source.source_location.end)
  return _FieldWithType(field=field_ir)

994*99e0aae7SDavid Rees 995*99e0aae7SDavid Rees 996*99e0aae7SDavid Rees# A "virtual field" is: 997*99e0aae7SDavid Rees# let name = value 998*99e0aae7SDavid Rees# -- doc 999*99e0aae7SDavid Rees# -- doc 1000*99e0aae7SDavid Rees# [attr1: value] 1001*99e0aae7SDavid Rees# [attr2: value] 1002*99e0aae7SDavid Rees@_handles('virtual-field ->' 1003*99e0aae7SDavid Rees ' "let" snake-name "=" expression Comment? eol field-body?') 1004*99e0aae7SDavid Reesdef _virtual_field(let, name, equals, value, comment, newline, field_body): 1005*99e0aae7SDavid Rees """Constructs an ir_data.Field from the given components.""" 1006*99e0aae7SDavid Rees del equals, comment # Unused 1007*99e0aae7SDavid Rees field_ir = ir_data.Field(read_transform=value, name=name) 1008*99e0aae7SDavid Rees field = ir_data_utils.builder(field_ir) 1009*99e0aae7SDavid Rees if field_body.list: 1010*99e0aae7SDavid Rees field.attribute.extend(field_body.list[0].attribute) 1011*99e0aae7SDavid Rees field.documentation.extend(field_body.list[0].documentation) 1012*99e0aae7SDavid Rees field.source_location.start.CopyFrom(let.source_location.start) 1013*99e0aae7SDavid Rees if field_body.source_location.HasField('end'): 1014*99e0aae7SDavid Rees field.source_location.end.CopyFrom(field_body.source_location.end) 1015*99e0aae7SDavid Rees else: 1016*99e0aae7SDavid Rees field.source_location.end.CopyFrom(newline.source_location.end) 1017*99e0aae7SDavid Rees return _FieldWithType(field=field_ir) 1018*99e0aae7SDavid Rees 1019*99e0aae7SDavid Rees 1020*99e0aae7SDavid Rees# An inline enum is: 1021*99e0aae7SDavid Rees# range "enum" name (abbr): 1022*99e0aae7SDavid Rees# -- doc 1023*99e0aae7SDavid Rees# -- doc 1024*99e0aae7SDavid Rees# [attr3: value] 1025*99e0aae7SDavid Rees# [attr4: value] 1026*99e0aae7SDavid Rees# NAME = 10 1027*99e0aae7SDavid Rees# NAME2 = 20 1028*99e0aae7SDavid Rees@_handles('inline-enum-field-definition ->' 1029*99e0aae7SDavid Rees ' field-location "enum" snake-name abbreviation? ":" Comment? 
eol' 1030*99e0aae7SDavid Rees ' enum-body') 1031*99e0aae7SDavid Reesdef _inline_enum_field(location, enum, name, abbreviation, colon, comment, 1032*99e0aae7SDavid Rees newline, enum_body): 1033*99e0aae7SDavid Rees """Constructs an ir_data.Field for an inline enum field.""" 1034*99e0aae7SDavid Rees del enum, colon, comment, newline # Unused. 1035*99e0aae7SDavid Rees return _inline_type_field(location, name, abbreviation, enum_body) 1036*99e0aae7SDavid Rees 1037*99e0aae7SDavid Rees 1038*99e0aae7SDavid Rees@_handles( 1039*99e0aae7SDavid Rees 'inline-struct-field-definition ->' 1040*99e0aae7SDavid Rees ' field-location "struct" snake-name abbreviation? ":" Comment? eol' 1041*99e0aae7SDavid Rees ' struct-body') 1042*99e0aae7SDavid Reesdef _inline_struct_field(location, struct, name, abbreviation, colon, comment, 1043*99e0aae7SDavid Rees newline, struct_body): 1044*99e0aae7SDavid Rees del struct, colon, comment, newline # Unused. 1045*99e0aae7SDavid Rees return _inline_type_field(location, name, abbreviation, struct_body) 1046*99e0aae7SDavid Rees 1047*99e0aae7SDavid Rees 1048*99e0aae7SDavid Rees@_handles('inline-bits-field-definition ->' 1049*99e0aae7SDavid Rees ' field-location "bits" snake-name abbreviation? ":" Comment? eol' 1050*99e0aae7SDavid Rees ' bits-body') 1051*99e0aae7SDavid Reesdef _inline_bits_field(location, bits, name, abbreviation, colon, comment, 1052*99e0aae7SDavid Rees newline, bits_body): 1053*99e0aae7SDavid Rees del bits, colon, comment, newline # Unused. 
1054*99e0aae7SDavid Rees return _inline_type_field(location, name, abbreviation, bits_body) 1055*99e0aae7SDavid Rees 1056*99e0aae7SDavid Rees 1057*99e0aae7SDavid Reesdef _inline_type_field(location, name, abbreviation, body): 1058*99e0aae7SDavid Rees """Shared implementation of _inline_enum_field and _anonymous_bit_field.""" 1059*99e0aae7SDavid Rees field_ir = ir_data.Field(location=location, 1060*99e0aae7SDavid Rees name=name, 1061*99e0aae7SDavid Rees attribute=body.attribute, 1062*99e0aae7SDavid Rees documentation=body.documentation) 1063*99e0aae7SDavid Rees field = ir_data_utils.builder(field_ir) 1064*99e0aae7SDavid Rees # All attributes should be attached to the field, not the type definition: if 1065*99e0aae7SDavid Rees # the user wants to use type attributes, they should create a separate type 1066*99e0aae7SDavid Rees # definition and reference it. 1067*99e0aae7SDavid Rees del body.attribute[:] 1068*99e0aae7SDavid Rees type_name = ir_data_utils.copy(name) 1069*99e0aae7SDavid Rees ir_data_utils.builder(type_name).name.text = name_conversion.snake_to_camel(type_name.name.text) 1070*99e0aae7SDavid Rees field.type.atomic_type.reference.source_name.extend([type_name.name]) 1071*99e0aae7SDavid Rees field.type.atomic_type.reference.source_location.CopyFrom( 1072*99e0aae7SDavid Rees type_name.source_location) 1073*99e0aae7SDavid Rees field.type.atomic_type.reference.is_local_name = True 1074*99e0aae7SDavid Rees field.type.atomic_type.source_location.CopyFrom(type_name.source_location) 1075*99e0aae7SDavid Rees field.type.source_location.CopyFrom(type_name.source_location) 1076*99e0aae7SDavid Rees if abbreviation.list: 1077*99e0aae7SDavid Rees field.abbreviation.CopyFrom(abbreviation.list[0]) 1078*99e0aae7SDavid Rees field.source_location.start.CopyFrom(location.source_location.start) 1079*99e0aae7SDavid Rees ir_data_utils.builder(body.source_location).start.CopyFrom(location.source_location.start) 1080*99e0aae7SDavid Rees if body.HasField('enumeration'): 
1081*99e0aae7SDavid Rees ir_data_utils.builder(body.enumeration).source_location.CopyFrom(body.source_location) 1082*99e0aae7SDavid Rees else: 1083*99e0aae7SDavid Rees assert body.HasField('structure') 1084*99e0aae7SDavid Rees ir_data_utils.builder(body.structure).source_location.CopyFrom(body.source_location) 1085*99e0aae7SDavid Rees ir_data_utils.builder(body).name.CopyFrom(type_name) 1086*99e0aae7SDavid Rees field.source_location.end.CopyFrom(body.source_location.end) 1087*99e0aae7SDavid Rees subtypes = [body] + list(body.subtype) 1088*99e0aae7SDavid Rees del body.subtype[:] 1089*99e0aae7SDavid Rees return _FieldWithType(field=field_ir, subtypes=subtypes) 1090*99e0aae7SDavid Rees 1091*99e0aae7SDavid Rees 1092*99e0aae7SDavid Rees@_handles('anonymous-bits-field-definition ->' 1093*99e0aae7SDavid Rees ' field-location "bits" ":" Comment? eol anonymous-bits-body') 1094*99e0aae7SDavid Reesdef _anonymous_bit_field(location, bits_keyword, colon, comment, newline, 1095*99e0aae7SDavid Rees bits_body): 1096*99e0aae7SDavid Rees """Constructs an ir_data.Field for an anonymous bit field.""" 1097*99e0aae7SDavid Rees del colon, comment, newline # Unused. 1098*99e0aae7SDavid Rees name = ir_data.NameDefinition( 1099*99e0aae7SDavid Rees name=ir_data.Word( 1100*99e0aae7SDavid Rees text=_get_anonymous_field_name(), 1101*99e0aae7SDavid Rees source_location=bits_keyword.source_location), 1102*99e0aae7SDavid Rees source_location=bits_keyword.source_location, 1103*99e0aae7SDavid Rees is_anonymous=True) 1104*99e0aae7SDavid Rees return _inline_type_field(location, name, _List([]), bits_body) 1105*99e0aae7SDavid Rees 1106*99e0aae7SDavid Rees 1107*99e0aae7SDavid Rees@_handles('field-body -> Indent doc-line* attribute-line* Dedent') 1108*99e0aae7SDavid Reesdef _field_body(indent, docs, attributes, dedent): 1109*99e0aae7SDavid Rees del indent, dedent # Unused. 
1110*99e0aae7SDavid Rees return ir_data.Field(documentation=docs.list, attribute=attributes.list) 1111*99e0aae7SDavid Rees 1112*99e0aae7SDavid Rees 1113*99e0aae7SDavid Rees# A parenthetically-denoted abbreviation. 1114*99e0aae7SDavid Rees@_handles('abbreviation -> "(" snake-word ")"') 1115*99e0aae7SDavid Reesdef _abbreviation(open_paren, word, close_paren): 1116*99e0aae7SDavid Rees del open_paren, close_paren # Unused. 1117*99e0aae7SDavid Rees return word 1118*99e0aae7SDavid Rees 1119*99e0aae7SDavid Rees 1120*99e0aae7SDavid Rees# enum EnumName: 1121*99e0aae7SDavid Rees# ... values ... 1122*99e0aae7SDavid Rees@_handles('enum -> "enum" type-name ":" Comment? eol enum-body') 1123*99e0aae7SDavid Reesdef _enum(enum, name, colon, comment, newline, enum_body): 1124*99e0aae7SDavid Rees del colon, comment, newline # Unused. 1125*99e0aae7SDavid Rees ir_data_utils.builder(enum_body.enumeration).source_location.start.CopyFrom( 1126*99e0aae7SDavid Rees enum.source_location.start) 1127*99e0aae7SDavid Rees ir_data_utils.builder(enum_body.enumeration).source_location.end.CopyFrom( 1128*99e0aae7SDavid Rees enum_body.source_location.end) 1129*99e0aae7SDavid Rees ir_data_utils.builder(enum_body).name.CopyFrom(name) 1130*99e0aae7SDavid Rees return enum_body 1131*99e0aae7SDavid Rees 1132*99e0aae7SDavid Rees 1133*99e0aae7SDavid Rees# [enum Foo:] 1134*99e0aae7SDavid Rees# name = value 1135*99e0aae7SDavid Rees# name = value 1136*99e0aae7SDavid Rees@_handles('enum-body -> Indent doc-line* attribute-line* enum-value+ Dedent') 1137*99e0aae7SDavid Reesdef _enum_body(indent, docs, attributes, values, dedent): 1138*99e0aae7SDavid Rees del indent, dedent # Unused. 
1139*99e0aae7SDavid Rees return ir_data.TypeDefinition( 1140*99e0aae7SDavid Rees enumeration=ir_data.Enum(value=values.list), 1141*99e0aae7SDavid Rees documentation=docs.list, 1142*99e0aae7SDavid Rees attribute=attributes.list, 1143*99e0aae7SDavid Rees addressable_unit=ir_data.AddressableUnit.BIT) 1144*99e0aae7SDavid Rees 1145*99e0aae7SDavid Rees 1146*99e0aae7SDavid Rees# name = value 1147*99e0aae7SDavid Rees@_handles('enum-value -> ' 1148*99e0aae7SDavid Rees ' constant-name "=" expression attribute* doc? Comment? eol enum-value-body?') 1149*99e0aae7SDavid Reesdef _enum_value(name, equals, expression, attribute, documentation, comment, newline, 1150*99e0aae7SDavid Rees body): 1151*99e0aae7SDavid Rees del equals, comment, newline # Unused. 1152*99e0aae7SDavid Rees result = ir_data.EnumValue(name=name, 1153*99e0aae7SDavid Rees value=expression, 1154*99e0aae7SDavid Rees documentation=documentation.list, 1155*99e0aae7SDavid Rees attribute=attribute.list) 1156*99e0aae7SDavid Rees if body.list: 1157*99e0aae7SDavid Rees result.documentation.extend(body.list[0].documentation) 1158*99e0aae7SDavid Rees result.attribute.extend(body.list[0].attribute) 1159*99e0aae7SDavid Rees return result 1160*99e0aae7SDavid Rees 1161*99e0aae7SDavid Rees 1162*99e0aae7SDavid Rees@_handles('enum-value-body -> Indent doc-line* attribute-line* Dedent') 1163*99e0aae7SDavid Reesdef _enum_value_body(indent, docs, attributes, dedent): 1164*99e0aae7SDavid Rees del indent, dedent # Unused. 1165*99e0aae7SDavid Rees return ir_data.EnumValue(documentation=docs.list, attribute=attributes.list) 1166*99e0aae7SDavid Rees 1167*99e0aae7SDavid Rees 1168*99e0aae7SDavid Rees# An external is just a declaration that a type exists and has certain 1169*99e0aae7SDavid Rees# attributes. 1170*99e0aae7SDavid Rees@_handles('external -> "external" type-name ":" Comment? 
eol external-body') 1171*99e0aae7SDavid Reesdef _external(external, name, colon, comment, newline, external_body): 1172*99e0aae7SDavid Rees del colon, comment, newline # Unused. 1173*99e0aae7SDavid Rees ir_data_utils.builder(external_body.source_location).start.CopyFrom(external.source_location.start) 1174*99e0aae7SDavid Rees if external_body.name: 1175*99e0aae7SDavid Rees ir_data_utils.update(external_body.name, name) 1176*99e0aae7SDavid Rees else: 1177*99e0aae7SDavid Rees external_body.name = ir_data_utils.copy(name) 1178*99e0aae7SDavid Rees return external_body 1179*99e0aae7SDavid Rees 1180*99e0aae7SDavid Rees 1181*99e0aae7SDavid Rees# This syntax implicitly requires either a documentation line or a attribute 1182*99e0aae7SDavid Rees# line, or it won't parse (because no Indent/Dedent tokens will be emitted). 1183*99e0aae7SDavid Rees@_handles('external-body -> Indent doc-line* attribute-line* Dedent') 1184*99e0aae7SDavid Reesdef _external_body(indent, docs, attributes, dedent): 1185*99e0aae7SDavid Rees return ir_data.TypeDefinition( 1186*99e0aae7SDavid Rees external=ir_data.External( 1187*99e0aae7SDavid Rees # Set source_location here, since it won't be set automatically. 1188*99e0aae7SDavid Rees source_location=ir_data.Location(start=indent.source_location.start, 1189*99e0aae7SDavid Rees end=dedent.source_location.end)), 1190*99e0aae7SDavid Rees documentation=docs.list, 1191*99e0aae7SDavid Rees attribute=attributes.list) 1192*99e0aae7SDavid Rees 1193*99e0aae7SDavid Rees 1194*99e0aae7SDavid Rees@_handles('field-location -> expression "[" "+" expression "]"') 1195*99e0aae7SDavid Reesdef _field_location(start, open_bracket, plus, size, close_bracket): 1196*99e0aae7SDavid Rees del open_bracket, plus, close_bracket # Unused. 
1197*99e0aae7SDavid Rees return ir_data.FieldLocation(start=start, size=size) 1198*99e0aae7SDavid Rees 1199*99e0aae7SDavid Rees 1200*99e0aae7SDavid Rees@_handles('delimited-argument-list -> "(" argument-list ")"') 1201*99e0aae7SDavid Reesdef _type_argument_list(open_paren, arguments, close_paren): 1202*99e0aae7SDavid Rees del open_paren, close_paren # Unused 1203*99e0aae7SDavid Rees return arguments 1204*99e0aae7SDavid Rees 1205*99e0aae7SDavid Rees 1206*99e0aae7SDavid Rees# A type is "TypeName" or "TypeName[length]" or "TypeName[length][length]", etc. 1207*99e0aae7SDavid Rees# An array type may have an empty length ("Type[]"). This is only valid for the 1208*99e0aae7SDavid Rees# outermost length (the last set of brackets), but that must be checked 1209*99e0aae7SDavid Rees# elsewhere. 1210*99e0aae7SDavid Rees@_handles('type -> type-reference delimited-argument-list? type-size-specifier?' 1211*99e0aae7SDavid Rees ' array-length-specifier*') 1212*99e0aae7SDavid Reesdef _type(reference, parameters, size, array_spec): 1213*99e0aae7SDavid Rees """Builds the IR for a type specifier.""" 1214*99e0aae7SDavid Rees base_type_source_location_end = reference.source_location.end 1215*99e0aae7SDavid Rees atomic_type_source_location_end = reference.source_location.end 1216*99e0aae7SDavid Rees if parameters.list: 1217*99e0aae7SDavid Rees base_type_source_location_end = parameters.source_location.end 1218*99e0aae7SDavid Rees atomic_type_source_location_end = parameters.source_location.end 1219*99e0aae7SDavid Rees if size.list: 1220*99e0aae7SDavid Rees base_type_source_location_end = size.source_location.end 1221*99e0aae7SDavid Rees base_type_location = parser_types.make_location( 1222*99e0aae7SDavid Rees reference.source_location.start, 1223*99e0aae7SDavid Rees base_type_source_location_end) 1224*99e0aae7SDavid Rees atomic_type_location = parser_types.make_location( 1225*99e0aae7SDavid Rees reference.source_location.start, 1226*99e0aae7SDavid Rees atomic_type_source_location_end) 
1227*99e0aae7SDavid Rees t = ir_data.Type( 1228*99e0aae7SDavid Rees atomic_type=ir_data.AtomicType( 1229*99e0aae7SDavid Rees reference=ir_data_utils.copy(reference), 1230*99e0aae7SDavid Rees source_location=atomic_type_location, 1231*99e0aae7SDavid Rees runtime_parameter=parameters.list[0].list if parameters.list else []), 1232*99e0aae7SDavid Rees size_in_bits=size.list[0] if size.list else None, 1233*99e0aae7SDavid Rees source_location=base_type_location) 1234*99e0aae7SDavid Rees for length in array_spec.list: 1235*99e0aae7SDavid Rees location = parser_types.make_location( 1236*99e0aae7SDavid Rees t.source_location.start, length.source_location.end) 1237*99e0aae7SDavid Rees if isinstance(length, ir_data.Expression): 1238*99e0aae7SDavid Rees t = ir_data.Type( 1239*99e0aae7SDavid Rees array_type=ir_data.ArrayType(base_type=t, 1240*99e0aae7SDavid Rees element_count=length, 1241*99e0aae7SDavid Rees source_location=location), 1242*99e0aae7SDavid Rees source_location=location) 1243*99e0aae7SDavid Rees elif isinstance(length, ir_data.Empty): 1244*99e0aae7SDavid Rees t = ir_data.Type( 1245*99e0aae7SDavid Rees array_type=ir_data.ArrayType(base_type=t, 1246*99e0aae7SDavid Rees automatic=length, 1247*99e0aae7SDavid Rees source_location=location), 1248*99e0aae7SDavid Rees source_location=location) 1249*99e0aae7SDavid Rees else: 1250*99e0aae7SDavid Rees assert False, "Shouldn't be here." 1251*99e0aae7SDavid Rees return t 1252*99e0aae7SDavid Rees 1253*99e0aae7SDavid Rees 1254*99e0aae7SDavid Rees# TODO(bolms): Should symbolic names or expressions be allowed? E.g., 1255*99e0aae7SDavid Rees# UInt:FIELD_SIZE or UInt:(16 + 16)? 
@_handles('type-size-specifier -> ":" numeric-constant')
def _type_size_specifier(colon, numeric_constant):
    """Handles the ":32" part of a type specifier like "UInt:32"."""
    del colon  # Unused.
    return ir_data.Expression(constant=numeric_constant)


# The distinctions between different formats of NameDefinitions, Words, and
# References are enforced during parsing, but not propagated to the IR.
@_handles('type-name -> type-word')
@_handles('snake-name -> snake-word')
@_handles('constant-name -> constant-word')
def _name(word):
    """Wraps a word in an ir_data.NameDefinition."""
    return ir_data.NameDefinition(name=word)


@_handles('type-word -> CamelWord')
@_handles('snake-word -> SnakeWord')
@_handles('builtin-field-word -> "$size_in_bits"')
@_handles('builtin-field-word -> "$size_in_bytes"')
@_handles('builtin-field-word -> "$max_size_in_bits"')
@_handles('builtin-field-word -> "$max_size_in_bytes"')
@_handles('builtin-field-word -> "$min_size_in_bits"')
@_handles('builtin-field-word -> "$min_size_in_bytes"')
@_handles('builtin-word -> "$is_statically_sized"')
@_handles('builtin-word -> "$static_size_in_bits"')
@_handles('builtin-word -> "$next"')
@_handles('constant-word -> ShoutyWord')
@_handles('and-operator -> "&&"')
@_handles('or-operator -> "||"')
@_handles('less-operator -> "<="')
@_handles('less-operator -> "<"')
@_handles('greater-operator -> ">="')
@_handles('greater-operator -> ">"')
@_handles('equality-operator -> "=="')
@_handles('inequality-operator -> "!="')
@_handles('additive-operator -> "+"')
@_handles('additive-operator -> "-"')
@_handles('multiplicative-operator -> "*"')
@_handles('function-name -> "$max"')
@_handles('function-name -> "$present"')
@_handles('function-name -> "$upper_bound"')
@_handles('function-name -> "$lower_bound"')
def _word(word):
    """Converts a token into an ir_data.Word carrying the token's text."""
    return ir_data.Word(text=word.text)


@_handles('type-reference -> type-reference-tail')
@_handles('constant-reference -> constant-reference-tail')
def _un_module_qualified_type_reference(reference):
    """Passes a reference with no module qualifier through unchanged."""
    return reference


@_handles('constant-reference-tail -> constant-word')
@_handles('type-reference-tail -> type-word')
@_handles('snake-reference -> snake-word')
@_handles('snake-reference -> builtin-field-word')
def _reference(word):
    """Builds a single-component ir_data.Reference from a word."""
    return ir_data.Reference(source_name=[word])


@_handles('builtin-reference -> builtin-word')
def _builtin_reference(word):
    """Builds a reference to a builtin.

    Unlike _reference, the canonical name is filled in immediately, with the
    word's text as its only object_path component.
    """
    return ir_data.Reference(source_name=[word],
                             canonical_name=ir_data.CanonicalName(
                                 object_path=[word.text]))


# Because constant-references ("Enum.NAME") are used in the same contexts as
# field-references ("field.subfield"), module-qualified constant references
# ("module.Enum.VALUE") have to take snake-reference, not snake-word, on the
# left side of the dot.  Otherwise, when a "snake_word" is followed by a "." in
# an expression context, the LR(1) parser cannot determine whether to reduce the
# snake-word to snake-reference (to eventually become field-reference), or to
# shift the dot onto the stack (to eventually become constant-reference).  By
# using snake-reference as the head of both, the parser can always reduce, then
# shift the dot, then determine whether to proceed with constant-reference if it
# sees "snake_name.TypeName" or field-reference if it sees
# "snake_name.snake_name".
@_handles('constant-reference -> snake-reference "." constant-reference-tail')
def _module_qualified_constant_reference(new_head, dot, reference):
    """Prepends the components of `new_head` to `reference`, in place."""
    del dot  # Unused.
    new_source_name = list(new_head.source_name) + list(reference.source_name)
    del reference.source_name[:]
    reference.source_name.extend(new_source_name)
    return reference


@_handles('constant-reference-tail -> type-word "." constant-reference-tail')
# module.Type.SubType.name is a reference to something that *must* be a
# constant.
@_handles('constant-reference-tail -> type-word "." snake-reference')
@_handles('type-reference-tail -> type-word "." type-reference-tail')
@_handles('type-reference -> snake-word "." type-reference-tail')
def _qualified_reference(word, dot, reference):
    """Adds a name. or Type. qualification to the head of a reference.

    `reference` is modified in place and returned.
    """
    del dot  # Unused.
    new_source_name = [word] + list(reference.source_name)
    del reference.source_name[:]
    reference.source_name.extend(new_source_name)
    return reference


# Arrays are properly translated to IR in _type().
@_handles('array-length-specifier -> "[" expression "]"')
def _array_length_specifier(open_bracket, length, close_bracket):
    """Returns the length expression between the brackets."""
    del open_bracket, close_bracket  # Unused.
    return length


# An array specifier can end with empty brackets ("arr[3][]"), in which case the
# array's size is inferred from the size of its enclosing field.
@_handles('array-length-specifier -> "[" "]"')
def _auto_array_length_specifier(open_bracket, close_bracket):
    """Returns an ir_data.Empty standing in for an omitted array length."""
    # Note that the Empty's source_location is the space between the brackets
    # (if any).
    return ir_data.Empty(
        source_location=ir_data.Location(start=open_bracket.source_location.end,
                                         end=close_bracket.source_location.start))


@_handles('eol -> "\\n" comment-line*')
def _eol(eol, comments):
    """Returns the newline token, discarding any following comment lines."""
    del comments  # Unused
    return eol


@_handles('comment-line -> Comment? "\\n"')
def _comment_line(comment, eol):
    """Returns the newline token, discarding the comment (if any)."""
    del comment  # Unused
    return eol


def _finalize_grammar():
    """Adds productions for foo*, foo+, and foo? symbols.

    Every right-hand-side symbol of an already-registered production that ends
    in "*", "+", or "?" gets the productions needed to expand it.  Each
    generated handler returns a _List: all matched elements for "*" and "+",
    and zero or one element for "?".
    """
    star_symbols = set()
    plus_symbols = set()
    option_symbols = set()
    for production in _handlers:
        for symbol in production.rhs:
            if symbol[-1] == '*':
                star_symbols.add(symbol[:-1])
            elif symbol[-1] == '+':
                # symbol+ relies on the rule for symbol*
                star_symbols.add(symbol[:-1])
                plus_symbols.add(symbol[:-1])
            elif symbol[-1] == '?':
                option_symbols.add(symbol[:-1])
    for symbol in star_symbols:
        _handles('{s}* -> {s} {s}*'.format(s=symbol))(
            lambda e, r: _List([e] + r.list))
        _handles('{s}* ->'.format(s=symbol))(lambda: _List([]))
    for symbol in plus_symbols:
        _handles('{s}+ -> {s} {s}*'.format(s=symbol))(
            lambda e, r: _List([e] + r.list))
    for symbol in option_symbols:
        _handles('{s}? -> {s}'.format(s=symbol))(lambda e: _List([e]))
        _handles('{s}? ->'.format(s=symbol))(lambda: _List([]))


_finalize_grammar()

# End of grammar.
################################################################################

# These export the grammar used by module_ir so that parser_generator can build
# a parser for the same language.
START_SYMBOL = 'module'
EXPRESSION_START_SYMBOL = 'expression'
PRODUCTIONS = list(_handlers.keys())