xref: /aosp_15_r20/external/emboss/compiler/front_end/module_ir.py (revision 99e0aae7469b87d12f0ad23e61142c2d74c1ef70)
1*99e0aae7SDavid Rees# Copyright 2019 Google LLC
2*99e0aae7SDavid Rees#
3*99e0aae7SDavid Rees# Licensed under the Apache License, Version 2.0 (the "License");
4*99e0aae7SDavid Rees# you may not use this file except in compliance with the License.
5*99e0aae7SDavid Rees# You may obtain a copy of the License at
6*99e0aae7SDavid Rees#
7*99e0aae7SDavid Rees#     https://www.apache.org/licenses/LICENSE-2.0
8*99e0aae7SDavid Rees#
9*99e0aae7SDavid Rees# Unless required by applicable law or agreed to in writing, software
10*99e0aae7SDavid Rees# distributed under the License is distributed on an "AS IS" BASIS,
11*99e0aae7SDavid Rees# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12*99e0aae7SDavid Rees# See the License for the specific language governing permissions and
13*99e0aae7SDavid Rees# limitations under the License.
14*99e0aae7SDavid Rees
15*99e0aae7SDavid Rees"""module_ir contains code for generating module-level IRs from parse trees.
16*99e0aae7SDavid Rees
17*99e0aae7SDavid ReesThe primary export is build_ir(), which takes a parse tree (as returned by a
18*99e0aae7SDavid Reesparser from lr1.py), and returns a module-level intermediate representation
19*99e0aae7SDavid Rees("module IR").
20*99e0aae7SDavid Rees
21*99e0aae7SDavid ReesThis module also notably exports PRODUCTIONS and START_SYMBOL, which should be
22*99e0aae7SDavid Reesfed to lr1.Grammar in order to create a parser for the Emboss language.
23*99e0aae7SDavid Rees"""
24*99e0aae7SDavid Rees
25*99e0aae7SDavid Reesimport re
26*99e0aae7SDavid Reesimport sys
27*99e0aae7SDavid Rees
28*99e0aae7SDavid Reesfrom compiler.util import ir_data
29*99e0aae7SDavid Reesfrom compiler.util import ir_data_utils
30*99e0aae7SDavid Reesfrom compiler.util import name_conversion
31*99e0aae7SDavid Reesfrom compiler.util import parser_types
32*99e0aae7SDavid Rees
33*99e0aae7SDavid Rees
34*99e0aae7SDavid Rees# Intermediate types; should not be found in the final IR.
class _List(object):
  """A list with source location information.

  This is an intermediate wrapper used while the IR is being built; it pairs a
  plain Python list with a source_location attribute.

  Attributes:
    list: The wrapped Python list.
    source_location: An ir_data.Location, default-constructed by __init__.
  """
  __slots__ = ('list', 'source_location')

  def __init__(self, l):
    # `l` is wrapped in a 1-tuple for formatting: with a bare `% l`, passing a
    # tuple would make the %-formatting itself raise TypeError, hiding the
    # intended AssertionError and its message.
    assert isinstance(l, list), "_List object must wrap list, not '%r'" % (l,)
    self.list = l
    self.source_location = ir_data.Location()
43*99e0aae7SDavid Rees
44*99e0aae7SDavid Rees
class _ExpressionTail(object):
  """The "operator plus right-hand operand" part of a binary operation.

  For the expression (6+8), the _ExpressionTail is "+8": the operator together
  with the argument on its right.

  Instances are temporary objects, used while the right-recursive "expression"
  and "times-expression" productions are converted into left-associative
  Expressions.

  Attributes:
    operator: An ir_data.Word of the operator's name.
    expression: The expression on the right side of the operator.
    source_location: The source location of the operation fragment.
  """
  __slots__ = ('operator', 'expression', 'source_location')

  def __init__(self, operator, expression):
    self.source_location = ir_data.Location()
    self.expression = expression
    self.operator = operator
67*99e0aae7SDavid Rees
68*99e0aae7SDavid Rees
class _FieldWithType(object):
  """A field bundled with zero or more types defined inline with that field.

  Attributes:
    field: The field itself.
    subtypes: The inline types; an empty list when none (or a falsy value) was
      supplied.
    source_location: An ir_data.Location, default-constructed by __init__.
  """
  __slots__ = ('field', 'subtypes', 'source_location')

  def __init__(self, field, subtypes=None):
    self.source_location = ir_data.Location()
    # Any falsy `subtypes` (None, empty list, ...) is replaced by a fresh list.
    self.subtypes = subtypes if subtypes else []
    self.field = field
77*99e0aae7SDavid Rees
78*99e0aae7SDavid Rees
def build_ir(parse_tree, used_productions=None):
  r"""Builds a module-level intermediate representation from a valid parse tree.

  The parse tree is precisely dictated by the exact productions in the grammar
  used by the parser, with no semantic information.  _really_build_ir transforms
  this "raw" form into a stable, cooked representation, thereby isolating
  subsequent steps from the exact details of the grammar.

  (Probably incomplete) list of transformations:

  *   ParseResult and Token nodes are replaced with Module, Attribute, Struct,
      Type, etc. objects.

  *   Purely syntactic tokens ('"["', '"struct"', etc.) are discarded.

  *   Repeated elements are transformed from tree form to list form:

          a*
         / \
        b   a*
           / \
          c   a*
             / \
            d   a*

      (where b, c, and d are nodes of type "a") becomes [b, c, d].

  *   The values of numeric constants (Number, etc. tokens) are parsed.

  *   Different classes of names (snake_names, CamelNames, ShoutyNames) are
      folded into a single "Name" type, since they are guaranteed to appear in
      the correct places in the parse tree.

  The interpreter's recursion limit is temporarily raised to at least 16 * 1024
  for the duration of the call, and restored to its previous value afterwards
  (even if building the IR raises).

  Arguments:
    parse_tree: A parse tree.  Each leaf node should be a parser_types.Token
      object, and each non-leaf node should have a 'symbol' attribute specifying
      which grammar symbol it represents, and a 'children' attribute containing
      a list of child nodes.  This is the format returned by the parsers
      produced by the lr1 module, when run against tokens from the tokenizer
      module.
    used_productions: If specified, used_productions.add() will be called with
      each production actually used in parsing.  This can be useful when
      developing the grammar and writing tests; in particular, it can be used to
      figure out which productions are *not* used when parsing a particular
      file.

  Returns:
    A module-level intermediate representation (module IR) for an Emboss module
    (source file).  This IR will not have symbols resolved; that must be done on
    a forest of module IRs so that names from other modules can be resolved.
  """

  # TODO(b/140259131): Refactor _really_build_ir to be less recursive/use an
  # explicit stack.
  old_recursion_limit = sys.getrecursionlimit()
  # ~8000 top-level entities in one module.  max() ensures that a limit the
  # caller has already raised above 16 * 1024 is never lowered here, which
  # could otherwise trigger a spurious RecursionError.
  sys.setrecursionlimit(max(old_recursion_limit, 16 * 1024))
  try:
    result = _really_build_ir(parse_tree, used_productions)
  finally:
    sys.setrecursionlimit(old_recursion_limit)
  return result
141*99e0aae7SDavid Rees
142*99e0aae7SDavid Rees
def _really_build_ir(parse_tree, used_productions):
  """Recursively converts a parse tree into IR; implementation of build_ir().

  Arguments:
    parse_tree: Either a parser_types.Token (leaf) or a node with 'children',
      'production' and 'source_location' attributes.
    used_productions: A set-like object that collects every production
      encountered, or None to use a throwaway set.

  Returns:
    The token itself for a leaf; otherwise the value returned by the handler
    registered for the node's production, with its source_location filled in
    from the node when the node has one.
  """
  if used_productions is None:
    used_productions = set()

  if not hasattr(parse_tree, 'children'):
    # For leaf nodes, the temporary "IR" is just the token.  Higher-level rules
    # will translate it to a real IR.
    assert isinstance(parse_tree, parser_types.Token), str(parse_tree)
    return parse_tree

  # Children are converted first, so handlers always receive finished child IRs.
  child_irs = []
  for child in parse_tree.children:
    child_irs.append(_really_build_ir(child, used_productions))

  production = parse_tree.production
  used_productions.add(production)
  handler = _handlers[production]
  result = handler(*child_irs)

  node_location = parse_tree.source_location
  if node_location is None:
    return result
  if result.source_location:
    # The handler already supplied a location; update it from the node's.
    ir_data_utils.update(result.source_location, node_location)
  else:
    result.source_location = ir_data_utils.copy(node_location)
  return result
163*99e0aae7SDavid Rees
# Map of productions to their handlers.  Entries are added by the @_handles
# decorator and looked up by _really_build_ir.
_handlers = {}

# Number of anonymous field names handed out so far; incremented by
# _get_anonymous_field_name() and embedded in each name it returns.
_anonymous_name_counter = 0
168*99e0aae7SDavid Rees
169*99e0aae7SDavid Rees
def _get_anonymous_field_name():
  """Returns a new reserved field name, numbered by a module-level counter.

  Each call increments _anonymous_name_counter and uses the new value as the
  numeric suffix, so the first call returns the name ending in "_1".
  """
  global _anonymous_name_counter
  _anonymous_name_counter = _anonymous_name_counter + 1
  name_template = 'emboss_reserved_anonymous_field_{}'
  return name_template.format(_anonymous_name_counter)
174*99e0aae7SDavid Rees
175*99e0aae7SDavid Rees
def _handles(production_text):
  """Returns a decorator registering a function as a production's handler.

  Arguments:
    production_text: The textual form of a grammar production, as accepted by
      parser_types.Production.parse.

  Returns:
    A decorator which stores the decorated function in _handlers under the
    parsed production and returns the function unchanged, so several @_handles
    decorators can be stacked on one function.
  """
  parsed_production = parser_types.Production.parse(production_text)

  def register(handler):
    _handlers[parsed_production] = handler
    return handler

  return register
185*99e0aae7SDavid Rees
186*99e0aae7SDavid Rees
def _make_prelude_import(position):
  """Constructs the synthetic ir_data.Import that stands for the prelude.

  Arguments:
    position: The position used as both the start and the end of the import's
      source location.

  Returns:
    An ir_data.Import whose file name and local name are both empty, with every
    part located at the zero-width span at `position`.
  """
  location = parser_types.make_location(position, position)
  empty_file_name = ir_data.String(text='', source_location=location)
  empty_local_name = ir_data.Word(text='', source_location=location)
  return ir_data.Import(
      file_name=empty_file_name,
      local_name=empty_local_name,
      source_location=location)
194*99e0aae7SDavid Rees
195*99e0aae7SDavid Rees
def _text_to_operator(text):
  """Maps an operator's source text (e.g. '+') to its FunctionMapping value.

  Raises KeyError if `text` is not one of the operators listed below.
  """
  mapping = ir_data.FunctionMapping
  operator_table = dict((
      ('+', mapping.ADDITION),
      ('-', mapping.SUBTRACTION),
      ('*', mapping.MULTIPLICATION),
      ('==', mapping.EQUALITY),
      ('!=', mapping.INEQUALITY),
      ('&&', mapping.AND),
      ('||', mapping.OR),
      ('>', mapping.GREATER),
      ('>=', mapping.GREATER_OR_EQUAL),
      ('<', mapping.LESS),
      ('<=', mapping.LESS_OR_EQUAL),
  ))
  return operator_table[text]
212*99e0aae7SDavid Rees
213*99e0aae7SDavid Rees
def _text_to_function(text):
  """Maps a builtin function's name (e.g. '$max') to its FunctionMapping value.

  Raises KeyError if `text` is not one of the function names listed below.
  """
  mapping = ir_data.FunctionMapping
  function_table = dict((
      ('$max', mapping.MAXIMUM),
      ('$present', mapping.PRESENCE),
      ('$upper_bound', mapping.UPPER_BOUND),
      ('$lower_bound', mapping.LOWER_BOUND),
  ))
  return function_table[text]
223*99e0aae7SDavid Rees
224*99e0aae7SDavid Rees
225*99e0aae7SDavid Rees################################################################################
226*99e0aae7SDavid Rees# Grammar & parse tree to IR translation.
227*99e0aae7SDavid Rees#
228*99e0aae7SDavid Rees# From here to (almost) the end of the file are functions which recursively
229*99e0aae7SDavid Rees# build an IR.  The @_handles annotations indicate the exact grammar
230*99e0aae7SDavid Rees# production(s) handled by each function.  The handler function should take
231*99e0aae7SDavid Rees# exactly one argument for each symbol in the production's RHS.
232*99e0aae7SDavid Rees#
233*99e0aae7SDavid Rees# The actual Emboss grammar is extracted directly from the @_handles
234*99e0aae7SDavid Rees# annotations, so this is also the grammar definition.  For convenience, the
235*99e0aae7SDavid Rees# grammar can be viewed separately in g3doc/grammar.md.
236*99e0aae7SDavid Rees#
237*99e0aae7SDavid Rees# At the end, symbols whose names end in "*", "+", or "?" are extracted from the
238*99e0aae7SDavid Rees# grammar, and appropriate productions are added for zero-or-more, one-or-more,
239*99e0aae7SDavid Rees# or zero-or-one lists, respectively.  (This is analogous to the *, +, and ?
240*99e0aae7SDavid Rees# operators in regex.)  It is necessary for this to happen here (and not in
241*99e0aae7SDavid Rees# lr1.py) because the generated productions must be associated with
242*99e0aae7SDavid Rees# IR-generation functions.
243*99e0aae7SDavid Rees
244*99e0aae7SDavid Rees
245*99e0aae7SDavid Rees# A module file is a list of documentation, then imports, then top-level
246*99e0aae7SDavid Rees# attributes, then type definitions.  Any section may be missing.
247*99e0aae7SDavid Rees# TODO(bolms): Should Emboss disallow completely empty files?
@_handles('module -> comment-line* doc-line* import-line* attribute-line*'
          '          type-definition*')
def _file(leading_newlines, docs, imports, attributes, type_definitions):
  """Assembles the top-level ir_data.Module for a source file.

  A synthetic prelude import is always placed first in the module's
  foreign_import list, ahead of the imports written in the file.

  Arguments:
    leading_newlines: The leading comment-line* list; ignored.
    docs: _List of the module's documentation.
    imports: _List of the module's explicit imports.
    attributes: _List of the module's top-level attributes.
    type_definitions: _List of the module's type definitions.

  Returns:
    An ir_data.Module.  Its source_location is set explicitly only when all
    four sections are empty; otherwise it is passed as None.
  """
  del leading_newlines  # Unused.

  doc_list = docs.list
  import_list = imports.list
  attribute_list = attributes.list
  type_list = type_definitions.list

  # Pick the best synthetic position for the synthesized prelude import: the
  # start of the first real import if there is one, else the end of the first
  # doc line, else the start of the first attribute or type definition, else
  # the very beginning of the file.
  if import_list:
    prelude_position = import_list[0].source_location.start
  elif doc_list:
    prelude_position = doc_list[0].source_location.end
  elif attribute_list:
    prelude_position = attribute_list[0].source_location.start
  elif type_list:
    prelude_position = type_list[0].source_location.start
  else:
    prelude_position = (1, 1)

  # For a completely empty source file, build_ir won't automatically populate
  # the module's source_location, so it is supplied here.
  file_is_empty = not (doc_list or import_list or attribute_list or type_list)
  if file_is_empty:
    module_source_location = parser_types.make_location((1, 1), (1, 1))
  else:
    module_source_location = None

  all_imports = [_make_prelude_import(prelude_position)] + import_list
  return ir_data.Module(
      documentation=doc_list,
      foreign_import=all_imports,
      attribute=attribute_list,
      type=type_list,
      source_location=module_source_location)
280*99e0aae7SDavid Rees
281*99e0aae7SDavid Rees
@_handles('import-line ->'
          '    "import" string-constant "as" snake-word Comment? eol')
def _import(import_, file_name, as_, local_name, comment, eol):
  """Builds an ir_data.Import from an import line's file name and local name."""
  # Only the file name and the local name carry information.
  del import_, as_  # Unused
  del comment, eol  # Unused
  return ir_data.Import(local_name=local_name, file_name=file_name)
287*99e0aae7SDavid Rees
288*99e0aae7SDavid Rees
@_handles('doc-line -> doc Comment? eol')
def _doc_line(doc, comment, eol):
  """Returns the doc of a documentation line, dropping comment and eol."""
  del eol  # Unused.
  del comment  # Unused.
  return doc
293*99e0aae7SDavid Rees
294*99e0aae7SDavid Rees
@_handles('doc -> Documentation')
def _doc(documentation):
  """Converts a Documentation token into an ir_data.Documentation.

  The token text must start with '-- '; that three-character prefix is removed
  from the resulting documentation text.
  """
  token_text = documentation.text
  # As a special case, an empty documentation string may omit the trailing
  # space, so a bare '--' is treated as '-- '.
  doc_text = '-- ' if token_text == '--' else token_text
  assert doc_text.startswith('-- '), (
      "Documentation token '{}' in unknown format.".format(token_text))
  prefix_length = 3
  return ir_data.Documentation(text=doc_text[prefix_length:])
307*99e0aae7SDavid Rees
308*99e0aae7SDavid Rees
# An attribute-line is just an attribute on its own line.
@_handles('attribute-line -> attribute Comment? eol')
def _attribute_line(attr, comment, eol):
  """Returns the attribute of an attribute line, dropping comment and eol."""
  del eol  # Unused.
  del comment  # Unused.
  return attr
314*99e0aae7SDavid Rees
315*99e0aae7SDavid Rees
# An attribute is [name: value], optionally preceded inside the brackets by a
# (context) specifier and/or "$default".
@_handles('attribute -> "[" attribute-context? "$default"?'
          '             snake-word ":" attribute-value "]"')
def _attribute(open_bracket, context_specifier, default_specifier, name, colon,
               attribute_value, close_bracket):
  """Builds an ir_data.Attribute from the parts of a bracketed attribute.

  is_default is true exactly when the optional "$default" was present.
  back_end is passed only when a context specifier was present, in which case
  it is that specifier.
  """
  del open_bracket, colon, close_bracket  # Unused.
  attribute_fields = {
      'name': name,
      'value': attribute_value,
      'is_default': bool(default_specifier.list),
  }
  contexts = context_specifier.list
  if contexts:
    attribute_fields['back_end'] = contexts[0]
  return ir_data.Attribute(**attribute_fields)
331*99e0aae7SDavid Rees
332*99e0aae7SDavid Rees
@_handles('attribute-context -> "(" snake-word ")"')
def _attribute_context(open_paren, context_name, close_paren):
  """Returns the context name, discarding the surrounding parentheses."""
  del close_paren  # Unused.
  del open_paren  # Unused.
  return context_name
337*99e0aae7SDavid Rees
338*99e0aae7SDavid Rees
@_handles('attribute-value -> expression')
def _attribute_value_expression(expression):
  """Wraps an expression in an ir_data.AttributeValue."""
  attribute_value = ir_data.AttributeValue(expression=expression)
  return attribute_value
342*99e0aae7SDavid Rees
343*99e0aae7SDavid Rees
@_handles('attribute-value -> string-constant')
def _attribute_value_string(string):
  """Wraps a string constant in an ir_data.AttributeValue."""
  attribute_value = ir_data.AttributeValue(string_constant=string)
  return attribute_value
347*99e0aae7SDavid Rees
348*99e0aae7SDavid Rees
@_handles('boolean-constant -> BooleanConstant')
def _boolean_constant(boolean):
  """Converts a BooleanConstant token into an ir_data.BooleanConstant.

  The value is True only when the token text is exactly 'true'; any other text
  yields False.
  """
  is_true = boolean.text == 'true'
  return ir_data.BooleanConstant(value=is_true)
352*99e0aae7SDavid Rees
353*99e0aae7SDavid Rees
@_handles('string-constant -> String')
def _string_constant(string):
  """Turns a String token into an ir_data.String, with proper unescaping.

  The supported escapes are backslash-backslash, backslash-doublequote, and
  backslash-n (newline).  Any other escaped character raises KeyError.

  Arguments:
    string: A String token.  Its text must be at least two characters long and
      must begin and end with a double quote.

  Returns:
    An ir_data.String with the "text" field set to the unescaped value of
    string.text.
  """
  # TODO(bolms): If/when this logic becomes more complex (e.g., to handle \NNN
  # or \xNN escapes), extract this into a separate module with separate tests.
  token_text = string.text
  # The length is checked before indexing so that a malformed (empty) token
  # fails this assertion instead of raising IndexError on the lines below.
  assert len(token_text) >= 2
  assert token_text[0] == '"'
  assert token_text[-1] == '"'
  escapes = {'\\': '\\', '"': '"', 'n': '\n'}
  result = []
  # Splitting on a capturing group keeps the two-character escape sequences in
  # the output, interleaved with the literal text between them.
  for substring in re.split(r'(\\.)', token_text[1:-1]):
    if substring and substring[0] == '\\':
      assert len(substring) == 2
      result.append(escapes[substring[1]])
    else:
      result.append(substring)
  return ir_data.String(text=''.join(result))
378*99e0aae7SDavid Rees
379*99e0aae7SDavid Rees
380*99e0aae7SDavid Rees# In Emboss, '&&' and '||' may not be mixed without parentheses.  These are all
381*99e0aae7SDavid Rees# fine:
382*99e0aae7SDavid Rees#
383*99e0aae7SDavid Rees#     x && y && z
384*99e0aae7SDavid Rees#     x || y || z
385*99e0aae7SDavid Rees#     (x || y) && z
386*99e0aae7SDavid Rees#     x || (y && z)
387*99e0aae7SDavid Rees#
388*99e0aae7SDavid Rees# These are syntax errors:
389*99e0aae7SDavid Rees#
390*99e0aae7SDavid Rees#     x || y && z
391*99e0aae7SDavid Rees#     x && y || z
392*99e0aae7SDavid Rees#
393*99e0aae7SDavid Rees# This is accomplished by making && and || separate-but-equal in the precedence
394*99e0aae7SDavid Rees# hierarchy.  Instead of the more traditional:
395*99e0aae7SDavid Rees#
396*99e0aae7SDavid Rees#     logical-expression   -> or-expression
397*99e0aae7SDavid Rees#     or-expression        -> and-expression or-expression-right*
398*99e0aae7SDavid Rees#     or-expression-right  -> '||' and-expression
399*99e0aae7SDavid Rees#     and-expression       -> equality-expression and-expression-right*
400*99e0aae7SDavid Rees#     and-expression-right -> '&&' equality-expression
401*99e0aae7SDavid Rees#
402*99e0aae7SDavid Rees# Or, using yacc-style precedence specifiers:
403*99e0aae7SDavid Rees#
404*99e0aae7SDavid Rees#     %left "||"
405*99e0aae7SDavid Rees#     %left "&&"
406*99e0aae7SDavid Rees#     expression -> expression
407*99e0aae7SDavid Rees#                 | expression '||' expression
408*99e0aae7SDavid Rees#                 | expression '&&' expression
409*99e0aae7SDavid Rees#
410*99e0aae7SDavid Rees# Emboss uses a slightly more complex grammar, in which '&&' and '||' are
411*99e0aae7SDavid Rees# parallel, but unmixable:
412*99e0aae7SDavid Rees#
#     logical-expression   -> and-expression
#                           | or-expression
#                           | comparison-expression
#     or-expression        -> comparison-expression or-expression-right+
#     or-expression-right  -> '||' comparison-expression
#     and-expression       -> comparison-expression and-expression-right+
#     and-expression-right -> '&&' comparison-expression
420*99e0aae7SDavid Rees#
421*99e0aae7SDavid Rees# In either case, explicit parenthesization is handled elsewhere in the grammar.
@_handles('logical-expression -> and-expression')
@_handles('logical-expression -> or-expression')
@_handles('logical-expression -> comparison-expression')
@_handles('choice-expression -> logical-expression')
@_handles('expression -> choice-expression')
def _expression(expression):
  """Passes an expression through unchanged.

  Handles the single-symbol productions that only move an expression from one
  precedence level to the next.
  """
  return expression
429*99e0aae7SDavid Rees
430*99e0aae7SDavid Rees
431*99e0aae7SDavid Rees# The `logical-expression`s here means that ?: can't be chained without
432*99e0aae7SDavid Rees# parentheses.  `x < 0 ? -1 : (x == 0 ? 0 : 1)` is OK, but `x < 0 ? -1 : x == 0
433*99e0aae7SDavid Rees# ? 0 : 1` is not.  Parentheses are also needed in the middle: `x <= 0 ? x < 0 ?
434*99e0aae7SDavid Rees# -1 : 0 : 1` is not syntactically valid.
@_handles('choice-expression -> logical-expression "?" logical-expression'
          '                                        ":" logical-expression')
def _choice_expression(condition, question, if_true, colon, if_false):
  """Builds the IR for a `condition ? if_true : if_false` expression.

  The resulting Function is located from the start of `condition` to the end
  of `if_false`; its function_name is a synthetic '?:' Word located from the
  start of the "?" token to the end of the ":" token.
  """
  whole_span = parser_types.make_location(
      condition.source_location.start, if_false.source_location.end)
  operator_span = parser_types.make_location(
      question.source_location.start, colon.source_location.end)
  # The function_name is a bit weird, but should suffice for any error messages
  # that might need it.
  operator_word = ir_data.Word(text='?:', source_location=operator_span)
  choice = ir_data.Function(
      function=ir_data.FunctionMapping.CHOICE,
      args=[condition, if_true, if_false],
      function_name=operator_word,
      source_location=whole_span)
  return ir_data.Expression(function=choice)
451*99e0aae7SDavid Rees
452*99e0aae7SDavid Rees
@_handles('comparison-expression -> additive-expression')
def _no_op_comparative_expression(expression):
  """Passes through an additive-expression that has no comparison attached."""
  return expression
456*99e0aae7SDavid Rees
457*99e0aae7SDavid Rees
@_handles('comparison-expression ->'
          '    additive-expression inequality-operator additive-expression')
def _comparative_expression(left, operator, right):
  """Builds the IR for a single `left operator right` comparison.

  The Function is located from the start of `left` to the end of `right`, and
  `operator` itself is used as its function_name.
  """
  span = parser_types.make_location(
      left.source_location.start, right.source_location.end)
  comparison = ir_data.Function(
      function=_text_to_operator(operator.text),
      args=[left, right],
      function_name=operator,
      source_location=span)
  return ir_data.Expression(function=comparison)
468*99e0aae7SDavid Rees
469*99e0aae7SDavid Rees
@_handles('additive-expression -> times-expression additive-expression-right*')
@_handles('times-expression -> negation-expression times-expression-right*')
@_handles('and-expression -> comparison-expression and-expression-right+')
@_handles('or-expression -> comparison-expression or-expression-right+')
def _binary_operator_expression(expression, expression_right):
  """Folds a head expression and its tails into a left-associative tree.

  The parser delivers a chain of equal-precedence operations as a head
  expression followed by a list of (operator, operand) tails.  For example,
  given the arguments:

      6, (Tail("+", 7), Tail("-", 8), Tail("+", 10))

  _binary_operator_expression produces a structure like:

     Expression(Expression(Expression(6, "+", 7), "-", 8), "+", 10)

  This transformation is necessary because strict LR(1) grammars do not allow
  left recursion.

  The same function serves several productions; each handles a different
  precedence level, but all are identical in form.

  Arguments:
    expression: An ir_data.Expression which is the head of the (expr, operator,
        expr, operator, expr, ...) list.
    expression_right: A list of _ExpressionTails corresponding to the (operator,
        expr, operator, expr, ...) list that comes after expression.

  Returns:
    An ir_data.Expression with the correct recursive structure to represent a
    list of left-associative operations.  When there are no tails, this is
    `expression` itself.
  """
  accumulated = expression
  for tail in expression_right.list:
    # Each new node spans from the start of everything folded so far to the
    # end of the current tail.
    span = parser_types.make_location(
        accumulated.source_location.start, tail.source_location.end)
    operation = ir_data.Function(
        function=_text_to_operator(tail.operator.text),
        args=[accumulated, tail.expression],
        function_name=tail.operator,
        source_location=span)
    accumulated = ir_data.Expression(function=operation, source_location=span)
  return accumulated
515*99e0aae7SDavid Rees
516*99e0aae7SDavid Rees
@_handles('comparison-expression ->'
          '    additive-expression equality-expression-right+')
@_handles('comparison-expression ->'
          '    additive-expression less-expression-right-list')
@_handles('comparison-expression ->'
          '    additive-expression greater-expression-right-list')
def _chained_comparison_expression(expression, expression_right):
  """Builds the IR for a chain of comparisons, such as `a == b == c`.

  Each adjacent pair of operands in the chain becomes its own comparison
  Expression, and the comparisons are then joined, left to right, with AND
  nodes.  For example:

      0 <= b <= 64

  is translated to the conceptually-equivalent expression:

      0 <= b && b <= 64

  The middle operand appears in two comparisons.  That would be a problem in a
  language like C, where an expression such as `x++` has side effects, but side
  effects do not make sense in a data definition language like Emboss.

  Concretely, given a head expression and a list of tails like:

      0, (Tail("<=", b), Tail("<=", 64))

  the result has the structure:

      Expression(Expression(0, "<=", b), "&&", Expression(b, "<=", 64))

  The Emboss grammar is constructed such that "<", "<=", and "==" may be chained
  together, and ">", ">=", and "==" may be chained together, but greater-than
  and less-than comparisons may not be mixed; e.g., "b < 64 > a" is not allowed.

  Arguments:
    expression: The ir_data.Expression at the head of the (expr, operator, expr,
        operator, expr, ...) chain.
    expression_right: A list of _ExpressionTails holding the (operator, expr,
        operator, expr, ...) elements that follow expression.

  Returns:
    An ir_data.Expression whose structure represents the chain of
    left-associative comparison operations.
  """
  # First pass: one comparison per tail, using the previous operand as the
  # left-hand side.
  comparisons = []
  left = expression
  for tail in expression_right.list:
    right = tail.expression
    span = parser_types.make_location(
        left.source_location.start, right.source_location.end)
    comparisons.append(ir_data.Expression(
        function=ir_data.Function(
            function=_text_to_operator(tail.operator.text),
            args=[left, right],
            function_name=tail.operator,
            source_location=span),
        source_location=span))
    left = right

  # Second pass: fold the comparisons together with synthesized "&&" nodes.
  chain = comparisons[0]
  for link in comparisons[1:]:
    span = parser_types.make_location(
        chain.source_location.start, link.source_location.end)
    # There is no "&&" token in the source text; the synthesized operator
    # borrows the location of the operand shared by the two comparisons.
    and_word = ir_data.Word(
        text='&&',
        source_location=link.function.args[0].source_location)
    chain = ir_data.Expression(
        function=ir_data.Function(
            function=ir_data.FunctionMapping.AND,
            args=[chain, link],
            function_name=and_word,
            source_location=span),
        source_location=span)
  return chain
595*99e0aae7SDavid Rees
596*99e0aae7SDavid Rees
597*99e0aae7SDavid Rees# _chained_comparison_expression, above, handles three types of chains: `a == b
598*99e0aae7SDavid Rees# == c`, `a < b <= c`, and `a > b >= c`.
599*99e0aae7SDavid Rees#
600*99e0aae7SDavid Rees# This requires a bit of subtlety in the productions for
601*99e0aae7SDavid Rees# `x-expression-right-list`, because the `==` operator may be freely mixed into
602*99e0aae7SDavid Rees# greater-than or less-than chains, like `a < b == c <= d` or `a > b == c >= d`,
603*99e0aae7SDavid Rees# but greater-than and less-than may not be mixed; i.e., `a < b >= c` is
604*99e0aae7SDavid Rees# disallowed.
605*99e0aae7SDavid Rees#
# In order to keep the grammar unambiguous -- that is, in order to ensure that
# every valid input can only be parsed in exactly one way -- the languages
# defined by `equality-expression-right+`, `greater-expression-right-list`, and
# `less-expression-right-list` cannot overlap.
#
# `equality-expression-right+`, by definition, only contains `== n` elements.
# By forcing `greater-expression-right-list` to contain at least one
# `greater-expression-right`, we can ensure that a chain like `== n == m` cannot
# be parsed as a `greater-expression-right-list`.  Similar logic applies in the
# less-than case.
616*99e0aae7SDavid Rees#
617*99e0aae7SDavid Rees# There is another potential source of ambiguity here: if
618*99e0aae7SDavid Rees# `greater-expression-right-list` were
619*99e0aae7SDavid Rees#
620*99e0aae7SDavid Rees#     greater-expression-right-list ->
621*99e0aae7SDavid Rees#         equality-or-greater-expression-right* greater-expression-right
622*99e0aae7SDavid Rees#         equality-or-greater-expression-right*
623*99e0aae7SDavid Rees#
624*99e0aae7SDavid Rees# then a sequence like '> b > c > d' could be parsed as any of:
625*99e0aae7SDavid Rees#
626*99e0aae7SDavid Rees#     () (> b) ((> c) (> d))
627*99e0aae7SDavid Rees#     ((> b)) (> c) ((> d))
628*99e0aae7SDavid Rees#     ((> b) (> c)) (> d) ()
629*99e0aae7SDavid Rees#
630*99e0aae7SDavid Rees# By using `equality-expression-right*` for the first symbol, only the first
631*99e0aae7SDavid Rees# parse is possible.
@_handles('greater-expression-right-list ->'
          '    equality-expression-right* greater-expression-right'
          '    equality-or-greater-expression-right*')
@_handles('less-expression-right-list ->'
          '    equality-expression-right* less-expression-right'
          '    equality-or-less-expression-right*')
def _chained_comparison_tails(start, middle, end):
  """Flattens (leading tails, one required tail, trailing tails) into a _List.

  Arguments:
    start: The list of leading equality tails (possibly empty).
    middle: The single mandatory greater-than or less-than tail.
    end: The list of trailing tails (possibly empty).

  Returns:
    A new _List holding every tail, in source order.
  """
  return _List([*start.list, middle, *end.list])
640*99e0aae7SDavid Rees
641*99e0aae7SDavid Rees
@_handles('equality-or-greater-expression-right -> equality-expression-right')
@_handles('equality-or-greater-expression-right -> greater-expression-right')
@_handles('equality-or-less-expression-right -> equality-expression-right')
@_handles('equality-or-less-expression-right -> less-expression-right')
def _equality_or_less_or_greater(right):
  """Passes a comparison tail through its unifying production unchanged."""
  return right
648*99e0aae7SDavid Rees
649*99e0aae7SDavid Rees
@_handles('and-expression-right -> and-operator comparison-expression')
@_handles('or-expression-right -> or-operator comparison-expression')
@_handles('additive-expression-right -> additive-operator times-expression')
@_handles('equality-expression-right -> equality-operator additive-expression')
@_handles('greater-expression-right -> greater-operator additive-expression')
@_handles('less-expression-right -> less-operator additive-expression')
@_handles('times-expression-right ->'
          '    multiplicative-operator negation-expression')
def _expression_right_production(operator, expression):
  """Pairs an operator with its right-hand operand as an _ExpressionTail."""
  return _ExpressionTail(operator, expression)
660*99e0aae7SDavid Rees
661*99e0aae7SDavid Rees
662*99e0aae7SDavid Rees# This supports a single layer of unary plus/minus, so "+5" and "-value" are
663*99e0aae7SDavid Rees# allowed, but "+-5" or "-+-something" are not.
@_handles('negation-expression -> additive-operator bottom-expression')
def _negation_expression_with_operator(operator, expression):
  """Builds the IR for a unary plus or minus as `0 <operator> expression`.

  Arguments:
    operator: The additive operator token preceding the operand.
    expression: The operand expression.

  Returns:
    An ir_data.Expression applying operator to a synthesized constant 0 and
    expression.
  """
  operator_start = operator.source_location.start
  # The implicit zero has no source text, so it gets a zero-width location at
  # the start of the operator.
  zero_location = ir_data.Location(start=operator_start, end=operator_start)
  implicit_zero = ir_data.Expression(
      constant=ir_data.NumericConstant(
          value='0',
          source_location=zero_location),
      source_location=zero_location)
  whole_location = ir_data.Location(
      start=operator_start,
      end=expression.source_location.end)
  return ir_data.Expression(
      function=ir_data.Function(
          function=_text_to_operator(operator.text),
          args=[implicit_zero, expression],
          function_name=operator,
          source_location=whole_location))
680*99e0aae7SDavid Rees
681*99e0aae7SDavid Rees
@_handles('negation-expression -> bottom-expression')
def _negation_expression(expression):
  """Passes through an expression that has no unary operator."""
  return expression
685*99e0aae7SDavid Rees
686*99e0aae7SDavid Rees
@_handles('bottom-expression -> "(" expression ")"')
def _bottom_expression_parentheses(open_paren, expression, close_paren):
  """Returns the expression found between a pair of parentheses."""
  del open_paren, close_paren  # Unused.
  return expression
691*99e0aae7SDavid Rees
692*99e0aae7SDavid Rees
@_handles('bottom-expression -> function-name "(" argument-list ")"')
def _bottom_expression_function(function, open_paren, arguments, close_paren):
  """Builds the IR for a function call such as `name(arg, arg, ...)`.

  Arguments:
    function: The function name token.
    open_paren: The "(" token (unused).
    arguments: The list of argument expressions.
    close_paren: The ")" token; its end marks the end of the call.

  Returns:
    An ir_data.Expression holding the function call.
  """
  del open_paren  # Unused.
  # The call spans from the start of the function name to the closing paren.
  call_location = ir_data.Location(
      start=function.source_location.start,
      end=close_paren.source_location.end)
  return ir_data.Expression(
      function=ir_data.Function(
          function=_text_to_function(function.text),
          args=arguments.list,
          function_name=function,
          source_location=call_location))
704*99e0aae7SDavid Rees
705*99e0aae7SDavid Rees
@_handles('comma-then-expression -> "," expression')
def _comma_then_expression(comma, expression):
  """Drops the leading comma and returns the expression after it."""
  del comma  # Unused.
  return expression
710*99e0aae7SDavid Rees
711*99e0aae7SDavid Rees
@_handles('argument-list -> expression comma-then-expression*')
def _argument_list(head, tail):
  """Prepends the first argument to the list of remaining arguments.

  Note that tail is modified in place and is itself the return value.
  """
  tail.list[:0] = [head]
  return tail
716*99e0aae7SDavid Rees
717*99e0aae7SDavid Rees
@_handles('argument-list ->')
def _empty_argument_list():
  """Returns an empty _List for a call with no arguments."""
  return _List([])
721*99e0aae7SDavid Rees
722*99e0aae7SDavid Rees
@_handles('bottom-expression -> numeric-constant')
def _bottom_expression_from_numeric_constant(constant):
  """Wraps a numeric constant in an ir_data.Expression."""
  return ir_data.Expression(constant=constant)
726*99e0aae7SDavid Rees
727*99e0aae7SDavid Rees
@_handles('bottom-expression -> constant-reference')
def _bottom_expression_from_constant_reference(reference):
  """Wraps a constant reference in an ir_data.Expression."""
  return ir_data.Expression(constant_reference=reference)
731*99e0aae7SDavid Rees
732*99e0aae7SDavid Rees
@_handles('bottom-expression -> builtin-reference')
def _bottom_expression_from_builtin(reference):
  """Wraps a builtin reference in an ir_data.Expression."""
  return ir_data.Expression(builtin_reference=reference)
736*99e0aae7SDavid Rees
737*99e0aae7SDavid Rees
@_handles('bottom-expression -> boolean-constant')
def _bottom_expression_from_boolean_constant(boolean):
  """Wraps a boolean constant in an ir_data.Expression."""
  return ir_data.Expression(boolean_constant=boolean)
741*99e0aae7SDavid Rees
742*99e0aae7SDavid Rees
@_handles('bottom-expression -> field-reference')
def _bottom_expression_from_reference(reference):
  """Passes a field reference through unchanged."""
  return reference
746*99e0aae7SDavid Rees
747*99e0aae7SDavid Rees
@_handles('field-reference -> snake-reference field-reference-tail*')
def _indirect_field_reference(field_reference, field_references):
  """Builds a field reference Expression from a dotted path like `a.b.c`.

  Arguments:
    field_reference: The first reference in the path.
    field_references: The list of references that follow the first one.

  Returns:
    An ir_data.Expression holding an ir_data.FieldReference for the full path.
  """
  tails_location = field_references.source_location
  # When the tail list carries no end position, the path ends where the first
  # reference ends.
  end_location = (tails_location.end if tails_location.HasField('end')
                  else field_reference.source_location.end)
  path_location = parser_types.make_location(
      field_reference.source_location.start, end_location)
  return ir_data.Expression(field_reference=ir_data.FieldReference(
      path=[field_reference] + field_references.list,
      source_location=path_location))
758*99e0aae7SDavid Rees
759*99e0aae7SDavid Rees
760*99e0aae7SDavid Rees# If "Type.field" ever becomes syntactically valid, it will be necessary to
761*99e0aae7SDavid Rees# check that enum values are compile-time constants.
@_handles('field-reference-tail -> "." snake-reference')
def _field_reference_tail(dot, reference):
  """Drops the leading dot and returns the reference after it."""
  del dot  # Unused.
  return reference
766*99e0aae7SDavid Rees
767*99e0aae7SDavid Rees
@_handles('numeric-constant -> Number')
def _numeric_constant(number):
  """Converts a Number token to an ir_data.NumericConstant.

  Binary ("0b"), hexadecimal ("0x"), and decimal literals all tokenize to the
  same symbol because they are interchangeable in source code.  Underscore
  digit separators are stripped, and the value is stored as a decimal string.
  """
  digits = number.text.replace('_', '')
  base = {'0b': 2, '0x': 16}.get(number.text[0:2])
  if base is None:
    value = int(digits, 10)
  else:
    # Skip the two-character base prefix.
    value = int(digits[2:], base)
  return ir_data.NumericConstant(value=str(value))
779*99e0aae7SDavid Rees
780*99e0aae7SDavid Rees
@_handles('type-definition -> struct')
@_handles('type-definition -> bits')
@_handles('type-definition -> enum')
@_handles('type-definition -> external')
def _type_definition(type_definition):
  """Passes any kind of type definition through unchanged."""
  return type_definition
787*99e0aae7SDavid Rees
788*99e0aae7SDavid Rees
789*99e0aae7SDavid Rees# struct StructureName:
790*99e0aae7SDavid Rees#   ... fields ...
791*99e0aae7SDavid Rees# bits BitName:
792*99e0aae7SDavid Rees#   ... fields ...
@_handles('struct -> "struct" type-name delimited-parameter-definition-list?'
          '          ":" Comment? eol struct-body')
@_handles('bits -> "bits" type-name delimited-parameter-definition-list? ":"'
          '        Comment? eol bits-body')
def _structure(struct, name, parameters, colon, comment, newline, struct_body):
  """Composes the top-level IR for an Emboss `struct` or `bits`.

  Arguments:
    struct: The "struct" or "bits" keyword token; marks the structure's start.
    name: The type name.
    parameters: An optional (zero- or one-element) list holding the list of
        parameter definitions.
    colon: Unused.
    comment: Unused.
    newline: Unused.
    struct_body: The type definition built from the body, which is completed
        in place.

  Returns:
    struct_body, with its name, runtime parameters, and structure source
    location filled in.
  """
  del colon, comment, newline  # Unused.
  # The structure spans from the introducing keyword to the end of the body.
  structure = ir_data_utils.builder(struct_body.structure)
  structure.source_location.start.CopyFrom(struct.source_location.start)
  structure.source_location.end.CopyFrom(struct_body.source_location.end)
  if not struct_body.name:
    struct_body.name = ir_data_utils.copy(name)
  else:
    ir_data_utils.update(struct_body.name, name)
  if parameters.list:
    struct_body.runtime_parameter.extend(parameters.list[0].list)
  return struct_body
811*99e0aae7SDavid Rees
812*99e0aae7SDavid Rees
@_handles('delimited-parameter-definition-list ->'
          '    "(" parameter-definition-list ")"')
def _delimited_parameter_definition_list(open_paren, parameters, close_paren):
  """Returns the parameter list found between a pair of parentheses."""
  del open_paren, close_paren  # Unused
  return parameters
818*99e0aae7SDavid Rees
819*99e0aae7SDavid Rees
@_handles('parameter-definition -> snake-name ":" type')
def _parameter_definition(name, double_colon, parameter_type):
  """Builds an ir_data.RuntimeParameter from a `name: type` definition.

  Arguments:
    name: The parameter's name.
    double_colon: The ":" separator token (unused).
    parameter_type: The parameter's type, stored as physical_type_alias.

  Returns:
    An ir_data.RuntimeParameter.
  """
  del double_colon  # Unused
  return ir_data.RuntimeParameter(name=name, physical_type_alias=parameter_type)
824*99e0aae7SDavid Rees
825*99e0aae7SDavid Rees
@_handles('parameter-definition-list-tail -> "," parameter-definition')
def _parameter_definition_list_tail(comma, parameter):
  """Drops the leading comma and returns the parameter definition after it."""
  del comma  # Unused.
  return parameter
830*99e0aae7SDavid Rees
831*99e0aae7SDavid Rees
@_handles('parameter-definition-list -> parameter-definition'
          '                             parameter-definition-list-tail*')
def _parameter_definition_list(head, tail):
  """Prepends the first parameter definition to the remaining definitions.

  Note that tail is modified in place and is itself the return value.
  """
  tail.list[:0] = [head]
  return tail
837*99e0aae7SDavid Rees
838*99e0aae7SDavid Rees
@_handles('parameter-definition-list ->')
def _empty_parameter_definition_list():
  """Returns an empty _List for a parameter list with no definitions."""
  return _List([])
842*99e0aae7SDavid Rees
843*99e0aae7SDavid Rees
844*99e0aae7SDavid Rees# The body of a struct: basically, the part after the first line.
@_handles('struct-body -> Indent doc-line* attribute-line*'
          '               type-definition* struct-field-block Dedent')
def _struct_body(indent, docs, attributes, types, fields, dedent):
  """Builds the body of a `struct`, whose addressable unit is BYTE."""
  del indent, dedent  # Unused.
  unit = ir_data.AddressableUnit.BYTE
  return _structure_body(docs, attributes, types, fields, unit)
851*99e0aae7SDavid Rees
852*99e0aae7SDavid Rees
def _structure_body(docs, attributes, types, fields, addressable_unit):
  """Constructs the body of a structure (bits or struct) definition.

  Arguments:
    docs: The list of documentation entries.
    attributes: The list of attributes.
    types: The list of type definitions declared directly in the body.
    fields: The list of field entries; each provides a `field` and `subtypes`.
    addressable_unit: The ir_data.AddressableUnit for the structure.

  Returns:
    An ir_data.TypeDefinition whose subtypes are the directly-declared types
    followed by the subtypes contributed by each field, in field order.
  """
  members = fields.list
  subtypes = list(types.list)
  for member in members:
    subtypes.extend(member.subtypes)
  return ir_data.TypeDefinition(
      structure=ir_data.Structure(field=[member.field for member in members]),
      documentation=docs.list,
      attribute=attributes.list,
      subtype=subtypes,
      addressable_unit=addressable_unit)
862*99e0aae7SDavid Rees
863*99e0aae7SDavid Rees
@_handles('struct-field-block ->')
@_handles('bits-field-block ->')
@_handles('anonymous-bits-field-block ->')
def _empty_field_block():
  """Returns an empty _List for a field block with no fields."""
  return _List([])
869*99e0aae7SDavid Rees
870*99e0aae7SDavid Rees
@_handles('struct-field-block ->'
          '    conditional-struct-field-block struct-field-block')
@_handles('bits-field-block ->'
          '    conditional-bits-field-block bits-field-block')
@_handles('anonymous-bits-field-block -> conditional-anonymous-bits-field-block'
          '                              anonymous-bits-field-block')
def _conditional_block_plus_field_block(conditional_block, block):
  """Prepends the fields of a conditional block to block, as a new _List."""
  return _List([*conditional_block.list, *block.list])
879*99e0aae7SDavid Rees
880*99e0aae7SDavid Rees
@_handles('struct-field-block ->'
          '    unconditional-struct-field struct-field-block')
@_handles('bits-field-block ->'
          '    unconditional-bits-field bits-field-block')
@_handles('anonymous-bits-field-block ->'
          '    unconditional-anonymous-bits-field anonymous-bits-field-block')
def _unconditional_block_plus_field_block(field, block):
  """Prepends an unconditional field to block.

  The field's existence_condition is set to the boolean constant True, with
  the field's own source location used for both the condition and the
  constant.

  Arguments:
    field: The field entry to prepend; its `field` is modified in place.
    block: The list of fields that follow.

  Returns:
    A new _List holding field followed by the contents of block.
  """
  always = ir_data_utils.builder(field.field).existence_condition
  always.source_location.CopyFrom(field.source_location)
  always.boolean_constant.source_location.CopyFrom(field.source_location)
  always.boolean_constant.value = True
  return _List([field, *block.list])
895*99e0aae7SDavid Rees
896*99e0aae7SDavid Rees
897*99e0aae7SDavid Rees# Struct "fields" are regular fields, inline enums, bits, or structs, anonymous
898*99e0aae7SDavid Rees# inline bits, or virtual fields.
@_handles('unconditional-struct-field -> field')
@_handles('unconditional-struct-field -> inline-enum-field-definition')
@_handles('unconditional-struct-field -> inline-bits-field-definition')
@_handles('unconditional-struct-field -> inline-struct-field-definition')
@_handles('unconditional-struct-field -> anonymous-bits-field-definition')
@_handles('unconditional-struct-field -> virtual-field')
# Bits fields are "regular" fields, inline enums or bits, or virtual fields.
#
# Inline structs and anonymous inline bits are not allowed inside of bits:
# anonymous inline bits are pointless, and inline structs do not make sense,
# since a struct cannot be a part of a bits.
#
# Anonymous inline bits may not include virtual fields; instead, the virtual
# field should be a direct part of the enclosing structure.
@_handles('unconditional-anonymous-bits-field -> field')
@_handles('unconditional-anonymous-bits-field -> inline-enum-field-definition')
@_handles('unconditional-anonymous-bits-field -> inline-bits-field-definition')
@_handles('unconditional-bits-field -> unconditional-anonymous-bits-field')
@_handles('unconditional-bits-field -> virtual-field')
def _unconditional_field(field):
  """Passes any kind of struct or bits field through its unifying production."""
  return field
921*99e0aae7SDavid Rees
922*99e0aae7SDavid Rees
923*99e0aae7SDavid Rees# TODO(bolms): Add 'elif' and 'else' support.
924*99e0aae7SDavid Rees# TODO(bolms): Should nested 'if' blocks be allowed?
@_handles('conditional-struct-field-block ->'
          '    "if" expression ":" Comment? eol'
          '        Indent unconditional-struct-field+ Dedent')
@_handles('conditional-bits-field-block ->'
          '    "if" expression ":" Comment? eol'
          '        Indent unconditional-bits-field+ Dedent')
@_handles('conditional-anonymous-bits-field-block ->'
          '    "if" expression ":" Comment? eol'
          '        Indent unconditional-anonymous-bits-field+ Dedent')
def _conditional_field_block(if_keyword, expression, colon, comment, newline,
                             indent, fields, dedent):
  """Applies the `if` expression as the existence_condition of every field.

  Arguments:
    if_keyword: Unused.
    expression: The condition expression; it is copied into each field.
    colon: Unused.
    comment: Unused.
    newline: Unused.
    indent: Unused.
    fields: The list of fields inside the `if` block; each field is modified
        in place.
    dedent: Unused.

  Returns:
    fields, with each field's existence_condition set.
  """
  del if_keyword, newline, colon, comment, indent, dedent  # Unused.
  for member in fields.list:
    existence = ir_data_utils.builder(member.field).existence_condition
    existence.CopyFrom(expression)
    # The condition's text sits on the `if` line rather than within the
    # field, so its location is flagged as disjoint from its parent.
    existence.source_location.is_disjoint_from_parent = True
  return fields
943*99e0aae7SDavid Rees
944*99e0aae7SDavid Rees
945*99e0aae7SDavid Rees# The body of a bit field definition: basically, the part after the first line.
@_handles('bits-body -> Indent doc-line* attribute-line*'
          '             type-definition* bits-field-block Dedent')
def _bits_body(indent, docs, attributes, types, fields, dedent):
  """Builds the body of a `bits`, whose addressable unit is BIT."""
  del indent, dedent  # Unused.
  unit = ir_data.AddressableUnit.BIT
  return _structure_body(docs, attributes, types, fields, unit)
952*99e0aae7SDavid Rees
953*99e0aae7SDavid Rees
954*99e0aae7SDavid Rees# Inline bits (defined as part of a field) are more restricted than standalone
955*99e0aae7SDavid Rees# bits.
@_handles('anonymous-bits-body ->'
          '    Indent attribute-line* anonymous-bits-field-block Dedent')
def _anonymous_bits_body(indent, attributes, fields, dedent):
  """Builds the body of an anonymous `bits`, which has no docs or subtypes."""
  del indent, dedent  # Unused.
  no_docs = _List([])
  no_types = _List([])
  return _structure_body(no_docs, attributes, no_types, fields,
                         ir_data.AddressableUnit.BIT)
962*99e0aae7SDavid Rees
963*99e0aae7SDavid Rees
964*99e0aae7SDavid Rees# A field is:
965*99e0aae7SDavid Rees#     range  type  name  (abbr)  [attr: value] [attr2: value] -- doc
966*99e0aae7SDavid Rees#         -- doc
967*99e0aae7SDavid Rees#         -- doc
968*99e0aae7SDavid Rees#         [attr3: value]
969*99e0aae7SDavid Rees#         [attr4: value]
@_handles('field ->'
          '    field-location type snake-name abbreviation? attribute* doc?'
          '    Comment? eol field-body?')
def _field(location, field_type, name, abbreviation, attributes, doc, comment,
           newline, field_body):
  """Constructs an ir_data.Field from the components of a field definition.

  Arguments:
    location: The field's location; also marks the start of the field.
    field_type: The field's type.
    name: The field's name.
    abbreviation: An optional (zero- or one-element) list holding the
        abbreviation.
    attributes: The list of attributes on the field's first line.
    doc: The list of documentation from the field's first line.
    comment: Unused.
    newline: The end-of-line token; marks the end of the field when there is
        no body.
    field_body: An optional (zero- or one-element) list holding the body,
        which contributes further attributes and documentation.

  Returns:
    A _FieldWithType wrapping the new field.
  """
  del comment  # Unused
  field_ir = ir_data.Field(
      location=location,
      type=field_type,
      name=name,
      attribute=attributes.list,
      documentation=doc.list)
  field = ir_data_utils.builder(field_ir)
  if field_body.list:
    body = field_body.list[0]
    field.attribute.extend(body.attribute)
    field.documentation.extend(body.documentation)
  if abbreviation.list:
    field.abbreviation.CopyFrom(abbreviation.list[0])
  field.source_location.start.CopyFrom(location.source_location.start)
  # The field ends at the end of its body if it has one, and at the end of its
  # first line otherwise.
  last_element = (field_body if field_body.source_location.HasField('end')
                  else newline)
  field.source_location.end.CopyFrom(last_element.source_location.end)
  return _FieldWithType(field=field_ir)
994*99e0aae7SDavid Rees
995*99e0aae7SDavid Rees
996*99e0aae7SDavid Rees# A "virtual field" is:
997*99e0aae7SDavid Rees#     let name = value
998*99e0aae7SDavid Rees#         -- doc
999*99e0aae7SDavid Rees#         -- doc
1000*99e0aae7SDavid Rees#         [attr1: value]
1001*99e0aae7SDavid Rees#         [attr2: value]
@_handles('virtual-field ->'
          '    "let" snake-name "=" expression Comment? eol field-body?')
def _virtual_field(let, name, equals, value, comment, newline, field_body):
  """Constructs an ir_data.Field for a `let name = value` virtual field.

  Arguments:
    let: The "let" keyword token; marks the start of the field.
    name: The field's name.
    equals: Unused.
    value: The expression stored as the field's read_transform.
    comment: Unused.
    newline: The end-of-line token; marks the end of the field when there is
        no body.
    field_body: An optional (zero- or one-element) list holding the body,
        which contributes attributes and documentation.

  Returns:
    A _FieldWithType wrapping the new field.
  """
  del equals, comment  # Unused
  field_ir = ir_data.Field(read_transform=value, name=name)
  field = ir_data_utils.builder(field_ir)
  if field_body.list:
    body = field_body.list[0]
    field.attribute.extend(body.attribute)
    field.documentation.extend(body.documentation)
  field.source_location.start.CopyFrom(let.source_location.start)
  # The field ends at the end of its body if it has one, and at the end of its
  # first line otherwise.
  last_element = (field_body if field_body.source_location.HasField('end')
                  else newline)
  field.source_location.end.CopyFrom(last_element.source_location.end)
  return _FieldWithType(field=field_ir)
1018*99e0aae7SDavid Rees
1019*99e0aae7SDavid Rees
1020*99e0aae7SDavid Rees# An inline enum is:
1021*99e0aae7SDavid Rees#     range  "enum"  name  (abbr):
1022*99e0aae7SDavid Rees#         -- doc
1023*99e0aae7SDavid Rees#         -- doc
1024*99e0aae7SDavid Rees#         [attr3: value]
1025*99e0aae7SDavid Rees#         [attr4: value]
1026*99e0aae7SDavid Rees#         NAME = 10
1027*99e0aae7SDavid Rees#         NAME2 = 20
@_handles('inline-enum-field-definition ->'
          '    field-location "enum" snake-name abbreviation? ":" Comment? eol'
          '    enum-body')
def _inline_enum_field(location, enum, name, abbreviation, colon, comment,
                       newline, enum_body):
  """Constructs the field for an inline enum via _inline_type_field."""
  del enum, colon, comment, newline  # Unused.
  return _inline_type_field(location, name, abbreviation, enum_body)
1036*99e0aae7SDavid Rees
1037*99e0aae7SDavid Rees
@_handles(
    'inline-struct-field-definition ->'
    '    field-location "struct" snake-name abbreviation? ":" Comment? eol'
    '    struct-body')
def _inline_struct_field(location, struct, name, abbreviation, colon, comment,
                         newline, struct_body):
  """Builds the field and generated type for an inline `struct` field.

  All of the work is delegated to _inline_type_field; the keyword, colon,
  comment, and end-of-line nodes are not needed.
  """
  del struct, colon, comment, newline  # Unused.
  return _inline_type_field(location=location,
                            name=name,
                            abbreviation=abbreviation,
                            body=struct_body)
1046*99e0aae7SDavid Rees
1047*99e0aae7SDavid Rees
@_handles('inline-bits-field-definition ->'
          '    field-location "bits" snake-name abbreviation? ":" Comment? eol'
          '    bits-body')
def _inline_bits_field(location, bits, name, abbreviation, colon, comment,
                       newline, bits_body):
  """Builds the field and generated type for an inline `bits` field.

  All of the work is delegated to _inline_type_field; the keyword, colon,
  comment, and end-of-line nodes are not needed.
  """
  del bits, colon, comment, newline  # Unused.
  return _inline_type_field(location=location,
                            name=name,
                            abbreviation=abbreviation,
                            body=bits_body)
1055*99e0aae7SDavid Rees
1056*99e0aae7SDavid Rees
def _inline_type_field(location, name, abbreviation, body):
  """Shared implementation of the inline and anonymous type field handlers.

  Used by _inline_enum_field, _inline_struct_field, _inline_bits_field, and
  _anonymous_bit_field.  Builds a field whose type is a local reference to a
  type definition derived from `body`, and names that type definition with the
  CamelCase form of the field's snake_case name.

  Args:
    location: The field location; also supplies the start of the source
        location for both the field and `body`.
    name: The field's name definition.  A copy with a CamelCase name is used as
        the name of the generated type.
    abbreviation: A list wrapper holding zero or one abbreviation.
    body: The type definition for the inline type; it must have either an
        `enumeration` or a `structure` set.  It is modified in place.

  Returns:
    A _FieldWithType holding the new field and, as `subtypes`, `body` followed
    by the types that were nested inside `body`.
  """
  field_ir = ir_data.Field(location=location,
                       name=name,
                       attribute=body.attribute,
                       documentation=body.documentation)
  field = ir_data_utils.builder(field_ir)
  # All attributes should be attached to the field, not the type definition: if
  # the user wants to use type attributes, they should create a separate type
  # definition and reference it.
  del body.attribute[:]
  # The generated type shares the field's name, converted to CamelCase.
  type_name = ir_data_utils.copy(name)
  ir_data_utils.builder(type_name).name.text = name_conversion.snake_to_camel(type_name.name.text)
  # Point the field's type at the generated type, reusing the name's source
  # location for the reference, the atomic type, and the type itself.
  field.type.atomic_type.reference.source_name.extend([type_name.name])
  field.type.atomic_type.reference.source_location.CopyFrom(
      type_name.source_location)
  field.type.atomic_type.reference.is_local_name = True
  field.type.atomic_type.source_location.CopyFrom(type_name.source_location)
  field.type.source_location.CopyFrom(type_name.source_location)
  if abbreviation.list:
    field.abbreviation.CopyFrom(abbreviation.list[0])
  # Both the field and the type body start where the field location starts.
  field.source_location.start.CopyFrom(location.source_location.start)
  ir_data_utils.builder(body.source_location).start.CopyFrom(location.source_location.start)
  # Propagate the adjusted body location to whichever payload the body holds.
  if body.HasField('enumeration'):
    ir_data_utils.builder(body.enumeration).source_location.CopyFrom(body.source_location)
  else:
    assert body.HasField('structure')
    ir_data_utils.builder(body.structure).source_location.CopyFrom(body.source_location)
  ir_data_utils.builder(body).name.CopyFrom(type_name)
  field.source_location.end.CopyFrom(body.source_location.end)
  # Collect the body and its nested types into one list, then empty the body's
  # own subtype list.
  subtypes = [body] + list(body.subtype)
  del body.subtype[:]
  return _FieldWithType(field=field_ir, subtypes=subtypes)
1090*99e0aae7SDavid Rees
1091*99e0aae7SDavid Rees
@_handles('anonymous-bits-field-definition ->'
          '    field-location "bits" ":" Comment? eol anonymous-bits-body')
def _anonymous_bit_field(location, bits_keyword, colon, comment, newline,
                         bits_body):
  """Builds the field and generated type for a `bits` field with no name.

  A name is obtained from _get_anonymous_field_name() and flagged as anonymous;
  it borrows the source location of the `bits` keyword.  The rest of the work
  is delegated to _inline_type_field with an empty abbreviation list.
  """
  del colon, comment, newline  # Unused.
  generated_text = _get_anonymous_field_name()
  keyword_location = bits_keyword.source_location
  generated_word = ir_data.Word(text=generated_text,
                                source_location=keyword_location)
  generated_name = ir_data.NameDefinition(name=generated_word,
                                          source_location=keyword_location,
                                          is_anonymous=True)
  return _inline_type_field(location=location,
                            name=generated_name,
                            abbreviation=_List([]),
                            body=bits_body)
1105*99e0aae7SDavid Rees
1106*99e0aae7SDavid Rees
@_handles('field-body -> Indent doc-line* attribute-line* Dedent')
def _field_body(indent, docs, attributes, dedent):
  """Packs a field body's doc lines and attribute lines into an ir_data.Field."""
  del indent, dedent  # Unused.
  body = ir_data.Field(attribute=attributes.list, documentation=docs.list)
  return body
1111*99e0aae7SDavid Rees
1112*99e0aae7SDavid Rees
1113*99e0aae7SDavid Rees# A parenthetically-denoted abbreviation.
@_handles('abbreviation -> "(" snake-word ")"')
def _abbreviation(open_paren, word, close_paren):
  """Returns the word between the parentheses, unchanged."""
  del open_paren, close_paren  # Unused.
  return word
1118*99e0aae7SDavid Rees
1119*99e0aae7SDavid Rees
1120*99e0aae7SDavid Rees# enum EnumName:
1121*99e0aae7SDavid Rees#   ... values ...
@_handles('enum -> "enum" type-name ":" Comment? eol enum-body')
def _enum(enum, name, colon, comment, newline, enum_body):
  """Completes an enum type definition with its name and source location.

  The enumeration's source location runs from the start of the `enum` keyword
  to the end of the enum body.  `enum_body` is modified in place and returned.
  """
  del colon, comment, newline  # Unused.
  enumeration = ir_data_utils.builder(enum_body.enumeration)
  enumeration.source_location.start.CopyFrom(enum.source_location.start)
  enumeration.source_location.end.CopyFrom(enum_body.source_location.end)
  definition = ir_data_utils.builder(enum_body)
  definition.name.CopyFrom(name)
  return enum_body
1131*99e0aae7SDavid Rees
1132*99e0aae7SDavid Rees
1133*99e0aae7SDavid Rees# [enum Foo:]
1134*99e0aae7SDavid Rees#   name = value
1135*99e0aae7SDavid Rees#   name = value
@_handles('enum-body -> Indent doc-line* attribute-line* enum-value+ Dedent')
def _enum_body(indent, docs, attributes, values, dedent):
  """Builds a bit-addressed TypeDefinition holding an enum's values.

  The definition also carries the body's doc lines and attribute lines.
  """
  del indent, dedent  # Unused.
  enumeration = ir_data.Enum(value=values.list)
  return ir_data.TypeDefinition(
      enumeration=enumeration,
      documentation=docs.list,
      attribute=attributes.list,
      addressable_unit=ir_data.AddressableUnit.BIT)
1144*99e0aae7SDavid Rees
1145*99e0aae7SDavid Rees
1146*99e0aae7SDavid Rees# name = value
@_handles('enum-value -> '
          '    constant-name "=" expression attribute* doc? Comment? eol enum-value-body?')
def _enum_value(name, equals, expression, attribute, documentation, comment, newline,
                body):
  """Builds an ir_data.EnumValue for one `NAME = value` line.

  Documentation and attributes given on the line itself come first; those from
  the optional indented body, when present, are appended after them.
  """
  del equals, comment, newline  # Unused.
  result = ir_data.EnumValue(name=name,
                             value=expression,
                             documentation=documentation.list,
                             attribute=attribute.list)
  if not body.list:
    return result
  value_body = body.list[0]
  result.documentation.extend(value_body.documentation)
  result.attribute.extend(value_body.attribute)
  return result
1160*99e0aae7SDavid Rees
1161*99e0aae7SDavid Rees
@_handles('enum-value-body -> Indent doc-line* attribute-line* Dedent')
def _enum_value_body(indent, docs, attributes, dedent):
  """Packs an enum value body's doc and attribute lines into an EnumValue."""
  del indent, dedent  # Unused.
  body = ir_data.EnumValue(attribute=attributes.list, documentation=docs.list)
  return body
1166*99e0aae7SDavid Rees
1167*99e0aae7SDavid Rees
1168*99e0aae7SDavid Rees# An external is just a declaration that a type exists and has certain
1169*99e0aae7SDavid Rees# attributes.
@_handles('external -> "external" type-name ":" Comment? eol external-body')
def _external(external, name, colon, comment, newline, external_body):
  """Completes an external type definition with its name and start location.

  The start of the body's source location is set to the start of the
  `external` keyword.  If the body already has a name, `name` is applied to it
  with ir_data_utils.update; otherwise a copy of `name` is assigned.  The same
  `external_body` object is returned.
  """
  del colon, comment, newline  # Unused.
  body_location = ir_data_utils.builder(external_body.source_location)
  body_location.start.CopyFrom(external.source_location.start)
  if not external_body.name:
    external_body.name = ir_data_utils.copy(name)
  else:
    ir_data_utils.update(external_body.name, name)
  return external_body
1179*99e0aae7SDavid Rees
1180*99e0aae7SDavid Rees
# This syntax implicitly requires either a documentation line or an attribute
# line, or it won't parse (because no Indent/Dedent tokens will be emitted).
@_handles('external-body -> Indent doc-line* attribute-line* Dedent')
def _external_body(indent, docs, attributes, dedent):
  """Builds the TypeDefinition for the body of an `external` declaration.

  The External's source location spans from the start of the Indent token to
  the end of the Dedent token.
  """
  # Set source_location here, since it won't be set automatically.
  body_location = ir_data.Location(start=indent.source_location.start,
                                   end=dedent.source_location.end)
  external = ir_data.External(source_location=body_location)
  return ir_data.TypeDefinition(
      external=external,
      documentation=docs.list,
      attribute=attributes.list)
1192*99e0aae7SDavid Rees
1193*99e0aae7SDavid Rees
@_handles('field-location -> expression "[" "+" expression "]"')
def _field_location(start, open_bracket, plus, size, close_bracket):
  """Builds an ir_data.FieldLocation from the start and size expressions."""
  del open_bracket, plus, close_bracket  # Unused.
  field_location = ir_data.FieldLocation(size=size, start=start)
  return field_location
1198*99e0aae7SDavid Rees
1199*99e0aae7SDavid Rees
@_handles('delimited-argument-list -> "(" argument-list ")"')
def _type_argument_list(open_paren, arguments, close_paren):
  """Returns the argument list between the parentheses, unchanged."""
  del open_paren, close_paren  # Unused
  return arguments
1204*99e0aae7SDavid Rees
1205*99e0aae7SDavid Rees
1206*99e0aae7SDavid Rees# A type is "TypeName" or "TypeName[length]" or "TypeName[length][length]", etc.
1207*99e0aae7SDavid Rees# An array type may have an empty length ("Type[]").  This is only valid for the
1208*99e0aae7SDavid Rees# outermost length (the last set of brackets), but that must be checked
1209*99e0aae7SDavid Rees# elsewhere.
@_handles('type -> type-reference delimited-argument-list? type-size-specifier?'
          '        array-length-specifier*')
def _type(reference, parameters, size, array_spec):
  """Assembles an ir_data.Type from a reference and its optional suffixes.

  The innermost type is an atomic type built from a copy of `reference`, with
  any runtime parameters and explicit size attached.  Each entry of
  `array_spec` then wraps the type built so far in a new array type, so the
  last length specifier becomes the outermost array.
  """
  # The atomic type spans the reference plus its argument list, if any.  The
  # enclosing Type additionally spans the size specifier, if any.
  if parameters.list:
    atomic_type_end = parameters.source_location.end
  else:
    atomic_type_end = reference.source_location.end
  base_type_end = size.source_location.end if size.list else atomic_type_end
  base_type_location = parser_types.make_location(
      reference.source_location.start, base_type_end)
  atomic_type_location = parser_types.make_location(
      reference.source_location.start, atomic_type_end)
  runtime_parameters = parameters.list[0].list if parameters.list else []
  explicit_size = size.list[0] if size.list else None
  atomic_type = ir_data.AtomicType(
      reference=ir_data_utils.copy(reference),
      source_location=atomic_type_location,
      runtime_parameter=runtime_parameters)
  t = ir_data.Type(atomic_type=atomic_type,
                   size_in_bits=explicit_size,
                   source_location=base_type_location)
  for length in array_spec.list:
    # Every array layer starts where the wrapped type starts and ends at the
    # end of its own length specifier.
    location = parser_types.make_location(
        t.source_location.start, length.source_location.end)
    if isinstance(length, ir_data.Expression):
      # Explicit element count: "Type[n]".
      sized_array = ir_data.ArrayType(base_type=t,
                                      element_count=length,
                                      source_location=location)
      t = ir_data.Type(array_type=sized_array, source_location=location)
    elif isinstance(length, ir_data.Empty):
      # Empty brackets: "Type[]".
      automatic_array = ir_data.ArrayType(base_type=t,
                                          automatic=length,
                                          source_location=location)
      t = ir_data.Type(array_type=automatic_array, source_location=location)
    else:
      assert False, "Shouldn't be here."
  return t
1252*99e0aae7SDavid Rees
1253*99e0aae7SDavid Rees
1254*99e0aae7SDavid Rees# TODO(bolms): Should symbolic names or expressions be allowed?  E.g.,
1255*99e0aae7SDavid Rees# UInt:FIELD_SIZE or UInt:(16 + 16)?
@_handles('type-size-specifier -> ":" numeric-constant')
def _type_size_specifier(colon, numeric_constant):
  """Wraps the constant from a ":32"-style size suffix in an Expression."""
  del colon  # Unused.
  size_expression = ir_data.Expression(constant=numeric_constant)
  return size_expression
1261*99e0aae7SDavid Rees
1262*99e0aae7SDavid Rees
1263*99e0aae7SDavid Rees# The distinctions between different formats of NameDefinitions, Words, and
1264*99e0aae7SDavid Rees# References are enforced during parsing, but not propagated to the IR.
@_handles('type-name -> type-word')
@_handles('snake-name -> snake-word')
@_handles('constant-name -> constant-word')
def _name(word):
  """Wraps a word in an ir_data.NameDefinition."""
  name_definition = ir_data.NameDefinition(name=word)
  return name_definition
1270*99e0aae7SDavid Rees
1271*99e0aae7SDavid Rees
@_handles('type-word -> CamelWord')
@_handles('snake-word -> SnakeWord')
@_handles('builtin-field-word -> "$size_in_bits"')
@_handles('builtin-field-word -> "$size_in_bytes"')
@_handles('builtin-field-word -> "$max_size_in_bits"')
@_handles('builtin-field-word -> "$max_size_in_bytes"')
@_handles('builtin-field-word -> "$min_size_in_bits"')
@_handles('builtin-field-word -> "$min_size_in_bytes"')
@_handles('builtin-word -> "$is_statically_sized"')
@_handles('builtin-word -> "$static_size_in_bits"')
@_handles('builtin-word -> "$next"')
@_handles('constant-word -> ShoutyWord')
@_handles('and-operator -> "&&"')
@_handles('or-operator -> "||"')
@_handles('less-operator -> "<="')
@_handles('less-operator -> "<"')
@_handles('greater-operator -> ">="')
@_handles('greater-operator -> ">"')
@_handles('equality-operator -> "=="')
@_handles('inequality-operator -> "!="')
@_handles('additive-operator -> "+"')
@_handles('additive-operator -> "-"')
@_handles('multiplicative-operator -> "*"')
@_handles('function-name -> "$max"')
@_handles('function-name -> "$present"')
@_handles('function-name -> "$upper_bound"')
@_handles('function-name -> "$lower_bound"')
def _word(word):
  """Builds an ir_data.Word carrying the text of a single token."""
  token_text = word.text
  return ir_data.Word(text=token_text)
1301*99e0aae7SDavid Rees
1302*99e0aae7SDavid Rees
@_handles('type-reference -> type-reference-tail')
@_handles('constant-reference -> constant-reference-tail')
def _un_module_qualified_type_reference(reference):
  """Returns a reference tail unchanged when it has no leading qualifier."""
  return reference
1307*99e0aae7SDavid Rees
1308*99e0aae7SDavid Rees
@_handles('constant-reference-tail -> constant-word')
@_handles('type-reference-tail -> type-word')
@_handles('snake-reference -> snake-word')
@_handles('snake-reference -> builtin-field-word')
def _reference(word):
  """Builds an ir_data.Reference whose source name is the single given word."""
  source_name = [word]
  return ir_data.Reference(source_name=source_name)
1315*99e0aae7SDavid Rees
1316*99e0aae7SDavid Rees
@_handles('builtin-reference -> builtin-word')
def _builtin_reference(word):
  """Builds a Reference to a builtin, with its canonical name already set.

  The canonical name's object path is the builtin word's own text.
  """
  source_name = [word]
  canonical_name = ir_data.CanonicalName(object_path=[word.text])
  return ir_data.Reference(source_name=source_name,
                           canonical_name=canonical_name)
1322*99e0aae7SDavid Rees
1323*99e0aae7SDavid Rees
1324*99e0aae7SDavid Rees# Because constant-references ("Enum.NAME") are used in the same contexts as
1325*99e0aae7SDavid Rees# field-references ("field.subfield"), module-qualified constant references
1326*99e0aae7SDavid Rees# ("module.Enum.VALUE") have to take snake-reference, not snake-word, on the
1327*99e0aae7SDavid Rees# left side of the dot.  Otherwise, when a "snake_word" is followed by a "." in
1328*99e0aae7SDavid Rees# an expression context, the LR(1) parser cannot determine whether to reduce the
1329*99e0aae7SDavid Rees# snake-word to snake-reference (to eventually become field-reference), or to
1330*99e0aae7SDavid Rees# shift the dot onto the stack (to eventually become constant-reference).  By
1331*99e0aae7SDavid Rees# using snake-reference as the head of both, the parser can always reduce, then
1332*99e0aae7SDavid Rees# shift the dot, then determine whether to proceed with constant-reference if it
1333*99e0aae7SDavid Rees# sees "snake_name.TypeName" or field-reference if it sees
1334*99e0aae7SDavid Rees# "snake_name.snake_name".
@_handles('constant-reference -> snake-reference "." constant-reference-tail')
def _module_qualified_constant_reference(new_head, dot, reference):
  """Prepends the source name of `new_head` to `reference`, in place.

  `reference` is returned with its source_name replaced by the words of
  `new_head` followed by its own original words.
  """
  del dot  # Unused.
  combined_source_name = list(new_head.source_name)
  combined_source_name.extend(reference.source_name)
  # Empty the existing list, then refill it through extend().
  del reference.source_name[:]
  reference.source_name.extend(combined_source_name)
  return reference
1342*99e0aae7SDavid Rees
1343*99e0aae7SDavid Rees
@_handles('constant-reference-tail -> type-word "." constant-reference-tail')
# module.Type.SubType.name is a reference to something that *must* be a
# constant.
@_handles('constant-reference-tail -> type-word "." snake-reference')
@_handles('type-reference-tail -> type-word "." type-reference-tail')
@_handles('type-reference -> snake-word "." type-reference-tail')
def _qualified_reference(word, dot, reference):
  """Prepends `word` as a qualifier to the source name of `reference`.

  `reference` is modified in place and returned.
  """
  del dot  # Unused.
  qualified_source_name = [word]
  qualified_source_name.extend(reference.source_name)
  # Empty the existing list, then refill it through extend().
  del reference.source_name[:]
  reference.source_name.extend(qualified_source_name)
  return reference
1357*99e0aae7SDavid Rees
1358*99e0aae7SDavid Rees
1359*99e0aae7SDavid Rees# Arrays are properly translated to IR in _type().
@_handles('array-length-specifier -> "[" expression "]"')
def _array_length_specifier(open_bracket, length, close_bracket):
  """Returns the length expression between the brackets, unchanged."""
  del open_bracket, close_bracket  # Unused.
  return length
1364*99e0aae7SDavid Rees
1365*99e0aae7SDavid Rees
1366*99e0aae7SDavid Rees# An array specifier can end with empty brackets ("arr[3][]"), in which case the
1367*99e0aae7SDavid Rees# array's size is inferred from the size of its enclosing field.
@_handles('array-length-specifier -> "[" "]"')
def _auto_array_length_specifier(open_bracket, close_bracket):
  """Builds the ir_data.Empty marker for an empty "[]" length specifier."""
  # Note that the Empty's source_location is the space between the brackets
  # (if any): it runs from the end of "[" to the start of "]".
  between_brackets = ir_data.Location(start=open_bracket.source_location.end,
                                      end=close_bracket.source_location.start)
  return ir_data.Empty(source_location=between_brackets)
1375*99e0aae7SDavid Rees
1376*99e0aae7SDavid Rees
@_handles('eol -> "\\n" comment-line*')
def _eol(eol, comments):
  """Returns the newline token; any following comment lines are dropped."""
  del comments  # Unused
  return eol
1381*99e0aae7SDavid Rees
1382*99e0aae7SDavid Rees
@_handles('comment-line -> Comment? "\\n"')
def _comment_line(comment, eol):
  """Returns the newline token of a comment line; the comment is dropped."""
  del comment  # Unused
  return eol
1387*99e0aae7SDavid Rees
1388*99e0aae7SDavid Rees
def _finalize_grammar():
  """Registers the helper productions for foo*, foo+, and foo? symbols.

  Every right-hand-side symbol of every production currently in _handlers is
  inspected.  Symbols ending in "*", "+", or "?" get productions registered
  through _handles whose handlers produce _List values:

    foo*  ->  foo foo*  |  (empty)
    foo+  ->  foo foo*
    foo?  ->  foo       |  (empty)
  """
  star_symbols = set()
  plus_symbols = set()
  option_symbols = set()
  for production in _handlers:
    for symbol in production.rhs:
      base, marker = symbol[:-1], symbol[-1]
      if marker in ('*', '+'):
        # symbol+ relies on the rule for symbol*
        star_symbols.add(base)
        if marker == '+':
          plus_symbols.add(base)
      elif marker == '?':
        option_symbols.add(base)
  for symbol in star_symbols:
    repeat_production = '{s}* -> {s} {s}*'.format(s=symbol)
    _handles(repeat_production)(lambda e, r: _List([e] + r.list))
    empty_production = '{s}* ->'.format(s=symbol)
    _handles(empty_production)(lambda: _List([]))
  for symbol in plus_symbols:
    one_or_more_production = '{s}+ -> {s} {s}*'.format(s=symbol)
    _handles(one_or_more_production)(lambda e, r: _List([e] + r.list))
  for symbol in option_symbols:
    present_production = '{s}? -> {s}'.format(s=symbol)
    _handles(present_production)(lambda e: _List([e]))
    absent_production = '{s}? ->'.format(s=symbol)
    _handles(absent_production)(lambda: _List([]))
1415*99e0aae7SDavid Rees
# Register the foo*, foo+, and foo? helper productions now, so that they are
# already in _handlers when PRODUCTIONS is computed below.
_finalize_grammar()

# End of grammar.
################################################################################

# These export the grammar used by module_ir so that parser_generator can build
# a parser for the same language.
START_SYMBOL = 'module'
# NOTE(review): presumably the start symbol for parsing a standalone
# expression -- confirm against the parser generator.
EXPRESSION_START_SYMBOL = 'expression'
# Snapshot of the productions registered in _handlers at this point.
PRODUCTIONS = list(_handlers.keys())
1426