xref: /aosp_15_r20/external/fonttools/Lib/fontTools/feaLib/parser.py (revision e1fe3e4ad2793916b15cccdc4a7da52a7e1dd0e9)
1from fontTools.feaLib.error import FeatureLibError
2from fontTools.feaLib.lexer import Lexer, IncludingLexer, NonIncludingLexer
3from fontTools.feaLib.variableScalar import VariableScalar
4from fontTools.misc.encodingTools import getEncoding
5from fontTools.misc.textTools import bytechr, tobytes, tostr
6import fontTools.feaLib.ast as ast
7import logging
8import os
9import re
10
11
12log = logging.getLogger(__name__)
13
14
15class Parser(object):
16    """Initializes a Parser object.
17
18    Example:
19
20        .. code:: python
21
22            from fontTools.feaLib.parser import Parser
23            parser = Parser(file, font.getReverseGlyphMap())
24            parsetree = parser.parse()
25
26    Note: the ``glyphNames`` iterable serves a double role to help distinguish
27    glyph names from ranges in the presence of hyphens and to ensure that glyph
28    names referenced in a feature file are actually part of a font's glyph set.
29    If the iterable is left empty, no glyph name in glyph set checking takes
30    place, and all glyph tokens containing hyphens are treated as literal glyph
31    names, not as ranges. (Adding a space around the hyphen can, in any case,
32    help to disambiguate ranges from glyph names containing hyphens.)
33
34    By default, the parser will follow ``include()`` statements in the feature
35    file. To turn this off, pass ``followIncludes=False``. Pass a directory string as
36    ``includeDir`` to explicitly declare a directory to search included feature files
37    in.
38    """
39
    # Hook for plug-in statement parsers: maps a keyword to a callable taking
    # the Parser; matching top-level NAME tokens dispatch to it (see parse()).
    extensions = {}
    # AST factory module; ``self.ast.*`` is used for all node construction so
    # subclasses can substitute their own node classes.
    ast = ast
    # The 20 registered stylistic-set feature tags, "ss01".."ss20".
    SS_FEATURE_TAGS = {"ss%02d" % i for i in range(1, 20 + 1)}
    # The 99 registered character-variant feature tags, "cv01".."cv99".
    CV_FEATURE_TAGS = {"cv%02d" % i for i in range(1, 99 + 1)}
44
    def __init__(
        self, featurefile, glyphNames=(), followIncludes=True, includeDir=None, **kwargs
    ):
        """Initialize the parser; see the class docstring for argument details."""
        # Backward compatibility: the deprecated 'glyphMap' kwarg is accepted
        # in place of 'glyphNames', but the two are mutually exclusive.
        if "glyphMap" in kwargs:
            from fontTools.misc.loggingTools import deprecateArgument

            deprecateArgument("glyphMap", "use 'glyphNames' (iterable) instead")
            if glyphNames:
                raise TypeError(
                    "'glyphNames' and (deprecated) 'glyphMap' are " "mutually exclusive"
                )
            glyphNames = kwargs.pop("glyphMap")
        if kwargs:
            raise TypeError(
                "unsupported keyword argument%s: %s"
                % ("" if len(kwargs) == 1 else "s", ", ".join(repr(k) for k in kwargs))
            )

        self.glyphNames_ = set(glyphNames)
        self.doc_ = self.ast.FeatureFile()  # root of the AST being built
        # Separate symbol tables for the distinct FEA namespaces.
        self.anchors_ = SymbolTable()
        self.glyphclasses_ = SymbolTable()
        self.lookups_ = SymbolTable()
        self.valuerecords_ = SymbolTable()
        self.symbol_tables_ = {self.anchors_, self.valuerecords_}
        # One-token lookahead state; advance_lexer_() shifts next -> current.
        self.next_token_type_, self.next_token_ = (None, None)
        self.cur_comments_ = []
        self.next_token_location_ = None
        lexerClass = IncludingLexer if followIncludes else NonIncludingLexer
        self.lexer_ = lexerClass(featurefile, includeDir=includeDir)
        # Glyph names referenced but absent from the glyph set; reported in
        # bulk at the end of parse().
        self.missing = {}
        # Prime the lookahead with the first token.
        self.advance_lexer_(comments=True)
77
    def parse(self):
        """Parse the file, and return a :class:`fontTools.feaLib.ast.FeatureFile`
        object representing the root of the abstract syntax tree containing the
        parsed contents of the file."""
        statements = self.doc_.statements
        # Main dispatch loop: one top-level statement (or comment) per pass.
        while self.next_token_type_ is not None or self.cur_comments_:
            self.advance_lexer_(comments=True)
            if self.cur_token_type_ is Lexer.COMMENT:
                statements.append(
                    self.ast.Comment(self.cur_token_, location=self.cur_token_location_)
                )
            elif self.is_cur_keyword_("include"):
                statements.append(self.parse_include_())
            elif self.cur_token_type_ is Lexer.GLYPHCLASS:
                statements.append(self.parse_glyphclass_definition_())
            elif self.is_cur_keyword_(("anon", "anonymous")):
                statements.append(self.parse_anonymous_())
            elif self.is_cur_keyword_("anchorDef"):
                statements.append(self.parse_anchordef_())
            elif self.is_cur_keyword_("languagesystem"):
                statements.append(self.parse_languagesystem_())
            elif self.is_cur_keyword_("lookup"):
                statements.append(self.parse_lookup_(vertical=False))
            elif self.is_cur_keyword_("markClass"):
                statements.append(self.parse_markClass_())
            elif self.is_cur_keyword_("feature"):
                statements.append(self.parse_feature_block_())
            elif self.is_cur_keyword_("conditionset"):
                statements.append(self.parse_conditionset_())
            elif self.is_cur_keyword_("variation"):
                statements.append(self.parse_feature_block_(variation=True))
            elif self.is_cur_keyword_("table"):
                statements.append(self.parse_table_())
            elif self.is_cur_keyword_("valueRecordDef"):
                statements.append(self.parse_valuerecord_definition_(vertical=False))
            elif (
                self.cur_token_type_ is Lexer.NAME
                and self.cur_token_ in self.extensions
            ):
                # User-registered extension keyword; delegate to its parser.
                statements.append(self.extensions[self.cur_token_](self))
            elif self.cur_token_type_ is Lexer.SYMBOL and self.cur_token_ == ";":
                continue  # tolerate stray semicolons between statements
            else:
                raise FeatureLibError(
                    "Expected feature, languagesystem, lookup, markClass, "
                    'table, or glyph class definition, got {} "{}"'.format(
                        self.cur_token_type_, self.cur_token_
                    ),
                    self.cur_token_location_,
                )
        # Report any missing glyphs at the end of parsing
        if self.missing:
            error = [
                " %s (first found at %s)" % (name, loc)
                for name, loc in self.missing.items()
            ]
            raise FeatureLibError(
                "The following glyph names are referenced but are missing from the "
                "glyph set:\n" + ("\n".join(error)),
                None,
            )
        return self.doc_
140
    def parse_anchor_(self):
        # Parses an anchor in any of the four formats given in the feature
        # file specification (2.e.vii).
        self.expect_symbol_("<")
        self.expect_keyword_("anchor")
        location = self.cur_token_location_

        if self.next_token_ == "NULL":  # Format D
            self.expect_keyword_("NULL")
            self.expect_symbol_(">")
            return None  # <anchor NULL> stands for "no anchor"

        if self.next_token_type_ == Lexer.NAME:  # Format E
            # Named anchor: must have been declared earlier via anchorDef;
            # its coordinates and contour point are copied into the node.
            name = self.expect_name_()
            anchordef = self.anchors_.resolve(name)
            if anchordef is None:
                raise FeatureLibError(
                    'Unknown anchor "%s"' % name, self.cur_token_location_
                )
            self.expect_symbol_(">")
            return self.ast.Anchor(
                anchordef.x,
                anchordef.y,
                name=name,
                contourpoint=anchordef.contourpoint,
                xDeviceTable=None,
                yDeviceTable=None,
                location=location,
            )

        # Format A: plain coordinates (variable scalars are allowed).
        x, y = self.expect_number_(variable=True), self.expect_number_(variable=True)

        contourpoint = None
        if self.next_token_ == "contourpoint":  # Format B
            self.expect_keyword_("contourpoint")
            contourpoint = self.expect_number_()

        if self.next_token_ == "<":  # Format C
            # Two device tables follow: x first, then y.
            xDeviceTable = self.parse_device_()
            yDeviceTable = self.parse_device_()
        else:
            xDeviceTable, yDeviceTable = None, None

        self.expect_symbol_(">")
        return self.ast.Anchor(
            x,
            y,
            name=None,
            contourpoint=contourpoint,
            xDeviceTable=xDeviceTable,
            yDeviceTable=yDeviceTable,
            location=location,
        )
194
195    def parse_anchor_marks_(self):
196        # Parses a sequence of ``[<anchor> mark @MARKCLASS]*.``
197        anchorMarks = []  # [(self.ast.Anchor, markClassName)*]
198        while self.next_token_ == "<":
199            anchor = self.parse_anchor_()
200            if anchor is None and self.next_token_ != "mark":
201                continue  # <anchor NULL> without mark, eg. in GPOS type 5
202            self.expect_keyword_("mark")
203            markClass = self.expect_markClass_reference_()
204            anchorMarks.append((anchor, markClass))
205        return anchorMarks
206
    def parse_anchordef_(self):
        # Parses a named anchor definition (`section 2.e.viii <https://adobe-type-tools.github.io/afdko/OpenTypeFeatureFileSpecification.html#2.e.vii>`_).
        # Syntax: "anchorDef <x> <y> [contourpoint <n>] <name>;"
        assert self.is_cur_keyword_("anchorDef")
        location = self.cur_token_location_
        x, y = self.expect_number_(), self.expect_number_()
        contourpoint = None  # optional contour point index
        if self.next_token_ == "contourpoint":
            self.expect_keyword_("contourpoint")
            contourpoint = self.expect_number_()
        name = self.expect_name_()
        self.expect_symbol_(";")
        anchordef = self.ast.AnchorDefinition(
            name, x, y, contourpoint=contourpoint, location=location
        )
        # Register so later named-anchor references (parse_anchor_ Format E)
        # can resolve this definition.
        self.anchors_.define(name, anchordef)
        return anchordef
223
    def parse_anonymous_(self):
        # Parses an anonymous data block (`section 10 <https://adobe-type-tools.github.io/afdko/OpenTypeFeatureFileSpecification.html#10>`_).
        # The block's raw content is scanned as an opaque string by the lexer;
        # the parser only validates the "} TAG;" trailer against the tag.
        assert self.is_cur_keyword_(("anon", "anonymous"))
        tag = self.expect_tag_()
        _, content, location = self.lexer_.scan_anonymous_block(tag)
        self.advance_lexer_()
        self.expect_symbol_("}")
        end_tag = self.expect_tag_()
        assert tag == end_tag, "bad splitting in Lexer.scan_anonymous_block()"
        self.expect_symbol_(";")
        return self.ast.AnonymousBlock(tag, content, location=location)
235
236    def parse_attach_(self):
237        # Parses a GDEF Attach statement (`section 9.b <https://adobe-type-tools.github.io/afdko/OpenTypeFeatureFileSpecification.html#9.b>`_)
238        assert self.is_cur_keyword_("Attach")
239        location = self.cur_token_location_
240        glyphs = self.parse_glyphclass_(accept_glyphname=True)
241        contourPoints = {self.expect_number_()}
242        while self.next_token_ != ";":
243            contourPoints.add(self.expect_number_())
244        self.expect_symbol_(";")
245        return self.ast.AttachStatement(glyphs, contourPoints, location=location)
246
247    def parse_enumerate_(self, vertical):
248        # Parse an enumerated pair positioning rule (`section 6.b.ii <https://adobe-type-tools.github.io/afdko/OpenTypeFeatureFileSpecification.html#6.b.ii>`_).
249        assert self.cur_token_ in {"enumerate", "enum"}
250        self.advance_lexer_()
251        return self.parse_position_(enumerated=True, vertical=vertical)
252
253    def parse_GlyphClassDef_(self):
254        # Parses 'GlyphClassDef @BASE, @LIGATURES, @MARKS, @COMPONENTS;'
255        assert self.is_cur_keyword_("GlyphClassDef")
256        location = self.cur_token_location_
257        if self.next_token_ != ",":
258            baseGlyphs = self.parse_glyphclass_(accept_glyphname=False)
259        else:
260            baseGlyphs = None
261        self.expect_symbol_(",")
262        if self.next_token_ != ",":
263            ligatureGlyphs = self.parse_glyphclass_(accept_glyphname=False)
264        else:
265            ligatureGlyphs = None
266        self.expect_symbol_(",")
267        if self.next_token_ != ",":
268            markGlyphs = self.parse_glyphclass_(accept_glyphname=False)
269        else:
270            markGlyphs = None
271        self.expect_symbol_(",")
272        if self.next_token_ != ";":
273            componentGlyphs = self.parse_glyphclass_(accept_glyphname=False)
274        else:
275            componentGlyphs = None
276        self.expect_symbol_(";")
277        return self.ast.GlyphClassDefStatement(
278            baseGlyphs, markGlyphs, ligatureGlyphs, componentGlyphs, location=location
279        )
280
281    def parse_glyphclass_definition_(self):
282        # Parses glyph class definitions such as '@UPPERCASE = [A-Z];'
283        location, name = self.cur_token_location_, self.cur_token_
284        self.expect_symbol_("=")
285        glyphs = self.parse_glyphclass_(accept_glyphname=False)
286        self.expect_symbol_(";")
287        glyphclass = self.ast.GlyphClassDefinition(name, glyphs, location=location)
288        self.glyphclasses_.define(name, glyphclass)
289        return glyphclass
290
291    def split_glyph_range_(self, name, location):
292        # Since v1.20, the OpenType Feature File specification allows
293        # for dashes in glyph names. A sequence like "a-b-c-d" could
294        # therefore mean a single glyph whose name happens to be
295        # "a-b-c-d", or it could mean a range from glyph "a" to glyph
296        # "b-c-d", or a range from glyph "a-b" to glyph "c-d", or a
297        # range from glyph "a-b-c" to glyph "d".Technically, this
298        # example could be resolved because the (pretty complex)
299        # definition of glyph ranges renders most of these splits
300        # invalid. But the specification does not say that a compiler
301        # should try to apply such fancy heuristics. To encourage
302        # unambiguous feature files, we therefore try all possible
303        # splits and reject the feature file if there are multiple
304        # splits possible. It is intentional that we don't just emit a
305        # warning; warnings tend to get ignored. To fix the problem,
306        # font designers can trivially add spaces around the intended
307        # split point, and we emit a compiler error that suggests
308        # how exactly the source should be rewritten to make things
309        # unambiguous.
310        parts = name.split("-")
311        solutions = []
312        for i in range(len(parts)):
313            start, limit = "-".join(parts[0:i]), "-".join(parts[i:])
314            if start in self.glyphNames_ and limit in self.glyphNames_:
315                solutions.append((start, limit))
316        if len(solutions) == 1:
317            start, limit = solutions[0]
318            return start, limit
319        elif len(solutions) == 0:
320            raise FeatureLibError(
321                '"%s" is not a glyph in the font, and it can not be split '
322                "into a range of known glyphs" % name,
323                location,
324            )
325        else:
326            ranges = " or ".join(['"%s - %s"' % (s, l) for s, l in solutions])
327            raise FeatureLibError(
328                'Ambiguous glyph range "%s"; '
329                "please use %s to clarify what you mean" % (name, ranges),
330                location,
331            )
332
    def parse_glyphclass_(self, accept_glyphname, accept_null=False):
        # Parses a glyph class, either named or anonymous, or (if
        # ``bool(accept_glyphname)``) a glyph name. If ``bool(accept_null)`` then
        # also accept the special NULL glyph.
        if accept_glyphname and self.next_token_type_ in (Lexer.NAME, Lexer.CID):
            if accept_null and self.next_token_ == "NULL":
                # If you want a glyph called NULL, you should escape it.
                self.advance_lexer_()
                return self.ast.NullGlyph(location=self.cur_token_location_)
            glyph = self.expect_glyph_()
            self.check_glyph_name_in_glyph_set(glyph)
            return self.ast.GlyphName(glyph, location=self.cur_token_location_)
        if self.next_token_type_ is Lexer.GLYPHCLASS:
            # Named class reference, e.g. @UPPERCASE; must be defined already.
            self.advance_lexer_()
            gc = self.glyphclasses_.resolve(self.cur_token_)
            if gc is None:
                raise FeatureLibError(
                    "Unknown glyph class @%s" % self.cur_token_,
                    self.cur_token_location_,
                )
            if isinstance(gc, self.ast.MarkClass):
                return self.ast.MarkClassName(gc, location=self.cur_token_location_)
            else:
                return self.ast.GlyphClassName(gc, location=self.cur_token_location_)

        # Anonymous class: "[...]" containing glyph names, ranges, CIDs,
        # and/or nested class references.
        self.expect_symbol_("[")
        location = self.cur_token_location_
        glyphs = self.ast.GlyphClass(location=location)
        while self.next_token_ != "]":
            if self.next_token_type_ is Lexer.NAME:
                glyph = self.expect_glyph_()
                location = self.cur_token_location_
                if "-" in glyph and self.glyphNames_ and glyph not in self.glyphNames_:
                    # Hyphenated token that is not itself a known glyph:
                    # try to interpret it as a range (see split_glyph_range_).
                    start, limit = self.split_glyph_range_(glyph, location)
                    self.check_glyph_name_in_glyph_set(start, limit)
                    glyphs.add_range(
                        start, limit, self.make_glyph_range_(location, start, limit)
                    )
                elif self.next_token_ == "-":
                    # Explicit range with a free-standing dash: "a - z".
                    start = glyph
                    self.expect_symbol_("-")
                    limit = self.expect_glyph_()
                    self.check_glyph_name_in_glyph_set(start, limit)
                    glyphs.add_range(
                        start, limit, self.make_glyph_range_(location, start, limit)
                    )
                else:
                    if "-" in glyph and not self.glyphNames_:
                        # Without a glyph set we cannot tell a hyphenated
                        # name from a range; warn but treat it as a name.
                        log.warning(
                            str(
                                FeatureLibError(
                                    f"Ambiguous glyph name that looks like a range: {glyph!r}",
                                    location,
                                )
                            )
                        )
                    self.check_glyph_name_in_glyph_set(glyph)
                    glyphs.append(glyph)
            elif self.next_token_type_ is Lexer.CID:
                glyph = self.expect_glyph_()
                if self.next_token_ == "-":
                    # CID range; both endpoints are checked against the
                    # glyph set under their canonical "cidNNNNN" names.
                    range_location = self.cur_token_location_
                    range_start = self.cur_token_
                    self.expect_symbol_("-")
                    range_end = self.expect_cid_()
                    self.check_glyph_name_in_glyph_set(
                        f"cid{range_start:05d}",
                        f"cid{range_end:05d}",
                    )
                    glyphs.add_cid_range(
                        range_start,
                        range_end,
                        self.make_cid_range_(range_location, range_start, range_end),
                    )
                else:
                    # Single CID; canonical glyph name is "cidNNNNN".
                    glyph_name = f"cid{self.cur_token_:05d}"
                    self.check_glyph_name_in_glyph_set(glyph_name)
                    glyphs.append(glyph_name)
            elif self.next_token_type_ is Lexer.GLYPHCLASS:
                # Nested class reference inside the brackets.
                self.advance_lexer_()
                gc = self.glyphclasses_.resolve(self.cur_token_)
                if gc is None:
                    raise FeatureLibError(
                        "Unknown glyph class @%s" % self.cur_token_,
                        self.cur_token_location_,
                    )
                if isinstance(gc, self.ast.MarkClass):
                    gc = self.ast.MarkClassName(gc, location=self.cur_token_location_)
                else:
                    gc = self.ast.GlyphClassName(gc, location=self.cur_token_location_)
                glyphs.add_class(gc)
            else:
                raise FeatureLibError(
                    "Expected glyph name, glyph range, "
                    f"or glyph class reference, found {self.next_token_!r}",
                    self.next_token_location_,
                )
        self.expect_symbol_("]")
        return glyphs
432
433    def parse_glyph_pattern_(self, vertical):
434        # Parses a glyph pattern, including lookups and context, e.g.::
435        #
436        #    a b
437        #    a b c' d e
438        #    a b c' lookup ChangeC d e
439        prefix, glyphs, lookups, values, suffix = ([], [], [], [], [])
440        hasMarks = False
441        while self.next_token_ not in {"by", "from", ";", ","}:
442            gc = self.parse_glyphclass_(accept_glyphname=True)
443            marked = False
444            if self.next_token_ == "'":
445                self.expect_symbol_("'")
446                hasMarks = marked = True
447            if marked:
448                if suffix:
449                    # makeotf also reports this as an error, while FontForge
450                    # silently inserts ' in all the intervening glyphs.
451                    # https://github.com/fonttools/fonttools/pull/1096
452                    raise FeatureLibError(
453                        "Unsupported contextual target sequence: at most "
454                        "one run of marked (') glyph/class names allowed",
455                        self.cur_token_location_,
456                    )
457                glyphs.append(gc)
458            elif glyphs:
459                suffix.append(gc)
460            else:
461                prefix.append(gc)
462
463            if self.is_next_value_():
464                values.append(self.parse_valuerecord_(vertical))
465            else:
466                values.append(None)
467
468            lookuplist = None
469            while self.next_token_ == "lookup":
470                if lookuplist is None:
471                    lookuplist = []
472                self.expect_keyword_("lookup")
473                if not marked:
474                    raise FeatureLibError(
475                        "Lookups can only follow marked glyphs",
476                        self.cur_token_location_,
477                    )
478                lookup_name = self.expect_name_()
479                lookup = self.lookups_.resolve(lookup_name)
480                if lookup is None:
481                    raise FeatureLibError(
482                        'Unknown lookup "%s"' % lookup_name, self.cur_token_location_
483                    )
484                lookuplist.append(lookup)
485            if marked:
486                lookups.append(lookuplist)
487
488        if not glyphs and not suffix:  # eg., "sub f f i by"
489            assert lookups == []
490            return ([], prefix, [None] * len(prefix), values, [], hasMarks)
491        else:
492            if any(values[: len(prefix)]):
493                raise FeatureLibError(
494                    "Positioning cannot be applied in the bactrack glyph sequence, "
495                    "before the marked glyph sequence.",
496                    self.cur_token_location_,
497                )
498            marked_values = values[len(prefix) : len(prefix) + len(glyphs)]
499            if any(marked_values):
500                if any(values[len(prefix) + len(glyphs) :]):
501                    raise FeatureLibError(
502                        "Positioning values are allowed only in the marked glyph "
503                        "sequence, or after the final glyph node when only one glyph "
504                        "node is marked.",
505                        self.cur_token_location_,
506                    )
507                values = marked_values
508            elif values and values[-1]:
509                if len(glyphs) > 1 or any(values[:-1]):
510                    raise FeatureLibError(
511                        "Positioning values are allowed only in the marked glyph "
512                        "sequence, or after the final glyph node when only one glyph "
513                        "node is marked.",
514                        self.cur_token_location_,
515                    )
516                values = values[-1:]
517            elif any(values):
518                raise FeatureLibError(
519                    "Positioning values are allowed only in the marked glyph "
520                    "sequence, or after the final glyph node when only one glyph "
521                    "node is marked.",
522                    self.cur_token_location_,
523                )
524            return (prefix, glyphs, lookups, values, suffix, hasMarks)
525
526    def parse_ignore_glyph_pattern_(self, sub):
527        location = self.cur_token_location_
528        prefix, glyphs, lookups, values, suffix, hasMarks = self.parse_glyph_pattern_(
529            vertical=False
530        )
531        if any(lookups):
532            raise FeatureLibError(
533                f'No lookups can be specified for "ignore {sub}"', location
534            )
535        if not hasMarks:
536            error = FeatureLibError(
537                f'Ambiguous "ignore {sub}", there should be least one marked glyph',
538                location,
539            )
540            log.warning(str(error))
541            suffix, glyphs = glyphs[1:], glyphs[0:1]
542        chainContext = (prefix, glyphs, suffix)
543        return chainContext
544
545    def parse_ignore_context_(self, sub):
546        location = self.cur_token_location_
547        chainContext = [self.parse_ignore_glyph_pattern_(sub)]
548        while self.next_token_ == ",":
549            self.expect_symbol_(",")
550            chainContext.append(self.parse_ignore_glyph_pattern_(sub))
551        self.expect_symbol_(";")
552        return chainContext
553
554    def parse_ignore_(self):
555        # Parses an ignore sub/pos rule.
556        assert self.is_cur_keyword_("ignore")
557        location = self.cur_token_location_
558        self.advance_lexer_()
559        if self.cur_token_ in ["substitute", "sub"]:
560            chainContext = self.parse_ignore_context_("sub")
561            return self.ast.IgnoreSubstStatement(chainContext, location=location)
562        if self.cur_token_ in ["position", "pos"]:
563            chainContext = self.parse_ignore_context_("pos")
564            return self.ast.IgnorePosStatement(chainContext, location=location)
565        raise FeatureLibError(
566            'Expected "substitute" or "position"', self.cur_token_location_
567        )
568
569    def parse_include_(self):
570        assert self.cur_token_ == "include"
571        location = self.cur_token_location_
572        filename = self.expect_filename_()
573        # self.expect_symbol_(";")
574        return ast.IncludeStatement(filename, location=location)
575
576    def parse_language_(self):
577        assert self.is_cur_keyword_("language")
578        location = self.cur_token_location_
579        language = self.expect_language_tag_()
580        include_default, required = (True, False)
581        if self.next_token_ in {"exclude_dflt", "include_dflt"}:
582            include_default = self.expect_name_() == "include_dflt"
583        if self.next_token_ == "required":
584            self.expect_keyword_("required")
585            required = True
586        self.expect_symbol_(";")
587        return self.ast.LanguageStatement(
588            language, include_default, required, location=location
589        )
590
591    def parse_ligatureCaretByIndex_(self):
592        assert self.is_cur_keyword_("LigatureCaretByIndex")
593        location = self.cur_token_location_
594        glyphs = self.parse_glyphclass_(accept_glyphname=True)
595        carets = [self.expect_number_()]
596        while self.next_token_ != ";":
597            carets.append(self.expect_number_())
598        self.expect_symbol_(";")
599        return self.ast.LigatureCaretByIndexStatement(glyphs, carets, location=location)
600
601    def parse_ligatureCaretByPos_(self):
602        assert self.is_cur_keyword_("LigatureCaretByPos")
603        location = self.cur_token_location_
604        glyphs = self.parse_glyphclass_(accept_glyphname=True)
605        carets = [self.expect_number_(variable=True)]
606        while self.next_token_ != ";":
607            carets.append(self.expect_number_(variable=True))
608        self.expect_symbol_(";")
609        return self.ast.LigatureCaretByPosStatement(glyphs, carets, location=location)
610
    def parse_lookup_(self, vertical):
        # Parses a ``lookup`` - either a lookup block, or a lookup reference
        # inside a feature.
        assert self.is_cur_keyword_("lookup")
        location, name = self.cur_token_location_, self.expect_name_()

        if self.next_token_ == ";":
            # "lookup NAME;" - a reference to a previously defined lookup.
            lookup = self.lookups_.resolve(name)
            if lookup is None:
                raise FeatureLibError(
                    'Unknown lookup "%s"' % name, self.cur_token_location_
                )
            self.expect_symbol_(";")
            return self.ast.LookupReferenceStatement(lookup, location=location)

        use_extension = False
        if self.next_token_ == "useExtension":
            self.expect_keyword_("useExtension")
            use_extension = True

        # "lookup NAME [useExtension] { ... } NAME;" - a lookup definition;
        # register it so later references can resolve it.
        block = self.ast.LookupBlock(name, use_extension, location=location)
        self.parse_block_(block, vertical)
        self.lookups_.define(name, block)
        return block
635
    def parse_lookupflag_(self):
        # Parses a ``lookupflag`` statement, either specified by number or
        # in words.
        assert self.is_cur_keyword_("lookupflag")
        location = self.cur_token_location_

        # format B: "lookupflag 6;"
        if self.next_token_type_ == Lexer.NUMBER:
            value = self.expect_number_()
            self.expect_symbol_(";")
            return self.ast.LookupFlagStatement(value, location=location)

        # format A: "lookupflag RightToLeft MarkAttachmentType @M;"
        value_seen = False
        value, markAttachment, markFilteringSet = 0, None, None
        # Bit values for the simple (argument-less) flag names.
        flags = {
            "RightToLeft": 1,
            "IgnoreBaseGlyphs": 2,
            "IgnoreLigatures": 4,
            "IgnoreMarks": 8,
        }
        seen = set()  # each flag name may appear at most once
        while self.next_token_ != ";":
            if self.next_token_ in seen:
                raise FeatureLibError(
                    "%s can be specified only once" % self.next_token_,
                    self.next_token_location_,
                )
            seen.add(self.next_token_)
            if self.next_token_ == "MarkAttachmentType":
                # Takes a glyph class argument.
                self.expect_keyword_("MarkAttachmentType")
                markAttachment = self.parse_glyphclass_(accept_glyphname=False)
            elif self.next_token_ == "UseMarkFilteringSet":
                # Takes a glyph class argument.
                self.expect_keyword_("UseMarkFilteringSet")
                markFilteringSet = self.parse_glyphclass_(accept_glyphname=False)
            elif self.next_token_ in flags:
                value_seen = True
                value = value | flags[self.expect_name_()]
            else:
                raise FeatureLibError(
                    '"%s" is not a recognized lookupflag' % self.next_token_,
                    self.next_token_location_,
                )
        self.expect_symbol_(";")

        # Format A must carry at least one flag or glyph-class argument.
        if not any([value_seen, markAttachment, markFilteringSet]):
            raise FeatureLibError(
                "lookupflag must have a value", self.next_token_location_
            )

        return self.ast.LookupFlagStatement(
            value,
            markAttachment=markAttachment,
            markFilteringSet=markFilteringSet,
            location=location,
        )
692
693    def parse_markClass_(self):
694        assert self.is_cur_keyword_("markClass")
695        location = self.cur_token_location_
696        glyphs = self.parse_glyphclass_(accept_glyphname=True)
697        if not glyphs.glyphSet():
698            raise FeatureLibError(
699                "Empty glyph class in mark class definition", location
700            )
701        anchor = self.parse_anchor_()
702        name = self.expect_class_name_()
703        self.expect_symbol_(";")
704        markClass = self.doc_.markClasses.get(name)
705        if markClass is None:
706            markClass = self.ast.MarkClass(name)
707            self.doc_.markClasses[name] = markClass
708            self.glyphclasses_.define(name, markClass)
709        mcdef = self.ast.MarkClassDefinition(
710            markClass, anchor, glyphs, location=location
711        )
712        markClass.addDefinition(mcdef)
713        return mcdef
714
    def parse_position_(self, enumerated, vertical):
        # Dispatcher for ``position``/``pos`` rules: attachment positioning
        # (GPOS types 3-6) is delegated to specialised parsers; the remaining
        # patterns are classified below as chaining contextual (GPOS 8),
        # pair (GPOS 2), or single (GPOS 1) positioning.
        assert self.cur_token_ in {"position", "pos"}
        if self.next_token_ == "cursive":  # GPOS type 3
            return self.parse_position_cursive_(enumerated, vertical)
        elif self.next_token_ == "base":  # GPOS type 4
            return self.parse_position_base_(enumerated, vertical)
        elif self.next_token_ == "ligature":  # GPOS type 5
            return self.parse_position_ligature_(enumerated, vertical)
        elif self.next_token_ == "mark":  # GPOS type 6
            return self.parse_position_mark_(enumerated, vertical)

        location = self.cur_token_location_
        prefix, glyphs, lookups, values, suffix, hasMarks = self.parse_glyph_pattern_(
            vertical
        )
        self.expect_symbol_(";")

        if any(lookups):
            # GPOS type 8: Chaining contextual positioning; explicit lookups
            if any(values):
                raise FeatureLibError(
                    'If "lookup" is present, no values must be specified', location
                )
            return self.ast.ChainContextPosStatement(
                prefix, glyphs, suffix, lookups, location=location
            )

        # Pair positioning, format A: "pos V 10 A -10;"
        # Pair positioning, format B: "pos V A -20;"
        if not prefix and not suffix and len(glyphs) == 2 and not hasMarks:
            if values[0] is None:  # Format B: "pos V A -20;"
                # In format B the single value follows the second glyph;
                # reversing aligns it with PairPosStatement's argument order.
                values.reverse()
            return self.ast.PairPosStatement(
                glyphs[0],
                values[0],
                glyphs[1],
                values[1],
                enumerated=enumerated,
                location=location,
            )

        if enumerated:
            raise FeatureLibError(
                '"enumerate" is only allowed with pair positionings', location
            )
        # GPOS type 1: Single positioning; forceChain marks the rule as
        # contextual when the pattern contained marked (') glyphs.
        return self.ast.SinglePosStatement(
            list(zip(glyphs, values)),
            prefix,
            suffix,
            forceChain=hasMarks,
            location=location,
        )
767
768    def parse_position_cursive_(self, enumerated, vertical):
769        location = self.cur_token_location_
770        self.expect_keyword_("cursive")
771        if enumerated:
772            raise FeatureLibError(
773                '"enumerate" is not allowed with ' "cursive attachment positioning",
774                location,
775            )
776        glyphclass = self.parse_glyphclass_(accept_glyphname=True)
777        entryAnchor = self.parse_anchor_()
778        exitAnchor = self.parse_anchor_()
779        self.expect_symbol_(";")
780        return self.ast.CursivePosStatement(
781            glyphclass, entryAnchor, exitAnchor, location=location
782        )
783
784    def parse_position_base_(self, enumerated, vertical):
785        location = self.cur_token_location_
786        self.expect_keyword_("base")
787        if enumerated:
788            raise FeatureLibError(
789                '"enumerate" is not allowed with '
790                "mark-to-base attachment positioning",
791                location,
792            )
793        base = self.parse_glyphclass_(accept_glyphname=True)
794        marks = self.parse_anchor_marks_()
795        self.expect_symbol_(";")
796        return self.ast.MarkBasePosStatement(base, marks, location=location)
797
798    def parse_position_ligature_(self, enumerated, vertical):
799        location = self.cur_token_location_
800        self.expect_keyword_("ligature")
801        if enumerated:
802            raise FeatureLibError(
803                '"enumerate" is not allowed with '
804                "mark-to-ligature attachment positioning",
805                location,
806            )
807        ligatures = self.parse_glyphclass_(accept_glyphname=True)
808        marks = [self.parse_anchor_marks_()]
809        while self.next_token_ == "ligComponent":
810            self.expect_keyword_("ligComponent")
811            marks.append(self.parse_anchor_marks_())
812        self.expect_symbol_(";")
813        return self.ast.MarkLigPosStatement(ligatures, marks, location=location)
814
815    def parse_position_mark_(self, enumerated, vertical):
816        location = self.cur_token_location_
817        self.expect_keyword_("mark")
818        if enumerated:
819            raise FeatureLibError(
820                '"enumerate" is not allowed with '
821                "mark-to-mark attachment positioning",
822                location,
823            )
824        baseMarks = self.parse_glyphclass_(accept_glyphname=True)
825        marks = self.parse_anchor_marks_()
826        self.expect_symbol_(";")
827        return self.ast.MarkMarkPosStatement(baseMarks, marks, location=location)
828
829    def parse_script_(self):
830        assert self.is_cur_keyword_("script")
831        location, script = self.cur_token_location_, self.expect_script_tag_()
832        self.expect_symbol_(";")
833        return self.ast.ScriptStatement(script, location=location)
834
    def parse_substitute_(self):
        # Parses a GSUB rule ("substitute"/"sub"/"reversesub"/"rsub") and
        # classifies it as one of GSUB lookup types 1-6 or 8 based purely on
        # the shape of the matched pattern and the replacement sequence.
        assert self.cur_token_ in {"substitute", "sub", "reversesub", "rsub"}
        location = self.cur_token_location_
        reverse = self.cur_token_ in {"reversesub", "rsub"}
        (
            old_prefix,
            old,
            lookups,
            values,
            old_suffix,
            hasMarks,
        ) = self.parse_glyph_pattern_(vertical=False)
        if any(values):
            # Value records are a GPOS concept; they must not appear in GSUB.
            raise FeatureLibError(
                "Substitution statements cannot contain values", location
            )
        new = []
        if self.next_token_ == "by":
            keyword = self.expect_keyword_("by")
            while self.next_token_ != ";":
                gc = self.parse_glyphclass_(accept_glyphname=True, accept_null=True)
                new.append(gc)
        elif self.next_token_ == "from":
            keyword = self.expect_keyword_("from")
            new = [self.parse_glyphclass_(accept_glyphname=False)]
        else:
            keyword = None
        self.expect_symbol_(";")
        if len(new) == 0 and not any(lookups):
            raise FeatureLibError(
                'Expected "by", "from" or explicit lookup references',
                self.cur_token_location_,
            )

        # GSUB lookup type 3: Alternate substitution.
        # Format: "substitute a from [a.1 a.2 a.3];"
        if keyword == "from":
            if reverse:
                raise FeatureLibError(
                    'Reverse chaining substitutions do not support "from"', location
                )
            if len(old) != 1 or len(old[0].glyphSet()) != 1:
                raise FeatureLibError('Expected a single glyph before "from"', location)
            if len(new) != 1:
                raise FeatureLibError(
                    'Expected a single glyphclass after "from"', location
                )
            return self.ast.AlternateSubstStatement(
                old_prefix, old[0], old_suffix, new[0], location=location
            )

        # Number of explicit lookup references in the pattern.
        num_lookups = len([l for l in lookups if l is not None])

        # "sub a by NULL;" deletes the glyph.
        is_deletion = False
        if len(new) == 1 and isinstance(new[0], ast.NullGlyph):
            new = []  # Deletion
            is_deletion = True

        # GSUB lookup type 1: Single substitution.
        # Format A: "substitute a by a.sc;"
        # Format B: "substitute [one.fitted one.oldstyle] by one;"
        # Format C: "substitute [a-d] by [A.sc-D.sc];"
        if not reverse and len(old) == 1 and len(new) == 1 and num_lookups == 0:
            glyphs = list(old[0].glyphSet())
            replacements = list(new[0].glyphSet())
            if len(replacements) == 1:
                # Format B: a single replacement applies to every input glyph.
                replacements = replacements * len(glyphs)
            if len(glyphs) != len(replacements):
                raise FeatureLibError(
                    'Expected a glyph class with %d elements after "by", '
                    "but found a glyph class with %d elements"
                    % (len(glyphs), len(replacements)),
                    location,
                )
            return self.ast.SingleSubstStatement(
                old, new, old_prefix, old_suffix, forceChain=hasMarks, location=location
            )

        # Glyph deletion, built as GSUB lookup type 2: Multiple substitution
        # with empty replacement.
        if is_deletion and len(old) == 1 and num_lookups == 0:
            return self.ast.MultipleSubstStatement(
                old_prefix,
                old[0],
                old_suffix,
                (),
                forceChain=hasMarks,
                location=location,
            )

        # GSUB lookup type 2: Multiple substitution.
        # Format: "substitute f_f_i by f f i;"
        #
        # GlyphsApp introduces two additional formats:
        # Format 1: "substitute [f_i f_l] by [f f] [i l];"
        # Format 2: "substitute [f_i f_l] by f [i l];"
        # http://handbook.glyphsapp.com/en/layout/multiple-substitution-with-classes/
        if not reverse and len(old) == 1 and len(new) > 1 and num_lookups == 0:
            count = len(old[0].glyphSet())
            for n in new:
                if not list(n.glyphSet()):
                    raise FeatureLibError("Empty class in replacement", location)
                # Each replacement class must be a singleton or have the same
                # cardinality as the input class (GlyphsApp formats above).
                if len(n.glyphSet()) != 1 and len(n.glyphSet()) != count:
                    raise FeatureLibError(
                        f'Expected a glyph class with 1 or {count} elements after "by", '
                        f"but found a glyph class with {len(n.glyphSet())} elements",
                        location,
                    )
            return self.ast.MultipleSubstStatement(
                old_prefix,
                old[0],
                old_suffix,
                new,
                forceChain=hasMarks,
                location=location,
            )

        # GSUB lookup type 4: Ligature substitution.
        # Format: "substitute f f i by f_f_i;"
        if (
            not reverse
            and len(old) > 1
            and len(new) == 1
            and len(new[0].glyphSet()) == 1
            and num_lookups == 0
        ):
            return self.ast.LigatureSubstStatement(
                old_prefix,
                old,
                old_suffix,
                list(new[0].glyphSet())[0],
                forceChain=hasMarks,
                location=location,
            )

        # GSUB lookup type 8: Reverse chaining substitution.
        if reverse:
            if len(old) != 1:
                raise FeatureLibError(
                    "In reverse chaining single substitutions, "
                    "only a single glyph or glyph class can be replaced",
                    location,
                )
            if len(new) != 1:
                raise FeatureLibError(
                    "In reverse chaining single substitutions, "
                    'the replacement (after "by") must be a single glyph '
                    "or glyph class",
                    location,
                )
            if num_lookups != 0:
                raise FeatureLibError(
                    "Reverse chaining substitutions cannot call named lookups", location
                )
            # NOTE: glyphs/replacements are paired after sorting both sides.
            glyphs = sorted(list(old[0].glyphSet()))
            replacements = sorted(list(new[0].glyphSet()))
            if len(replacements) == 1:
                replacements = replacements * len(glyphs)
            if len(glyphs) != len(replacements):
                raise FeatureLibError(
                    'Expected a glyph class with %d elements after "by", '
                    "but found a glyph class with %d elements"
                    % (len(glyphs), len(replacements)),
                    location,
                )
            return self.ast.ReverseChainSingleSubstStatement(
                old_prefix, old_suffix, old, new, location=location
            )

        if len(old) > 1 and len(new) > 1:
            raise FeatureLibError(
                "Direct substitution of multiple glyphs by multiple glyphs "
                "is not supported",
                location,
            )

        # If there are remaining glyphs to parse, this is an invalid GSUB statement
        if len(new) != 0 or is_deletion:
            raise FeatureLibError("Invalid substitution statement", location)

        # GSUB lookup type 6: Chaining contextual substitution.
        rule = self.ast.ChainContextSubstStatement(
            old_prefix, old, old_suffix, lookups, location=location
        )
        return rule
1020
1021    def parse_subtable_(self):
1022        assert self.is_cur_keyword_("subtable")
1023        location = self.cur_token_location_
1024        self.expect_symbol_(";")
1025        return self.ast.SubtableStatement(location=location)
1026
1027    def parse_size_parameters_(self):
1028        # Parses a ``parameters`` statement used in ``size`` features. See
1029        # `section 8.b <https://adobe-type-tools.github.io/afdko/OpenTypeFeatureFileSpecification.html#8.b>`_.
1030        assert self.is_cur_keyword_("parameters")
1031        location = self.cur_token_location_
1032        DesignSize = self.expect_decipoint_()
1033        SubfamilyID = self.expect_number_()
1034        RangeStart = 0.0
1035        RangeEnd = 0.0
1036        if self.next_token_type_ in (Lexer.NUMBER, Lexer.FLOAT) or SubfamilyID != 0:
1037            RangeStart = self.expect_decipoint_()
1038            RangeEnd = self.expect_decipoint_()
1039
1040        self.expect_symbol_(";")
1041        return self.ast.SizeParameters(
1042            DesignSize, SubfamilyID, RangeStart, RangeEnd, location=location
1043        )
1044
1045    def parse_size_menuname_(self):
1046        assert self.is_cur_keyword_("sizemenuname")
1047        location = self.cur_token_location_
1048        platformID, platEncID, langID, string = self.parse_name_()
1049        return self.ast.FeatureNameStatement(
1050            "size", platformID, platEncID, langID, string, location=location
1051        )
1052
1053    def parse_table_(self):
1054        assert self.is_cur_keyword_("table")
1055        location, name = self.cur_token_location_, self.expect_tag_()
1056        table = self.ast.TableBlock(name, location=location)
1057        self.expect_symbol_("{")
1058        handler = {
1059            "GDEF": self.parse_table_GDEF_,
1060            "head": self.parse_table_head_,
1061            "hhea": self.parse_table_hhea_,
1062            "vhea": self.parse_table_vhea_,
1063            "name": self.parse_table_name_,
1064            "BASE": self.parse_table_BASE_,
1065            "OS/2": self.parse_table_OS_2_,
1066            "STAT": self.parse_table_STAT_,
1067        }.get(name)
1068        if handler:
1069            handler(table)
1070        else:
1071            raise FeatureLibError(
1072                '"table %s" is not supported' % name.strip(), location
1073            )
1074        self.expect_symbol_("}")
1075        end_tag = self.expect_tag_()
1076        if end_tag != name:
1077            raise FeatureLibError(
1078                'Expected "%s"' % name.strip(), self.cur_token_location_
1079            )
1080        self.expect_symbol_(";")
1081        return table
1082
1083    def parse_table_GDEF_(self, table):
1084        statements = table.statements
1085        while self.next_token_ != "}" or self.cur_comments_:
1086            self.advance_lexer_(comments=True)
1087            if self.cur_token_type_ is Lexer.COMMENT:
1088                statements.append(
1089                    self.ast.Comment(self.cur_token_, location=self.cur_token_location_)
1090                )
1091            elif self.is_cur_keyword_("Attach"):
1092                statements.append(self.parse_attach_())
1093            elif self.is_cur_keyword_("GlyphClassDef"):
1094                statements.append(self.parse_GlyphClassDef_())
1095            elif self.is_cur_keyword_("LigatureCaretByIndex"):
1096                statements.append(self.parse_ligatureCaretByIndex_())
1097            elif self.is_cur_keyword_("LigatureCaretByPos"):
1098                statements.append(self.parse_ligatureCaretByPos_())
1099            elif self.cur_token_ == ";":
1100                continue
1101            else:
1102                raise FeatureLibError(
1103                    "Expected Attach, LigatureCaretByIndex, " "or LigatureCaretByPos",
1104                    self.cur_token_location_,
1105                )
1106
1107    def parse_table_head_(self, table):
1108        statements = table.statements
1109        while self.next_token_ != "}" or self.cur_comments_:
1110            self.advance_lexer_(comments=True)
1111            if self.cur_token_type_ is Lexer.COMMENT:
1112                statements.append(
1113                    self.ast.Comment(self.cur_token_, location=self.cur_token_location_)
1114                )
1115            elif self.is_cur_keyword_("FontRevision"):
1116                statements.append(self.parse_FontRevision_())
1117            elif self.cur_token_ == ";":
1118                continue
1119            else:
1120                raise FeatureLibError("Expected FontRevision", self.cur_token_location_)
1121
1122    def parse_table_hhea_(self, table):
1123        statements = table.statements
1124        fields = ("CaretOffset", "Ascender", "Descender", "LineGap")
1125        while self.next_token_ != "}" or self.cur_comments_:
1126            self.advance_lexer_(comments=True)
1127            if self.cur_token_type_ is Lexer.COMMENT:
1128                statements.append(
1129                    self.ast.Comment(self.cur_token_, location=self.cur_token_location_)
1130                )
1131            elif self.cur_token_type_ is Lexer.NAME and self.cur_token_ in fields:
1132                key = self.cur_token_.lower()
1133                value = self.expect_number_()
1134                statements.append(
1135                    self.ast.HheaField(key, value, location=self.cur_token_location_)
1136                )
1137                if self.next_token_ != ";":
1138                    raise FeatureLibError(
1139                        "Incomplete statement", self.next_token_location_
1140                    )
1141            elif self.cur_token_ == ";":
1142                continue
1143            else:
1144                raise FeatureLibError(
1145                    "Expected CaretOffset, Ascender, " "Descender or LineGap",
1146                    self.cur_token_location_,
1147                )
1148
1149    def parse_table_vhea_(self, table):
1150        statements = table.statements
1151        fields = ("VertTypoAscender", "VertTypoDescender", "VertTypoLineGap")
1152        while self.next_token_ != "}" or self.cur_comments_:
1153            self.advance_lexer_(comments=True)
1154            if self.cur_token_type_ is Lexer.COMMENT:
1155                statements.append(
1156                    self.ast.Comment(self.cur_token_, location=self.cur_token_location_)
1157                )
1158            elif self.cur_token_type_ is Lexer.NAME and self.cur_token_ in fields:
1159                key = self.cur_token_.lower()
1160                value = self.expect_number_()
1161                statements.append(
1162                    self.ast.VheaField(key, value, location=self.cur_token_location_)
1163                )
1164                if self.next_token_ != ";":
1165                    raise FeatureLibError(
1166                        "Incomplete statement", self.next_token_location_
1167                    )
1168            elif self.cur_token_ == ";":
1169                continue
1170            else:
1171                raise FeatureLibError(
1172                    "Expected VertTypoAscender, "
1173                    "VertTypoDescender or VertTypoLineGap",
1174                    self.cur_token_location_,
1175                )
1176
1177    def parse_table_name_(self, table):
1178        statements = table.statements
1179        while self.next_token_ != "}" or self.cur_comments_:
1180            self.advance_lexer_(comments=True)
1181            if self.cur_token_type_ is Lexer.COMMENT:
1182                statements.append(
1183                    self.ast.Comment(self.cur_token_, location=self.cur_token_location_)
1184                )
1185            elif self.is_cur_keyword_("nameid"):
1186                statement = self.parse_nameid_()
1187                if statement:
1188                    statements.append(statement)
1189            elif self.cur_token_ == ";":
1190                continue
1191            else:
1192                raise FeatureLibError("Expected nameid", self.cur_token_location_)
1193
1194    def parse_name_(self):
1195        """Parses a name record. See `section 9.e <https://adobe-type-tools.github.io/afdko/OpenTypeFeatureFileSpecification.html#9.e>`_."""
1196        platEncID = None
1197        langID = None
1198        if self.next_token_type_ in Lexer.NUMBERS:
1199            platformID = self.expect_any_number_()
1200            location = self.cur_token_location_
1201            if platformID not in (1, 3):
1202                raise FeatureLibError("Expected platform id 1 or 3", location)
1203            if self.next_token_type_ in Lexer.NUMBERS:
1204                platEncID = self.expect_any_number_()
1205                langID = self.expect_any_number_()
1206        else:
1207            platformID = 3
1208            location = self.cur_token_location_
1209
1210        if platformID == 1:  # Macintosh
1211            platEncID = platEncID or 0  # Roman
1212            langID = langID or 0  # English
1213        else:  # 3, Windows
1214            platEncID = platEncID or 1  # Unicode
1215            langID = langID or 0x0409  # English
1216
1217        string = self.expect_string_()
1218        self.expect_symbol_(";")
1219
1220        encoding = getEncoding(platformID, platEncID, langID)
1221        if encoding is None:
1222            raise FeatureLibError("Unsupported encoding", location)
1223        unescaped = self.unescape_string_(string, encoding)
1224        return platformID, platEncID, langID, unescaped
1225
1226    def parse_stat_name_(self):
1227        platEncID = None
1228        langID = None
1229        if self.next_token_type_ in Lexer.NUMBERS:
1230            platformID = self.expect_any_number_()
1231            location = self.cur_token_location_
1232            if platformID not in (1, 3):
1233                raise FeatureLibError("Expected platform id 1 or 3", location)
1234            if self.next_token_type_ in Lexer.NUMBERS:
1235                platEncID = self.expect_any_number_()
1236                langID = self.expect_any_number_()
1237        else:
1238            platformID = 3
1239            location = self.cur_token_location_
1240
1241        if platformID == 1:  # Macintosh
1242            platEncID = platEncID or 0  # Roman
1243            langID = langID or 0  # English
1244        else:  # 3, Windows
1245            platEncID = platEncID or 1  # Unicode
1246            langID = langID or 0x0409  # English
1247
1248        string = self.expect_string_()
1249        encoding = getEncoding(platformID, platEncID, langID)
1250        if encoding is None:
1251            raise FeatureLibError("Unsupported encoding", location)
1252        unescaped = self.unescape_string_(string, encoding)
1253        return platformID, platEncID, langID, unescaped
1254
1255    def parse_nameid_(self):
1256        assert self.cur_token_ == "nameid", self.cur_token_
1257        location, nameID = self.cur_token_location_, self.expect_any_number_()
1258        if nameID > 32767:
1259            raise FeatureLibError(
1260                "Name id value cannot be greater than 32767", self.cur_token_location_
1261            )
1262        platformID, platEncID, langID, string = self.parse_name_()
1263        return self.ast.NameRecord(
1264            nameID, platformID, platEncID, langID, string, location=location
1265        )
1266
1267    def unescape_string_(self, string, encoding):
1268        if encoding == "utf_16_be":
1269            s = re.sub(r"\\[0-9a-fA-F]{4}", self.unescape_unichr_, string)
1270        else:
1271            unescape = lambda m: self.unescape_byte_(m, encoding)
1272            s = re.sub(r"\\[0-9a-fA-F]{2}", unescape, string)
1273        # We now have a Unicode string, but it might contain surrogate pairs.
1274        # We convert surrogates to actual Unicode by round-tripping through
1275        # Python's UTF-16 codec in a special mode.
1276        utf16 = tobytes(s, "utf_16_be", "surrogatepass")
1277        return tostr(utf16, "utf_16_be")
1278
1279    @staticmethod
1280    def unescape_unichr_(match):
1281        n = match.group(0)[1:]
1282        return chr(int(n, 16))
1283
1284    @staticmethod
1285    def unescape_byte_(match, encoding):
1286        n = match.group(0)[1:]
1287        return bytechr(int(n, 16)).decode(encoding)
1288
1289    def parse_table_BASE_(self, table):
1290        statements = table.statements
1291        while self.next_token_ != "}" or self.cur_comments_:
1292            self.advance_lexer_(comments=True)
1293            if self.cur_token_type_ is Lexer.COMMENT:
1294                statements.append(
1295                    self.ast.Comment(self.cur_token_, location=self.cur_token_location_)
1296                )
1297            elif self.is_cur_keyword_("HorizAxis.BaseTagList"):
1298                horiz_bases = self.parse_base_tag_list_()
1299            elif self.is_cur_keyword_("HorizAxis.BaseScriptList"):
1300                horiz_scripts = self.parse_base_script_list_(len(horiz_bases))
1301                statements.append(
1302                    self.ast.BaseAxis(
1303                        horiz_bases,
1304                        horiz_scripts,
1305                        False,
1306                        location=self.cur_token_location_,
1307                    )
1308                )
1309            elif self.is_cur_keyword_("VertAxis.BaseTagList"):
1310                vert_bases = self.parse_base_tag_list_()
1311            elif self.is_cur_keyword_("VertAxis.BaseScriptList"):
1312                vert_scripts = self.parse_base_script_list_(len(vert_bases))
1313                statements.append(
1314                    self.ast.BaseAxis(
1315                        vert_bases,
1316                        vert_scripts,
1317                        True,
1318                        location=self.cur_token_location_,
1319                    )
1320                )
1321            elif self.cur_token_ == ";":
1322                continue
1323
1324    def parse_table_OS_2_(self, table):
1325        statements = table.statements
1326        numbers = (
1327            "FSType",
1328            "TypoAscender",
1329            "TypoDescender",
1330            "TypoLineGap",
1331            "winAscent",
1332            "winDescent",
1333            "XHeight",
1334            "CapHeight",
1335            "WeightClass",
1336            "WidthClass",
1337            "LowerOpSize",
1338            "UpperOpSize",
1339        )
1340        ranges = ("UnicodeRange", "CodePageRange")
1341        while self.next_token_ != "}" or self.cur_comments_:
1342            self.advance_lexer_(comments=True)
1343            if self.cur_token_type_ is Lexer.COMMENT:
1344                statements.append(
1345                    self.ast.Comment(self.cur_token_, location=self.cur_token_location_)
1346                )
1347            elif self.cur_token_type_ is Lexer.NAME:
1348                key = self.cur_token_.lower()
1349                value = None
1350                if self.cur_token_ in numbers:
1351                    value = self.expect_number_()
1352                elif self.is_cur_keyword_("Panose"):
1353                    value = []
1354                    for i in range(10):
1355                        value.append(self.expect_number_())
1356                elif self.cur_token_ in ranges:
1357                    value = []
1358                    while self.next_token_ != ";":
1359                        value.append(self.expect_number_())
1360                elif self.is_cur_keyword_("Vendor"):
1361                    value = self.expect_string_()
1362                statements.append(
1363                    self.ast.OS2Field(key, value, location=self.cur_token_location_)
1364                )
1365            elif self.cur_token_ == ";":
1366                continue
1367
1368    def parse_STAT_ElidedFallbackName(self):
1369        assert self.is_cur_keyword_("ElidedFallbackName")
1370        self.expect_symbol_("{")
1371        names = []
1372        while self.next_token_ != "}" or self.cur_comments_:
1373            self.advance_lexer_()
1374            if self.is_cur_keyword_("name"):
1375                platformID, platEncID, langID, string = self.parse_stat_name_()
1376                nameRecord = self.ast.STATNameStatement(
1377                    "stat",
1378                    platformID,
1379                    platEncID,
1380                    langID,
1381                    string,
1382                    location=self.cur_token_location_,
1383                )
1384                names.append(nameRecord)
1385            else:
1386                if self.cur_token_ != ";":
1387                    raise FeatureLibError(
1388                        f"Unexpected token {self.cur_token_} " f"in ElidedFallbackName",
1389                        self.cur_token_location_,
1390                    )
1391        self.expect_symbol_("}")
1392        if not names:
1393            raise FeatureLibError('Expected "name"', self.cur_token_location_)
1394        return names
1395
    def parse_STAT_design_axis(self):
        # Parses a `DesignAxis tag order { name ...; ... }` record of a STAT
        # table and returns a STATDesignAxisStatement.
        assert self.is_cur_keyword_("DesignAxis")
        names = []
        axisTag = self.expect_tag_()
        # The five registered axes are lowercase; any other (foundry-defined)
        # axis tag is conventionally uppercase. Warn, but do not fail.
        if (
            axisTag not in ("ital", "opsz", "slnt", "wdth", "wght")
            and not axisTag.isupper()
        ):
            log.warning(f"Unregistered axis tag {axisTag} should be uppercase.")
        axisOrder = self.expect_number_()
        self.expect_symbol_("{")
        while self.next_token_ != "}" or self.cur_comments_:
            self.advance_lexer_()
            if self.cur_token_type_ is Lexer.COMMENT:
                continue
            elif self.is_cur_keyword_("name"):
                location = self.cur_token_location_
                platformID, platEncID, langID, string = self.parse_stat_name_()
                name = self.ast.STATNameStatement(
                    "stat", platformID, platEncID, langID, string, location=location
                )
                names.append(name)
            elif self.cur_token_ == ";":
                continue
            else:
                raise FeatureLibError(
                    f'Expected "name", got {self.cur_token_}', self.cur_token_location_
                )

        self.expect_symbol_("}")
        return self.ast.STATDesignAxisStatement(
            axisTag, axisOrder, names, self.cur_token_location_
        )
1429
    def parse_STAT_axis_value_(self):
        # Parses an `AxisValue { ... }` block of a STAT table: one or more
        # `name` records, one or more `location` records, and an optional
        # `flag`, returned as a STATAxisValueStatement.
        assert self.is_cur_keyword_("AxisValue")
        self.expect_symbol_("{")
        locations = []
        names = []
        flags = 0
        while self.next_token_ != "}" or self.cur_comments_:
            self.advance_lexer_(comments=True)
            if self.cur_token_type_ is Lexer.COMMENT:
                continue
            elif self.is_cur_keyword_("name"):
                location = self.cur_token_location_
                platformID, platEncID, langID, string = self.parse_stat_name_()
                name = self.ast.STATNameStatement(
                    "stat", platformID, platEncID, langID, string, location=location
                )
                names.append(name)
            elif self.is_cur_keyword_("location"):
                location = self.parse_STAT_location()
                locations.append(location)
            elif self.is_cur_keyword_("flag"):
                flags = self.expect_stat_flags()
            elif self.cur_token_ == ";":
                continue
            else:
                raise FeatureLibError(
                    f"Unexpected token {self.cur_token_} " f"in AxisValue",
                    self.cur_token_location_,
                )
        self.expect_symbol_("}")
        if not names:
            raise FeatureLibError('Expected "Axis Name"', self.cur_token_location_)
        if not locations:
            raise FeatureLibError('Expected "Axis location"', self.cur_token_location_)
        if len(locations) > 1:
            # Multiple locations mean a Format 4 Axis Value Record, where
            # each participating axis may contribute exactly one value ...
            for location in locations:
                if len(location.values) > 1:
                    raise FeatureLibError(
                        "Only one value is allowed in a "
                        "Format 4 Axis Value Record, but "
                        f"{len(location.values)} were found.",
                        self.cur_token_location_,
                    )
            # ... and each axis tag may appear at most once.
            format4_tags = []
            for location in locations:
                tag = location.tag
                if tag in format4_tags:
                    raise FeatureLibError(
                        f"Axis tag {tag} already " "defined.", self.cur_token_location_
                    )
                format4_tags.append(tag)

        return self.ast.STATAxisValueStatement(
            names, locations, flags, self.cur_token_location_
        )
1485
    def parse_STAT_location(self):
        # Parses a `location <tag> <value>+ ;` record inside an AxisValue
        # block and returns an AxisValueLocationStatement. Values may be
        # integers or floats; three values are nominal/min/max.
        values = []
        tag = self.expect_tag_()
        # expect_tag_ pads short tags with spaces, so check the real length.
        if len(tag.strip()) != 4:
            raise FeatureLibError(
                f"Axis tag {self.cur_token_} must be 4 " "characters",
                self.cur_token_location_,
            )

        while self.next_token_ != ";":
            if self.next_token_type_ is Lexer.FLOAT:
                value = self.expect_float_()
                values.append(value)
            elif self.next_token_type_ is Lexer.NUMBER:
                value = self.expect_number_()
                values.append(value)
            else:
                raise FeatureLibError(
                    f'Unexpected value "{self.next_token_}". '
                    "Expected integer or float.",
                    self.next_token_location_,
                )
        if len(values) == 3:
            # Three values: nominal must lie within [min, max].
            nominal, min_val, max_val = values
            if nominal < min_val or nominal > max_val:
                raise FeatureLibError(
                    f"Default value {nominal} is outside "
                    f"of specified range "
                    f"{min_val}-{max_val}.",
                    self.next_token_location_,
                )
        return self.ast.AxisValueLocationStatement(tag, values)
1518
    def parse_table_STAT_(self, table):
        # Parses the body of a `table STAT { ... }` block: an elided
        # fallback name (either name records or a name ID), DesignAxis
        # records, and AxisValue records, appended to `table.statements`.
        statements = table.statements
        # Tags of DesignAxis records seen so far; AxisValue locations may
        # only reference axes that were already defined.
        design_axes = []
        while self.next_token_ != "}" or self.cur_comments_:
            self.advance_lexer_(comments=True)
            if self.cur_token_type_ is Lexer.COMMENT:
                statements.append(
                    self.ast.Comment(self.cur_token_, location=self.cur_token_location_)
                )
            elif self.cur_token_type_ is Lexer.NAME:
                if self.is_cur_keyword_("ElidedFallbackName"):
                    names = self.parse_STAT_ElidedFallbackName()
                    statements.append(self.ast.ElidedFallbackName(names))
                elif self.is_cur_keyword_("ElidedFallbackNameID"):
                    value = self.expect_number_()
                    statements.append(self.ast.ElidedFallbackNameID(value))
                    self.expect_symbol_(";")
                elif self.is_cur_keyword_("DesignAxis"):
                    designAxis = self.parse_STAT_design_axis()
                    design_axes.append(designAxis.tag)
                    statements.append(designAxis)
                    self.expect_symbol_(";")
                elif self.is_cur_keyword_("AxisValue"):
                    axisValueRecord = self.parse_STAT_axis_value_()
                    for location in axisValueRecord.locations:
                        if location.tag not in design_axes:
                            # Tag must be defined in a DesignAxis before it
                            # can be referenced
                            raise FeatureLibError(
                                "DesignAxis not defined for " f"{location.tag}.",
                                self.cur_token_location_,
                            )
                    statements.append(axisValueRecord)
                    self.expect_symbol_(";")
                else:
                    raise FeatureLibError(
                        f"Unexpected token {self.cur_token_}", self.cur_token_location_
                    )
            elif self.cur_token_ == ";":
                continue
1559
1560    def parse_base_tag_list_(self):
1561        # Parses BASE table entries. (See `section 9.a <https://adobe-type-tools.github.io/afdko/OpenTypeFeatureFileSpecification.html#9.a>`_)
1562        assert self.cur_token_ in (
1563            "HorizAxis.BaseTagList",
1564            "VertAxis.BaseTagList",
1565        ), self.cur_token_
1566        bases = []
1567        while self.next_token_ != ";":
1568            bases.append(self.expect_script_tag_())
1569        self.expect_symbol_(";")
1570        return bases
1571
1572    def parse_base_script_list_(self, count):
1573        assert self.cur_token_ in (
1574            "HorizAxis.BaseScriptList",
1575            "VertAxis.BaseScriptList",
1576        ), self.cur_token_
1577        scripts = [(self.parse_base_script_record_(count))]
1578        while self.next_token_ == ",":
1579            self.expect_symbol_(",")
1580            scripts.append(self.parse_base_script_record_(count))
1581        self.expect_symbol_(";")
1582        return scripts
1583
1584    def parse_base_script_record_(self, count):
1585        script_tag = self.expect_script_tag_()
1586        base_tag = self.expect_script_tag_()
1587        coords = [self.expect_number_() for i in range(count)]
1588        return script_tag, base_tag, coords
1589
1590    def parse_device_(self):
1591        result = None
1592        self.expect_symbol_("<")
1593        self.expect_keyword_("device")
1594        if self.next_token_ == "NULL":
1595            self.expect_keyword_("NULL")
1596        else:
1597            result = [(self.expect_number_(), self.expect_number_())]
1598            while self.next_token_ == ",":
1599                self.expect_symbol_(",")
1600                result.append((self.expect_number_(), self.expect_number_()))
1601            result = tuple(result)  # make it hashable
1602        self.expect_symbol_(">")
1603        return result
1604
1605    def is_next_value_(self):
1606        return (
1607            self.next_token_type_ is Lexer.NUMBER
1608            or self.next_token_ == "<"
1609            or self.next_token_ == "("
1610        )
1611
1612    def parse_valuerecord_(self, vertical):
1613        if (
1614            self.next_token_type_ is Lexer.SYMBOL and self.next_token_ == "("
1615        ) or self.next_token_type_ is Lexer.NUMBER:
1616            number, location = (
1617                self.expect_number_(variable=True),
1618                self.cur_token_location_,
1619            )
1620            if vertical:
1621                val = self.ast.ValueRecord(
1622                    yAdvance=number, vertical=vertical, location=location
1623                )
1624            else:
1625                val = self.ast.ValueRecord(
1626                    xAdvance=number, vertical=vertical, location=location
1627                )
1628            return val
1629        self.expect_symbol_("<")
1630        location = self.cur_token_location_
1631        if self.next_token_type_ is Lexer.NAME:
1632            name = self.expect_name_()
1633            if name == "NULL":
1634                self.expect_symbol_(">")
1635                return self.ast.ValueRecord()
1636            vrd = self.valuerecords_.resolve(name)
1637            if vrd is None:
1638                raise FeatureLibError(
1639                    'Unknown valueRecordDef "%s"' % name, self.cur_token_location_
1640                )
1641            value = vrd.value
1642            xPlacement, yPlacement = (value.xPlacement, value.yPlacement)
1643            xAdvance, yAdvance = (value.xAdvance, value.yAdvance)
1644        else:
1645            xPlacement, yPlacement, xAdvance, yAdvance = (
1646                self.expect_number_(variable=True),
1647                self.expect_number_(variable=True),
1648                self.expect_number_(variable=True),
1649                self.expect_number_(variable=True),
1650            )
1651
1652        if self.next_token_ == "<":
1653            xPlaDevice, yPlaDevice, xAdvDevice, yAdvDevice = (
1654                self.parse_device_(),
1655                self.parse_device_(),
1656                self.parse_device_(),
1657                self.parse_device_(),
1658            )
1659            allDeltas = sorted(
1660                [
1661                    delta
1662                    for size, delta in (xPlaDevice if xPlaDevice else ())
1663                    + (yPlaDevice if yPlaDevice else ())
1664                    + (xAdvDevice if xAdvDevice else ())
1665                    + (yAdvDevice if yAdvDevice else ())
1666                ]
1667            )
1668            if allDeltas[0] < -128 or allDeltas[-1] > 127:
1669                raise FeatureLibError(
1670                    "Device value out of valid range (-128..127)",
1671                    self.cur_token_location_,
1672                )
1673        else:
1674            xPlaDevice, yPlaDevice, xAdvDevice, yAdvDevice = (None, None, None, None)
1675
1676        self.expect_symbol_(">")
1677        return self.ast.ValueRecord(
1678            xPlacement,
1679            yPlacement,
1680            xAdvance,
1681            yAdvance,
1682            xPlaDevice,
1683            yPlaDevice,
1684            xAdvDevice,
1685            yAdvDevice,
1686            vertical=vertical,
1687            location=location,
1688        )
1689
1690    def parse_valuerecord_definition_(self, vertical):
1691        # Parses a named value record definition. (See section `2.e.v <https://adobe-type-tools.github.io/afdko/OpenTypeFeatureFileSpecification.html#2.e.v>`_)
1692        assert self.is_cur_keyword_("valueRecordDef")
1693        location = self.cur_token_location_
1694        value = self.parse_valuerecord_(vertical)
1695        name = self.expect_name_()
1696        self.expect_symbol_(";")
1697        vrd = self.ast.ValueRecordDefinition(name, value, location=location)
1698        self.valuerecords_.define(name, vrd)
1699        return vrd
1700
1701    def parse_languagesystem_(self):
1702        assert self.cur_token_ == "languagesystem"
1703        location = self.cur_token_location_
1704        script = self.expect_script_tag_()
1705        language = self.expect_language_tag_()
1706        self.expect_symbol_(";")
1707        return self.ast.LanguageSystemStatement(script, language, location=location)
1708
1709    def parse_feature_block_(self, variation=False):
1710        if variation:
1711            assert self.cur_token_ == "variation"
1712        else:
1713            assert self.cur_token_ == "feature"
1714        location = self.cur_token_location_
1715        tag = self.expect_tag_()
1716        vertical = tag in {"vkrn", "vpal", "vhal", "valt"}
1717
1718        stylisticset = None
1719        cv_feature = None
1720        size_feature = False
1721        if tag in self.SS_FEATURE_TAGS:
1722            stylisticset = tag
1723        elif tag in self.CV_FEATURE_TAGS:
1724            cv_feature = tag
1725        elif tag == "size":
1726            size_feature = True
1727
1728        if variation:
1729            conditionset = self.expect_name_()
1730
1731        use_extension = False
1732        if self.next_token_ == "useExtension":
1733            self.expect_keyword_("useExtension")
1734            use_extension = True
1735
1736        if variation:
1737            block = self.ast.VariationBlock(
1738                tag, conditionset, use_extension=use_extension, location=location
1739            )
1740        else:
1741            block = self.ast.FeatureBlock(
1742                tag, use_extension=use_extension, location=location
1743            )
1744        self.parse_block_(block, vertical, stylisticset, size_feature, cv_feature)
1745        return block
1746
1747    def parse_feature_reference_(self):
1748        assert self.cur_token_ == "feature", self.cur_token_
1749        location = self.cur_token_location_
1750        featureName = self.expect_tag_()
1751        self.expect_symbol_(";")
1752        return self.ast.FeatureReferenceStatement(featureName, location=location)
1753
    def parse_featureNames_(self, tag):
        """Parses a ``featureNames`` statement found in stylistic set features.
        See section `8.c <https://adobe-type-tools.github.io/afdko/OpenTypeFeatureFileSpecification.html#8.c>`_.

        Returns a NestedBlock whose statements are FeatureNameStatement
        records (plus any comments found inside the block).
        """
        assert self.cur_token_ == "featureNames", self.cur_token_
        block = self.ast.NestedBlock(
            tag, self.cur_token_, location=self.cur_token_location_
        )
        self.expect_symbol_("{")
        # The braces open a nested scope in all symbol tables.
        for symtab in self.symbol_tables_:
            symtab.enter_scope()
        while self.next_token_ != "}" or self.cur_comments_:
            self.advance_lexer_(comments=True)
            if self.cur_token_type_ is Lexer.COMMENT:
                block.statements.append(
                    self.ast.Comment(self.cur_token_, location=self.cur_token_location_)
                )
            elif self.is_cur_keyword_("name"):
                location = self.cur_token_location_
                platformID, platEncID, langID, string = self.parse_name_()
                block.statements.append(
                    self.ast.FeatureNameStatement(
                        tag, platformID, platEncID, langID, string, location=location
                    )
                )
            elif self.cur_token_ == ";":
                continue
            else:
                raise FeatureLibError('Expected "name"', self.cur_token_location_)
        self.expect_symbol_("}")
        for symtab in self.symbol_tables_:
            symtab.exit_scope()
        self.expect_symbol_(";")
        return block
1788
    def parse_cvParameters_(self, tag):
        # Parses a ``cvParameters`` block found in Character Variant features.
        # See section `8.d <https://adobe-type-tools.github.io/afdko/OpenTypeFeatureFileSpecification.html#8.d>`_.
        # Returns a NestedBlock containing the name-ID sub-blocks and
        # Character statements.
        assert self.cur_token_ == "cvParameters", self.cur_token_
        block = self.ast.NestedBlock(
            tag, self.cur_token_, location=self.cur_token_location_
        )
        self.expect_symbol_("{")
        # The braces open a nested scope in all symbol tables.
        for symtab in self.symbol_tables_:
            symtab.enter_scope()

        statements = block.statements
        while self.next_token_ != "}" or self.cur_comments_:
            self.advance_lexer_(comments=True)
            if self.cur_token_type_ is Lexer.COMMENT:
                statements.append(
                    self.ast.Comment(self.cur_token_, location=self.cur_token_location_)
                )
            elif self.is_cur_keyword_(
                {
                    "FeatUILabelNameID",
                    "FeatUITooltipTextNameID",
                    "SampleTextNameID",
                    "ParamUILabelNameID",
                }
            ):
                # Each name-ID keyword introduces its own nested sub-block.
                statements.append(self.parse_cvNameIDs_(tag, self.cur_token_))
            elif self.is_cur_keyword_("Character"):
                statements.append(self.parse_cvCharacter_(tag))
            elif self.cur_token_ == ";":
                continue
            else:
                raise FeatureLibError(
                    "Expected statement: got {} {}".format(
                        self.cur_token_type_, self.cur_token_
                    ),
                    self.cur_token_location_,
                )

        self.expect_symbol_("}")
        for symtab in self.symbol_tables_:
            symtab.exit_scope()
        self.expect_symbol_(";")
        return block
1833
    def parse_cvNameIDs_(self, tag, block_name):
        # Parses one of the name-ID sub-blocks of a cvParameters block
        # (e.g. ``FeatUILabelNameID { name ...; }``), returning a
        # NestedBlock of CVParametersNameStatement records.
        assert self.cur_token_ == block_name, self.cur_token_
        block = self.ast.NestedBlock(tag, block_name, location=self.cur_token_location_)
        self.expect_symbol_("{")
        # The braces open a nested scope in all symbol tables.
        for symtab in self.symbol_tables_:
            symtab.enter_scope()
        while self.next_token_ != "}" or self.cur_comments_:
            self.advance_lexer_(comments=True)
            if self.cur_token_type_ is Lexer.COMMENT:
                block.statements.append(
                    self.ast.Comment(self.cur_token_, location=self.cur_token_location_)
                )
            elif self.is_cur_keyword_("name"):
                location = self.cur_token_location_
                platformID, platEncID, langID, string = self.parse_name_()
                block.statements.append(
                    self.ast.CVParametersNameStatement(
                        tag,
                        platformID,
                        platEncID,
                        langID,
                        string,
                        block_name,
                        location=location,
                    )
                )
            elif self.cur_token_ == ";":
                continue
            else:
                raise FeatureLibError('Expected "name"', self.cur_token_location_)
        self.expect_symbol_("}")
        for symtab in self.symbol_tables_:
            symtab.exit_scope()
        self.expect_symbol_(";")
        return block
1869
1870    def parse_cvCharacter_(self, tag):
1871        assert self.cur_token_ == "Character", self.cur_token_
1872        location, character = self.cur_token_location_, self.expect_any_number_()
1873        self.expect_symbol_(";")
1874        if not (0xFFFFFF >= character >= 0):
1875            raise FeatureLibError(
1876                "Character value must be between "
1877                "{:#x} and {:#x}".format(0, 0xFFFFFF),
1878                location,
1879            )
1880        return self.ast.CharacterStatement(character, tag, location=location)
1881
1882    def parse_FontRevision_(self):
1883        # Parses a ``FontRevision`` statement found in the head table. See
1884        # `section 9.c <https://adobe-type-tools.github.io/afdko/OpenTypeFeatureFileSpecification.html#9.c>`_.
1885        assert self.cur_token_ == "FontRevision", self.cur_token_
1886        location, version = self.cur_token_location_, self.expect_float_()
1887        self.expect_symbol_(";")
1888        if version <= 0:
1889            raise FeatureLibError("Font revision numbers must be positive", location)
1890        return self.ast.FontRevisionStatement(version, location=location)
1891
1892    def parse_conditionset_(self):
1893        name = self.expect_name_()
1894
1895        conditions = {}
1896        self.expect_symbol_("{")
1897
1898        while self.next_token_ != "}":
1899            self.advance_lexer_()
1900            if self.cur_token_type_ is not Lexer.NAME:
1901                raise FeatureLibError("Expected an axis name", self.cur_token_location_)
1902
1903            axis = self.cur_token_
1904            if axis in conditions:
1905                raise FeatureLibError(
1906                    f"Repeated condition for axis {axis}", self.cur_token_location_
1907                )
1908
1909            if self.next_token_type_ is Lexer.FLOAT:
1910                min_value = self.expect_float_()
1911            elif self.next_token_type_ is Lexer.NUMBER:
1912                min_value = self.expect_number_(variable=False)
1913
1914            if self.next_token_type_ is Lexer.FLOAT:
1915                max_value = self.expect_float_()
1916            elif self.next_token_type_ is Lexer.NUMBER:
1917                max_value = self.expect_number_(variable=False)
1918            self.expect_symbol_(";")
1919
1920            conditions[axis] = (min_value, max_value)
1921
1922        self.expect_symbol_("}")
1923
1924        finalname = self.expect_name_()
1925        if finalname != name:
1926            raise FeatureLibError('Expected "%s"' % name, self.cur_token_location_)
1927        return self.ast.ConditionsetStatement(name, conditions)
1928
    def parse_block_(
        self, block, vertical, stylisticset=None, size_feature=False, cv_feature=None
    ):
        # Parses the `{ ... } name;` body shared by feature, variation and
        # lookup blocks, appending the parsed statements to
        # `block.statements`. `stylisticset`, `size_feature` and
        # `cv_feature` enable the statements that are only legal inside
        # those particular features.
        self.expect_symbol_("{")
        # The braces open a nested scope in all symbol tables.
        for symtab in self.symbol_tables_:
            symtab.enter_scope()

        statements = block.statements
        while self.next_token_ != "}" or self.cur_comments_:
            self.advance_lexer_(comments=True)
            if self.cur_token_type_ is Lexer.COMMENT:
                statements.append(
                    self.ast.Comment(self.cur_token_, location=self.cur_token_location_)
                )
            elif self.cur_token_type_ is Lexer.GLYPHCLASS:
                statements.append(self.parse_glyphclass_definition_())
            elif self.is_cur_keyword_("anchorDef"):
                statements.append(self.parse_anchordef_())
            elif self.is_cur_keyword_({"enum", "enumerate"}):
                statements.append(self.parse_enumerate_(vertical=vertical))
            elif self.is_cur_keyword_("feature"):
                statements.append(self.parse_feature_reference_())
            elif self.is_cur_keyword_("ignore"):
                statements.append(self.parse_ignore_())
            elif self.is_cur_keyword_("language"):
                statements.append(self.parse_language_())
            elif self.is_cur_keyword_("lookup"):
                statements.append(self.parse_lookup_(vertical))
            elif self.is_cur_keyword_("lookupflag"):
                statements.append(self.parse_lookupflag_())
            elif self.is_cur_keyword_("markClass"):
                statements.append(self.parse_markClass_())
            elif self.is_cur_keyword_({"pos", "position"}):
                statements.append(
                    self.parse_position_(enumerated=False, vertical=vertical)
                )
            elif self.is_cur_keyword_("script"):
                statements.append(self.parse_script_())
            elif self.is_cur_keyword_({"sub", "substitute", "rsub", "reversesub"}):
                statements.append(self.parse_substitute_())
            elif self.is_cur_keyword_("subtable"):
                statements.append(self.parse_subtable_())
            elif self.is_cur_keyword_("valueRecordDef"):
                statements.append(self.parse_valuerecord_definition_(vertical))
            elif stylisticset and self.is_cur_keyword_("featureNames"):
                statements.append(self.parse_featureNames_(stylisticset))
            elif cv_feature and self.is_cur_keyword_("cvParameters"):
                statements.append(self.parse_cvParameters_(cv_feature))
            elif size_feature and self.is_cur_keyword_("parameters"):
                statements.append(self.parse_size_parameters_())
            elif size_feature and self.is_cur_keyword_("sizemenuname"):
                statements.append(self.parse_size_menuname_())
            elif (
                self.cur_token_type_ is Lexer.NAME
                and self.cur_token_ in self.extensions
            ):
                # A keyword registered by a parser extension.
                statements.append(self.extensions[self.cur_token_](self))
            elif self.cur_token_ == ";":
                continue
            else:
                raise FeatureLibError(
                    "Expected glyph class definition or statement: got {} {}".format(
                        self.cur_token_type_, self.cur_token_
                    ),
                    self.cur_token_location_,
                )

        self.expect_symbol_("}")
        for symtab in self.symbol_tables_:
            symtab.exit_scope()

        # The block must be closed by repeating its own name.
        name = self.expect_name_()
        if name != block.name.strip():
            raise FeatureLibError(
                'Expected "%s"' % block.name.strip(), self.cur_token_location_
            )
        self.expect_symbol_(";")

        # A multiple substitution may have a single destination, in which case
        # it will look just like a single substitution. So if there are both
        # multiple and single substitutions, upgrade all the single ones to
        # multiple substitutions.

        # Check if we have a mix of non-contextual singles and multiples.
        has_single = False
        has_multiple = False
        for s in statements:
            if isinstance(s, self.ast.SingleSubstStatement):
                has_single = not any([s.prefix, s.suffix, s.forceChain])
            elif isinstance(s, self.ast.MultipleSubstStatement):
                has_multiple = not any([s.prefix, s.suffix, s.forceChain])

        # Upgrade all single substitutions to multiple substitutions.
        if has_single and has_multiple:
            statements = []
            for s in block.statements:
                if isinstance(s, self.ast.SingleSubstStatement):
                    glyphs = s.glyphs[0].glyphSet()
                    replacements = s.replacements[0].glyphSet()
                    # A single replacement glyph applies to every glyph in
                    # the input class.
                    if len(replacements) == 1:
                        replacements *= len(glyphs)
                    for i, glyph in enumerate(glyphs):
                        statements.append(
                            self.ast.MultipleSubstStatement(
                                s.prefix,
                                glyph,
                                s.suffix,
                                [replacements[i]],
                                s.forceChain,
                                location=s.location,
                            )
                        )
                else:
                    statements.append(s)
            block.statements = statements
2044
2045    def is_cur_keyword_(self, k):
2046        if self.cur_token_type_ is Lexer.NAME:
2047            if isinstance(k, type("")):  # basestring is gone in Python3
2048                return self.cur_token_ == k
2049            else:
2050                return self.cur_token_ in k
2051        return False
2052
2053    def expect_class_name_(self):
2054        self.advance_lexer_()
2055        if self.cur_token_type_ is not Lexer.GLYPHCLASS:
2056            raise FeatureLibError("Expected @NAME", self.cur_token_location_)
2057        return self.cur_token_
2058
2059    def expect_cid_(self):
2060        self.advance_lexer_()
2061        if self.cur_token_type_ is Lexer.CID:
2062            return self.cur_token_
2063        raise FeatureLibError("Expected a CID", self.cur_token_location_)
2064
2065    def expect_filename_(self):
2066        self.advance_lexer_()
2067        if self.cur_token_type_ is not Lexer.FILENAME:
2068            raise FeatureLibError("Expected file name", self.cur_token_location_)
2069        return self.cur_token_
2070
2071    def expect_glyph_(self):
2072        self.advance_lexer_()
2073        if self.cur_token_type_ is Lexer.NAME:
2074            return self.cur_token_.lstrip("\\")
2075        elif self.cur_token_type_ is Lexer.CID:
2076            return "cid%05d" % self.cur_token_
2077        raise FeatureLibError("Expected a glyph name or CID", self.cur_token_location_)
2078
2079    def check_glyph_name_in_glyph_set(self, *names):
2080        """Adds a glyph name (just `start`) or glyph names of a
2081        range (`start` and `end`) which are not in the glyph set
2082        to the "missing list" for future error reporting.
2083
2084        If no glyph set is present, does nothing.
2085        """
2086        if self.glyphNames_:
2087            for name in names:
2088                if name in self.glyphNames_:
2089                    continue
2090                if name not in self.missing:
2091                    self.missing[name] = self.cur_token_location_
2092
2093    def expect_markClass_reference_(self):
2094        name = self.expect_class_name_()
2095        mc = self.glyphclasses_.resolve(name)
2096        if mc is None:
2097            raise FeatureLibError(
2098                "Unknown markClass @%s" % name, self.cur_token_location_
2099            )
2100        if not isinstance(mc, self.ast.MarkClass):
2101            raise FeatureLibError(
2102                "@%s is not a markClass" % name, self.cur_token_location_
2103            )
2104        return mc
2105
2106    def expect_tag_(self):
2107        self.advance_lexer_()
2108        if self.cur_token_type_ is not Lexer.NAME:
2109            raise FeatureLibError("Expected a tag", self.cur_token_location_)
2110        if len(self.cur_token_) > 4:
2111            raise FeatureLibError(
2112                "Tags cannot be longer than 4 characters", self.cur_token_location_
2113            )
2114        return (self.cur_token_ + "    ")[:4]
2115
2116    def expect_script_tag_(self):
2117        tag = self.expect_tag_()
2118        if tag == "dflt":
2119            raise FeatureLibError(
2120                '"dflt" is not a valid script tag; use "DFLT" instead',
2121                self.cur_token_location_,
2122            )
2123        return tag
2124
2125    def expect_language_tag_(self):
2126        tag = self.expect_tag_()
2127        if tag == "DFLT":
2128            raise FeatureLibError(
2129                '"DFLT" is not a valid language tag; use "dflt" instead',
2130                self.cur_token_location_,
2131            )
2132        return tag
2133
2134    def expect_symbol_(self, symbol):
2135        self.advance_lexer_()
2136        if self.cur_token_type_ is Lexer.SYMBOL and self.cur_token_ == symbol:
2137            return symbol
2138        raise FeatureLibError("Expected '%s'" % symbol, self.cur_token_location_)
2139
2140    def expect_keyword_(self, keyword):
2141        self.advance_lexer_()
2142        if self.cur_token_type_ is Lexer.NAME and self.cur_token_ == keyword:
2143            return self.cur_token_
2144        raise FeatureLibError('Expected "%s"' % keyword, self.cur_token_location_)
2145
2146    def expect_name_(self):
2147        self.advance_lexer_()
2148        if self.cur_token_type_ is Lexer.NAME:
2149            return self.cur_token_
2150        raise FeatureLibError("Expected a name", self.cur_token_location_)
2151
2152    def expect_number_(self, variable=False):
2153        self.advance_lexer_()
2154        if self.cur_token_type_ is Lexer.NUMBER:
2155            return self.cur_token_
2156        if variable and self.cur_token_type_ is Lexer.SYMBOL and self.cur_token_ == "(":
2157            return self.expect_variable_scalar_()
2158        raise FeatureLibError("Expected a number", self.cur_token_location_)
2159
2160    def expect_variable_scalar_(self):
2161        self.advance_lexer_()  # "("
2162        scalar = VariableScalar()
2163        while True:
2164            if self.cur_token_type_ == Lexer.SYMBOL and self.cur_token_ == ")":
2165                break
2166            location, value = self.expect_master_()
2167            scalar.add_value(location, value)
2168        return scalar
2169
2170    def expect_master_(self):
2171        location = {}
2172        while True:
2173            if self.cur_token_type_ is not Lexer.NAME:
2174                raise FeatureLibError("Expected an axis name", self.cur_token_location_)
2175            axis = self.cur_token_
2176            self.advance_lexer_()
2177            if not (self.cur_token_type_ is Lexer.SYMBOL and self.cur_token_ == "="):
2178                raise FeatureLibError(
2179                    "Expected an equals sign", self.cur_token_location_
2180                )
2181            value = self.expect_number_()
2182            location[axis] = value
2183            if self.next_token_type_ is Lexer.NAME and self.next_token_[0] == ":":
2184                # Lexer has just read the value as a glyph name. We'll correct it later
2185                break
2186            self.advance_lexer_()
2187            if not (self.cur_token_type_ is Lexer.SYMBOL and self.cur_token_ == ","):
2188                raise FeatureLibError(
2189                    "Expected an comma or an equals sign", self.cur_token_location_
2190                )
2191            self.advance_lexer_()
2192        self.advance_lexer_()
2193        value = int(self.cur_token_[1:])
2194        self.advance_lexer_()
2195        return location, value
2196
2197    def expect_any_number_(self):
2198        self.advance_lexer_()
2199        if self.cur_token_type_ in Lexer.NUMBERS:
2200            return self.cur_token_
2201        raise FeatureLibError(
2202            "Expected a decimal, hexadecimal or octal number", self.cur_token_location_
2203        )
2204
2205    def expect_float_(self):
2206        self.advance_lexer_()
2207        if self.cur_token_type_ is Lexer.FLOAT:
2208            return self.cur_token_
2209        raise FeatureLibError(
2210            "Expected a floating-point number", self.cur_token_location_
2211        )
2212
2213    def expect_decipoint_(self):
2214        if self.next_token_type_ == Lexer.FLOAT:
2215            return self.expect_float_()
2216        elif self.next_token_type_ is Lexer.NUMBER:
2217            return self.expect_number_() / 10
2218        else:
2219            raise FeatureLibError(
2220                "Expected an integer or floating-point number", self.cur_token_location_
2221            )
2222
2223    def expect_stat_flags(self):
2224        value = 0
2225        flags = {
2226            "OlderSiblingFontAttribute": 1,
2227            "ElidableAxisValueName": 2,
2228        }
2229        while self.next_token_ != ";":
2230            if self.next_token_ in flags:
2231                name = self.expect_name_()
2232                value = value | flags[name]
2233            else:
2234                raise FeatureLibError(
2235                    f"Unexpected STAT flag {self.cur_token_}", self.cur_token_location_
2236                )
2237        return value
2238
2239    def expect_stat_values_(self):
2240        if self.next_token_type_ == Lexer.FLOAT:
2241            return self.expect_float_()
2242        elif self.next_token_type_ is Lexer.NUMBER:
2243            return self.expect_number_()
2244        else:
2245            raise FeatureLibError(
2246                "Expected an integer or floating-point number", self.cur_token_location_
2247            )
2248
2249    def expect_string_(self):
2250        self.advance_lexer_()
2251        if self.cur_token_type_ is Lexer.STRING:
2252            return self.cur_token_
2253        raise FeatureLibError("Expected a string", self.cur_token_location_)
2254
    def advance_lexer_(self, comments=False):
        """Move the parser one token forward.

        Copies the lookahead token (``next_token_*``) into the current
        token (``cur_token_*``), then pulls the next token from the lexer
        into the lookahead slot.  COMMENT tokens are never exposed as the
        lookahead: they are buffered in ``cur_comments_``.  When
        ``comments`` is True and buffered comments exist, the oldest
        buffered comment becomes the current token instead and no new
        token is read.
        """
        if comments and self.cur_comments_:
            # Surface a previously buffered comment as the current token.
            self.cur_token_type_ = Lexer.COMMENT
            self.cur_token_, self.cur_token_location_ = self.cur_comments_.pop(0)
            return
        else:
            self.cur_token_type_, self.cur_token_, self.cur_token_location_ = (
                self.next_token_type_,
                self.next_token_,
                self.next_token_location_,
            )
        # Refill the lookahead, buffering any comments encountered on the way
        # so callers only ever see non-COMMENT tokens as next_token_.
        while True:
            try:
                (
                    self.next_token_type_,
                    self.next_token_,
                    self.next_token_location_,
                ) = next(self.lexer_)
            except StopIteration:
                # End of input: clear the lookahead (type None breaks the loop).
                self.next_token_type_, self.next_token_ = (None, None)
            if self.next_token_type_ != Lexer.COMMENT:
                break
            self.cur_comments_.append((self.next_token_, self.next_token_location_))
2278
2279    @staticmethod
2280    def reverse_string_(s):
2281        """'abc' --> 'cba'"""
2282        return "".join(reversed(list(s)))
2283
2284    def make_cid_range_(self, location, start, limit):
2285        """(location, 999, 1001) --> ["cid00999", "cid01000", "cid01001"]"""
2286        result = list()
2287        if start > limit:
2288            raise FeatureLibError(
2289                "Bad range: start should be less than limit", location
2290            )
2291        for cid in range(start, limit + 1):
2292            result.append("cid%05d" % cid)
2293        return result
2294
    def make_glyph_range_(self, location, start, limit):
        """(location, "a.sc", "d.sc") --> ["a.sc", "b.sc", "c.sc", "d.sc"]

        Expands a glyph range by finding the common prefix and suffix of
        the two names and iterating over the differing middle part, which
        must be a single A-Z letter, a single a-z letter, or a run of
        one to three digits.

        Raises:
            FeatureLibError: if the names differ in length, the range is
                not strictly ascending, or the middle part cannot be
                expanded.
        """
        result = list()
        if len(start) != len(limit):
            raise FeatureLibError(
                'Bad range: "%s" and "%s" should have the same length' % (start, limit),
                location,
            )

        rev = self.reverse_string_
        prefix = os.path.commonprefix([start, limit])
        # Common suffix = reversed common prefix of the reversed names.
        suffix = rev(os.path.commonprefix([rev(start), rev(limit)]))
        if len(suffix) > 0:
            start_range = start[len(prefix) : -len(suffix)]
            limit_range = limit[len(prefix) : -len(suffix)]
        else:
            # A "-0" end index would slice to the empty string, hence this branch.
            start_range = start[len(prefix) :]
            limit_range = limit[len(prefix) :]

        if start_range >= limit_range:
            raise FeatureLibError(
                "Start of range must be smaller than its end", location
            )

        # Single uppercase letter, e.g. "A.sc" - "D.sc".
        uppercase = re.compile(r"^[A-Z]$")
        if uppercase.match(start_range) and uppercase.match(limit_range):
            for c in range(ord(start_range), ord(limit_range) + 1):
                result.append("%s%c%s" % (prefix, c, suffix))
            return result

        # Single lowercase letter, e.g. "a.sc" - "d.sc".
        lowercase = re.compile(r"^[a-z]$")
        if lowercase.match(start_range) and lowercase.match(limit_range):
            for c in range(ord(start_range), ord(limit_range) + 1):
                result.append("%s%c%s" % (prefix, c, suffix))
            return result

        # 1-3 digit run; numbers are zero-padded to the original width.
        digits = re.compile(r"^[0-9]{1,3}$")
        if digits.match(start_range) and digits.match(limit_range):
            for i in range(int(start_range, 10), int(limit_range, 10) + 1):
                number = ("000" + str(i))[-len(start_range) :]
                result.append("%s%s%s" % (prefix, number, suffix))
            return result

        raise FeatureLibError('Bad range: "%s-%s"' % (start, limit), location)
2339
2340
class SymbolTable(object):
    """A stack of nested name scopes.

    Definitions always go into the innermost scope; lookups walk from
    the innermost scope outward and return the first truthy binding.
    """

    def __init__(self):
        # Start with a single (outermost) scope.
        self.scopes_ = [{}]

    def enter_scope(self):
        """Push a fresh innermost scope."""
        self.scopes_.append({})

    def exit_scope(self):
        """Discard the innermost scope and everything defined in it."""
        self.scopes_.pop()

    def define(self, name, item):
        """Bind ``name`` to ``item`` in the innermost scope."""
        self.scopes_[-1][name] = item

    def resolve(self, name):
        """Return the innermost truthy binding for ``name``, or None."""
        for scope_dict in self.scopes_[::-1]:
            found = scope_dict.get(name)
            if found:
                return found
        return None
2360