xref: /aosp_15_r20/external/fonttools/Lib/fontTools/voltLib/parser.py (revision e1fe3e4ad2793916b15cccdc4a7da52a7e1dd0e9)
1import fontTools.voltLib.ast as ast
2from fontTools.voltLib.lexer import Lexer
3from fontTools.voltLib.error import VoltLibError
4from io import open
5
# Maps each top-level VOLT keyword to the name of the Parser method that
# parses the statement it introduces (dispatched via getattr in
# Parser.parse()).
# NOTE(review): "PPOSITIONING_PPEM" (double P) appears to mirror the
# spelling emitted by VOLT itself rather than being a typo here — confirm
# against a real .vtp file before "correcting" it.
PARSE_FUNCS = {
    "DEF_GLYPH": "parse_def_glyph_",
    "DEF_GROUP": "parse_def_group_",
    "DEF_SCRIPT": "parse_def_script_",
    "DEF_LOOKUP": "parse_def_lookup_",
    "DEF_ANCHOR": "parse_def_anchor_",
    "GRID_PPEM": "parse_ppem_",
    "PRESENTATION_PPEM": "parse_ppem_",
    "PPOSITIONING_PPEM": "parse_ppem_",
    "COMPILER_USEEXTENSIONLOOKUPS": "parse_noarg_option_",
    "COMPILER_USEPAIRPOSFORMAT2": "parse_noarg_option_",
    "CMAP_FORMAT": "parse_cmap_format",
    "DO_NOT_TOUCH_CMAP": "parse_noarg_option_",
}
20
21
class Parser(object):
    """Recursive-descent parser for Microsoft VOLT project source files.

    Consumes tokens from ``fontTools.voltLib.lexer.Lexer`` and builds a
    ``fontTools.voltLib.ast.VoltFile``.  The parser keeps a one-token
    lookahead: ``cur_token_*`` describes the token currently being acted
    on, while ``next_token_*`` is peeked at by the ``parse_*`` methods to
    decide which optional clause follows; ``advance_lexer_()`` shifts the
    lookahead into the current slot.
    """

    def __init__(self, path):
        """Initialize parser state and prime the one-token lookahead.

        *path* may be a filesystem path or a readable file-like object
        (see make_lexer_).
        """
        self.doc_ = ast.VoltFile()
        # Symbol tables used solely to detect duplicate definitions while
        # parsing; resolved objects are AST nodes.
        self.glyphs_ = OrderedSymbolTable()
        self.groups_ = SymbolTable()
        self.anchors_ = {}  # dictionary of SymbolTable() keyed by glyph
        self.scripts_ = SymbolTable()
        self.langs_ = SymbolTable()
        self.lookups_ = SymbolTable()
        self.next_token_type_, self.next_token_ = (None, None)
        self.next_token_location_ = None
        self.make_lexer_(path)
        self.advance_lexer_()

    def make_lexer_(self, file_or_path):
        """Create self.lexer_ from a path or an open file-like object."""
        if hasattr(file_or_path, "read"):
            # File-like object: use its .name (if any) for error locations.
            filename = getattr(file_or_path, "name", None)
            data = file_or_path.read()
        else:
            filename = file_or_path
            with open(file_or_path, "r") as f:
                data = f.read()
        self.lexer_ = Lexer(data, filename)

    def parse(self):
        """Parse the whole file and return the populated VoltFile AST.

        Dispatches each top-level keyword through PARSE_FUNCS; stops at
        "END" or end of input.  Raises VoltLibError on an unknown keyword.
        """
        statements = self.doc_.statements
        while self.next_token_type_ is not None:
            self.advance_lexer_()
            if self.cur_token_ in PARSE_FUNCS.keys():
                func = getattr(self, PARSE_FUNCS[self.cur_token_])
                statements.append(func())
            elif self.is_cur_keyword_("END"):
                break
            else:
                raise VoltLibError(
                    "Expected " + ", ".join(sorted(PARSE_FUNCS.keys())),
                    self.cur_token_location_,
                )
        return self.doc_

    def parse_def_glyph_(self):
        """Parse DEF_GLYPH ... END_GLYPH; returns ast.GlyphDefinition.

        Raises VoltLibError for a negative glyph ID/Unicode value or a
        duplicate glyph name.
        """
        assert self.is_cur_keyword_("DEF_GLYPH")
        location = self.cur_token_location_
        name = self.expect_string_()
        self.expect_keyword_("ID")
        gid = self.expect_number_()
        if gid < 0:
            raise VoltLibError("Invalid glyph ID", self.cur_token_location_)
        gunicode = None
        # UNICODE (single codepoint) and UNICODEVALUES (comma-separated
        # list) are mutually exclusive optional clauses.
        if self.next_token_ == "UNICODE":
            self.expect_keyword_("UNICODE")
            gunicode = [self.expect_number_()]
            if gunicode[0] < 0:
                raise VoltLibError("Invalid glyph UNICODE", self.cur_token_location_)
        elif self.next_token_ == "UNICODEVALUES":
            self.expect_keyword_("UNICODEVALUES")
            gunicode = self.parse_unicode_values_()
        gtype = None
        if self.next_token_ == "TYPE":
            self.expect_keyword_("TYPE")
            gtype = self.expect_name_()
            assert gtype in ("BASE", "LIGATURE", "MARK", "COMPONENT")
        components = None
        if self.next_token_ == "COMPONENTS":
            self.expect_keyword_("COMPONENTS")
            components = self.expect_number_()
        self.expect_keyword_("END_GLYPH")
        if self.glyphs_.resolve(name) is not None:
            raise VoltLibError(
                'Glyph "%s" (gid %i) already defined' % (name, gid), location
            )
        def_glyph = ast.GlyphDefinition(
            name, gid, gunicode, gtype, components, location=location
        )
        self.glyphs_.define(name, def_glyph)
        return def_glyph

    def parse_def_group_(self):
        """Parse DEF_GROUP ... END_GROUP; returns ast.GroupDefinition.

        Raises VoltLibError if the (case-insensitive) group name already
        exists.
        """
        assert self.is_cur_keyword_("DEF_GROUP")
        location = self.cur_token_location_
        name = self.expect_string_()
        enum = None
        if self.next_token_ == "ENUM":
            enum = self.parse_enum_()
        self.expect_keyword_("END_GROUP")
        if self.groups_.resolve(name) is not None:
            raise VoltLibError(
                'Glyph group "%s" already defined, '
                "group names are case insensitive" % name,
                location,
            )
        def_group = ast.GroupDefinition(name, enum, location=location)
        self.groups_.define(name, def_group)
        return def_group

    def parse_def_script_(self):
        """Parse DEF_SCRIPT ... END_SCRIPT; returns ast.ScriptDefinition.

        Language systems inside the script are parsed into a fresh
        langs_ scope so duplicate language tags are only checked within
        this script.
        """
        assert self.is_cur_keyword_("DEF_SCRIPT")
        location = self.cur_token_location_
        name = None
        if self.next_token_ == "NAME":
            self.expect_keyword_("NAME")
            name = self.expect_string_()
        self.expect_keyword_("TAG")
        tag = self.expect_string_()
        if self.scripts_.resolve(tag) is not None:
            raise VoltLibError(
                'Script "%s" already defined, '
                "script tags are case insensitive" % tag,
                location,
            )
        self.langs_.enter_scope()
        langs = []
        while self.next_token_ != "END_SCRIPT":
            self.advance_lexer_()
            lang = self.parse_langsys_()
            self.expect_keyword_("END_LANGSYS")
            if self.langs_.resolve(lang.tag) is not None:
                raise VoltLibError(
                    'Language "%s" already defined in script "%s", '
                    "language tags are case insensitive" % (lang.tag, tag),
                    location,
                )
            self.langs_.define(lang.tag, lang)
            langs.append(lang)
        self.expect_keyword_("END_SCRIPT")
        self.langs_.exit_scope()
        def_script = ast.ScriptDefinition(name, tag, langs, location=location)
        self.scripts_.define(tag, def_script)
        return def_script

    def parse_langsys_(self):
        """Parse DEF_LANGSYS ... (caller consumes END_LANGSYS).

        Returns ast.LangSysDefinition.
        """
        assert self.is_cur_keyword_("DEF_LANGSYS")
        location = self.cur_token_location_
        name = None
        if self.next_token_ == "NAME":
            self.expect_keyword_("NAME")
            name = self.expect_string_()
        self.expect_keyword_("TAG")
        tag = self.expect_string_()
        features = []
        while self.next_token_ != "END_LANGSYS":
            self.advance_lexer_()
            feature = self.parse_feature_()
            self.expect_keyword_("END_FEATURE")
            features.append(feature)
        def_langsys = ast.LangSysDefinition(name, tag, features, location=location)
        return def_langsys

    def parse_feature_(self):
        """Parse DEF_FEATURE ... (caller consumes END_FEATURE).

        Returns ast.FeatureDefinition; lookups are stored by name
        (strings), not resolved objects.
        """
        assert self.is_cur_keyword_("DEF_FEATURE")
        location = self.cur_token_location_
        self.expect_keyword_("NAME")
        name = self.expect_string_()
        self.expect_keyword_("TAG")
        tag = self.expect_string_()
        lookups = []
        while self.next_token_ != "END_FEATURE":
            # self.advance_lexer_()
            self.expect_keyword_("LOOKUP")
            lookup = self.expect_string_()
            lookups.append(lookup)
        feature = ast.FeatureDefinition(name, tag, lookups, location=location)
        return feature

    def parse_def_lookup_(self):
        """Parse DEF_LOOKUP ... ; returns ast.LookupDefinition.

        Handles the optional PROCESS_BASE/SKIP_BASE,
        PROCESS_MARKS/SKIP_MARKS, DIRECTION, REVERSAL, COMMENTS and
        context clauses, then dispatches to substitution or position
        parsing.  Raises VoltLibError on invalid names, duplicates, or an
        unexpected body keyword.
        """
        assert self.is_cur_keyword_("DEF_LOOKUP")
        location = self.cur_token_location_
        name = self.expect_string_()
        if not name[0].isalpha():
            raise VoltLibError(
                'Lookup name "%s" must start with a letter' % name, location
            )
        if self.lookups_.resolve(name) is not None:
            raise VoltLibError(
                'Lookup "%s" already defined, '
                "lookup names are case insensitive" % name,
                location,
            )
        process_base = True
        if self.next_token_ == "PROCESS_BASE":
            self.advance_lexer_()
        elif self.next_token_ == "SKIP_BASE":
            self.advance_lexer_()
            process_base = False
        # process_marks is True (all), False (none), or a string naming a
        # glyph group to filter marks by.
        process_marks = True
        mark_glyph_set = None
        if self.next_token_ == "PROCESS_MARKS":
            self.advance_lexer_()
            if self.next_token_ == "MARK_GLYPH_SET":
                self.advance_lexer_()
                mark_glyph_set = self.expect_string_()
            elif self.next_token_ == "ALL":
                self.advance_lexer_()
            elif self.next_token_ == "NONE":
                self.advance_lexer_()
                process_marks = False
            elif self.next_token_type_ == Lexer.STRING:
                process_marks = self.expect_string_()
            else:
                raise VoltLibError(
                    "Expected ALL, NONE, MARK_GLYPH_SET or an ID. "
                    "Got %s" % (self.next_token_type_),
                    location,
                )
        elif self.next_token_ == "SKIP_MARKS":
            self.advance_lexer_()
            process_marks = False
        direction = None
        if self.next_token_ == "DIRECTION":
            self.expect_keyword_("DIRECTION")
            direction = self.expect_name_()
            assert direction in ("LTR", "RTL")
        reversal = None
        if self.next_token_ == "REVERSAL":
            self.expect_keyword_("REVERSAL")
            reversal = True
        comments = None
        if self.next_token_ == "COMMENTS":
            self.expect_keyword_("COMMENTS")
            # VOLT stores newlines escaped as literal "\n" in the source.
            comments = self.expect_string_().replace(r"\n", "\n")
        context = []
        # parse_context_ consumes all consecutive context clauses, so this
        # loop body runs at most once in practice.
        while self.next_token_ in ("EXCEPT_CONTEXT", "IN_CONTEXT"):
            context = self.parse_context_()
        as_pos_or_sub = self.expect_name_()
        sub = None
        pos = None
        if as_pos_or_sub == "AS_SUBSTITUTION":
            sub = self.parse_substitution_(reversal)
        elif as_pos_or_sub == "AS_POSITION":
            pos = self.parse_position_()
        else:
            raise VoltLibError(
                "Expected AS_SUBSTITUTION or AS_POSITION. " "Got %s" % (as_pos_or_sub),
                location,
            )
        def_lookup = ast.LookupDefinition(
            name,
            process_base,
            process_marks,
            mark_glyph_set,
            direction,
            reversal,
            comments,
            context,
            sub,
            pos,
            location=location,
        )
        self.lookups_.define(name, def_lookup)
        return def_lookup

    def parse_context_(self):
        """Parse consecutive IN_CONTEXT/EXCEPT_CONTEXT ... END_CONTEXT
        clauses; returns a list of ast.ContextDefinition.

        An empty context clause (no LEFT/RIGHT) produces no entry in the
        returned list.
        """
        location = self.cur_token_location_
        contexts = []
        while self.next_token_ in ("EXCEPT_CONTEXT", "IN_CONTEXT"):
            side = None
            coverage = None
            ex_or_in = self.expect_name_()
            # side_contexts = [] # XXX
            if self.next_token_ != "END_CONTEXT":
                left = []
                right = []
                while self.next_token_ in ("LEFT", "RIGHT"):
                    side = self.expect_name_()
                    coverage = self.parse_coverage_()
                    if side == "LEFT":
                        left.append(coverage)
                    else:
                        right.append(coverage)
                self.expect_keyword_("END_CONTEXT")
                context = ast.ContextDefinition(
                    ex_or_in, left, right, location=location
                )
                contexts.append(context)
            else:
                self.expect_keyword_("END_CONTEXT")
        return contexts

    def parse_substitution_(self, reversal):
        """Parse AS_SUBSTITUTION ... END_SUBSTITUTION.

        Classifies the SUB mapping into single / multiple / ligature /
        reverse-chaining-single AST nodes based on the widest source and
        destination coverage.  Raises VoltLibError for many-to-many (or
        any non-1:1 reversal) substitutions.
        """
        assert self.is_cur_keyword_("AS_SUBSTITUTION")
        location = self.cur_token_location_
        src = []
        dest = []
        if self.next_token_ != "SUB":
            raise VoltLibError("Expected SUB", location)
        while self.next_token_ == "SUB":
            self.expect_keyword_("SUB")
            src.append(self.parse_coverage_())
            self.expect_keyword_("WITH")
            dest.append(self.parse_coverage_())
            self.expect_keyword_("END_SUB")
        self.expect_keyword_("END_SUBSTITUTION")
        max_src = max([len(cov) for cov in src])
        max_dest = max([len(cov) for cov in dest])
        # many to many or mixed is invalid
        if (max_src > 1 and max_dest > 1) or (
            reversal and (max_src > 1 or max_dest > 1)
        ):
            raise VoltLibError("Invalid substitution type", location)
        mapping = dict(zip(tuple(src), tuple(dest)))
        if max_src == 1 and max_dest == 1:
            if reversal:
                sub = ast.SubstitutionReverseChainingSingleDefinition(
                    mapping, location=location
                )
            else:
                sub = ast.SubstitutionSingleDefinition(mapping, location=location)
        elif max_src == 1 and max_dest > 1:
            sub = ast.SubstitutionMultipleDefinition(mapping, location=location)
        elif max_src > 1 and max_dest == 1:
            sub = ast.SubstitutionLigatureDefinition(mapping, location=location)
        return sub

    def parse_position_(self):
        """Parse AS_POSITION ... END_POSITION, dispatching on the
        positioning subtype keyword; returns the subtype AST node."""
        assert self.is_cur_keyword_("AS_POSITION")
        location = self.cur_token_location_
        pos_type = self.expect_name_()
        if pos_type not in ("ATTACH", "ATTACH_CURSIVE", "ADJUST_PAIR", "ADJUST_SINGLE"):
            raise VoltLibError(
                "Expected ATTACH, ATTACH_CURSIVE, ADJUST_PAIR, ADJUST_SINGLE", location
            )
        if pos_type == "ATTACH":
            position = self.parse_attach_()
        elif pos_type == "ATTACH_CURSIVE":
            position = self.parse_attach_cursive_()
        elif pos_type == "ADJUST_PAIR":
            position = self.parse_adjust_pair_()
        elif pos_type == "ADJUST_SINGLE":
            position = self.parse_adjust_single_()
        self.expect_keyword_("END_POSITION")
        return position

    def parse_attach_(self):
        """Parse ATTACH <coverage> TO (<coverage> AT ANCHOR <name>)* ...
        END_ATTACH; returns ast.PositionAttachDefinition."""
        assert self.is_cur_keyword_("ATTACH")
        location = self.cur_token_location_
        coverage = self.parse_coverage_()
        coverage_to = []
        self.expect_keyword_("TO")
        while self.next_token_ != "END_ATTACH":
            cov = self.parse_coverage_()
            self.expect_keyword_("AT")
            self.expect_keyword_("ANCHOR")
            anchor_name = self.expect_string_()
            coverage_to.append((cov, anchor_name))
        self.expect_keyword_("END_ATTACH")
        position = ast.PositionAttachDefinition(
            coverage, coverage_to, location=location
        )
        return position

    def parse_attach_cursive_(self):
        """Parse ATTACH_CURSIVE EXIT* ENTER* END_ATTACH; returns
        ast.PositionAttachCursiveDefinition.

        All EXIT coverages must precede the first ENTER.
        """
        assert self.is_cur_keyword_("ATTACH_CURSIVE")
        location = self.cur_token_location_
        coverages_exit = []
        coverages_enter = []
        while self.next_token_ != "ENTER":
            self.expect_keyword_("EXIT")
            coverages_exit.append(self.parse_coverage_())
        while self.next_token_ != "END_ATTACH":
            self.expect_keyword_("ENTER")
            coverages_enter.append(self.parse_coverage_())
        self.expect_keyword_("END_ATTACH")
        position = ast.PositionAttachCursiveDefinition(
            coverages_exit, coverages_enter, location=location
        )
        return position

    def parse_adjust_pair_(self):
        """Parse ADJUST_PAIR FIRST* SECOND* (<id> <id> BY <pos> <pos>)* ...
        END_ADJUST; returns ast.PositionAdjustPairDefinition.

        Adjustments are keyed by 1-based (first, second) coverage indices.
        """
        assert self.is_cur_keyword_("ADJUST_PAIR")
        location = self.cur_token_location_
        coverages_1 = []
        coverages_2 = []
        adjust_pair = {}
        while self.next_token_ == "FIRST":
            self.advance_lexer_()
            coverage_1 = self.parse_coverage_()
            coverages_1.append(coverage_1)
        while self.next_token_ == "SECOND":
            self.advance_lexer_()
            coverage_2 = self.parse_coverage_()
            coverages_2.append(coverage_2)
        while self.next_token_ != "END_ADJUST":
            id_1 = self.expect_number_()
            id_2 = self.expect_number_()
            self.expect_keyword_("BY")
            pos_1 = self.parse_pos_()
            pos_2 = self.parse_pos_()
            adjust_pair[(id_1, id_2)] = (pos_1, pos_2)
        self.expect_keyword_("END_ADJUST")
        position = ast.PositionAdjustPairDefinition(
            coverages_1, coverages_2, adjust_pair, location=location
        )
        return position

    def parse_adjust_single_(self):
        """Parse ADJUST_SINGLE (<coverage> BY <pos>)* END_ADJUST; returns
        ast.PositionAdjustSingleDefinition."""
        assert self.is_cur_keyword_("ADJUST_SINGLE")
        location = self.cur_token_location_
        adjust_single = []
        while self.next_token_ != "END_ADJUST":
            coverages = self.parse_coverage_()
            self.expect_keyword_("BY")
            pos = self.parse_pos_()
            adjust_single.append((coverages, pos))
        self.expect_keyword_("END_ADJUST")
        position = ast.PositionAdjustSingleDefinition(adjust_single, location=location)
        return position

    def parse_def_anchor_(self):
        """Parse DEF_ANCHOR ... END_ANCHOR; returns ast.AnchorDefinition.

        Duplicate anchor names are rejected per glyph, but only when the
        COMPONENT index also matches (ligatures reuse anchor names across
        components).
        """
        assert self.is_cur_keyword_("DEF_ANCHOR")
        location = self.cur_token_location_
        name = self.expect_string_()
        self.expect_keyword_("ON")
        gid = self.expect_number_()
        self.expect_keyword_("GLYPH")
        glyph_name = self.expect_name_()
        self.expect_keyword_("COMPONENT")
        component = self.expect_number_()
        # check for duplicate anchor names on this glyph
        if glyph_name in self.anchors_:
            anchor = self.anchors_[glyph_name].resolve(name)
            if anchor is not None and anchor.component == component:
                raise VoltLibError(
                    'Anchor "%s" already defined, '
                    "anchor names are case insensitive" % name,
                    location,
                )
        if self.next_token_ == "LOCKED":
            locked = True
            self.advance_lexer_()
        else:
            locked = False
        self.expect_keyword_("AT")
        pos = self.parse_pos_()
        self.expect_keyword_("END_ANCHOR")
        anchor = ast.AnchorDefinition(
            name, gid, glyph_name, component, locked, pos, location=location
        )
        if glyph_name not in self.anchors_:
            self.anchors_[glyph_name] = SymbolTable()
        self.anchors_[glyph_name].define(name, anchor)
        return anchor

    def parse_adjust_by_(self):
        """Parse one ADJUST_BY <adjustment> AT <size> clause; returns the
        (adjustment, ppem size) pair."""
        self.advance_lexer_()
        assert self.is_cur_keyword_("ADJUST_BY")
        adjustment = self.expect_number_()
        self.expect_keyword_("AT")
        size = self.expect_number_()
        return adjustment, size

    def parse_pos_(self):
        """Parse POS [ADV ...] [DX ...] [DY ...] END_POS; returns ast.Pos.

        Each component is optional and may carry per-ppem ADJUST_BY
        deltas, collected as {size: adjustment} dicts.
        """
        # VOLT syntax doesn't seem to take device Y advance
        self.advance_lexer_()
        location = self.cur_token_location_
        assert self.is_cur_keyword_("POS"), location
        adv = None
        dx = None
        dy = None
        adv_adjust_by = {}
        dx_adjust_by = {}
        dy_adjust_by = {}
        if self.next_token_ == "ADV":
            self.advance_lexer_()
            adv = self.expect_number_()
            while self.next_token_ == "ADJUST_BY":
                adjustment, size = self.parse_adjust_by_()
                adv_adjust_by[size] = adjustment
        if self.next_token_ == "DX":
            self.advance_lexer_()
            dx = self.expect_number_()
            while self.next_token_ == "ADJUST_BY":
                adjustment, size = self.parse_adjust_by_()
                dx_adjust_by[size] = adjustment
        if self.next_token_ == "DY":
            self.advance_lexer_()
            dy = self.expect_number_()
            while self.next_token_ == "ADJUST_BY":
                adjustment, size = self.parse_adjust_by_()
                dy_adjust_by[size] = adjustment
        self.expect_keyword_("END_POS")
        return ast.Pos(adv, dx, dy, adv_adjust_by, dx_adjust_by, dy_adjust_by)

    def parse_unicode_values_(self):
        """Parse a UNICODEVALUES string like "U+0041,U+0042" into a list
        of ints, or None when the list is empty.

        Raises VoltLibError when a value is not valid hex.
        """
        location = self.cur_token_location_
        try:
            unicode_values = self.expect_string_().split(",")
            # Strip the leading "U+" and parse the rest as hexadecimal.
            unicode_values = [int(uni[2:], 16) for uni in unicode_values if uni != ""]
        except ValueError as err:
            raise VoltLibError(str(err), location)
        return unicode_values if unicode_values != [] else None

    def parse_enum_(self):
        """Parse ENUM <coverage> END_ENUM; returns ast.Enum."""
        self.expect_keyword_("ENUM")
        location = self.cur_token_location_
        enum = ast.Enum(self.parse_coverage_(), location=location)
        self.expect_keyword_("END_ENUM")
        return enum

    def parse_coverage_(self):
        """Parse a sequence of GLYPH/GROUP/RANGE/ENUM coverage items;
        returns them as a tuple of AST nodes (possibly empty).

        GroupName and Range nodes keep a back-reference to this parser so
        they can resolve names lazily.
        """
        coverage = []
        location = self.cur_token_location_
        while self.next_token_ in ("GLYPH", "GROUP", "RANGE", "ENUM"):
            if self.next_token_ == "ENUM":
                enum = self.parse_enum_()
                coverage.append(enum)
            elif self.next_token_ == "GLYPH":
                self.expect_keyword_("GLYPH")
                name = self.expect_string_()
                coverage.append(ast.GlyphName(name, location=location))
            elif self.next_token_ == "GROUP":
                self.expect_keyword_("GROUP")
                name = self.expect_string_()
                coverage.append(ast.GroupName(name, self, location=location))
            elif self.next_token_ == "RANGE":
                self.expect_keyword_("RANGE")
                start = self.expect_string_()
                self.expect_keyword_("TO")
                end = self.expect_string_()
                coverage.append(ast.Range(start, end, self, location=location))
        return tuple(coverage)

    def resolve_group(self, group_name):
        """Return the GroupDefinition for *group_name*, or None."""
        return self.groups_.resolve(group_name)

    def glyph_range(self, start, end):
        """Return glyph names from *start* to *end* in definition order."""
        return self.glyphs_.range(start, end)

    def parse_ppem_(self):
        """Parse a *_PPEM setting; returns ast.SettingDefinition."""
        location = self.cur_token_location_
        ppem_name = self.cur_token_
        value = self.expect_number_()
        setting = ast.SettingDefinition(ppem_name, value, location=location)
        return setting

    def parse_noarg_option_(self):
        """Parse a flag-style setting (no argument, value True);
        returns ast.SettingDefinition."""
        location = self.cur_token_location_
        name = self.cur_token_
        value = True
        setting = ast.SettingDefinition(name, value, location=location)
        return setting

    def parse_cmap_format(self):
        """Parse CMAP_FORMAT with its three numeric arguments; returns
        ast.SettingDefinition with a 3-tuple value."""
        location = self.cur_token_location_
        name = self.cur_token_
        value = (self.expect_number_(), self.expect_number_(), self.expect_number_())
        setting = ast.SettingDefinition(name, value, location=location)
        return setting

    def is_cur_keyword_(self, k):
        """Return True when the current token is the NAME keyword *k*."""
        return (self.cur_token_type_ is Lexer.NAME) and (self.cur_token_ == k)

    def expect_string_(self):
        """Advance and return the current token, which must be a STRING."""
        self.advance_lexer_()
        if self.cur_token_type_ is not Lexer.STRING:
            raise VoltLibError("Expected a string", self.cur_token_location_)
        return self.cur_token_

    def expect_keyword_(self, keyword):
        """Advance and return *keyword*, which must be the next token."""
        self.advance_lexer_()
        if self.cur_token_type_ is Lexer.NAME and self.cur_token_ == keyword:
            return self.cur_token_
        raise VoltLibError('Expected "%s"' % keyword, self.cur_token_location_)

    def expect_name_(self):
        """Advance and return the current token, which must be a NAME."""
        self.advance_lexer_()
        if self.cur_token_type_ is Lexer.NAME:
            return self.cur_token_
        raise VoltLibError("Expected a name", self.cur_token_location_)

    def expect_number_(self):
        """Advance and return the current token, which must be a NUMBER."""
        self.advance_lexer_()
        if self.cur_token_type_ is not Lexer.NUMBER:
            raise VoltLibError("Expected a number", self.cur_token_location_)
        return self.cur_token_

    def advance_lexer_(self):
        """Shift the lookahead token into cur_token_* and fetch the next.

        Hitting the "END" keyword or lexer exhaustion sets the lookahead
        to (None, None), which terminates parse().
        """
        self.cur_token_type_, self.cur_token_, self.cur_token_location_ = (
            self.next_token_type_,
            self.next_token_,
            self.next_token_location_,
        )
        try:
            if self.is_cur_keyword_("END"):
                raise StopIteration
            (
                self.next_token_type_,
                self.next_token_,
                self.next_token_location_,
            ) = self.lexer_.next()
        except StopIteration:
            self.next_token_type_, self.next_token_ = (None, None)
613
614
class SymbolTable(object):
    """A scoped name -> item mapping with case-insensitive fallback.

    Scopes form a stack: define() writes to the innermost scope and
    resolve() searches from the innermost scope outwards.
    """

    def __init__(self):
        self.scopes_ = [{}]

    def enter_scope(self):
        """Push a new innermost scope."""
        self.scopes_.append({})

    def exit_scope(self):
        """Discard the innermost scope."""
        self.scopes_.pop()

    def define(self, name, item):
        """Bind *name* to *item* in the innermost scope."""
        self.scopes_[-1][name] = item

    def resolve(self, name, case_insensitive=True):
        """Look up *name*, innermost scope first; None when not found.

        An exact match wins; otherwise, when *case_insensitive* is true,
        a case-folded match is accepted.
        """
        for scope in reversed(self.scopes_):
            item = scope.get(name)
            if item:
                return item
        if case_insensitive:
            # Bug fix: the fallback previously reused the leaked loop
            # variable, so only the outermost scope was searched and
            # case-insensitive matches in inner scopes were missed.
            folded = name.lower()
            for scope in reversed(self.scopes_):
                for key in scope:
                    if key.lower() == folded:
                        return scope[key]
        return None
638
639
class OrderedSymbolTable(SymbolTable):
    """SymbolTable that preserves definition order, enabling range().

    Lookups default to case-sensitive, unlike the base class.
    """

    def __init__(self):
        self.scopes_ = [{}]

    def enter_scope(self):
        self.scopes_.append({})

    def resolve(self, name, case_insensitive=False):
        """Look up *name*; case-sensitive by default.

        Bug fix: the base-class result was previously discarded (missing
        ``return``), so this always returned None and duplicate-glyph
        detection in Parser.parse_def_glyph_ could never fire.
        """
        return SymbolTable.resolve(self, name, case_insensitive=case_insensitive)

    def range(self, start, end):
        """Return the names from *start* to *end* inclusive, in definition
        order, from the innermost scope containing both; None otherwise.

        Relies on dict preserving insertion order.
        """
        for scope in reversed(self.scopes_):
            if start in scope and end in scope:
                start_idx = list(scope.keys()).index(start)
                end_idx = list(scope.keys()).index(end)
                return list(scope.keys())[start_idx : end_idx + 1]
        return None
657