import fontTools.voltLib.ast as ast
from fontTools.voltLib.lexer import Lexer
from fontTools.voltLib.error import VoltLibError
from io import open

PARSE_FUNCS = {
    "DEF_GLYPH": "parse_def_glyph_",
    "DEF_GROUP": "parse_def_group_",
    "DEF_SCRIPT": "parse_def_script_",
    "DEF_LOOKUP": "parse_def_lookup_",
    "DEF_ANCHOR": "parse_def_anchor_",
    "GRID_PPEM": "parse_ppem_",
    "PRESENTATION_PPEM": "parse_ppem_",
    "PPOSITIONING_PPEM": "parse_ppem_",
    "COMPILER_USEEXTENSIONLOOKUPS": "parse_noarg_option_",
    "COMPILER_USEPAIRPOSFORMAT2": "parse_noarg_option_",
    "CMAP_FORMAT": "parse_cmap_format",
    "DO_NOT_TOUCH_CMAP": "parse_noarg_option_",
}


class Parser(object):
    def __init__(self, path):
        self.doc_ = ast.VoltFile()
        self.glyphs_ = OrderedSymbolTable()
        self.groups_ = SymbolTable()
        self.anchors_ = {}  # dictionary of SymbolTable() keyed by glyph
        self.scripts_ = SymbolTable()
        self.langs_ = SymbolTable()
        self.lookups_ = SymbolTable()
        self.next_token_type_, self.next_token_ = (None, None)
        self.next_token_location_ = None
        self.make_lexer_(path)
        self.advance_lexer_()

    def make_lexer_(self, file_or_path):
        if hasattr(file_or_path, "read"):
            filename = getattr(file_or_path, "name", None)
            data = file_or_path.read()
        else:
            filename = file_or_path
            with open(file_or_path, "r") as f:
                data = f.read()
        self.lexer_ = Lexer(data, filename)

    def parse(self):
        statements = self.doc_.statements
        while self.next_token_type_ is not None:
            self.advance_lexer_()
            if self.cur_token_ in PARSE_FUNCS.keys():
                func = getattr(self, PARSE_FUNCS[self.cur_token_])
                statements.append(func())
            elif self.is_cur_keyword_("END"):
                break
            else:
                raise VoltLibError(
                    "Expected " + ", ".join(sorted(PARSE_FUNCS.keys())),
                    self.cur_token_location_,
                )
        return self.doc_

    def parse_def_glyph_(self):
        assert self.is_cur_keyword_("DEF_GLYPH")
        location = self.cur_token_location_
        name = self.expect_string_()
        self.expect_keyword_("ID")
        gid = self.expect_number_()
        if gid < 0:
            raise VoltLibError("Invalid glyph ID", self.cur_token_location_)
        gunicode = None
        if self.next_token_ == "UNICODE":
            self.expect_keyword_("UNICODE")
            gunicode = [self.expect_number_()]
            if gunicode[0] < 0:
                raise VoltLibError("Invalid glyph UNICODE", self.cur_token_location_)
        elif self.next_token_ == "UNICODEVALUES":
            self.expect_keyword_("UNICODEVALUES")
            gunicode = self.parse_unicode_values_()
        gtype = None
        if self.next_token_ == "TYPE":
            self.expect_keyword_("TYPE")
            gtype = self.expect_name_()
            assert gtype in ("BASE", "LIGATURE", "MARK", "COMPONENT")
        components = None
        if self.next_token_ == "COMPONENTS":
            self.expect_keyword_("COMPONENTS")
            components = self.expect_number_()
        self.expect_keyword_("END_GLYPH")
        if self.glyphs_.resolve(name) is not None:
            raise VoltLibError(
                'Glyph "%s" (gid %i) already defined' % (name, gid), location
            )
        def_glyph = ast.GlyphDefinition(
            name, gid, gunicode, gtype, components, location=location
        )
        self.glyphs_.define(name, def_glyph)
        return def_glyph

    def parse_def_group_(self):
        assert self.is_cur_keyword_("DEF_GROUP")
        location = self.cur_token_location_
        name = self.expect_string_()
        enum = None
        if self.next_token_ == "ENUM":
            enum = self.parse_enum_()
        self.expect_keyword_("END_GROUP")
        if self.groups_.resolve(name) is not None:
            raise VoltLibError(
                'Glyph group "%s" already defined, '
"group names are case insensitive" % name, 111 location, 112 ) 113 def_group = ast.GroupDefinition(name, enum, location=location) 114 self.groups_.define(name, def_group) 115 return def_group 116 117 def parse_def_script_(self): 118 assert self.is_cur_keyword_("DEF_SCRIPT") 119 location = self.cur_token_location_ 120 name = None 121 if self.next_token_ == "NAME": 122 self.expect_keyword_("NAME") 123 name = self.expect_string_() 124 self.expect_keyword_("TAG") 125 tag = self.expect_string_() 126 if self.scripts_.resolve(tag) is not None: 127 raise VoltLibError( 128 'Script "%s" already defined, ' 129 "script tags are case insensitive" % tag, 130 location, 131 ) 132 self.langs_.enter_scope() 133 langs = [] 134 while self.next_token_ != "END_SCRIPT": 135 self.advance_lexer_() 136 lang = self.parse_langsys_() 137 self.expect_keyword_("END_LANGSYS") 138 if self.langs_.resolve(lang.tag) is not None: 139 raise VoltLibError( 140 'Language "%s" already defined in script "%s", ' 141 "language tags are case insensitive" % (lang.tag, tag), 142 location, 143 ) 144 self.langs_.define(lang.tag, lang) 145 langs.append(lang) 146 self.expect_keyword_("END_SCRIPT") 147 self.langs_.exit_scope() 148 def_script = ast.ScriptDefinition(name, tag, langs, location=location) 149 self.scripts_.define(tag, def_script) 150 return def_script 151 152 def parse_langsys_(self): 153 assert self.is_cur_keyword_("DEF_LANGSYS") 154 location = self.cur_token_location_ 155 name = None 156 if self.next_token_ == "NAME": 157 self.expect_keyword_("NAME") 158 name = self.expect_string_() 159 self.expect_keyword_("TAG") 160 tag = self.expect_string_() 161 features = [] 162 while self.next_token_ != "END_LANGSYS": 163 self.advance_lexer_() 164 feature = self.parse_feature_() 165 self.expect_keyword_("END_FEATURE") 166 features.append(feature) 167 def_langsys = ast.LangSysDefinition(name, tag, features, location=location) 168 return def_langsys 169 170 def parse_feature_(self): 171 assert self.is_cur_keyword_("DEF_FEATURE") 172 location = self.cur_token_location_ 173 self.expect_keyword_("NAME") 174 name = self.expect_string_() 175 self.expect_keyword_("TAG") 176 tag = self.expect_string_() 177 lookups = [] 178 while self.next_token_ != "END_FEATURE": 179 # self.advance_lexer_() 180 self.expect_keyword_("LOOKUP") 181 lookup = self.expect_string_() 182 lookups.append(lookup) 183 feature = ast.FeatureDefinition(name, tag, lookups, location=location) 184 return feature 185 186 def parse_def_lookup_(self): 187 assert self.is_cur_keyword_("DEF_LOOKUP") 188 location = self.cur_token_location_ 189 name = self.expect_string_() 190 if not name[0].isalpha(): 191 raise VoltLibError( 192 'Lookup name "%s" must start with a letter' % name, location 193 ) 194 if self.lookups_.resolve(name) is not None: 195 raise VoltLibError( 196 'Lookup "%s" already defined, ' 197 "lookup names are case insensitive" % name, 198 location, 199 ) 200 process_base = True 201 if self.next_token_ == "PROCESS_BASE": 202 self.advance_lexer_() 203 elif self.next_token_ == "SKIP_BASE": 204 self.advance_lexer_() 205 process_base = False 206 process_marks = True 207 mark_glyph_set = None 208 if self.next_token_ == "PROCESS_MARKS": 209 self.advance_lexer_() 210 if self.next_token_ == "MARK_GLYPH_SET": 211 self.advance_lexer_() 212 mark_glyph_set = self.expect_string_() 213 elif self.next_token_ == "ALL": 214 self.advance_lexer_() 215 elif self.next_token_ == "NONE": 216 self.advance_lexer_() 217 process_marks = False 218 elif self.next_token_type_ == Lexer.STRING: 219 
                process_marks = self.expect_string_()
            else:
                raise VoltLibError(
                    "Expected ALL, NONE, MARK_GLYPH_SET or an ID. "
                    "Got %s" % (self.next_token_type_),
                    location,
                )
        elif self.next_token_ == "SKIP_MARKS":
            self.advance_lexer_()
            process_marks = False
        direction = None
        if self.next_token_ == "DIRECTION":
            self.expect_keyword_("DIRECTION")
            direction = self.expect_name_()
            assert direction in ("LTR", "RTL")
        reversal = None
        if self.next_token_ == "REVERSAL":
            self.expect_keyword_("REVERSAL")
            reversal = True
        comments = None
        if self.next_token_ == "COMMENTS":
            self.expect_keyword_("COMMENTS")
            comments = self.expect_string_().replace(r"\n", "\n")
        context = []
        while self.next_token_ in ("EXCEPT_CONTEXT", "IN_CONTEXT"):
            context = self.parse_context_()
        as_pos_or_sub = self.expect_name_()
        sub = None
        pos = None
        if as_pos_or_sub == "AS_SUBSTITUTION":
            sub = self.parse_substitution_(reversal)
        elif as_pos_or_sub == "AS_POSITION":
            pos = self.parse_position_()
        else:
            raise VoltLibError(
                "Expected AS_SUBSTITUTION or AS_POSITION. Got %s" % (as_pos_or_sub),
                location,
            )
        def_lookup = ast.LookupDefinition(
            name,
            process_base,
            process_marks,
            mark_glyph_set,
            direction,
            reversal,
            comments,
            context,
            sub,
            pos,
            location=location,
        )
        self.lookups_.define(name, def_lookup)
        return def_lookup

    def parse_context_(self):
        location = self.cur_token_location_
        contexts = []
        while self.next_token_ in ("EXCEPT_CONTEXT", "IN_CONTEXT"):
            side = None
            coverage = None
            ex_or_in = self.expect_name_()
            # side_contexts = []  # XXX
            if self.next_token_ != "END_CONTEXT":
                left = []
                right = []
                while self.next_token_ in ("LEFT", "RIGHT"):
                    side = self.expect_name_()
                    coverage = self.parse_coverage_()
                    if side == "LEFT":
                        left.append(coverage)
                    else:
                        right.append(coverage)
                self.expect_keyword_("END_CONTEXT")
                context = ast.ContextDefinition(
                    ex_or_in, left, right, location=location
                )
                contexts.append(context)
            else:
                self.expect_keyword_("END_CONTEXT")
        return contexts

    def parse_substitution_(self, reversal):
        assert self.is_cur_keyword_("AS_SUBSTITUTION")
        location = self.cur_token_location_
        src = []
        dest = []
        if self.next_token_ != "SUB":
            raise VoltLibError("Expected SUB", location)
        while self.next_token_ == "SUB":
            self.expect_keyword_("SUB")
            src.append(self.parse_coverage_())
            self.expect_keyword_("WITH")
            dest.append(self.parse_coverage_())
            self.expect_keyword_("END_SUB")
        self.expect_keyword_("END_SUBSTITUTION")
        max_src = max([len(cov) for cov in src])
        max_dest = max([len(cov) for cov in dest])
        # many to many or mixed is invalid
        if (max_src > 1 and max_dest > 1) or (
            reversal and (max_src > 1 or max_dest > 1)
        ):
            raise VoltLibError("Invalid substitution type", location)
        mapping = dict(zip(tuple(src), tuple(dest)))
        if max_src == 1 and max_dest == 1:
            if reversal:
                sub = ast.SubstitutionReverseChainingSingleDefinition(
                    mapping, location=location
                )
            else:
                sub = ast.SubstitutionSingleDefinition(mapping, location=location)
        elif max_src == 1 and max_dest > 1:
            sub = ast.SubstitutionMultipleDefinition(mapping, location=location)
        elif max_src > 1 and max_dest == 1:
            sub = ast.SubstitutionLigatureDefinition(mapping, location=location)
        return sub

    def parse_position_(self):
        assert self.is_cur_keyword_("AS_POSITION")
        location = self.cur_token_location_
        pos_type = self.expect_name_()
        if pos_type not in ("ATTACH", "ATTACH_CURSIVE", "ADJUST_PAIR", "ADJUST_SINGLE"):
            raise VoltLibError(
                "Expected ATTACH, ATTACH_CURSIVE, ADJUST_PAIR, ADJUST_SINGLE", location
            )
        if pos_type == "ATTACH":
            position = self.parse_attach_()
        elif pos_type == "ATTACH_CURSIVE":
            position = self.parse_attach_cursive_()
        elif pos_type == "ADJUST_PAIR":
            position = self.parse_adjust_pair_()
        elif pos_type == "ADJUST_SINGLE":
            position = self.parse_adjust_single_()
        self.expect_keyword_("END_POSITION")
        return position

    def parse_attach_(self):
        assert self.is_cur_keyword_("ATTACH")
        location = self.cur_token_location_
        coverage = self.parse_coverage_()
        coverage_to = []
        self.expect_keyword_("TO")
        while self.next_token_ != "END_ATTACH":
            cov = self.parse_coverage_()
            self.expect_keyword_("AT")
            self.expect_keyword_("ANCHOR")
            anchor_name = self.expect_string_()
            coverage_to.append((cov, anchor_name))
        self.expect_keyword_("END_ATTACH")
        position = ast.PositionAttachDefinition(
            coverage, coverage_to, location=location
        )
        return position

    def parse_attach_cursive_(self):
        assert self.is_cur_keyword_("ATTACH_CURSIVE")
        location = self.cur_token_location_
        coverages_exit = []
        coverages_enter = []
        while self.next_token_ != "ENTER":
            self.expect_keyword_("EXIT")
            coverages_exit.append(self.parse_coverage_())
        while self.next_token_ != "END_ATTACH":
            self.expect_keyword_("ENTER")
            coverages_enter.append(self.parse_coverage_())
        self.expect_keyword_("END_ATTACH")
        position = ast.PositionAttachCursiveDefinition(
            coverages_exit, coverages_enter, location=location
        )
        return position

    def parse_adjust_pair_(self):
        assert self.is_cur_keyword_("ADJUST_PAIR")
        location = self.cur_token_location_
        coverages_1 = []
        coverages_2 = []
        adjust_pair = {}
        while self.next_token_ == "FIRST":
            self.advance_lexer_()
            coverage_1 = self.parse_coverage_()
            coverages_1.append(coverage_1)
        while self.next_token_ == "SECOND":
            self.advance_lexer_()
            coverage_2 = self.parse_coverage_()
            coverages_2.append(coverage_2)
        while self.next_token_ != "END_ADJUST":
            id_1 = self.expect_number_()
            id_2 = self.expect_number_()
            self.expect_keyword_("BY")
            pos_1 = self.parse_pos_()
            pos_2 = self.parse_pos_()
            adjust_pair[(id_1, id_2)] = (pos_1, pos_2)
        self.expect_keyword_("END_ADJUST")
        position = ast.PositionAdjustPairDefinition(
            coverages_1, coverages_2, adjust_pair, location=location
        )
        return position

    def parse_adjust_single_(self):
        assert self.is_cur_keyword_("ADJUST_SINGLE")
        location = self.cur_token_location_
        adjust_single = []
        while self.next_token_ != "END_ADJUST":
            coverages = self.parse_coverage_()
            self.expect_keyword_("BY")
            pos = self.parse_pos_()
            adjust_single.append((coverages, pos))
        self.expect_keyword_("END_ADJUST")
        position = ast.PositionAdjustSingleDefinition(adjust_single, location=location)
        return position

    def parse_def_anchor_(self):
        assert self.is_cur_keyword_("DEF_ANCHOR")
        location = self.cur_token_location_
        name = self.expect_string_()
        self.expect_keyword_("ON")
        gid = self.expect_number_()
        self.expect_keyword_("GLYPH")
        glyph_name = self.expect_name_()
        self.expect_keyword_("COMPONENT")
        component = self.expect_number_()
        # check for duplicate anchor names on this glyph
        if glyph_name in self.anchors_:
            anchor = self.anchors_[glyph_name].resolve(name)
            if anchor is not None and anchor.component == component:
                raise VoltLibError(
                    'Anchor "%s" already defined, '
                    "anchor names are case insensitive" % name,
                    location,
                )
        if self.next_token_ == "LOCKED":
            locked = True
            self.advance_lexer_()
        else:
            locked = False
        self.expect_keyword_("AT")
        pos = self.parse_pos_()
        self.expect_keyword_("END_ANCHOR")
        anchor = ast.AnchorDefinition(
            name, gid, glyph_name, component, locked, pos, location=location
        )
        if glyph_name not in self.anchors_:
            self.anchors_[glyph_name] = SymbolTable()
        self.anchors_[glyph_name].define(name, anchor)
        return anchor

    def parse_adjust_by_(self):
        self.advance_lexer_()
        assert self.is_cur_keyword_("ADJUST_BY")
        adjustment = self.expect_number_()
        self.expect_keyword_("AT")
        size = self.expect_number_()
        return adjustment, size

    def parse_pos_(self):
        # VOLT syntax doesn't seem to take device Y advance
        self.advance_lexer_()
        location = self.cur_token_location_
        assert self.is_cur_keyword_("POS"), location
        adv = None
        dx = None
        dy = None
        adv_adjust_by = {}
        dx_adjust_by = {}
        dy_adjust_by = {}
        if self.next_token_ == "ADV":
            self.advance_lexer_()
            adv = self.expect_number_()
            while self.next_token_ == "ADJUST_BY":
                adjustment, size = self.parse_adjust_by_()
                adv_adjust_by[size] = adjustment
        if self.next_token_ == "DX":
            self.advance_lexer_()
            dx = self.expect_number_()
            while self.next_token_ == "ADJUST_BY":
                adjustment, size = self.parse_adjust_by_()
                dx_adjust_by[size] = adjustment
        if self.next_token_ == "DY":
            self.advance_lexer_()
            dy = self.expect_number_()
            while self.next_token_ == "ADJUST_BY":
                adjustment, size = self.parse_adjust_by_()
                dy_adjust_by[size] = adjustment
        self.expect_keyword_("END_POS")
        return ast.Pos(adv, dx, dy, adv_adjust_by, dx_adjust_by, dy_adjust_by)

    def parse_unicode_values_(self):
        location = self.cur_token_location_
        try:
            unicode_values = self.expect_string_().split(",")
            unicode_values = [int(uni[2:], 16) for uni in unicode_values if uni != ""]
        except ValueError as err:
            raise VoltLibError(str(err), location)
        return unicode_values if unicode_values != [] else None

    def parse_enum_(self):
        self.expect_keyword_("ENUM")
        location = self.cur_token_location_
        enum = ast.Enum(self.parse_coverage_(), location=location)
        self.expect_keyword_("END_ENUM")
        return enum

    def parse_coverage_(self):
        coverage = []
        location = self.cur_token_location_
        while self.next_token_ in ("GLYPH", "GROUP", "RANGE", "ENUM"):
            if self.next_token_ == "ENUM":
                enum = self.parse_enum_()
                coverage.append(enum)
            elif self.next_token_ == "GLYPH":
                self.expect_keyword_("GLYPH")
                name = self.expect_string_()
                coverage.append(ast.GlyphName(name, location=location))
            elif self.next_token_ == "GROUP":
                self.expect_keyword_("GROUP")
                name = self.expect_string_()
                coverage.append(ast.GroupName(name, self, location=location))
            elif self.next_token_ == "RANGE":
                self.expect_keyword_("RANGE")
                start = self.expect_string_()
                self.expect_keyword_("TO")
                end = self.expect_string_()
                coverage.append(ast.Range(start, end, self, location=location))
        return tuple(coverage)

    def resolve_group(self, group_name):
        return self.groups_.resolve(group_name)

    def glyph_range(self, start, end):
        return self.glyphs_.range(start, end)

    def parse_ppem_(self):
        location = self.cur_token_location_
        ppem_name = self.cur_token_
        value = self.expect_number_()
        setting = ast.SettingDefinition(ppem_name, value, location=location)
        return setting

    def parse_noarg_option_(self):
        location = self.cur_token_location_
        name = self.cur_token_
        value = True
        setting = ast.SettingDefinition(name, value, location=location)
        return setting

    def parse_cmap_format(self):
        location = self.cur_token_location_
        name = self.cur_token_
        value = (self.expect_number_(), self.expect_number_(), self.expect_number_())
        setting = ast.SettingDefinition(name, value, location=location)
        return setting

    def is_cur_keyword_(self, k):
        return (self.cur_token_type_ is Lexer.NAME) and (self.cur_token_ == k)

    def expect_string_(self):
        self.advance_lexer_()
        if self.cur_token_type_ is not Lexer.STRING:
            raise VoltLibError("Expected a string", self.cur_token_location_)
        return self.cur_token_

    def expect_keyword_(self, keyword):
        self.advance_lexer_()
        if self.cur_token_type_ is Lexer.NAME and self.cur_token_ == keyword:
            return self.cur_token_
        raise VoltLibError('Expected "%s"' % keyword, self.cur_token_location_)

    def expect_name_(self):
        self.advance_lexer_()
        if self.cur_token_type_ is Lexer.NAME:
            return self.cur_token_
        raise VoltLibError("Expected a name", self.cur_token_location_)

    def expect_number_(self):
        self.advance_lexer_()
        if self.cur_token_type_ is not Lexer.NUMBER:
            raise VoltLibError("Expected a number", self.cur_token_location_)
        return self.cur_token_

    def advance_lexer_(self):
        self.cur_token_type_, self.cur_token_, self.cur_token_location_ = (
            self.next_token_type_,
            self.next_token_,
            self.next_token_location_,
        )
        try:
            if self.is_cur_keyword_("END"):
                raise StopIteration
            (
                self.next_token_type_,
                self.next_token_,
                self.next_token_location_,
            ) = self.lexer_.next()
        except StopIteration:
            self.next_token_type_, self.next_token_ = (None, None)


class SymbolTable(object):
    def __init__(self):
        self.scopes_ = [{}]

    def enter_scope(self):
        self.scopes_.append({})

    def exit_scope(self):
        self.scopes_.pop()

    def define(self, name, item):
        self.scopes_[-1][name] = item

    def resolve(self, name, case_insensitive=True):
        for scope in reversed(self.scopes_):
            item = scope.get(name)
            if item:
                return item
            if case_insensitive:
                for key in scope:
                    if key.lower() == name.lower():
                        return scope[key]
        return None


class OrderedSymbolTable(SymbolTable):
    def __init__(self):
        self.scopes_ = [{}]

    def enter_scope(self):
        self.scopes_.append({})

    def resolve(self, name, case_insensitive=False):
        # Return the base-class result; without the return the lookup result
        # was silently discarded and duplicate-glyph detection never fired.
        return SymbolTable.resolve(self, name, case_insensitive=case_insensitive)

    def range(self, start, end):
        for scope in reversed(self.scopes_):
            if start in scope and end in scope:
                start_idx = list(scope.keys()).index(start)
                end_idx = list(scope.keys()).index(end)
                return list(scope.keys())[start_idx : end_idx + 1]
        return None
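

# Minimal usage sketch (not part of the parser above): one way to drive this
# parser from the command line and list the top-level statements it produces.
# The default "path/to/font.vtp" filename is a placeholder, and the printed
# attributes assume only what the code above defines (doc.statements, and an
# optional "name" attribute on some AST nodes).
if __name__ == "__main__":
    import sys

    volt_path = sys.argv[1] if len(sys.argv) > 1 else "path/to/font.vtp"
    doc = Parser(volt_path).parse()
    for statement in doc.statements:
        # Print the AST node type and its name, if it has one.
        print(type(statement).__name__, getattr(statement, "name", ""))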