from fontTools.feaLib.error import FeatureLibError, IncludedFeaNotFound
from fontTools.feaLib.location import FeatureLibLocation
import re
import os

try:
    import cython
except ImportError:
    # if cython not installed, use mock module with no-op decorators and types
    from fontTools.misc import cython


class Lexer(object):
    """Tokenizer for feature-file text.

    Iterating a ``Lexer`` yields ``(token_type, token, location)`` tuples.
    ``NEWLINE`` tokens are produced internally by :meth:`next_` but are
    filtered out by :meth:`__next__`.
    """

    # Token types.
    NUMBER = "NUMBER"
    HEXADECIMAL = "HEXADECIMAL"
    OCTAL = "OCTAL"
    NUMBERS = (NUMBER, HEXADECIMAL, OCTAL)
    FLOAT = "FLOAT"
    STRING = "STRING"
    NAME = "NAME"
    FILENAME = "FILENAME"
    GLYPHCLASS = "GLYPHCLASS"
    CID = "CID"
    SYMBOL = "SYMBOL"
    COMMENT = "COMMENT"
    NEWLINE = "NEWLINE"
    ANONYMOUS_BLOCK = "ANONYMOUS_BLOCK"

    # Character classes used by the scanner.
    CHAR_WHITESPACE_ = " \t"
    CHAR_NEWLINE_ = "\r\n"
    CHAR_SYMBOL_ = ",;:-+'{}[]<>()="
    CHAR_DIGIT_ = "0123456789"
    CHAR_HEXDIGIT_ = "0123456789ABCDEFabcdef"
    CHAR_LETTER_ = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
    CHAR_NAME_START_ = CHAR_LETTER_ + "_+*:.^~!\\"
    CHAR_NAME_CONTINUATION_ = CHAR_LETTER_ + CHAR_DIGIT_ + "_.+*:^~!/-"

    RE_GLYPHCLASS = re.compile(r"^[A-Za-z_0-9.\-]+$")

    # Lexer modes. FILENAME mode is entered after an "include" name token so
    # that the following "(...)" is read as a single FILENAME token.
    MODE_NORMAL_ = "NORMAL"
    MODE_FILENAME_ = "FILENAME"

    def __init__(self, text, filename):
        """Create a lexer over ``text``.

        Args:
            text: the full feature-file source as a string.
            filename: name used in reported locations; when falsy,
                ``"<features>"`` is used instead.
        """
        self.filename_ = filename
        self.line_ = 1
        self.pos_ = 0
        self.line_start_ = 0
        self.text_ = text
        self.text_length_ = len(text)
        self.mode_ = Lexer.MODE_NORMAL_

    def __iter__(self):
        return self

    def next(self):  # Python 2
        return self.__next__()

    def __next__(self):  # Python 3
        """Return the next token that is not a NEWLINE.

        Raises:
            StopIteration: when the end of the text is reached.
            FeatureLibError: on malformed input.
        """
        while True:
            token_type, token, location = self.next_()
            if token_type != Lexer.NEWLINE:
                return (token_type, token, location)

    def location_(self):
        """Return the location (file, line, 1-based column) of ``pos_``."""
        column = self.pos_ - self.line_start_ + 1
        return FeatureLibLocation(self.filename_ or "<features>", self.line_, column)

    def next_(self):
        """Scan and return the next raw token, including NEWLINE tokens.

        Returns:
            A ``(token_type, token, location)`` tuple.

        Raises:
            StopIteration: when the end of the text is reached.
            FeatureLibError: on malformed input.
        """
        self.scan_over_(Lexer.CHAR_WHITESPACE_)
        location = self.location_()
        start = self.pos_
        text = self.text_
        limit = len(text)
        if start >= limit:
            raise StopIteration()
        cur_char = text[start]
        # next_char is None when cur_char is the last character of the text.
        # Every membership test on it must be guarded: ``None in "abc"``
        # raises TypeError, and an empty-string sentinel would not help
        # because ``"" in "abc"`` is True.
        next_char = text[start + 1] if start + 1 < limit else None
        next_is_digit = next_char is not None and next_char in Lexer.CHAR_DIGIT_

        if cur_char == "\n":
            self.pos_ += 1
            self.line_ += 1
            self.line_start_ = self.pos_
            return (Lexer.NEWLINE, None, location)
        if cur_char == "\r":
            # Treat "\r\n" as a single line break.
            self.pos_ += 2 if next_char == "\n" else 1
            self.line_ += 1
            self.line_start_ = self.pos_
            return (Lexer.NEWLINE, None, location)
        if cur_char == "#":
            self.scan_until_(Lexer.CHAR_NEWLINE_)
            return (Lexer.COMMENT, text[start : self.pos_], location)

        if self.mode_ == Lexer.MODE_FILENAME_:
            if cur_char != "(":
                raise FeatureLibError("Expected '(' before file name", location)
            self.scan_until_(")")
            cur_char = text[self.pos_] if self.pos_ < limit else None
            if cur_char != ")":
                raise FeatureLibError("Expected ')' after file name", location)
            self.pos_ += 1
            self.mode_ = Lexer.MODE_NORMAL_
            # The token is the text between the parentheses.
            return (Lexer.FILENAME, text[start + 1 : self.pos_ - 1], location)

        if cur_char == "\\" and next_is_digit:
            self.pos_ += 1
            self.scan_over_(Lexer.CHAR_DIGIT_)
            return (Lexer.CID, int(text[start + 1 : self.pos_], 10), location)
        if cur_char == "@":
            self.pos_ += 1
            self.scan_over_(Lexer.CHAR_NAME_CONTINUATION_)
            glyphclass = text[start + 1 : self.pos_]
            if len(glyphclass) < 1:
                raise FeatureLibError("Expected glyph class name", location)
            if not Lexer.RE_GLYPHCLASS.match(glyphclass):
                raise FeatureLibError(
                    "Glyph class names must consist of letters, digits, "
                    "underscore, period or hyphen",
                    location,
                )
            return (Lexer.GLYPHCLASS, glyphclass, location)
        if cur_char in Lexer.CHAR_NAME_START_:
            self.pos_ += 1
            self.scan_over_(Lexer.CHAR_NAME_CONTINUATION_)
            token = text[start : self.pos_]
            if token == "include":
                # The next token must be read as "(file name)".
                self.mode_ = Lexer.MODE_FILENAME_
            return (Lexer.NAME, token, location)
        if cur_char == "0" and next_char is not None and next_char in "xX":
            self.pos_ += 2
            self.scan_over_(Lexer.CHAR_HEXDIGIT_)
            return (Lexer.HEXADECIMAL, int(text[start : self.pos_], 16), location)
        if cur_char == "0" and next_is_digit:
            self.scan_over_(Lexer.CHAR_DIGIT_)
            return (Lexer.OCTAL, int(text[start : self.pos_], 8), location)
        if cur_char in Lexer.CHAR_DIGIT_ or (cur_char == "-" and next_is_digit):
            if cur_char == "-":
                # Step over the sign; it stays part of the token text.
                self.pos_ += 1
            self.scan_over_(Lexer.CHAR_DIGIT_)
            if self.pos_ >= limit or text[self.pos_] != ".":
                return (Lexer.NUMBER, int(text[start : self.pos_], 10), location)
            self.scan_over_(".")
            self.scan_over_(Lexer.CHAR_DIGIT_)
            return (Lexer.FLOAT, float(text[start : self.pos_]), location)
        if cur_char in Lexer.CHAR_SYMBOL_:
            self.pos_ += 1
            return (Lexer.SYMBOL, cur_char, location)
        if cur_char == '"':
            self.pos_ += 1
            self.scan_until_('"')
            if self.pos_ < self.text_length_ and self.text_[self.pos_] == '"':
                self.pos_ += 1
                # strip newlines embedded within a string
                string = re.sub("[\r\n]", "", text[start + 1 : self.pos_ - 1])
                return (Lexer.STRING, string, location)
            else:
                raise FeatureLibError("Expected '\"' to terminate string", location)
        raise FeatureLibError("Unexpected character: %r" % cur_char, location)

    def scan_over_(self, valid):
        """Advance ``pos_`` past every consecutive character found in ``valid``."""
        p = self.pos_
        while p < self.text_length_ and self.text_[p] in valid:
            p += 1
        self.pos_ = p

    def scan_until_(self, stop_at):
        """Advance ``pos_`` to the next character in ``stop_at``, or to the end."""
        p = self.pos_
        while p < self.text_length_ and self.text_[p] not in stop_at:
            p += 1
        self.pos_ = p

    def scan_anonymous_block(self, tag):
        """Read the raw content of an anonymous block closed by ``} tag;``.

        The rest of the current line and the newline characters that follow
        it are skipped; the content runs from there up to (not including)
        the closing ``}``. On return ``pos_`` points at that ``}``.

        NOTE(review): ``line_`` and ``line_start_`` are not advanced over the
        skipped text, so locations reported after the block do not account
        for line breaks inside it.

        Args:
            tag: the block's tag; surrounding whitespace is ignored.

        Returns:
            ``(Lexer.ANONYMOUS_BLOCK, content, location)``.

        Raises:
            FeatureLibError: if no ``} tag;`` terminator is found.
        """
        location = self.location_()
        tag = tag.strip()
        self.scan_until_(Lexer.CHAR_NEWLINE_)
        self.scan_over_(Lexer.CHAR_NEWLINE_)
        # The tag is literal text, not a pattern: escape it so characters
        # such as ".", "+" or "*" are not interpreted by the regex engine.
        regexp = r"}\s*" + re.escape(tag) + r"\s*;"
        split = re.split(regexp, self.text_[self.pos_ :], maxsplit=1)
        if len(split) != 2:
            raise FeatureLibError(
                "Expected '} %s;' to terminate anonymous block" % tag, location
            )
        self.pos_ += len(split[0])
        return (Lexer.ANONYMOUS_BLOCK, split[0], location)


class IncludingLexer(object):
    """A Lexer that follows include statements.

    The OpenType feature file specification states that due to
    historical reasons, relative imports should be resolved in this
    order:

    1. If the source font is UFO format, then relative to the UFO's
       font directory
    2. relative to the top-level include file
    3. relative to the parent include file

    We only support 1 (via includeDir) and 2.
    """

    def __init__(self, featurefile, *, includeDir=None):
        """Initializes an IncludingLexer.

        Behavior:
            If includeDir is passed, it will be used to determine the top-level
            include directory to use for all encountered include statements. If it is
            not passed, ``os.path.dirname(featurefile)`` will be considered the
            include directory.
        """

        self.lexers_ = [self.make_lexer_(featurefile)]
        self.featurefilepath = self.lexers_[0].filename_
        self.includeDir = includeDir

    def __iter__(self):
        return self

    def next(self):  # Python 2
        return self.__next__()

    def __next__(self):  # Python 3
        """Return the next token, transparently descending into includes.

        ``include(file)`` is never returned as a token: the named file is
        opened and its tokens are yielded in place. The ``;`` that follows
        the file name is not consumed here; it stays in the token stream.

        Raises:
            StopIteration: when every lexer on the stack is exhausted.
            FeatureLibError: if ``include`` is not followed by a file name,
                or includes are nested too deeply.
            IncludedFeaNotFound: if the included file does not exist.
        """
        while self.lexers_:
            lexer = self.lexers_[-1]
            try:
                token_type, token, location = next(lexer)
            except StopIteration:
                # Current file is finished: resume the including file.
                self.lexers_.pop()
                continue
            if token_type != Lexer.NAME or token != "include":
                return (token_type, token, location)

            try:
                fname_type, fname_token, fname_location = next(lexer)
            except StopIteration:
                # "include" was the last token of the file. Letting the
                # StopIteration escape would silently end the whole token
                # stream, including the rest of any parent files.
                raise FeatureLibError("Expected file name", location) from None
            if fname_type != Lexer.FILENAME:
                raise FeatureLibError("Expected file name", fname_location)

            if os.path.isabs(fname_token):
                path = fname_token
            else:
                if self.includeDir is not None:
                    curpath = self.includeDir
                elif self.featurefilepath is not None:
                    curpath = os.path.dirname(self.featurefilepath)
                else:
                    # if the IncludingLexer was initialized from an in-memory
                    # file-like stream, it doesn't have a 'name' pointing to
                    # its filesystem path, therefore we fall back to using the
                    # current working directory to resolve relative includes
                    curpath = os.getcwd()
                path = os.path.join(curpath, fname_token)
            if len(self.lexers_) >= 5:
                raise FeatureLibError("Too many recursive includes", fname_location)
            try:
                self.lexers_.append(self.make_lexer_(path))
            except FileNotFoundError as err:
                raise IncludedFeaNotFound(fname_token, fname_location) from err
        raise StopIteration()

    @staticmethod
    def make_lexer_(file_or_path):
        """Build a ``Lexer`` from a path or from an object with ``read()``.

        A path is opened as UTF-8 text and closed again before returning;
        a file-like object is read but left open for its owner to close.
        The lexer's file name is the object's ``name`` attribute, or None
        when it has none.
        """
        if hasattr(file_or_path, "read"):
            fileobj, closing = file_or_path, False
        else:
            fileobj, closing = open(file_or_path, "r", encoding="utf-8"), True
        try:
            data = fileobj.read()
            filename = getattr(fileobj, "name", None)
        finally:
            # Close files we opened ourselves even when read() fails.
            if closing:
                fileobj.close()
        return Lexer(data, filename)

    def scan_anonymous_block(self, tag):
        """Delegate to the innermost (currently active) lexer."""
        return self.lexers_[-1].scan_anonymous_block(tag)


class NonIncludingLexer(IncludingLexer):
    """Lexer that does not follow `include` statements, emits them as-is."""

    def __next__(self):  # Python 3
        return next(self.lexers_[0])