xref: /aosp_15_r20/external/fonttools/Lib/fontTools/feaLib/lexer.py (revision e1fe3e4ad2793916b15cccdc4a7da52a7e1dd0e9)
1from fontTools.feaLib.error import FeatureLibError, IncludedFeaNotFound
2from fontTools.feaLib.location import FeatureLibLocation
3import re
4import os
5
6try:
7    import cython
8except ImportError:
9    # if cython not installed, use mock module with no-op decorators and types
10    from fontTools.misc import cython
11
12
class Lexer(object):
    """Tokenizer for feature-file source text held in memory.

    Iterating over a ``Lexer`` yields ``(token_type, token, location)``
    tuples, where ``token_type`` is one of the string constants defined on
    this class.  ``NEWLINE`` tokens are produced by :meth:`next_` but are
    filtered out by :meth:`__next__`.
    """

    NUMBER = "NUMBER"
    HEXADECIMAL = "HEXADECIMAL"
    OCTAL = "OCTAL"
    NUMBERS = (NUMBER, HEXADECIMAL, OCTAL)
    FLOAT = "FLOAT"
    STRING = "STRING"
    NAME = "NAME"
    FILENAME = "FILENAME"
    GLYPHCLASS = "GLYPHCLASS"
    CID = "CID"
    SYMBOL = "SYMBOL"
    COMMENT = "COMMENT"
    NEWLINE = "NEWLINE"
    ANONYMOUS_BLOCK = "ANONYMOUS_BLOCK"

    CHAR_WHITESPACE_ = " \t"
    CHAR_NEWLINE_ = "\r\n"
    CHAR_SYMBOL_ = ",;:-+'{}[]<>()="
    CHAR_DIGIT_ = "0123456789"
    CHAR_HEXDIGIT_ = "0123456789ABCDEFabcdef"
    CHAR_LETTER_ = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
    CHAR_NAME_START_ = CHAR_LETTER_ + "_+*:.^~!\\"
    CHAR_NAME_CONTINUATION_ = CHAR_LETTER_ + CHAR_DIGIT_ + "_.+*:^~!/-"

    RE_GLYPHCLASS = re.compile(r"^[A-Za-z_0-9.\-]+$")

    MODE_NORMAL_ = "NORMAL"
    MODE_FILENAME_ = "FILENAME"

    def __init__(self, text, filename):
        """Create a lexer over ``text``.

        Args:
            text: The complete source text to tokenize.
            filename: Name reported in token locations; when falsy,
                ``"<features>"`` is reported instead.
        """
        self.filename_ = filename
        self.line_ = 1  # 1-based number of the line being scanned
        self.pos_ = 0  # index into text_ of the next unread character
        self.line_start_ = 0  # index into text_ where the current line begins
        self.text_ = text
        self.text_length_ = len(text)
        self.mode_ = Lexer.MODE_NORMAL_

    def __iter__(self):
        return self

    def next(self):  # Python 2
        return self.__next__()

    def __next__(self):  # Python 3
        """Return the next token that is not a ``NEWLINE``."""
        while True:
            token_type, token, location = self.next_()
            if token_type != Lexer.NEWLINE:
                return (token_type, token, location)

    def location_(self):
        """Return the location (file, line, 1-based column) of ``pos_``."""
        column = self.pos_ - self.line_start_ + 1
        return FeatureLibLocation(self.filename_ or "<features>", self.line_, column)

    def next_(self):
        """Scan and return the next raw token, ``NEWLINE`` tokens included.

        Returns:
            A ``(token_type, token, location)`` tuple.

        Raises:
            StopIteration: The whole text has been consumed.
            FeatureLibError: The text at the current position does not form
                a valid token.
        """
        self.scan_over_(Lexer.CHAR_WHITESPACE_)
        location = self.location_()
        start = self.pos_
        text = self.text_
        limit = len(text)
        if start >= limit:
            raise StopIteration()
        cur_char = text[start]
        next_char = text[start + 1] if start + 1 < limit else None
        # next_char is None when cur_char is the last character of the text.
        # ``None in "..."`` raises TypeError, so lookahead tests go through
        # these guarded flags instead of testing next_char directly.
        next_is_digit = next_char is not None and next_char in Lexer.CHAR_DIGIT_
        next_is_hex_marker = next_char is not None and next_char in "xX"

        if cur_char == "\n":
            self.pos_ += 1
            self.line_ += 1
            self.line_start_ = self.pos_
            return (Lexer.NEWLINE, None, location)
        if cur_char == "\r":
            # "\r\n" counts as a single line break.
            self.pos_ += 2 if next_char == "\n" else 1
            self.line_ += 1
            self.line_start_ = self.pos_
            return (Lexer.NEWLINE, None, location)
        if cur_char == "#":
            self.scan_until_(Lexer.CHAR_NEWLINE_)
            return (Lexer.COMMENT, text[start : self.pos_], location)

        if self.mode_ == Lexer.MODE_FILENAME_:
            # Entered right after an "include" NAME token: the next token
            # must be a parenthesized file name, returned without the parens.
            if cur_char != "(":
                raise FeatureLibError("Expected '(' before file name", location)
            self.scan_until_(")")
            cur_char = text[self.pos_] if self.pos_ < limit else None
            if cur_char != ")":
                raise FeatureLibError("Expected ')' after file name", location)
            self.pos_ += 1
            self.mode_ = Lexer.MODE_NORMAL_
            return (Lexer.FILENAME, text[start + 1 : self.pos_ - 1], location)

        if cur_char == "\\" and next_is_digit:
            self.pos_ += 1
            self.scan_over_(Lexer.CHAR_DIGIT_)
            return (Lexer.CID, int(text[start + 1 : self.pos_], 10), location)
        if cur_char == "@":
            self.pos_ += 1
            self.scan_over_(Lexer.CHAR_NAME_CONTINUATION_)
            glyphclass = text[start + 1 : self.pos_]
            if not glyphclass:
                raise FeatureLibError("Expected glyph class name", location)
            if not Lexer.RE_GLYPHCLASS.match(glyphclass):
                raise FeatureLibError(
                    "Glyph class names must consist of letters, digits, "
                    "underscore, period or hyphen",
                    location,
                )
            return (Lexer.GLYPHCLASS, glyphclass, location)
        if cur_char in Lexer.CHAR_NAME_START_:
            self.pos_ += 1
            self.scan_over_(Lexer.CHAR_NAME_CONTINUATION_)
            token = text[start : self.pos_]
            if token == "include":
                self.mode_ = Lexer.MODE_FILENAME_
            return (Lexer.NAME, token, location)
        if cur_char == "0" and next_is_hex_marker:
            self.pos_ += 2
            self.scan_over_(Lexer.CHAR_HEXDIGIT_)
            return (Lexer.HEXADECIMAL, int(text[start : self.pos_], 16), location)
        if cur_char == "0" and next_is_digit:
            self.scan_over_(Lexer.CHAR_DIGIT_)
            return (Lexer.OCTAL, int(text[start : self.pos_], 8), location)
        if cur_char in Lexer.CHAR_DIGIT_ or (cur_char == "-" and next_is_digit):
            # Step over the leading digit or minus sign, then the remaining
            # digits; a following "." turns the token into a FLOAT.
            self.pos_ += 1
            self.scan_over_(Lexer.CHAR_DIGIT_)
            if self.pos_ >= limit or text[self.pos_] != ".":
                return (Lexer.NUMBER, int(text[start : self.pos_], 10), location)
            self.scan_over_(".")
            self.scan_over_(Lexer.CHAR_DIGIT_)
            return (Lexer.FLOAT, float(text[start : self.pos_]), location)
        if cur_char in Lexer.CHAR_SYMBOL_:
            self.pos_ += 1
            return (Lexer.SYMBOL, cur_char, location)
        if cur_char == '"':
            self.pos_ += 1
            self.scan_until_('"')
            if self.pos_ < limit and text[self.pos_] == '"':
                self.pos_ += 1
                # strip newlines embedded within a string
                string = re.sub("[\r\n]", "", text[start + 1 : self.pos_ - 1])
                return (Lexer.STRING, string, location)
            raise FeatureLibError("Expected '\"' to terminate string", location)
        raise FeatureLibError("Unexpected character: %r" % cur_char, location)

    def scan_over_(self, valid):
        """Advance ``pos_`` past every consecutive character found in ``valid``."""
        p = self.pos_
        while p < self.text_length_ and self.text_[p] in valid:
            p += 1
        self.pos_ = p

    def scan_until_(self, stop_at):
        """Advance ``pos_`` to the first character found in ``stop_at``.

        ``pos_`` ends up at the end of the text when no such character exists.
        """
        p = self.pos_
        while p < self.text_length_ and self.text_[p] not in stop_at:
            p += 1
        self.pos_ = p

    def scan_anonymous_block(self, tag):
        """Return the raw text of an anonymous block terminated by ``} tag;``.

        The rest of the current line is skipped; the block content is
        everything from the following line up to (not including) the first
        ``}``, optional whitespace, ``tag``, optional whitespace, ``;``.
        ``pos_`` is left on that closing ``}``.

        Args:
            tag: The block's tag; surrounding whitespace is ignored.

        Returns:
            ``(Lexer.ANONYMOUS_BLOCK, content, location)``.

        Raises:
            FeatureLibError: No terminator for ``tag`` was found.
        """
        location = self.location_()
        tag = tag.strip()
        self.scan_until_(Lexer.CHAR_NEWLINE_)
        self.scan_over_(Lexer.CHAR_NEWLINE_)
        # The tag is matched literally: NAME tokens may contain regex
        # metacharacters such as "+", "*", "." or "^".
        regexp = r"}\s*" + re.escape(tag) + r"\s*;"
        split = re.split(regexp, self.text_[self.pos_ :], maxsplit=1)
        if len(split) != 2:
            raise FeatureLibError(
                "Expected '} %s;' to terminate anonymous block" % tag, location
            )
        self.pos_ += len(split[0])
        return (Lexer.ANONYMOUS_BLOCK, split[0], location)
190
191
class IncludingLexer(object):
    """A Lexer that follows include statements.

    The OpenType feature file specification states that due to
    historical reasons, relative imports should be resolved in this
    order:

    1. If the source font is UFO format, then relative to the UFO's
       font directory
    2. relative to the top-level include file
    3. relative to the parent include file

    We only support 1 (via includeDir) and 2.
    """

    def __init__(self, featurefile, *, includeDir=None):
        """Initializes an IncludingLexer.

        Behavior:
            If includeDir is passed, it will be used to determine the top-level
            include directory to use for all encountered include statements. If it is
            not passed, ``os.path.dirname(featurefile)`` will be considered the
            include directory.
        """

        # Stack of active lexers: the top-level file sits at index 0 and the
        # most recently included file at the end.
        self.lexers_ = [self.make_lexer_(featurefile)]
        self.featurefilepath = self.lexers_[0].filename_
        self.includeDir = includeDir

    def __iter__(self):
        return self

    def next(self):  # Python 2
        return self.__next__()

    def __next__(self):  # Python 3
        """Return the next token, descending into included files.

        An ``include`` NAME token and the FILENAME token after it are
        consumed here and replaced by the token stream of the named file.

        Raises:
            StopIteration: Every lexer on the stack is exhausted.
            FeatureLibError: ``include`` is not followed by a file name, or
                includes are nested too deeply.
            IncludedFeaNotFound: The included file does not exist.
        """
        while self.lexers_:
            lexer = self.lexers_[-1]
            try:
                token_type, token, location = next(lexer)
            except StopIteration:
                # Current file is finished; resume the file that included it.
                self.lexers_.pop()
                continue
            if token_type != Lexer.NAME or token != "include":
                return (token_type, token, location)

            try:
                fname_type, fname_token, fname_location = next(lexer)
            except StopIteration:
                # "include" was the last token of its file.  Letting the
                # StopIteration escape would silently end the whole token
                # stream, including files still waiting on the stack.
                raise FeatureLibError("Expected file name", location) from None
            if fname_type != Lexer.FILENAME:
                raise FeatureLibError("Expected file name", fname_location)
            # The ";" after the file name is not consumed here; it is
            # returned later as an ordinary token of the including file.
            if os.path.isabs(fname_token):
                path = fname_token
            else:
                if self.includeDir is not None:
                    curpath = self.includeDir
                elif self.featurefilepath is not None:
                    curpath = os.path.dirname(self.featurefilepath)
                else:
                    # if the IncludingLexer was initialized from an in-memory
                    # file-like stream, it doesn't have a 'name' pointing to
                    # its filesystem path, therefore we fall back to using the
                    # current working directory to resolve relative includes
                    curpath = os.getcwd()
                path = os.path.join(curpath, fname_token)
            if len(self.lexers_) >= 5:
                raise FeatureLibError("Too many recursive includes", fname_location)
            try:
                self.lexers_.append(self.make_lexer_(path))
            except FileNotFoundError as err:
                raise IncludedFeaNotFound(fname_token, fname_location) from err
        raise StopIteration()

    @staticmethod
    def make_lexer_(file_or_path):
        """Build a ``Lexer`` from a path or from an object with ``read()``.

        A path is opened as UTF-8 text and closed again before returning; a
        file-like object passed in by the caller is read but left open.  The
        lexer's file name is the file object's ``name`` attribute, or
        ``None`` when it has none.
        """
        if hasattr(file_or_path, "read"):
            fileobj, closing = file_or_path, False
        else:
            fileobj, closing = open(file_or_path, "r", encoding="utf-8"), True
        try:
            data = fileobj.read()
            filename = getattr(fileobj, "name", None)
        finally:
            # Close files opened here even when read() fails (e.g. on a
            # decoding error), so the handle is not leaked.
            if closing:
                fileobj.close()
        return Lexer(data, filename)

    def scan_anonymous_block(self, tag):
        """Delegate to ``scan_anonymous_block`` of the innermost active lexer."""
        return self.lexers_[-1].scan_anonymous_block(tag)
281
282
class NonIncludingLexer(IncludingLexer):
    """Lexer that does not follow `include` statements, emits them as-is."""

    def __next__(self):  # Python 3
        # Only the top-level lexer is ever consulted, so "include" and the
        # file name after it come through as ordinary tokens.
        top_level = self.lexers_[0]
        return next(top_level)
288