# xref: /aosp_15_r20/external/fonttools/Lib/fontTools/misc/xmlReader.py (revision e1fe3e4ad2793916b15cccdc4a7da52a7e1dd0e9)
import logging
import os
import sys

from fontTools import ttLib
from fontTools.misc.textTools import safeEval
from fontTools.ttLib.tables.DefaultTable import DefaultTable
7*e1fe3e4aSElliott Hughes
8*e1fe3e4aSElliott Hughes
# Module-level logger, named after this module; XMLReader reports the table
# it is currently parsing through it at INFO level.
log = logging.getLogger(__name__)
10*e1fe3e4aSElliott Hughes
11*e1fe3e4aSElliott Hughes
class TTXParseError(Exception):
    """Raised when the XML being read is not a valid TTX document.

    XMLReader raises it when the root element is not ``ttFont``.
    """
14*e1fe3e4aSElliott Hughes
15*e1fe3e4aSElliott Hughes
# Size passed to each file.read() call while feeding the expat parser
# (0x4000 == 16384).
BUFSIZE = 0x4000
17*e1fe3e4aSElliott Hughes
18*e1fe3e4aSElliott Hughes
class XMLReader(object):
    """Feed a TTX (XML) file through expat and populate a font object.

    The reader tracks the element nesting depth in ``stackSize``:

    * depth 0 -- the root element, which must be ``ttFont``;
    * depth 1 -- a table element; a table object is created for it and
      stored in ``ttFont`` under the table tag;
    * depth 2 -- a top-level element inside a table; it becomes ``self.root``
      and is handed to ``currentTable.fromXML`` once its end tag is seen;
    * deeper -- nested children, collected as ``(name, attrs, content)``
      tuples inside their parent's content list.

    Elements carrying a ``src`` attribute are read from that external file
    with a nested XMLReader.
    """

    def __init__(
        self, fileOrPath, ttFont, progress=None, quiet=None, contentOnly=False
    ):
        """Prepare a reader; nothing is parsed until ``read()`` is called.

        Args:
            fileOrPath: ``"-"`` selects ``sys.stdin``. Any object without a
                ``read`` attribute is treated as a path and opened in
                binary mode (the reader then owns and closes the stream).
                Anything else is used as-is and left open.
            ttFont: the font object to populate.
            progress: optional object providing ``set`` and ``setLabel``.
            quiet: deprecated; a non-None value triggers
                ``deprecateArgument`` and is stored on ``self.quiet``.
            contentOnly: when true, the element seen at depth 1 is skipped
                instead of being treated as a table element, and
                ``fromXML`` is never called by this reader; the parsed
                ``root`` is left for the caller to pick up.
        """
        if fileOrPath == "-":
            fileOrPath = sys.stdin
        if not hasattr(fileOrPath, "read"):
            self.file = open(fileOrPath, "rb")
            self._closeStream = True
        else:
            # assume readable file object
            self.file = fileOrPath
            self._closeStream = False
        self.ttFont = ttFont
        self.progress = progress
        if quiet is not None:
            from fontTools.misc.loggingTools import deprecateArgument

            deprecateArgument("quiet", "configure logging instead")
            # NOTE: the attribute is only created when the argument is given.
            self.quiet = quiet
        self.root = None
        self.contentStack = []
        self.contentOnly = contentOnly
        self.stackSize = 0

    def read(self, rootless=False):
        """Parse the whole stream.

        Args:
            rootless: when true, parsing starts at depth 1 (as if the root
                element had already been consumed); the depth is restored
                afterwards.

        A stream opened by this reader is closed when parsing finishes,
        and also when parsing fails, so a malformed file cannot leak the
        open handle. Exceptions from the parser or the handlers propagate
        unchanged.
        """
        if rootless:
            self.stackSize += 1
        try:
            if self.progress:
                # Measure the file so the progress object knows its maximum.
                self.file.seek(0, 2)
                fileSize = self.file.tell()
                self.progress.set(0, fileSize // 100 or 1)
                self.file.seek(0)
            self._parseFile(self.file)
        finally:
            if self._closeStream:
                self.close()
            if rootless:
                self.stackSize -= 1

    def close(self):
        """Close the underlying stream."""
        self.file.close()

    def _parseFile(self, file):
        """Feed ``file`` to a fresh expat parser in BUFSIZE-sized chunks."""
        from xml.parsers.expat import ParserCreate

        parser = ParserCreate()
        parser.StartElementHandler = self._startElementHandler
        parser.EndElementHandler = self._endElementHandler
        parser.CharacterDataHandler = self._characterDataHandler

        pos = 0
        while True:
            chunk = file.read(BUFSIZE)
            if not chunk:
                # Empty read: tell expat this is the final call.
                parser.Parse(chunk, 1)
                break
            pos += len(chunk)
            if self.progress:
                self.progress.set(pos // 100)
            parser.Parse(chunk, 0)

    def _startElementHandler(self, name, attrs):
        """expat start-tag callback; dispatches on the current depth."""
        if self.stackSize == 1 and self.contentOnly:
            # We already know the table we're parsing, skip
            # parsing the table tag and continue to
            # stack '2' which begins parsing content
            self.contentStack.append([])
            self.stackSize = 2
            return
        # Depth *before* this element was opened.
        stackSize = self.stackSize
        self.stackSize = stackSize + 1
        subFile = attrs.get("src")
        if subFile is not None:
            if hasattr(self.file, "name"):
                # if file has a name, get its parent directory
                dirname = os.path.dirname(self.file.name)
            else:
                # else fall back to using the current working directory
                dirname = os.getcwd()
            subFile = os.path.join(dirname, subFile)
        if not stackSize:
            # Root element.
            if name != "ttFont":
                raise TTXParseError("illegal root tag: %s" % name)
            if self.ttFont.reader is None and not self.ttFont.tables:
                # Only take sfntVersion from the XML when the font has no
                # reader and no tables yet.
                sfntVersion = attrs.get("sfntVersion")
                if sfntVersion is not None:
                    if len(sfntVersion) != 4:
                        # Not a plain 4-character value: evaluate it as a
                        # quoted string literal.
                        sfntVersion = safeEval('"' + sfntVersion + '"')
                    self.ttFont.sfntVersion = sfntVersion
            self.contentStack.append([])
        elif stackSize == 1:
            # Table element.
            if subFile is not None:
                # The table lives in an external file: read it in full.
                subReader = XMLReader(subFile, self.ttFont, self.progress)
                subReader.read()
                self.contentStack.append([])
                return
            tag = ttLib.xmlToTag(name)
            msg = "Parsing '%s' table..." % tag
            if self.progress:
                self.progress.setLabel(msg)
            log.info(msg)
            if tag == "GlyphOrder":
                tableClass = ttLib.GlyphOrder
            elif "ERROR" in attrs or ("raw" in attrs and safeEval(attrs["raw"])):
                tableClass = DefaultTable
            else:
                tableClass = ttLib.getTableClass(tag)
                if tableClass is None:
                    tableClass = DefaultTable
            if tag == "loca" and tag in self.ttFont:
                # Special-case the 'loca' table as we need the
                #    original if the 'glyf' table isn't recompiled.
                self.currentTable = self.ttFont[tag]
            else:
                self.currentTable = tableClass(tag)
                self.ttFont[tag] = self.currentTable
            self.contentStack.append([])
        elif stackSize == 2 and subFile is not None:
            # Table content stored externally: parse it with a content-only
            # reader and adopt its root element.
            subReader = XMLReader(subFile, self.ttFont, self.progress, contentOnly=True)
            subReader.read()
            self.contentStack.append([])
            self.root = subReader.root
        elif stackSize == 2:
            # Top-level element of a table; remembered until its end tag.
            self.contentStack.append([])
            self.root = (name, attrs, self.contentStack[-1])
        else:
            # Nested element: record it in its parent's content list and
            # make its own content list the current one.
            children = []
            self.contentStack[-1].append((name, attrs, children))
            self.contentStack.append(children)

    def _characterDataHandler(self, data):
        """expat text callback; stores text found below the root element."""
        if self.stackSize > 1:
            # parser parses in chunks, so we may get multiple calls
            # for the same text node; thus we need to append the data
            # to the last item in the content stack:
            # https://github.com/fonttools/fonttools/issues/2614
            if (
                data != "\n"
                and self.contentStack[-1]
                and isinstance(self.contentStack[-1][-1], str)
                and self.contentStack[-1][-1] != "\n"
            ):
                self.contentStack[-1][-1] += data
            else:
                self.contentStack[-1].append(data)

    def _endElementHandler(self, name):
        """expat end-tag callback; pops one level and flushes table content.

        When the depth drops back to 2 (and this is not a content-only
        reader) the element stored in ``self.root`` is passed to
        ``currentTable.fromXML``.
        """
        self.stackSize -= 1
        del self.contentStack[-1]
        if not self.contentOnly:
            if self.stackSize == 1:
                self.root = None
            elif self.stackSize == 2:
                name, attrs, content = self.root
                self.currentTable.fromXML(name, attrs, content, self.ttFont)
                self.root = None
175*e1fe3e4aSElliott Hughes
176*e1fe3e4aSElliott Hughes
class ProgressPrinter(object):
    """Minimal progress object that prints its title and labels.

    It offers the ``set`` / ``increment`` / ``setLabel`` methods, but numeric
    progress is discarded; only text is written to standard output.
    """

    def __init__(self, title, maxval=100):
        """Print ``title``; ``maxval`` is accepted but not used."""
        print(title)

    def set(self, val, maxval=None):
        """Accept a progress value (and optional maximum); does nothing."""
        pass

    def increment(self, val=1):
        """Accept a progress increment; does nothing."""
        pass

    def setLabel(self, text):
        """Print ``text`` as the current label."""
        print(text)
189