xref: /aosp_15_r20/prebuilts/build-tools/common/py3-stdlib/re/_parser.py (revision cda5da8d549138a6648c5ee6d7a49cf8f4a657be)
#
# Secret Labs' Regular Expression Engine
#
# convert re-style regular expression to sre pattern
#
# Copyright (c) 1998-2001 by Secret Labs AB.  All rights reserved.
#
# See the __init__.py file for information on usage and redistribution.
#
10*cda5da8dSAndroid Build Coastguard Worker
11*cda5da8dSAndroid Build Coastguard Worker"""Internal support module for sre"""
12*cda5da8dSAndroid Build Coastguard Worker
13*cda5da8dSAndroid Build Coastguard Worker# XXX: show string offset and offending character for all errors
14*cda5da8dSAndroid Build Coastguard Worker
15*cda5da8dSAndroid Build Coastguard Workerfrom ._constants import *
16*cda5da8dSAndroid Build Coastguard Worker
# Characters listed as special / as repetition operators in a pattern.
SPECIAL_CHARS = r".\[{()*+?^$|"
REPEAT_CHARS = "*+?{"

# Character sets used to recognise digits and letters.  The larger
# sets are derived from the smaller ones so the alphabets are spelled
# out only once.
OCTDIGITS = frozenset("01234567")
DIGITS = OCTDIGITS | {"8", "9"}
HEXDIGITS = DIGITS | frozenset("abcdefABCDEF")
ASCIILETTERS = frozenset("abcdefghijklmnopqrstuvwxyz"
                         "ABCDEFGHIJKLMNOPQRSTUVWXYZ")

# Space, tab, newline, carriage return, vertical tab and form feed.
WHITESPACE = frozenset((" ", "\t", "\n", "\r", "\v", "\f"))
27*cda5da8dSAndroid Build Coastguard Worker
# Op codes whose argument is a (min, max, subpattern) repetition.
_REPEATCODES = frozenset((MIN_REPEAT, MAX_REPEAT, POSSESSIVE_REPEAT))
# Op codes counted as exactly one character wide by SubPattern.getwidth().
_UNITCODES = frozenset((ANY, RANGE, IN, LITERAL, NOT_LITERAL, CATEGORY))
30*cda5da8dSAndroid Build Coastguard Worker
# Backslash escapes that denote one literal character.  Each key is the
# two-character escape as written in the pattern; each value is the
# (LITERAL, code point) pair it stands for.
ESCAPES = {
    "\\" + letter: (LITERAL, ord(char))
    for letter, char in (
        ("a", "\a"),
        ("b", "\b"),
        ("f", "\f"),
        ("n", "\n"),
        ("r", "\r"),
        ("t", "\t"),
        ("v", "\v"),
        ("\\", "\\"),
    )
}
41*cda5da8dSAndroid Build Coastguard Worker
# Backslash escapes that denote a position assertion (AT, ...) or a
# predefined character category (IN, [(CATEGORY, ...)]).
CATEGORIES = dict([
    (r"\A", (AT, AT_BEGINNING_STRING)),  # start of string
    (r"\b", (AT, AT_BOUNDARY)),
    (r"\B", (AT, AT_NON_BOUNDARY)),
    (r"\d", (IN, [(CATEGORY, CATEGORY_DIGIT)])),
    (r"\D", (IN, [(CATEGORY, CATEGORY_NOT_DIGIT)])),
    (r"\s", (IN, [(CATEGORY, CATEGORY_SPACE)])),
    (r"\S", (IN, [(CATEGORY, CATEGORY_NOT_SPACE)])),
    (r"\w", (IN, [(CATEGORY, CATEGORY_WORD)])),
    (r"\W", (IN, [(CATEGORY, CATEGORY_NOT_WORD)])),
    (r"\Z", (AT, AT_END_STRING)),  # end of string
])
54*cda5da8dSAndroid Build Coastguard Worker
# Maps each single-letter flag name to its SRE_FLAG_* constant.
FLAGS = dict(
    # standard flags
    i=SRE_FLAG_IGNORECASE,
    L=SRE_FLAG_LOCALE,
    m=SRE_FLAG_MULTILINE,
    s=SRE_FLAG_DOTALL,
    x=SRE_FLAG_VERBOSE,
    # extensions
    a=SRE_FLAG_ASCII,
    t=SRE_FLAG_TEMPLATE,
    u=SRE_FLAG_UNICODE,
)
67*cda5da8dSAndroid Build Coastguard Worker
# Bit mask combining the ASCII, LOCALE and UNICODE flags.
TYPE_FLAGS = (
    SRE_FLAG_ASCII
    | SRE_FLAG_LOCALE
    | SRE_FLAG_UNICODE
)
# Bit mask combining the DEBUG and TEMPLATE flags.
GLOBAL_FLAGS = (
    SRE_FLAG_DEBUG
    | SRE_FLAG_TEMPLATE
)
70*cda5da8dSAndroid Build Coastguard Worker
class State:
    # Parsing state: the flags plus everything known about the groups
    # opened so far.

    def __init__(self):
        self.flags = 0
        self.groupdict = {}           # group name -> group index
        self.groupwidths = [None]     # one slot per group; slot 0 is group 0
        self.lookbehindgroups = None
        self.grouprefpos = {}

    @property
    def groups(self):
        """Number of groups allocated so far, counting group 0."""
        return len(self.groupwidths)

    def opengroup(self, name=None):
        """Allocate the next group index, optionally binding it to *name*.

        The new group's width slot stays None until closegroup() is
        called.  Raises error if the group limit is exceeded or *name*
        is already bound to another group.
        """
        gid = len(self.groupwidths)
        self.groupwidths.append(None)
        if len(self.groupwidths) > MAXGROUPS:
            raise error("too many groups")
        if name is None:
            return gid
        previous = self.groupdict.get(name)
        if previous is not None:
            raise error("redefinition of group name %r as group %d; "
                        "was group %d" % (name, gid,  previous))
        self.groupdict[name] = gid
        return gid

    def closegroup(self, gid, p):
        """Record the width of subpattern *p* as the width of group *gid*."""
        width = p.getwidth()
        self.groupwidths[gid] = width

    def checkgroup(self, gid):
        """Return True if group *gid* exists and has been closed."""
        if gid >= self.groups:
            return False
        return self.groupwidths[gid] is not None

    def checklookbehindgroup(self, gid, source):
        """Reject a reference to *gid* that is not allowed in a lookbehind.

        Does nothing while lookbehindgroups is None.  Otherwise raises
        source.error(...) if the group is unknown or still open, or if
        its index is not below lookbehindgroups.
        """
        limit = self.lookbehindgroups
        if limit is None:
            return
        if not self.checkgroup(gid):
            raise source.error('cannot refer to an open group')
        if gid >= limit:
            raise source.error('cannot refer to group defined in the same '
                               'lookbehind subpattern')
106*cda5da8dSAndroid Build Coastguard Worker
class SubPattern:
    # A subpattern in intermediate form: a list of (op, argument) pairs
    # together with the parser state it belongs to.

    def __init__(self, state, data=None):
        self.state = state
        self.data = [] if data is None else data
        self.width = None  # filled in by the first getwidth() call

    def dump(self, level=0):
        """Print an indented listing of this subpattern to stdout."""
        indent = level * "  "
        inner_indent = (level + 1) * "  "
        for op, av in self.data:
            print(indent + str(op), end='')
            if op is IN:
                # member sublanguage
                print()
                for member_op, member_arg in av:
                    print(inner_indent + str(member_op), member_arg)
            elif op is BRANCH:
                print()
                for position, alternative in enumerate(av[1]):
                    if position:
                        print(indent + "OR")
                    alternative.dump(level + 1)
            elif op is GROUPREF_EXISTS:
                condgroup, item_yes, item_no = av
                print('', condgroup)
                item_yes.dump(level + 1)
                if item_no:
                    print(indent + "ELSE")
                    item_no.dump(level + 1)
            elif isinstance(av, (tuple, list)):
                # Plain values share the op's line; each nested
                # subpattern is dumped on lines of its own.
                line_finished = False
                for part in av:
                    if isinstance(part, SubPattern):
                        if not line_finished:
                            print()
                        part.dump(level + 1)
                        line_finished = True
                    else:
                        if not line_finished:
                            print(' ', end='')
                        print(part, end='')
                        line_finished = False
                if not line_finished:
                    print()
            else:
                print('', av)

    def __repr__(self):
        return repr(self.data)

    def __len__(self):
        return len(self.data)

    def __delitem__(self, index):
        del self.data[index]

    def __getitem__(self, index):
        item = self.data[index]
        if isinstance(index, slice):
            # A slice yields a new SubPattern sharing the same state.
            return SubPattern(self.state, item)
        return item

    def __setitem__(self, index, code):
        self.data[index] = code

    def insert(self, index, code):
        self.data.insert(index, code)

    def append(self, code):
        self.data.append(code)

    def getwidth(self):
        """Return the (min, max) width of this subpattern.

        The result is computed once and cached in self.width.  The
        minimum is capped at MAXREPEAT - 1 and the maximum at MAXREPEAT.
        """
        if self.width is not None:
            return self.width
        total_min = total_max = 0
        for op, av in self.data:
            if op is BRANCH:
                # Narrowest minimum and widest maximum of the alternatives.
                part_min, part_max = MAXREPEAT - 1, 0
                for alternative in av[1]:
                    alt_min, alt_max = alternative.getwidth()
                    part_min = min(part_min, alt_min)
                    part_max = max(part_max, alt_max)
            elif op is ATOMIC_GROUP:
                part_min, part_max = av.getwidth()
            elif op is SUBPATTERN:
                part_min, part_max = av[-1].getwidth()
            elif op in _REPEATCODES:
                # av is (min count, max count, repeated subpattern).
                body_min, body_max = av[2].getwidth()
                part_min = body_min * av[0]
                part_max = body_max * av[1]
            elif op in _UNITCODES:
                part_min = part_max = 1
            elif op is GROUPREF:
                part_min, part_max = self.state.groupwidths[av]
            elif op is GROUPREF_EXISTS:
                part_min, part_max = av[1].getwidth()
                if av[2] is None:
                    # Without a "no" branch the construct may match nothing.
                    part_min = 0
                else:
                    no_min, no_max = av[2].getwidth()
                    part_min = min(part_min, no_min)
                    part_max = max(part_max, no_max)
            elif op is SUCCESS:
                break
            else:
                # Any other op adds nothing to the width.
                continue
            total_min = total_min + part_min
            total_max = total_max + part_max
        self.width = min(total_min, MAXREPEAT - 1), min(total_max, MAXREPEAT)
        return self.width
220*cda5da8dSAndroid Build Coastguard Worker
class Tokenizer:
    # Splits a pattern into tokens.  A token is a single character, or a
    # backslash together with the character that follows it.  self.next
    # always holds the upcoming token (None at the end of the pattern).

    def __init__(self, string):
        self.istext = isinstance(string, str)
        self.string = string
        # Bytes patterns are scanned as latin-1 text.
        self.decoded_string = string if self.istext else str(string, 'latin1')
        self.index = 0
        self.next = None
        self.__next()

    def __next(self):
        """Load the token starting at self.index into self.next."""
        text = self.decoded_string
        position = self.index
        try:
            token = text[position]
        except IndexError:
            self.next = None
            return
        if token == "\\":
            # A backslash always travels with the following character.
            position += 1
            try:
                token += text[position]
            except IndexError:
                raise error("bad escape (end of pattern)",
                            self.string, len(self.string) - 1) from None
        self.index = position + 1
        self.next = token

    def match(self, char):
        """Consume the upcoming token if it equals *char*; report success."""
        if char != self.next:
            return False
        self.__next()
        return True

    def get(self):
        """Return the upcoming token and advance past it."""
        token = self.next
        self.__next()
        return token

    def getwhile(self, n, charset):
        """Consume up to *n* tokens that are in *charset*; return them joined."""
        taken = []
        for _ in range(n):
            token = self.next
            if token not in charset:
                break
            taken.append(token)
            self.__next()
        return ''.join(taken)

    def getuntil(self, terminator, name):
        """Consume tokens up to and including *terminator*.

        Returns the text before the terminator.  Raises self.error(...)
        if that text is empty or the pattern ends first; *name* is used
        in the "missing ..." message.
        """
        pieces = []
        while True:
            token = self.next
            self.__next()
            if token is None:
                if not pieces:
                    raise self.error("missing " + name)
                raise self.error("missing %s, unterminated name" % terminator,
                                 len(''.join(pieces)))
            if token == terminator:
                if not pieces:
                    raise self.error("missing " + name, 1)
                return ''.join(pieces)
            pieces.append(token)

    @property
    def pos(self):
        """Offset in the pattern at which the upcoming token starts."""
        upcoming = self.next or ''
        return self.index - len(upcoming)

    def tell(self):
        """Return the same offset as the pos property."""
        return self.pos

    def seek(self, index):
        """Continue tokenizing from offset *index*."""
        self.index = index
        self.__next()

    def error(self, msg, offset=0):
        """Build (not raise) an error located *offset* before tell()."""
        if not self.istext:
            msg = msg.encode('ascii', 'backslashreplace').decode('ascii')
        where = self.tell() - offset
        return error(msg, self.string, where)

    def checkgroupname(self, name, offset, nested):
        """Validate a group name.

        Raises self.error(...) if *name* is not an identifier.  For a
        bytes pattern, a non-ASCII name only produces a
        DeprecationWarning.
        """
        if not name.isidentifier():
            msg = "bad character in group name %r" % name
            raise self.error(msg, len(name) + offset)
        if self.istext or name.isascii():
            return
        import warnings
        position = self.tell() - len(name) - offset
        warnings.warn(
            "bad character in group name %a at position %d" %
            (name, position),
            DeprecationWarning, stacklevel=nested + 7
        )
306*cda5da8dSAndroid Build Coastguard Worker
def _class_escape(source, escape):
    # Handle an escape code found inside a character class.  Returns the
    # (op, argument) pair for it, or raises source.error(...) if the
    # escape is not valid here.
    literal = ESCAPES.get(escape)
    if literal:
        return literal
    category = CATEGORIES.get(escape)
    if category and category[0] is IN:
        # Only the character categories apply here, not the AT entries.
        return category
    try:
        kind = escape[1:2]
        if kind == "x":
            # hexadecimal escape (exactly two digits)
            escape += source.getwhile(2, HEXDIGITS)
            if len(escape) != 4:
                raise source.error("incomplete escape %s" % escape, len(escape))
            return LITERAL, int(escape[2:], 16)
        # unicode escapes: \u takes exactly four hex digits, \U exactly
        # eight; both are recognised in text patterns only
        ndigits = {"u": 4, "U": 8}.get(kind)
        if ndigits and source.istext:
            escape += source.getwhile(ndigits, HEXDIGITS)
            if len(escape) != ndigits + 2:
                raise source.error("incomplete escape %s" % escape, len(escape))
            codepoint = int(escape[2:], 16)
            chr(codepoint) # raise ValueError for invalid code
            return LITERAL, codepoint
        if kind == "N" and source.istext:
            import unicodedata
            # named unicode escape e.g. \N{EM DASH}
            if not source.match('{'):
                raise source.error("missing {")
            charname = source.getuntil('}', 'character name')
            try:
                codepoint = ord(unicodedata.lookup(charname))
            except (KeyError, TypeError):
                raise source.error("undefined character name %r" % charname,
                                   len(charname) + len(r'\N{}')) from None
            return LITERAL, codepoint
        if kind in OCTDIGITS:
            # octal escape (up to three digits)
            escape += source.getwhile(2, OCTDIGITS)
            value = int(escape[1:], 8)
            if value > 0o377:
                raise source.error('octal escape value %s outside of '
                                   'range 0-0o377' % escape, len(escape))
            return LITERAL, value
        if kind in DIGITS:
            # 8 or 9: reported as a bad escape below
            raise ValueError
        if len(escape) == 2:
            if kind in ASCIILETTERS:
                raise source.error('bad escape %s' % escape, len(escape))
            return LITERAL, ord(escape[1])
    except ValueError:
        pass
    raise source.error("bad escape %s" % escape, len(escape))
366*cda5da8dSAndroid Build Coastguard Worker
367*cda5da8dSAndroid Build Coastguard Workerdef _escape(source, escape, state):
368*cda5da8dSAndroid Build Coastguard Worker    # handle escape code in expression
369*cda5da8dSAndroid Build Coastguard Worker    code = CATEGORIES.get(escape)
370*cda5da8dSAndroid Build Coastguard Worker    if code:
371*cda5da8dSAndroid Build Coastguard Worker        return code
372*cda5da8dSAndroid Build Coastguard Worker    code = ESCAPES.get(escape)
373*cda5da8dSAndroid Build Coastguard Worker    if code:
374*cda5da8dSAndroid Build Coastguard Worker        return code
375*cda5da8dSAndroid Build Coastguard Worker    try:
376*cda5da8dSAndroid Build Coastguard Worker        c = escape[1:2]
377*cda5da8dSAndroid Build Coastguard Worker        if c == "x":
378*cda5da8dSAndroid Build Coastguard Worker            # hexadecimal escape
379*cda5da8dSAndroid Build Coastguard Worker            escape += source.getwhile(2, HEXDIGITS)
380*cda5da8dSAndroid Build Coastguard Worker            if len(escape) != 4:
381*cda5da8dSAndroid Build Coastguard Worker                raise source.error("incomplete escape %s" % escape, len(escape))
382*cda5da8dSAndroid Build Coastguard Worker            return LITERAL, int(escape[2:], 16)
383*cda5da8dSAndroid Build Coastguard Worker        elif c == "u" and source.istext:
384*cda5da8dSAndroid Build Coastguard Worker            # unicode escape (exactly four digits)
385*cda5da8dSAndroid Build Coastguard Worker            escape += source.getwhile(4, HEXDIGITS)
386*cda5da8dSAndroid Build Coastguard Worker            if len(escape) != 6:
387*cda5da8dSAndroid Build Coastguard Worker                raise source.error("incomplete escape %s" % escape, len(escape))
388*cda5da8dSAndroid Build Coastguard Worker            return LITERAL, int(escape[2:], 16)
389*cda5da8dSAndroid Build Coastguard Worker        elif c == "U" and source.istext:
390*cda5da8dSAndroid Build Coastguard Worker            # unicode escape (exactly eight digits)
391*cda5da8dSAndroid Build Coastguard Worker            escape += source.getwhile(8, HEXDIGITS)
392*cda5da8dSAndroid Build Coastguard Worker            if len(escape) != 10:
393*cda5da8dSAndroid Build Coastguard Worker                raise source.error("incomplete escape %s" % escape, len(escape))
394*cda5da8dSAndroid Build Coastguard Worker            c = int(escape[2:], 16)
395*cda5da8dSAndroid Build Coastguard Worker            chr(c) # raise ValueError for invalid code
396*cda5da8dSAndroid Build Coastguard Worker            return LITERAL, c
397*cda5da8dSAndroid Build Coastguard Worker        elif c == "N" and source.istext:
398*cda5da8dSAndroid Build Coastguard Worker            import unicodedata
399*cda5da8dSAndroid Build Coastguard Worker            # named unicode escape e.g. \N{EM DASH}
400*cda5da8dSAndroid Build Coastguard Worker            if not source.match('{'):
401*cda5da8dSAndroid Build Coastguard Worker                raise source.error("missing {")
402*cda5da8dSAndroid Build Coastguard Worker            charname = source.getuntil('}', 'character name')
403*cda5da8dSAndroid Build Coastguard Worker            try:
404*cda5da8dSAndroid Build Coastguard Worker                c = ord(unicodedata.lookup(charname))
405*cda5da8dSAndroid Build Coastguard Worker            except (KeyError, TypeError):
406*cda5da8dSAndroid Build Coastguard Worker                raise source.error("undefined character name %r" % charname,
407*cda5da8dSAndroid Build Coastguard Worker                                   len(charname) + len(r'\N{}')) from None
408*cda5da8dSAndroid Build Coastguard Worker            return LITERAL, c
409*cda5da8dSAndroid Build Coastguard Worker        elif c == "0":
410*cda5da8dSAndroid Build Coastguard Worker            # octal escape
411*cda5da8dSAndroid Build Coastguard Worker            escape += source.getwhile(2, OCTDIGITS)
412*cda5da8dSAndroid Build Coastguard Worker            return LITERAL, int(escape[1:], 8)
413*cda5da8dSAndroid Build Coastguard Worker        elif c in DIGITS:
414*cda5da8dSAndroid Build Coastguard Worker            # octal escape *or* decimal group reference (sigh)
415*cda5da8dSAndroid Build Coastguard Worker            if source.next in DIGITS:
416*cda5da8dSAndroid Build Coastguard Worker                escape += source.get()
417*cda5da8dSAndroid Build Coastguard Worker                if (escape[1] in OCTDIGITS and escape[2] in OCTDIGITS and
418*cda5da8dSAndroid Build Coastguard Worker                    source.next in OCTDIGITS):
419*cda5da8dSAndroid Build Coastguard Worker                    # got three octal digits; this is an octal escape
420*cda5da8dSAndroid Build Coastguard Worker                    escape += source.get()
421*cda5da8dSAndroid Build Coastguard Worker                    c = int(escape[1:], 8)
422*cda5da8dSAndroid Build Coastguard Worker                    if c > 0o377:
423*cda5da8dSAndroid Build Coastguard Worker                        raise source.error('octal escape value %s outside of '
424*cda5da8dSAndroid Build Coastguard Worker                                           'range 0-0o377' % escape,
425*cda5da8dSAndroid Build Coastguard Worker                                           len(escape))
426*cda5da8dSAndroid Build Coastguard Worker                    return LITERAL, c
427*cda5da8dSAndroid Build Coastguard Worker            # not an octal escape, so this is a group reference
428*cda5da8dSAndroid Build Coastguard Worker            group = int(escape[1:])
429*cda5da8dSAndroid Build Coastguard Worker            if group < state.groups:
430*cda5da8dSAndroid Build Coastguard Worker                if not state.checkgroup(group):
431*cda5da8dSAndroid Build Coastguard Worker                    raise source.error("cannot refer to an open group",
432*cda5da8dSAndroid Build Coastguard Worker                                       len(escape))
433*cda5da8dSAndroid Build Coastguard Worker                state.checklookbehindgroup(group, source)
434*cda5da8dSAndroid Build Coastguard Worker                return GROUPREF, group
435*cda5da8dSAndroid Build Coastguard Worker            raise source.error("invalid group reference %d" % group, len(escape) - 1)
436*cda5da8dSAndroid Build Coastguard Worker        if len(escape) == 2:
437*cda5da8dSAndroid Build Coastguard Worker            if c in ASCIILETTERS:
438*cda5da8dSAndroid Build Coastguard Worker                raise source.error("bad escape %s" % escape, len(escape))
439*cda5da8dSAndroid Build Coastguard Worker            return LITERAL, ord(escape[1])
440*cda5da8dSAndroid Build Coastguard Worker    except ValueError:
441*cda5da8dSAndroid Build Coastguard Worker        pass
442*cda5da8dSAndroid Build Coastguard Worker    raise source.error("bad escape %s" % escape, len(escape))
443*cda5da8dSAndroid Build Coastguard Worker
444*cda5da8dSAndroid Build Coastguard Workerdef _uniq(items):
445*cda5da8dSAndroid Build Coastguard Worker    return list(dict.fromkeys(items))
446*cda5da8dSAndroid Build Coastguard Worker
def _parse_sub(source, state, verbose, nested):
    """Parse an alternation of the form ``a|b|c``.

    Alternatives are read one at a time with _parse() for as long as each
    one is followed by "|".

    Arguments:
        source: tokenizer object; only its match() method is used here.
        state: parser state; passed through to _parse() and SubPattern(),
            and its ``flags`` attribute is read at the top level.
        verbose: truthy when verbose-mode parsing is in effect; forwarded
            to _parse().
        nested: nesting depth; 0 (falsy) means the top-level pattern.

    Returns:
        If there is only one alternative, whatever _parse() returned for
        it.  Otherwise a new SubPattern holding any items common to the
        start of every alternative, followed by either a single IN set
        (when every remaining alternative is exactly one literal or one
        non-negated set) or a BRANCH over the remaining alternatives.
    """
    items = []
    while True:
        # The last argument of _parse() is its "first" flag: true only for
        # the first alternative of the top-level pattern.
        is_first = not nested and not items
        items.append(_parse(source, state, verbose, nested + 1, is_first))
        if not source.match("|"):
            break
        if not nested:
            # Re-derive verbose from state.flags before the next
            # alternative (presumably _parse() may have updated the
            # flags -- confirm there).
            verbose = state.flags & SRE_FLAG_VERBOSE

    if len(items) == 1:
        return items[0]

    subpattern = SubPattern(state)

    # Hoist a common prefix out of the branch: as long as no alternative
    # is empty and all of them begin with the same item, strip that item
    # from each alternative and emit it once in front.
    while all(items):
        prefix = items[0][0]
        if any(item[0] != prefix for item in items[1:]):
            break
        for item in items:
            del item[0]
        subpattern.append(prefix)

    # check if the branch can be replaced by a character set
    charset = []
    for item in items:
        if len(item) != 1:
            break
        op, av = item[0]
        if op is LITERAL:
            charset.append((op, av))
        elif op is IN and av[0][0] is not NEGATE:
            charset.extend(av)
        else:
            break
    else:
        # we can store this as a character set instead of a
        # branch (the compiler may optimize this even more)
        subpattern.append((IN, _uniq(charset)))
        return subpattern

    subpattern.append((BRANCH, (None, items)))
    return subpattern
506*cda5da8dSAndroid Build Coastguard Worker
507*cda5da8dSAndroid Build Coastguard Workerdef _parse(source, state, verbose, nested, first=False):
508*cda5da8dSAndroid Build Coastguard Worker    # parse a simple pattern
509*cda5da8dSAndroid Build Coastguard Worker    subpattern = SubPattern(state)
510*cda5da8dSAndroid Build Coastguard Worker
511*cda5da8dSAndroid Build Coastguard Worker    # precompute constants into local variables
512*cda5da8dSAndroid Build Coastguard Worker    subpatternappend = subpattern.append
513*cda5da8dSAndroid Build Coastguard Worker    sourceget = source.get
514*cda5da8dSAndroid Build Coastguard Worker    sourcematch = source.match
515*cda5da8dSAndroid Build Coastguard Worker    _len = len
516*cda5da8dSAndroid Build Coastguard Worker    _ord = ord
517*cda5da8dSAndroid Build Coastguard Worker
518*cda5da8dSAndroid Build Coastguard Worker    while True:
519*cda5da8dSAndroid Build Coastguard Worker
520*cda5da8dSAndroid Build Coastguard Worker        this = source.next
521*cda5da8dSAndroid Build Coastguard Worker        if this is None:
522*cda5da8dSAndroid Build Coastguard Worker            break # end of pattern
523*cda5da8dSAndroid Build Coastguard Worker        if this in "|)":
524*cda5da8dSAndroid Build Coastguard Worker            break # end of subpattern
525*cda5da8dSAndroid Build Coastguard Worker        sourceget()
526*cda5da8dSAndroid Build Coastguard Worker
527*cda5da8dSAndroid Build Coastguard Worker        if verbose:
528*cda5da8dSAndroid Build Coastguard Worker            # skip whitespace and comments
529*cda5da8dSAndroid Build Coastguard Worker            if this in WHITESPACE:
530*cda5da8dSAndroid Build Coastguard Worker                continue
531*cda5da8dSAndroid Build Coastguard Worker            if this == "#":
532*cda5da8dSAndroid Build Coastguard Worker                while True:
533*cda5da8dSAndroid Build Coastguard Worker                    this = sourceget()
534*cda5da8dSAndroid Build Coastguard Worker                    if this is None or this == "\n":
535*cda5da8dSAndroid Build Coastguard Worker                        break
536*cda5da8dSAndroid Build Coastguard Worker                continue
537*cda5da8dSAndroid Build Coastguard Worker
538*cda5da8dSAndroid Build Coastguard Worker        if this[0] == "\\":
539*cda5da8dSAndroid Build Coastguard Worker            code = _escape(source, this, state)
540*cda5da8dSAndroid Build Coastguard Worker            subpatternappend(code)
541*cda5da8dSAndroid Build Coastguard Worker
542*cda5da8dSAndroid Build Coastguard Worker        elif this not in SPECIAL_CHARS:
543*cda5da8dSAndroid Build Coastguard Worker            subpatternappend((LITERAL, _ord(this)))
544*cda5da8dSAndroid Build Coastguard Worker
545*cda5da8dSAndroid Build Coastguard Worker        elif this == "[":
546*cda5da8dSAndroid Build Coastguard Worker            here = source.tell() - 1
547*cda5da8dSAndroid Build Coastguard Worker            # character set
548*cda5da8dSAndroid Build Coastguard Worker            set = []
549*cda5da8dSAndroid Build Coastguard Worker            setappend = set.append
550*cda5da8dSAndroid Build Coastguard Worker##          if sourcematch(":"):
551*cda5da8dSAndroid Build Coastguard Worker##              pass # handle character classes
552*cda5da8dSAndroid Build Coastguard Worker            if source.next == '[':
553*cda5da8dSAndroid Build Coastguard Worker                import warnings
554*cda5da8dSAndroid Build Coastguard Worker                warnings.warn(
555*cda5da8dSAndroid Build Coastguard Worker                    'Possible nested set at position %d' % source.tell(),
556*cda5da8dSAndroid Build Coastguard Worker                    FutureWarning, stacklevel=nested + 6
557*cda5da8dSAndroid Build Coastguard Worker                )
558*cda5da8dSAndroid Build Coastguard Worker            negate = sourcematch("^")
559*cda5da8dSAndroid Build Coastguard Worker            # check remaining characters
560*cda5da8dSAndroid Build Coastguard Worker            while True:
561*cda5da8dSAndroid Build Coastguard Worker                this = sourceget()
562*cda5da8dSAndroid Build Coastguard Worker                if this is None:
563*cda5da8dSAndroid Build Coastguard Worker                    raise source.error("unterminated character set",
564*cda5da8dSAndroid Build Coastguard Worker                                       source.tell() - here)
565*cda5da8dSAndroid Build Coastguard Worker                if this == "]" and set:
566*cda5da8dSAndroid Build Coastguard Worker                    break
567*cda5da8dSAndroid Build Coastguard Worker                elif this[0] == "\\":
568*cda5da8dSAndroid Build Coastguard Worker                    code1 = _class_escape(source, this)
569*cda5da8dSAndroid Build Coastguard Worker                else:
570*cda5da8dSAndroid Build Coastguard Worker                    if set and this in '-&~|' and source.next == this:
571*cda5da8dSAndroid Build Coastguard Worker                        import warnings
572*cda5da8dSAndroid Build Coastguard Worker                        warnings.warn(
573*cda5da8dSAndroid Build Coastguard Worker                            'Possible set %s at position %d' % (
574*cda5da8dSAndroid Build Coastguard Worker                                'difference' if this == '-' else
575*cda5da8dSAndroid Build Coastguard Worker                                'intersection' if this == '&' else
576*cda5da8dSAndroid Build Coastguard Worker                                'symmetric difference' if this == '~' else
577*cda5da8dSAndroid Build Coastguard Worker                                'union',
578*cda5da8dSAndroid Build Coastguard Worker                                source.tell() - 1),
579*cda5da8dSAndroid Build Coastguard Worker                            FutureWarning, stacklevel=nested + 6
580*cda5da8dSAndroid Build Coastguard Worker                        )
581*cda5da8dSAndroid Build Coastguard Worker                    code1 = LITERAL, _ord(this)
582*cda5da8dSAndroid Build Coastguard Worker                if sourcematch("-"):
583*cda5da8dSAndroid Build Coastguard Worker                    # potential range
584*cda5da8dSAndroid Build Coastguard Worker                    that = sourceget()
585*cda5da8dSAndroid Build Coastguard Worker                    if that is None:
586*cda5da8dSAndroid Build Coastguard Worker                        raise source.error("unterminated character set",
587*cda5da8dSAndroid Build Coastguard Worker                                           source.tell() - here)
588*cda5da8dSAndroid Build Coastguard Worker                    if that == "]":
589*cda5da8dSAndroid Build Coastguard Worker                        if code1[0] is IN:
590*cda5da8dSAndroid Build Coastguard Worker                            code1 = code1[1][0]
591*cda5da8dSAndroid Build Coastguard Worker                        setappend(code1)
592*cda5da8dSAndroid Build Coastguard Worker                        setappend((LITERAL, _ord("-")))
593*cda5da8dSAndroid Build Coastguard Worker                        break
594*cda5da8dSAndroid Build Coastguard Worker                    if that[0] == "\\":
595*cda5da8dSAndroid Build Coastguard Worker                        code2 = _class_escape(source, that)
596*cda5da8dSAndroid Build Coastguard Worker                    else:
597*cda5da8dSAndroid Build Coastguard Worker                        if that == '-':
598*cda5da8dSAndroid Build Coastguard Worker                            import warnings
599*cda5da8dSAndroid Build Coastguard Worker                            warnings.warn(
600*cda5da8dSAndroid Build Coastguard Worker                                'Possible set difference at position %d' % (
601*cda5da8dSAndroid Build Coastguard Worker                                    source.tell() - 2),
602*cda5da8dSAndroid Build Coastguard Worker                                FutureWarning, stacklevel=nested + 6
603*cda5da8dSAndroid Build Coastguard Worker                            )
604*cda5da8dSAndroid Build Coastguard Worker                        code2 = LITERAL, _ord(that)
605*cda5da8dSAndroid Build Coastguard Worker                    if code1[0] != LITERAL or code2[0] != LITERAL:
606*cda5da8dSAndroid Build Coastguard Worker                        msg = "bad character range %s-%s" % (this, that)
607*cda5da8dSAndroid Build Coastguard Worker                        raise source.error(msg, len(this) + 1 + len(that))
608*cda5da8dSAndroid Build Coastguard Worker                    lo = code1[1]
609*cda5da8dSAndroid Build Coastguard Worker                    hi = code2[1]
610*cda5da8dSAndroid Build Coastguard Worker                    if hi < lo:
611*cda5da8dSAndroid Build Coastguard Worker                        msg = "bad character range %s-%s" % (this, that)
612*cda5da8dSAndroid Build Coastguard Worker                        raise source.error(msg, len(this) + 1 + len(that))
613*cda5da8dSAndroid Build Coastguard Worker                    setappend((RANGE, (lo, hi)))
614*cda5da8dSAndroid Build Coastguard Worker                else:
615*cda5da8dSAndroid Build Coastguard Worker                    if code1[0] is IN:
616*cda5da8dSAndroid Build Coastguard Worker                        code1 = code1[1][0]
617*cda5da8dSAndroid Build Coastguard Worker                    setappend(code1)
618*cda5da8dSAndroid Build Coastguard Worker
619*cda5da8dSAndroid Build Coastguard Worker            set = _uniq(set)
620*cda5da8dSAndroid Build Coastguard Worker            # XXX: <fl> should move set optimization to compiler!
621*cda5da8dSAndroid Build Coastguard Worker            if _len(set) == 1 and set[0][0] is LITERAL:
622*cda5da8dSAndroid Build Coastguard Worker                # optimization
623*cda5da8dSAndroid Build Coastguard Worker                if negate:
624*cda5da8dSAndroid Build Coastguard Worker                    subpatternappend((NOT_LITERAL, set[0][1]))
625*cda5da8dSAndroid Build Coastguard Worker                else:
626*cda5da8dSAndroid Build Coastguard Worker                    subpatternappend(set[0])
627*cda5da8dSAndroid Build Coastguard Worker            else:
628*cda5da8dSAndroid Build Coastguard Worker                if negate:
629*cda5da8dSAndroid Build Coastguard Worker                    set.insert(0, (NEGATE, None))
630*cda5da8dSAndroid Build Coastguard Worker                # charmap optimization can't be added here because
631*cda5da8dSAndroid Build Coastguard Worker                # global flags still are not known
632*cda5da8dSAndroid Build Coastguard Worker                subpatternappend((IN, set))
633*cda5da8dSAndroid Build Coastguard Worker
634*cda5da8dSAndroid Build Coastguard Worker        elif this in REPEAT_CHARS:
635*cda5da8dSAndroid Build Coastguard Worker            # repeat previous item
636*cda5da8dSAndroid Build Coastguard Worker            here = source.tell()
637*cda5da8dSAndroid Build Coastguard Worker            if this == "?":
638*cda5da8dSAndroid Build Coastguard Worker                min, max = 0, 1
639*cda5da8dSAndroid Build Coastguard Worker            elif this == "*":
640*cda5da8dSAndroid Build Coastguard Worker                min, max = 0, MAXREPEAT
641*cda5da8dSAndroid Build Coastguard Worker
642*cda5da8dSAndroid Build Coastguard Worker            elif this == "+":
643*cda5da8dSAndroid Build Coastguard Worker                min, max = 1, MAXREPEAT
644*cda5da8dSAndroid Build Coastguard Worker            elif this == "{":
645*cda5da8dSAndroid Build Coastguard Worker                if source.next == "}":
646*cda5da8dSAndroid Build Coastguard Worker                    subpatternappend((LITERAL, _ord(this)))
647*cda5da8dSAndroid Build Coastguard Worker                    continue
648*cda5da8dSAndroid Build Coastguard Worker
649*cda5da8dSAndroid Build Coastguard Worker                min, max = 0, MAXREPEAT
650*cda5da8dSAndroid Build Coastguard Worker                lo = hi = ""
651*cda5da8dSAndroid Build Coastguard Worker                while source.next in DIGITS:
652*cda5da8dSAndroid Build Coastguard Worker                    lo += sourceget()
653*cda5da8dSAndroid Build Coastguard Worker                if sourcematch(","):
654*cda5da8dSAndroid Build Coastguard Worker                    while source.next in DIGITS:
655*cda5da8dSAndroid Build Coastguard Worker                        hi += sourceget()
656*cda5da8dSAndroid Build Coastguard Worker                else:
657*cda5da8dSAndroid Build Coastguard Worker                    hi = lo
658*cda5da8dSAndroid Build Coastguard Worker                if not sourcematch("}"):
659*cda5da8dSAndroid Build Coastguard Worker                    subpatternappend((LITERAL, _ord(this)))
660*cda5da8dSAndroid Build Coastguard Worker                    source.seek(here)
661*cda5da8dSAndroid Build Coastguard Worker                    continue
662*cda5da8dSAndroid Build Coastguard Worker
663*cda5da8dSAndroid Build Coastguard Worker                if lo:
664*cda5da8dSAndroid Build Coastguard Worker                    min = int(lo)
665*cda5da8dSAndroid Build Coastguard Worker                    if min >= MAXREPEAT:
666*cda5da8dSAndroid Build Coastguard Worker                        raise OverflowError("the repetition number is too large")
667*cda5da8dSAndroid Build Coastguard Worker                if hi:
668*cda5da8dSAndroid Build Coastguard Worker                    max = int(hi)
669*cda5da8dSAndroid Build Coastguard Worker                    if max >= MAXREPEAT:
670*cda5da8dSAndroid Build Coastguard Worker                        raise OverflowError("the repetition number is too large")
671*cda5da8dSAndroid Build Coastguard Worker                    if max < min:
672*cda5da8dSAndroid Build Coastguard Worker                        raise source.error("min repeat greater than max repeat",
673*cda5da8dSAndroid Build Coastguard Worker                                           source.tell() - here)
674*cda5da8dSAndroid Build Coastguard Worker            else:
675*cda5da8dSAndroid Build Coastguard Worker                raise AssertionError("unsupported quantifier %r" % (char,))
676*cda5da8dSAndroid Build Coastguard Worker            # figure out which item to repeat
677*cda5da8dSAndroid Build Coastguard Worker            if subpattern:
678*cda5da8dSAndroid Build Coastguard Worker                item = subpattern[-1:]
679*cda5da8dSAndroid Build Coastguard Worker            else:
680*cda5da8dSAndroid Build Coastguard Worker                item = None
681*cda5da8dSAndroid Build Coastguard Worker            if not item or item[0][0] is AT:
682*cda5da8dSAndroid Build Coastguard Worker                raise source.error("nothing to repeat",
683*cda5da8dSAndroid Build Coastguard Worker                                   source.tell() - here + len(this))
684*cda5da8dSAndroid Build Coastguard Worker            if item[0][0] in _REPEATCODES:
685*cda5da8dSAndroid Build Coastguard Worker                raise source.error("multiple repeat",
686*cda5da8dSAndroid Build Coastguard Worker                                   source.tell() - here + len(this))
687*cda5da8dSAndroid Build Coastguard Worker            if item[0][0] is SUBPATTERN:
688*cda5da8dSAndroid Build Coastguard Worker                group, add_flags, del_flags, p = item[0][1]
689*cda5da8dSAndroid Build Coastguard Worker                if group is None and not add_flags and not del_flags:
690*cda5da8dSAndroid Build Coastguard Worker                    item = p
691*cda5da8dSAndroid Build Coastguard Worker            if sourcematch("?"):
692*cda5da8dSAndroid Build Coastguard Worker                # Non-Greedy Match
693*cda5da8dSAndroid Build Coastguard Worker                subpattern[-1] = (MIN_REPEAT, (min, max, item))
694*cda5da8dSAndroid Build Coastguard Worker            elif sourcematch("+"):
695*cda5da8dSAndroid Build Coastguard Worker                # Possessive Match (Always Greedy)
696*cda5da8dSAndroid Build Coastguard Worker                subpattern[-1] = (POSSESSIVE_REPEAT, (min, max, item))
697*cda5da8dSAndroid Build Coastguard Worker            else:
698*cda5da8dSAndroid Build Coastguard Worker                # Greedy Match
699*cda5da8dSAndroid Build Coastguard Worker                subpattern[-1] = (MAX_REPEAT, (min, max, item))
700*cda5da8dSAndroid Build Coastguard Worker
701*cda5da8dSAndroid Build Coastguard Worker        elif this == ".":
702*cda5da8dSAndroid Build Coastguard Worker            subpatternappend((ANY, None))
703*cda5da8dSAndroid Build Coastguard Worker
704*cda5da8dSAndroid Build Coastguard Worker        elif this == "(":
705*cda5da8dSAndroid Build Coastguard Worker            start = source.tell() - 1
706*cda5da8dSAndroid Build Coastguard Worker            capture = True
707*cda5da8dSAndroid Build Coastguard Worker            atomic = False
708*cda5da8dSAndroid Build Coastguard Worker            name = None
709*cda5da8dSAndroid Build Coastguard Worker            add_flags = 0
710*cda5da8dSAndroid Build Coastguard Worker            del_flags = 0
711*cda5da8dSAndroid Build Coastguard Worker            if sourcematch("?"):
712*cda5da8dSAndroid Build Coastguard Worker                # options
713*cda5da8dSAndroid Build Coastguard Worker                char = sourceget()
714*cda5da8dSAndroid Build Coastguard Worker                if char is None:
715*cda5da8dSAndroid Build Coastguard Worker                    raise source.error("unexpected end of pattern")
716*cda5da8dSAndroid Build Coastguard Worker                if char == "P":
717*cda5da8dSAndroid Build Coastguard Worker                    # python extensions
718*cda5da8dSAndroid Build Coastguard Worker                    if sourcematch("<"):
719*cda5da8dSAndroid Build Coastguard Worker                        # named group: skip forward to end of name
720*cda5da8dSAndroid Build Coastguard Worker                        name = source.getuntil(">", "group name")
721*cda5da8dSAndroid Build Coastguard Worker                        source.checkgroupname(name, 1, nested)
722*cda5da8dSAndroid Build Coastguard Worker                    elif sourcematch("="):
723*cda5da8dSAndroid Build Coastguard Worker                        # named backreference
724*cda5da8dSAndroid Build Coastguard Worker                        name = source.getuntil(")", "group name")
725*cda5da8dSAndroid Build Coastguard Worker                        source.checkgroupname(name, 1, nested)
726*cda5da8dSAndroid Build Coastguard Worker                        gid = state.groupdict.get(name)
727*cda5da8dSAndroid Build Coastguard Worker                        if gid is None:
728*cda5da8dSAndroid Build Coastguard Worker                            msg = "unknown group name %r" % name
729*cda5da8dSAndroid Build Coastguard Worker                            raise source.error(msg, len(name) + 1)
730*cda5da8dSAndroid Build Coastguard Worker                        if not state.checkgroup(gid):
731*cda5da8dSAndroid Build Coastguard Worker                            raise source.error("cannot refer to an open group",
732*cda5da8dSAndroid Build Coastguard Worker                                               len(name) + 1)
733*cda5da8dSAndroid Build Coastguard Worker                        state.checklookbehindgroup(gid, source)
734*cda5da8dSAndroid Build Coastguard Worker                        subpatternappend((GROUPREF, gid))
735*cda5da8dSAndroid Build Coastguard Worker                        continue
736*cda5da8dSAndroid Build Coastguard Worker
737*cda5da8dSAndroid Build Coastguard Worker                    else:
738*cda5da8dSAndroid Build Coastguard Worker                        char = sourceget()
739*cda5da8dSAndroid Build Coastguard Worker                        if char is None:
740*cda5da8dSAndroid Build Coastguard Worker                            raise source.error("unexpected end of pattern")
741*cda5da8dSAndroid Build Coastguard Worker                        raise source.error("unknown extension ?P" + char,
742*cda5da8dSAndroid Build Coastguard Worker                                           len(char) + 2)
743*cda5da8dSAndroid Build Coastguard Worker                elif char == ":":
744*cda5da8dSAndroid Build Coastguard Worker                    # non-capturing group
745*cda5da8dSAndroid Build Coastguard Worker                    capture = False
746*cda5da8dSAndroid Build Coastguard Worker                elif char == "#":
747*cda5da8dSAndroid Build Coastguard Worker                    # comment
748*cda5da8dSAndroid Build Coastguard Worker                    while True:
749*cda5da8dSAndroid Build Coastguard Worker                        if source.next is None:
750*cda5da8dSAndroid Build Coastguard Worker                            raise source.error("missing ), unterminated comment",
751*cda5da8dSAndroid Build Coastguard Worker                                               source.tell() - start)
752*cda5da8dSAndroid Build Coastguard Worker                        if sourceget() == ")":
753*cda5da8dSAndroid Build Coastguard Worker                            break
754*cda5da8dSAndroid Build Coastguard Worker                    continue
755*cda5da8dSAndroid Build Coastguard Worker
756*cda5da8dSAndroid Build Coastguard Worker                elif char in "=!<":
757*cda5da8dSAndroid Build Coastguard Worker                    # lookahead assertions
758*cda5da8dSAndroid Build Coastguard Worker                    dir = 1
759*cda5da8dSAndroid Build Coastguard Worker                    if char == "<":
760*cda5da8dSAndroid Build Coastguard Worker                        char = sourceget()
761*cda5da8dSAndroid Build Coastguard Worker                        if char is None:
762*cda5da8dSAndroid Build Coastguard Worker                            raise source.error("unexpected end of pattern")
763*cda5da8dSAndroid Build Coastguard Worker                        if char not in "=!":
764*cda5da8dSAndroid Build Coastguard Worker                            raise source.error("unknown extension ?<" + char,
765*cda5da8dSAndroid Build Coastguard Worker                                               len(char) + 2)
766*cda5da8dSAndroid Build Coastguard Worker                        dir = -1 # lookbehind
767*cda5da8dSAndroid Build Coastguard Worker                        lookbehindgroups = state.lookbehindgroups
768*cda5da8dSAndroid Build Coastguard Worker                        if lookbehindgroups is None:
769*cda5da8dSAndroid Build Coastguard Worker                            state.lookbehindgroups = state.groups
770*cda5da8dSAndroid Build Coastguard Worker                    p = _parse_sub(source, state, verbose, nested + 1)
771*cda5da8dSAndroid Build Coastguard Worker                    if dir < 0:
772*cda5da8dSAndroid Build Coastguard Worker                        if lookbehindgroups is None:
773*cda5da8dSAndroid Build Coastguard Worker                            state.lookbehindgroups = None
774*cda5da8dSAndroid Build Coastguard Worker                    if not sourcematch(")"):
775*cda5da8dSAndroid Build Coastguard Worker                        raise source.error("missing ), unterminated subpattern",
776*cda5da8dSAndroid Build Coastguard Worker                                           source.tell() - start)
777*cda5da8dSAndroid Build Coastguard Worker                    if char == "=":
778*cda5da8dSAndroid Build Coastguard Worker                        subpatternappend((ASSERT, (dir, p)))
779*cda5da8dSAndroid Build Coastguard Worker                    else:
780*cda5da8dSAndroid Build Coastguard Worker                        subpatternappend((ASSERT_NOT, (dir, p)))
781*cda5da8dSAndroid Build Coastguard Worker                    continue
782*cda5da8dSAndroid Build Coastguard Worker
783*cda5da8dSAndroid Build Coastguard Worker                elif char == "(":
784*cda5da8dSAndroid Build Coastguard Worker                    # conditional backreference group
785*cda5da8dSAndroid Build Coastguard Worker                    condname = source.getuntil(")", "group name")
786*cda5da8dSAndroid Build Coastguard Worker                    if condname.isidentifier():
787*cda5da8dSAndroid Build Coastguard Worker                        source.checkgroupname(condname, 1, nested)
788*cda5da8dSAndroid Build Coastguard Worker                        condgroup = state.groupdict.get(condname)
789*cda5da8dSAndroid Build Coastguard Worker                        if condgroup is None:
790*cda5da8dSAndroid Build Coastguard Worker                            msg = "unknown group name %r" % condname
791*cda5da8dSAndroid Build Coastguard Worker                            raise source.error(msg, len(condname) + 1)
792*cda5da8dSAndroid Build Coastguard Worker                    else:
793*cda5da8dSAndroid Build Coastguard Worker                        try:
794*cda5da8dSAndroid Build Coastguard Worker                            condgroup = int(condname)
795*cda5da8dSAndroid Build Coastguard Worker                            if condgroup < 0:
796*cda5da8dSAndroid Build Coastguard Worker                                raise ValueError
797*cda5da8dSAndroid Build Coastguard Worker                        except ValueError:
798*cda5da8dSAndroid Build Coastguard Worker                            msg = "bad character in group name %r" % condname
799*cda5da8dSAndroid Build Coastguard Worker                            raise source.error(msg, len(condname) + 1) from None
800*cda5da8dSAndroid Build Coastguard Worker                        if not condgroup:
801*cda5da8dSAndroid Build Coastguard Worker                            raise source.error("bad group number",
802*cda5da8dSAndroid Build Coastguard Worker                                               len(condname) + 1)
803*cda5da8dSAndroid Build Coastguard Worker                        if condgroup >= MAXGROUPS:
804*cda5da8dSAndroid Build Coastguard Worker                            msg = "invalid group reference %d" % condgroup
805*cda5da8dSAndroid Build Coastguard Worker                            raise source.error(msg, len(condname) + 1)
806*cda5da8dSAndroid Build Coastguard Worker                        if condgroup not in state.grouprefpos:
807*cda5da8dSAndroid Build Coastguard Worker                            state.grouprefpos[condgroup] = (
808*cda5da8dSAndroid Build Coastguard Worker                                source.tell() - len(condname) - 1
809*cda5da8dSAndroid Build Coastguard Worker                            )
810*cda5da8dSAndroid Build Coastguard Worker                        if not (condname.isdecimal() and condname.isascii()):
811*cda5da8dSAndroid Build Coastguard Worker                            import warnings
812*cda5da8dSAndroid Build Coastguard Worker                            warnings.warn(
813*cda5da8dSAndroid Build Coastguard Worker                                "bad character in group name %s at position %d" %
814*cda5da8dSAndroid Build Coastguard Worker                                (repr(condname) if source.istext else ascii(condname),
815*cda5da8dSAndroid Build Coastguard Worker                                 source.tell() - len(condname) - 1),
816*cda5da8dSAndroid Build Coastguard Worker                                DeprecationWarning, stacklevel=nested + 6
817*cda5da8dSAndroid Build Coastguard Worker                            )
818*cda5da8dSAndroid Build Coastguard Worker                    state.checklookbehindgroup(condgroup, source)
819*cda5da8dSAndroid Build Coastguard Worker                    item_yes = _parse(source, state, verbose, nested + 1)
820*cda5da8dSAndroid Build Coastguard Worker                    if source.match("|"):
821*cda5da8dSAndroid Build Coastguard Worker                        item_no = _parse(source, state, verbose, nested + 1)
822*cda5da8dSAndroid Build Coastguard Worker                        if source.next == "|":
823*cda5da8dSAndroid Build Coastguard Worker                            raise source.error("conditional backref with more than two branches")
824*cda5da8dSAndroid Build Coastguard Worker                    else:
825*cda5da8dSAndroid Build Coastguard Worker                        item_no = None
826*cda5da8dSAndroid Build Coastguard Worker                    if not source.match(")"):
827*cda5da8dSAndroid Build Coastguard Worker                        raise source.error("missing ), unterminated subpattern",
828*cda5da8dSAndroid Build Coastguard Worker                                           source.tell() - start)
829*cda5da8dSAndroid Build Coastguard Worker                    subpatternappend((GROUPREF_EXISTS, (condgroup, item_yes, item_no)))
830*cda5da8dSAndroid Build Coastguard Worker                    continue
831*cda5da8dSAndroid Build Coastguard Worker
832*cda5da8dSAndroid Build Coastguard Worker                elif char == ">":
833*cda5da8dSAndroid Build Coastguard Worker                    # non-capturing, atomic group
834*cda5da8dSAndroid Build Coastguard Worker                    capture = False
835*cda5da8dSAndroid Build Coastguard Worker                    atomic = True
836*cda5da8dSAndroid Build Coastguard Worker                elif char in FLAGS or char == "-":
837*cda5da8dSAndroid Build Coastguard Worker                    # flags
838*cda5da8dSAndroid Build Coastguard Worker                    flags = _parse_flags(source, state, char)
839*cda5da8dSAndroid Build Coastguard Worker                    if flags is None:  # global flags
840*cda5da8dSAndroid Build Coastguard Worker                        if not first or subpattern:
841*cda5da8dSAndroid Build Coastguard Worker                            raise source.error('global flags not at the start '
842*cda5da8dSAndroid Build Coastguard Worker                                               'of the expression',
843*cda5da8dSAndroid Build Coastguard Worker                                               source.tell() - start)
844*cda5da8dSAndroid Build Coastguard Worker                        verbose = state.flags & SRE_FLAG_VERBOSE
845*cda5da8dSAndroid Build Coastguard Worker                        continue
846*cda5da8dSAndroid Build Coastguard Worker
847*cda5da8dSAndroid Build Coastguard Worker                    add_flags, del_flags = flags
848*cda5da8dSAndroid Build Coastguard Worker                    capture = False
849*cda5da8dSAndroid Build Coastguard Worker                else:
850*cda5da8dSAndroid Build Coastguard Worker                    raise source.error("unknown extension ?" + char,
851*cda5da8dSAndroid Build Coastguard Worker                                       len(char) + 1)
852*cda5da8dSAndroid Build Coastguard Worker
853*cda5da8dSAndroid Build Coastguard Worker            # parse group contents
854*cda5da8dSAndroid Build Coastguard Worker            if capture:
855*cda5da8dSAndroid Build Coastguard Worker                try:
856*cda5da8dSAndroid Build Coastguard Worker                    group = state.opengroup(name)
857*cda5da8dSAndroid Build Coastguard Worker                except error as err:
858*cda5da8dSAndroid Build Coastguard Worker                    raise source.error(err.msg, len(name) + 1) from None
859*cda5da8dSAndroid Build Coastguard Worker            else:
860*cda5da8dSAndroid Build Coastguard Worker                group = None
861*cda5da8dSAndroid Build Coastguard Worker            sub_verbose = ((verbose or (add_flags & SRE_FLAG_VERBOSE)) and
862*cda5da8dSAndroid Build Coastguard Worker                           not (del_flags & SRE_FLAG_VERBOSE))
863*cda5da8dSAndroid Build Coastguard Worker            p = _parse_sub(source, state, sub_verbose, nested + 1)
864*cda5da8dSAndroid Build Coastguard Worker            if not source.match(")"):
865*cda5da8dSAndroid Build Coastguard Worker                raise source.error("missing ), unterminated subpattern",
866*cda5da8dSAndroid Build Coastguard Worker                                   source.tell() - start)
867*cda5da8dSAndroid Build Coastguard Worker            if group is not None:
868*cda5da8dSAndroid Build Coastguard Worker                state.closegroup(group, p)
869*cda5da8dSAndroid Build Coastguard Worker            if atomic:
870*cda5da8dSAndroid Build Coastguard Worker                assert group is None
871*cda5da8dSAndroid Build Coastguard Worker                subpatternappend((ATOMIC_GROUP, p))
872*cda5da8dSAndroid Build Coastguard Worker            else:
873*cda5da8dSAndroid Build Coastguard Worker                subpatternappend((SUBPATTERN, (group, add_flags, del_flags, p)))
874*cda5da8dSAndroid Build Coastguard Worker
875*cda5da8dSAndroid Build Coastguard Worker        elif this == "^":
876*cda5da8dSAndroid Build Coastguard Worker            subpatternappend((AT, AT_BEGINNING))
877*cda5da8dSAndroid Build Coastguard Worker
878*cda5da8dSAndroid Build Coastguard Worker        elif this == "$":
879*cda5da8dSAndroid Build Coastguard Worker            subpatternappend((AT, AT_END))
880*cda5da8dSAndroid Build Coastguard Worker
881*cda5da8dSAndroid Build Coastguard Worker        else:
882*cda5da8dSAndroid Build Coastguard Worker            raise AssertionError("unsupported special character %r" % (char,))
883*cda5da8dSAndroid Build Coastguard Worker
884*cda5da8dSAndroid Build Coastguard Worker    # unpack non-capturing groups
885*cda5da8dSAndroid Build Coastguard Worker    for i in range(len(subpattern))[::-1]:
886*cda5da8dSAndroid Build Coastguard Worker        op, av = subpattern[i]
887*cda5da8dSAndroid Build Coastguard Worker        if op is SUBPATTERN:
888*cda5da8dSAndroid Build Coastguard Worker            group, add_flags, del_flags, p = av
889*cda5da8dSAndroid Build Coastguard Worker            if group is None and not add_flags and not del_flags:
890*cda5da8dSAndroid Build Coastguard Worker                subpattern[i: i+1] = p
891*cda5da8dSAndroid Build Coastguard Worker
892*cda5da8dSAndroid Build Coastguard Worker    return subpattern
893*cda5da8dSAndroid Build Coastguard Worker
894*cda5da8dSAndroid Build Coastguard Workerdef _parse_flags(source, state, char):
895*cda5da8dSAndroid Build Coastguard Worker    sourceget = source.get
896*cda5da8dSAndroid Build Coastguard Worker    add_flags = 0
897*cda5da8dSAndroid Build Coastguard Worker    del_flags = 0
898*cda5da8dSAndroid Build Coastguard Worker    if char != "-":
899*cda5da8dSAndroid Build Coastguard Worker        while True:
900*cda5da8dSAndroid Build Coastguard Worker            flag = FLAGS[char]
901*cda5da8dSAndroid Build Coastguard Worker            if source.istext:
902*cda5da8dSAndroid Build Coastguard Worker                if char == 'L':
903*cda5da8dSAndroid Build Coastguard Worker                    msg = "bad inline flags: cannot use 'L' flag with a str pattern"
904*cda5da8dSAndroid Build Coastguard Worker                    raise source.error(msg)
905*cda5da8dSAndroid Build Coastguard Worker            else:
906*cda5da8dSAndroid Build Coastguard Worker                if char == 'u':
907*cda5da8dSAndroid Build Coastguard Worker                    msg = "bad inline flags: cannot use 'u' flag with a bytes pattern"
908*cda5da8dSAndroid Build Coastguard Worker                    raise source.error(msg)
909*cda5da8dSAndroid Build Coastguard Worker            add_flags |= flag
910*cda5da8dSAndroid Build Coastguard Worker            if (flag & TYPE_FLAGS) and (add_flags & TYPE_FLAGS) != flag:
911*cda5da8dSAndroid Build Coastguard Worker                msg = "bad inline flags: flags 'a', 'u' and 'L' are incompatible"
912*cda5da8dSAndroid Build Coastguard Worker                raise source.error(msg)
913*cda5da8dSAndroid Build Coastguard Worker            char = sourceget()
914*cda5da8dSAndroid Build Coastguard Worker            if char is None:
915*cda5da8dSAndroid Build Coastguard Worker                raise source.error("missing -, : or )")
916*cda5da8dSAndroid Build Coastguard Worker            if char in ")-:":
917*cda5da8dSAndroid Build Coastguard Worker                break
918*cda5da8dSAndroid Build Coastguard Worker            if char not in FLAGS:
919*cda5da8dSAndroid Build Coastguard Worker                msg = "unknown flag" if char.isalpha() else "missing -, : or )"
920*cda5da8dSAndroid Build Coastguard Worker                raise source.error(msg, len(char))
921*cda5da8dSAndroid Build Coastguard Worker    if char == ")":
922*cda5da8dSAndroid Build Coastguard Worker        state.flags |= add_flags
923*cda5da8dSAndroid Build Coastguard Worker        return None
924*cda5da8dSAndroid Build Coastguard Worker    if add_flags & GLOBAL_FLAGS:
925*cda5da8dSAndroid Build Coastguard Worker        raise source.error("bad inline flags: cannot turn on global flag", 1)
926*cda5da8dSAndroid Build Coastguard Worker    if char == "-":
927*cda5da8dSAndroid Build Coastguard Worker        char = sourceget()
928*cda5da8dSAndroid Build Coastguard Worker        if char is None:
929*cda5da8dSAndroid Build Coastguard Worker            raise source.error("missing flag")
930*cda5da8dSAndroid Build Coastguard Worker        if char not in FLAGS:
931*cda5da8dSAndroid Build Coastguard Worker            msg = "unknown flag" if char.isalpha() else "missing flag"
932*cda5da8dSAndroid Build Coastguard Worker            raise source.error(msg, len(char))
933*cda5da8dSAndroid Build Coastguard Worker        while True:
934*cda5da8dSAndroid Build Coastguard Worker            flag = FLAGS[char]
935*cda5da8dSAndroid Build Coastguard Worker            if flag & TYPE_FLAGS:
936*cda5da8dSAndroid Build Coastguard Worker                msg = "bad inline flags: cannot turn off flags 'a', 'u' and 'L'"
937*cda5da8dSAndroid Build Coastguard Worker                raise source.error(msg)
938*cda5da8dSAndroid Build Coastguard Worker            del_flags |= flag
939*cda5da8dSAndroid Build Coastguard Worker            char = sourceget()
940*cda5da8dSAndroid Build Coastguard Worker            if char is None:
941*cda5da8dSAndroid Build Coastguard Worker                raise source.error("missing :")
942*cda5da8dSAndroid Build Coastguard Worker            if char == ":":
943*cda5da8dSAndroid Build Coastguard Worker                break
944*cda5da8dSAndroid Build Coastguard Worker            if char not in FLAGS:
945*cda5da8dSAndroid Build Coastguard Worker                msg = "unknown flag" if char.isalpha() else "missing :"
946*cda5da8dSAndroid Build Coastguard Worker                raise source.error(msg, len(char))
947*cda5da8dSAndroid Build Coastguard Worker    assert char == ":"
948*cda5da8dSAndroid Build Coastguard Worker    if del_flags & GLOBAL_FLAGS:
949*cda5da8dSAndroid Build Coastguard Worker        raise source.error("bad inline flags: cannot turn off global flag", 1)
950*cda5da8dSAndroid Build Coastguard Worker    if add_flags & del_flags:
951*cda5da8dSAndroid Build Coastguard Worker        raise source.error("bad inline flags: flag turned on and off", 1)
952*cda5da8dSAndroid Build Coastguard Worker    return add_flags, del_flags
953*cda5da8dSAndroid Build Coastguard Worker
954*cda5da8dSAndroid Build Coastguard Workerdef fix_flags(src, flags):
955*cda5da8dSAndroid Build Coastguard Worker    # Check and fix flags according to the type of pattern (str or bytes)
956*cda5da8dSAndroid Build Coastguard Worker    if isinstance(src, str):
957*cda5da8dSAndroid Build Coastguard Worker        if flags & SRE_FLAG_LOCALE:
958*cda5da8dSAndroid Build Coastguard Worker            raise ValueError("cannot use LOCALE flag with a str pattern")
959*cda5da8dSAndroid Build Coastguard Worker        if not flags & SRE_FLAG_ASCII:
960*cda5da8dSAndroid Build Coastguard Worker            flags |= SRE_FLAG_UNICODE
961*cda5da8dSAndroid Build Coastguard Worker        elif flags & SRE_FLAG_UNICODE:
962*cda5da8dSAndroid Build Coastguard Worker            raise ValueError("ASCII and UNICODE flags are incompatible")
963*cda5da8dSAndroid Build Coastguard Worker    else:
964*cda5da8dSAndroid Build Coastguard Worker        if flags & SRE_FLAG_UNICODE:
965*cda5da8dSAndroid Build Coastguard Worker            raise ValueError("cannot use UNICODE flag with a bytes pattern")
966*cda5da8dSAndroid Build Coastguard Worker        if flags & SRE_FLAG_LOCALE and flags & SRE_FLAG_ASCII:
967*cda5da8dSAndroid Build Coastguard Worker            raise ValueError("ASCII and LOCALE flags are incompatible")
968*cda5da8dSAndroid Build Coastguard Worker    return flags
969*cda5da8dSAndroid Build Coastguard Worker
970*cda5da8dSAndroid Build Coastguard Workerdef parse(str, flags=0, state=None):
971*cda5da8dSAndroid Build Coastguard Worker    # parse 're' pattern into list of (opcode, argument) tuples
972*cda5da8dSAndroid Build Coastguard Worker
973*cda5da8dSAndroid Build Coastguard Worker    source = Tokenizer(str)
974*cda5da8dSAndroid Build Coastguard Worker
975*cda5da8dSAndroid Build Coastguard Worker    if state is None:
976*cda5da8dSAndroid Build Coastguard Worker        state = State()
977*cda5da8dSAndroid Build Coastguard Worker    state.flags = flags
978*cda5da8dSAndroid Build Coastguard Worker    state.str = str
979*cda5da8dSAndroid Build Coastguard Worker
980*cda5da8dSAndroid Build Coastguard Worker    p = _parse_sub(source, state, flags & SRE_FLAG_VERBOSE, 0)
981*cda5da8dSAndroid Build Coastguard Worker    p.state.flags = fix_flags(str, p.state.flags)
982*cda5da8dSAndroid Build Coastguard Worker
983*cda5da8dSAndroid Build Coastguard Worker    if source.next is not None:
984*cda5da8dSAndroid Build Coastguard Worker        assert source.next == ")"
985*cda5da8dSAndroid Build Coastguard Worker        raise source.error("unbalanced parenthesis")
986*cda5da8dSAndroid Build Coastguard Worker
987*cda5da8dSAndroid Build Coastguard Worker    for g in p.state.grouprefpos:
988*cda5da8dSAndroid Build Coastguard Worker        if g >= p.state.groups:
989*cda5da8dSAndroid Build Coastguard Worker            msg = "invalid group reference %d" % g
990*cda5da8dSAndroid Build Coastguard Worker            raise error(msg, str, p.state.grouprefpos[g])
991*cda5da8dSAndroid Build Coastguard Worker
992*cda5da8dSAndroid Build Coastguard Worker    if flags & SRE_FLAG_DEBUG:
993*cda5da8dSAndroid Build Coastguard Worker        p.dump()
994*cda5da8dSAndroid Build Coastguard Worker
995*cda5da8dSAndroid Build Coastguard Worker    return p
996*cda5da8dSAndroid Build Coastguard Worker
997*cda5da8dSAndroid Build Coastguard Workerdef parse_template(source, state):
998*cda5da8dSAndroid Build Coastguard Worker    # parse 're' replacement string into list of literals and
999*cda5da8dSAndroid Build Coastguard Worker    # group references
1000*cda5da8dSAndroid Build Coastguard Worker    s = Tokenizer(source)
1001*cda5da8dSAndroid Build Coastguard Worker    sget = s.get
1002*cda5da8dSAndroid Build Coastguard Worker    groups = []
1003*cda5da8dSAndroid Build Coastguard Worker    literals = []
1004*cda5da8dSAndroid Build Coastguard Worker    literal = []
1005*cda5da8dSAndroid Build Coastguard Worker    lappend = literal.append
1006*cda5da8dSAndroid Build Coastguard Worker    def addgroup(index, pos):
1007*cda5da8dSAndroid Build Coastguard Worker        if index > state.groups:
1008*cda5da8dSAndroid Build Coastguard Worker            raise s.error("invalid group reference %d" % index, pos)
1009*cda5da8dSAndroid Build Coastguard Worker        if literal:
1010*cda5da8dSAndroid Build Coastguard Worker            literals.append(''.join(literal))
1011*cda5da8dSAndroid Build Coastguard Worker            del literal[:]
1012*cda5da8dSAndroid Build Coastguard Worker        groups.append((len(literals), index))
1013*cda5da8dSAndroid Build Coastguard Worker        literals.append(None)
1014*cda5da8dSAndroid Build Coastguard Worker    groupindex = state.groupindex
1015*cda5da8dSAndroid Build Coastguard Worker    while True:
1016*cda5da8dSAndroid Build Coastguard Worker        this = sget()
1017*cda5da8dSAndroid Build Coastguard Worker        if this is None:
1018*cda5da8dSAndroid Build Coastguard Worker            break # end of replacement string
1019*cda5da8dSAndroid Build Coastguard Worker        if this[0] == "\\":
1020*cda5da8dSAndroid Build Coastguard Worker            # group
1021*cda5da8dSAndroid Build Coastguard Worker            c = this[1]
1022*cda5da8dSAndroid Build Coastguard Worker            if c == "g":
1023*cda5da8dSAndroid Build Coastguard Worker                if not s.match("<"):
1024*cda5da8dSAndroid Build Coastguard Worker                    raise s.error("missing <")
1025*cda5da8dSAndroid Build Coastguard Worker                name = s.getuntil(">", "group name")
1026*cda5da8dSAndroid Build Coastguard Worker                if name.isidentifier():
1027*cda5da8dSAndroid Build Coastguard Worker                    s.checkgroupname(name, 1, -1)
1028*cda5da8dSAndroid Build Coastguard Worker                    try:
1029*cda5da8dSAndroid Build Coastguard Worker                        index = groupindex[name]
1030*cda5da8dSAndroid Build Coastguard Worker                    except KeyError:
1031*cda5da8dSAndroid Build Coastguard Worker                        raise IndexError("unknown group name %r" % name) from None
1032*cda5da8dSAndroid Build Coastguard Worker                else:
1033*cda5da8dSAndroid Build Coastguard Worker                    try:
1034*cda5da8dSAndroid Build Coastguard Worker                        index = int(name)
1035*cda5da8dSAndroid Build Coastguard Worker                        if index < 0:
1036*cda5da8dSAndroid Build Coastguard Worker                            raise ValueError
1037*cda5da8dSAndroid Build Coastguard Worker                    except ValueError:
1038*cda5da8dSAndroid Build Coastguard Worker                        raise s.error("bad character in group name %r" % name,
1039*cda5da8dSAndroid Build Coastguard Worker                                      len(name) + 1) from None
1040*cda5da8dSAndroid Build Coastguard Worker                    if index >= MAXGROUPS:
1041*cda5da8dSAndroid Build Coastguard Worker                        raise s.error("invalid group reference %d" % index,
1042*cda5da8dSAndroid Build Coastguard Worker                                      len(name) + 1)
1043*cda5da8dSAndroid Build Coastguard Worker                    if not (name.isdecimal() and name.isascii()):
1044*cda5da8dSAndroid Build Coastguard Worker                        import warnings
1045*cda5da8dSAndroid Build Coastguard Worker                        warnings.warn(
1046*cda5da8dSAndroid Build Coastguard Worker                            "bad character in group name %s at position %d" %
1047*cda5da8dSAndroid Build Coastguard Worker                            (repr(name) if s.istext else ascii(name),
1048*cda5da8dSAndroid Build Coastguard Worker                             s.tell() - len(name) - 1),
1049*cda5da8dSAndroid Build Coastguard Worker                            DeprecationWarning, stacklevel=5
1050*cda5da8dSAndroid Build Coastguard Worker                        )
1051*cda5da8dSAndroid Build Coastguard Worker                addgroup(index, len(name) + 1)
1052*cda5da8dSAndroid Build Coastguard Worker            elif c == "0":
1053*cda5da8dSAndroid Build Coastguard Worker                if s.next in OCTDIGITS:
1054*cda5da8dSAndroid Build Coastguard Worker                    this += sget()
1055*cda5da8dSAndroid Build Coastguard Worker                    if s.next in OCTDIGITS:
1056*cda5da8dSAndroid Build Coastguard Worker                        this += sget()
1057*cda5da8dSAndroid Build Coastguard Worker                lappend(chr(int(this[1:], 8) & 0xff))
1058*cda5da8dSAndroid Build Coastguard Worker            elif c in DIGITS:
1059*cda5da8dSAndroid Build Coastguard Worker                isoctal = False
1060*cda5da8dSAndroid Build Coastguard Worker                if s.next in DIGITS:
1061*cda5da8dSAndroid Build Coastguard Worker                    this += sget()
1062*cda5da8dSAndroid Build Coastguard Worker                    if (c in OCTDIGITS and this[2] in OCTDIGITS and
1063*cda5da8dSAndroid Build Coastguard Worker                        s.next in OCTDIGITS):
1064*cda5da8dSAndroid Build Coastguard Worker                        this += sget()
1065*cda5da8dSAndroid Build Coastguard Worker                        isoctal = True
1066*cda5da8dSAndroid Build Coastguard Worker                        c = int(this[1:], 8)
1067*cda5da8dSAndroid Build Coastguard Worker                        if c > 0o377:
1068*cda5da8dSAndroid Build Coastguard Worker                            raise s.error('octal escape value %s outside of '
1069*cda5da8dSAndroid Build Coastguard Worker                                          'range 0-0o377' % this, len(this))
1070*cda5da8dSAndroid Build Coastguard Worker                        lappend(chr(c))
1071*cda5da8dSAndroid Build Coastguard Worker                if not isoctal:
1072*cda5da8dSAndroid Build Coastguard Worker                    addgroup(int(this[1:]), len(this) - 1)
1073*cda5da8dSAndroid Build Coastguard Worker            else:
1074*cda5da8dSAndroid Build Coastguard Worker                try:
1075*cda5da8dSAndroid Build Coastguard Worker                    this = chr(ESCAPES[this][1])
1076*cda5da8dSAndroid Build Coastguard Worker                except KeyError:
1077*cda5da8dSAndroid Build Coastguard Worker                    if c in ASCIILETTERS:
1078*cda5da8dSAndroid Build Coastguard Worker                        raise s.error('bad escape %s' % this, len(this)) from None
1079*cda5da8dSAndroid Build Coastguard Worker                lappend(this)
1080*cda5da8dSAndroid Build Coastguard Worker        else:
1081*cda5da8dSAndroid Build Coastguard Worker            lappend(this)
1082*cda5da8dSAndroid Build Coastguard Worker    if literal:
1083*cda5da8dSAndroid Build Coastguard Worker        literals.append(''.join(literal))
1084*cda5da8dSAndroid Build Coastguard Worker    if not isinstance(source, str):
1085*cda5da8dSAndroid Build Coastguard Worker        # The tokenizer implicitly decodes bytes objects as latin-1, we must
1086*cda5da8dSAndroid Build Coastguard Worker        # therefore re-encode the final representation.
1087*cda5da8dSAndroid Build Coastguard Worker        literals = [None if s is None else s.encode('latin-1') for s in literals]
1088*cda5da8dSAndroid Build Coastguard Worker    return groups, literals
1089*cda5da8dSAndroid Build Coastguard Worker
def expand_template(template, match):
    # Expand a parsed replacement template against a match object.
    #
    # `template` is a (groups, literals) pair: `literals` is a list of
    # chunks with a placeholder for each group reference, and `groups` is
    # a list of (position in literals, group number) pairs.  Each
    # placeholder is filled with match.group(number); a falsy result
    # (e.g. a group that did not participate) becomes the empty string of
    # the subject's type.  The caller's `literals` list is not modified.
    #
    # Raises error naming the offending group number when a substitution
    # raises IndexError.
    fetch = match.group
    empty = match.string[:0]
    groups, literals = template
    filled = literals[:]  # work on a copy; the template may be reused
    group = None
    try:
        for slot, group in groups:
            filled[slot] = fetch(group) or empty
    except IndexError:
        # Report the group number, not the slot position in the list.
        raise error("invalid group reference %d" % group) from None
    return empty.join(filled)
1101