#
# Secret Labs' Regular Expression Engine
#
# convert re-style regular expression to sre pattern
#
# Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved.
#
# See the __init__.py file for information on usage and redistribution.
#

"""Internal support module for sre"""

# XXX: show string offset and offending character for all errors

from ._constants import *

SPECIAL_CHARS = ".\\[{()*+?^$|"
REPEAT_CHARS = "*+?{"

DIGITS = frozenset("0123456789")

OCTDIGITS = frozenset("01234567")
HEXDIGITS = frozenset("0123456789abcdefABCDEF")
ASCIILETTERS = frozenset("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")

WHITESPACE = frozenset(" \t\n\r\v\f")

# opcodes whose argument is (min, max, subpattern); see SubPattern.getwidth
_REPEATCODES = frozenset({MIN_REPEAT, MAX_REPEAT, POSSESSIVE_REPEAT})
# opcodes that SubPattern.getwidth counts as exactly one character wide
_UNITCODES = frozenset({ANY, RANGE, IN, LITERAL, NOT_LITERAL, CATEGORY})

# escapes that map directly to a literal character
ESCAPES = {
    r"\a": (LITERAL, ord("\a")),
    r"\b": (LITERAL, ord("\b")),
    r"\f": (LITERAL, ord("\f")),
    r"\n": (LITERAL, ord("\n")),
    r"\r": (LITERAL, ord("\r")),
    r"\t": (LITERAL, ord("\t")),
    r"\v": (LITERAL, ord("\v")),
    r"\\": (LITERAL, ord("\\"))
}

# escapes that map to an assertion (AT) or a one-category set (IN)
CATEGORIES = {
    r"\A": (AT, AT_BEGINNING_STRING), # start of string
    r"\b": (AT, AT_BOUNDARY),
    r"\B": (AT, AT_NON_BOUNDARY),
    r"\d": (IN, [(CATEGORY, CATEGORY_DIGIT)]),
    r"\D": (IN, [(CATEGORY, CATEGORY_NOT_DIGIT)]),
    r"\s": (IN, [(CATEGORY, CATEGORY_SPACE)]),
    r"\S": (IN, [(CATEGORY, CATEGORY_NOT_SPACE)]),
    r"\w": (IN, [(CATEGORY, CATEGORY_WORD)]),
    r"\W": (IN, [(CATEGORY, CATEGORY_NOT_WORD)]),
    r"\Z": (AT, AT_END_STRING), # end of string
}

FLAGS = {
    # standard flags
    "i": SRE_FLAG_IGNORECASE,
    "L": SRE_FLAG_LOCALE,
    "m": SRE_FLAG_MULTILINE,
    "s": SRE_FLAG_DOTALL,
    "x": SRE_FLAG_VERBOSE,
    # extensions
    "a": SRE_FLAG_ASCII,
    "t": SRE_FLAG_TEMPLATE,
    "u": SRE_FLAG_UNICODE,
}

TYPE_FLAGS = SRE_FLAG_ASCII | SRE_FLAG_LOCALE | SRE_FLAG_UNICODE
GLOBAL_FLAGS = SRE_FLAG_DEBUG | SRE_FLAG_TEMPLATE


class State:
    """Keeps track of state for parsing.

    ``groupwidths`` holds one entry per group (index 0 is the whole
    pattern); an entry stays ``None`` until ``closegroup`` records the
    group's width, which is how an open group is recognised.
    """

    def __init__(self):
        self.flags = 0
        self.groupdict = {}
        self.groupwidths = [None]  # group 0
        # NOTE(review): assigned outside this class; when not None it is
        # compared against group ids in checklookbehindgroup.
        self.lookbehindgroups = None
        self.grouprefpos = {}

    @property
    def groups(self):
        """Number of groups opened so far, counting group 0."""
        return len(self.groupwidths)

    def opengroup(self, name=None):
        """Allocate the next group id, optionally binding it to *name*.

        Raises ``error`` when the group count exceeds MAXGROUPS or when
        *name* is already bound to another group.
        """
        gid = self.groups
        self.groupwidths.append(None)
        if self.groups > MAXGROUPS:
            raise error("too many groups")
        if name is not None:
            ogid = self.groupdict.get(name, None)
            if ogid is not None:
                raise error("redefinition of group name %r as group %d; "
                            "was group %d" % (name, gid, ogid))
            self.groupdict[name] = gid
        return gid

    def closegroup(self, gid, p):
        """Record the width of subpattern *p* as the width of group *gid*."""
        self.groupwidths[gid] = p.getwidth()

    def checkgroup(self, gid):
        """Return True if group *gid* exists and has been closed."""
        return gid < self.groups and self.groupwidths[gid] is not None

    def checklookbehindgroup(self, gid, source):
        """Validate a reference to group *gid* while ``lookbehindgroups`` is set.

        Does nothing when ``lookbehindgroups`` is None.  Otherwise raises
        ``source.error`` if the group is still open or if its id is not
        below ``lookbehindgroups``.
        """
        if self.lookbehindgroups is not None:
            if not self.checkgroup(gid):
                raise source.error('cannot refer to an open group')
            if gid >= self.lookbehindgroups:
                raise source.error('cannot refer to group defined in the same '
                                   'lookbehind subpattern')


class SubPattern:
    """A subpattern, in intermediate form.

    ``data`` is a list of ``(op, av)`` pairs; the sequence protocol
    methods forward to that list.  ``width`` caches the result of
    ``getwidth``.
    """

    def __init__(self, state, data=None):
        self.state = state
        if data is None:
            data = []
        self.data = data
        self.width = None

    def dump(self, level=0):
        """Print an indented listing of the pattern to stdout."""
        # NOTE(review): the indent unit below is copied as it appears in the
        # whitespace-collapsed source; confirm its width against upstream.
        nl = True
        seqtypes = (tuple, list)
        for op, av in self.data:
            print(level*" " + str(op), end='')
            if op is IN:
                # member sublanguage
                print()
                for member_op, a in av:
                    print((level+1)*" " + str(member_op), a)
            elif op is BRANCH:
                print()
                for i, a in enumerate(av[1]):
                    if i:
                        print(level*" " + "OR")
                    a.dump(level+1)
            elif op is GROUPREF_EXISTS:
                condgroup, item_yes, item_no = av
                print('', condgroup)
                item_yes.dump(level+1)
                if item_no:
                    print(level*" " + "ELSE")
                    item_no.dump(level+1)
            elif isinstance(av, seqtypes):
                # mixed arguments: nested subpatterns go on their own lines,
                # scalars are printed inline separated by spaces
                nl = False
                for a in av:
                    if isinstance(a, SubPattern):
                        if not nl:
                            print()
                        a.dump(level+1)
                        nl = True
                    else:
                        if not nl:
                            print(' ', end='')
                        print(a, end='')
                        nl = False
                if not nl:
                    print()
            else:
                print('', av)

    def __repr__(self):
        return repr(self.data)

    def __len__(self):
        return len(self.data)

    def __delitem__(self, index):
        del self.data[index]

    def __getitem__(self, index):
        # a slice yields a new SubPattern sharing this one's state
        if isinstance(index, slice):
            return SubPattern(self.state, self.data[index])
        return self.data[index]

    def __setitem__(self, index, code):
        self.data[index] = code

    def insert(self, index, code):
        self.data.insert(index, code)

    def append(self, code):
        self.data.append(code)

    def getwidth(self):
        """Determine the width (min, max) for this subpattern.

        The result is cached in ``self.width``; the minimum is capped at
        ``MAXREPEAT - 1`` and the maximum at ``MAXREPEAT``.
        """
        if self.width is not None:
            return self.width
        lo = hi = 0
        for op, av in self.data:
            if op is BRANCH:
                # narrowest minimum and widest maximum over all alternatives
                i = MAXREPEAT - 1
                j = 0
                for branch in av[1]:
                    l, h = branch.getwidth()
                    i = min(i, l)
                    j = max(j, h)
                lo = lo + i
                hi = hi + j
            elif op is ATOMIC_GROUP:
                i, j = av.getwidth()
                lo = lo + i
                hi = hi + j
            elif op is SUBPATTERN:
                i, j = av[-1].getwidth()
                lo = lo + i
                hi = hi + j
            elif op in _REPEATCODES:
                # av is (min count, max count, repeated subpattern)
                i, j = av[2].getwidth()
                lo = lo + i * av[0]
                hi = hi + j * av[1]
            elif op in _UNITCODES:
                lo = lo + 1
                hi = hi + 1
            elif op is GROUPREF:
                i, j = self.state.groupwidths[av]
                lo = lo + i
                hi = hi + j
            elif op is GROUPREF_EXISTS:
                i, j = av[1].getwidth()
                if av[2] is not None:
                    l, h = av[2].getwidth()
                    i = min(i, l)
                    j = max(j, h)
                else:
                    # no "else" branch: the construct may match nothing
                    i = 0
                lo = lo + i
                hi = hi + j
            elif op is SUCCESS:
                break
        self.width = min(lo, MAXREPEAT - 1), min(hi, MAXREPEAT)
        return self.width


class Tokenizer:
    """Splits a pattern into tokens with one token of lookahead.

    A token is a single character, or a backslash followed by one
    character.  ``next`` holds the upcoming token (None at end of
    pattern).  Bytes patterns are decoded as latin1 for scanning while
    the original object is kept in ``string`` for error reporting.
    """

    def __init__(self, string):
        self.istext = isinstance(string, str)
        self.string = string
        if not self.istext:
            string = str(string, 'latin1')
        self.decoded_string = string
        self.index = 0
        self.next = None
        self.__next()

    def __next(self):
        """Load the token at ``self.index`` into ``self.next`` and advance."""
        index = self.index
        try:
            char = self.decoded_string[index]
        except IndexError:
            self.next = None
            return
        if char == "\\":
            index += 1
            try:
                char += self.decoded_string[index]
            except IndexError:
                raise error("bad escape (end of pattern)",
                            self.string, len(self.string) - 1) from None
        self.index = index + 1
        self.next = char

    def match(self, char):
        """Consume the next token and return True if it equals *char*."""
        if char == self.next:
            self.__next()
            return True
        return False

    def get(self):
        """Return the next token (or None) and advance past it."""
        this = self.next
        self.__next()
        return this

    def getwhile(self, n, charset):
        """Consume up to *n* tokens that are members of *charset*."""
        result = ''
        for _ in range(n):
            c = self.next
            if c not in charset:
                break
            result += c
            self.__next()
        return result

    def getuntil(self, terminator, name):
        """Consume tokens up to and including *terminator*.

        Returns the text before the terminator.  Raises ``error`` when the
        text is empty or the pattern ends first; *name* describes the
        expected item in the error message.
        """
        result = ''
        while True:
            c = self.next
            self.__next()
            if c is None:
                if not result:
                    raise self.error("missing " + name)
                raise self.error("missing %s, unterminated name" % terminator,
                                 len(result))
            if c == terminator:
                if not result:
                    raise self.error("missing " + name, 1)
                break
            result += c
        return result

    @property
    def pos(self):
        """Index in the decoded pattern where the pending token starts."""
        return self.index - len(self.next or '')

    def tell(self):
        """Return the same position as the ``pos`` property."""
        return self.pos

    def seek(self, index):
        """Reposition at *index* and reload the lookahead token."""
        self.index = index
        self.__next()

    def error(self, msg, offset=0):
        """Build (not raise) an ``error`` located *offset* before ``tell()``."""
        if not self.istext:
            # keep the message ASCII-only for bytes patterns
            msg = msg.encode('ascii', 'backslashreplace').decode('ascii')
        return error(msg, self.string, self.tell() - offset)

    def checkgroupname(self, name, offset, nested):
        """Validate a group name.

        Raises ``error`` if *name* is not an identifier.  For bytes
        patterns a non-ASCII name only triggers a DeprecationWarning.
        """
        if not name.isidentifier():
            msg = "bad character in group name %r" % name
            raise self.error(msg, len(name) + offset)
        if not (self.istext or name.isascii()):
            import warnings
            warnings.warn(
                "bad character in group name %a at position %d" %
                (name, self.tell() - len(name) - offset),
                DeprecationWarning, stacklevel=nested + 7
            )


def _class_escape(source, escape):
    """Handle escape code inside character class.

    *escape* is the two-character token already read from *source*; more
    characters are consumed from *source* for hex, unicode, named and
    octal forms.  Returns an ``(op, av)`` pair or raises ``source.error``.
    """
    code = ESCAPES.get(escape)
    if code:
        return code
    code = CATEGORIES.get(escape)
    # only category sets are accepted here, not the AT assertions
    if code and code[0] is IN:
        return code
    try:
        c = escape[1:2]
        if c == "x":
            # hexadecimal escape (exactly two digits)
            escape += source.getwhile(2, HEXDIGITS)
            if len(escape) != 4:
                raise source.error("incomplete escape %s" % escape, len(escape))
            return LITERAL, int(escape[2:], 16)
        elif c == "u" and source.istext:
            # unicode escape (exactly four digits)
            escape += source.getwhile(4, HEXDIGITS)
            if len(escape) != 6:
                raise source.error("incomplete escape %s" % escape, len(escape))
            return LITERAL, int(escape[2:], 16)
        elif c == "U" and source.istext:
            # unicode escape (exactly eight digits)
            escape += source.getwhile(8, HEXDIGITS)
            if len(escape) != 10:
                raise source.error("incomplete escape %s" % escape, len(escape))
            c = int(escape[2:], 16)
            chr(c) # raise ValueError for invalid code
            return LITERAL, c
        elif c == "N" and source.istext:
            import unicodedata
            # named unicode escape e.g. \N{EM DASH}
            if not source.match('{'):
                raise source.error("missing {")
            charname = source.getuntil('}', 'character name')
            try:
                c = ord(unicodedata.lookup(charname))
            except (KeyError, TypeError):
                raise source.error("undefined character name %r" % charname,
                                   len(charname) + len(r'\N{}')) from None
            return LITERAL, c
        elif c in OCTDIGITS:
            # octal escape (up to three digits)
            escape += source.getwhile(2, OCTDIGITS)
            c = int(escape[1:], 8)
            if c > 0o377:
                raise source.error('octal escape value %s outside of '
                                   'range 0-0o377' % escape, len(escape))
            return LITERAL, c
        elif c in DIGITS:
            # remaining digits fall through to the generic "bad escape"
            raise ValueError
        if len(escape) == 2:
            if c in ASCIILETTERS:
                raise source.error('bad escape %s' % escape, len(escape))
            return LITERAL, ord(escape[1])
    except ValueError:
        pass
    raise source.error("bad escape %s" % escape, len(escape))

Build Coastguard Worker 367*cda5da8dSAndroid Build Coastguard Workerdef _escape(source, escape, state): 368*cda5da8dSAndroid Build Coastguard Worker # handle escape code in expression 369*cda5da8dSAndroid Build Coastguard Worker code = CATEGORIES.get(escape) 370*cda5da8dSAndroid Build Coastguard Worker if code: 371*cda5da8dSAndroid Build Coastguard Worker return code 372*cda5da8dSAndroid Build Coastguard Worker code = ESCAPES.get(escape) 373*cda5da8dSAndroid Build Coastguard Worker if code: 374*cda5da8dSAndroid Build Coastguard Worker return code 375*cda5da8dSAndroid Build Coastguard Worker try: 376*cda5da8dSAndroid Build Coastguard Worker c = escape[1:2] 377*cda5da8dSAndroid Build Coastguard Worker if c == "x": 378*cda5da8dSAndroid Build Coastguard Worker # hexadecimal escape 379*cda5da8dSAndroid Build Coastguard Worker escape += source.getwhile(2, HEXDIGITS) 380*cda5da8dSAndroid Build Coastguard Worker if len(escape) != 4: 381*cda5da8dSAndroid Build Coastguard Worker raise source.error("incomplete escape %s" % escape, len(escape)) 382*cda5da8dSAndroid Build Coastguard Worker return LITERAL, int(escape[2:], 16) 383*cda5da8dSAndroid Build Coastguard Worker elif c == "u" and source.istext: 384*cda5da8dSAndroid Build Coastguard Worker # unicode escape (exactly four digits) 385*cda5da8dSAndroid Build Coastguard Worker escape += source.getwhile(4, HEXDIGITS) 386*cda5da8dSAndroid Build Coastguard Worker if len(escape) != 6: 387*cda5da8dSAndroid Build Coastguard Worker raise source.error("incomplete escape %s" % escape, len(escape)) 388*cda5da8dSAndroid Build Coastguard Worker return LITERAL, int(escape[2:], 16) 389*cda5da8dSAndroid Build Coastguard Worker elif c == "U" and source.istext: 390*cda5da8dSAndroid Build Coastguard Worker # unicode escape (exactly eight digits) 391*cda5da8dSAndroid Build Coastguard Worker escape += source.getwhile(8, HEXDIGITS) 392*cda5da8dSAndroid Build Coastguard Worker if len(escape) != 10: 393*cda5da8dSAndroid Build Coastguard Worker raise 
source.error("incomplete escape %s" % escape, len(escape)) 394*cda5da8dSAndroid Build Coastguard Worker c = int(escape[2:], 16) 395*cda5da8dSAndroid Build Coastguard Worker chr(c) # raise ValueError for invalid code 396*cda5da8dSAndroid Build Coastguard Worker return LITERAL, c 397*cda5da8dSAndroid Build Coastguard Worker elif c == "N" and source.istext: 398*cda5da8dSAndroid Build Coastguard Worker import unicodedata 399*cda5da8dSAndroid Build Coastguard Worker # named unicode escape e.g. \N{EM DASH} 400*cda5da8dSAndroid Build Coastguard Worker if not source.match('{'): 401*cda5da8dSAndroid Build Coastguard Worker raise source.error("missing {") 402*cda5da8dSAndroid Build Coastguard Worker charname = source.getuntil('}', 'character name') 403*cda5da8dSAndroid Build Coastguard Worker try: 404*cda5da8dSAndroid Build Coastguard Worker c = ord(unicodedata.lookup(charname)) 405*cda5da8dSAndroid Build Coastguard Worker except (KeyError, TypeError): 406*cda5da8dSAndroid Build Coastguard Worker raise source.error("undefined character name %r" % charname, 407*cda5da8dSAndroid Build Coastguard Worker len(charname) + len(r'\N{}')) from None 408*cda5da8dSAndroid Build Coastguard Worker return LITERAL, c 409*cda5da8dSAndroid Build Coastguard Worker elif c == "0": 410*cda5da8dSAndroid Build Coastguard Worker # octal escape 411*cda5da8dSAndroid Build Coastguard Worker escape += source.getwhile(2, OCTDIGITS) 412*cda5da8dSAndroid Build Coastguard Worker return LITERAL, int(escape[1:], 8) 413*cda5da8dSAndroid Build Coastguard Worker elif c in DIGITS: 414*cda5da8dSAndroid Build Coastguard Worker # octal escape *or* decimal group reference (sigh) 415*cda5da8dSAndroid Build Coastguard Worker if source.next in DIGITS: 416*cda5da8dSAndroid Build Coastguard Worker escape += source.get() 417*cda5da8dSAndroid Build Coastguard Worker if (escape[1] in OCTDIGITS and escape[2] in OCTDIGITS and 418*cda5da8dSAndroid Build Coastguard Worker source.next in OCTDIGITS): 419*cda5da8dSAndroid Build 
Coastguard Worker # got three octal digits; this is an octal escape 420*cda5da8dSAndroid Build Coastguard Worker escape += source.get() 421*cda5da8dSAndroid Build Coastguard Worker c = int(escape[1:], 8) 422*cda5da8dSAndroid Build Coastguard Worker if c > 0o377: 423*cda5da8dSAndroid Build Coastguard Worker raise source.error('octal escape value %s outside of ' 424*cda5da8dSAndroid Build Coastguard Worker 'range 0-0o377' % escape, 425*cda5da8dSAndroid Build Coastguard Worker len(escape)) 426*cda5da8dSAndroid Build Coastguard Worker return LITERAL, c 427*cda5da8dSAndroid Build Coastguard Worker # not an octal escape, so this is a group reference 428*cda5da8dSAndroid Build Coastguard Worker group = int(escape[1:]) 429*cda5da8dSAndroid Build Coastguard Worker if group < state.groups: 430*cda5da8dSAndroid Build Coastguard Worker if not state.checkgroup(group): 431*cda5da8dSAndroid Build Coastguard Worker raise source.error("cannot refer to an open group", 432*cda5da8dSAndroid Build Coastguard Worker len(escape)) 433*cda5da8dSAndroid Build Coastguard Worker state.checklookbehindgroup(group, source) 434*cda5da8dSAndroid Build Coastguard Worker return GROUPREF, group 435*cda5da8dSAndroid Build Coastguard Worker raise source.error("invalid group reference %d" % group, len(escape) - 1) 436*cda5da8dSAndroid Build Coastguard Worker if len(escape) == 2: 437*cda5da8dSAndroid Build Coastguard Worker if c in ASCIILETTERS: 438*cda5da8dSAndroid Build Coastguard Worker raise source.error("bad escape %s" % escape, len(escape)) 439*cda5da8dSAndroid Build Coastguard Worker return LITERAL, ord(escape[1]) 440*cda5da8dSAndroid Build Coastguard Worker except ValueError: 441*cda5da8dSAndroid Build Coastguard Worker pass 442*cda5da8dSAndroid Build Coastguard Worker raise source.error("bad escape %s" % escape, len(escape)) 443*cda5da8dSAndroid Build Coastguard Worker 444*cda5da8dSAndroid Build Coastguard Workerdef _uniq(items): 445*cda5da8dSAndroid Build Coastguard Worker return 
def _uniq(items):
    """Return *items* as a list without duplicates, keeping first-seen order."""
    return list(dict.fromkeys(items))

def _parse_sub(source, state, verbose, nested):
    """Parse an alternation ``a|b|c`` and return it as one SubPattern.

    source  -- tokenizer positioned at the start of the alternation.
    state   -- shared parser state; ``state.flags`` is re-read after each
               top-level branch to refresh the verbose setting.
    verbose -- truthy when whitespace and comments are to be skipped.
    nested  -- nesting depth; 0 means the outermost expression.

    A single branch is returned unchanged.  Otherwise a prefix common to
    every branch is hoisted out, and an alternation made only of single
    literals / non-negated sets is folded into one IN set; anything else
    becomes a BRANCH node.
    """
    items = []
    itemsappend = items.append
    sourcematch = source.match
    while True:
        # Only the very first branch of the outermost expression is parsed
        # with first=True (``not nested and not items``).
        itemsappend(_parse(source, state, verbose, nested + 1,
                           not nested and not items))
        if not sourcematch("|"):
            break
        if not nested:
            # state.flags may have changed while parsing the branch above
            verbose = state.flags & SRE_FLAG_VERBOSE

    if len(items) == 1:
        return items[0]

    subpattern = SubPattern(state)

    # check if all items share a common prefix
    while True:
        prefix = None
        for item in items:
            if not item:
                break
            if prefix is None:
                prefix = item[0]
            elif item[0] != prefix:
                break
        else:
            # all subitems start with a common "prefix".
            # move it out of the branch
            for item in items:
                del item[0]
            subpattern.append(prefix)
            continue # check next one
        break

    # check if the branch can be replaced by a character set
    charset = []
    for item in items:
        if len(item) != 1:
            break
        op, av = item[0]
        if op is LITERAL:
            charset.append((op, av))
        elif op is IN and av[0][0] is not NEGATE:
            charset.extend(av)
        else:
            break
    else:
        # we can store this as a character set instead of a
        # branch (the compiler may optimize this even more)
        subpattern.append((IN, _uniq(charset)))
        return subpattern

    subpattern.append((BRANCH, (None, items)))
    return subpattern
507*cda5da8dSAndroid Build Coastguard Workerdef _parse(source, state, verbose, nested, first=False): 508*cda5da8dSAndroid Build Coastguard Worker # parse a simple pattern 509*cda5da8dSAndroid Build Coastguard Worker subpattern = SubPattern(state) 510*cda5da8dSAndroid Build Coastguard Worker 511*cda5da8dSAndroid Build Coastguard Worker # precompute constants into local variables 512*cda5da8dSAndroid Build Coastguard Worker subpatternappend = subpattern.append 513*cda5da8dSAndroid Build Coastguard Worker sourceget = source.get 514*cda5da8dSAndroid Build Coastguard Worker sourcematch = source.match 515*cda5da8dSAndroid Build Coastguard Worker _len = len 516*cda5da8dSAndroid Build Coastguard Worker _ord = ord 517*cda5da8dSAndroid Build Coastguard Worker 518*cda5da8dSAndroid Build Coastguard Worker while True: 519*cda5da8dSAndroid Build Coastguard Worker 520*cda5da8dSAndroid Build Coastguard Worker this = source.next 521*cda5da8dSAndroid Build Coastguard Worker if this is None: 522*cda5da8dSAndroid Build Coastguard Worker break # end of pattern 523*cda5da8dSAndroid Build Coastguard Worker if this in "|)": 524*cda5da8dSAndroid Build Coastguard Worker break # end of subpattern 525*cda5da8dSAndroid Build Coastguard Worker sourceget() 526*cda5da8dSAndroid Build Coastguard Worker 527*cda5da8dSAndroid Build Coastguard Worker if verbose: 528*cda5da8dSAndroid Build Coastguard Worker # skip whitespace and comments 529*cda5da8dSAndroid Build Coastguard Worker if this in WHITESPACE: 530*cda5da8dSAndroid Build Coastguard Worker continue 531*cda5da8dSAndroid Build Coastguard Worker if this == "#": 532*cda5da8dSAndroid Build Coastguard Worker while True: 533*cda5da8dSAndroid Build Coastguard Worker this = sourceget() 534*cda5da8dSAndroid Build Coastguard Worker if this is None or this == "\n": 535*cda5da8dSAndroid Build Coastguard Worker break 536*cda5da8dSAndroid Build Coastguard Worker continue 537*cda5da8dSAndroid Build Coastguard Worker 538*cda5da8dSAndroid Build Coastguard 
Worker if this[0] == "\\": 539*cda5da8dSAndroid Build Coastguard Worker code = _escape(source, this, state) 540*cda5da8dSAndroid Build Coastguard Worker subpatternappend(code) 541*cda5da8dSAndroid Build Coastguard Worker 542*cda5da8dSAndroid Build Coastguard Worker elif this not in SPECIAL_CHARS: 543*cda5da8dSAndroid Build Coastguard Worker subpatternappend((LITERAL, _ord(this))) 544*cda5da8dSAndroid Build Coastguard Worker 545*cda5da8dSAndroid Build Coastguard Worker elif this == "[": 546*cda5da8dSAndroid Build Coastguard Worker here = source.tell() - 1 547*cda5da8dSAndroid Build Coastguard Worker # character set 548*cda5da8dSAndroid Build Coastguard Worker set = [] 549*cda5da8dSAndroid Build Coastguard Worker setappend = set.append 550*cda5da8dSAndroid Build Coastguard Worker## if sourcematch(":"): 551*cda5da8dSAndroid Build Coastguard Worker## pass # handle character classes 552*cda5da8dSAndroid Build Coastguard Worker if source.next == '[': 553*cda5da8dSAndroid Build Coastguard Worker import warnings 554*cda5da8dSAndroid Build Coastguard Worker warnings.warn( 555*cda5da8dSAndroid Build Coastguard Worker 'Possible nested set at position %d' % source.tell(), 556*cda5da8dSAndroid Build Coastguard Worker FutureWarning, stacklevel=nested + 6 557*cda5da8dSAndroid Build Coastguard Worker ) 558*cda5da8dSAndroid Build Coastguard Worker negate = sourcematch("^") 559*cda5da8dSAndroid Build Coastguard Worker # check remaining characters 560*cda5da8dSAndroid Build Coastguard Worker while True: 561*cda5da8dSAndroid Build Coastguard Worker this = sourceget() 562*cda5da8dSAndroid Build Coastguard Worker if this is None: 563*cda5da8dSAndroid Build Coastguard Worker raise source.error("unterminated character set", 564*cda5da8dSAndroid Build Coastguard Worker source.tell() - here) 565*cda5da8dSAndroid Build Coastguard Worker if this == "]" and set: 566*cda5da8dSAndroid Build Coastguard Worker break 567*cda5da8dSAndroid Build Coastguard Worker elif this[0] == "\\": 
568*cda5da8dSAndroid Build Coastguard Worker code1 = _class_escape(source, this) 569*cda5da8dSAndroid Build Coastguard Worker else: 570*cda5da8dSAndroid Build Coastguard Worker if set and this in '-&~|' and source.next == this: 571*cda5da8dSAndroid Build Coastguard Worker import warnings 572*cda5da8dSAndroid Build Coastguard Worker warnings.warn( 573*cda5da8dSAndroid Build Coastguard Worker 'Possible set %s at position %d' % ( 574*cda5da8dSAndroid Build Coastguard Worker 'difference' if this == '-' else 575*cda5da8dSAndroid Build Coastguard Worker 'intersection' if this == '&' else 576*cda5da8dSAndroid Build Coastguard Worker 'symmetric difference' if this == '~' else 577*cda5da8dSAndroid Build Coastguard Worker 'union', 578*cda5da8dSAndroid Build Coastguard Worker source.tell() - 1), 579*cda5da8dSAndroid Build Coastguard Worker FutureWarning, stacklevel=nested + 6 580*cda5da8dSAndroid Build Coastguard Worker ) 581*cda5da8dSAndroid Build Coastguard Worker code1 = LITERAL, _ord(this) 582*cda5da8dSAndroid Build Coastguard Worker if sourcematch("-"): 583*cda5da8dSAndroid Build Coastguard Worker # potential range 584*cda5da8dSAndroid Build Coastguard Worker that = sourceget() 585*cda5da8dSAndroid Build Coastguard Worker if that is None: 586*cda5da8dSAndroid Build Coastguard Worker raise source.error("unterminated character set", 587*cda5da8dSAndroid Build Coastguard Worker source.tell() - here) 588*cda5da8dSAndroid Build Coastguard Worker if that == "]": 589*cda5da8dSAndroid Build Coastguard Worker if code1[0] is IN: 590*cda5da8dSAndroid Build Coastguard Worker code1 = code1[1][0] 591*cda5da8dSAndroid Build Coastguard Worker setappend(code1) 592*cda5da8dSAndroid Build Coastguard Worker setappend((LITERAL, _ord("-"))) 593*cda5da8dSAndroid Build Coastguard Worker break 594*cda5da8dSAndroid Build Coastguard Worker if that[0] == "\\": 595*cda5da8dSAndroid Build Coastguard Worker code2 = _class_escape(source, that) 596*cda5da8dSAndroid Build Coastguard Worker else: 
597*cda5da8dSAndroid Build Coastguard Worker if that == '-': 598*cda5da8dSAndroid Build Coastguard Worker import warnings 599*cda5da8dSAndroid Build Coastguard Worker warnings.warn( 600*cda5da8dSAndroid Build Coastguard Worker 'Possible set difference at position %d' % ( 601*cda5da8dSAndroid Build Coastguard Worker source.tell() - 2), 602*cda5da8dSAndroid Build Coastguard Worker FutureWarning, stacklevel=nested + 6 603*cda5da8dSAndroid Build Coastguard Worker ) 604*cda5da8dSAndroid Build Coastguard Worker code2 = LITERAL, _ord(that) 605*cda5da8dSAndroid Build Coastguard Worker if code1[0] != LITERAL or code2[0] != LITERAL: 606*cda5da8dSAndroid Build Coastguard Worker msg = "bad character range %s-%s" % (this, that) 607*cda5da8dSAndroid Build Coastguard Worker raise source.error(msg, len(this) + 1 + len(that)) 608*cda5da8dSAndroid Build Coastguard Worker lo = code1[1] 609*cda5da8dSAndroid Build Coastguard Worker hi = code2[1] 610*cda5da8dSAndroid Build Coastguard Worker if hi < lo: 611*cda5da8dSAndroid Build Coastguard Worker msg = "bad character range %s-%s" % (this, that) 612*cda5da8dSAndroid Build Coastguard Worker raise source.error(msg, len(this) + 1 + len(that)) 613*cda5da8dSAndroid Build Coastguard Worker setappend((RANGE, (lo, hi))) 614*cda5da8dSAndroid Build Coastguard Worker else: 615*cda5da8dSAndroid Build Coastguard Worker if code1[0] is IN: 616*cda5da8dSAndroid Build Coastguard Worker code1 = code1[1][0] 617*cda5da8dSAndroid Build Coastguard Worker setappend(code1) 618*cda5da8dSAndroid Build Coastguard Worker 619*cda5da8dSAndroid Build Coastguard Worker set = _uniq(set) 620*cda5da8dSAndroid Build Coastguard Worker # XXX: <fl> should move set optimization to compiler! 
621*cda5da8dSAndroid Build Coastguard Worker if _len(set) == 1 and set[0][0] is LITERAL: 622*cda5da8dSAndroid Build Coastguard Worker # optimization 623*cda5da8dSAndroid Build Coastguard Worker if negate: 624*cda5da8dSAndroid Build Coastguard Worker subpatternappend((NOT_LITERAL, set[0][1])) 625*cda5da8dSAndroid Build Coastguard Worker else: 626*cda5da8dSAndroid Build Coastguard Worker subpatternappend(set[0]) 627*cda5da8dSAndroid Build Coastguard Worker else: 628*cda5da8dSAndroid Build Coastguard Worker if negate: 629*cda5da8dSAndroid Build Coastguard Worker set.insert(0, (NEGATE, None)) 630*cda5da8dSAndroid Build Coastguard Worker # charmap optimization can't be added here because 631*cda5da8dSAndroid Build Coastguard Worker # global flags still are not known 632*cda5da8dSAndroid Build Coastguard Worker subpatternappend((IN, set)) 633*cda5da8dSAndroid Build Coastguard Worker 634*cda5da8dSAndroid Build Coastguard Worker elif this in REPEAT_CHARS: 635*cda5da8dSAndroid Build Coastguard Worker # repeat previous item 636*cda5da8dSAndroid Build Coastguard Worker here = source.tell() 637*cda5da8dSAndroid Build Coastguard Worker if this == "?": 638*cda5da8dSAndroid Build Coastguard Worker min, max = 0, 1 639*cda5da8dSAndroid Build Coastguard Worker elif this == "*": 640*cda5da8dSAndroid Build Coastguard Worker min, max = 0, MAXREPEAT 641*cda5da8dSAndroid Build Coastguard Worker 642*cda5da8dSAndroid Build Coastguard Worker elif this == "+": 643*cda5da8dSAndroid Build Coastguard Worker min, max = 1, MAXREPEAT 644*cda5da8dSAndroid Build Coastguard Worker elif this == "{": 645*cda5da8dSAndroid Build Coastguard Worker if source.next == "}": 646*cda5da8dSAndroid Build Coastguard Worker subpatternappend((LITERAL, _ord(this))) 647*cda5da8dSAndroid Build Coastguard Worker continue 648*cda5da8dSAndroid Build Coastguard Worker 649*cda5da8dSAndroid Build Coastguard Worker min, max = 0, MAXREPEAT 650*cda5da8dSAndroid Build Coastguard Worker lo = hi = "" 651*cda5da8dSAndroid Build 
Coastguard Worker while source.next in DIGITS: 652*cda5da8dSAndroid Build Coastguard Worker lo += sourceget() 653*cda5da8dSAndroid Build Coastguard Worker if sourcematch(","): 654*cda5da8dSAndroid Build Coastguard Worker while source.next in DIGITS: 655*cda5da8dSAndroid Build Coastguard Worker hi += sourceget() 656*cda5da8dSAndroid Build Coastguard Worker else: 657*cda5da8dSAndroid Build Coastguard Worker hi = lo 658*cda5da8dSAndroid Build Coastguard Worker if not sourcematch("}"): 659*cda5da8dSAndroid Build Coastguard Worker subpatternappend((LITERAL, _ord(this))) 660*cda5da8dSAndroid Build Coastguard Worker source.seek(here) 661*cda5da8dSAndroid Build Coastguard Worker continue 662*cda5da8dSAndroid Build Coastguard Worker 663*cda5da8dSAndroid Build Coastguard Worker if lo: 664*cda5da8dSAndroid Build Coastguard Worker min = int(lo) 665*cda5da8dSAndroid Build Coastguard Worker if min >= MAXREPEAT: 666*cda5da8dSAndroid Build Coastguard Worker raise OverflowError("the repetition number is too large") 667*cda5da8dSAndroid Build Coastguard Worker if hi: 668*cda5da8dSAndroid Build Coastguard Worker max = int(hi) 669*cda5da8dSAndroid Build Coastguard Worker if max >= MAXREPEAT: 670*cda5da8dSAndroid Build Coastguard Worker raise OverflowError("the repetition number is too large") 671*cda5da8dSAndroid Build Coastguard Worker if max < min: 672*cda5da8dSAndroid Build Coastguard Worker raise source.error("min repeat greater than max repeat", 673*cda5da8dSAndroid Build Coastguard Worker source.tell() - here) 674*cda5da8dSAndroid Build Coastguard Worker else: 675*cda5da8dSAndroid Build Coastguard Worker raise AssertionError("unsupported quantifier %r" % (char,)) 676*cda5da8dSAndroid Build Coastguard Worker # figure out which item to repeat 677*cda5da8dSAndroid Build Coastguard Worker if subpattern: 678*cda5da8dSAndroid Build Coastguard Worker item = subpattern[-1:] 679*cda5da8dSAndroid Build Coastguard Worker else: 680*cda5da8dSAndroid Build Coastguard Worker item = None 
681*cda5da8dSAndroid Build Coastguard Worker if not item or item[0][0] is AT: 682*cda5da8dSAndroid Build Coastguard Worker raise source.error("nothing to repeat", 683*cda5da8dSAndroid Build Coastguard Worker source.tell() - here + len(this)) 684*cda5da8dSAndroid Build Coastguard Worker if item[0][0] in _REPEATCODES: 685*cda5da8dSAndroid Build Coastguard Worker raise source.error("multiple repeat", 686*cda5da8dSAndroid Build Coastguard Worker source.tell() - here + len(this)) 687*cda5da8dSAndroid Build Coastguard Worker if item[0][0] is SUBPATTERN: 688*cda5da8dSAndroid Build Coastguard Worker group, add_flags, del_flags, p = item[0][1] 689*cda5da8dSAndroid Build Coastguard Worker if group is None and not add_flags and not del_flags: 690*cda5da8dSAndroid Build Coastguard Worker item = p 691*cda5da8dSAndroid Build Coastguard Worker if sourcematch("?"): 692*cda5da8dSAndroid Build Coastguard Worker # Non-Greedy Match 693*cda5da8dSAndroid Build Coastguard Worker subpattern[-1] = (MIN_REPEAT, (min, max, item)) 694*cda5da8dSAndroid Build Coastguard Worker elif sourcematch("+"): 695*cda5da8dSAndroid Build Coastguard Worker # Possessive Match (Always Greedy) 696*cda5da8dSAndroid Build Coastguard Worker subpattern[-1] = (POSSESSIVE_REPEAT, (min, max, item)) 697*cda5da8dSAndroid Build Coastguard Worker else: 698*cda5da8dSAndroid Build Coastguard Worker # Greedy Match 699*cda5da8dSAndroid Build Coastguard Worker subpattern[-1] = (MAX_REPEAT, (min, max, item)) 700*cda5da8dSAndroid Build Coastguard Worker 701*cda5da8dSAndroid Build Coastguard Worker elif this == ".": 702*cda5da8dSAndroid Build Coastguard Worker subpatternappend((ANY, None)) 703*cda5da8dSAndroid Build Coastguard Worker 704*cda5da8dSAndroid Build Coastguard Worker elif this == "(": 705*cda5da8dSAndroid Build Coastguard Worker start = source.tell() - 1 706*cda5da8dSAndroid Build Coastguard Worker capture = True 707*cda5da8dSAndroid Build Coastguard Worker atomic = False 708*cda5da8dSAndroid Build Coastguard Worker 
name = None 709*cda5da8dSAndroid Build Coastguard Worker add_flags = 0 710*cda5da8dSAndroid Build Coastguard Worker del_flags = 0 711*cda5da8dSAndroid Build Coastguard Worker if sourcematch("?"): 712*cda5da8dSAndroid Build Coastguard Worker # options 713*cda5da8dSAndroid Build Coastguard Worker char = sourceget() 714*cda5da8dSAndroid Build Coastguard Worker if char is None: 715*cda5da8dSAndroid Build Coastguard Worker raise source.error("unexpected end of pattern") 716*cda5da8dSAndroid Build Coastguard Worker if char == "P": 717*cda5da8dSAndroid Build Coastguard Worker # python extensions 718*cda5da8dSAndroid Build Coastguard Worker if sourcematch("<"): 719*cda5da8dSAndroid Build Coastguard Worker # named group: skip forward to end of name 720*cda5da8dSAndroid Build Coastguard Worker name = source.getuntil(">", "group name") 721*cda5da8dSAndroid Build Coastguard Worker source.checkgroupname(name, 1, nested) 722*cda5da8dSAndroid Build Coastguard Worker elif sourcematch("="): 723*cda5da8dSAndroid Build Coastguard Worker # named backreference 724*cda5da8dSAndroid Build Coastguard Worker name = source.getuntil(")", "group name") 725*cda5da8dSAndroid Build Coastguard Worker source.checkgroupname(name, 1, nested) 726*cda5da8dSAndroid Build Coastguard Worker gid = state.groupdict.get(name) 727*cda5da8dSAndroid Build Coastguard Worker if gid is None: 728*cda5da8dSAndroid Build Coastguard Worker msg = "unknown group name %r" % name 729*cda5da8dSAndroid Build Coastguard Worker raise source.error(msg, len(name) + 1) 730*cda5da8dSAndroid Build Coastguard Worker if not state.checkgroup(gid): 731*cda5da8dSAndroid Build Coastguard Worker raise source.error("cannot refer to an open group", 732*cda5da8dSAndroid Build Coastguard Worker len(name) + 1) 733*cda5da8dSAndroid Build Coastguard Worker state.checklookbehindgroup(gid, source) 734*cda5da8dSAndroid Build Coastguard Worker subpatternappend((GROUPREF, gid)) 735*cda5da8dSAndroid Build Coastguard Worker continue 
736*cda5da8dSAndroid Build Coastguard Worker 737*cda5da8dSAndroid Build Coastguard Worker else: 738*cda5da8dSAndroid Build Coastguard Worker char = sourceget() 739*cda5da8dSAndroid Build Coastguard Worker if char is None: 740*cda5da8dSAndroid Build Coastguard Worker raise source.error("unexpected end of pattern") 741*cda5da8dSAndroid Build Coastguard Worker raise source.error("unknown extension ?P" + char, 742*cda5da8dSAndroid Build Coastguard Worker len(char) + 2) 743*cda5da8dSAndroid Build Coastguard Worker elif char == ":": 744*cda5da8dSAndroid Build Coastguard Worker # non-capturing group 745*cda5da8dSAndroid Build Coastguard Worker capture = False 746*cda5da8dSAndroid Build Coastguard Worker elif char == "#": 747*cda5da8dSAndroid Build Coastguard Worker # comment 748*cda5da8dSAndroid Build Coastguard Worker while True: 749*cda5da8dSAndroid Build Coastguard Worker if source.next is None: 750*cda5da8dSAndroid Build Coastguard Worker raise source.error("missing ), unterminated comment", 751*cda5da8dSAndroid Build Coastguard Worker source.tell() - start) 752*cda5da8dSAndroid Build Coastguard Worker if sourceget() == ")": 753*cda5da8dSAndroid Build Coastguard Worker break 754*cda5da8dSAndroid Build Coastguard Worker continue 755*cda5da8dSAndroid Build Coastguard Worker 756*cda5da8dSAndroid Build Coastguard Worker elif char in "=!<": 757*cda5da8dSAndroid Build Coastguard Worker # lookahead assertions 758*cda5da8dSAndroid Build Coastguard Worker dir = 1 759*cda5da8dSAndroid Build Coastguard Worker if char == "<": 760*cda5da8dSAndroid Build Coastguard Worker char = sourceget() 761*cda5da8dSAndroid Build Coastguard Worker if char is None: 762*cda5da8dSAndroid Build Coastguard Worker raise source.error("unexpected end of pattern") 763*cda5da8dSAndroid Build Coastguard Worker if char not in "=!": 764*cda5da8dSAndroid Build Coastguard Worker raise source.error("unknown extension ?<" + char, 765*cda5da8dSAndroid Build Coastguard Worker len(char) + 2) 766*cda5da8dSAndroid 
Build Coastguard Worker dir = -1 # lookbehind 767*cda5da8dSAndroid Build Coastguard Worker lookbehindgroups = state.lookbehindgroups 768*cda5da8dSAndroid Build Coastguard Worker if lookbehindgroups is None: 769*cda5da8dSAndroid Build Coastguard Worker state.lookbehindgroups = state.groups 770*cda5da8dSAndroid Build Coastguard Worker p = _parse_sub(source, state, verbose, nested + 1) 771*cda5da8dSAndroid Build Coastguard Worker if dir < 0: 772*cda5da8dSAndroid Build Coastguard Worker if lookbehindgroups is None: 773*cda5da8dSAndroid Build Coastguard Worker state.lookbehindgroups = None 774*cda5da8dSAndroid Build Coastguard Worker if not sourcematch(")"): 775*cda5da8dSAndroid Build Coastguard Worker raise source.error("missing ), unterminated subpattern", 776*cda5da8dSAndroid Build Coastguard Worker source.tell() - start) 777*cda5da8dSAndroid Build Coastguard Worker if char == "=": 778*cda5da8dSAndroid Build Coastguard Worker subpatternappend((ASSERT, (dir, p))) 779*cda5da8dSAndroid Build Coastguard Worker else: 780*cda5da8dSAndroid Build Coastguard Worker subpatternappend((ASSERT_NOT, (dir, p))) 781*cda5da8dSAndroid Build Coastguard Worker continue 782*cda5da8dSAndroid Build Coastguard Worker 783*cda5da8dSAndroid Build Coastguard Worker elif char == "(": 784*cda5da8dSAndroid Build Coastguard Worker # conditional backreference group 785*cda5da8dSAndroid Build Coastguard Worker condname = source.getuntil(")", "group name") 786*cda5da8dSAndroid Build Coastguard Worker if condname.isidentifier(): 787*cda5da8dSAndroid Build Coastguard Worker source.checkgroupname(condname, 1, nested) 788*cda5da8dSAndroid Build Coastguard Worker condgroup = state.groupdict.get(condname) 789*cda5da8dSAndroid Build Coastguard Worker if condgroup is None: 790*cda5da8dSAndroid Build Coastguard Worker msg = "unknown group name %r" % condname 791*cda5da8dSAndroid Build Coastguard Worker raise source.error(msg, len(condname) + 1) 792*cda5da8dSAndroid Build Coastguard Worker else: 
793*cda5da8dSAndroid Build Coastguard Worker try: 794*cda5da8dSAndroid Build Coastguard Worker condgroup = int(condname) 795*cda5da8dSAndroid Build Coastguard Worker if condgroup < 0: 796*cda5da8dSAndroid Build Coastguard Worker raise ValueError 797*cda5da8dSAndroid Build Coastguard Worker except ValueError: 798*cda5da8dSAndroid Build Coastguard Worker msg = "bad character in group name %r" % condname 799*cda5da8dSAndroid Build Coastguard Worker raise source.error(msg, len(condname) + 1) from None 800*cda5da8dSAndroid Build Coastguard Worker if not condgroup: 801*cda5da8dSAndroid Build Coastguard Worker raise source.error("bad group number", 802*cda5da8dSAndroid Build Coastguard Worker len(condname) + 1) 803*cda5da8dSAndroid Build Coastguard Worker if condgroup >= MAXGROUPS: 804*cda5da8dSAndroid Build Coastguard Worker msg = "invalid group reference %d" % condgroup 805*cda5da8dSAndroid Build Coastguard Worker raise source.error(msg, len(condname) + 1) 806*cda5da8dSAndroid Build Coastguard Worker if condgroup not in state.grouprefpos: 807*cda5da8dSAndroid Build Coastguard Worker state.grouprefpos[condgroup] = ( 808*cda5da8dSAndroid Build Coastguard Worker source.tell() - len(condname) - 1 809*cda5da8dSAndroid Build Coastguard Worker ) 810*cda5da8dSAndroid Build Coastguard Worker if not (condname.isdecimal() and condname.isascii()): 811*cda5da8dSAndroid Build Coastguard Worker import warnings 812*cda5da8dSAndroid Build Coastguard Worker warnings.warn( 813*cda5da8dSAndroid Build Coastguard Worker "bad character in group name %s at position %d" % 814*cda5da8dSAndroid Build Coastguard Worker (repr(condname) if source.istext else ascii(condname), 815*cda5da8dSAndroid Build Coastguard Worker source.tell() - len(condname) - 1), 816*cda5da8dSAndroid Build Coastguard Worker DeprecationWarning, stacklevel=nested + 6 817*cda5da8dSAndroid Build Coastguard Worker ) 818*cda5da8dSAndroid Build Coastguard Worker state.checklookbehindgroup(condgroup, source) 819*cda5da8dSAndroid 
Build Coastguard Worker item_yes = _parse(source, state, verbose, nested + 1) 820*cda5da8dSAndroid Build Coastguard Worker if source.match("|"): 821*cda5da8dSAndroid Build Coastguard Worker item_no = _parse(source, state, verbose, nested + 1) 822*cda5da8dSAndroid Build Coastguard Worker if source.next == "|": 823*cda5da8dSAndroid Build Coastguard Worker raise source.error("conditional backref with more than two branches") 824*cda5da8dSAndroid Build Coastguard Worker else: 825*cda5da8dSAndroid Build Coastguard Worker item_no = None 826*cda5da8dSAndroid Build Coastguard Worker if not source.match(")"): 827*cda5da8dSAndroid Build Coastguard Worker raise source.error("missing ), unterminated subpattern", 828*cda5da8dSAndroid Build Coastguard Worker source.tell() - start) 829*cda5da8dSAndroid Build Coastguard Worker subpatternappend((GROUPREF_EXISTS, (condgroup, item_yes, item_no))) 830*cda5da8dSAndroid Build Coastguard Worker continue 831*cda5da8dSAndroid Build Coastguard Worker 832*cda5da8dSAndroid Build Coastguard Worker elif char == ">": 833*cda5da8dSAndroid Build Coastguard Worker # non-capturing, atomic group 834*cda5da8dSAndroid Build Coastguard Worker capture = False 835*cda5da8dSAndroid Build Coastguard Worker atomic = True 836*cda5da8dSAndroid Build Coastguard Worker elif char in FLAGS or char == "-": 837*cda5da8dSAndroid Build Coastguard Worker # flags 838*cda5da8dSAndroid Build Coastguard Worker flags = _parse_flags(source, state, char) 839*cda5da8dSAndroid Build Coastguard Worker if flags is None: # global flags 840*cda5da8dSAndroid Build Coastguard Worker if not first or subpattern: 841*cda5da8dSAndroid Build Coastguard Worker raise source.error('global flags not at the start ' 842*cda5da8dSAndroid Build Coastguard Worker 'of the expression', 843*cda5da8dSAndroid Build Coastguard Worker source.tell() - start) 844*cda5da8dSAndroid Build Coastguard Worker verbose = state.flags & SRE_FLAG_VERBOSE 845*cda5da8dSAndroid Build Coastguard Worker continue 
846*cda5da8dSAndroid Build Coastguard Worker 847*cda5da8dSAndroid Build Coastguard Worker add_flags, del_flags = flags 848*cda5da8dSAndroid Build Coastguard Worker capture = False 849*cda5da8dSAndroid Build Coastguard Worker else: 850*cda5da8dSAndroid Build Coastguard Worker raise source.error("unknown extension ?" + char, 851*cda5da8dSAndroid Build Coastguard Worker len(char) + 1) 852*cda5da8dSAndroid Build Coastguard Worker 853*cda5da8dSAndroid Build Coastguard Worker # parse group contents 854*cda5da8dSAndroid Build Coastguard Worker if capture: 855*cda5da8dSAndroid Build Coastguard Worker try: 856*cda5da8dSAndroid Build Coastguard Worker group = state.opengroup(name) 857*cda5da8dSAndroid Build Coastguard Worker except error as err: 858*cda5da8dSAndroid Build Coastguard Worker raise source.error(err.msg, len(name) + 1) from None 859*cda5da8dSAndroid Build Coastguard Worker else: 860*cda5da8dSAndroid Build Coastguard Worker group = None 861*cda5da8dSAndroid Build Coastguard Worker sub_verbose = ((verbose or (add_flags & SRE_FLAG_VERBOSE)) and 862*cda5da8dSAndroid Build Coastguard Worker not (del_flags & SRE_FLAG_VERBOSE)) 863*cda5da8dSAndroid Build Coastguard Worker p = _parse_sub(source, state, sub_verbose, nested + 1) 864*cda5da8dSAndroid Build Coastguard Worker if not source.match(")"): 865*cda5da8dSAndroid Build Coastguard Worker raise source.error("missing ), unterminated subpattern", 866*cda5da8dSAndroid Build Coastguard Worker source.tell() - start) 867*cda5da8dSAndroid Build Coastguard Worker if group is not None: 868*cda5da8dSAndroid Build Coastguard Worker state.closegroup(group, p) 869*cda5da8dSAndroid Build Coastguard Worker if atomic: 870*cda5da8dSAndroid Build Coastguard Worker assert group is None 871*cda5da8dSAndroid Build Coastguard Worker subpatternappend((ATOMIC_GROUP, p)) 872*cda5da8dSAndroid Build Coastguard Worker else: 873*cda5da8dSAndroid Build Coastguard Worker subpatternappend((SUBPATTERN, (group, add_flags, del_flags, p))) 
874*cda5da8dSAndroid Build Coastguard Worker 875*cda5da8dSAndroid Build Coastguard Worker elif this == "^": 876*cda5da8dSAndroid Build Coastguard Worker subpatternappend((AT, AT_BEGINNING)) 877*cda5da8dSAndroid Build Coastguard Worker 878*cda5da8dSAndroid Build Coastguard Worker elif this == "$": 879*cda5da8dSAndroid Build Coastguard Worker subpatternappend((AT, AT_END)) 880*cda5da8dSAndroid Build Coastguard Worker 881*cda5da8dSAndroid Build Coastguard Worker else: 882*cda5da8dSAndroid Build Coastguard Worker raise AssertionError("unsupported special character %r" % (char,)) 883*cda5da8dSAndroid Build Coastguard Worker 884*cda5da8dSAndroid Build Coastguard Worker # unpack non-capturing groups 885*cda5da8dSAndroid Build Coastguard Worker for i in range(len(subpattern))[::-1]: 886*cda5da8dSAndroid Build Coastguard Worker op, av = subpattern[i] 887*cda5da8dSAndroid Build Coastguard Worker if op is SUBPATTERN: 888*cda5da8dSAndroid Build Coastguard Worker group, add_flags, del_flags, p = av 889*cda5da8dSAndroid Build Coastguard Worker if group is None and not add_flags and not del_flags: 890*cda5da8dSAndroid Build Coastguard Worker subpattern[i: i+1] = p 891*cda5da8dSAndroid Build Coastguard Worker 892*cda5da8dSAndroid Build Coastguard Worker return subpattern 893*cda5da8dSAndroid Build Coastguard Worker 894*cda5da8dSAndroid Build Coastguard Workerdef _parse_flags(source, state, char): 895*cda5da8dSAndroid Build Coastguard Worker sourceget = source.get 896*cda5da8dSAndroid Build Coastguard Worker add_flags = 0 897*cda5da8dSAndroid Build Coastguard Worker del_flags = 0 898*cda5da8dSAndroid Build Coastguard Worker if char != "-": 899*cda5da8dSAndroid Build Coastguard Worker while True: 900*cda5da8dSAndroid Build Coastguard Worker flag = FLAGS[char] 901*cda5da8dSAndroid Build Coastguard Worker if source.istext: 902*cda5da8dSAndroid Build Coastguard Worker if char == 'L': 903*cda5da8dSAndroid Build Coastguard Worker msg = "bad inline flags: cannot use 'L' flag with a str 
pattern" 904*cda5da8dSAndroid Build Coastguard Worker raise source.error(msg) 905*cda5da8dSAndroid Build Coastguard Worker else: 906*cda5da8dSAndroid Build Coastguard Worker if char == 'u': 907*cda5da8dSAndroid Build Coastguard Worker msg = "bad inline flags: cannot use 'u' flag with a bytes pattern" 908*cda5da8dSAndroid Build Coastguard Worker raise source.error(msg) 909*cda5da8dSAndroid Build Coastguard Worker add_flags |= flag 910*cda5da8dSAndroid Build Coastguard Worker if (flag & TYPE_FLAGS) and (add_flags & TYPE_FLAGS) != flag: 911*cda5da8dSAndroid Build Coastguard Worker msg = "bad inline flags: flags 'a', 'u' and 'L' are incompatible" 912*cda5da8dSAndroid Build Coastguard Worker raise source.error(msg) 913*cda5da8dSAndroid Build Coastguard Worker char = sourceget() 914*cda5da8dSAndroid Build Coastguard Worker if char is None: 915*cda5da8dSAndroid Build Coastguard Worker raise source.error("missing -, : or )") 916*cda5da8dSAndroid Build Coastguard Worker if char in ")-:": 917*cda5da8dSAndroid Build Coastguard Worker break 918*cda5da8dSAndroid Build Coastguard Worker if char not in FLAGS: 919*cda5da8dSAndroid Build Coastguard Worker msg = "unknown flag" if char.isalpha() else "missing -, : or )" 920*cda5da8dSAndroid Build Coastguard Worker raise source.error(msg, len(char)) 921*cda5da8dSAndroid Build Coastguard Worker if char == ")": 922*cda5da8dSAndroid Build Coastguard Worker state.flags |= add_flags 923*cda5da8dSAndroid Build Coastguard Worker return None 924*cda5da8dSAndroid Build Coastguard Worker if add_flags & GLOBAL_FLAGS: 925*cda5da8dSAndroid Build Coastguard Worker raise source.error("bad inline flags: cannot turn on global flag", 1) 926*cda5da8dSAndroid Build Coastguard Worker if char == "-": 927*cda5da8dSAndroid Build Coastguard Worker char = sourceget() 928*cda5da8dSAndroid Build Coastguard Worker if char is None: 929*cda5da8dSAndroid Build Coastguard Worker raise source.error("missing flag") 930*cda5da8dSAndroid Build Coastguard Worker if char 
not in FLAGS: 931*cda5da8dSAndroid Build Coastguard Worker msg = "unknown flag" if char.isalpha() else "missing flag" 932*cda5da8dSAndroid Build Coastguard Worker raise source.error(msg, len(char)) 933*cda5da8dSAndroid Build Coastguard Worker while True: 934*cda5da8dSAndroid Build Coastguard Worker flag = FLAGS[char] 935*cda5da8dSAndroid Build Coastguard Worker if flag & TYPE_FLAGS: 936*cda5da8dSAndroid Build Coastguard Worker msg = "bad inline flags: cannot turn off flags 'a', 'u' and 'L'" 937*cda5da8dSAndroid Build Coastguard Worker raise source.error(msg) 938*cda5da8dSAndroid Build Coastguard Worker del_flags |= flag 939*cda5da8dSAndroid Build Coastguard Worker char = sourceget() 940*cda5da8dSAndroid Build Coastguard Worker if char is None: 941*cda5da8dSAndroid Build Coastguard Worker raise source.error("missing :") 942*cda5da8dSAndroid Build Coastguard Worker if char == ":": 943*cda5da8dSAndroid Build Coastguard Worker break 944*cda5da8dSAndroid Build Coastguard Worker if char not in FLAGS: 945*cda5da8dSAndroid Build Coastguard Worker msg = "unknown flag" if char.isalpha() else "missing :" 946*cda5da8dSAndroid Build Coastguard Worker raise source.error(msg, len(char)) 947*cda5da8dSAndroid Build Coastguard Worker assert char == ":" 948*cda5da8dSAndroid Build Coastguard Worker if del_flags & GLOBAL_FLAGS: 949*cda5da8dSAndroid Build Coastguard Worker raise source.error("bad inline flags: cannot turn off global flag", 1) 950*cda5da8dSAndroid Build Coastguard Worker if add_flags & del_flags: 951*cda5da8dSAndroid Build Coastguard Worker raise source.error("bad inline flags: flag turned on and off", 1) 952*cda5da8dSAndroid Build Coastguard Worker return add_flags, del_flags 953*cda5da8dSAndroid Build Coastguard Worker 954*cda5da8dSAndroid Build Coastguard Workerdef fix_flags(src, flags): 955*cda5da8dSAndroid Build Coastguard Worker # Check and fix flags according to the type of pattern (str or bytes) 956*cda5da8dSAndroid Build Coastguard Worker if isinstance(src, 
str): 957*cda5da8dSAndroid Build Coastguard Worker if flags & SRE_FLAG_LOCALE: 958*cda5da8dSAndroid Build Coastguard Worker raise ValueError("cannot use LOCALE flag with a str pattern") 959*cda5da8dSAndroid Build Coastguard Worker if not flags & SRE_FLAG_ASCII: 960*cda5da8dSAndroid Build Coastguard Worker flags |= SRE_FLAG_UNICODE 961*cda5da8dSAndroid Build Coastguard Worker elif flags & SRE_FLAG_UNICODE: 962*cda5da8dSAndroid Build Coastguard Worker raise ValueError("ASCII and UNICODE flags are incompatible") 963*cda5da8dSAndroid Build Coastguard Worker else: 964*cda5da8dSAndroid Build Coastguard Worker if flags & SRE_FLAG_UNICODE: 965*cda5da8dSAndroid Build Coastguard Worker raise ValueError("cannot use UNICODE flag with a bytes pattern") 966*cda5da8dSAndroid Build Coastguard Worker if flags & SRE_FLAG_LOCALE and flags & SRE_FLAG_ASCII: 967*cda5da8dSAndroid Build Coastguard Worker raise ValueError("ASCII and LOCALE flags are incompatible") 968*cda5da8dSAndroid Build Coastguard Worker return flags 969*cda5da8dSAndroid Build Coastguard Worker 970*cda5da8dSAndroid Build Coastguard Workerdef parse(str, flags=0, state=None): 971*cda5da8dSAndroid Build Coastguard Worker # parse 're' pattern into list of (opcode, argument) tuples 972*cda5da8dSAndroid Build Coastguard Worker 973*cda5da8dSAndroid Build Coastguard Worker source = Tokenizer(str) 974*cda5da8dSAndroid Build Coastguard Worker 975*cda5da8dSAndroid Build Coastguard Worker if state is None: 976*cda5da8dSAndroid Build Coastguard Worker state = State() 977*cda5da8dSAndroid Build Coastguard Worker state.flags = flags 978*cda5da8dSAndroid Build Coastguard Worker state.str = str 979*cda5da8dSAndroid Build Coastguard Worker 980*cda5da8dSAndroid Build Coastguard Worker p = _parse_sub(source, state, flags & SRE_FLAG_VERBOSE, 0) 981*cda5da8dSAndroid Build Coastguard Worker p.state.flags = fix_flags(str, p.state.flags) 982*cda5da8dSAndroid Build Coastguard Worker 983*cda5da8dSAndroid Build Coastguard Worker if source.next 
is not None: 984*cda5da8dSAndroid Build Coastguard Worker assert source.next == ")" 985*cda5da8dSAndroid Build Coastguard Worker raise source.error("unbalanced parenthesis") 986*cda5da8dSAndroid Build Coastguard Worker 987*cda5da8dSAndroid Build Coastguard Worker for g in p.state.grouprefpos: 988*cda5da8dSAndroid Build Coastguard Worker if g >= p.state.groups: 989*cda5da8dSAndroid Build Coastguard Worker msg = "invalid group reference %d" % g 990*cda5da8dSAndroid Build Coastguard Worker raise error(msg, str, p.state.grouprefpos[g]) 991*cda5da8dSAndroid Build Coastguard Worker 992*cda5da8dSAndroid Build Coastguard Worker if flags & SRE_FLAG_DEBUG: 993*cda5da8dSAndroid Build Coastguard Worker p.dump() 994*cda5da8dSAndroid Build Coastguard Worker 995*cda5da8dSAndroid Build Coastguard Worker return p 996*cda5da8dSAndroid Build Coastguard Worker 997*cda5da8dSAndroid Build Coastguard Workerdef parse_template(source, state): 998*cda5da8dSAndroid Build Coastguard Worker # parse 're' replacement string into list of literals and 999*cda5da8dSAndroid Build Coastguard Worker # group references 1000*cda5da8dSAndroid Build Coastguard Worker s = Tokenizer(source) 1001*cda5da8dSAndroid Build Coastguard Worker sget = s.get 1002*cda5da8dSAndroid Build Coastguard Worker groups = [] 1003*cda5da8dSAndroid Build Coastguard Worker literals = [] 1004*cda5da8dSAndroid Build Coastguard Worker literal = [] 1005*cda5da8dSAndroid Build Coastguard Worker lappend = literal.append 1006*cda5da8dSAndroid Build Coastguard Worker def addgroup(index, pos): 1007*cda5da8dSAndroid Build Coastguard Worker if index > state.groups: 1008*cda5da8dSAndroid Build Coastguard Worker raise s.error("invalid group reference %d" % index, pos) 1009*cda5da8dSAndroid Build Coastguard Worker if literal: 1010*cda5da8dSAndroid Build Coastguard Worker literals.append(''.join(literal)) 1011*cda5da8dSAndroid Build Coastguard Worker del literal[:] 1012*cda5da8dSAndroid Build Coastguard Worker groups.append((len(literals), 
index)) 1013*cda5da8dSAndroid Build Coastguard Worker literals.append(None) 1014*cda5da8dSAndroid Build Coastguard Worker groupindex = state.groupindex 1015*cda5da8dSAndroid Build Coastguard Worker while True: 1016*cda5da8dSAndroid Build Coastguard Worker this = sget() 1017*cda5da8dSAndroid Build Coastguard Worker if this is None: 1018*cda5da8dSAndroid Build Coastguard Worker break # end of replacement string 1019*cda5da8dSAndroid Build Coastguard Worker if this[0] == "\\": 1020*cda5da8dSAndroid Build Coastguard Worker # group 1021*cda5da8dSAndroid Build Coastguard Worker c = this[1] 1022*cda5da8dSAndroid Build Coastguard Worker if c == "g": 1023*cda5da8dSAndroid Build Coastguard Worker if not s.match("<"): 1024*cda5da8dSAndroid Build Coastguard Worker raise s.error("missing <") 1025*cda5da8dSAndroid Build Coastguard Worker name = s.getuntil(">", "group name") 1026*cda5da8dSAndroid Build Coastguard Worker if name.isidentifier(): 1027*cda5da8dSAndroid Build Coastguard Worker s.checkgroupname(name, 1, -1) 1028*cda5da8dSAndroid Build Coastguard Worker try: 1029*cda5da8dSAndroid Build Coastguard Worker index = groupindex[name] 1030*cda5da8dSAndroid Build Coastguard Worker except KeyError: 1031*cda5da8dSAndroid Build Coastguard Worker raise IndexError("unknown group name %r" % name) from None 1032*cda5da8dSAndroid Build Coastguard Worker else: 1033*cda5da8dSAndroid Build Coastguard Worker try: 1034*cda5da8dSAndroid Build Coastguard Worker index = int(name) 1035*cda5da8dSAndroid Build Coastguard Worker if index < 0: 1036*cda5da8dSAndroid Build Coastguard Worker raise ValueError 1037*cda5da8dSAndroid Build Coastguard Worker except ValueError: 1038*cda5da8dSAndroid Build Coastguard Worker raise s.error("bad character in group name %r" % name, 1039*cda5da8dSAndroid Build Coastguard Worker len(name) + 1) from None 1040*cda5da8dSAndroid Build Coastguard Worker if index >= MAXGROUPS: 1041*cda5da8dSAndroid Build Coastguard Worker raise s.error("invalid group reference %d" % 
index, 1042*cda5da8dSAndroid Build Coastguard Worker len(name) + 1) 1043*cda5da8dSAndroid Build Coastguard Worker if not (name.isdecimal() and name.isascii()): 1044*cda5da8dSAndroid Build Coastguard Worker import warnings 1045*cda5da8dSAndroid Build Coastguard Worker warnings.warn( 1046*cda5da8dSAndroid Build Coastguard Worker "bad character in group name %s at position %d" % 1047*cda5da8dSAndroid Build Coastguard Worker (repr(name) if s.istext else ascii(name), 1048*cda5da8dSAndroid Build Coastguard Worker s.tell() - len(name) - 1), 1049*cda5da8dSAndroid Build Coastguard Worker DeprecationWarning, stacklevel=5 1050*cda5da8dSAndroid Build Coastguard Worker ) 1051*cda5da8dSAndroid Build Coastguard Worker addgroup(index, len(name) + 1) 1052*cda5da8dSAndroid Build Coastguard Worker elif c == "0": 1053*cda5da8dSAndroid Build Coastguard Worker if s.next in OCTDIGITS: 1054*cda5da8dSAndroid Build Coastguard Worker this += sget() 1055*cda5da8dSAndroid Build Coastguard Worker if s.next in OCTDIGITS: 1056*cda5da8dSAndroid Build Coastguard Worker this += sget() 1057*cda5da8dSAndroid Build Coastguard Worker lappend(chr(int(this[1:], 8) & 0xff)) 1058*cda5da8dSAndroid Build Coastguard Worker elif c in DIGITS: 1059*cda5da8dSAndroid Build Coastguard Worker isoctal = False 1060*cda5da8dSAndroid Build Coastguard Worker if s.next in DIGITS: 1061*cda5da8dSAndroid Build Coastguard Worker this += sget() 1062*cda5da8dSAndroid Build Coastguard Worker if (c in OCTDIGITS and this[2] in OCTDIGITS and 1063*cda5da8dSAndroid Build Coastguard Worker s.next in OCTDIGITS): 1064*cda5da8dSAndroid Build Coastguard Worker this += sget() 1065*cda5da8dSAndroid Build Coastguard Worker isoctal = True 1066*cda5da8dSAndroid Build Coastguard Worker c = int(this[1:], 8) 1067*cda5da8dSAndroid Build Coastguard Worker if c > 0o377: 1068*cda5da8dSAndroid Build Coastguard Worker raise s.error('octal escape value %s outside of ' 1069*cda5da8dSAndroid Build Coastguard Worker 'range 0-0o377' % this, len(this)) 
1070*cda5da8dSAndroid Build Coastguard Worker lappend(chr(c)) 1071*cda5da8dSAndroid Build Coastguard Worker if not isoctal: 1072*cda5da8dSAndroid Build Coastguard Worker addgroup(int(this[1:]), len(this) - 1) 1073*cda5da8dSAndroid Build Coastguard Worker else: 1074*cda5da8dSAndroid Build Coastguard Worker try: 1075*cda5da8dSAndroid Build Coastguard Worker this = chr(ESCAPES[this][1]) 1076*cda5da8dSAndroid Build Coastguard Worker except KeyError: 1077*cda5da8dSAndroid Build Coastguard Worker if c in ASCIILETTERS: 1078*cda5da8dSAndroid Build Coastguard Worker raise s.error('bad escape %s' % this, len(this)) from None 1079*cda5da8dSAndroid Build Coastguard Worker lappend(this) 1080*cda5da8dSAndroid Build Coastguard Worker else: 1081*cda5da8dSAndroid Build Coastguard Worker lappend(this) 1082*cda5da8dSAndroid Build Coastguard Worker if literal: 1083*cda5da8dSAndroid Build Coastguard Worker literals.append(''.join(literal)) 1084*cda5da8dSAndroid Build Coastguard Worker if not isinstance(source, str): 1085*cda5da8dSAndroid Build Coastguard Worker # The tokenizer implicitly decodes bytes objects as latin-1, we must 1086*cda5da8dSAndroid Build Coastguard Worker # therefore re-encode the final representation. 
def expand_template(template, match):
    """Expand a parsed replacement template against a match.

    template -- a (groups, literals) pair as returned by parse_template():
                groups yields (slot, group) pairs and literals is the
                sequence of chunks whose slots get filled in
    match    -- object providing .group(n) and .string

    Returns the chunks joined with an empty string/bytes of the same type
    as match.string.  A group whose value is falsy (e.g. None for a group
    that did not participate) contributes that empty value.

    Raises error("invalid group reference N") when an IndexError occurs
    while filling the slot for group number N.
    """
    fetch = match.group
    empty = match.string[:0]
    groups, literals = template
    # Work on a copy so the (reusable) template is never modified.
    literals = list(literals)
    try:
        for slot, group in groups:
            literals[slot] = fetch(group) or empty
    except IndexError:
        # Report the group number that was requested, not the position of
        # its slot in the literals list.
        raise error("invalid group reference %d" % group) from None
    return empty.join(literals)