# This module implements the RFCs 3490 (IDNA) and 3491 (Nameprep)

import stringprep, re, codecs
from unicodedata import ucd_3_2_0 as unicodedata

# IDNA section 3.1
dots = re.compile("[\u002E\u3002\uFF0E\uFF61]")

# IDNA section 5
ace_prefix = b"xn--"
sace_prefix = "xn--"

# This assumes query strings, so AllowUnassigned is true
def nameprep(label):
    """Apply the Nameprep steps (map, normalize, prohibit, bidi) to a label.

    label is a str.  Returns the prepared str.

    Raises UnicodeError if the normalized label contains a character from
    one of the checked prohibition tables, or violates bidi requirement
    2 or 3.
    """
    # Map
    newlabel = []
    for c in label:
        if stringprep.in_table_b1(c):
            # Map to nothing
            continue
        newlabel.append(stringprep.map_table_b2(c))
    label = "".join(newlabel)

    # Normalize
    label = unicodedata.normalize("NFKC", label)

    # Prohibit
    for c in label:
        if stringprep.in_table_c12(c) or \
           stringprep.in_table_c22(c) or \
           stringprep.in_table_c3(c) or \
           stringprep.in_table_c4(c) or \
           stringprep.in_table_c5(c) or \
           stringprep.in_table_c6(c) or \
           stringprep.in_table_c7(c) or \
           stringprep.in_table_c8(c) or \
           stringprep.in_table_c9(c):
            raise UnicodeError("Invalid character %r" % c)

    # Check bidi
    RandAL = [stringprep.in_table_d1(x) for x in label]
    if any(RandAL):
        # There is a RandAL char in the string. Must perform further
        # tests:
        # 1) The characters in section 5.8 MUST be prohibited.
        # This is table C.8, which was already checked
        # 2) If a string contains any RandALCat character, the string
        # MUST NOT contain any LCat character.
        if any(stringprep.in_table_d2(x) for x in label):
            raise UnicodeError("Violation of BIDI requirement 2")
        # 3) If a string contains any RandALCat character, a
        # RandALCat character MUST be the first character of the
        # string, and a RandALCat character MUST be the last
        # character of the string.
        if not RandAL[0] or not RandAL[-1]:
            raise UnicodeError("Violation of BIDI requirement 3")

    return label

def ToASCII(label):
    """Convert a single str label to its ASCII (ACE) form, as bytes.

    A label that is already ASCII is returned encoded but otherwise
    untouched.  Any other label is nameprepped and, if still non-ASCII,
    punycode-encoded and prefixed with the ACE prefix.

    Raises UnicodeError if the result is empty or longer than 63 bytes,
    if nameprep rejects the label, or if a non-ASCII label starts with the
    ACE prefix after nameprep.
    """
    try:
        # Step 1: try ASCII
        label = label.encode("ascii")
    except UnicodeError:
        pass
    else:
        # Skip to step 3: UseSTD3ASCIIRules is false, so
        # Skip to step 8.
        if 0 < len(label) < 64:
            return label
        raise UnicodeError("label empty or too long")

    # Step 2: nameprep
    label = nameprep(label)

    # Step 3: UseSTD3ASCIIRules is false
    # Step 4: try ASCII
    try:
        label = label.encode("ascii")
    except UnicodeError:
        pass
    else:
        # Skip to step 8.
        if 0 < len(label) < 64:
            return label
        raise UnicodeError("label empty or too long")

    # Step 5: Check ACE prefix
    if label.startswith(sace_prefix):
        raise UnicodeError("Label starts with ACE prefix")

    # Step 6: Encode with PUNYCODE
    label = label.encode("punycode")

    # Step 7: Prepend ACE prefix
    label = ace_prefix + label

    # Step 8: Check size
    if 0 < len(label) < 64:
        return label
    raise UnicodeError("label empty or too long")

def ToUnicode(label):
    """Convert a single label (bytes or str) to its Unicode form, as str.

    A label without the ACE prefix is returned as an ASCII-decoded str
    with its case preserved.  A label carrying the ACE prefix, in any
    capitalization, is punycode-decoded and then verified by a round trip
    through ToASCII.

    Raises UnicodeError if a non-ASCII label is still non-ASCII after
    nameprep, or if the decoded label does not round-trip.
    """
    # Step 1: Check for ASCII
    if isinstance(label, bytes):
        pure_ascii = True
    else:
        try:
            label = label.encode("ascii")
            pure_ascii = True
        except UnicodeError:
            pure_ascii = False
    if not pure_ascii:
        # Step 2: Perform nameprep
        label = nameprep(label)
        # It doesn't say this, but apparently, it should be ASCII now
        try:
            label = label.encode("ascii")
        except UnicodeError:
            raise UnicodeError("Invalid character in IDN label")
    # Step 3: Check for ACE prefix
    # The prefix is "xn--" or any capitalization of it (IDNA section 5),
    # so compare on a lowered copy; label itself keeps its case for the
    # non-ACE return below.
    if not label.lower().startswith(ace_prefix):
        return str(label, "ascii")

    # Step 4: Remove ACE prefix
    label1 = label[len(ace_prefix):]

    # Step 5: Decode using PUNYCODE
    result = label1.decode("punycode")

    # Step 6: Apply ToASCII
    label2 = ToASCII(result)

    # Step 7: Compare the result of step 6 with the one of step 3
    # label2 will already be in lower case.
    if str(label, "ascii").lower() != str(label2, "ascii"):
        raise UnicodeError("IDNA does not round-trip", label, label2)

    # Step 8: return the result of step 5
    return result

### Codec APIs

class Codec(codecs.Codec):
    """Stateless IDNA codec operating on whole, dot-separated names."""

    def encode(self, input, errors='strict'):
        """Encode a str name to bytes; return (encoded, len(input)).

        Only errors='strict' is accepted.  An all-ASCII name is only
        length-checked per label; otherwise each label goes through
        ToASCII.  A trailing dot (empty last label) is preserved.
        """

        if errors != 'strict':
            # IDNA is quite clear that implementations must be strict
            raise UnicodeError("unsupported error handling "+errors)

        if not input:
            return b'', 0

        try:
            result = input.encode('ascii')
        except UnicodeEncodeError:
            pass
        else:
            # ASCII name: fast path
            labels = result.split(b'.')
            for label in labels[:-1]:
                if not (0 < len(label) < 64):
                    raise UnicodeError("label empty or too long")
            # The last label may be empty (trailing dot), so only its
            # upper bound is checked.
            if len(labels[-1]) >= 64:
                raise UnicodeError("label too long")
            return result, len(input)

        result = bytearray()
        labels = dots.split(input)
        if labels and not labels[-1]:
            trailing_dot = b'.'
            del labels[-1]
        else:
            trailing_dot = b''
        for label in labels:
            if result:
                # Join with U+002E
                result.extend(b'.')
            result.extend(ToASCII(label))
        return bytes(result+trailing_dot), len(input)

    def decode(self, input, errors='strict'):
        """Decode a bytes name to str; return (decoded, len(input)).

        Only errors='strict' is accepted.  Input containing no ACE prefix
        (in any capitalization) is decoded as plain ASCII; otherwise each
        dot-separated label goes through ToUnicode.  A trailing dot is
        preserved.
        """

        if errors != 'strict':
            raise UnicodeError("Unsupported error handling "+errors)

        if not input:
            return "", 0

        # IDNA allows decoding to operate on Unicode strings, too.
        if not isinstance(input, bytes):
            # XXX obviously wrong, see #3232
            input = bytes(input)

        # The ACE prefix is recognized case-insensitively, so the fast
        # path must not be taken for e.g. b"XN--...".
        if ace_prefix not in input.lower():
            # Fast path
            try:
                return input.decode('ascii'), len(input)
            except UnicodeDecodeError:
                pass

        labels = input.split(b".")

        if labels and len(labels[-1]) == 0:
            trailing_dot = '.'
            del labels[-1]
        else:
            trailing_dot = ''

        result = []
        for label in labels:
            result.append(ToUnicode(label))

        return ".".join(result)+trailing_dot, len(input)

class IncrementalEncoder(codecs.BufferedIncrementalEncoder):
    """Incremental IDNA encoder that emits only completed labels."""

    def _buffer_encode(self, input, errors, final):
        """Encode the complete labels in input.

        Returns (encoded_bytes, size), where size is computed from the
        lengths of the input labels and separators that were encoded.
        Unless final is true, the last (possibly unfinished) label is left
        out of both.
        """
        if errors != 'strict':
            # IDNA is quite clear that implementations must be strict
            raise UnicodeError("unsupported error handling "+errors)

        if not input:
            return (b'', 0)

        labels = dots.split(input)
        trailing_dot = b''
        if labels:
            if not labels[-1]:
                trailing_dot = b'.'
                del labels[-1]
            elif not final:
                # Keep potentially unfinished label until the next call
                del labels[-1]
                if labels:
                    trailing_dot = b'.'

        result = bytearray()
        size = 0
        for label in labels:
            if size:
                # Join with U+002E
                result.extend(b'.')
                size += 1
            result.extend(ToASCII(label))
            # size counts input characters, not output bytes
            size += len(label)

        result += trailing_dot
        size += len(trailing_dot)
        return (bytes(result), size)

class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
    """Incremental IDNA decoder that emits only completed labels."""

    def _buffer_decode(self, input, errors, final):
        """Decode the complete labels in input.

        Returns (decoded_str, size), where size is computed from the
        lengths of the input labels and separators that were decoded.
        Unless final is true, the last (possibly unfinished) label is left
        out of both.
        """
        if errors != 'strict':
            raise UnicodeError("Unsupported error handling "+errors)

        if not input:
            return ("", 0)

        # IDNA allows decoding to operate on Unicode strings, too.
        if isinstance(input, str):
            labels = dots.split(input)
        else:
            # Must be ASCII string
            input = str(input, "ascii")
            labels = input.split(".")

        trailing_dot = ''
        if labels:
            if not labels[-1]:
                trailing_dot = '.'
                del labels[-1]
            elif not final:
                # Keep potentially unfinished label until the next call
                del labels[-1]
                if labels:
                    trailing_dot = '.'

        result = []
        size = 0
        for label in labels:
            result.append(ToUnicode(label))
            if size:
                size += 1
            size += len(label)

        result = ".".join(result) + trailing_dot
        size += len(trailing_dot)
        return (result, size)

class StreamWriter(Codec,codecs.StreamWriter):
    pass

class StreamReader(Codec,codecs.StreamReader):
    pass

### encodings module API

def getregentry():
    """Return the codecs.CodecInfo describing the 'idna' codec."""
    return codecs.CodecInfo(
        name='idna',
        encode=Codec().encode,
        decode=Codec().decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamwriter=StreamWriter,
        streamreader=StreamReader,
    )