#!/usr/bin/env python
"""Generate Java regular-expression constants matching all IANA TLDs.

The script downloads the IANA list of top-level domains, groups the names by
their first letter and prints two Java ``String`` constant definitions (one
plain, one for use inside a WEB_URL pattern) to standard output.

Internationalised names (``xn--...``) are emitted twice: once in their ASCII
punycode form and once decoded, written with Java ``\\uXXXX`` escapes.
"""

from contextlib import closing
from datetime import date

try:
    from urllib.request import urlopen  # Python 3
except ImportError:  # Python 2
    from urllib2 import urlopen

URL = 'http://data.iana.org/TLD/tlds-alpha-by-domain.txt'

# NOTE(review): the leading whitespace inside the two Java templates below and
# in TAB was reconstructed (Java member indent of 4, continuation indent of 8).
# Confirm against the checked-in generated Java source before regenerating.
TLD_PREFIX = r"""
    /**
     * Regular expression to match all IANA top-level domains.
     * List accurate as of {gen_date}. List taken from:
     * {url}
     * This pattern is auto-generated by frameworks/ex/common/tools/make-iana-tld-pattern.py
     */
    public static final String TOP_LEVEL_DOMAIN_STR =
"""
TLD_SUFFIX = '";'

URL_PREFIX = r"""
    /**
     * Regular expression to match all IANA top-level domains for WEB_URL.
     * List accurate as of {gen_date}. List taken from:
     * {url}
     * This pattern is auto-generated by frameworks/ex/common/tools/make-iana-tld-pattern.py
     */
    public static final String TOP_LEVEL_DOMAIN_STR_FOR_WEB_URL =
        "(?:"
"""

URL_SUFFIX = ';'
TAB = '        '


class BucketOutput:
    """Accumulates one bucket's Java source text and tracks line length.

    ``buffer`` holds the text produced so far (always starting with ``TAB``);
    ``lineLength`` is the approximate length of the current output line and is
    only consulted by :meth:`addPipe` to decide when to wrap.
    """

    def __init__(self):
        self.buffer = TAB
        self.lineLength = len(TAB)

    def __iadd__(self, other):
        """Append ``other`` to the current line (``output += text``)."""
        self.buffer += other
        self.lineLength += len(other)
        return self

    def addPipe(self):
        """Append a regex ``|``, first wrapping if the line exceeds 90 chars.

        Wrapping closes the current Java string literal and opens a new
        concatenated one (``+ "``) on the next line.
        """
        if self.lineLength > 90:
            self.buffer += '"\n' + TAB + '+ "'
            # Only the indent is counted after a wrap, not the '+ "' opener.
            self.lineLength = len(TAB)

        self.buffer += '|'
        self.lineLength += 1

    def value(self):
        """Return the accumulated text."""
        return self.buffer


class Bucket:
    """All TLDs sharing one first letter.

    ``base`` is that first letter, ``words`` holds names that are not exactly
    two characters long (stored whole) and ``letters`` holds the second
    character of every two-character name.
    """

    def __init__(self, baseLetter):
        self.base = baseLetter
        self.words = []
        self.letters = []

    def dump(self, isWebUrl=False, isFirst=False, isLast=False):
        """Render this bucket as one (possibly wrapped) Java string fragment.

        Args:
            isWebUrl: use non-capturing ``(?:`` groups instead of ``(``.
            isFirst: this is the first fragment of the pattern, so it opens
                the expression instead of continuing it with ``+ "|``.
            isLast: leave the Java string literal open (no closing quote or
                newline) so the caller can append the final parentheses.

        Returns:
            The fragment text, or ``''`` when the bucket is empty.
        """
        if not self.words and not self.letters:
            return ''

        self.words.sort()
        self.letters.sort()

        output = BucketOutput()

        if not isFirst:
            output += '+ "|'
        elif isWebUrl:
            output += '+ "'
        else:
            output += '"('

        # A group is only opened when there are whole words to alternate.
        if self.words:
            output += '(?:' if isWebUrl else '('

        for index, word in enumerate(self.words):
            if index:
                output.addPipe()
            # Escape the '-' character; two backslashes are emitted so that
            # the Java string literal itself contains a single backslash.
            output += word.replace('-', '\\\\-')

        if self.words and self.letters:
            output.addPipe()

        if len(self.letters) == 1:
            output += self.base + self.letters[0]
        elif self.letters:
            output += self.base + '[' + ''.join(self.letters) + ']'

        if self.words:
            output += ')'

        if not isLast:
            output += '"\n'

        return output.value()

    def add(self, line):
        """Record one TLD; comment lines (``#...``) and empty lines are ignored."""
        if not line or line.startswith('#'):
            return

        if len(line) == 2:
            self.letters.append(line[1:2])
        else:
            self.words.append(line)


def getBucket(buckets, line):
    """Return the bucket for ``line``'s first character, creating it if needed.

    ``line`` must be non-empty; only ``line[0]`` is used as the key.
    """
    letter = line[0]
    bucket = buckets.get(letter)

    if bucket is None:
        bucket = Bucket(letter)
        buckets[letter] = bucket

    return bucket


def toJavaEscapes(text):
    """Return ``text`` with every non-ASCII character as a Java ``\\uXXXX`` escape.

    Characters outside the Basic Multilingual Plane are written as a UTF-16
    surrogate pair, which is what a Java string literal requires.
    """
    pieces = []
    for char in text:
        codePoint = ord(char)
        if codePoint < 0x80:
            pieces.append(char)
        elif codePoint > 0xFFFF:
            offset = codePoint - 0x10000
            pieces.append('\\u%04x\\u%04x' % (0xD800 + (offset >> 10),
                                              0xDC00 + (offset & 0x3FF)))
        else:
            pieces.append('\\u%04x' % codePoint)
    return ''.join(pieces)


def buildBuckets(lines):
    """Group the TLDs found in ``lines`` into per-first-letter buckets.

    Args:
        lines: iterable of lines (``bytes`` or text) in the format of the IANA
            list: one TLD per line, with ``#`` comment lines.

    Returns:
        A dict mapping a first letter to its :class:`Bucket`.
    """
    buckets = {}

    for raw in lines:
        if isinstance(raw, bytes):
            raw = raw.decode('ascii')
        domain = raw.strip().lower()

        if not domain or domain.startswith('#'):
            continue

        bucket = getBucket(buckets, domain)
        bucket.add(domain)

        if domain.startswith('xn--'):
            # Also match the decoded (Unicode) form of the name.
            decoded = domain[4:].encode('ascii').decode('punycode')
            bucket.add(toJavaEscapes(decoded))

    return buckets


def renderPattern(prefix, suffix, buckets, isWebUrl=False):
    """Build the complete Java constant definition as a string.

    ``prefix`` may contain ``{gen_date}`` and ``{url}`` placeholders. The
    first and last *non-empty* buckets open and close the expression, so a
    list without any 'a...' or 'z...' names still yields well-formed output.
    """
    output = prefix.format(gen_date=date.today(), url=URL)

    populated = []
    for code in range(ord('a'), ord('z') + 1):
        bucket = buckets.get(chr(code))
        if bucket is not None and (bucket.words or bucket.letters):
            populated.append(bucket)

    lastIndex = len(populated) - 1
    for index, bucket in enumerate(populated):
        output += bucket.dump(isWebUrl=isWebUrl,
                              isFirst=(index == 0),
                              isLast=(index == lastIndex))

    # NOTE(review): the web-URL variant closes one more group than this script
    # opens; presumably the consuming pattern opens it. Confirm before changing.
    output += '))"' if isWebUrl else ')'

    return output + suffix


def makePattern(prefix, suffix, buckets, isWebUrl=False):
    """Print the Java constant definition built by :func:`renderPattern`."""
    print(renderPattern(prefix, suffix, buckets, isWebUrl=isWebUrl))


def fetchDomainLines(url=URL):
    """Download ``url`` and return its lines, closing the connection afterwards."""
    with closing(urlopen(url)) as response:
        return response.readlines()


def main():
    """Fetch the IANA list and print both generated Java constants."""
    buckets = buildBuckets(fetchDomainLines(URL))

    makePattern(TLD_PREFIX, TLD_SUFFIX, buckets, isWebUrl=False)
    makePattern(URL_PREFIX, URL_SUFFIX, buckets, isWebUrl=True)


if __name__ == "__main__":
    main()