1*67e74705SXin Li#!/usr/bin/env python 2*67e74705SXin Li 3*67e74705SXin Liimport os 4*67e74705SXin Liimport re 5*67e74705SXin Liimport subprocess 6*67e74705SXin Liimport sys 7*67e74705SXin Liimport tempfile 8*67e74705SXin Li 9*67e74705SXin Li### 10*67e74705SXin Li 11*67e74705SXin Liclass DeltaAlgorithm(object): 12*67e74705SXin Li def __init__(self): 13*67e74705SXin Li self.cache = set() 14*67e74705SXin Li 15*67e74705SXin Li def test(self, changes): 16*67e74705SXin Li abstract 17*67e74705SXin Li 18*67e74705SXin Li ### 19*67e74705SXin Li 20*67e74705SXin Li def getTestResult(self, changes): 21*67e74705SXin Li # There is no reason to cache successful tests because we will 22*67e74705SXin Li # always reduce the changeset when we see one. 23*67e74705SXin Li 24*67e74705SXin Li changeset = frozenset(changes) 25*67e74705SXin Li if changeset in self.cache: 26*67e74705SXin Li return False 27*67e74705SXin Li elif not self.test(changes): 28*67e74705SXin Li self.cache.add(changeset) 29*67e74705SXin Li return False 30*67e74705SXin Li else: 31*67e74705SXin Li return True 32*67e74705SXin Li 33*67e74705SXin Li def run(self, changes, force=False): 34*67e74705SXin Li # Make sure the initial test passes, if not then (a) either 35*67e74705SXin Li # the user doesn't expect monotonicity, and we may end up 36*67e74705SXin Li # doing O(N^2) tests, or (b) the test is wrong. Avoid the 37*67e74705SXin Li # O(N^2) case unless user requests it. 38*67e74705SXin Li if not force: 39*67e74705SXin Li if not self.getTestResult(changes): 40*67e74705SXin Li raise ValueError,'Initial test passed to delta fails.' 41*67e74705SXin Li 42*67e74705SXin Li # Check empty set first to quickly find poor test functions. 43*67e74705SXin Li if self.getTestResult(set()): 44*67e74705SXin Li return set() 45*67e74705SXin Li else: 46*67e74705SXin Li return self.delta(changes, self.split(changes)) 47*67e74705SXin Li 48*67e74705SXin Li def split(self, S): 49*67e74705SXin Li """split(set) -> [sets] 50*67e74705SXin Li 51*67e74705SXin Li Partition a set into one or two pieces. 52*67e74705SXin Li """ 53*67e74705SXin Li 54*67e74705SXin Li # There are many ways to split, we could do a better job with more 55*67e74705SXin Li # context information (but then the API becomes grosser). 56*67e74705SXin Li L = list(S) 57*67e74705SXin Li mid = len(L)//2 58*67e74705SXin Li if mid==0: 59*67e74705SXin Li return L, 60*67e74705SXin Li else: 61*67e74705SXin Li return L[:mid],L[mid:] 62*67e74705SXin Li 63*67e74705SXin Li def delta(self, c, sets): 64*67e74705SXin Li # assert(reduce(set.union, sets, set()) == c) 65*67e74705SXin Li 66*67e74705SXin Li # If there is nothing left we can remove, we are done. 67*67e74705SXin Li if len(sets) <= 1: 68*67e74705SXin Li return c 69*67e74705SXin Li 70*67e74705SXin Li # Look for a passing subset. 71*67e74705SXin Li res = self.search(c, sets) 72*67e74705SXin Li if res is not None: 73*67e74705SXin Li return res 74*67e74705SXin Li 75*67e74705SXin Li # Otherwise, partition sets if possible; if not we are done. 76*67e74705SXin Li refined = sum(map(list, map(self.split, sets)), []) 77*67e74705SXin Li if len(refined) == len(sets): 78*67e74705SXin Li return c 79*67e74705SXin Li 80*67e74705SXin Li return self.delta(c, refined) 81*67e74705SXin Li 82*67e74705SXin Li def search(self, c, sets): 83*67e74705SXin Li for i,S in enumerate(sets): 84*67e74705SXin Li # If test passes on this subset alone, recurse. 85*67e74705SXin Li if self.getTestResult(S): 86*67e74705SXin Li return self.delta(S, self.split(S)) 87*67e74705SXin Li 88*67e74705SXin Li # Otherwise if we have more than two sets, see if test 89*67e74705SXin Li # pases without this subset. 90*67e74705SXin Li if len(sets) > 2: 91*67e74705SXin Li complement = sum(sets[:i] + sets[i+1:],[]) 92*67e74705SXin Li if self.getTestResult(complement): 93*67e74705SXin Li return self.delta(complement, sets[:i] + sets[i+1:]) 94*67e74705SXin Li 95*67e74705SXin Li### 96*67e74705SXin Li 97*67e74705SXin Liclass Token: 98*67e74705SXin Li def __init__(self, type, data, flags, file, line, column): 99*67e74705SXin Li self.type = type 100*67e74705SXin Li self.data = data 101*67e74705SXin Li self.flags = flags 102*67e74705SXin Li self.file = file 103*67e74705SXin Li self.line = line 104*67e74705SXin Li self.column = column 105*67e74705SXin Li 106*67e74705SXin LikTokenRE = re.compile(r"""([a-z_]+) '(.*)'\t(.*)\tLoc=<(.*):(.*):(.*)>""", 107*67e74705SXin Li re.DOTALL | re.MULTILINE) 108*67e74705SXin Li 109*67e74705SXin Lidef getTokens(path): 110*67e74705SXin Li p = subprocess.Popen(['clang','-dump-raw-tokens',path], 111*67e74705SXin Li stdin=subprocess.PIPE, 112*67e74705SXin Li stdout=subprocess.PIPE, 113*67e74705SXin Li stderr=subprocess.PIPE) 114*67e74705SXin Li out,err = p.communicate() 115*67e74705SXin Li 116*67e74705SXin Li tokens = [] 117*67e74705SXin Li collect = None 118*67e74705SXin Li for ln in err.split('\n'): 119*67e74705SXin Li # Silly programmers refuse to print in simple machine readable 120*67e74705SXin Li # formats. Whatever. 121*67e74705SXin Li if collect is None: 122*67e74705SXin Li collect = ln 123*67e74705SXin Li else: 124*67e74705SXin Li collect = collect + '\n' + ln 125*67e74705SXin Li if 'Loc=<' in ln and ln.endswith('>'): 126*67e74705SXin Li ln,collect = collect,None 127*67e74705SXin Li tokens.append(Token(*kTokenRE.match(ln).groups())) 128*67e74705SXin Li 129*67e74705SXin Li return tokens 130*67e74705SXin Li 131*67e74705SXin Li### 132*67e74705SXin Li 133*67e74705SXin Liclass TMBDDelta(DeltaAlgorithm): 134*67e74705SXin Li def __init__(self, testProgram, tokenLists, log): 135*67e74705SXin Li def patchName(name, suffix): 136*67e74705SXin Li base,ext = os.path.splitext(name) 137*67e74705SXin Li return base + '.' + suffix + ext 138*67e74705SXin Li super(TMBDDelta, self).__init__() 139*67e74705SXin Li self.testProgram = testProgram 140*67e74705SXin Li self.tokenLists = tokenLists 141*67e74705SXin Li self.tempFiles = [patchName(f,'tmp') 142*67e74705SXin Li for f,_ in self.tokenLists] 143*67e74705SXin Li self.targetFiles = [patchName(f,'ok') 144*67e74705SXin Li for f,_ in self.tokenLists] 145*67e74705SXin Li self.log = log 146*67e74705SXin Li self.numTests = 0 147*67e74705SXin Li 148*67e74705SXin Li def writeFiles(self, changes, fileNames): 149*67e74705SXin Li assert len(fileNames) == len(self.tokenLists) 150*67e74705SXin Li byFile = [[] for i in self.tokenLists] 151*67e74705SXin Li for i,j in changes: 152*67e74705SXin Li byFile[i].append(j) 153*67e74705SXin Li 154*67e74705SXin Li for i,(file,tokens) in enumerate(self.tokenLists): 155*67e74705SXin Li f = open(fileNames[i],'w') 156*67e74705SXin Li for j in byFile[i]: 157*67e74705SXin Li f.write(tokens[j]) 158*67e74705SXin Li f.close() 159*67e74705SXin Li 160*67e74705SXin Li return byFile 161*67e74705SXin Li 162*67e74705SXin Li def test(self, changes): 163*67e74705SXin Li self.numTests += 1 164*67e74705SXin Li 165*67e74705SXin Li byFile = self.writeFiles(changes, self.tempFiles) 166*67e74705SXin Li 167*67e74705SXin Li if self.log: 168*67e74705SXin Li print >>sys.stderr, 'TEST - ', 169*67e74705SXin Li if self.log > 1: 170*67e74705SXin Li for i,(file,_) in enumerate(self.tokenLists): 171*67e74705SXin Li indices = byFile[i] 172*67e74705SXin Li if i: 173*67e74705SXin Li sys.stderr.write('\n ') 174*67e74705SXin Li sys.stderr.write('%s:%d tokens: [' % (file,len(byFile[i]))) 175*67e74705SXin Li prev = None 176*67e74705SXin Li for j in byFile[i]: 177*67e74705SXin Li if prev is None or j != prev + 1: 178*67e74705SXin Li if prev: 179*67e74705SXin Li sys.stderr.write('%d][' % prev) 180*67e74705SXin Li sys.stderr.write(str(j)) 181*67e74705SXin Li sys.stderr.write(':') 182*67e74705SXin Li prev = j 183*67e74705SXin Li if byFile[i]: 184*67e74705SXin Li sys.stderr.write(str(byFile[i][-1])) 185*67e74705SXin Li sys.stderr.write('] ') 186*67e74705SXin Li else: 187*67e74705SXin Li print >>sys.stderr, ', '.join(['%s:%d tokens' % (file, len(byFile[i])) 188*67e74705SXin Li for i,(file,_) in enumerate(self.tokenLists)]), 189*67e74705SXin Li 190*67e74705SXin Li p = subprocess.Popen([self.testProgram] + self.tempFiles) 191*67e74705SXin Li res = p.wait() == 0 192*67e74705SXin Li 193*67e74705SXin Li if res: 194*67e74705SXin Li self.writeFiles(changes, self.targetFiles) 195*67e74705SXin Li 196*67e74705SXin Li if self.log: 197*67e74705SXin Li print >>sys.stderr, '=> %s' % res 198*67e74705SXin Li else: 199*67e74705SXin Li if res: 200*67e74705SXin Li print '\nSUCCESS (%d tokens)' % len(changes) 201*67e74705SXin Li else: 202*67e74705SXin Li sys.stderr.write('.') 203*67e74705SXin Li 204*67e74705SXin Li return res 205*67e74705SXin Li 206*67e74705SXin Li def run(self): 207*67e74705SXin Li res = super(TMBDDelta, self).run([(i,j) 208*67e74705SXin Li for i,(file,tokens) in enumerate(self.tokenLists) 209*67e74705SXin Li for j in range(len(tokens))]) 210*67e74705SXin Li self.writeFiles(res, self.targetFiles) 211*67e74705SXin Li if not self.log: 212*67e74705SXin Li print >>sys.stderr 213*67e74705SXin Li return res 214*67e74705SXin Li 215*67e74705SXin Lidef tokenBasedMultiDelta(program, files, log): 216*67e74705SXin Li # Read in the lists of tokens. 217*67e74705SXin Li tokenLists = [(file, [t.data for t in getTokens(file)]) 218*67e74705SXin Li for file in files] 219*67e74705SXin Li 220*67e74705SXin Li numTokens = sum([len(tokens) for _,tokens in tokenLists]) 221*67e74705SXin Li print "Delta on %s with %d tokens." % (', '.join(files), numTokens) 222*67e74705SXin Li 223*67e74705SXin Li tbmd = TMBDDelta(program, tokenLists, log) 224*67e74705SXin Li 225*67e74705SXin Li res = tbmd.run() 226*67e74705SXin Li 227*67e74705SXin Li print "Finished %s with %d tokens (in %d tests)." % (', '.join(tbmd.targetFiles), 228*67e74705SXin Li len(res), 229*67e74705SXin Li tbmd.numTests) 230*67e74705SXin Li 231*67e74705SXin Lidef main(): 232*67e74705SXin Li from optparse import OptionParser, OptionGroup 233*67e74705SXin Li parser = OptionParser("%prog <test program> {files+}") 234*67e74705SXin Li parser.add_option("", "--debug", dest="debugLevel", 235*67e74705SXin Li help="set debug level [default %default]", 236*67e74705SXin Li action="store", type=int, default=0) 237*67e74705SXin Li (opts, args) = parser.parse_args() 238*67e74705SXin Li 239*67e74705SXin Li if len(args) <= 1: 240*67e74705SXin Li parser.error('Invalid number of arguments.') 241*67e74705SXin Li 242*67e74705SXin Li program,files = args[0],args[1:] 243*67e74705SXin Li 244*67e74705SXin Li md = tokenBasedMultiDelta(program, files, log=opts.debugLevel) 245*67e74705SXin Li 246*67e74705SXin Liif __name__ == '__main__': 247*67e74705SXin Li try: 248*67e74705SXin Li main() 249*67e74705SXin Li except KeyboardInterrupt: 250*67e74705SXin Li print >>sys.stderr,'Interrupted.' 251*67e74705SXin Li os._exit(1) # Avoid freeing our giant cache. 252