xref: /aosp_15_r20/external/clang/utils/token-delta.py (revision 67e74705e28f6214e480b399dd47ea732279e315)
1*67e74705SXin Li#!/usr/bin/env python
2*67e74705SXin Li
3*67e74705SXin Liimport os
4*67e74705SXin Liimport re
5*67e74705SXin Liimport subprocess
6*67e74705SXin Liimport sys
7*67e74705SXin Liimport tempfile
8*67e74705SXin Li
9*67e74705SXin Li###
10*67e74705SXin Li
11*67e74705SXin Liclass DeltaAlgorithm(object):
12*67e74705SXin Li    def __init__(self):
13*67e74705SXin Li        self.cache = set()
14*67e74705SXin Li
15*67e74705SXin Li    def test(self, changes):
16*67e74705SXin Li        abstract
17*67e74705SXin Li
18*67e74705SXin Li    ###
19*67e74705SXin Li
20*67e74705SXin Li    def getTestResult(self, changes):
21*67e74705SXin Li        # There is no reason to cache successful tests because we will
22*67e74705SXin Li        # always reduce the changeset when we see one.
23*67e74705SXin Li
24*67e74705SXin Li        changeset = frozenset(changes)
25*67e74705SXin Li        if changeset in self.cache:
26*67e74705SXin Li            return False
27*67e74705SXin Li        elif not self.test(changes):
28*67e74705SXin Li            self.cache.add(changeset)
29*67e74705SXin Li            return False
30*67e74705SXin Li        else:
31*67e74705SXin Li            return True
32*67e74705SXin Li
33*67e74705SXin Li    def run(self, changes, force=False):
34*67e74705SXin Li        # Make sure the initial test passes, if not then (a) either
35*67e74705SXin Li        # the user doesn't expect monotonicity, and we may end up
36*67e74705SXin Li        # doing O(N^2) tests, or (b) the test is wrong. Avoid the
37*67e74705SXin Li        # O(N^2) case unless user requests it.
38*67e74705SXin Li        if not force:
39*67e74705SXin Li            if not self.getTestResult(changes):
40*67e74705SXin Li                raise ValueError,'Initial test passed to delta fails.'
41*67e74705SXin Li
42*67e74705SXin Li        # Check empty set first to quickly find poor test functions.
43*67e74705SXin Li        if self.getTestResult(set()):
44*67e74705SXin Li            return set()
45*67e74705SXin Li        else:
46*67e74705SXin Li            return self.delta(changes, self.split(changes))
47*67e74705SXin Li
48*67e74705SXin Li    def split(self, S):
49*67e74705SXin Li        """split(set) -> [sets]
50*67e74705SXin Li
51*67e74705SXin Li        Partition a set into one or two pieces.
52*67e74705SXin Li        """
53*67e74705SXin Li
54*67e74705SXin Li        # There are many ways to split, we could do a better job with more
55*67e74705SXin Li        # context information (but then the API becomes grosser).
56*67e74705SXin Li        L = list(S)
57*67e74705SXin Li        mid = len(L)//2
58*67e74705SXin Li        if mid==0:
59*67e74705SXin Li            return L,
60*67e74705SXin Li        else:
61*67e74705SXin Li            return L[:mid],L[mid:]
62*67e74705SXin Li
63*67e74705SXin Li    def delta(self, c, sets):
64*67e74705SXin Li        # assert(reduce(set.union, sets, set()) == c)
65*67e74705SXin Li
66*67e74705SXin Li        # If there is nothing left we can remove, we are done.
67*67e74705SXin Li        if len(sets) <= 1:
68*67e74705SXin Li            return c
69*67e74705SXin Li
70*67e74705SXin Li        # Look for a passing subset.
71*67e74705SXin Li        res = self.search(c, sets)
72*67e74705SXin Li        if res is not None:
73*67e74705SXin Li            return res
74*67e74705SXin Li
75*67e74705SXin Li        # Otherwise, partition sets if possible; if not we are done.
76*67e74705SXin Li        refined = sum(map(list, map(self.split, sets)), [])
77*67e74705SXin Li        if len(refined) == len(sets):
78*67e74705SXin Li            return c
79*67e74705SXin Li
80*67e74705SXin Li        return self.delta(c, refined)
81*67e74705SXin Li
82*67e74705SXin Li    def search(self, c, sets):
83*67e74705SXin Li        for i,S in enumerate(sets):
84*67e74705SXin Li            # If test passes on this subset alone, recurse.
85*67e74705SXin Li            if self.getTestResult(S):
86*67e74705SXin Li                return self.delta(S, self.split(S))
87*67e74705SXin Li
88*67e74705SXin Li            # Otherwise if we have more than two sets, see if test
89*67e74705SXin Li            # pases without this subset.
90*67e74705SXin Li            if len(sets) > 2:
91*67e74705SXin Li                complement = sum(sets[:i] + sets[i+1:],[])
92*67e74705SXin Li                if self.getTestResult(complement):
93*67e74705SXin Li                    return self.delta(complement, sets[:i] + sets[i+1:])
94*67e74705SXin Li
95*67e74705SXin Li###
96*67e74705SXin Li
97*67e74705SXin Liclass Token:
98*67e74705SXin Li    def __init__(self, type, data, flags, file, line, column):
99*67e74705SXin Li        self.type   = type
100*67e74705SXin Li        self.data   = data
101*67e74705SXin Li        self.flags  = flags
102*67e74705SXin Li        self.file   = file
103*67e74705SXin Li        self.line   = line
104*67e74705SXin Li        self.column = column
105*67e74705SXin Li
106*67e74705SXin LikTokenRE = re.compile(r"""([a-z_]+) '(.*)'\t(.*)\tLoc=<(.*):(.*):(.*)>""",
107*67e74705SXin Li                      re.DOTALL | re.MULTILINE)
108*67e74705SXin Li
109*67e74705SXin Lidef getTokens(path):
110*67e74705SXin Li    p = subprocess.Popen(['clang','-dump-raw-tokens',path],
111*67e74705SXin Li                         stdin=subprocess.PIPE,
112*67e74705SXin Li                         stdout=subprocess.PIPE,
113*67e74705SXin Li                         stderr=subprocess.PIPE)
114*67e74705SXin Li    out,err = p.communicate()
115*67e74705SXin Li
116*67e74705SXin Li    tokens = []
117*67e74705SXin Li    collect = None
118*67e74705SXin Li    for ln in err.split('\n'):
119*67e74705SXin Li        # Silly programmers refuse to print in simple machine readable
120*67e74705SXin Li        # formats. Whatever.
121*67e74705SXin Li        if collect is None:
122*67e74705SXin Li            collect = ln
123*67e74705SXin Li        else:
124*67e74705SXin Li            collect = collect + '\n' + ln
125*67e74705SXin Li        if 'Loc=<' in ln and ln.endswith('>'):
126*67e74705SXin Li            ln,collect = collect,None
127*67e74705SXin Li            tokens.append(Token(*kTokenRE.match(ln).groups()))
128*67e74705SXin Li
129*67e74705SXin Li    return tokens
130*67e74705SXin Li
131*67e74705SXin Li###
132*67e74705SXin Li
133*67e74705SXin Liclass TMBDDelta(DeltaAlgorithm):
134*67e74705SXin Li    def __init__(self, testProgram, tokenLists, log):
135*67e74705SXin Li        def patchName(name, suffix):
136*67e74705SXin Li            base,ext = os.path.splitext(name)
137*67e74705SXin Li            return base + '.' + suffix + ext
138*67e74705SXin Li        super(TMBDDelta, self).__init__()
139*67e74705SXin Li        self.testProgram = testProgram
140*67e74705SXin Li        self.tokenLists = tokenLists
141*67e74705SXin Li        self.tempFiles = [patchName(f,'tmp')
142*67e74705SXin Li                            for f,_ in self.tokenLists]
143*67e74705SXin Li        self.targetFiles = [patchName(f,'ok')
144*67e74705SXin Li                            for f,_ in self.tokenLists]
145*67e74705SXin Li        self.log = log
146*67e74705SXin Li        self.numTests = 0
147*67e74705SXin Li
148*67e74705SXin Li    def writeFiles(self, changes, fileNames):
149*67e74705SXin Li        assert len(fileNames) == len(self.tokenLists)
150*67e74705SXin Li        byFile = [[] for i in self.tokenLists]
151*67e74705SXin Li        for i,j in changes:
152*67e74705SXin Li            byFile[i].append(j)
153*67e74705SXin Li
154*67e74705SXin Li        for i,(file,tokens) in enumerate(self.tokenLists):
155*67e74705SXin Li            f = open(fileNames[i],'w')
156*67e74705SXin Li            for j in byFile[i]:
157*67e74705SXin Li                f.write(tokens[j])
158*67e74705SXin Li            f.close()
159*67e74705SXin Li
160*67e74705SXin Li        return byFile
161*67e74705SXin Li
162*67e74705SXin Li    def test(self, changes):
163*67e74705SXin Li        self.numTests += 1
164*67e74705SXin Li
165*67e74705SXin Li        byFile = self.writeFiles(changes, self.tempFiles)
166*67e74705SXin Li
167*67e74705SXin Li        if self.log:
168*67e74705SXin Li            print >>sys.stderr, 'TEST - ',
169*67e74705SXin Li            if self.log > 1:
170*67e74705SXin Li                for i,(file,_) in enumerate(self.tokenLists):
171*67e74705SXin Li                    indices = byFile[i]
172*67e74705SXin Li                    if i:
173*67e74705SXin Li                        sys.stderr.write('\n      ')
174*67e74705SXin Li                    sys.stderr.write('%s:%d tokens: [' % (file,len(byFile[i])))
175*67e74705SXin Li                    prev = None
176*67e74705SXin Li                    for j in byFile[i]:
177*67e74705SXin Li                        if prev is None or j != prev + 1:
178*67e74705SXin Li                            if prev:
179*67e74705SXin Li                                sys.stderr.write('%d][' % prev)
180*67e74705SXin Li                            sys.stderr.write(str(j))
181*67e74705SXin Li                            sys.stderr.write(':')
182*67e74705SXin Li                        prev = j
183*67e74705SXin Li                    if byFile[i]:
184*67e74705SXin Li                        sys.stderr.write(str(byFile[i][-1]))
185*67e74705SXin Li                    sys.stderr.write('] ')
186*67e74705SXin Li            else:
187*67e74705SXin Li                print >>sys.stderr, ', '.join(['%s:%d tokens' % (file, len(byFile[i]))
188*67e74705SXin Li                                               for i,(file,_) in enumerate(self.tokenLists)]),
189*67e74705SXin Li
190*67e74705SXin Li        p = subprocess.Popen([self.testProgram] + self.tempFiles)
191*67e74705SXin Li        res = p.wait() == 0
192*67e74705SXin Li
193*67e74705SXin Li        if res:
194*67e74705SXin Li            self.writeFiles(changes, self.targetFiles)
195*67e74705SXin Li
196*67e74705SXin Li        if self.log:
197*67e74705SXin Li            print >>sys.stderr, '=> %s' % res
198*67e74705SXin Li        else:
199*67e74705SXin Li            if res:
200*67e74705SXin Li                print '\nSUCCESS (%d tokens)' % len(changes)
201*67e74705SXin Li            else:
202*67e74705SXin Li                sys.stderr.write('.')
203*67e74705SXin Li
204*67e74705SXin Li        return res
205*67e74705SXin Li
206*67e74705SXin Li    def run(self):
207*67e74705SXin Li        res = super(TMBDDelta, self).run([(i,j)
208*67e74705SXin Li                                          for i,(file,tokens) in enumerate(self.tokenLists)
209*67e74705SXin Li                                          for j in range(len(tokens))])
210*67e74705SXin Li        self.writeFiles(res, self.targetFiles)
211*67e74705SXin Li        if not self.log:
212*67e74705SXin Li            print >>sys.stderr
213*67e74705SXin Li        return res
214*67e74705SXin Li
215*67e74705SXin Lidef tokenBasedMultiDelta(program, files, log):
216*67e74705SXin Li    # Read in the lists of tokens.
217*67e74705SXin Li    tokenLists = [(file, [t.data for t in getTokens(file)])
218*67e74705SXin Li                  for file in files]
219*67e74705SXin Li
220*67e74705SXin Li    numTokens = sum([len(tokens) for _,tokens in tokenLists])
221*67e74705SXin Li    print "Delta on %s with %d tokens." % (', '.join(files), numTokens)
222*67e74705SXin Li
223*67e74705SXin Li    tbmd = TMBDDelta(program, tokenLists, log)
224*67e74705SXin Li
225*67e74705SXin Li    res = tbmd.run()
226*67e74705SXin Li
227*67e74705SXin Li    print "Finished %s with %d tokens (in %d tests)." % (', '.join(tbmd.targetFiles),
228*67e74705SXin Li                                                         len(res),
229*67e74705SXin Li                                                         tbmd.numTests)
230*67e74705SXin Li
231*67e74705SXin Lidef main():
232*67e74705SXin Li    from optparse import OptionParser, OptionGroup
233*67e74705SXin Li    parser = OptionParser("%prog <test program> {files+}")
234*67e74705SXin Li    parser.add_option("", "--debug", dest="debugLevel",
235*67e74705SXin Li                     help="set debug level [default %default]",
236*67e74705SXin Li                     action="store", type=int, default=0)
237*67e74705SXin Li    (opts, args) = parser.parse_args()
238*67e74705SXin Li
239*67e74705SXin Li    if len(args) <= 1:
240*67e74705SXin Li        parser.error('Invalid number of arguments.')
241*67e74705SXin Li
242*67e74705SXin Li    program,files = args[0],args[1:]
243*67e74705SXin Li
244*67e74705SXin Li    md = tokenBasedMultiDelta(program, files, log=opts.debugLevel)
245*67e74705SXin Li
246*67e74705SXin Liif __name__ == '__main__':
247*67e74705SXin Li    try:
248*67e74705SXin Li        main()
249*67e74705SXin Li    except KeyboardInterrupt:
250*67e74705SXin Li        print >>sys.stderr,'Interrupted.'
251*67e74705SXin Li        os._exit(1) # Avoid freeing our giant cache.
252