#!/usr/bin/env python
"""Compare the performance of two nanobench runs.

Both input files are scanned for lines of the form

    Samples: <number> <number> ... <label>

Benchmarks (labels) present in both files are compared.  Only changes whose
timing distributions differ significantly are printed, ordered by the
experiment/baseline ratio, largest first.  SciPy is optional: without it no
significance test is performed and every common benchmark is printed.

The code sticks to syntax that is valid under both Python 2 and Python 3
(single-argument ``print(...)``, %-formatting).
"""

import argparse
import sys

have_scipy = True
try:
    import scipy.stats
except Exception:
    # SciPy is an optional dependency.  Any failure to import it (missing or
    # broken install) simply disables the statistics below.
    have_scipy = False

# Per-comparison p-value threshold before the Bonferroni correction.
SIGNIFICANCE_THRESHOLD = 0.0001

# (divisor, suffix) pairs used by humanize(), largest unit first.
_UNITS = ((1e9, 's'), (1e6, 'ms'), (1e3, 'us'), (1e0, 'ns'))


def parse_args(argv=None):
    """Parse command-line arguments.

    argv: list of argument strings; None means use sys.argv[1:].
    Returns the argparse namespace with `use_means`, `baseline`, `experiment`.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description='Compare performance of two runs from nanobench.')
    parser.add_argument('--use_means', action='store_true', default=False,
                        help='Use means to calculate performance ratios.')
    parser.add_argument('baseline', help='Baseline file.')
    parser.add_argument('experiment', help='Experiment file.')
    return parser.parse_args(argv)


def read_samples(path):
    """Return {label: [float, ...]} for every "Samples:" line in `path`.

    Lines that are not sample lines, carry no samples, or contain a sample
    that does not parse as a float are skipped.  If a label occurs more than
    once, the last occurrence wins.
    """
    samples = {}
    with open(path) as f:
        for line in f:
            tokens = line.split()
            # Need the "Samples:" marker, at least one sample, and a label.
            if len(tokens) < 3 or tokens[0] != "Samples:":
                continue
            try:
                # Build a real list: the data is traversed more than once
                # (min/mean, the rank test, sem), which a lazy map object
                # would not survive on Python 3.
                samples[tokens[-1]] = [float(t) for t in tokens[1:-1]]
            except ValueError:
                continue
    return samples


def mean(xs):
    """Return the arithmetic mean of the non-empty sequence `xs`."""
    return sum(xs) / len(xs)


def humanize(ns):
    """Format a duration given in nanoseconds with a readable unit.

    Picks the largest unit (s, ms, us, ns) that `ns` reaches.  Values below
    one nanosecond (including 0) and NaN are reported in ns.
    """
    for threshold, suffix in _UNITS:
        if ns >= threshold:
            return "%.3g%s" % (ns / threshold, suffix)
    return "%.3gns" % ns


def compare(baseline, experiment, use_means=False):
    """Compare the benchmarks common to two sample dictionaries.

    baseline, experiment: {label: [float, ...]} as built by read_samples().
    use_means: summarize each benchmark by its mean instead of its minimum.

    Returns a list of (ratio, p, key, am, bm, asem, bsem) tuples sorted in
    descending order, where ratio = bm / am, am/bm are the baseline and
    experiment summaries, p is the Mann-Whitney U p-value and asem/bsem are
    standard errors of the mean.  Without SciPy, p, asem and bsem are 0.
    """
    summarize = mean if use_means else min
    results = []
    for key in set(baseline).intersection(experiment):
        base, exp = baseline[key], experiment[key]
        am, bm = summarize(base), summarize(exp)
        p, asem, bsem = 0, 0, 0
        if have_scipy:
            try:
                _, p = scipy.stats.mannwhitneyu(base, exp)
            except ValueError:
                # Some SciPy releases refuse degenerate input (e.g. all
                # values identical); treat that as "no significant change".
                p = 1.0
            asem, bsem = scipy.stats.sem(base), scipy.stats.sem(exp)
        if am:
            ratio = bm / am
        else:
            # A zero baseline would otherwise raise ZeroDivisionError.
            ratio = float('inf') if bm else 1.0
        results.append((ratio, p, key, am, bm, asem, bsem))
    results.sort(reverse=True)
    return results


def format_report(results, use_means=False):
    """Return the report lines for the significant entries of `results`.

    results: list of tuples as returned by compare().
    use_means: also show the standard error next to each timing.
    """
    if not results:
        return []
    maxlen = max(len(entry[2]) for entry in results)
    # We print only significant changes in benchmark timing distribution.
    # Adjust for the fact we've run multiple tests.
    bonferroni = SIGNIFICANCE_THRESHOLD / len(results)
    lines = []
    for ratio, p, key, am, bm, asem, bsem in results:
        if p < bonferroni:
            str_ratio = ('%.2gx' if ratio < 1 else '%.3gx') % ratio
            if use_means:
                lines.append('%*s\t%6s(%6s) -> %6s(%6s)\t%s' % (
                    maxlen, key, humanize(am), humanize(asem),
                    humanize(bm), humanize(bsem), str_ratio))
            else:
                lines.append('%*s\t%6s -> %6s\t%s' % (
                    maxlen, key, humanize(am), humanize(bm), str_ratio))
    return lines


def main(argv=None):
    """Run the comparison; return the process exit status.

    Returns 1 (after a message on stderr) when the two files share no
    benchmark, 0 otherwise.
    """
    args = parse_args(argv)
    results = compare(read_samples(args.baseline),
                      read_samples(args.experiment),
                      args.use_means)
    if not results:
        sys.stderr.write('No benchmarks in common between %s and %s.\n'
                         % (args.baseline, args.experiment))
        return 1
    for line in format_report(results, args.use_means):
        print(line)
    return 0


if __name__ == '__main__':
    sys.exit(main())