1*760c253cSXin Li#!/usr/bin/env python3 2*760c253cSXin Li# -*- coding: utf-8 -*- 3*760c253cSXin Li# Copyright 2016 The ChromiumOS Authors 4*760c253cSXin Li# Use of this source code is governed by a BSD-style license that can be 5*760c253cSXin Li# found in the LICENSE file. 6*760c253cSXin Li 7*760c253cSXin Li"""Given a specially-formatted JSON object, generates results report(s). 8*760c253cSXin Li 9*760c253cSXin LiThe JSON object should look like: 10*760c253cSXin Li{"data": BenchmarkData, "platforms": BenchmarkPlatforms} 11*760c253cSXin Li 12*760c253cSXin LiBenchmarkPlatforms is a [str], each of which names a platform the benchmark 13*760c253cSXin Li was run on (e.g. peppy, shamu, ...). Note that the order of this list is 14*760c253cSXin Li related with the order of items in BenchmarkData. 15*760c253cSXin Li 16*760c253cSXin LiBenchmarkData is a {str: [PlatformData]}. The str is the name of the benchmark, 17*760c253cSXin Liand a PlatformData is a set of data for a given platform. There must be one 18*760c253cSXin LiPlatformData for each benchmark, for each element in BenchmarkPlatforms. 19*760c253cSXin Li 20*760c253cSXin LiA PlatformData is a [{str: float}], where each str names a metric we recorded, 21*760c253cSXin Liand the float is the value for that metric. Each element is considered to be 22*760c253cSXin Lithe metrics collected from an independent run of this benchmark. NOTE: Each 23*760c253cSXin LiPlatformData is expected to have a "retval" key, with the return value of 24*760c253cSXin Lithe benchmark. If the benchmark is successful, said return value should be 0. 25*760c253cSXin LiOtherwise, this will break some of our JSON functionality. 26*760c253cSXin Li 27*760c253cSXin LiPutting it all together, a JSON object will end up looking like: 28*760c253cSXin Li { "platforms": ["peppy", "peppy-new-crosstool"], 29*760c253cSXin Li "data": { 30*760c253cSXin Li "bench_draw_line": [ 31*760c253cSXin Li [{"time (ms)": 1.321, "memory (mb)": 128.1, "retval": 0}, 32*760c253cSXin Li {"time (ms)": 1.920, "memory (mb)": 128.4, "retval": 0}], 33*760c253cSXin Li [{"time (ms)": 1.221, "memory (mb)": 124.3, "retval": 0}, 34*760c253cSXin Li {"time (ms)": 1.423, "memory (mb)": 123.9, "retval": 0}] 35*760c253cSXin Li ] 36*760c253cSXin Li } 37*760c253cSXin Li } 38*760c253cSXin Li 39*760c253cSXin LiWhich says that we ran a benchmark on platforms named peppy, and 40*760c253cSXin Li peppy-new-crosstool. 41*760c253cSXin LiWe ran one benchmark, named bench_draw_line. 42*760c253cSXin LiIt was run twice on each platform. 43*760c253cSXin LiPeppy's runs took 1.321ms and 1.920ms, while peppy-new-crosstool's took 1.221ms 44*760c253cSXin Li and 1.423ms. None of the runs failed to complete. 45*760c253cSXin Li""" 46*760c253cSXin Li 47*760c253cSXin Li 48*760c253cSXin Liimport argparse 49*760c253cSXin Liimport functools 50*760c253cSXin Liimport json 51*760c253cSXin Liimport os 52*760c253cSXin Liimport sys 53*760c253cSXin Liimport traceback 54*760c253cSXin Li 55*760c253cSXin Lifrom results_report import BenchmarkResults 56*760c253cSXin Lifrom results_report import HTMLResultsReport 57*760c253cSXin Lifrom results_report import JSONResultsReport 58*760c253cSXin Lifrom results_report import TextResultsReport 59*760c253cSXin Li 60*760c253cSXin Li 61*760c253cSXin Lidef CountBenchmarks(benchmark_runs): 62*760c253cSXin Li """Counts the number of iterations for each benchmark in benchmark_runs.""" 63*760c253cSXin Li 64*760c253cSXin Li # Example input for benchmark_runs: 65*760c253cSXin Li # {"bench": [[run1, run2, run3], [run1, run2, run3, run4]]} 66*760c253cSXin Li def _MaxLen(results): 67*760c253cSXin Li return 0 if not results else max(len(r) for r in results) 68*760c253cSXin Li 69*760c253cSXin Li return [ 70*760c253cSXin Li (name, _MaxLen(results)) for name, results in benchmark_runs.items() 71*760c253cSXin Li ] 72*760c253cSXin Li 73*760c253cSXin Li 74*760c253cSXin Lidef CutResultsInPlace(results, max_keys=50, complain_on_update=True): 75*760c253cSXin Li """Limits the given benchmark results to max_keys keys in-place. 76*760c253cSXin Li 77*760c253cSXin Li This takes the `data` field from the benchmark input, and mutates each 78*760c253cSXin Li benchmark run to contain `max_keys` elements (ignoring special elements, like 79*760c253cSXin Li "retval"). At the moment, it just selects the first `max_keys` keyvals, 80*760c253cSXin Li alphabetically. 81*760c253cSXin Li 82*760c253cSXin Li If complain_on_update is true, this will print a message noting that a 83*760c253cSXin Li truncation occurred. 84*760c253cSXin Li 85*760c253cSXin Li This returns the `results` object that was passed in, for convenience. 86*760c253cSXin Li 87*760c253cSXin Li e.g. 88*760c253cSXin Li >>> benchmark_data = { 89*760c253cSXin Li ... "bench_draw_line": [ 90*760c253cSXin Li ... [{"time (ms)": 1.321, "memory (mb)": 128.1, "retval": 0}, 91*760c253cSXin Li ... {"time (ms)": 1.920, "memory (mb)": 128.4, "retval": 0}], 92*760c253cSXin Li ... [{"time (ms)": 1.221, "memory (mb)": 124.3, "retval": 0}, 93*760c253cSXin Li ... {"time (ms)": 1.423, "memory (mb)": 123.9, "retval": 0}] 94*760c253cSXin Li ... ] 95*760c253cSXin Li ... } 96*760c253cSXin Li >>> CutResultsInPlace(benchmark_data, max_keys=1, complain_on_update=False) 97*760c253cSXin Li { 98*760c253cSXin Li 'bench_draw_line': [ 99*760c253cSXin Li [{'memory (mb)': 128.1, 'retval': 0}, 100*760c253cSXin Li {'memory (mb)': 128.4, 'retval': 0}], 101*760c253cSXin Li [{'memory (mb)': 124.3, 'retval': 0}, 102*760c253cSXin Li {'memory (mb)': 123.9, 'retval': 0}] 103*760c253cSXin Li ] 104*760c253cSXin Li } 105*760c253cSXin Li """ 106*760c253cSXin Li actually_updated = False 107*760c253cSXin Li for bench_results in results.values(): 108*760c253cSXin Li for platform_results in bench_results: 109*760c253cSXin Li for i, result in enumerate(platform_results): 110*760c253cSXin Li # Keep the keys that come earliest when sorted alphabetically. 111*760c253cSXin Li # Forcing alphabetical order is arbitrary, but necessary; otherwise, 112*760c253cSXin Li # the keyvals we'd emit would depend on our iteration order through a 113*760c253cSXin Li # map. 114*760c253cSXin Li removable_keys = sorted(k for k in result if k != "retval") 115*760c253cSXin Li retained_keys = removable_keys[:max_keys] 116*760c253cSXin Li platform_results[i] = {k: result[k] for k in retained_keys} 117*760c253cSXin Li # retval needs to be passed through all of the time. 118*760c253cSXin Li retval = result.get("retval") 119*760c253cSXin Li if retval is not None: 120*760c253cSXin Li platform_results[i]["retval"] = retval 121*760c253cSXin Li actually_updated = actually_updated or len( 122*760c253cSXin Li retained_keys 123*760c253cSXin Li ) != len(removable_keys) 124*760c253cSXin Li 125*760c253cSXin Li if actually_updated and complain_on_update: 126*760c253cSXin Li print( 127*760c253cSXin Li "Warning: Some benchmark keyvals have been truncated.", 128*760c253cSXin Li file=sys.stderr, 129*760c253cSXin Li ) 130*760c253cSXin Li return results 131*760c253cSXin Li 132*760c253cSXin Li 133*760c253cSXin Lidef _PositiveInt(s): 134*760c253cSXin Li i = int(s) 135*760c253cSXin Li if i < 0: 136*760c253cSXin Li raise argparse.ArgumentTypeError("%d is not a positive integer." % (i,)) 137*760c253cSXin Li return i 138*760c253cSXin Li 139*760c253cSXin Li 140*760c253cSXin Lidef _AccumulateActions(args): 141*760c253cSXin Li """Given program arguments, determines what actions we want to run. 142*760c253cSXin Li 143*760c253cSXin Li Returns [(ResultsReportCtor, str)], where ResultsReportCtor can construct a 144*760c253cSXin Li ResultsReport, and the str is the file extension for the given report. 145*760c253cSXin Li """ 146*760c253cSXin Li results = [] 147*760c253cSXin Li # The order of these is arbitrary. 148*760c253cSXin Li if args.json: 149*760c253cSXin Li results.append((JSONResultsReport, "json")) 150*760c253cSXin Li if args.text: 151*760c253cSXin Li results.append((TextResultsReport, "txt")) 152*760c253cSXin Li if args.email: 153*760c253cSXin Li email_ctor = functools.partial(TextResultsReport, email=True) 154*760c253cSXin Li results.append((email_ctor, "email")) 155*760c253cSXin Li # We emit HTML if nothing else was specified. 156*760c253cSXin Li if args.html or not results: 157*760c253cSXin Li results.append((HTMLResultsReport, "html")) 158*760c253cSXin Li return results 159*760c253cSXin Li 160*760c253cSXin Li 161*760c253cSXin Li# Note: get_contents is a function, because it may be expensive (generating some 162*760c253cSXin Li# HTML reports takes O(seconds) on my machine, depending on the size of the 163*760c253cSXin Li# input data). 164*760c253cSXin Lidef WriteFile(output_prefix, extension, get_contents, overwrite, verbose): 165*760c253cSXin Li """Writes `contents` to a file named "${output_prefix}.${extension}". 166*760c253cSXin Li 167*760c253cSXin Li get_contents should be a zero-args function that returns a string (of the 168*760c253cSXin Li contents to write). 169*760c253cSXin Li If output_prefix == '-', this writes to stdout. 170*760c253cSXin Li If overwrite is False, this will not overwrite files. 171*760c253cSXin Li """ 172*760c253cSXin Li if output_prefix == "-": 173*760c253cSXin Li if verbose: 174*760c253cSXin Li print("Writing %s report to stdout" % (extension,), file=sys.stderr) 175*760c253cSXin Li sys.stdout.write(get_contents()) 176*760c253cSXin Li return 177*760c253cSXin Li 178*760c253cSXin Li file_name = "%s.%s" % (output_prefix, extension) 179*760c253cSXin Li if not overwrite and os.path.exists(file_name): 180*760c253cSXin Li raise IOError( 181*760c253cSXin Li "Refusing to write %s -- it already exists" % (file_name,) 182*760c253cSXin Li ) 183*760c253cSXin Li 184*760c253cSXin Li with open(file_name, "w") as out_file: 185*760c253cSXin Li if verbose: 186*760c253cSXin Li print( 187*760c253cSXin Li "Writing %s report to %s" % (extension, file_name), 188*760c253cSXin Li file=sys.stderr, 189*760c253cSXin Li ) 190*760c253cSXin Li out_file.write(get_contents()) 191*760c253cSXin Li 192*760c253cSXin Li 193*760c253cSXin Lidef RunActions(actions, benchmark_results, output_prefix, overwrite, verbose): 194*760c253cSXin Li """Runs `actions`, returning True if all succeeded.""" 195*760c253cSXin Li failed = False 196*760c253cSXin Li 197*760c253cSXin Li report_ctor = None # Make the linter happy 198*760c253cSXin Li for report_ctor, extension in actions: 199*760c253cSXin Li try: 200*760c253cSXin Li get_contents = lambda: report_ctor(benchmark_results).GetReport() 201*760c253cSXin Li WriteFile( 202*760c253cSXin Li output_prefix, extension, get_contents, overwrite, verbose 203*760c253cSXin Li ) 204*760c253cSXin Li except Exception: 205*760c253cSXin Li # Complain and move along; we may have more actions that might complete 206*760c253cSXin Li # successfully. 207*760c253cSXin Li failed = True 208*760c253cSXin Li traceback.print_exc() 209*760c253cSXin Li return not failed 210*760c253cSXin Li 211*760c253cSXin Li 212*760c253cSXin Lidef PickInputFile(input_name): 213*760c253cSXin Li """Given program arguments, returns file to read for benchmark input.""" 214*760c253cSXin Li return sys.stdin if input_name == "-" else open(input_name) 215*760c253cSXin Li 216*760c253cSXin Li 217*760c253cSXin Lidef _NoPerfReport(_label_name, _benchmark_name, _benchmark_iteration): 218*760c253cSXin Li return {} 219*760c253cSXin Li 220*760c253cSXin Li 221*760c253cSXin Lidef _ParseArgs(argv): 222*760c253cSXin Li parser = argparse.ArgumentParser( 223*760c253cSXin Li description="Turns JSON into results " "report(s)." 224*760c253cSXin Li ) 225*760c253cSXin Li parser.add_argument( 226*760c253cSXin Li "-v", 227*760c253cSXin Li "--verbose", 228*760c253cSXin Li action="store_true", 229*760c253cSXin Li help="Be a tiny bit more verbose.", 230*760c253cSXin Li ) 231*760c253cSXin Li parser.add_argument( 232*760c253cSXin Li "-f", 233*760c253cSXin Li "--force", 234*760c253cSXin Li action="store_true", 235*760c253cSXin Li help="Overwrite existing results files.", 236*760c253cSXin Li ) 237*760c253cSXin Li parser.add_argument( 238*760c253cSXin Li "-o", 239*760c253cSXin Li "--output", 240*760c253cSXin Li default="report", 241*760c253cSXin Li type=str, 242*760c253cSXin Li help="Prefix of the output filename (default: report). " 243*760c253cSXin Li "- means stdout.", 244*760c253cSXin Li ) 245*760c253cSXin Li parser.add_argument( 246*760c253cSXin Li "-i", 247*760c253cSXin Li "--input", 248*760c253cSXin Li required=True, 249*760c253cSXin Li type=str, 250*760c253cSXin Li help="Where to read the JSON from. - means stdin.", 251*760c253cSXin Li ) 252*760c253cSXin Li parser.add_argument( 253*760c253cSXin Li "-l", 254*760c253cSXin Li "--statistic-limit", 255*760c253cSXin Li default=0, 256*760c253cSXin Li type=_PositiveInt, 257*760c253cSXin Li help="The maximum number of benchmark statistics to " 258*760c253cSXin Li "display from a single run. 0 implies unlimited.", 259*760c253cSXin Li ) 260*760c253cSXin Li parser.add_argument( 261*760c253cSXin Li "--json", action="store_true", help="Output a JSON report." 262*760c253cSXin Li ) 263*760c253cSXin Li parser.add_argument( 264*760c253cSXin Li "--text", action="store_true", help="Output a text report." 265*760c253cSXin Li ) 266*760c253cSXin Li parser.add_argument( 267*760c253cSXin Li "--email", 268*760c253cSXin Li action="store_true", 269*760c253cSXin Li help="Output a text report suitable for email.", 270*760c253cSXin Li ) 271*760c253cSXin Li parser.add_argument( 272*760c253cSXin Li "--html", 273*760c253cSXin Li action="store_true", 274*760c253cSXin Li help="Output an HTML report (this is the default if no " 275*760c253cSXin Li "other output format is specified).", 276*760c253cSXin Li ) 277*760c253cSXin Li return parser.parse_args(argv) 278*760c253cSXin Li 279*760c253cSXin Li 280*760c253cSXin Lidef Main(argv): 281*760c253cSXin Li args = _ParseArgs(argv) 282*760c253cSXin Li with PickInputFile(args.input) as in_file: 283*760c253cSXin Li raw_results = json.load(in_file) 284*760c253cSXin Li 285*760c253cSXin Li platform_names = raw_results["platforms"] 286*760c253cSXin Li results = raw_results["data"] 287*760c253cSXin Li if args.statistic_limit: 288*760c253cSXin Li results = CutResultsInPlace(results, max_keys=args.statistic_limit) 289*760c253cSXin Li benches = CountBenchmarks(results) 290*760c253cSXin Li # In crosperf, a label is essentially a platform+configuration. So, a name of 291*760c253cSXin Li # a label and a name of a platform are equivalent for our purposes. 292*760c253cSXin Li bench_results = BenchmarkResults( 293*760c253cSXin Li label_names=platform_names, 294*760c253cSXin Li benchmark_names_and_iterations=benches, 295*760c253cSXin Li run_keyvals=results, 296*760c253cSXin Li read_perf_report=_NoPerfReport, 297*760c253cSXin Li ) 298*760c253cSXin Li actions = _AccumulateActions(args) 299*760c253cSXin Li ok = RunActions( 300*760c253cSXin Li actions, bench_results, args.output, args.force, args.verbose 301*760c253cSXin Li ) 302*760c253cSXin Li return 0 if ok else 1 303*760c253cSXin Li 304*760c253cSXin Li 305*760c253cSXin Liif __name__ == "__main__": 306*760c253cSXin Li sys.exit(Main(sys.argv[1:])) 307