# Copyright 2013 The ChromiumOS Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Define a type that wraps a Benchmark instance."""


import math
import statistics
from typing import Any

import numpy as np


# See crbug.com/673558 for how these are estimated.
_estimated_stddev = {
    "octane": 0.015,
    "kraken": 0.019,
    "speedometer": 0.007,
    "speedometer2": 0.006,
    "dromaeo.domcoreattr": 0.023,
    "dromaeo.domcoremodify": 0.011,
    "graphics_WebGLAquarium": 0.008,
    "page_cycler_v2.typical_25": 0.021,
    "loading.desktop": 0.021,  # Copied from page_cycler initially
}

# Numpy makes it hard to know the real type of some inputs
# and outputs, so this type alias is just for docs.
FloatLike = Any


def isf(
    x: FloatLike, mu: float = 0.0, sigma: float = 1.0, pitch: float = 0.01
) -> FloatLike:
    """Compute the inverse survival function for value x.

    In the absence of using scipy.stats.norm's isf(), this function
    attempts to re-implement the inverse survival function by calculating
    the numerical inverse of the survival function, interpolating between
    table values. See bug b/284489250 for details.

    Survival function as defined by:
      https://en.wikipedia.org/wiki/Survival_function

    The interpolation table covers the half-open interval
    [mu - 4 * sigma, mu + 4 * sigma); probabilities outside the range of
    the table are clamped to its end points by np.interp.

    Examples:
      >>> -2.0e-16 < isf(0.5) < 2.0e-16
      True

    Args:
      x: float or numpy array-like to compute the ISF for.
      mu: Center of the underlying normal distribution.
      sigma: Spread of the underlying normal distribution.
      pitch: Spacing between y-value interpolation points, measured in
        units of `sigma` (i.e. the absolute spacing when sigma is 1.0).

    Returns:
      float or numpy array-like representing the ISF of `x`.
    """
    norm = statistics.NormalDist(mu, sigma)
    # np.interp requires a monotonically increasing x table. The survival
    # function decreases as y grows, so the y grid is generated in
    # descending order to make the survival table ascending.
    z_vals = np.flip(np.arange(-4.0, 4.0, pitch))
    # Place the grid on the actual distribution rather than on a fixed
    # [-4, 4) window, so the table stays meaningful for any mu / sigma.
    # For the defaults (mu=0.0, sigma=1.0) this leaves the grid unchanged.
    y_vals = mu + sigma * z_vals
    survival_table = np.fromiter(
        (1.0 - norm.cdf(y) for y in y_vals), y_vals.dtype
    )
    return np.interp(x, survival_table, y_vals)


# Get #samples needed to guarantee a given confidence interval, assuming the
# samples follow normal distribution.
def _samples(b: str) -> int:
    """Return the number of iterations to run for benchmark `b`.

    Args:
      b: Benchmark name, looked up in `_estimated_stddev`.

    Returns:
      1 when no standard deviation estimate exists for `b`; otherwise the
      computed sample count, never less than 2.
    """
    # TODO: Make this an option
    # CI = (0.9, 0.02), i.e., 90% chance that |sample mean - true mean| < 2%.
    p = 0.9
    e = 0.02
    d = _estimated_stddev.get(b)
    if d is None:
        return 1
    n = int(math.ceil((isf((1 - p) / 2) * d / e) ** 2))
    # Get at least 2 samples so as to calculate standard deviation, which is
    # needed in T-test for p-value.
    return max(n, 2)


class Benchmark:
    """Class representing a benchmark to be run.

    Contains details of the benchmark suite, arguments to pass to the suite,
    iterations to run the benchmark suite and so on. Note that the benchmark name
    can be different to the test suite name. For example, you may want to have
    two different benchmarks which run the same test_name with different
    arguments.
    """

    def __init__(
        self,
        name,
        test_name,
        test_args,
        iterations,
        rm_chroot_tmp,
        perf_args,
        suite="",
        show_all_results=False,
        retries=0,
        run_local=False,
        cwp_dso="",
        weight=0,
    ):
        """Store the benchmark configuration.

        Args:
          name: Name of this benchmark; also used to pick a default
            iteration count when `iterations` is not positive.
          test_name: For telemetry, this is the benchmark name.
          test_args: For telemetry, this is the data.
          iterations: Number of iterations; when not greater than 0, the
            count is derived from `_samples(name)`.
          rm_chroot_tmp: Stored as-is on the instance.
          perf_args: Stored as-is on the instance.
          suite: Suite name; "telemetry" forces `show_all_results` to True.
          show_all_results: Stored on the instance unless overridden by the
            "telemetry" suite rule.
          retries: Stored as-is on the instance.
          run_local: Only allowed when `suite` is "telemetry_Crosperf".
          cwp_dso: Stored as-is on the instance.
          weight: Stored as-is on the instance.

        Raises:
          RuntimeError: If `run_local` is set for a suite other than
            "telemetry_Crosperf".
        """
        self.name = name
        # For telemetry, this is the benchmark name.
        self.test_name = test_name
        # For telemetry, this is the data.
        self.test_args = test_args
        self.iterations = iterations if iterations > 0 else _samples(name)
        self.perf_args = perf_args
        self.rm_chroot_tmp = rm_chroot_tmp
        self.iteration_adjusted = False
        self.suite = suite
        self.show_all_results = show_all_results
        self.retries = retries
        if self.suite == "telemetry":
            self.show_all_results = True
        if run_local and self.suite != "telemetry_Crosperf":
            raise RuntimeError(
                "run_local is only supported by telemetry_Crosperf."
            )
        self.run_local = run_local
        self.cwp_dso = cwp_dso
        self.weight = weight