# -*- coding: utf-8 -*-
# Copyright 2011 The ChromiumOS Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""The experiment runner module."""

import getpass
import os
import shutil
import time

from cros_utils import command_executer
from cros_utils import logger
from cros_utils.email_sender import EmailSender
from cros_utils.file_utils import FileUtils
from experiment_status import ExperimentStatus
import lock_machine
from results_cache import CacheConditions
from results_cache import ResultsCache
from results_report import HTMLResultsReport
from results_report import JSONResultsReport
from results_report import TextResultsReport
from schedv2 import Schedv2
import test_flag

import config


def _WriteJSONReportToFile(experiment, results_dir, json_report):
    """Writes a JSON report to a file in results_dir.

    The file is named report_<board>_<date>_<time>.<compiler>.json, where
    the board comes from the first label, ':' in the report time is replaced
    by '.', and the compiler is "llvm" if any label's compiler string
    contains "llvm" and "gcc" otherwise.

    Args:
        experiment: The experiment whose labels determine the file name.
        results_dir: Directory in which the report file is created.
        json_report: Report object providing `date`, `time` and GetReport().
    """
    has_llvm = any("llvm" in l.compiler for l in experiment.labels)
    compiler_string = "llvm" if has_llvm else "gcc"
    board = experiment.labels[0].board
    filename = "report_%s_%s_%s.%s.json" % (
        board,
        json_report.date,
        json_report.time.replace(":", "."),
        compiler_string,
    )
    fullname = os.path.join(results_dir, filename)
    report_text = json_report.GetReport()
    with open(fullname, "w") as out_file:
        out_file.write(report_text)


class ExperimentRunner(object):
    """ExperimentRunner Class.

    Drives one experiment end to end: locks machines, runs the experiment
    while periodically logging status, stores the results, and emails a
    report.
    """

    # Seconds between two status printouts while the experiment is running.
    STATUS_TIME_DELAY = 30
    # Seconds to sleep between two checks of experiment completion.
    THREAD_MONITOR_DELAY = 2

    # Return codes of _StoreResults() / Run().
    SUCCEEDED = 0
    HAS_FAILURE = 1
    ALL_FAILED = 2

    def __init__(
        self,
        experiment,
        json_report,
        using_schedv2=False,
        log=None,
        cmd_exec=None,
    ):
        """Initializes the runner.

        Args:
            experiment: The experiment to run.
            json_report: If truthy, a JSON report is written when results
                are stored.
            using_schedv2: Whether to use crosperf sched v2.
            log: Logger to use; defaults to a logger for experiment.log_dir.
            cmd_exec: Command executer to use; defaults to one built on the
                logger.
        """
        self._experiment = experiment
        self.l = log or logger.GetLogger(experiment.log_dir)
        self._ce = cmd_exec or command_executer.GetCommandExecuter(self.l)
        self._terminated = False
        self.json_report = json_report
        self.locked_machines = []
        if experiment.log_level != "verbose":
            # Instance-level override of the class default.
            self.STATUS_TIME_DELAY = 10

        # Setting this to True will use crosperf sched v2 (feature in
        # progress).
        self._using_schedv2 = using_schedv2

    def _GetMachineList(self):
        """Return a list of all requested machines.

        Create a list of all the requested machines, both global requests and
        label-specific requests, and return the list.

        Returns:
            The experiment's `remote` list itself (not a copy).
        """
        machines = self._experiment.remote
        # All Label.remote is a sublist of experiment.remote.
        for l in self._experiment.labels:
            for r in l.remote:
                assert r in machines
        return machines

    def _UpdateMachineList(self, locked_machines):
        """Update machines lists to contain only locked machines.

        Go through all the lists of requested machines, both global and
        label-specific requests, and remove any machine that we were not
        able to lock.

        Args:
            locked_machines: A list of the machines we successfully locked.
        """
        # Filter via slice assignment: the list objects stay the same (other
        # references remain valid), and we avoid calling remove() on a list
        # while iterating over it, which skips the element following each
        # removal.
        remote = self._experiment.remote
        remote[:] = [m for m in remote if m in locked_machines]

        for l in self._experiment.labels:
            l.remote[:] = [m for m in l.remote if m in locked_machines]

    def _GetMachineType(self, lock_mgr, machine):
        """Get where is the machine from.

        Args:
            lock_mgr: Lock manager used to check Crosfleet membership.
            machine: Machine name or address.

        Returns:
            The location of the machine: local or crosfleet

        Raises:
            RuntimeError: The name contains "chromeos" but the machine is
                not in Crosfleet.
        """
        # We assume that lab machine always starts with chromeos*, and local
        # machines are ip address.
        if "chromeos" in machine:
            if lock_mgr.CheckMachineInCrosfleet(machine):
                return "crosfleet"
            raise RuntimeError("Lab machine not in Crosfleet.")
        return "local"

    def _LockAllMachines(self, experiment):
        """Attempt to globally lock all of the machines requested for run.

        This method tries to lock all machines requested for this crosperf run
        in three different modes automatically, to prevent any other crosperf
        runs from being able to update/use the machines while this experiment
        is running:
          - Crosfleet machines: Use crosfleet lease-dut mechanism to lease
          - Local machines: Use file lock mechanism to lock

        In test mode no locking is done and every requested machine is
        treated as locked.

        Args:
            experiment: The experiment whose machines should be locked.

        Raises:
            RuntimeError: No machine could be locked.
        """
        if test_flag.GetTestMode():
            self.locked_machines = self._GetMachineList()
            experiment.locked_machines = self.locked_machines
        else:
            experiment.lock_mgr = lock_machine.LockManager(
                self._GetMachineList(),
                "",
                experiment.labels[0].chromeos_root,
                experiment.locks_dir,
                log=self.l,
            )
            for m in experiment.lock_mgr.machines:
                machine_type = self._GetMachineType(experiment.lock_mgr, m)
                if machine_type == "local":
                    experiment.lock_mgr.AddMachineToLocal(m)
                elif machine_type == "crosfleet":
                    experiment.lock_mgr.AddMachineToCrosfleet(m)
            machine_states = experiment.lock_mgr.GetMachineStates("lock")
            experiment.lock_mgr.CheckMachineLocks(machine_states, "lock")
            self.locked_machines = experiment.lock_mgr.UpdateMachines(True)
            experiment.locked_machines = self.locked_machines
            self._UpdateMachineList(self.locked_machines)
            experiment.machine_manager.RemoveNonLockedMachines(
                self.locked_machines
            )
            if not self.locked_machines:
                raise RuntimeError("Unable to lock any machines.")

    def _ClearCacheEntries(self, experiment):
        """Removes the on-disk cache directory of every benchmark run.

        Args:
            experiment: The experiment whose benchmark runs are examined.
        """
        for br in experiment.benchmark_runs:
            cache = ResultsCache()
            cache.Init(
                br.label.chromeos_image,
                br.label.chromeos_root,
                br.benchmark.test_name,
                br.iteration,
                br.test_args,
                br.profiler_args,
                br.machine_manager,
                br.machine,
                br.label.board,
                br.cache_conditions,
                br.logger(),
                br.log_level,
                br.label,
                br.share_cache,
                br.benchmark.suite,
                br.benchmark.show_all_results,
                br.benchmark.run_local,
                br.benchmark.cwp_dso,
            )
            cache_dir = cache.GetCacheDirForWrite()
            if os.path.exists(cache_dir):
                self.l.LogOutput("Removing cache dir: %s" % cache_dir)
                shutil.rmtree(cache_dir)

    def _Run(self, experiment):
        """Locks machines, runs the experiment and reports progress.

        Polls the experiment every THREAD_MONITOR_DELAY seconds and logs its
        status at most every STATUS_TIME_DELAY seconds until it completes.
        On KeyboardInterrupt or SystemExit the experiment is terminated, the
        runner is marked as terminated and the exception is re-raised.
        experiment.Cleanup() is always called.

        Args:
            experiment: The experiment to run.
        """
        try:
            # We should not lease machines if tests are launched via
            # `crosfleet create-test`. This is because leasing DUT in
            # crosfleet will create a no-op task on the DUT and new test
            # created will be hanging there.
            # TODO(zhizhouy): Need to check whether machine is ready or not
            # before assigning a test to it.
            if not experiment.no_lock and not experiment.crosfleet:
                self._LockAllMachines(experiment)
            # Calculate all checksums of available/locked machines, to ensure
            # same label has same machines for testing
            experiment.SetCheckSums(forceSameImage=True)
            if self._using_schedv2:
                schedv2 = Schedv2(experiment)
                experiment.set_schedv2(schedv2)
            if CacheConditions.FALSE in experiment.cache_conditions:
                self._ClearCacheEntries(experiment)
            status = ExperimentStatus(experiment)
            experiment.Run()
            last_status_time = 0
            last_status_string = ""
            try:
                if experiment.log_level != "verbose":
                    self.l.LogStartDots()
                while not experiment.IsComplete():
                    if last_status_time + self.STATUS_TIME_DELAY < time.time():
                        last_status_time = time.time()
                        border = "=============================="
                        if experiment.log_level == "verbose":
                            self.l.LogOutput(border)
                            self.l.LogOutput(status.GetProgressString())
                            self.l.LogOutput(status.GetStatusString())
                            self.l.LogOutput(border)
                        else:
                            # Only print the status when it changed;
                            # otherwise append a progress dot.
                            current_status_string = status.GetStatusString()
                            if current_status_string != last_status_string:
                                self.l.LogEndDots()
                                self.l.LogOutput(border)
                                self.l.LogOutput(current_status_string)
                                self.l.LogOutput(border)
                                last_status_string = current_status_string
                            else:
                                self.l.LogAppendDot()
                    time.sleep(self.THREAD_MONITOR_DELAY)
            except KeyboardInterrupt:
                self._terminated = True
                self.l.LogError("Ctrl-c pressed. Cleaning up...")
                experiment.Terminate()
                raise
            except SystemExit:
                self._terminated = True
                self.l.LogError("Unexpected exit. Cleaning up...")
                experiment.Terminate()
                raise
        finally:
            experiment.Cleanup()

    def _PrintTable(self, experiment):
        """Logs the text results report of the experiment."""
        self.l.LogOutput(
            TextResultsReport.FromExperiment(experiment).GetReport()
        )

    def _Email(self, experiment):
        """Emails the text and HTML reports of the experiment.

        Nothing is sent when the "no_email" config is set, or when every
        benchmark run was a cache hit and the experiment has no explicit
        recipients. The current user is always added to the recipients.

        Args:
            experiment: The experiment to report on.
        """
        # Only email by default if a new run was completed.
        send_mail = any(
            not benchmark_run.cache_hit
            for benchmark_run in experiment.benchmark_runs
        )
        if (not send_mail and not experiment.email_to) or config.GetConfig(
            "no_email"
        ):
            return

        label_names = [label.name for label in experiment.labels]
        subject = "%s: %s" % (experiment.name, " vs. ".join(label_names))

        text_report = TextResultsReport.FromExperiment(
            experiment, True
        ).GetReport()
        text_report += (
            "\nResults are stored in %s.\n" % experiment.results_directory
        )
        text_report = "<pre style='font-size: 13px'>%s</pre>" % text_report
        html_report = HTMLResultsReport.FromExperiment(experiment).GetReport()
        attachment = EmailSender.Attachment("report.html", html_report)
        # Work on a copy so the experiment's own recipient list is not
        # modified as a side effect of sending the mail.
        email_to = list(experiment.email_to or [])
        email_to.append(getpass.getuser())
        EmailSender().SendEmail(
            email_to,
            subject,
            text_report,
            attachments=[attachment],
            msg_type="html",
        )

    def _StoreResults(self, experiment):
        """Stores experiment file, statistics, results and reports on disk.

        The results directory is removed and recreated before anything is
        written into it.

        Args:
            experiment: The experiment whose results are stored.

        Returns:
            ALL_FAILED if the run was terminated or no benchmark run has a
            result with a zero retval; SUCCEEDED if, for every label, every
            benchmark has at least one such iteration; HAS_FAILURE otherwise.
        """
        if self._terminated:
            return self.ALL_FAILED

        results_directory = experiment.results_directory
        FileUtils().RmDir(results_directory)
        FileUtils().MkDirP(results_directory)
        self.l.LogOutput("Storing experiment file in %s." % results_directory)
        experiment_file_path = os.path.join(results_directory, "experiment.exp")
        FileUtils().WriteFile(experiment_file_path, experiment.experiment_file)

        all_failed = True

        topstats_file = os.path.join(results_directory, "topstats.log")
        self.l.LogOutput(
            "Storing top statistics of each benchmark run into %s."
            % topstats_file
        )
        # Track if any iterations for a given benchmark has passed for each
        # label.
        benchmarks_passes = {}
        with open(topstats_file, "w") as top_fd:
            for benchmark_run in experiment.benchmark_runs:
                # Register every (label, benchmark) pair as "not passed yet".
                # The nested setdefault matters: registering only the first
                # benchmark of a label would hide later benchmarks that never
                # pass, and the run would wrongly be reported as SUCCEEDED.
                label_passes = benchmarks_passes.setdefault(
                    benchmark_run.label.name, {}
                )
                label_passes.setdefault(benchmark_run.benchmark.name, False)
                if benchmark_run.result:
                    if not benchmark_run.result.retval:
                        all_failed = False
                        label_passes[benchmark_run.benchmark.name] = True
                    # Header with benchmark run name.
                    top_fd.write("%s\n" % str(benchmark_run))
                    # Formatted string with top statistics.
                    top_fd.write(benchmark_run.result.FormatStringTopCommands())
                    top_fd.write("\n\n")

        if all_failed:
            return self.ALL_FAILED
        # Set has_passes if at least one iteration of all benchmarks has
        # passed for every label.
        has_passes = all(
            all(benchmarks.values())
            for benchmarks in benchmarks_passes.values()
        )

        self.l.LogOutput("Storing results of each benchmark run.")
        for benchmark_run in experiment.benchmark_runs:
            if benchmark_run.result:
                # Directory name: the run name reduced to alphanumerics.
                benchmark_run_name = "".join(
                    ch for ch in benchmark_run.name if ch.isalnum()
                )
                benchmark_run_path = os.path.join(
                    results_directory, benchmark_run_name
                )
                if experiment.compress_results:
                    benchmark_run.result.CompressResultsTo(benchmark_run_path)
                else:
                    benchmark_run.result.CopyResultsTo(benchmark_run_path)
                # Don't remove benchmark tmp if it was a cache hit.
                benchmark_run.result.CleanUp(
                    benchmark_run.benchmark.rm_chroot_tmp
                    and not benchmark_run.cache_hit
                )

        self.l.LogOutput("Storing results report in %s." % results_directory)
        results_table_path = os.path.join(results_directory, "results.html")
        report = HTMLResultsReport.FromExperiment(experiment).GetReport()
        if self.json_report:
            json_report = JSONResultsReport.FromExperiment(
                experiment, json_args={"indent": 2}
            )
            _WriteJSONReportToFile(experiment, results_directory, json_report)

        FileUtils().WriteFile(results_table_path, report)

        self.l.LogOutput(
            "Storing email message body in %s." % results_directory
        )
        msg_file_path = os.path.join(results_directory, "msg_body.html")
        text_report = TextResultsReport.FromExperiment(
            experiment, True
        ).GetReport()
        text_report += (
            "\nResults are stored in %s.\n" % experiment.results_directory
        )
        msg_body = "<pre style='font-size: 13px'>%s</pre>" % text_report
        FileUtils().WriteFile(msg_file_path, msg_body)

        return self.SUCCEEDED if has_passes else self.HAS_FAILURE

    def Run(self):
        """Runs the experiment, then prints, stores and emails the results.

        Printing, storing and emailing happen even when the run raises; the
        exception from the run then propagates to the caller.

        Returns:
            The value returned by _StoreResults().
        """
        try:
            self._Run(self._experiment)
        finally:
            # Always print the report at the end of the run.
            self._PrintTable(self._experiment)
            ret = self._StoreResults(self._experiment)
            if ret != self.ALL_FAILED:
                self._Email(self._experiment)
        return ret


class MockExperimentRunner(ExperimentRunner):
    """Mocked ExperimentRunner for testing.

    Every step only logs what the real runner would have done.
    """

    def __init__(self, experiment, json_report):
        super(MockExperimentRunner, self).__init__(experiment, json_report)

    def _Run(self, experiment):
        self.l.LogOutput(
            "Would run the following experiment: '%s'." % experiment.name
        )

    def _PrintTable(self, experiment):
        self.l.LogOutput("Would print the experiment table.")

    def _Email(self, experiment):
        self.l.LogOutput("Would send result email.")

    def _StoreResults(self, experiment):
        self.l.LogOutput("Would store the results.")