#!/usr/bin/python3
#
# Copyright (c) 2014 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import datetime as datetime_base
import logging
from datetime import datetime

# NOTE(review): `common` is never referenced by name in this file; it is
# presumably imported for its side effects (confirm) -- do not remove.
import common

from autotest_lib.client.common_lib import global_config
from autotest_lib.server import utils
from autotest_lib.server.cros.dynamic_suite import reporting_utils

CONFIG = global_config.global_config


class DUTsNotAvailableError(utils.TestLabException):
    """Raised when a DUT label combination is not available in the lab."""


class NotEnoughDutsError(utils.TestLabException):
    """Raised when the lab doesn't have the minimum number of duts."""

    def __init__(self, labels, num_available, num_required, hosts):
        """Initialize instance.

        Please pass arguments by keyword.

        @param labels: Labels required, including board and pool labels.
        @param num_available: Number of available hosts.
        @param num_required: Number of hosts required.
        @param hosts: Sequence of Host instances for given board and pool.
        """
        self.labels = labels
        self.num_available = num_available
        self.num_required = num_required
        self.hosts = hosts
        # Optional context; each one is only included in str(self) once it
        # has been set to something other than None.
        self.bug_id = None
        self.suite_name = None
        self.build = None


    def __repr__(self):
        return (
            '<{cls} at 0x{id:x} with'
            ' labels={this.labels!r},'
            ' num_available={this.num_available!r},'
            ' num_required={this.num_required!r},'
            ' bug_id={this.bug_id!r},'
            ' suite_name={this.suite_name!r},'
            ' build={this.build!r}>'
            .format(cls=type(self).__name__, id=id(self), this=self)
        )


    def __str__(self):
        # Build the message as a list of format templates and expand them
        # all at once, so optional parts are only mentioned when present.
        msg_parts = [
            'Not enough DUTs for requirements: {this.labels};'
            ' required: {this.num_required}, found: {this.num_available}'
        ]
        format_dict = {'this': self}
        if self.bug_id is not None:
            msg_parts.append('bug: {bug_url}')
            format_dict['bug_url'] = reporting_utils.link_crbug(self.bug_id)
        if self.suite_name is not None:
            msg_parts.append('suite: {this.suite_name}')
        if self.build is not None:
            msg_parts.append('build: {this.build}')
        return ', '.join(msg_parts).format(**format_dict)


class SimpleTimer(object):
    """A simple timer used to periodically check if a deadline has passed."""

    def _reset(self):
        """Reset the deadline.

        A missing, zero or negative interval is logged as an error and
        leaves the timer without a deadline, so poll() never fires.
        """
        if not self.interval_hours or self.interval_hours < 0:
            logging.error('Bad interval %s', self.interval_hours)
            self.deadline = None
            return
        self.deadline = datetime.now() + datetime_base.timedelta(
                hours=self.interval_hours)


    def __init__(self, interval_hours=0.5):
        """Initialize a simple periodic deadline timer.

        @param interval_hours: Interval of the deadline.
        """
        self.interval_hours = interval_hours
        self._reset()


    def poll(self):
        """Poll the timer to see if we've hit the deadline.

        This method resets the deadline if it has passed. If the deadline
        hasn't been set, or the current time is less than the deadline, the
        method returns False.

        @return: True if the deadline has passed, False otherwise.
        """
        if not self.deadline or datetime.now() < self.deadline:
            return False
        self._reset()
        return True


class JobTimer(object):
    """Utility class capable of measuring job timeouts.
    """

    # Format used in datetime - string conversion.
    time_format = '%m-%d-%Y [%H:%M:%S]'

    def __init__(self, job_created_time, timeout_mins):
        """JobTimer constructor.

        @param job_created_time: float representing the time a job was
                                 created. Eg: time.time()
        @param timeout_mins: float representing the timeout in minutes.
        """
        self.job_created_time = datetime.fromtimestamp(job_created_time)
        # Despite the name this is a timedelta, not a number of hours.
        self.timeout_hours = datetime_base.timedelta(hours=timeout_mins/60.0)
        self.debug_output_timer = SimpleTimer(interval_hours=0.5)
        self.past_halftime = False


    @classmethod
    def format_time(cls, datetime_obj):
        """Get the string formatted version of the datetime object.

        @param datetime_obj: A datetime.datetime object.
                             Eg: datetime.datetime.now()

        @return: A formatted string containing the date/time of the
                 input datetime.
        """
        return datetime_obj.strftime(cls.time_format)


    def elapsed_time(self):
        """Get the time elapsed since this job was created.

        @return: A timedelta object representing the elapsed time.
        """
        return datetime.now() - self.job_created_time


    def is_suite_timeout(self):
        """Check if the suite timed out.

        @return: True if more than timeout_hours has elapsed since the suite job
                 was created.
        """
        if self.elapsed_time() >= self.timeout_hours:
            logging.info('Suite timed out. Started on %s, timed out on %s',
                         self.format_time(self.job_created_time),
                         self.format_time(datetime.now()))
            return True
        return False


    def first_past_halftime(self):
        """Check if we just crossed half time.

        This method will only return True once, the first time it is called
        after a job's elapsed time is past half its timeout.

        @return True: If this is the first call of the method after halftime.
        """
        if (not self.past_halftime and
            self.elapsed_time() > self.timeout_hours/2):
            self.past_halftime = True
            return True
        return False


class RPCHelper(object):
    """A class to help diagnose a suite run through the rpc interface.
    """

    def __init__(self, rpc_interface):
        """Constructor for rpc helper class.

        @param rpc_interface: An rpc object, eg: A RetryingAFE instance.
        """
        self.rpc_interface = rpc_interface


    def check_dut_availability(self, labels, minimum_duts=0,
                               skip_duts_check=False):
        """Check if DUT availability for a given board and pool is less than
        minimum.

        @param labels: DUT label dependencies, including board and pool
                       labels.
        @param minimum_duts: Minimum Number of available machines required to
                             run the suite. Default is set to 0, which means do
                             not force the check of available machines before
                             running the suite.
        @param skip_duts_check: If True, skip minimum available DUTs check.
        @raise: NotEnoughDutsError if DUT availability is lower than minimum.
        @raise: DUTsNotAvailableError if no host found for requested
                board/pool.
        """
        if minimum_duts == 0:
            return

        hosts = self.rpc_interface.get_hosts(
                invalid=False, multiple_labels=labels)
        if not hosts:
            raise DUTsNotAvailableError(
                    'No hosts found for labels %r. The test lab '
                    'currently does not cover test for those DUTs.' %
                    (labels,))

        if skip_duts_check:
            # Bypass minimum available DUTs check
            logging.debug('skip_duts_check is on, do not enforce minimum '
                          'DUTs check.')
            return

        # Informational only: the decision below is based on how many of the
        # hosts are available, not on the total.
        if len(hosts) < minimum_duts:
            logging.debug('The total number of DUTs for %r is %d, '
                          'which is less than %d, the required minimum '
                          'number of available DUTS', labels, len(hosts),
                          minimum_duts)

        available_hosts = sum(1 for host in hosts if host.is_available())
        logging.debug('%d of %d DUTs are available for %r.',
                      available_hosts, len(hosts), labels)
        if available_hosts < minimum_duts:
            raise NotEnoughDutsError(
                    labels=labels,
                    num_available=available_hosts,
                    num_required=minimum_duts,
                    hosts=hosts)


    def diagnose_job(self, job_id, instance_server):
        """Diagnose a suite job.

        Logs information about the jobs that are still to run in the suite.

        @param job_id: The id of the suite job to get information about.
            No meaningful information gets logged if the id is for a sub-job.
        @param instance_server: The instance server.
            Eg: cautotest, cautotest-cq, localhost.
        """
        incomplete_jobs = self.rpc_interface.get_jobs(
                parent_job_id=job_id, summary=True,
                hostqueueentry__complete=False)
        if incomplete_jobs:
            logging.info('\n%s printing summary of incomplete jobs (%s):\n',
                         JobTimer.format_time(datetime.now()),
                         len(incomplete_jobs))
            for job in incomplete_jobs:
                # Log only the part of the test name after the last '/'.
                logging.info('%s: %s', job.testname[job.testname.rfind('/')+1:],
                             reporting_utils.link_job(job.id, instance_server))
        else:
            logging.info('All jobs in suite have already completed.')