# Lint as: python2, python3
# Copyright 2016 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import contextlib
import datetime
import logging
import pprint
import time

import common
from autotest_lib.client.common_lib import error
from autotest_lib.client.common_lib import utils as client_utils
from autotest_lib.client.common_lib.cros.network import ap_constants
from autotest_lib.client.common_lib.cros.network import iw_runner
from autotest_lib.server import hosts
from autotest_lib.server import site_linux_system
from autotest_lib.server.cros import host_lock_manager
from autotest_lib.server.cros.ap_configurators import ap_batch_locker
from autotest_lib.server.cros.ap_configurators \
        import ap_configurator_factory
from autotest_lib.server.cros.network import chaos_clique_utils as utils
from autotest_lib.server.cros.network import wifi_client

# Webdriver main hostname
# TODO b:169251326 terms below are set outside of this codebase and should
# be updated when possible ("master" -> "main"). # nocheck
MAINNAME = 'chromeos3-chaosvmmaster.cros.corp.google.com'  # nocheck
WEBDRIVER_PORT = 9515


class ChaosRunner(object):
    """Object to run a network_WiFi_ChaosXXX test."""


    def __init__(self, test, host, spec, broken_pdus=None):
        """Initializes the runner and logs the server and DUT times.

        @param test: a string, test name.
        @param host: an Autotest host object, device under test.
        @param spec: an APSpec object.
        @param broken_pdus: list of offline PDUs, or None for a fresh empty
                list. A list passed in by the caller is stored by reference.

        """
        self._test = test
        self._host = host
        self._ap_spec = spec
        # A default of list() would be evaluated once and then shared by
        # every ChaosRunner created without an explicit argument.
        self._broken_pdus = [] if broken_pdus is None else broken_pdus
        # Log server and DUT times
        dt = datetime.datetime.now()
        logging.info('Server time: %s', dt.strftime('%a %b %d %H:%M:%S %Y'))
        logging.info('DUT time: %s', self._host.run('date').stdout.strip())


    def run(self, job, batch_size=10, tries=10, capturer_hostname=None,
            conn_worker=None, work_client_hostname=None,
            disabled_sysinfo=False):
        """Executes Chaos test.

        Allocates a packet capturer and a webdriver VM, checks via an iw scan
        that the capturer responds and that fewer than
        ap_constants.MAX_SSID_COUNT networks are visible, then locks APs in
        batches and runs self._test once per usable AP. Afterwards the
        webdriver VM is powered off and all routers are turned off.

        @param job: an Autotest job object.
        @param batch_size: an integer, max number of APs to lock in one batch.
        @param tries: an integer, number of iterations to run per AP.
        @param capturer_hostname: a string or None, hostname or IP of capturer.
        @param conn_worker: ConnectionWorkerAbstract or None, to run extra
                            work after successful connection.
        @param work_client_hostname: a string or None, hostname of work client
        @param disabled_sysinfo: a bool, disable collection of logs from DUT.


        @raises TestError: Issues locking VM webdriver instance, the packet
                capturer never answering a scan before the timeout, too many
                SSIDs being visible, or the webdriver port check failing.
        """

        lock_manager = host_lock_manager.HostLockManager()
        # TODO b:169251326 terms below are set outside of this codebase and
        # should be updated when possible ("master" -> "main"). # nocheck
        webdriver_main = hosts.SSHHost(MAINNAME,
                                       user='chaosvmmaster')  # nocheck
        host_prefix = self._host.hostname.split('-')[0]
        with host_lock_manager.HostsLockedBy(lock_manager):
            capture_host = utils.allocate_packet_capturer(
                    lock_manager, hostname=capturer_hostname,
                    prefix=host_prefix)
            # Cleanup and reboot packet capturer before the test.
            utils.sanitize_client(capture_host)
            capturer = site_linux_system.LinuxSystem(capture_host, {},
                                                     'packet_capturer')

            # Run iw scan and abort if more than allowed number of APs are up.
            iw_command = iw_runner.IwRunner(capture_host)
            start_time = time.time()
            logging.info('Performing a scan with a max timeout of 30 seconds.')
            capture_interface = 'wlan0'
            capturer_info = capture_host.run('cat /etc/lsb-release',
                                             ignore_status=True,
                                             timeout=5).stdout
            if 'whirlwind' in capturer_info:
                # Use the dual band aux radio for scanning networks.
                capture_interface = 'wlan2'
            while time.time() - start_time <= ap_constants.MAX_SCAN_TIMEOUT:
                networks = iw_command.scan(capture_interface)
                if networks is None:
                    # The scan failed; retry until the timeout expires.
                    continue
                if len(networks) >= ap_constants.MAX_SSID_COUNT:
                    raise error.TestError(
                            'Probably someone is already running a '
                            'chaos test?!')
                break
            else:
                # Reached only when the timeout expired without a single
                # successful scan; any successful scan breaks or raises above.
                raise error.TestError(
                        'Packet capturer is not responding to scans. Check '
                        'device and re-run test')

            if conn_worker is not None:
                work_client_machine = utils.allocate_packet_capturer(
                        lock_manager, hostname=work_client_hostname)
                conn_worker.prepare_work_client(work_client_machine)

            # Lock VM. If on, power off; always power on. Then create a tunnel.
            webdriver_instance = utils.allocate_webdriver_instance(lock_manager)

            if utils.is_VM_running(webdriver_main, webdriver_instance):
                logging.info('VM %s was on; powering off for a clean instance',
                             webdriver_instance)
                utils.power_off_VM(webdriver_main, webdriver_instance)
                logging.info('Allow VM time to gracefully shut down')
                time.sleep(5)

            logging.info('Starting up VM %s', webdriver_instance)
            utils.power_on_VM(webdriver_main, webdriver_instance)
            logging.info('Allow VM time to power on before creating a tunnel.')
            time.sleep(30)

            # Only the in-lab branch creates a tunnel; keep the name bound so
            # the teardown check below is safe in both branches.
            webdriver_tunnel = None
            if not client_utils.host_is_in_lab_zone(webdriver_instance.hostname):
                self._ap_spec._webdriver_hostname = webdriver_instance.hostname
            else:
                # If in the lab then port forwarding must be done so webdriver
                # connection will be over localhost.
                self._ap_spec._webdriver_hostname = 'localhost'
                webdriver_tunnel = webdriver_instance.create_ssh_tunnel(
                        WEBDRIVER_PORT, WEBDRIVER_PORT)
                logging.info('Wait for tunnel to be created.')
                # NOTE(review): this tests the truthiness of the object
                # returned by client_utils.run, not the lsof exit status or
                # output. If that object is always truthy, the loop stops
                # after one pass and the error below cannot fire. Confirm.
                for _ in range(3):
                    time.sleep(10)
                    results = client_utils.run('lsof -i:%s' % WEBDRIVER_PORT,
                                               ignore_status=True)
                    if results:
                        break
                if not results:
                    raise error.TestError(
                            'Unable to listen to WEBDRIVER_PORT: %s' %
                            (results,))

            batch_locker = ap_batch_locker.ApBatchLocker(
                    lock_manager, self._ap_spec,
                    ap_test_type=ap_constants.AP_TEST_TYPE_CHAOS)

            while batch_locker.has_more_aps():
                # Work around for CrOS devices only:crbug.com/358716
                utils.sanitize_client(self._host)
                healthy_dut = True

                with contextlib.closing(wifi_client.WiFiClient(
                        hosts.create_host(
                                {
                                        'hostname' : self._host.hostname,
                                        'afe_host' : self._host._afe_host,
                                        'host_info_store':
                                                self._host.host_info_store,
                                },
                                host_class=self._host.__class__,
                        ),
                        './debug',
                        False,
                )) as client:

                    aps = batch_locker.get_ap_batch(batch_size=batch_size)
                    if not aps:
                        logging.info('No more APs to test.')
                        break

                    # Power down all of the APs because some can get grumpy
                    # if they are configured several times and remain on.
                    # Use the cartridge to group power downs and
                    # configurations.
                    utils.power_down_aps(aps, self._broken_pdus)
                    utils.configure_aps(aps, self._ap_spec, self._broken_pdus)

                    aps = utils.filter_quarantined_and_config_failed_aps(
                            aps, batch_locker, job, self._broken_pdus)

                    for ap in aps:
                        # http://crbug.com/306687
                        if ap.ssid is None:
                            logging.error('The SSID was not set for the AP:%s',
                                          ap)

                        healthy_dut = utils.is_dut_healthy(client, ap)

                        if not healthy_dut:
                            logging.error('DUT is not healthy, rebooting.')
                            batch_locker.unlock_and_reclaim_aps()
                            break

                        networks = utils.return_available_networks(
                                ap, capturer, job, self._ap_spec)

                        if networks is None:
                            # If scan returned no networks, iw scan failed.
                            # Reboot the packet capturer device and
                            # reconfigure the capturer.
                            batch_locker.unlock_and_reclaim_ap(ap.host_name)
                            logging.error('Packet capture is not healthy, '
                                          'rebooting.')
                            capturer.host.reboot()
                            capturer = site_linux_system.LinuxSystem(
                                    capture_host, {}, 'packet_capturer')
                            continue
                        if not networks:
                            # Packet capturer did not find the SSID in scan or
                            # there was a security mismatch.
                            utils.release_ap(ap, batch_locker,
                                             self._broken_pdus)
                            continue

                        assoc_params = ap.get_association_parameters()

                        if not utils.is_conn_worker_healthy(
                                conn_worker, ap, assoc_params, job):
                            utils.release_ap(
                                    ap, batch_locker, self._broken_pdus)
                            continue

                        kernel_ver = self._host.get_kernel_ver()
                        firmware_ver = utils.get_firmware_ver(self._host)
                        if not firmware_ver:
                            firmware_ver = "Unknown"

                        debug_dict = {'+++PARSE DATA+++': '+++PARSE DATA+++',
                                      'SSID': ap._ssid,
                                      'DUT': client.wifi_mac,
                                      'AP Info': ap.name,
                                      'kernel_version': kernel_ver,
                                      'wifi_firmware_version': firmware_ver}
                        debug_string = pprint.pformat(debug_dict)

                        logging.info('Waiting %d seconds for the AP dhcp '
                                     'server', ap.dhcp_delay)
                        time.sleep(ap.dhcp_delay)

                        job.run_test(self._test,
                                     capturer=capturer,
                                     capturer_frequency=networks[0].frequency,
                                     capturer_ht_type=networks[0].width,
                                     host=self._host,
                                     assoc_params=assoc_params,
                                     client=client,
                                     tries=tries,
                                     debug_info=debug_string,
                                     # Copy all logs from the system
                                     disabled_sysinfo=disabled_sysinfo,
                                     conn_worker=conn_worker,
                                     tag=ap.ssid if conn_worker is None else
                                         '%s.%s' % (conn_worker.name, ap.ssid))

                        utils.release_ap(ap, batch_locker, self._broken_pdus)

                        if conn_worker is not None:
                            conn_worker.cleanup()

                    if not healthy_dut:
                        # The batch was already unlocked and reclaimed when
                        # the DUT was found unhealthy; skip unlock_aps().
                        continue

                batch_locker.unlock_aps()

            if webdriver_tunnel:
                webdriver_instance.disconnect_ssh_tunnel(webdriver_tunnel)
                webdriver_instance.close()
            capturer.close()
            logging.info('Powering off VM %s', webdriver_instance)
            utils.power_off_VM(webdriver_main, webdriver_instance)
            lock_manager.unlock(webdriver_instance.hostname)

            if self._broken_pdus:
                logging.info('PDU is down!!!\nThe following PDUs are down:\n')
                pprint.pprint(self._broken_pdus)

            factory = ap_configurator_factory.APConfiguratorFactory(
                    ap_constants.AP_TEST_TYPE_CHAOS)
            factory.turn_off_all_routers(self._broken_pdus)