#!/usr/bin/env python3
# Copyright 2020 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import logging


import common
from autotest_lib.client.common_lib import error
from autotest_lib.client.common_lib import utils as client_utils
from autotest_lib.server.cros.storage import storage_validate as storage
from autotest_lib.server.cros.servo.keyboard import servo_keyboard_flasher
from autotest_lib.server.cros.repair import mac_address_helper
from autotest_lib.site_utils.admin_audit import base
from autotest_lib.site_utils.admin_audit import constants
from autotest_lib.site_utils.admin_audit import rpm_validator
from autotest_lib.site_utils.admin_audit import servo_updater

try:
    from autotest_lib.utils.frozen_chromite.lib import metrics
except ImportError:
    metrics = client_utils.metrics_mock

# Common status used for statistics.
STATUS_FAIL = 'fail'
STATUS_SUCCESS = 'success'
STATUS_SKIPPED = 'skipped'


class VerifyDutStorage(base._BaseDUTVerifier):
    """Verify the state of the storage on the DUT

    The process to determine the type of storage and read metrics
    of usage and EOL(end-of-life) information to determine the
    state.
    Supported storage types: MMS, NVME, SSD.
    Possible states are:
      UNKNOWN - not access to the DUT, not determine type of storage,
                not information to determine metrics
      NORMAL - the storage is in good shape and will work stable
                device will work stable. (supported for all types)
      ACCEPTABLE - the storage almost used all resources, device will
                work stable but it is better be ready for replacement
                device will work stable. (supported by MMS, NVME)
      NEED_REPLACEMENT - the storage broken or worn off the life limit
                device can work by not stable and can cause the
                flakiness on the tests. (supported by all types)
    """
    def __init__(self, dut_host):
        super(VerifyDutStorage, self).__init__(dut_host)
        # Last state computed by _verify(); None until a check succeeds.
        self._state = None

    def _verify(self, set_label=True, run_badblocks=None):
        """Detect the storage state and optionally record it on the host.

        @params set_label: When true and a state was detected, the state
                           is written to the host info and the device is
                           flagged for replacement if required.
        @params run_badblocks: Passed through unchanged to the storage
                               validator's get_state().

        @raises base.AuditError: if any step of the detection fails.
        """
        if not self.host_is_up():
            logging.info('Host is down; Skipping the verification')
            return
        try:
            validator = storage.StorageStateValidator(self.get_host())
            storage_type = validator.get_type()
            logging.debug('Detected storage type: %s', storage_type)
            storage_state = validator.get_state(run_badblocks=run_badblocks)
            logging.debug('Detected storage state: %s', storage_state)
            state = self.convert_state(storage_state)
            if state and set_label:
                self._set_host_info_state(constants.DUT_STORAGE_STATE_PREFIX,
                                          state)
                if state == constants.HW_STATE_NEED_REPLACEMENT:
                    self.get_host().set_device_needs_replacement(
                        resultdir=self.get_result_dir())
            self._state = state
        except Exception as e:
            raise base.AuditError('Exception during getting state of'
                                  ' storage %s' % str(e))

    def convert_state(self, state):
        """Mapping state from validator to verifier.

        @params state: One of the storage.STORAGE_STATE_* values.

        @returns: Matching constants.HW_STATE_* value, or None when the
                  validator state is not one of the known values.
        """
        if state == storage.STORAGE_STATE_NORMAL:
            return constants.HW_STATE_NORMAL
        if state == storage.STORAGE_STATE_WARNING:
            return constants.HW_STATE_ACCEPTABLE
        if state == storage.STORAGE_STATE_CRITICAL:
            return constants.HW_STATE_NEED_REPLACEMENT
        return None

    def get_state(self):
        """Return the state stored by the last successful _verify()."""
        return self._state


class VerifyServoUsb(base._BaseServoVerifier):
    """Verify the state of the USB-drive on the Servo

    The process to determine by checking the USB-drive on having any
    bad sectors on it.
    Possible states are:
      UNKNOWN - not access to the device or servo, not available
                software on the servo.
      NORMAL - the device available for testing and not bad sectors.
                was found on it, device will work stable
      NEED_REPLACEMENT - the device available for testing and
                some bad sectors were found on it. The device can
                work but cause flakiness in the tests or repair process.

    badblocks errors:
      No such device or address while trying to determine device size
    """
    def _verify(self):
        """Run badblocks against the servo USB drive and record the state.

        The check runs on the DUT when the drive is visible there,
        otherwise on the servo host. Afterwards a fresh image is
        installed to the drive.
        """
        if not self.servo_is_up():
            logging.info('Servo not initialized; Skipping the verification')
            return
        try:
            usb = self.get_host()._probe_and_validate_usb_dev()
            logging.debug('USB path: %s', usb)
        except Exception as e:
            usb = ''
            logging.debug('(Not critical) %s', e)
        if not usb:
            self._set_state(constants.HW_STATE_NOT_DETECTED)
            return
        # basic readonly check

        # path to USB if DUT is sshable
        logging.info('Starting verification of USB drive...')
        dut_usb = None
        if self.host_is_up():
            dut_usb = self._usb_path_on_dut()
        state = None
        try:
            if dut_usb:
                logging.info('Try run check on DUT side.')
                state = self._run_check_on_host(self._dut_host, dut_usb)
            else:
                logging.info('Try run check on ServoHost side.')
                servo = self.get_host().get_servo()
                servo_usb = servo.probe_host_usb_dev()
                state = self._run_check_on_host(self.get_host(), servo_usb)
        except Exception as e:
            if 'Timeout encountered:' in str(e):
                # A timeout leaves the state unset (None), so no label
                # is written by _set_state() below.
                logging.info('Timeout during running action')
                metrics.Counter(
                        'chromeos/autotest/audit/servo/usb/timeout'
                ).increment(fields={'host': self._dut_host.hostname})
            else:
                # badblocks generate errors when device not reachable or
                # cannot read system information to execute process
                state = constants.HW_STATE_NEED_REPLACEMENT
            logging.debug(str(e))

        self._set_state(state)
        logging.info('Finished verification of USB drive.')

        self._install_stable_image()

    def _usb_path_on_dut(self):
        """Return path to the USB detected on DUT side.

        @returns: Path of the first /dev/sd[a-z] device reported as 'USB'
                  that also passes the quick fdisk check; None when no
                  such device is found.
        """
        servo = self.get_host().get_servo()
        servo.switch_usbkey('dut')
        # 'ls' exits non-zero when the glob matches nothing. That only
        # means the drive is not visible on the DUT, so do not raise:
        # returning None lets the caller fall back to the servo host.
        result = self._dut_host.run('ls /dev/sd[a-z]', ignore_status=True)
        for path in result.stdout.splitlines():
            cmd = ('. /usr/share/misc/chromeos-common.sh; get_device_type %s' %
                   path)
            check_run = self._dut_host.run(cmd, timeout=30, ignore_status=True)
            if check_run.stdout.strip() != 'USB':
                continue
            if self._quick_check_if_device_responsive(self._dut_host, path):
                logging.info('USB drive detected on DUT side as %s', path)
                return path
        return None

    def _quick_check_if_device_responsive(self, host, usb_path):
        """Verify that device responds to 'fdisk -l'.

        @params host: Host on which the command is run.
        @params usb_path: Path to USB drive. (e.g. /dev/sda)

        @returns: True if fdisk exits with status 0; False if it exits
                  with another status or the run raises AutoservRunError
                  (including a timeout).
        """
        validate_cmd = 'fdisk -l %s' % usb_path
        try:
            resp = host.run(validate_cmd, ignore_status=True, timeout=30)
            if resp.exit_status == 0:
                return True
            logging.error('USB %s is not detected by fdisk!', usb_path)
        except error.AutoservRunError as e:
            if 'Timeout encountered' in str(e):
                logging.warning('Timeout encountered during fdisk run.')
            else:
                logging.error('(Not critical) fdisk check fail for %s; %s',
                              usb_path, str(e))
        return False

    def _run_check_on_host(self, host, usb):
        """Run badblocks on the provided host.

        @params host:   Host where USB drive mounted
        @params usb:    Path to USB drive. (e.g. /dev/sda)

        @returns: constants.HW_STATE_NEED_REPLACEMENT when badblocks
                  printed anything to stdout, otherwise
                  constants.HW_STATE_NORMAL.
        """
        command = 'badblocks -w -e 5 -b 4096 -t random %s' % usb
        logging.info('Running command: %s', command)
        # The response is the list of bad block on USB.
        # Extended time for 2 hour to run USB verification.
        # TODO (otabek@) (b:153661014#comment2) bring F3 to run
        # check faster if badblocks cannot finish in 2 hours.
        result = host.run(command, timeout=7200).stdout.strip()
        logging.info("Check result: '%s'", result)
        if result:
            # So has result is Bad and empty is Good.
            return constants.HW_STATE_NEED_REPLACEMENT
        return constants.HW_STATE_NORMAL

    def _install_stable_image(self):
        """Install stable image to the USB drive.

        Best effort: a failure is logged and never propagated.
        """
        # install fresh image to the USB because badblocks formats it
        # https://crbug.com/1091406
        try:
            logging.debug('Started to install test image to USB-drive')
            _, image_path = self._dut_host.stage_image_for_servo()
            self.get_host().get_servo().image_to_servo_usb(image_path,
                                                           power_off_dut=False)
            logging.debug('Finished installing test image to USB-drive')
        except Exception as e:
            # Ignore any error which happened during image install; it is
            # not related to the main goal. Only Exception is caught so
            # KeyboardInterrupt/SystemExit still stop the process.
            logging.info('Fail to install test image to USB-drive')
            logging.debug('(Not critical) %s', e)

    def _set_state(self, state):
        """Record the USB state in the host info; no-op for empty state."""
        if state:
            self._set_host_info_state(constants.SERVO_USB_STATE_PREFIX, state)


class VerifyServoFw(base._BaseServoVerifier):
    """Force update Servo firmware if it not up-to-date.

    This is rarely case when servo firmware was not updated by labstation
    when servod started. This should ensure that the servo_v4 and
    servo_micro is up-to-date.
    """
    def _verify(self):
        """Run a forced servo firmware update when the servo host is up."""
        if not self.servo_host_is_up():
            logging.info('Servo host is down; Skipping the verification')
            return
        servo_updater.update_servo_firmware(
            self.get_host(),
            force_update=True)


class VerifyRPMConfig(base._BaseDUTVerifier):
    """Check RPM config of the setup.

    This check run against RPM configs settings.
    """

    def _verify(self):
        """Run the RPM validator against the host when the host is up."""
        if not self.host_is_up():
            logging.info('Host is down; Skipping the verification')
            return
        rpm_validator.verify_unsafe(self.get_host())


class FlashServoKeyboardMapVerifier(base._BaseDUTVerifier):
    """Flash the keyboard map on servo."""

    def _verify(self):
        """Flash the keyboard map if the flasher supports the host.

        @raises base.AuditError: if the host is down or the servo is not
                                 initialized.
        """
        if not self.host_is_up():
            raise base.AuditError('Host is down')
        if not self.servo_is_up():
            raise base.AuditError('Servo not initialized')

        host = self.get_host()
        flasher = servo_keyboard_flasher.ServoKeyboardMapFlasher()
        if flasher.is_image_supported(host):
            flasher.update(host)


class VerifyDUTMacAddress(base._BaseDUTVerifier):
    """Verify and update cached NIC mac address on servo.

    Servo_v4 plugged to the DUT and providing NIC for that. We caching mac
    address on servod side to better debugging.
    """

    def _verify(self):
        """Update the cached mac address through MacAddressHelper.

        @raises base.AuditError: if the host or the servo is not up.
        """
        if not self.host_is_up():
            raise base.AuditError('Host is down.')
        if not self.servo_is_up():
            raise base.AuditError('Servo host is down.')

        helper = mac_address_helper.MacAddressHelper()
        helper.update_if_needed(self.get_host())