xref: /aosp_15_r20/external/autotest/site_utils/admin_audit/verifiers.py (revision 9c5db1993ded3edbeafc8092d69fe5de2ee02df7)
1*9c5db199SXin Li#!/usr/bin/env python3
2*9c5db199SXin Li# Copyright 2020 The Chromium OS Authors. All rights reserved.
3*9c5db199SXin Li# Use of this source code is governed by a BSD-style license that can be
4*9c5db199SXin Li# found in the LICENSE file.
5*9c5db199SXin Li
6*9c5db199SXin Liimport logging
7*9c5db199SXin Li
8*9c5db199SXin Li
9*9c5db199SXin Liimport common
10*9c5db199SXin Lifrom autotest_lib.client.common_lib import error
11*9c5db199SXin Lifrom autotest_lib.client.common_lib import utils as client_utils
12*9c5db199SXin Lifrom autotest_lib.server.cros.storage import storage_validate as storage
13*9c5db199SXin Lifrom autotest_lib.server.cros.servo.keyboard import servo_keyboard_flasher
14*9c5db199SXin Lifrom autotest_lib.server.cros.repair import mac_address_helper
15*9c5db199SXin Lifrom autotest_lib.site_utils.admin_audit import base
16*9c5db199SXin Lifrom autotest_lib.site_utils.admin_audit import constants
17*9c5db199SXin Lifrom autotest_lib.site_utils.admin_audit import rpm_validator
18*9c5db199SXin Lifrom autotest_lib.site_utils.admin_audit import servo_updater
19*9c5db199SXin Li
20*9c5db199SXin Litry:
21*9c5db199SXin Li    from autotest_lib.utils.frozen_chromite.lib import metrics
22*9c5db199SXin Liexcept ImportError:
23*9c5db199SXin Li    metrics = client_utils.metrics_mock
24*9c5db199SXin Li
# Common status strings used for statistics. They are only defined here;
# none of the verifiers in the code visible in this file reference them.
STATUS_FAIL = 'fail'
STATUS_SUCCESS = 'success'
STATUS_SKIPPED = 'skipped'
29*9c5db199SXin Li
30*9c5db199SXin Li
class VerifyDutStorage(base._BaseDUTVerifier):
    """Audit the health of the storage device inside the DUT.

    The storage type is detected first, then usage and EOL (end-of-life)
    metrics are read to derive the state.
    Supported storage types: MMS, NVME, SSD.
    Possible states are:
      UNKNOWN - the DUT is not accessible, the storage type could not be
                determined, or there is no data to derive metrics from
      NORMAL - the storage is in good shape and the device will work
                stable. (supported for all types)
      ACCEPTABLE - the storage has used almost all of its resources; the
                device still works stable but should be scheduled for
                replacement. (supported by MMS, NVME)
      NEED_REPLACEMENT - the storage is broken or worn past its life
                limit; the device may be unstable and cause flakiness
                in the tests. (supported by all types)
    """

    def __init__(self, dut_host):
        super(VerifyDutStorage, self).__init__(dut_host)
        # Last state produced by a successful _verify(); None until then.
        self._state = None

    def _verify(self, set_label=True, run_badblocks=None):
        """Detect the storage state and optionally record it on the host.

        @params set_label:      When true, store the detected state in the
                                host info and flag the device for
                                replacement if the state requires it.
        @params run_badblocks:  Passed through unchanged to the storage
                                validator's get_state().

        @raises base.AuditError: if anything fails while reading or
                                 recording the state.
        """
        if not self.host_is_up():
            logging.info('Host is down; Skipping the verification')
            return
        try:
            validator = storage.StorageStateValidator(self.get_host())
            logging.debug('Detected storage type: %s', validator.get_type())
            raw_state = validator.get_state(run_badblocks=run_badblocks)
            logging.debug('Detected storage state: %s', raw_state)
            hw_state = self.convert_state(raw_state)
            if hw_state and set_label:
                self._set_host_info_state(constants.DUT_STORAGE_STATE_PREFIX,
                                          hw_state)
                needs_replacement = (
                        hw_state == constants.HW_STATE_NEED_REPLACEMENT)
                if needs_replacement:
                    self.get_host().set_device_needs_replacement(
                            resultdir=self.get_result_dir())
            # Only reached when every step above succeeded.
            self._state = hw_state
        except Exception as e:
            message = ('Exception during getting state of'
                       ' storage %s' % str(e))
            raise base.AuditError(message)

    def convert_state(self, state):
        """Translate a validator storage state into a verifier HW state.

        @params state: State value reported by the storage validator.

        @returns: The matching constants.HW_STATE_* value, or None when
                  the validator state is not one of the known values.
        """
        known_states = (
                (storage.STORAGE_STATE_NORMAL, constants.HW_STATE_NORMAL),
                (storage.STORAGE_STATE_WARNING,
                 constants.HW_STATE_ACCEPTABLE),
                (storage.STORAGE_STATE_CRITICAL,
                 constants.HW_STATE_NEED_REPLACEMENT),
        )
        for validator_state, hw_state in known_states:
            if state == validator_state:
                return hw_state
        return None

    def get_state(self):
        """Return the state stored by the last successful verification."""
        return self._state
88*9c5db199SXin Li
89*9c5db199SXin Li
class VerifyServoUsb(base._BaseServoVerifier):
    """Verify the state of the USB-drive on the Servo.

    The state is determined by running `badblocks` against the USB-drive
    and checking whether any bad sectors are reported.
    States written by this verifier:
      NOT_DETECTED - the USB-drive could not be found through the servo.
      NORMAL - the device is available for testing and no bad sectors
                were found on it; the device will work stable.
      NEED_REPLACEMENT - the device is available for testing and some
                bad sectors were found on it, or badblocks failed to run
                against it. The device can work but may cause flakiness
                in the tests or repair process.
    No state is written when the servo is not initialized or when the
    check timed out.

    badblocks errors:
    No such device or address while trying to determine device size
    """

    def _verify(self):
        """Check the USB-drive for bad blocks and record the result.

        The check runs on the DUT when the DUT is up and sees the drive,
        otherwise on the servo host. A stable image is re-installed
        afterwards because the check overwrites the drive.
        """
        if not self.servo_is_up():
            logging.info('Servo not initialized; Skipping the verification')
            return
        try:
            usb = self.get_host()._probe_and_validate_usb_dev()
            logging.debug('USB path: %s', usb)
        except Exception as e:
            # A probing failure is treated the same as "no USB found".
            usb = ''
            logging.debug('(Not critical) %s', e)
        if not usb:
            self._set_state(constants.HW_STATE_NOT_DETECTED)
            return
        # basic readonly check

        # path to USB if DUT is sshable
        logging.info('Starting verification of USB drive...')
        dut_usb = None
        if self.host_is_up():
            dut_usb = self._usb_path_on_dut()
        state = None
        try:
            if dut_usb:
                logging.info('Try run check on DUT side.')
                state = self._run_check_on_host(self._dut_host, dut_usb)
            else:
                logging.info('Try run check on ServoHost side.')
                servo = self.get_host().get_servo()
                servo_usb = servo.probe_host_usb_dev()
                state = self._run_check_on_host(self.get_host(), servo_usb)
        except Exception as e:
            if 'Timeout encountered:' in str(e):
                # A timeout says nothing about the drive health, so the
                # state stays None and only a metric is reported.
                logging.info('Timeout during running action')
                metrics.Counter(
                    'chromeos/autotest/audit/servo/usb/timeout'
                    ).increment(fields={'host': self._dut_host.hostname})
            else:
                # badblocks generate errors when device not reachable or
                # cannot read system information to execute process
                state = constants.HW_STATE_NEED_REPLACEMENT
            logging.debug(str(e))

        self._set_state(state)
        logging.info('Finished verification of USB drive.')

        self._install_stable_image()

    def _usb_path_on_dut(self):
        """Return path to the USB detected on DUT side.

        @returns: Device path (e.g. /dev/sda) of the first responsive
                  USB device seen by the DUT, or None if there is none.
        """
        servo = self.get_host().get_servo()
        servo.switch_usbkey('dut')
        # `ls` exits non-zero when the glob matches nothing. Ignore the
        # status so that a DUT without any /dev/sd* device results in
        # None (and the check falls back to the servo host) instead of
        # aborting the whole verification with a run error.
        result = self._dut_host.run('ls /dev/sd[a-z]', ignore_status=True)
        for path in result.stdout.splitlines():
            cmd = ('. /usr/share/misc/chromeos-common.sh; get_device_type %s' %
                   path)
            check_run = self._dut_host.run(cmd, timeout=30, ignore_status=True)
            if check_run.stdout.strip() != 'USB':
                continue
            if self._quick_check_if_device_responsive(self._dut_host, path):
                logging.info('USB drive detected on DUT side as %s', path)
                return path
        return None

    def _quick_check_if_device_responsive(self, host, usb_path):
        """Verify that the device responds to `fdisk -l` on the host.

        @params host:       Host on which to run the check.
        @params usb_path:   Path to USB drive. (e.g. /dev/sda)

        @returns: True if fdisk exits with status 0; False if it exits
                  with another status, times out or fails to run.
        """
        validate_cmd = 'fdisk -l %s' % usb_path
        try:
            resp = host.run(validate_cmd, ignore_status=True, timeout=30)
            if resp.exit_status == 0:
                return True
            logging.error('USB %s is not detected by fdisk!', usb_path)
        except error.AutoservRunError as e:
            if 'Timeout encountered' in str(e):
                logging.warning('Timeout encountered during fdisk run.')
            else:
                logging.error('(Not critical) fdisk check fail for %s; %s',
                              usb_path, str(e))
        return False

    def _run_check_on_host(self, host, usb):
        """Run badblocks on the provided host.

        @params host:   Host where USB drive mounted
        @params usb:    Path to USB drive. (e.g. /dev/sda)

        @returns: constants.HW_STATE_NEED_REPLACEMENT if badblocks printed
                  anything to stdout, constants.HW_STATE_NORMAL otherwise.
        """
        command = 'badblocks -w -e 5 -b 4096 -t random %s' % usb
        logging.info('Running command: %s', command)
        # The response is the list of bad block on USB.
        # Extended time for 2 hour to run USB verification.
        # TODO (otabek@) (b:153661014#comment2) bring F3 to run
        # check faster if badblocks cannot finish in 2 hours.
        result = host.run(command, timeout=7200).stdout.strip()
        logging.info("Check result: '%s'", result)
        if result:
            # So has result is Bad and empty is Good.
            return constants.HW_STATE_NEED_REPLACEMENT
        return constants.HW_STATE_NORMAL

    def _install_stable_image(self):
        """Install stable image to the USB drive.

        Best effort: a failure is logged and never propagated, because it
        does not affect the result of the verification itself.
        """
        # install fresh image to the USB because badblocks formats it
        # https://crbug.com/1091406
        try:
            logging.debug('Started to install test image to USB-drive')
            _, image_path = self._dut_host.stage_image_for_servo()
            self.get_host().get_servo().image_to_servo_usb(image_path,
                                                           power_off_dut=False)
            logging.debug('Finished installing test image to USB-drive')
        except Exception as e:
            # Ignore any error which happened while installing the image;
            # it is not related to the main goal. Only Exception is
            # caught so KeyboardInterrupt/SystemExit still propagate.
            logging.info('Fail to install test image to USB-drive')
            logging.debug('(Not critical) %s', e)

    def _set_state(self, state):
        """Store the USB state in the host info; no-op for an empty state."""
        if state:
            self._set_host_info_state(constants.SERVO_USB_STATE_PREFIX, state)
223*9c5db199SXin Li
224*9c5db199SXin Li
class VerifyServoFw(base._BaseServoVerifier):
    """Force an update of the Servo firmware.

    It is rare, but the labstation may fail to update the servo firmware
    when servod starts. Running this verifier ensures that servo_v4 and
    servo_micro are up-to-date.
    """

    def _verify(self):
        """Run a forced firmware update when the servo host is reachable."""
        if self.servo_host_is_up():
            servo_host = self.get_host()
            servo_updater.update_servo_firmware(servo_host,
                                                force_update=True)
        else:
            logging.info('Servo host is down; Skipping the verification')
239*9c5db199SXin Li
240*9c5db199SXin Li
class VerifyRPMConfig(base._BaseDUTVerifier):
    """Validate the RPM configuration of the setup.

    The check is delegated to rpm_validator and is skipped when the host
    is down.
    """

    def _verify(self):
        """Run the RPM validation against the host if it is up."""
        if self.host_is_up():
            dut = self.get_host()
            rpm_validator.verify_unsafe(dut)
        else:
            logging.info('Host is down; Skipping the verification')
252*9c5db199SXin Li
253*9c5db199SXin Li
class FlashServoKeyboardMapVerifier(base._BaseDUTVerifier):
    """Flash the keyboard map on the servo."""

    def _verify(self):
        """Flash the keyboard map when the image is supported for the host.

        @raises base.AuditError: if the host is down or the servo is not
                                 initialized.
        """
        host_ready = self.host_is_up()
        if not host_ready:
            raise base.AuditError('Host is down')
        servo_ready = self.servo_is_up()
        if not servo_ready:
            raise base.AuditError('Servo not initialized')

        dut = self.get_host()
        map_flasher = servo_keyboard_flasher.ServoKeyboardMapFlasher()
        # Nothing to flash when the image is not supported for this host.
        if not map_flasher.is_image_supported(dut):
            return
        map_flasher.update(dut)
267*9c5db199SXin Li
268*9c5db199SXin Li
class VerifyDUTMacAddress(base._BaseDUTVerifier):
    """Verify and update cached NIC mac address on servo.

    Servo_v4 is plugged into the DUT and provides the NIC for it. The mac
    address is cached on the servod side to make debugging easier.
    """

    def _verify(self):
        """Update the cached mac address through MacAddressHelper.

        @raises base.AuditError: if the host is down or the servo is not
                                 initialized.
        """
        if not self.host_is_up():
            raise base.AuditError('Host is down.')
        if not self.servo_is_up():
            # The condition checked is servo_is_up(), not the state of the
            # servo host, so report it the same way the other verifiers
            # in this file do.
            raise base.AuditError('Servo not initialized.')

        helper = mac_address_helper.MacAddressHelper()
        helper.update_if_needed(self.get_host())
284