xref: /aosp_15_r20/external/autotest/site_utils/deployment/prepare/dut.py (revision 9c5db1993ded3edbeafc8092d69fe5de2ee02df7)
1#!/usr/bin/env python3
2# Copyright 2019 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""library functions to prepare a DUT for lab deployment.
7
8This library will be shared between Autotest and Skylab DUT deployment tools.
9"""
10
11from __future__ import absolute_import
12from __future__ import division
13from __future__ import print_function
14
15import contextlib
16import time
17
18import common
19import logging
20from autotest_lib.client.common_lib import error
21from autotest_lib.client.common_lib import utils
22from autotest_lib.server import hosts
23from autotest_lib.server import site_utils as server_utils
24from autotest_lib.server.hosts import host_info
25from autotest_lib.server.hosts import servo_host
26from autotest_lib.server.hosts import cros_constants
27from autotest_lib.server.hosts import servo_constants
28
29
30_FIRMWARE_UPDATE_TIMEOUT = 600
# Check the battery level with retries.
# If the battery level is too low, sleep for 15 minutes between checks.
33_BATTERY_LEVEL_CHECK_RETRIES = 8
34_BATTERY_LEVEL_CHECK_RETRIES_TIMEOUT = 900
# We expect the battery level to change by at least 4% within 15 minutes.
36_BATTERY_LEVEL_CHANGE_IN_ONE_RETRY = 4
37
38
@contextlib.contextmanager
def create_cros_host(hostname, board, model, servo_hostname, servo_port,
                servo_serial=None, logs_dir=None):
    """Yield a server.hosts.CrosHost object to use for DUT preparation.

    This object contains just enough inventory data to be able to prepare the
    DUT for lab deployment. It does not contain any reference to AFE / Skylab so
    that DUT preparation is guaranteed to be isolated from the scheduling
    infrastructure.

    The host is closed when the context exits. It is also closed when the
    servo setup fails before the host could be yielded, so a failed
    preparation does not leave the created host open.

    @param hostname:        FQDN of the host to prepare.
    @param board:           The autotest board label for the DUT.
    @param model:           The autotest model label for the DUT.
    @param servo_hostname:  FQDN of the servo host controlling the DUT.
    @param servo_port:      Servo host port used for the controlling servo.
    @param servo_serial:    (Optional) Serial number of the controlling servo.
    @param logs_dir:        (Optional) Directory to save logs obtained from the
                            host.

    @yield a server.hosts.Host object.
    """
    labels = [
            'board:%s' % board,
            'model:%s' % model,
    ]
    attributes = {
            servo_constants.SERVO_HOST_ATTR: servo_hostname,
            servo_constants.SERVO_PORT_ATTR: servo_port,
    }
    if servo_serial is not None:
        attributes[servo_constants.SERVO_SERIAL_ATTR] = servo_serial

    # The inventory lives only in memory; nothing is read from or written to
    # an external store.
    store = host_info.InMemoryHostInfoStore(info=host_info.HostInfo(
            labels=labels,
            attributes=attributes,
    ))
    machine_dict = _get_machine_dict(hostname, store)
    host = hosts.create_host(machine_dict)
    # Everything after the host exists is guarded, so host.close() runs even
    # when preparing or attaching the servo raises.
    try:
        servohost = servo_host.ServoHost(
                **servo_host.get_servo_args_for_host(host))
        _prepare_servo(servohost)
        host.set_servo_host(servohost)
        host.servo.uart_logs_dir = logs_dir
        yield host
    finally:
        host.close()
86
87
def _get_machine_dict(hostname, host_info_store):
    """Build the machine description consumed by hosts.create_host.

    @param hostname:         Name of the host the dict describes.
    @param host_info_store:  Host info store to associate with the host.

    @return A dict with the keys 'hostname', 'host_info_store' and
            'afe_host'; 'afe_host' is a new server_utils.EmptyAFEHost.
    """
    machine = dict(hostname=hostname, host_info_store=host_info_store)
    machine['afe_host'] = server_utils.EmptyAFEHost()
    return machine
100
101
def download_image_to_servo_usb(host, build):
    """Stage a build and write its image to the servo's USB stick.

    @param host   A server.hosts.Host object with a servo attached.
    @param build  A ChromeOS version string for the build to download.
    """
    # stage_image_for_servo() returns a pair; only the second item (the
    # update URL) is needed here.
    _image_name, image_url = host.stage_image_for_servo(build)
    host.servo.image_to_servo_usb(image_url)
110
111
def try_reset_by_servo(host):
    """Cold-reset the DUT through its servo and wait for it to boot.

    Cold reset implemented as
    `dut-control -p <SERVO-PORT> power_state:reset`.

    @param host: CrosHost instance with initialized servo instance.

    @raise error.AutoservError: if the DUT is not up within
           host.BOOT_TIMEOUT seconds.
    """
    logging.info('Attempting reset via servo...')
    host.servo.get_power_state_controller().reset()

    logging.info('Waiting for DUT to come back up.')
    if host.wait_up(timeout=host.BOOT_TIMEOUT):
        return
    raise error.AutoservError(
        'DUT failed to come back after %d seconds' % host.BOOT_TIMEOUT)
127
128
def power_cycle_via_servo(host, recover_src=False):
    """Power cycle a host through its attached servo.

    The DUT is first asked to halt over ssh (failures are only logged), then
    powered off and back on through the servo's power state controller.

    @param host: A server.hosts.Host object.
    @param recover_src: Indicate if we need switch servo_v4_role
           back to src mode.

    @raise error.AutoservError: if the DUT is not up within
           host.BOOT_TIMEOUT seconds after power on.
    """
    try:
        logging.info('Shutting down %s from via ssh.', host.hostname)
        host.halt()
    except Exception as halt_err:
        # Best effort only: the servo power-off below happens regardless.
        logging.info('Unable to shutdown DUT via ssh; %s', str(halt_err))

    if recover_src:
        host.servo.set_servo_v4_role('src')

    logging.info('Power cycling DUT through servo...')
    host.servo.get_power_state_controller().power_off()
    host.servo.switch_usbkey('off')
    # NOTE(review): the shutdown timeout is waited out twice before power on;
    # confirm whether the doubled delay is intentional.
    for _ in range(2):
        time.sleep(host.SHUTDOWN_TIMEOUT)
    # N.B. The Servo API requires that we use power_on() here
    # for two reasons:
    #  1) After turning on a DUT in recovery mode, you must turn
    #     it off and then on with power_on() once more to
    #     disable recovery mode (this is a Parrot specific
    #     requirement).
    #  2) After power_off(), the only way to turn on is with
    #     power_on() (this is a Storm specific requirement).
    host.servo.get_power_state_controller().power_on()

    logging.info('Waiting for DUT to come back up.')
    if host.wait_up(timeout=host.BOOT_TIMEOUT):
        return
    raise error.AutoservError('DUT failed to come back after %d seconds' %
                              host.BOOT_TIMEOUT)
164
165
def verify_battery_status(host):
    """Verify the battery status and charge level of the DUT.

    If DUT battery still in the factory mode then DUT required re-work.

    The check is skipped when the DUT's 'power' label is not 'battery', and
    when the power supply info has no battery on an 'audio_box' setup.
    Otherwise the battery status must be one of Charging / Discharging /
    Full, and the battery level must reach cros_constants.MIN_BATTERY_LEVEL
    within the configured number of checks.

    @param host server.hosts.CrosHost object.
    @raise Exception: if the battery info is missing, the status has an
                      unexpected value, or the battery does not reach the
                      required level.
    """
    logging.info("Started to verify battery status")
    # Named `info` so the imported `host_info` module is not shadowed.
    info = host.host_info_store.get()
    if info.get_label_value('power') != 'battery':
        logging.info("Skepping due DUT does not have the battery")
        return
    power_info = host.get_power_supply_info()
    if 'Battery' not in power_info:
        # Dues overheat battery in the audio-boxes the device can be deployed
        # without battery.
        if info.has_label('audio_box'):
            logging.info('Device does not have battery.'
                         ' Skip battery verification as it is audio_box setup.')
            return
        # Fail with a readable message instead of a bare KeyError below.
        raise Exception(
                'Battery is not detected on the DUT. Please verify that DUT'
                ' prepared for deployment.')
    battery_path = power_info['Battery']['path']
    cmd = 'cat %s/status' % battery_path
    status = host.run(cmd, timeout=30, ignore_status=True).stdout.strip()
    if status not in ['Charging', 'Discharging', 'Full']:
        raise Exception(
                'Unexpected battery status. Please verify that DUT prepared'
                ' for deployment.')

    # Verify battery level to avoid cases when DUT in factory mode which can
    # block battery from charging. The level is checked up to
    # _BATTERY_LEVEL_CHECK_RETRIES times, sleeping between checks to allow
    # the battery to reach the required level.
    battery_level_good = False
    last_battery_level = 0
    for attempt in range(_BATTERY_LEVEL_CHECK_RETRIES):
        power_info = host.get_power_supply_info()
        battery_level = float(power_info['Battery']['percentage'])
        # Verify if battery reached the required level
        battery_level_good = battery_level >= cros_constants.MIN_BATTERY_LEVEL
        if battery_level_good:
            # Stop retry as battery reached the required level
            break

        # Detect a stalled battery before sleeping, so no wait is spent on a
        # decision that is already known.
        if last_battery_level > 0:
            # If level of battery is changing less than 4% per 15 minutes
            # then we can assume that the battery is not charging as expected
            # or stuck on some level.
            battery_level_change = abs(last_battery_level - battery_level)
            if battery_level_change < _BATTERY_LEVEL_CHANGE_IN_ONE_RETRY:
                logging.info(
                        'Battery charged less than 4%% for 15 minutes which'
                        ' means that something wrong with charging.'
                        ' Stop retry to charge it. Battery level: %s%%',
                        battery_level)
                break
        last_battery_level = battery_level

        if attempt == _BATTERY_LEVEL_CHECK_RETRIES - 1:
            # No further check follows the last attempt; sleeping here would
            # only delay the failure.
            break
        logging.info(
                'Battery level %s%% is lower than expected %s%%.'
                ' Sleep for %s seconds to try again', battery_level,
                cros_constants.MIN_BATTERY_LEVEL,
                _BATTERY_LEVEL_CHECK_RETRIES_TIMEOUT)
        time.sleep(_BATTERY_LEVEL_CHECK_RETRIES_TIMEOUT)
    if not battery_level_good:
        raise Exception(
                'Battery is not charged or discharging.'
                ' Please verify that DUT connected to power and charging.'
                ' Possible that the DUT is not ready for deployment in lab.')
    logging.info("Battery status verification passed!")
233
234
def verify_servo(host):
    """Verify that the DUT has a good, working Servo.

    The servo_topology and servo_type will be clean up when initiate the
    deploy process by run add-dut or update-dut.

    Labstations are skipped. On success the servo topology is stored on the
    host via host._set_servo_topology().

    @param host server.hosts.CrosHost object.
    @raise Exception: if the servo host, topology or servo type is missing,
                      or the servo is not in a working state.
    """
    if host.host_info_store.get().os == 'labstation':
        # skip labstation because they do not have servo
        return

    # Every failure message ends with the same reminder.
    reminder = ' All DUTs need to have a stable and working servo.'

    attached_servo_host = host._servo_host
    if not attached_servo_host:
        raise Exception('Servo host is not initialized.' + reminder)

    if attached_servo_host.is_servo_topology_supported():
        topology = attached_servo_host.get_topology()
        if not topology or topology.is_empty():
            raise Exception('Servo topology is not initialized.' + reminder)

    if not host.servo.get_servo_type():
        raise Exception(
                'The servo_type did not received from Servo. Please verify'
                ' that Servo is in good state.' + reminder)

    if not host.is_servo_in_working_state():
        raise Exception(
                'Servo is not initialized properly or did not passed one or'
                ' more verifiers.' + reminder)

    host._set_servo_topology()
    logging.info("Servo initialized and working as expected.")
268
269
def verify_ccd_testlab_enable(host):
    """Verify that ccd testlab is enabled when the DUT is connected by CCD.

    The new deploy process required to deploy DUTs with testlab enable when
    connection to the servo by type-c, so we will be sure that communication
    by servo is permanent, it's critical for auto-repair capability.

    Nothing is checked for labstations, for hosts without a servo, or when
    the main servo device is not 'ccd_cr50'.

    @param host server.hosts.CrosHost object.
    @raise Exception: if the servo lacks the 'cr50_testlab' control or the
                      control does not report 'on'.
    """
    if host.host_info_store.get().os == 'labstation':
        # skip labstation because they do not has servo
        return

    # Only verify for ccd servo connection
    servo = host.servo
    if not servo or servo.get_main_servo_device() != 'ccd_cr50':
        return

    if not servo.has_control('cr50_testlab'):
        raise Exception(
            'CCD connection required support of cr50 on the DUT. Please '
            'verify which servo need to be used for DUT setup.')

    if servo.get('cr50_testlab') != 'on':
        raise Exception(
            'CCD testlab mode is not enabled on the DUT, enable '
            'testlab mode is required for all DUTs that support CR50.')
    logging.info("CCD testlab mode is enabled on the DUT.")
299
300
def verify_labstation_RPM_config_unsafe(host):
    """Verify that we can power cycle a labstation with its RPM information.
    Any host without RPM information will be safely skipped.

    @param host: any host

    This procedure is intended to catch inaccurate RPM info when the
    host is deployed.

    If the RPM config information is wrong, then this command will fail.

    Note that we do not cleanly stop servod as part of power-cycling the DUT;
    therefore calling this function is not safe in general.

    @raise Exception: if only one of the 'powerunit_hostname' and
                      'powerunit_outlet' attributes is set.
    """
    attributes = host.host_info_store.get().attributes
    powerunit_hostname = attributes.get('powerunit_hostname')
    powerunit_outlet = attributes.get('powerunit_outlet')

    has_hostname = bool(powerunit_hostname)
    has_outlet = bool(powerunit_outlet)

    if not has_hostname and not has_outlet:
        # No RPM information at all: nothing to verify.
        logging.info("intentionally skipping labstation %s", host.hostname)
        return

    if has_hostname != has_outlet:
        # Exactly one of the two values is set.
        msg = "inconsistent power info: %s %s" % (
            powerunit_hostname, powerunit_outlet
        )
        logging.error(msg)
        raise Exception(msg)

    logging.info("Shutting down labstation...")
    host.rpm_power_off_and_wait()
    host.rpm_power_on_and_wait()
    logging.info("RPM Check Successful")
339
340
def verify_boot_into_rec_mode(host):
    """Verify that the DUT boots from USB in recovery mode; reset the TPM.

    The new deploy process will install test image before firmware update, so
    we don't need boot into recovery mode during deploy, but we still want to
    make sure that DUT can boot into recover mode as it's critical for
    auto-repair capability.

    @param host   servers.host.Host object.
    @raise Exception: if the DUT does not come up within
                      host.USB_BOOT_TIMEOUT after booting in recovery mode.
    """
    def _check_booted_and_reset_tpm():
        """Confirm the DUT is up in recovery mode, then reset the TPM."""
        if not host.wait_up(timeout=host.USB_BOOT_TIMEOUT):
            raise Exception('DUT failed to boot into recovery mode.')

        logging.info('Resetting the TPM status')
        try:
            host.run('chromeos-tpm-recovery')
        except error.AutoservRunError:
            logging.warning('chromeos-tpm-recovery is too old.')

    try:
        # The DUT could be start with un-sshable state, so do shutdown from
        # DUT side in a try block.
        logging.info('Shutting down %s from via ssh.', host.hostname)
        host.halt()
    except Exception as halt_err:
        logging.info('Unable to shutdown DUT via ssh; %s', str(halt_err))

    host.servo.get_power_state_controller().power_off()
    time.sleep(host.SHUTDOWN_TIMEOUT)
    logging.info("Booting DUT into recovery mode...")
    snk_required = host.require_snk_mode_in_recovery()
    host.servo.boot_in_recovery_mode(snk_mode=snk_required)
    try:
        _check_booted_and_reset_tpm()
    except Exception:
        # Restore the servo_v4 role to src if we called boot_in_recovery_mode
        # method with snk_mode=True earlier. If no exception raise, recover
        # src mode will be handled by power_cycle_via_servo() method.
        if snk_required:
            host.servo.set_servo_v4_role('src')
        raise

    logging.info("Rebooting host into normal mode.")
    power_cycle_via_servo(host, recover_src=snk_required)
    logging.info("Verify boot into recovery mode completed successfully.")
384
385
def install_test_image(host):
    """Boot the DUT from the servo USB key and install the test image.

    This function assumes that the required image is already downloaded onto the
    USB key connected to the DUT via servo, and the DUT is in dev mode with
    dev_boot_usb enabled.

    @param host   servers.host.Host object.
    @raise Exception: if the DUT does not answer ping within
                      host.BOOT_TIMEOUT after power on.
    """
    servo = host.servo
    # First power on.  We sleep to allow the firmware plenty of time
    # to display the dev-mode screen; some boards take their time to
    # be ready for the ctrl+U after power on.
    servo.get_power_state_controller().power_off()
    time.sleep(host.SHUTDOWN_TIMEOUT)
    servo.switch_usbkey('dut')
    servo.get_power_state_controller().power_on()

    # Type ctrl+U repeatedly for up to BOOT_TIMEOUT or until DUT boots.
    booted = False
    give_up_at = time.time() + host.BOOT_TIMEOUT
    while not booted and time.time() < give_up_at:
        logging.info("Pressing ctrl+u")
        servo.ctrl_u()
        booted = host.ping_wait_up(timeout=5)
    if not booted:
        raise Exception('DUT failed to boot from USB for install test image.')

    host.run('chromeos-install --yes', timeout=host.ADMIN_INSTALL_TIMEOUT)

    logging.info("Rebooting DUT to boot from hard drive.")
    try:
        host.reboot()
    except Exception as reboot_err:
        # Fall back to a servo reset when rebooting over ssh fails.
        logging.info('Failed to reboot DUT via ssh; %s', str(reboot_err))
        try_reset_by_servo(host)
    logging.info("Install test image completed successfully.")
423
424
def reinstall_test_image(host):
    """Install the test image on the DUT through host.servo_install().

    This function assumes that the required image is already downloaded onto the
    USB key connected to the DUT via servo.

    @param host   servers.host.Host object.
    """
    # The installation itself is delegated entirely to the host object.
    host.servo_install()
434
435
def flash_firmware_using_servo(host, build):
    """Flash DUT firmware directly using servo.

    Rather than running `chromeos-firmwareupdate` on DUT, we can flash DUT
    firmware directly using servo (run command `flashrom`, etc. on servo). In
    this way, we don't require DUT to be in dev mode and with dev_boot_usb
    enabled.

    @param host   Host object; its firmware_install() performs the flashing.
    @param build  Build identifier passed through to firmware_install().
    """
    host.firmware_install(build)
444
445
def install_firmware(host):
    """Install dev-signed firmware after removing write-protect.

    At start, it's assumed that hardware write-protect is disabled,
    the DUT is in dev mode, and the servo's USB stick already has a
    test image installed.

    Steps performed on the DUT:
      1. Disable software-controlled write-protect on both FPROMs.
      2. Run the firmware update in the background, wait for it and check
         its log.
      3. Reboot, then clear the GBB flags and request leaving dev mode.
      4. Reboot again into normal (non-dev) mode.
    When a reboot over ssh fails, the DUT is reset through the servo instead.

    @param host   Host instance to use for servo and ssh operations.
    """
    def _reboot_or_reset(failure_message):
        """Reboot over ssh; on any failure log it and reset via servo."""
        try:
            host.reboot()
        except Exception as reboot_err:
            logging.debug(failure_message, reboot_err)
            try_reset_by_servo(host)

    logging.info("Started install firmware on the DUT.")
    # Disable software-controlled write-protect for both FPROMs, and
    # install the RO firmware.
    for programmer in ('host', 'ec'):
        host.run('flashrom -p %s --wp-disable' % programmer,
                 ignore_status=True)

    update_log = '/mnt/stateful_partition/home/root/cros-fw-update.log'
    update_pid = _start_firmware_update(host, update_log)
    _wait_firmware_update_process(host, update_pid)
    _check_firmware_update_result(host, update_log)

    _reboot_or_reset('Failed to reboot the DUT after update firmware; %s')

    # Once we confirmed DUT can boot from new firmware, get us out of
    # dev-mode and clear GBB flags.  GBB flags are non-zero because
    # boot from USB was enabled.
    logging.info("Resting gbb flags and disable dev mode.")
    for cleanup_cmd in ('/usr/share/vboot/bin/set_gbb_flags.sh 0',
                        'crossystem disable_dev_request=1'):
        host.run(cleanup_cmd, ignore_status=True)

    logging.info("Rebooting DUT in normal mode(non-dev).")
    _reboot_or_reset('Failed to reboot the DUT after switch to'
                     ' non-dev mode; %s')
    logging.info("Install firmware completed successfully.")
497
498
def _start_firmware_update(host, result_file):
    """Run `chromeos-firmwareupdate` in background.

    In scenario servo v4 type C, some boards of DUT may lose ethernet
    connectivity on firmware update. There's no way to bring it back except
    rebooting the system.

    The background command line writes the current date to result_file,
    appends the updater's output to it, and then runs the check_ethernet
    hook.

    @param host         Host instance to use for servo and ssh operations.
    @param result_file  Path on DUT to save operation logs.

    @returns The value returned by host.run_background() (the process id).
    """
    # TODO(guocb): Use `make_dev_firmware` to re-sign from MP to test/dev.
    script = 'date > %(log)s;nohup %(updater)s &>> %(log)s;%(hook)s' % {
        'log': result_file,
        'updater': 'chromeos-firmwareupdate --mode=factory --force',
        'hook': '/usr/local/bin/hooks/check_ethernet.hook',
    }
    return host.run_background(script)
519
520
def _wait_firmware_update_process(host, pid, timeout=_FIRMWARE_UPDATE_TIMEOUT):
    """Wait `chromeos-firmwareupdate` to finish.

    The process is polled with `ps` every 10 seconds for up to `timeout`
    seconds.

    @param host     Host instance to use for servo and ssh operations.
    @param pid      The process ID of `chromeos-firmwareupdate`.
    @param timeout  Maximum time to wait for firmware updating.

    @raise Exception: if the updater does not complete within `timeout`
                      seconds, or the DUT does not come back up after the
                      connection to it was lost.
    """
    try:
        utils.poll_for_condition(
            # NOTE(review): presumably host.run() raises AutoservRunError once
            # `ps` exits non-zero (process gone), which is then handled below
            # -- confirm against host.run().
            lambda: host.run('ps -f -p %s' % pid, timeout=20).exit_status,
            exception=Exception(
                    "chromeos-firmwareupdate (pid: %s) didn't complete in %s "
                    'seconds.' % (pid, timeout)),
            # Honor the caller-supplied timeout so that the polling limit
            # agrees with the one reported in the error message.
            timeout=timeout,
            sleep_interval=10,
        )
    except error.AutoservRunError:
        # We lose the connectivity, so the DUT should be booting up.
        if not host.wait_up(timeout=host.USB_BOOT_TIMEOUT):
            raise Exception(
                    'DUT failed to boot up after firmware updating.')
542
543
def _check_firmware_update_result(host, result_file):
    """Check if firmware updating is good or not.

    The update is considered good only when the last line of result_file is
    the updater's success message.

    @param host         Host instance to use for servo and ssh operations.
    @param result_file  Path of the file saving output of
                        `chromeos-firmwareupdate`.

    @raise Exception: if the last line of the log is not the success
                      message, including when the log is empty or has a
                      single line.
    """
    fw_update_was_good = ">> DONE: Firmware updater exits successfully."
    result = host.run('cat %s' % result_file)
    # Index with [-1]: rsplit() yields a single item when the log has fewer
    # than two lines, and [1] would then fail with an IndexError instead of
    # reporting the failed update.
    last_line = result.stdout.rstrip().rsplit('\n', 1)[-1]
    if last_line != fw_update_was_good:
        raise Exception("chromeos-firmwareupdate failed!")
555
556
def _prepare_servo(servohost):
    """Prepare servo connected to host for installation steps.

    Stops `servod` for the servo port, runs the servo host's repair(), and
    then checks that a USB stick is visible on the servo host.

    @param servohost  A server.hosts.servo_host.ServoHost object.
    @raise Exception: if no USB stick is detected on the servo host.
    """
    # Stopping `servod` on the servo host will force `repair()` to
    # restart it.  We want that restart for a few reasons:
    #   + `servod` caches knowledge about the image on the USB stick.
    #     We want to clear the cache to force the USB stick to be
    #     re-imaged unconditionally.
    #   + If there's a problem with servod that verify and repair
    #     can't find, this provides a UI through which `servod` can
    #     be restarted.
    stop_servod = 'stop servod PORT=%d' % servohost.servo_port
    servohost.run(stop_servod, ignore_status=True)
    servohost.repair()

    usb_device = servohost.get_servo().probe_host_usb_dev()
    if not usb_device:
        raise Exception('No USB stick detected on Servo host')
576
577
def setup_hwid_and_serialnumber(host):
    """Read HWID and serial number from the DUT into its host info.

    The values are read with `crossystem hwid` and `vpd -g serial_number`,
    stripped of surrounding whitespace, and stored in the 'HWID' and
    'serial_number' attributes. The host info is committed only when it
    differs from what the store currently holds.

    @param host    servers.host.Host object.
    @raise Exception: if the host has no host_info_store, or the HWID and/or
                      the serial number cannot be read (empty or
                      whitespace-only output).
    """
    if not hasattr(host, 'host_info_store'):
        raise Exception('%s does not have host_info_store' % host.hostname)

    info = host.host_info_store.get()
    # Strip the output so that trailing newlines are not stored in the
    # attributes and whitespace-only output counts as a failed read.
    hwid = host.run('crossystem hwid', ignore_status=True).stdout.strip()
    serial_number = host.run('vpd -g serial_number',
                             ignore_status=True).stdout.strip()

    if not hwid and not serial_number:
        raise Exception(
                'Failed to retrieve HWID and SerialNumber from host %s' %
                host.hostname)
    if not serial_number:
        raise Exception('Failed to retrieve SerialNumber from host %s' %
                        host.hostname)
    if not hwid:
        raise Exception('Failed to retrieve HWID from host %s' % host.hostname)

    info.attributes['HWID'] = hwid
    info.attributes['serial_number'] = serial_number
    # Avoid a needless commit when the stored info already matches.
    if info != host.host_info_store.get():
        host.host_info_store.commit(info)
    logging.info("Reading HWID and SerialNumber completed successfully.")
605