1*9c5db199SXin Li# Lint as: python2, python3 2*9c5db199SXin Li# Copyright (c) 2013 The Chromium OS Authors. All rights reserved. 3*9c5db199SXin Li# Use of this source code is governed by a BSD-style license that can be 4*9c5db199SXin Li# found in the LICENSE file. 5*9c5db199SXin Li# 6*9c5db199SXin Li# Expects to be run in an environment with sudo and no interactive password 7*9c5db199SXin Li# prompt, such as within the Chromium OS development chroot. 8*9c5db199SXin Li 9*9c5db199SXin Li 10*9c5db199SXin Li"""This file provides core logic for servo verify/repair process.""" 11*9c5db199SXin Li 12*9c5db199SXin Li 13*9c5db199SXin Lifrom __future__ import absolute_import 14*9c5db199SXin Lifrom __future__ import division 15*9c5db199SXin Lifrom __future__ import print_function 16*9c5db199SXin Li 17*9c5db199SXin Liimport logging 18*9c5db199SXin Liimport os 19*9c5db199SXin Liimport re 20*9c5db199SXin Liimport tarfile 21*9c5db199SXin Liimport threading 22*9c5db199SXin Liimport json 23*9c5db199SXin Liimport tempfile 24*9c5db199SXin Liimport time 25*9c5db199SXin Liimport six 26*9c5db199SXin Liimport six.moves.xmlrpc_client 27*9c5db199SXin Li 28*9c5db199SXin Litry: 29*9c5db199SXin Li import docker 30*9c5db199SXin Li from autotest_lib.site_utils.docker import utils as docker_utils 31*9c5db199SXin Liexcept ImportError: 32*9c5db199SXin Li logging.info("Docker API is not installed in this environment") 33*9c5db199SXin Li 34*9c5db199SXin Lifrom autotest_lib.client.bin import utils 35*9c5db199SXin Lifrom autotest_lib.client.common_lib import error 36*9c5db199SXin Lifrom autotest_lib.client.common_lib import hosts 37*9c5db199SXin Lifrom autotest_lib.client.common_lib import lsbrelease_utils 38*9c5db199SXin Lifrom autotest_lib.client.common_lib.cros import retry 39*9c5db199SXin Lifrom autotest_lib.server import crashcollect 40*9c5db199SXin Lifrom autotest_lib.server.cros.servo import servo 41*9c5db199SXin Lifrom autotest_lib.server.hosts import servo_repair 42*9c5db199SXin Lifrom autotest_lib.server.hosts import base_servohost 43*9c5db199SXin Lifrom autotest_lib.server.hosts import servo_constants 44*9c5db199SXin Lifrom autotest_lib.server.cros.faft.utils import config 45*9c5db199SXin Lifrom autotest_lib.client.common_lib import global_config 46*9c5db199SXin Lifrom autotest_lib.server.cros.servo.topology import servo_topology 47*9c5db199SXin Li 48*9c5db199SXin Litry: 49*9c5db199SXin Li from autotest_lib.utils.frozen_chromite.lib import metrics 50*9c5db199SXin Liexcept ImportError: 51*9c5db199SXin Li metrics = utils.metrics_mock 52*9c5db199SXin Li 53*9c5db199SXin Li_CONFIG = global_config.global_config 54*9c5db199SXin Li 55*9c5db199SXin LiSERVOD_CONTAINER_IMAGE_PATH = "us-docker.pkg.dev/chromeos-partner-moblab/common-core" 56*9c5db199SXin Li 57*9c5db199SXin Li# Create the flag on drone or inside the ssp container to enable servod debug mode. 58*9c5db199SXin Li# The servod container will not be deleted after being stopped. 59*9c5db199SXin LiSERVOD_DEBUG_FLAG = '/servod_debug' 60*9c5db199SXin Li 61*9c5db199SXin LiDOCKER_SERVOD_DEBUG_MODE = os.environ.get('DOCKER_SERVOD_DEBUG_MODE', '0') 62*9c5db199SXin Li 63*9c5db199SXin Li 64*9c5db199SXin Liclass ServoHost(base_servohost.BaseServoHost): 65*9c5db199SXin Li """Host class for a servo host(e.g. beaglebone, labstation) 66*9c5db199SXin Li that with a servo instance for a specific port. 67*9c5db199SXin Li 68*9c5db199SXin Li @type _servo: servo.Servo | None 69*9c5db199SXin Li """ 70*9c5db199SXin Li 71*9c5db199SXin Li DEFAULT_PORT = int(os.getenv('SERVOD_PORT', '9999')) 72*9c5db199SXin Li 73*9c5db199SXin Li # Timeout for initializing servo signals. 74*9c5db199SXin Li INITIALIZE_SERVO_TIMEOUT_SECS = 60 75*9c5db199SXin Li 76*9c5db199SXin Li # Default timeout for run terminal command. 77*9c5db199SXin Li DEFAULT_TERMINAL_TIMEOUT = 30 78*9c5db199SXin Li 79*9c5db199SXin Li # Ready test function 80*9c5db199SXin Li SERVO_READY_METHOD = 'get_version' 81*9c5db199SXin Li 82*9c5db199SXin Li # Directory prefix on the servo host where the servod logs are stored. 83*9c5db199SXin Li SERVOD_LOG_PREFIX = '/var/log/servod' 84*9c5db199SXin Li 85*9c5db199SXin Li # Exit code to use when symlinks for servod logs are not found. 86*9c5db199SXin Li NO_SYMLINKS_CODE = 9 87*9c5db199SXin Li 88*9c5db199SXin Li # Directory in the job's results directory to dump the logs into. 89*9c5db199SXin Li LOG_DIR = 'servod' 90*9c5db199SXin Li 91*9c5db199SXin Li # Prefix for joint loglevel files in the logs. 92*9c5db199SXin Li JOINT_LOG_PREFIX = 'log' 93*9c5db199SXin Li 94*9c5db199SXin Li # Regex group to extract timestamp from logfile name. 95*9c5db199SXin Li TS_GROUP = 'ts' 96*9c5db199SXin Li 97*9c5db199SXin Li # This regex is used to extract the timestamp from servod logs. 98*9c5db199SXin Li # files always start with log. 99*9c5db199SXin Li TS_RE = (r'log.' 100*9c5db199SXin Li # The timestamp is of format %Y-%m-%d--%H-%M-%S.MS 101*9c5db199SXin Li r'(?P<%s>\d{4}(\-\d{2}){2}\-(-\d{2}){3}.\d{3})' 102*9c5db199SXin Li # The loglevel is optional depending on labstation version. 103*9c5db199SXin Li r'(.(INFO|DEBUG|WARNING))?' % TS_GROUP) 104*9c5db199SXin Li TS_EXTRACTOR = re.compile(TS_RE) 105*9c5db199SXin Li 106*9c5db199SXin Li # Regex group to extract MCU name from logline in servod logs. 107*9c5db199SXin Li MCU_GROUP = 'mcu' 108*9c5db199SXin Li 109*9c5db199SXin Li # Regex group to extract logline from MCU logline in servod logs. 110*9c5db199SXin Li LINE_GROUP = 'line' 111*9c5db199SXin Li 112*9c5db199SXin Li # This regex is used to extract the mcu and the line content from an 113*9c5db199SXin Li # MCU logline in servod logs. e.g. EC or servo_v4 console logs. 114*9c5db199SXin Li # Here is an example log-line: 115*9c5db199SXin Li # 116*9c5db199SXin Li # 2020-01-23 13:15:12,223 - servo_v4 - EC3PO.Console - DEBUG - 117*9c5db199SXin Li # console.py:219:LogConsoleOutput - /dev/pts/9 - cc polarity: cc1 118*9c5db199SXin Li # 119*9c5db199SXin Li # Here is conceptually how they are formatted: 120*9c5db199SXin Li # 121*9c5db199SXin Li # <time> - <MCU> - EC3PO.Console - <LVL> - <file:line:func> - <pts> - 122*9c5db199SXin Li # <output> 123*9c5db199SXin Li # 124*9c5db199SXin Li # The log format starts with a timestamp 125*9c5db199SXin Li MCU_RE = (r'[\d\-]+ [\d:,]+ ' 126*9c5db199SXin Li # The mcu that is logging this is next. 127*9c5db199SXin Li r'- (?P<%s>[\w/]+) - ' 128*9c5db199SXin Li # Next, we have more log outputs before the actual line. 129*9c5db199SXin Li # Information about the file line, logging function etc. 130*9c5db199SXin Li # Anchor on EC3PO Console, LogConsoleOutput and dev/pts. 131*9c5db199SXin Li # NOTE: if the log format changes, this regex needs to be 132*9c5db199SXin Li # adjusted. 133*9c5db199SXin Li r'EC3PO\.Console[\s\-\w\d:.]+LogConsoleOutput - /dev/pts/\d+ - ' 134*9c5db199SXin Li # Lastly, we get the MCU's console line. 135*9c5db199SXin Li r'(?P<%s>.+$)' % (MCU_GROUP, LINE_GROUP)) 136*9c5db199SXin Li MCU_EXTRACTOR = re.compile(MCU_RE) 137*9c5db199SXin Li 138*9c5db199SXin Li # Regex to detect timeout messages when USBC pigtail has timeout issue. 139*9c5db199SXin Li # e.g.: [475635.427072 PD TMOUT RX 1/1] 140*9c5db199SXin Li USBC_PIGTAIL_TIMEOUT_RE = r'\[[\d \.]{1,20}(PD TMOUT RX 1\/1)\]' 141*9c5db199SXin Li 142*9c5db199SXin Li # Suffix to identify compressed logfiles. 143*9c5db199SXin Li COMPRESSION_SUFFIX = '.tbz2' 144*9c5db199SXin Li 145*9c5db199SXin Li # A suffix to mark servod log directories that came from instance that 146*9c5db199SXin Li # ran during this servo_host, but are not the last one running e.g. when 147*9c5db199SXin Li # an instance (on purpose, or due to a bug) restarted in the middle of the 148*9c5db199SXin Li # run. 149*9c5db199SXin Li OLD_LOG_SUFFIX = 'old' 150*9c5db199SXin Li 151*9c5db199SXin Li def _init_attributes(self): 152*9c5db199SXin Li self._servo_state = None 153*9c5db199SXin Li self.servo_port = None 154*9c5db199SXin Li self.servo_board = None 155*9c5db199SXin Li self._ec_supported = None 156*9c5db199SXin Li self.servo_model = None 157*9c5db199SXin Li self.servo_serial = None 158*9c5db199SXin Li self.servo_setup = None 159*9c5db199SXin Li self.servo_recovery = None 160*9c5db199SXin Li self.servo_fw_channel = None 161*9c5db199SXin Li self.additional_servod_args = None 162*9c5db199SXin Li self._dut_health_profile = None 163*9c5db199SXin Li # The flag that indicate if a servo is connected to a smart usbhub. 164*9c5db199SXin Li # TODO(xianuowang@) remove this flag once all usbhubs in the lab 165*9c5db199SXin Li # get replaced. 166*9c5db199SXin Li self.smart_usbhub = None 167*9c5db199SXin Li self._servo = None 168*9c5db199SXin Li self._topology = None 169*9c5db199SXin Li self._tunnel_proxy = None 170*9c5db199SXin Li self._tunnel_proxy_lock = threading.Lock() 171*9c5db199SXin Li self._initial_instance_ts = None 172*9c5db199SXin Li # Flag to make sure that multiple calls to close do not result in the 173*9c5db199SXin Li # logic executing multiple times. 174*9c5db199SXin Li self._closed = False 175*9c5db199SXin Li # Per-thread local data 176*9c5db199SXin Li self._local = threading.local() 177*9c5db199SXin Li 178*9c5db199SXin Li def _initialize(self, 179*9c5db199SXin Li servo_host='localhost', 180*9c5db199SXin Li servo_port=DEFAULT_PORT, 181*9c5db199SXin Li servo_board=None, 182*9c5db199SXin Li servo_model=None, 183*9c5db199SXin Li servo_serial=None, 184*9c5db199SXin Li servo_setup=None, 185*9c5db199SXin Li servo_recovery=None, 186*9c5db199SXin Li servo_fw_channel=None, 187*9c5db199SXin Li additional_servod_args=None, 188*9c5db199SXin Li is_in_lab=None, 189*9c5db199SXin Li *args, 190*9c5db199SXin Li **dargs): 191*9c5db199SXin Li """Initialize a ServoHost instance. 192*9c5db199SXin Li 193*9c5db199SXin Li A ServoHost instance represents a host that controls a servo. 194*9c5db199SXin Li 195*9c5db199SXin Li @param servo_host: Name of the host where the servod process 196*9c5db199SXin Li is running. 197*9c5db199SXin Li @param servo_port: Port the servod process is listening on. Defaults 198*9c5db199SXin Li to the SERVOD_PORT environment variable if set, 199*9c5db199SXin Li otherwise 9999. 200*9c5db199SXin Li @param servo_board: Board that the servo is connected to. 201*9c5db199SXin Li @param servo_model: Model that the servo is connected to. 202*9c5db199SXin Li @param servo_serial: Serial number of the servo device. 203*9c5db199SXin Li @param servo_setup: Type of servo setup, e.g. REGULAR or DUAL_V4. 204*9c5db199SXin Li @param additional_servod_args: Additional args that will append to 205*9c5db199SXin Li servod start command. 206*9c5db199SXin Li @param is_in_lab: True if the servo host is in Cros Lab. Default is set 207*9c5db199SXin Li to None, for which utils.host_is_in_lab_zone will be 208*9c5db199SXin Li called to check if the servo host is in Cros lab. 209*9c5db199SXin Li 210*9c5db199SXin Li """ 211*9c5db199SXin Li super(ServoHost, self)._initialize(hostname=servo_host, 212*9c5db199SXin Li is_in_lab=is_in_lab, *args, **dargs) 213*9c5db199SXin Li self._init_attributes() 214*9c5db199SXin Li self.servo_port = int(servo_port) 215*9c5db199SXin Li self.servo_board = servo_board 216*9c5db199SXin Li self.servo_model = servo_model 217*9c5db199SXin Li self.servo_serial = servo_serial 218*9c5db199SXin Li self.servo_setup = servo_setup 219*9c5db199SXin Li self.servo_recovery = servo_recovery 220*9c5db199SXin Li self.servo_fw_channel = servo_fw_channel 221*9c5db199SXin Li self.additional_servod_args = additional_servod_args 222*9c5db199SXin Li 223*9c5db199SXin Li # The location of the log files on the servo host for this instance. 224*9c5db199SXin Li self.remote_log_dir = '%s_%s' % (self.SERVOD_LOG_PREFIX, 225*9c5db199SXin Li self.servo_port) 226*9c5db199SXin Li # Path of the servo host lock file. 227*9c5db199SXin Li self._lock_file = (self.TEMP_FILE_DIR + str(self.servo_port) + 228*9c5db199SXin Li self.LOCK_FILE_POSTFIX) 229*9c5db199SXin Li # File path to declare a reboot request. 230*9c5db199SXin Li self._reboot_file = (self.TEMP_FILE_DIR + str(self.servo_port) + 231*9c5db199SXin Li self.REBOOT_FILE_POSTFIX) 232*9c5db199SXin Li 233*9c5db199SXin Li # Lock the servo host if it's an in-lab labstation to prevent other 234*9c5db199SXin Li # task to reboot it until current task completes. We also wait and 235*9c5db199SXin Li # make sure the labstation is up here, in the case of the labstation is 236*9c5db199SXin Li # in the middle of reboot. 237*9c5db199SXin Li self._is_locked = False 238*9c5db199SXin Li if (not self.is_containerized_servod() 239*9c5db199SXin Li and self.wait_up(self.REBOOT_TIMEOUT) and self.is_in_lab() 240*9c5db199SXin Li and self.is_labstation()): 241*9c5db199SXin Li self._lock() 242*9c5db199SXin Li try: 243*9c5db199SXin Li self.wait_ready() 244*9c5db199SXin Li except Exception as e: 245*9c5db199SXin Li logging.info( 246*9c5db199SXin Li 'Unexpected error while ensure labstation' 247*9c5db199SXin Li ' readiness; %s', str(e)) 248*9c5db199SXin Li 249*9c5db199SXin Li self._repair_strategy = ( 250*9c5db199SXin Li servo_repair.create_servo_repair_strategy()) 251*9c5db199SXin Li 252*9c5db199SXin Li def __str__(self): 253*9c5db199SXin Li return "<%s '%s:%s'>" % ( 254*9c5db199SXin Li type(self).__name__, self.hostname, self.servo_port) 255*9c5db199SXin Li 256*9c5db199SXin Li def connect_servo(self): 257*9c5db199SXin Li """ Initialize and setup servo for later use. 258*9c5db199SXin Li """ 259*9c5db199SXin Li self.initialize_servo() 260*9c5db199SXin Li self.initialize_dut_for_servo() 261*9c5db199SXin Li 262*9c5db199SXin Li def initialize_servo(self): 263*9c5db199SXin Li """Establish a connection to the servod server on this host. 264*9c5db199SXin Li 265*9c5db199SXin Li Initializes `self._servo` and then verifies that all network 266*9c5db199SXin Li connections are working. This will create an ssh tunnel if 267*9c5db199SXin Li it's required. 268*9c5db199SXin Li """ 269*9c5db199SXin Li self._servo = servo.Servo(servo_host=self, 270*9c5db199SXin Li servo_serial=self.servo_serial) 271*9c5db199SXin Li 272*9c5db199SXin Li def initialize_dut_for_servo(self): 273*9c5db199SXin Li """This method will do some setup for dut control, e.g. setup 274*9c5db199SXin Li main servo_v4 device, and also testing the connection between servo 275*9c5db199SXin Li and DUT. As a side effect of testing the connection, all signals on 276*9c5db199SXin Li the target servo are reset to default values, and the USB stick is 277*9c5db199SXin Li set to the neutral (off) position. 278*9c5db199SXin Li """ 279*9c5db199SXin Li if not self._servo: 280*9c5db199SXin Li raise hosts.AutoservVerifyError('Servo object needs to be' 281*9c5db199SXin Li ' initialized before initialize' 282*9c5db199SXin Li ' DUT.') 283*9c5db199SXin Li timeout, _ = retry.timeout( 284*9c5db199SXin Li self._servo.initialize_dut, 285*9c5db199SXin Li timeout_sec=self.INITIALIZE_SERVO_TIMEOUT_SECS) 286*9c5db199SXin Li if timeout: 287*9c5db199SXin Li raise hosts.AutoservVerifyError('Initialize dut for servo timed' 288*9c5db199SXin Li ' out.') 289*9c5db199SXin Li 290*9c5db199SXin Li def disconnect_servo(self): 291*9c5db199SXin Li """Disconnect our servo if it exists. 292*9c5db199SXin Li 293*9c5db199SXin Li If we've previously successfully connected to our servo, 294*9c5db199SXin Li disconnect any established ssh tunnel, and set `self._servo` 295*9c5db199SXin Li back to `None`. 296*9c5db199SXin Li """ 297*9c5db199SXin Li if self._servo: 298*9c5db199SXin Li # N.B. This call is safe even without a tunnel: 299*9c5db199SXin Li # rpc_server_tracker.disconnect() silently ignores 300*9c5db199SXin Li # unknown ports. 301*9c5db199SXin Li self.rpc_server_tracker.disconnect(self.servo_port) 302*9c5db199SXin Li self._servo = None 303*9c5db199SXin Li 304*9c5db199SXin Li def _to_str(self, maybe_str): 305*9c5db199SXin Li """If the param is bytes, convert it to a str.""" 306*9c5db199SXin Li # b/217780680, Make this compatible with python3, 307*9c5db199SXin Li if isinstance(maybe_str, bytes): 308*9c5db199SXin Li return maybe_str.decode('utf-8', errors='replace') 309*9c5db199SXin Li return maybe_str 310*9c5db199SXin Li 311*9c5db199SXin Li def _maybe_create_servod_ssh_tunnel_proxy(self): 312*9c5db199SXin Li """Create a xmlrpc proxy for use with a ssh tunnel. 313*9c5db199SXin Li A lock is used to safely create a singleton proxy. 314*9c5db199SXin Li """ 315*9c5db199SXin Li with self._tunnel_proxy_lock: 316*9c5db199SXin Li if self._tunnel_proxy is None: 317*9c5db199SXin Li self._tunnel_proxy = self.rpc_server_tracker.xmlrpc_connect( 318*9c5db199SXin Li None, 319*9c5db199SXin Li self.servo_port, 320*9c5db199SXin Li ready_test_name=self.SERVO_READY_METHOD, 321*9c5db199SXin Li timeout_seconds=60, 322*9c5db199SXin Li request_timeout_seconds=3600, 323*9c5db199SXin Li server_desc=str(self)) 324*9c5db199SXin Li 325*9c5db199SXin Li def get_servod_server_proxy(self): 326*9c5db199SXin Li """Return a proxy if it exists; otherwise, create a new one. 327*9c5db199SXin Li A proxy can either be a ssh tunnel based proxy, or a httplib 328*9c5db199SXin Li based proxy. 329*9c5db199SXin Li 330*9c5db199SXin Li @returns: An xmlrpclib.ServerProxy that is connected to the servod 331*9c5db199SXin Li server on the host. 332*9c5db199SXin Li """ 333*9c5db199SXin Li if (servo_constants.ENABLE_SSH_TUNNEL_FOR_SERVO 334*9c5db199SXin Li and not self.is_localhost() 335*9c5db199SXin Li and not self.is_containerized_servod()): 336*9c5db199SXin Li # Check for existing ssh tunnel proxy. 337*9c5db199SXin Li if self._tunnel_proxy is None: 338*9c5db199SXin Li self._maybe_create_servod_ssh_tunnel_proxy() 339*9c5db199SXin Li return self._tunnel_proxy 340*9c5db199SXin Li else: 341*9c5db199SXin Li # xmlrpc/httplib is not thread-safe, so each thread must have its 342*9c5db199SXin Li # own separate proxy connection. 343*9c5db199SXin Li if not hasattr(self._local, "_per_thread_proxy"): 344*9c5db199SXin Li if self.is_containerized_servod(): 345*9c5db199SXin Li ip_addr = docker_utils.get_container_ip(self.hostname) 346*9c5db199SXin Li remote = 'http://%s:%s' % (ip_addr, self.servo_port) 347*9c5db199SXin Li else: 348*9c5db199SXin Li remote = 'http://%s:%s' % (self.hostname, self.servo_port) 349*9c5db199SXin Li logging.debug('Servo Proxy RPC URL: %s', remote) 350*9c5db199SXin Li self._local._per_thread_proxy = six.moves.xmlrpc_client.ServerProxy(remote) 351*9c5db199SXin Li return self._local._per_thread_proxy 352*9c5db199SXin Li 353*9c5db199SXin Li def verify(self, silent=False): 354*9c5db199SXin Li """Update the servo host and verify it's in a good state. 355*9c5db199SXin Li 356*9c5db199SXin Li @param silent If true, suppress logging in `status.log`. 357*9c5db199SXin Li """ 358*9c5db199SXin Li message = 'Beginning verify for servo host %s port %s serial %s' 359*9c5db199SXin Li message %= (self.hostname, self.servo_port, self.servo_serial) 360*9c5db199SXin Li self.record('INFO', None, None, message) 361*9c5db199SXin Li try: 362*9c5db199SXin Li self._repair_strategy.verify(self, silent) 363*9c5db199SXin Li self._servo_state = servo_constants.SERVO_STATE_WORKING 364*9c5db199SXin Li self.record('INFO', None, None, 365*9c5db199SXin Li 'ServoHost verify set servo_state as WORKING') 366*9c5db199SXin Li except Exception as e: 367*9c5db199SXin Li if not self.is_localhost(): 368*9c5db199SXin Li self._servo_state = self.determine_servo_state() 369*9c5db199SXin Li self.record('INFO', None, None, 370*9c5db199SXin Li 'ServoHost verify set servo_state as %s' 371*9c5db199SXin Li % self._servo_state) 372*9c5db199SXin Li if self._is_critical_error(e): 373*9c5db199SXin Li raise 374*9c5db199SXin Li 375*9c5db199SXin Li def _get_default_usbkey_mount_path(self): 376*9c5db199SXin Li return '/media/servo_usb/%s' % self.servo_port 377*9c5db199SXin Li 378*9c5db199SXin Li def get_image_name_from_usbkey(self, usbkey_dev): 379*9c5db199SXin Li """Mount usb drive and check ChromeOS image name on it if there is 380*9c5db199SXin Li one. This method assumes the image_usbkey_direction is already set 381*9c5db199SXin Li to servo side. 382*9c5db199SXin Li 383*9c5db199SXin Li @param usbkey_dev: usbkey dev path(e.g. /dev/sdb). 384*9c5db199SXin Li 385*9c5db199SXin Li @returns: image_name on the usbkey, e.g. nami-release/R82.10138.0.0, 386*9c5db199SXin Li or empty string if no test image detected, or unexpected 387*9c5db199SXin Li error occurred. 388*9c5db199SXin Li """ 389*9c5db199SXin Li logging.info('Checking ChromeOS image name on usbkey.') 390*9c5db199SXin Li mount_dst = self._get_default_usbkey_mount_path() 391*9c5db199SXin Li # Unmount if there is an existing stale mount. 392*9c5db199SXin Li self._unmount_drive(mount_dst) 393*9c5db199SXin Li # ChromeOS root fs is in /dev/sdx3 394*9c5db199SXin Li mount_src = usbkey_dev + '3' 395*9c5db199SXin Li try: 396*9c5db199SXin Li if not self._mount_drive(mount_src, mount_dst): 397*9c5db199SXin Li logging.debug('Unexpected error occurred on mount usb drive.') 398*9c5db199SXin Li return '' 399*9c5db199SXin Li 400*9c5db199SXin Li release_content = self._to_str( 401*9c5db199SXin Li self.run('cat %s/etc/lsb-release' % mount_dst, 402*9c5db199SXin Li ignore_status=True).stdout.strip()) 403*9c5db199SXin Li 404*9c5db199SXin Li if not re.search(r'RELEASE_TRACK=.*test', release_content): 405*9c5db199SXin Li logging.info('The image on usbkey is not a test image') 406*9c5db199SXin Li return '' 407*9c5db199SXin Li 408*9c5db199SXin Li return lsbrelease_utils.get_chromeos_release_builder_path( 409*9c5db199SXin Li lsb_release_content=release_content) 410*9c5db199SXin Li finally: 411*9c5db199SXin Li logging.debug('Image check compeleted, unmounting the usb drive.') 412*9c5db199SXin Li self._unmount_drive(mount_dst) 413*9c5db199SXin Li 414*9c5db199SXin Li def _extract_firmware_image_from_usbkey(self, fw_dst): 415*9c5db199SXin Li """Extract firmware images from the usbkey on servo, this method 416*9c5db199SXin Li assumes there is already a ChromeOS test image staged on servo. 417*9c5db199SXin Li 418*9c5db199SXin Li @param fw_dst: the path that we'll copy firmware images to. 419*9c5db199SXin Li 420*9c5db199SXin Li @returns: a json format string of firmware manifest data. 421*9c5db199SXin Li """ 422*9c5db199SXin Li usbkey_dev = self._probe_and_validate_usb_dev() 423*9c5db199SXin Li if not usbkey_dev: 424*9c5db199SXin Li raise hosts.AutoservRepairError('Unexpected error occurred when' 425*9c5db199SXin Li ' probe usbkey dev path, please check logs for detail.') 426*9c5db199SXin Li 427*9c5db199SXin Li mount_dst = self._get_default_usbkey_mount_path() 428*9c5db199SXin Li # Unmount if there is an existing stale mount. 429*9c5db199SXin Li self._unmount_drive(mount_dst) 430*9c5db199SXin Li # ChromeOS root fs is in /dev/sdx3 431*9c5db199SXin Li mount_src = usbkey_dev + '3' 432*9c5db199SXin Li try: 433*9c5db199SXin Li if not self._mount_drive(mount_src, mount_dst): 434*9c5db199SXin Li raise hosts.AutoservRepairError('Failed to extract firmware' 435*9c5db199SXin Li ' image; Unable to mount %s.' % usbkey_dev, 436*9c5db199SXin Li 'unable to mount usbkey') 437*9c5db199SXin Li updater_bin = os.path.join(mount_dst, 438*9c5db199SXin Li 'usr/sbin/chromeos-firmwareupdate') 439*9c5db199SXin Li self.run('%s --unpack %s' % (updater_bin, fw_dst)) 440*9c5db199SXin Li return self._to_str(self.run('%s --manifest' % updater_bin).stdout) 441*9c5db199SXin Li 442*9c5db199SXin Li finally: 443*9c5db199SXin Li self._unmount_drive(mount_dst) 444*9c5db199SXin Li 445*9c5db199SXin Li def prepare_repair_firmware_image(self, fw_dst=None): 446*9c5db199SXin Li """Prepare firmware image on the servohost for auto repair process 447*9c5db199SXin Li to consume. 448*9c5db199SXin Li 449*9c5db199SXin Li @param fw_dst: the path that we want to store firmware image on 450*9c5db199SXin Li the servohost. 451*9c5db199SXin Li 452*9c5db199SXin Li @returns: A tuple that containes ec firmware image path and bios 453*9c5db199SXin Li firmware image path on the servohost, or None if type of 454*9c5db199SXin Li image is not available based on manifest and dut's model. 455*9c5db199SXin Li """ 456*9c5db199SXin Li model = self.servo_model or self._dut_host_info.model 457*9c5db199SXin Li if not model: 458*9c5db199SXin Li raise hosts.AutoservRepairError( 459*9c5db199SXin Li 'Could not determine DUT\'s model.', 460*9c5db199SXin Li 'model infomation unknown') 461*9c5db199SXin Li 462*9c5db199SXin Li if not fw_dst: 463*9c5db199SXin Li fw_dst = '/tmp/firmware_image/%s' % self.servo_port 464*9c5db199SXin Li # Cleanup and re-create dst path to have a fresh start. 465*9c5db199SXin Li self.run('rm -rf %s' % fw_dst) 466*9c5db199SXin Li self.run('mkdir -p %s' % fw_dst) 467*9c5db199SXin Li 468*9c5db199SXin Li manifest = json.loads(self._extract_firmware_image_from_usbkey(fw_dst)) 469*9c5db199SXin Li # For models that have packed $MODEL_signed variant, we want use the 470*9c5db199SXin Li # 'signed' variant once we get DVT devices, so try to read manifest 471*9c5db199SXin Li # from $MODEL_signed first. 472*9c5db199SXin Li build = manifest.get('%s_signed' % model) or manifest.get(model) 473*9c5db199SXin Li if not build: 474*9c5db199SXin Li raise hosts.AutoservRepairError('Could not find firmware manifest' 475*9c5db199SXin Li ' for model:%s' % model, 'model manifest not found') 476*9c5db199SXin Li try: 477*9c5db199SXin Li ec_image = os.path.join(fw_dst, build['ec']['image']) 478*9c5db199SXin Li except KeyError: 479*9c5db199SXin Li ec_image = None 480*9c5db199SXin Li try: 481*9c5db199SXin Li bios_image = os.path.join(fw_dst, build['host']['image']) 482*9c5db199SXin Li except KeyError: 483*9c5db199SXin Li bios_image = None 484*9c5db199SXin Li if not ec_image and not bios_image: 485*9c5db199SXin Li raise hosts.AutoservRepairError('Could not find any firmware image' 486*9c5db199SXin Li ' for model:%s' % model, 'cannot find firmware image') 487*9c5db199SXin Li return ec_image, bios_image 488*9c5db199SXin Li 489*9c5db199SXin Li def flash_ap_firmware_via_servo(self, image): 490*9c5db199SXin Li """Flash AP firmware by use a provided image. 491*9c5db199SXin Li 492*9c5db199SXin Li This is will be a short term enhanment for infra repair use, it use 493*9c5db199SXin Li 'futility update' which will automatically determine various parameters 494*9c5db199SXin Li needed for flashrom, and will preserve the GBB, VPD, and HWID for 495*9c5db199SXin Li AP firmware update. 496*9c5db199SXin Li @TODO(xianuowang@) Remove this method once b/148403277 implemented. 497*9c5db199SXin Li 498*9c5db199SXin Li @param image: the firmware image path on servohost. 499*9c5db199SXin Li """ 500*9c5db199SXin Li cmd = 'futility update -i %s --servo_port=%s' 501*9c5db199SXin Li self.run(cmd % (image, self.servo_port), timeout=900) 502*9c5db199SXin Li 503*9c5db199SXin Li def _probe_and_validate_usb_dev(self): 504*9c5db199SXin Li """This method probe the usb dev path by talking to servo, and then 505*9c5db199SXin Li validate the dev path is valid block device to servohost. 506*9c5db199SXin Li Possible output: 507*9c5db199SXin Li 1. Encounter error during probe usb dev, returns empty string. 508*9c5db199SXin Li 2. probe usb dev completed without error but cannot find usb dev, 509*9c5db199SXin Li raise AutoservRepairError. 510*9c5db199SXin Li 3. probe usb dev find a usb dev path, but failed validation in this 511*9c5db199SXin Li method, raise AutoservRepairError. 512*9c5db199SXin Li 513*9c5db199SXin Li @returns: A string of usb dev path(e.g. '/dev/sdb'), or empty string 514*9c5db199SXin Li if unexpected error occurred during probe. 515*9c5db199SXin Li @raises: AutoservRepairError if servo couldn't probe the usb dev path 516*9c5db199SXin Li (servo.probe_host_usb_dev() returns empty string), or the dev path is 517*9c5db199SXin Li not valid block device to servohost. 518*9c5db199SXin Li """ 519*9c5db199SXin Li logging.info('Validating image usbkey on servo.') 520*9c5db199SXin Li try: 521*9c5db199SXin Li usb_dev = self._servo.probe_host_usb_dev() 522*9c5db199SXin Li except Exception as e: 523*9c5db199SXin Li # We don't want any unexpected or transient servo communicating 524*9c5db199SXin Li # failure block usb repair, so capture all errors here. 525*9c5db199SXin Li logging.error(e, exc_info=True) 526*9c5db199SXin Li logging.error('Unexpected error occurred on get usbkey dev path,' 527*9c5db199SXin Li ' skipping usbkey validation.') 528*9c5db199SXin Li return '' 529*9c5db199SXin Li 530*9c5db199SXin Li if usb_dev: 531*9c5db199SXin Li # probe_host_usb_dev() sometimes return stale record, 532*9c5db199SXin Li # so we need to make sure the path exists in fdisk. 533*9c5db199SXin Li validate_cmd = 'fdisk -l %s' % usb_dev 534*9c5db199SXin Li try: 535*9c5db199SXin Li resp = self.run(validate_cmd, ignore_status=True, timeout=30) 536*9c5db199SXin Li if resp.exit_status == 0: 537*9c5db199SXin Li return usb_dev 538*9c5db199SXin Li logging.error('%s is reported from "image_usbkey_dev" control' 539*9c5db199SXin Li ' but not detected by fdisk!', usb_dev) 540*9c5db199SXin Li except error.AutoservRunError as e: 541*9c5db199SXin Li if 'Timeout encountered' in str(e): 542*9c5db199SXin Li logging.warning('Timeout encountered during fdisk run,' 543*9c5db199SXin Li ' skipping usbkey validation.') 544*9c5db199SXin Li return '' 545*9c5db199SXin Li raise 546*9c5db199SXin Li 547*9c5db199SXin Li raise hosts.AutoservRepairError( 548*9c5db199SXin Li 'No usbkey detected on servo, the usbkey may be either missing' 549*9c5db199SXin Li ' or broken. Please replace usbkey on the servo and retry.', 550*9c5db199SXin Li 'missing usbkey') 551*9c5db199SXin Li 552*9c5db199SXin Li def is_ec_supported(self): 553*9c5db199SXin Li """Check if ec is supported on the servo_board""" 554*9c5db199SXin Li if self.servo_board: 555*9c5db199SXin Li if self._ec_supported is not None: 556*9c5db199SXin Li return self._ec_supported 557*9c5db199SXin Li try: 558*9c5db199SXin Li frm_config = config.Config(self.servo_board, self.servo_model) 559*9c5db199SXin Li self._ec_supported = getattr(frm_config, 'chrome_ec', False) 560*9c5db199SXin Li return self._ec_supported 561*9c5db199SXin Li except Exception as e: 562*9c5db199SXin Li logging.error( 563*9c5db199SXin Li 'Unexpected error when read from firmware' 564*9c5db199SXin Li ' configs; %s', e) 565*9c5db199SXin Li else: 566*9c5db199SXin Li logging.debug('Cannot detect if DUT has EC as board unknown.') 567*9c5db199SXin Li return False 568*9c5db199SXin Li 569*9c5db199SXin Li def validate_image_usbkey(self): 570*9c5db199SXin Li """This method first validate if there is a recover usbkey on servo 571*9c5db199SXin Li that accessible to servohost, and second check if a ChromeOS image is 572*9c5db199SXin Li already on the usb drive and return the image_name so we can avoid 573*9c5db199SXin Li unnecessary download and flash to the recover usbkey on servo. 574*9c5db199SXin Li 575*9c5db199SXin Li Please note that, there is special error handling logic here: 576*9c5db199SXin Li 1. If unexpected error happens, we return empty string. So repair 577*9c5db199SXin Li actions will not get blocked. 578*9c5db199SXin Li 2. If no working usbkey present on servo, but no errors, we'll raise 579*9c5db199SXin Li AutoservRepairError here. 580*9c5db199SXin Li 581*9c5db199SXin Li @returns: image_name on the usbkey, e.g. nami-release/R82.10138.0.0, 582*9c5db199SXin Li or empty string if no test image detected, or unexpected 583*9c5db199SXin Li error occurred. 584*9c5db199SXin Li @raises: AutoservRepairError if the usbkey is not detected on servo. 585*9c5db199SXin Li """ 586*9c5db199SXin Li usb_dev = self._probe_and_validate_usb_dev() 587*9c5db199SXin Li if usb_dev: 588*9c5db199SXin Li return self.get_image_name_from_usbkey(usb_dev) 589*9c5db199SXin Li else: 590*9c5db199SXin Li return '' 591*9c5db199SXin Li 592*9c5db199SXin Li def repair(self, silent=False): 593*9c5db199SXin Li """Attempt to repair servo host. 594*9c5db199SXin Li 595*9c5db199SXin Li @param silent If true, suppress logging in `status.log`. 596*9c5db199SXin Li """ 597*9c5db199SXin Li message = 'Beginning repair for servo host %s port %s serial %s' 598*9c5db199SXin Li message %= (self.hostname, self.servo_port, self.servo_serial) 599*9c5db199SXin Li self.record('INFO', None, None, message) 600*9c5db199SXin Li try: 601*9c5db199SXin Li self._repair_strategy.repair(self, silent) 602*9c5db199SXin Li self._servo_state = servo_constants.SERVO_STATE_WORKING 603*9c5db199SXin Li self.record('INFO', None, None, 604*9c5db199SXin Li 'ServoHost repair set servo_state as WORKING') 605*9c5db199SXin Li # If target is a labstation then try to withdraw any existing 606*9c5db199SXin Li # reboot request created by this servo because it passed repair. 607*9c5db199SXin Li if self.is_labstation(): 608*9c5db199SXin Li self.withdraw_reboot_request() 609*9c5db199SXin Li except Exception as e: 610*9c5db199SXin Li if not self.is_localhost(): 611*9c5db199SXin Li self._servo_state = self.determine_servo_state() 612*9c5db199SXin Li self.record('INFO', None, None, 613*9c5db199SXin Li 'ServoHost repair set servo_state as %s' 614*9c5db199SXin Li % self._servo_state) 615*9c5db199SXin Li if self._is_critical_error(e): 616*9c5db199SXin Li self.disconnect_servo() 617*9c5db199SXin Li self.stop_servod() 618*9c5db199SXin Li raise 619*9c5db199SXin Li 620*9c5db199SXin Li def _is_critical_error(self, error): 621*9c5db199SXin Li if (isinstance(error, hosts.AutoservVerifyDependencyError) 622*9c5db199SXin Li and not error.is_critical()): 623*9c5db199SXin Li logging.warning('Non-critical verify failure(s) detected during' 624*9c5db199SXin Li ' verify/repair servo, servo connection will' 625*9c5db199SXin Li ' still be up but may not be fully functional.' 626*9c5db199SXin Li ' Some repair actions and servo dependent' 627*9c5db199SXin Li ' tests may not run.') 628*9c5db199SXin Li return False 629*9c5db199SXin Li logging.info( 630*9c5db199SXin Li 'Critical verify failure(s) detected during repair/verify ' 631*9c5db199SXin Li 'servo. Disconnecting servo and running `stop servod`, all' 632*9c5db199SXin Li ' repair actions and tests that depends on servo will not ' 633*9c5db199SXin Li 'run.') 634*9c5db199SXin Li return True 635*9c5db199SXin Li 636*9c5db199SXin Li def get_servo(self): 637*9c5db199SXin Li """Get the cached servo.Servo object. 638*9c5db199SXin Li 639*9c5db199SXin Li @return: a servo.Servo object. 640*9c5db199SXin Li @rtype: autotest_lib.server.cros.servo.servo.Servo 641*9c5db199SXin Li """ 642*9c5db199SXin Li return self._servo 643*9c5db199SXin Li 644*9c5db199SXin Li def request_reboot(self): 645*9c5db199SXin Li """Request servohost to be rebooted when it's safe to by touch a file. 646*9c5db199SXin Li """ 647*9c5db199SXin Li logging.debug('Request to reboot servohost %s has been created by ' 648*9c5db199SXin Li 'servo with port # %s', self.hostname, self.servo_port) 649*9c5db199SXin Li self.run('touch %s' % self._reboot_file, ignore_status=True) 650*9c5db199SXin Li 651*9c5db199SXin Li def withdraw_reboot_request(self): 652*9c5db199SXin Li """Withdraw a servohost reboot request if exists by remove the flag 653*9c5db199SXin Li file. 654*9c5db199SXin Li """ 655*9c5db199SXin Li logging.debug('Withdrawing request to reboot servohost %s that created' 656*9c5db199SXin Li ' by servo with port # %s if exists.', 657*9c5db199SXin Li self.hostname, self.servo_port) 658*9c5db199SXin Li self.run('rm -f %s' % self._reboot_file, ignore_status=True) 659*9c5db199SXin Li 660*9c5db199SXin Li def start_servod(self, quick_startup=False): 661*9c5db199SXin Li """Start the servod process on servohost. 662*9c5db199SXin Li """ 663*9c5db199SXin Li # Skip if running on the localhost.(crbug.com/1038168) 664*9c5db199SXin Li if self.is_localhost(): 665*9c5db199SXin Li logging.debug("Servohost is a localhost, skipping start servod.") 666*9c5db199SXin Li return 667*9c5db199SXin Li 668*9c5db199SXin Li if self.is_containerized_servod(): 669*9c5db199SXin Li return self.start_containerized_servod() 670*9c5db199SXin Li 671*9c5db199SXin Li cmd = 'start servod' 672*9c5db199SXin Li if self.servo_board: 673*9c5db199SXin Li cmd += ' BOARD=%s' % self.servo_board 674*9c5db199SXin Li if self.servo_model: 675*9c5db199SXin Li cmd += ' MODEL=%s' % self.servo_model 676*9c5db199SXin Li else: 677*9c5db199SXin Li logging.warning('Board for DUT is unknown; starting servod' 678*9c5db199SXin Li ' assuming a pre-configured board.') 679*9c5db199SXin Li 680*9c5db199SXin Li cmd += ' PORT=%d' % self.servo_port 681*9c5db199SXin Li if self.servo_serial: 682*9c5db199SXin Li cmd += ' SERIAL=%s' % self.servo_serial 683*9c5db199SXin Li 684*9c5db199SXin Li # Start servod with dual_v4 based on servo_setup. 685*9c5db199SXin Li if self.is_dual_setup(): 686*9c5db199SXin Li cmd += ' DUAL_V4=1' 687*9c5db199SXin Li 688*9c5db199SXin Li # Start servod with CONFIG=cr50.xml which required for some pools. 689*9c5db199SXin Li if self._require_cr50_servod_config(): 690*9c5db199SXin Li cmd += ' CONFIG=cr50.xml' 691*9c5db199SXin Li 692*9c5db199SXin Li if self.servo_recovery == True: 693*9c5db199SXin Li cmd += ' REC_MODE=1' 694*9c5db199SXin Li 695*9c5db199SXin Li # Adding customized args if any. 696*9c5db199SXin Li if self.additional_servod_args: 697*9c5db199SXin Li cmd += ' ' + self.additional_servod_args 698*9c5db199SXin Li 699*9c5db199SXin Li # Remove the symbolic links from the logs. This helps ensure that 700*9c5db199SXin Li # a failed servod instantiation does not cause us to grab old logs 701*9c5db199SXin Li # by mistake. 702*9c5db199SXin Li self.remove_latest_log_symlinks() 703*9c5db199SXin Li self.run(cmd, timeout=60) 704*9c5db199SXin Li 705*9c5db199SXin Li # There's a lag between when `start servod` completes and when 706*9c5db199SXin Li # the _ServodConnectionVerifier trigger can actually succeed. 707*9c5db199SXin Li # The call to time.sleep() below gives time to make sure that 708*9c5db199SXin Li # the trigger won't fail after we return. 709*9c5db199SXin Li 710*9c5db199SXin Li # Normally servod on servo_v3 and labstation take ~10 seconds to ready, 711*9c5db199SXin Li # But in the rare case all servo on a labstation are in heavy use they 712*9c5db199SXin Li # may take ~30 seconds. So the timeout value will double these value, 713*9c5db199SXin Li # and we'll try quick start up when first time initialize servohost, 714*9c5db199SXin Li # and use standard start up timeout in repair. 715*9c5db199SXin Li if quick_startup: 716*9c5db199SXin Li timeout = servo_constants.SERVOD_QUICK_STARTUP_TIMEOUT 717*9c5db199SXin Li else: 718*9c5db199SXin Li timeout = servo_constants.SERVOD_STARTUP_TIMEOUT 719*9c5db199SXin Li logging.debug('Wait %s seconds for servod process fully up.', timeout) 720*9c5db199SXin Li time.sleep(timeout) 721*9c5db199SXin Li # Cache the initial instance timestamp to check against servod restarts 722*9c5db199SXin Li self._initial_instance_ts = self.get_instance_logs_ts() 723*9c5db199SXin Li 724*9c5db199SXin Li def stop_servod(self): 725*9c5db199SXin Li """Stop the servod process on servohost. 726*9c5db199SXin Li """ 727*9c5db199SXin Li # Skip if running on the localhost.(crbug.com/1038168) 728*9c5db199SXin Li if self.is_localhost(): 729*9c5db199SXin Li logging.debug("Servohost is a localhost, skipping stop servod.") 730*9c5db199SXin Li return 731*9c5db199SXin Li 732*9c5db199SXin Li if self.is_containerized_servod(): 733*9c5db199SXin Li # TODO(gregorynisbet): Remove this message in 2022Q2. 734*9c5db199SXin Li logging.debug("ServoHost: Detected containerized servod.") 735*9c5db199SXin Li remove_container = True 736*9c5db199SXin Li if DOCKER_SERVOD_DEBUG_MODE == '1' or os.path.exists( 737*9c5db199SXin Li SERVOD_DEBUG_FLAG): 738*9c5db199SXin Li remove_container = False 739*9c5db199SXin Li self.stop_containerized_servod(remove_container=remove_container) 740*9c5db199SXin Li return 741*9c5db199SXin Li 742*9c5db199SXin Li logging.debug('Stopping servod on port %s', self.servo_port) 743*9c5db199SXin Li self.run('stop servod PORT=%d' % self.servo_port, 744*9c5db199SXin Li timeout=60, ignore_status=True) 745*9c5db199SXin Li logging.debug('Wait %s seconds for servod process fully teardown.', 746*9c5db199SXin Li servo_constants.SERVOD_TEARDOWN_TIMEOUT) 747*9c5db199SXin Li time.sleep(servo_constants.SERVOD_TEARDOWN_TIMEOUT) 748*9c5db199SXin Li 749*9c5db199SXin Li def wait_for_init_servod_in_container(self, container): 750*9c5db199SXin Li """Waits for servod process to be ready to listen inside container.""" 751*9c5db199SXin Li ready_output = "Instance associated with id %s ready" % self.servo_port 752*9c5db199SXin Li if not container: 753*9c5db199SXin Li logging.debug("Container object is None.") 754*9c5db199SXin Li return False 755*9c5db199SXin Li try: 756*9c5db199SXin Li # Executes servodtool command to wait for servod to be active. 757*9c5db199SXin Li exit_code, output = container.exec_run( 758*9c5db199SXin Li cmd="servodtool instance wait-for-active -p %s" % 759*9c5db199SXin Li self.servo_port, 760*9c5db199SXin Li stdout=True) 761*9c5db199SXin Li # b/217780680, Make this compatible with python3, 762*9c5db199SXin Li if isinstance(output, bytes): 763*9c5db199SXin Li output = output.decode(errors='replace') 764*9c5db199SXin Li if exit_code != 0 or ready_output not in output: 765*9c5db199SXin Li logging.debug( 766*9c5db199SXin Li 'Failed to start servod process inside container,' 767*9c5db199SXin Li 'exit_code=%s, output=%s.', exit_code, output) 768*9c5db199SXin Li return False 769*9c5db199SXin Li except docker.errors.APIError as e: 770*9c5db199SXin Li logging.error('%s', e) 771*9c5db199SXin Li return True 772*9c5db199SXin Li 773*9c5db199SXin Li def start_containerized_servod(self, with_servod=True): 774*9c5db199SXin Li """Start the servod process on servohost.""" 775*9c5db199SXin Li logging.info("Starting servod container %s.", 776*9c5db199SXin Li self.servod_container_name) 777*9c5db199SXin Li client = docker_utils.get_docker_client() 778*9c5db199SXin Li logging.debug("Docker deamon ping %s", client.ping()) 779*9c5db199SXin Li labels = {'WITH_SERVOD': str(with_servod)} 780*9c5db199SXin Li try: 781*9c5db199SXin Li if self.is_up(with_servod=with_servod): 782*9c5db199SXin Li logging.warning("Container already exists - not starting") 783*9c5db199SXin Li return 784*9c5db199SXin Li logging.info( 785*9c5db199SXin Li 'Servod container either does not exist or is not running.' 786*9c5db199SXin Li ) 787*9c5db199SXin Li self.stop_containerized_servod() 788*9c5db199SXin Li except docker.errors.APIError: 789*9c5db199SXin Li # Container exists but is not running 790*9c5db199SXin Li logging.info("Cleanup of non functional container.") 791*9c5db199SXin Li self.stop_containerized_servod() 792*9c5db199SXin Li 793*9c5db199SXin Li label = os.environ.get("SERVOD_CONTAINER_LABEL", "release") 794*9c5db199SXin Li registry = os.environ.get("REGISTRY_URI", SERVOD_CONTAINER_IMAGE_PATH) 795*9c5db199SXin Li image = "%s/servod:%s" % (registry, label) 796*9c5db199SXin Li logging.info("Servod container image: %s", image) 797*9c5db199SXin Li 798*9c5db199SXin Li try: 799*9c5db199SXin Li client.images.pull(image) 800*9c5db199SXin Li except docker.errors.APIError: 801*9c5db199SXin Li logging.exception("Failed to pull servod container image.") 802*9c5db199SXin Li 803*9c5db199SXin Li environment = [ 804*9c5db199SXin Li "BOARD=%s" % self.servo_board, 805*9c5db199SXin Li "MODEL=%s" % self.servo_model, 806*9c5db199SXin Li "SERIAL=%s" % self.servo_serial, 807*9c5db199SXin Li "PORT=%s" % self.servo_port, 808*9c5db199SXin Li ] 809*9c5db199SXin Li # Start servod with dual_v4 based on servo_setup. 810*9c5db199SXin Li if self.is_dual_setup(): 811*9c5db199SXin Li environment.append("DUAL_V4=1") 812*9c5db199SXin Li # Start servod with CONFIG=cr50.xml which required for some pools. 813*9c5db199SXin Li if self._require_cr50_servod_config(): 814*9c5db199SXin Li environment.append("CONFIG=cr50.xml") 815*9c5db199SXin Li if self.servo_recovery == True: 816*9c5db199SXin Li environment.append("REC_MODE=1") 817*9c5db199SXin Li 818*9c5db199SXin Li container_network = os.environ.get("DOCKER_DEFAULT_NETWORK", None) 819*9c5db199SXin Li # In case the network environment is not set, fallback to default network 820*9c5db199SXin Li # for moblab or satlab based on the TLE. 821*9c5db199SXin Li if not container_network: 822*9c5db199SXin Li container_network = "default_moblab" 823*9c5db199SXin Li if 'drone' in docker_utils.get_running_containers(client=client): 824*9c5db199SXin Li container_network = "default_satlab" 825*9c5db199SXin Li 826*9c5db199SXin Li logging.info('Servod container will use %s network', container_network) 827*9c5db199SXin Li logging.info('Servod container environment: %s', environment) 828*9c5db199SXin Li try: 829*9c5db199SXin Li start_cmds = ["bash", "/start_servod.sh"] 830*9c5db199SXin Li if not with_servod: 831*9c5db199SXin Li # In some cases we do not need container without running servod. 832*9c5db199SXin Li start_cmds = ["tail", "-f", "/dev/null"] 833*9c5db199SXin Li logging.debug( 834*9c5db199SXin Li 'Attempting to start Servod container without servod') 835*9c5db199SXin Li container = client.containers.run( 836*9c5db199SXin Li image, 837*9c5db199SXin Li remove=False, 838*9c5db199SXin Li privileged=True, 839*9c5db199SXin Li labels=labels, 840*9c5db199SXin Li name=self.servod_container_name, 841*9c5db199SXin Li hostname=self.servod_container_name, 842*9c5db199SXin Li network=container_network, 843*9c5db199SXin Li cap_add=["NET_ADMIN"], 844*9c5db199SXin Li detach=True, 845*9c5db199SXin Li volumes=[ 846*9c5db199SXin Li "/dev:/dev", 847*9c5db199SXin Li "%s_log:/var/log/servod_%s/" % 848*9c5db199SXin Li (self.servod_container_name, self.servo_port) 849*9c5db199SXin Li ], 850*9c5db199SXin Li environment=environment, 851*9c5db199SXin Li command=start_cmds, 852*9c5db199SXin Li ) 853*9c5db199SXin Li # Probing servod ready state fails. 854*9c5db199SXin Li if with_servod: 855*9c5db199SXin Li current_time = 0 856*9c5db199SXin Li while not self.wait_for_init_servod_in_container( 857*9c5db199SXin Li container 858*9c5db199SXin Li ) and current_time <= servo_constants.SERVOD_STARTUP_TIMEOUT: 859*9c5db199SXin Li time.sleep(10) 860*9c5db199SXin Li current_time += 10 861*9c5db199SXin Li 862*9c5db199SXin Li if not self.wait_for_init_servod_in_container(container): 863*9c5db199SXin Li logging.info( 864*9c5db199SXin Li 'Servod process is not up within the servod container after %s seconds.' 865*9c5db199SXin Li % servo_constants.SERVOD_STARTUP_TIMEOUT) 866*9c5db199SXin Li else: 867*9c5db199SXin Li logging.info( 868*9c5db199SXin Li 'Servod process is up within the servod container after %s seconds.' 869*9c5db199SXin Li % current_time) 870*9c5db199SXin Li else: 871*9c5db199SXin Li logging.info( 872*9c5db199SXin Li "Servod container %s up and running without servod process.", 873*9c5db199SXin Li self.servod_container_name) 874*9c5db199SXin Li 875*9c5db199SXin Li except docker.errors.ContainerError as e: 876*9c5db199SXin Li logging.exception("Failed to start servod container. %s", e) 877*9c5db199SXin Li raise 878*9c5db199SXin Li except docker.errors.ImageNotFound: 879*9c5db199SXin Li logging.exception("Servod container image %s not found.", image) 880*9c5db199SXin Li 881*9c5db199SXin Li def stop_containerized_servod(self, remove_container=True): 882*9c5db199SXin Li """Stop the container running servod.""" 883*9c5db199SXin Li logging.info("Stopping servod container %s.", 884*9c5db199SXin Li self.servod_container_name) 885*9c5db199SXin Li client = docker_utils.get_docker_client() 886*9c5db199SXin Li try: 887*9c5db199SXin Li cont = client.containers.get(self.servod_container_name) 888*9c5db199SXin Li except docker.errors.NotFound: 889*9c5db199SXin Li logging.info("Servod container %s not found no need to stop it.", 890*9c5db199SXin Li self.servod_container_name) 891*9c5db199SXin Li except docker.errors.APIError: 892*9c5db199SXin Li logging.exception( 893*9c5db199SXin Li "Stopping servod container %s caused a docker error.", 894*9c5db199SXin Li self.servod_container_name) 895*9c5db199SXin Li else: 896*9c5db199SXin Li if remove_container == True: 897*9c5db199SXin Li cont.remove(force=True) 898*9c5db199SXin Li logging.debug('Servod container instance removed') 899*9c5db199SXin Li 900*9c5db199SXin Li def restart_servod(self, quick_startup=False): 901*9c5db199SXin Li """Restart the servod process on servohost. 902*9c5db199SXin Li """ 903*9c5db199SXin Li self.stop_servod() 904*9c5db199SXin Li self.start_servod(quick_startup) 905*9c5db199SXin Li 906*9c5db199SXin Li def _process_servodtool_error(self, response): 907*9c5db199SXin Li """Helper function to handle non-zero servodtool response. 908*9c5db199SXin Li """ 909*9c5db199SXin Li if re.search(servo_constants.ERROR_MESSAGE_USB_HUB_NOT_COMPATIBLE, 910*9c5db199SXin Li self._to_str(response.stdout)): 911*9c5db199SXin Li logging.error('The servo is not plugged on a usb hub that supports' 912*9c5db199SXin Li ' power-cycle!') 913*9c5db199SXin Li # change the flag so we can update this label in later process. 914*9c5db199SXin Li self.smart_usbhub = False 915*9c5db199SXin Li return 916*9c5db199SXin Li 917*9c5db199SXin Li if re.search( 918*9c5db199SXin Li servo_constants.ERROR_MESSAGE_DEVICE_NOT_FOUND % 919*9c5db199SXin Li self.servo_serial, self._to_str(response.stdout)): 920*9c5db199SXin Li logging.error('No servo with serial %s found!', self.servo_serial) 921*9c5db199SXin Li return 922*9c5db199SXin Li 923*9c5db199SXin Li logging.error('Unexpected error occurred from usbhub control, please' 924*9c5db199SXin Li ' file a bug and inform chrome-fleet-software@ team!') 925*9c5db199SXin Li 926*9c5db199SXin Li def get_main_servo_usb_path(self): 927*9c5db199SXin Li """Helper function to collect current usb-path to main servo. 928*9c5db199SXin Li 929*9c5db199SXin Li The usb-path is path to the folder where usb-device was enumerated. 930*9c5db199SXin Li If fail then will return an empty string (''). 931*9c5db199SXin Li 932*9c5db199SXin Li @returns: string, usb-path to the main servo device. 933*9c5db199SXin Li e.g.: '/sys/bus/usb/devices/1-6.1.3.1' 934*9c5db199SXin Li """ 935*9c5db199SXin Li # TODO remove try-except when fix crbug.com/1087964 936*9c5db199SXin Li try: 937*9c5db199SXin Li cmd = 'servodtool device -s %s usb-path' % self.servo_serial 938*9c5db199SXin Li resp = self.run(cmd, ignore_status=True, timeout=30) 939*9c5db199SXin Li except Exception as e: 940*9c5db199SXin Li # Here we catch only timeout errors. 941*9c5db199SXin Li # Other errors is filtered by ignore_status=True 942*9c5db199SXin Li logging.debug('Attempt to get servo usb-path failed due to ' 943*9c5db199SXin Li 'timeout; %s', e) 944*9c5db199SXin Li return '' 945*9c5db199SXin Li if resp.exit_status != 0: 946*9c5db199SXin Li self._process_servodtool_error(resp) 947*9c5db199SXin Li return '' 948*9c5db199SXin Li usb_path = self._to_str(resp.stdout.strip()) 949*9c5db199SXin Li 950*9c5db199SXin Li logging.info('Usb path of servo %s is %s', self.servo_serial, usb_path) 951*9c5db199SXin Li return usb_path 952*9c5db199SXin Li 953*9c5db199SXin Li def _get_servo_usb_devnum(self): 954*9c5db199SXin Li """Helper function to collect current usb devnum of servo.""" 955*9c5db199SXin Li usb_path = self.get_main_servo_usb_path() 956*9c5db199SXin Li if not usb_path: 957*9c5db199SXin Li return '' 958*9c5db199SXin Li resp = self.run('cat %s/devnum' % usb_path, ignore_status=True) 959*9c5db199SXin Li if resp.exit_status != 0: 960*9c5db199SXin Li self._process_servodtool_error(resp) 961*9c5db199SXin Li return '' 962*9c5db199SXin Li return self._to_str(resp.stdout.strip()) 963*9c5db199SXin Li 964*9c5db199SXin Li def reboot_servo_v3_on_need(self): 965*9c5db199SXin Li """Check and reboot servo_v3 based on below conditions. 966*9c5db199SXin Li 1. If there is an update pending on reboot. 967*9c5db199SXin Li 2. Servo_v3 has been up for more than 96 hours. 968*9c5db199SXin Li """ 969*9c5db199SXin Li if self.get_board() != 'beaglebone_servo': 970*9c5db199SXin Li logging.info('Servo reboot is only applicable for servo V3.') 971*9c5db199SXin Li return 972*9c5db199SXin Li 973*9c5db199SXin Li update_pending_reboot = (self._check_update_status() == 974*9c5db199SXin Li self.UPDATE_STATE.PENDING_REBOOT) 975*9c5db199SXin Li uptime_hours = float(self.check_uptime())/3600 976*9c5db199SXin Li logging.info('Uptime of servo_v3: %s hour(s)', uptime_hours) 977*9c5db199SXin Li long_up_time = uptime_hours > 96 978*9c5db199SXin Li 979*9c5db199SXin Li # Skip reboot if neither condition are met. 980*9c5db199SXin Li if not (update_pending_reboot or long_up_time): 981*9c5db199SXin Li return 982*9c5db199SXin Li 983*9c5db199SXin Li if update_pending_reboot: 984*9c5db199SXin Li message = 'Starting reboot servo_v3 because an update is pending.' 985*9c5db199SXin Li reboot_method = self._post_update_reboot 986*9c5db199SXin Li elif long_up_time: 987*9c5db199SXin Li message = 'Starting reboot servo_v3 because uptime > 96 hours.' 988*9c5db199SXin Li reboot_method = self._servo_host_reboot 989*9c5db199SXin Li self.record('INFO', None, None, message) 990*9c5db199SXin Li logging.info(message) 991*9c5db199SXin Li try: 992*9c5db199SXin Li reboot_method() 993*9c5db199SXin Li message = 'Servo_v3 reboot completed successfully.' 994*9c5db199SXin Li except Exception as e: 995*9c5db199SXin Li logging.debug("Fail to reboot servo_v3; %s", e) 996*9c5db199SXin Li message = ('Servo_v3 reboot failed, please check debug log ' 997*9c5db199SXin Li 'for details.') 998*9c5db199SXin Li logging.info(message) 999*9c5db199SXin Li self.record('INFO', None, None, message) 1000*9c5db199SXin Li 1001*9c5db199SXin Li def _reset_servo(self): 1002*9c5db199SXin Li logging.info('Resetting servo through smart usbhub.') 1003*9c5db199SXin Li # TODO remove try-except when fix crbug.com/1087964 1004*9c5db199SXin Li try: 1005*9c5db199SXin Li resp = self.run('servodtool device -s %s power-cycle' % 1006*9c5db199SXin Li self.servo_serial, ignore_status=True, 1007*9c5db199SXin Li timeout=30) 1008*9c5db199SXin Li if resp.exit_status != 0: 1009*9c5db199SXin Li self._process_servodtool_error(resp) 1010*9c5db199SXin Li return False 1011*9c5db199SXin Li except Exception as e: 1012*9c5db199SXin Li # Here we catch only timeout errors. 1013*9c5db199SXin Li # Other errors is filtered by ignore_status=True 1014*9c5db199SXin Li logging.debug('Attempt to reset servo failed due to timeout;' 1015*9c5db199SXin Li ' %s', e) 1016*9c5db199SXin Li return False 1017*9c5db199SXin Li 1018*9c5db199SXin Li logging.debug('Wait %s seconds for servo to come back from reset.', 1019*9c5db199SXin Li servo_constants.SERVO_RESET_TIMEOUT_SECONDS) 1020*9c5db199SXin Li time.sleep(servo_constants.SERVO_RESET_TIMEOUT_SECONDS) 1021*9c5db199SXin Li # change the flag so we can update this label in later process. 1022*9c5db199SXin Li self.smart_usbhub = True 1023*9c5db199SXin Li return True 1024*9c5db199SXin Li 1025*9c5db199SXin Li def reset_servo(self): 1026*9c5db199SXin Li """Reset(power-cycle) the servo via smart usbhub. 1027*9c5db199SXin Li """ 1028*9c5db199SXin Li if not self.is_labstation(): 1029*9c5db199SXin Li logging.info('Servo reset is not applicable to servo_v3.') 1030*9c5db199SXin Li return 1031*9c5db199SXin Li 1032*9c5db199SXin Li pre_reset_devnum = self._get_servo_usb_devnum() 1033*9c5db199SXin Li logging.info('Servo usb devnum before reset: %s', pre_reset_devnum) 1034*9c5db199SXin Li result = self._reset_servo() 1035*9c5db199SXin Li if not result: 1036*9c5db199SXin Li message = ('Failed to reset servo with serial: %s. (Please ignore' 1037*9c5db199SXin Li ' this error if the DUT is not connected to a smart' 1038*9c5db199SXin Li ' usbhub).' % self.servo_serial) 1039*9c5db199SXin Li logging.warning(message) 1040*9c5db199SXin Li self.record('INFO', None, None, message) 1041*9c5db199SXin Li return 1042*9c5db199SXin Li 1043*9c5db199SXin Li post_reset_devnum = self._get_servo_usb_devnum() 1044*9c5db199SXin Li logging.info('Servo usb devnum after reset: %s', post_reset_devnum) 1045*9c5db199SXin Li if not (pre_reset_devnum and post_reset_devnum): 1046*9c5db199SXin Li message = ('Servo reset completed but unable to verify' 1047*9c5db199SXin Li ' devnum change!') 1048*9c5db199SXin Li elif pre_reset_devnum != post_reset_devnum: 1049*9c5db199SXin Li message = ('Reset servo with serial %s completed successfully!' 1050*9c5db199SXin Li % self.servo_serial) 1051*9c5db199SXin Li else: 1052*9c5db199SXin Li message = 'Servo reset completed but devnum is still not changed!' 1053*9c5db199SXin Li logging.info(message) 1054*9c5db199SXin Li self.record('INFO', None, None, message) 1055*9c5db199SXin Li 1056*9c5db199SXin Li def _extract_compressed_logs(self, logdir, relevant_files): 1057*9c5db199SXin Li """Decompress servod logs in |logdir|. 1058*9c5db199SXin Li 1059*9c5db199SXin Li @param logdir: directory containing compressed servod logs. 1060*9c5db199SXin Li @param relevant_files: list of files in |logdir| to consider. 1061*9c5db199SXin Li 1062*9c5db199SXin Li @returns: tuple, (tarfiles, files) where 1063*9c5db199SXin Li tarfiles: list of the compressed filenames that have been 1064*9c5db199SXin Li extracted and deleted 1065*9c5db199SXin Li files: list of the uncompressed files that were generated 1066*9c5db199SXin Li """ 1067*9c5db199SXin Li # For all tar-files, first extract them to the directory, and 1068*9c5db199SXin Li # then let the common flow handle them. 1069*9c5db199SXin Li tarfiles = [cf for cf in relevant_files if 1070*9c5db199SXin Li cf.endswith(self.COMPRESSION_SUFFIX)] 1071*9c5db199SXin Li files = [] 1072*9c5db199SXin Li for f in tarfiles: 1073*9c5db199SXin Li norm_name = os.path.basename(f)[:-len(self.COMPRESSION_SUFFIX)] 1074*9c5db199SXin Li with tarfile.open(f) as tf: 1075*9c5db199SXin Li # Each tarfile has only one member, as 1076*9c5db199SXin Li # that's the compressed log. 1077*9c5db199SXin Li member = tf.members[0] 1078*9c5db199SXin Li # Manipulate so that it only extracts the basename, and not 1079*9c5db199SXin Li # the directories etc. 1080*9c5db199SXin Li member.name = norm_name 1081*9c5db199SXin Li files.append(os.path.join(logdir, member.name)) 1082*9c5db199SXin Li tf.extract(member, logdir) 1083*9c5db199SXin Li # File has been extracted: remove the compressed file. 1084*9c5db199SXin Li os.remove(f) 1085*9c5db199SXin Li return tarfiles, files 1086*9c5db199SXin Li 1087*9c5db199SXin Li def _extract_mcu_logs(self, log_subdir): 1088*9c5db199SXin Li """Extract MCU (EC, Cr50, etc) console output from servod debug logs. 1089*9c5db199SXin Li 1090*9c5db199SXin Li Using the MCU_EXTRACTOR regex (above) extract and split out MCU console 1091*9c5db199SXin Li lines from the logs to generate invidiual console logs e.g. after 1092*9c5db199SXin Li this method, you can find an ec.txt and servo_v4.txt in |log_dir| if 1093*9c5db199SXin Li those MCUs had any console input/output. 1094*9c5db199SXin Li 1095*9c5db199SXin Li @param log_subdir: directory with log.DEBUG.txt main servod debug logs. 1096*9c5db199SXin Li """ 1097*9c5db199SXin Li # Extract the MCU for each one. The MCU logs are only in the .DEBUG 1098*9c5db199SXin Li # files 1099*9c5db199SXin Li mcu_lines_file = os.path.join(log_subdir, 'log.DEBUG.txt') 1100*9c5db199SXin Li if not os.path.exists(mcu_lines_file): 1101*9c5db199SXin Li logging.info('No DEBUG logs found to extract MCU logs from.') 1102*9c5db199SXin Li return 1103*9c5db199SXin Li mcu_files = {} 1104*9c5db199SXin Li mcu_file_template = '%s.txt' 1105*9c5db199SXin Li with open(mcu_lines_file, 'r') as f: 1106*9c5db199SXin Li for line in f: 1107*9c5db199SXin Li match = self.MCU_EXTRACTOR.match(line) 1108*9c5db199SXin Li if match: 1109*9c5db199SXin Li mcu = match.group(self.MCU_GROUP).lower() 1110*9c5db199SXin Li # The |mcu| might contain a '/' in it. Replace it with '.' 1111*9c5db199SXin Li # to avoid generating bad filepaths. 1112*9c5db199SXin Li mcu = mcu.replace('/', '.') 1113*9c5db199SXin Li line = match.group(self.LINE_GROUP) 1114*9c5db199SXin Li if mcu not in mcu_files: 1115*9c5db199SXin Li mcu_file = os.path.join(log_subdir, 1116*9c5db199SXin Li mcu_file_template % mcu) 1117*9c5db199SXin Li mcu_files[mcu] = open(mcu_file, 'a') 1118*9c5db199SXin Li fd = mcu_files[mcu] 1119*9c5db199SXin Li fd.write(line + '\n') 1120*9c5db199SXin Li for f in mcu_files: 1121*9c5db199SXin Li mcu_files[f].close() 1122*9c5db199SXin Li 1123*9c5db199SXin Li def remove_latest_log_symlinks(self): 1124*9c5db199SXin Li """Remove the conveninence symlinks 'latest' servod logs.""" 1125*9c5db199SXin Li symlink_wildcard = '%s/latest*' % self.remote_log_dir 1126*9c5db199SXin Li cmd = 'rm ' + symlink_wildcard 1127*9c5db199SXin Li self.run(cmd, stderr_tee=None, ignore_status=True) 1128*9c5db199SXin Li 1129*9c5db199SXin Li def probe_servod_restart(self, instance_ts, outdir): 1130*9c5db199SXin Li """Grab servod logs from previous instances if part of this session. 1131*9c5db199SXin Li 1132*9c5db199SXin Li If since the last time this host called start_servod() servod crashed 1133*9c5db199SXin Li and restarted, this helper finds those logs as well, and stores them 1134*9c5db199SXin Li with the |OLD_LOG_SUFFIX| to investigate if necessary. 1135*9c5db199SXin Li 1136*9c5db199SXin Li It also issues a panicinfo command to servo devices after the restart 1137*9c5db199SXin Li to try and collect reboot information for debugging. 1138*9c5db199SXin Li 1139*9c5db199SXin Li @param instance_ts: the log timestamp that the current instance uses 1140*9c5db199SXin Li @param outdir: directory to create a subdirectory into to place the 1141*9c5db199SXin Li servod logs into. 1142*9c5db199SXin Li """ 1143*9c5db199SXin Li if self._initial_instance_ts is None: 1144*9c5db199SXin Li logging.info('No log timestamp grabbed successfully on servod ' 1145*9c5db199SXin Li 'startup. Cannot check device restarts. Ignoring.') 1146*9c5db199SXin Li return 1147*9c5db199SXin Li if instance_ts == self._initial_instance_ts: 1148*9c5db199SXin Li logging.debug('Servod appears to have run without restarting') 1149*9c5db199SXin Li return 1150*9c5db199SXin Li # Servod seems to have restarted (at least once). |_initial_instance_ts| 1151*9c5db199SXin Li # is the first timestamp, and instance_ts is the current timestamp. Find 1152*9c5db199SXin Li # all timestamps in between them, and grab the logs for each. 1153*9c5db199SXin Li tss = self._find_instance_timestamps_between(self._initial_instance_ts, 1154*9c5db199SXin Li instance_ts) 1155*9c5db199SXin Li logging.info('Servod has restarted %d times between the start and the ' 1156*9c5db199SXin Li 'end of this servo_host.', len(tss)) 1157*9c5db199SXin Li logging.info('This might be an issue. Will extract all logs from each ' 1158*9c5db199SXin Li 'instance.') 1159*9c5db199SXin Li logging.info('Logs that are not the currently running (about to turn ' 1160*9c5db199SXin Li 'down) instance are maked with a .%s in their folder.', 1161*9c5db199SXin Li self.OLD_LOG_SUFFIX) 1162*9c5db199SXin Li for ts in tss: 1163*9c5db199SXin Li self.get_instance_logs(ts, outdir, old=True) 1164*9c5db199SXin Li # Lastly, servod has restarted due to a potential issue. Try to get 1165*9c5db199SXin Li # panic information from servo micro and servo v4 for the current logs. 1166*9c5db199SXin Li # This can only happen if the |_servo| attribute is initialized. 1167*9c5db199SXin Li if self._servo: 1168*9c5db199SXin Li for mcu in ['servo_micro', 'servo_v4', 'servo_v4p1']: 1169*9c5db199SXin Li ctrl = '%s_uart_cmd' % mcu 1170*9c5db199SXin Li if self._servo.has_control(ctrl): 1171*9c5db199SXin Li logging.info('Trying to retrieve %r panicinfo into logs', 1172*9c5db199SXin Li mcu) 1173*9c5db199SXin Li try: 1174*9c5db199SXin Li self._servo.set_nocheck(ctrl, 'panicinfo') 1175*9c5db199SXin Li except error.TestFail as e: 1176*9c5db199SXin Li logging.error('Failed to generate panicinfo for %r ' 1177*9c5db199SXin Li 'logs. %s', mcu, str(e)) 1178*9c5db199SXin Li 1179*9c5db199SXin Li def _find_instance_timestamps_between(self, start_ts, end_ts): 1180*9c5db199SXin Li """Find all log timestamps between [start_ts, end_ts). 1181*9c5db199SXin Li 1182*9c5db199SXin Li @param start_ts: str, earliest log timestamp of interest 1183*9c5db199SXin Li @param end_ts: str, latest log timestamp of interest 1184*9c5db199SXin Li 1185*9c5db199SXin Li @returns: list, all timestamps between start_ts and end_ts, end_ts 1186*9c5db199SXin Li exclusive, on the servo_host. An empty list on errors 1187*9c5db199SXin Li """ 1188*9c5db199SXin Li # Simply get all timestamp, and then sort and remove 1189*9c5db199SXin Li cmd = 'ls %s' % self.remote_log_dir 1190*9c5db199SXin Li res = self.run(cmd, stderr_tee=None, ignore_status=True) 1191*9c5db199SXin Li if res.exit_status != 0: 1192*9c5db199SXin Li # Here we failed to find anything. 1193*9c5db199SXin Li logging.info('Failed to find remote servod logs. Ignoring.') 1194*9c5db199SXin Li return [] 1195*9c5db199SXin Li logfiles = self._to_str(res.stdout.strip().split()) 1196*9c5db199SXin Li timestamps = set() 1197*9c5db199SXin Li for logfile in logfiles: 1198*9c5db199SXin Li ts_match = self.TS_EXTRACTOR.match(logfile) 1199*9c5db199SXin Li if not ts_match: 1200*9c5db199SXin Li # Simply ignore files that fail the check. It might be the 1201*9c5db199SXin Li # 'latest' symlinks or random files. 1202*9c5db199SXin Li continue 1203*9c5db199SXin Li timestamps.add(ts_match.group(self.TS_GROUP)) 1204*9c5db199SXin Li # At this point we have all unique timestamps. 1205*9c5db199SXin Li timestamps = sorted(timestamps) 1206*9c5db199SXin Li for ts in [start_ts, end_ts]: 1207*9c5db199SXin Li if ts not in timestamps: 1208*9c5db199SXin Li logging.error('Timestamp %r not in servod logs. Cannot query ' 1209*9c5db199SXin Li 'for timestamps in between %r and %r', ts, 1210*9c5db199SXin Li start_ts, end_ts) 1211*9c5db199SXin Li return [] 1212*9c5db199SXin Li return timestamps[timestamps.index(start_ts):timestamps.index(end_ts)] 1213*9c5db199SXin Li 1214*9c5db199SXin Li def get_instance_logs_ts(self): 1215*9c5db199SXin Li """Retrieve the currently running servod instance's log timestamp 1216*9c5db199SXin Li 1217*9c5db199SXin Li @returns: str, timestamp for current instance, or None on failure 1218*9c5db199SXin Li """ 1219*9c5db199SXin Li # First, extract the timestamp. This cmd gives the real filename of 1220*9c5db199SXin Li # the latest aka current log file. 1221*9c5db199SXin Li cmd = ('if [ -f %(dir)s/latest.DEBUG ];' 1222*9c5db199SXin Li 'then realpath %(dir)s/latest.DEBUG;' 1223*9c5db199SXin Li 'elif [ -f %(dir)s/latest ];' 1224*9c5db199SXin Li 'then realpath %(dir)s/latest;' 1225*9c5db199SXin Li 'else exit %(code)d;' 1226*9c5db199SXin Li 'fi' % {'dir': self.remote_log_dir, 1227*9c5db199SXin Li 'code': self.NO_SYMLINKS_CODE}) 1228*9c5db199SXin Li res = self.run(cmd, stderr_tee=None, ignore_status=True) 1229*9c5db199SXin Li if res.exit_status != 0: 1230*9c5db199SXin Li if res.exit_status == self.NO_SYMLINKS_CODE: 1231*9c5db199SXin Li logging.warning('servod log latest symlinks not found. ' 1232*9c5db199SXin Li 'This is likely due to an error starting up ' 1233*9c5db199SXin Li 'servod. Ignoring..') 1234*9c5db199SXin Li else: 1235*9c5db199SXin Li logging.warning('Failed to find servod logs on servo host.') 1236*9c5db199SXin Li logging.warning(res.stderr.strip()) 1237*9c5db199SXin Li return None 1238*9c5db199SXin Li fname = os.path.basename(self._to_str(res.stdout.strip())) 1239*9c5db199SXin Li # From the fname, ought to extract the timestamp using the TS_EXTRACTOR 1240*9c5db199SXin Li if type(fname) == type(b' '): 1241*9c5db199SXin Li fname = fname.decode("utf-8") 1242*9c5db199SXin Li ts_match = self.TS_EXTRACTOR.match(fname) 1243*9c5db199SXin Li if not ts_match: 1244*9c5db199SXin Li logging.warning('Failed to extract timestamp from servod log file ' 1245*9c5db199SXin Li '%r. Skipping. The servo host is using outdated ' 1246*9c5db199SXin Li 'servod logging and needs to be updated.', fname) 1247*9c5db199SXin Li return None 1248*9c5db199SXin Li return ts_match.group(self.TS_GROUP) 1249*9c5db199SXin Li 1250*9c5db199SXin Li def get_servohost_logs(self, outdir): 1251*9c5db199SXin Li """Get logs that can help debugging servo/servod problem from 1252*9c5db199SXin Li the servohost 1253*9c5db199SXin Li 1254*9c5db199SXin Li @param outdir: directory to create a subdirectory into to place the 1255*9c5db199SXin Li servod logs into. 1256*9c5db199SXin Li """ 1257*9c5db199SXin Li log_dir = os.path.join(outdir, 'servohost_%s' % self.hostname) 1258*9c5db199SXin Li if os.path.isdir(log_dir): 1259*9c5db199SXin Li # In multi-DUTs testing, each DUTs will may their own servohost 1260*9c5db199SXin Li # instance, where could cause duplicate efforts if they share a 1261*9c5db199SXin Li # same servohost, so we can just skip the collect if the log 1262*9c5db199SXin Li # dir already exists. 1263*9c5db199SXin Li logging.info( 1264*9c5db199SXin Li 'Skip dmesg and messages logs collecting as %s' 1265*9c5db199SXin Li ' already exists.', log_dir) 1266*9c5db199SXin Li return 1267*9c5db199SXin Li logging.info('Collecting dmesg and messages from servohost %s', 1268*9c5db199SXin Li self.hostname) 1269*9c5db199SXin Li os.mkdir(log_dir) 1270*9c5db199SXin Li logging.info('Saving servohost logs to %s.', log_dir) 1271*9c5db199SXin Li # First collect dmesg from the servohost. 1272*9c5db199SXin Li crashcollect.collect_command(self, 'dmesg -H', 1273*9c5db199SXin Li os.path.join(log_dir, 'dmesg')) 1274*9c5db199SXin Li # Collect messages log from the servohost. 1275*9c5db199SXin Li if not self.is_containerized_servod(): 1276*9c5db199SXin Li try: 1277*9c5db199SXin Li self.get_file('/var/log/messages', log_dir, try_rsync=False) 1278*9c5db199SXin Li except error.AutoservRunError: 1279*9c5db199SXin Li logging.warning( 1280*9c5db199SXin Li 'Failed to collect messages log from servohost.') 1281*9c5db199SXin Li 1282*9c5db199SXin Li def get_servod_startup_log(self, outdir): 1283*9c5db199SXin Li """Get servod start_up log, this log is available even servod was 1284*9c5db199SXin Li not started successfully. 1285*9c5db199SXin Li 1286*9c5db199SXin Li @param outdir: directory to create a subdirectory into to place the 1287*9c5db199SXin Li servod logs into. 1288*9c5db199SXin Li """ 1289*9c5db199SXin Li if self.is_containerized_servod(): 1290*9c5db199SXin Li return 1291*9c5db199SXin Li log_dir = os.path.join(outdir, 'servod_startup_%s' % self.servo_port) 1292*9c5db199SXin Li os.mkdir(log_dir) 1293*9c5db199SXin Li start_up_log = '/var/log/servod_%s.STARTUP.log' % self.servo_port 1294*9c5db199SXin Li try: 1295*9c5db199SXin Li self.get_file(start_up_log, log_dir, try_rsync=False) 1296*9c5db199SXin Li except error.AutoservRunError: 1297*9c5db199SXin Li logging.warning('Failed to collect servod start up log' 1298*9c5db199SXin Li ' from servohost.') 1299*9c5db199SXin Li 1300*9c5db199SXin Li def get_instance_logs(self, instance_ts, outdir, old=False): 1301*9c5db199SXin Li """Collect all logs with |instance_ts| and dump into a dir in |outdir| 1302*9c5db199SXin Li 1303*9c5db199SXin Li This method first collects all logs on the servo_host side pertaining 1304*9c5db199SXin Li to this servod instance (port, instatiation). It glues them together 1305*9c5db199SXin Li into combined log.[level].txt files and extracts all available MCU 1306*9c5db199SXin Li console I/O from the logs into individual files e.g. servo_v4.txt 1307*9c5db199SXin Li 1308*9c5db199SXin Li All the output can be found in a directory inside |outdir| that 1309*9c5db199SXin Li this generates based on |LOG_DIR|, the servod port, and the instance 1310*9c5db199SXin Li timestamp on the servo_host side. 1311*9c5db199SXin Li 1312*9c5db199SXin Li @param instance_ts: log timestamp to grab logfiles for 1313*9c5db199SXin Li @param outdir: directory to create a subdirectory into to place the 1314*9c5db199SXin Li servod logs into. 1315*9c5db199SXin Li @param old: bool, whether to append |OLD_LOG_SUFFIX| to output dir 1316*9c5db199SXin Li """ 1317*9c5db199SXin Li # Create the local results log dir. 1318*9c5db199SXin Li log_dir = os.path.join(outdir, '%s_%s.%s' % (self.LOG_DIR, 1319*9c5db199SXin Li str(self.servo_port), 1320*9c5db199SXin Li instance_ts)) 1321*9c5db199SXin Li if old: 1322*9c5db199SXin Li log_dir = '%s.%s' % (log_dir, self.OLD_LOG_SUFFIX) 1323*9c5db199SXin Li logging.info('Saving servod logs to %r.', log_dir) 1324*9c5db199SXin Li os.mkdir(log_dir) 1325*9c5db199SXin Li # Now, get all files with that timestamp. 1326*9c5db199SXin Li cmd = 'find %s -maxdepth 1 -name "log.%s*"' % (self.remote_log_dir, 1327*9c5db199SXin Li instance_ts) 1328*9c5db199SXin Li res = self.run(cmd, stderr_tee=None, ignore_status=True) 1329*9c5db199SXin Li files = self._to_str(res.stdout.strip()).split() 1330*9c5db199SXin Li try: 1331*9c5db199SXin Li if self.is_containerized_servod(): 1332*9c5db199SXin Li client = docker_utils.get_docker_client() 1333*9c5db199SXin Li container = client.containers.get(self.servod_container_name) 1334*9c5db199SXin Li for f in files: 1335*9c5db199SXin Li file_stream, stat = container.get_archive(f) 1336*9c5db199SXin Li tf = tempfile.NamedTemporaryFile(delete=False) 1337*9c5db199SXin Li for block in file_stream: 1338*9c5db199SXin Li tf.write(block) 1339*9c5db199SXin Li tf.close() 1340*9c5db199SXin Li pw_tar = tarfile.TarFile(tf.name) 1341*9c5db199SXin Li pw_tar.extractall(log_dir) 1342*9c5db199SXin Li os.remove(tf.name) 1343*9c5db199SXin Li else: 1344*9c5db199SXin Li self.get_file(files, log_dir, try_rsync=False) 1345*9c5db199SXin Li 1346*9c5db199SXin Li if not os.listdir(log_dir): 1347*9c5db199SXin Li logging.info('No servod logs retrieved. Ignoring, and removing ' 1348*9c5db199SXin Li '%r again.', log_dir) 1349*9c5db199SXin Li os.rmdir(log_dir) 1350*9c5db199SXin Li return 1351*9c5db199SXin Li except docker.errors.NotFound: 1352*9c5db199SXin Li logging.info("Servod container %s not found no need to stop it.", 1353*9c5db199SXin Li self.hostname) 1354*9c5db199SXin Li except error.AutoservRunError as e: 1355*9c5db199SXin Li result = e.result_obj 1356*9c5db199SXin Li if result.exit_status != 0: 1357*9c5db199SXin Li stderr = result.stderr.strip() 1358*9c5db199SXin Li logging.warning("Couldn't retrieve servod logs. Ignoring: %s", 1359*9c5db199SXin Li stderr or '\n%s' % result) 1360*9c5db199SXin Li # Remove the log_dir as nothing was added to it. 1361*9c5db199SXin Li os.rmdir(log_dir) 1362*9c5db199SXin Li return 1363*9c5db199SXin Li local_files = [os.path.join(log_dir, f) for f in os.listdir(log_dir)] 1364*9c5db199SXin Li # TODO(crrev.com/c/1793030): remove no-level case once CL is pushed 1365*9c5db199SXin Li for level_name in ('DEBUG', 'INFO', 'WARNING', ''): 1366*9c5db199SXin Li # Create the joint files for each loglevel. i.e log.DEBUG 1367*9c5db199SXin Li joint_file = self.JOINT_LOG_PREFIX 1368*9c5db199SXin Li if level_name: 1369*9c5db199SXin Li joint_file = '%s.%s' % (self.JOINT_LOG_PREFIX, level_name) 1370*9c5db199SXin Li # This helps with some online tools to avoid complaints about an 1371*9c5db199SXin Li # unknown filetype. 1372*9c5db199SXin Li joint_file = joint_file + '.txt' 1373*9c5db199SXin Li joint_path = os.path.join(log_dir, joint_file) 1374*9c5db199SXin Li files = [f for f in local_files if level_name in f] 1375*9c5db199SXin Li if not files: 1376*9c5db199SXin Li # TODO(crrev.com/c/1793030): remove no-level case once CL 1377*9c5db199SXin Li # is pushed 1378*9c5db199SXin Li continue 1379*9c5db199SXin Li # Extract compressed logs if any. 1380*9c5db199SXin Li compressed, extracted = self._extract_compressed_logs(log_dir, 1381*9c5db199SXin Li files) 1382*9c5db199SXin Li files = list(set(files) - set(compressed)) 1383*9c5db199SXin Li files.extend(extracted) 1384*9c5db199SXin Li # Need to sort. As they all share the same timestamp, and 1385*9c5db199SXin Li # loglevel, the index itself is sufficient. The highest index 1386*9c5db199SXin Li # is the oldest file, therefore we need a descending sort. 1387*9c5db199SXin Li def sortkey(f, level=level_name): 1388*9c5db199SXin Li """Custom sortkey to sort based on rotation number int.""" 1389*9c5db199SXin Li if f.endswith(level_name): return 0 1390*9c5db199SXin Li return int(f.split('.')[-1]) 1391*9c5db199SXin Li 1392*9c5db199SXin Li files.sort(reverse=True, key=sortkey) 1393*9c5db199SXin Li # Just rename the first file rather than building from scratch. 1394*9c5db199SXin Li os.rename(files[0], joint_path) 1395*9c5db199SXin Li with open(joint_path, 'a') as joint_f: 1396*9c5db199SXin Li for logfile in files[1:]: 1397*9c5db199SXin Li # Transfer the file to the joint file line by line. 1398*9c5db199SXin Li with open(logfile, 'r') as log_f: 1399*9c5db199SXin Li for line in log_f: 1400*9c5db199SXin Li joint_f.write(line) 1401*9c5db199SXin Li # File has been written over. Delete safely. 1402*9c5db199SXin Li os.remove(logfile) 1403*9c5db199SXin Li # Need to remove all files form |local_files| so we don't 1404*9c5db199SXin Li # analyze them again. 1405*9c5db199SXin Li local_files = list(set(local_files) - set(files) - set(compressed)) 1406*9c5db199SXin Li # Lastly, extract MCU logs from the joint logs. 1407*9c5db199SXin Li self._extract_mcu_logs(log_dir) 1408*9c5db199SXin Li 1409*9c5db199SXin Li def _lock(self): 1410*9c5db199SXin Li """lock servohost by touching a file. 1411*9c5db199SXin Li """ 1412*9c5db199SXin Li logging.debug('Locking servohost %s by touching %s file', 1413*9c5db199SXin Li self.hostname, self._lock_file) 1414*9c5db199SXin Li self.run('touch %s' % self._lock_file, ignore_status=True) 1415*9c5db199SXin Li self._is_locked = True 1416*9c5db199SXin Li 1417*9c5db199SXin Li def _unlock(self): 1418*9c5db199SXin Li """Unlock servohost by removing the lock file. 1419*9c5db199SXin Li """ 1420*9c5db199SXin Li logging.debug('Unlocking servohost by removing %s file', 1421*9c5db199SXin Li self._lock_file) 1422*9c5db199SXin Li self.run('rm %s' % self._lock_file, ignore_status=True) 1423*9c5db199SXin Li self._is_locked = False 1424*9c5db199SXin Li 1425*9c5db199SXin Li def close(self): 1426*9c5db199SXin Li """Close the associated servo and the host object.""" 1427*9c5db199SXin Li # NOTE: throughout this method there are multiple attempts to catch 1428*9c5db199SXin Li # all errors. This is WAI as log grabbing should not fail tests. 1429*9c5db199SXin Li # However, the goal is to catch and handle/process all errors, thus 1430*9c5db199SXin Li # we print the traceback and ask for a bug. 1431*9c5db199SXin Li if self._closed: 1432*9c5db199SXin Li logging.debug('ServoHost is already closed.') 1433*9c5db199SXin Li return 1434*9c5db199SXin Li 1435*9c5db199SXin Li # Only attempt ssh related actions if servohost is sshable. We call 1436*9c5db199SXin Li # check_cached_up_status() first because it's lightweighted and return 1437*9c5db199SXin Li # much faster in the case servohost is down, however, we still want 1438*9c5db199SXin Li # to call is_up() later since check_cached_up_status() is ping based check 1439*9c5db199SXin Li # and not guarantee the servohost is sshable. 1440*9c5db199SXin Li servo_host_ready = self.check_cached_up_status() and self.is_up() 1441*9c5db199SXin Li 1442*9c5db199SXin Li # If the dockerized servod is in used, we can start a new servod container 1443*9c5db199SXin Li # with out the servod process for log collection. 1444*9c5db199SXin Li if (not self.is_localhost() and not servo_host_ready 1445*9c5db199SXin Li and self.is_containerized_servod()): 1446*9c5db199SXin Li logging.info( 1447*9c5db199SXin Li 'Start servod container without servod for log collection.' 1448*9c5db199SXin Li ) 1449*9c5db199SXin Li self.start_containerized_servod(with_servod=False) 1450*9c5db199SXin Li servo_host_ready = True 1451*9c5db199SXin Li 1452*9c5db199SXin Li # TODO(crbug.com/1011516): once enabled, remove the check against 1453*9c5db199SXin Li # localhost and instead check against log-rotiation enablement. 1454*9c5db199SXin Li should_collect_log = (servo_host_ready and self.job 1455*9c5db199SXin Li and not self.is_localhost()) 1456*9c5db199SXin Li if should_collect_log: 1457*9c5db199SXin Li instance_ts = self.get_instance_logs_ts() 1458*9c5db199SXin Li else: 1459*9c5db199SXin Li logging.info('Servohost is down, will skip servod log collecting.') 1460*9c5db199SXin Li instance_ts = None 1461*9c5db199SXin Li servod_logs_available = instance_ts is not None 1462*9c5db199SXin Li if servod_logs_available: 1463*9c5db199SXin Li # Probe whether there was a servod restart, and grab those old 1464*9c5db199SXin Li # logs as well. 1465*9c5db199SXin Li try: 1466*9c5db199SXin Li self.probe_servod_restart(instance_ts, self.job.resultdir) 1467*9c5db199SXin Li except (error.AutoservRunError, error.TestFail) as e: 1468*9c5db199SXin Li logging.info('Failed to grab servo logs due to: %s. ' 1469*9c5db199SXin Li 'This error is forgiven.', str(e)) 1470*9c5db199SXin Li except Exception as e: 1471*9c5db199SXin Li logging.error('Unexpected error probing for old logs. %s. ' 1472*9c5db199SXin Li 'Forgiven. Please file a bug and fix or catch ' 1473*9c5db199SXin Li 'in log probing function', str(e), 1474*9c5db199SXin Li exc_info=True) 1475*9c5db199SXin Li if self._servo: 1476*9c5db199SXin Li outdir = None if not self.job else self.job.resultdir 1477*9c5db199SXin Li # In some cases when we run as lab-tools, the job object is None. 1478*9c5db199SXin Li self._servo.close(outdir) 1479*9c5db199SXin Li try: 1480*9c5db199SXin Li if should_collect_log: 1481*9c5db199SXin Li self.get_servod_startup_log(self.job.resultdir) 1482*9c5db199SXin Li self.get_servohost_logs(self.job.resultdir) 1483*9c5db199SXin Li # Grab current (not old like above) logs after the servo instance 1484*9c5db199SXin Li # was closed out. 1485*9c5db199SXin Li if servod_logs_available: 1486*9c5db199SXin Li self.get_instance_logs(instance_ts, self.job.resultdir) 1487*9c5db199SXin Li except error.AutoservRunError as e: 1488*9c5db199SXin Li logging.info( 1489*9c5db199SXin Li 'Failed to grab servo logs due to: %s. ' 1490*9c5db199SXin Li 'This error is forgiven.', str(e)) 1491*9c5db199SXin Li except Exception as e: 1492*9c5db199SXin Li logging.error( 1493*9c5db199SXin Li 'Unexpected error grabbing servod logs. %s. ' 1494*9c5db199SXin Li 'Forgiven. Please file a bug and fix or catch ' 1495*9c5db199SXin Li 'in log grabbing function', 1496*9c5db199SXin Li str(e), 1497*9c5db199SXin Li exc_info=True) 1498*9c5db199SXin Li 1499*9c5db199SXin Li if self._is_locked and servo_host_ready: 1500*9c5db199SXin Li # Remove the lock if the servohost has been locked. 1501*9c5db199SXin Li try: 1502*9c5db199SXin Li self._unlock() 1503*9c5db199SXin Li except error.AutoservSSHTimeout: 1504*9c5db199SXin Li logging.error('Unlock servohost failed due to ssh timeout.' 1505*9c5db199SXin Li ' It may caused by servohost went down during' 1506*9c5db199SXin Li ' the task.') 1507*9c5db199SXin Li # We want always stop servod after task to minimum the impact of bad 1508*9c5db199SXin Li # servod process interfere other servods.(see crbug.com/1028665) 1509*9c5db199SXin Li if servo_host_ready: 1510*9c5db199SXin Li try: 1511*9c5db199SXin Li self.stop_servod() 1512*9c5db199SXin Li except error.AutoservRunError as e: 1513*9c5db199SXin Li logging.info( 1514*9c5db199SXin Li "Failed to stop servod due to:\n%s\n" 1515*9c5db199SXin Li "This error is forgiven.", str(e)) 1516*9c5db199SXin Li 1517*9c5db199SXin Li super(ServoHost, self).close() 1518*9c5db199SXin Li # Mark closed. 1519*9c5db199SXin Li self._closed = True 1520*9c5db199SXin Li 1521*9c5db199SXin Li def get_servo_state(self): 1522*9c5db199SXin Li return self._servo_state 1523*9c5db199SXin Li 1524*9c5db199SXin Li def _get_host_metrics_data(self): 1525*9c5db199SXin Li return {'port': self.servo_port, 1526*9c5db199SXin Li 'host': self.get_dut_hostname() or self.hostname, 1527*9c5db199SXin Li 'board': self.servo_board or ''} 1528*9c5db199SXin Li 1529*9c5db199SXin Li def is_servo_board_present_on_servo_v3(self): 1530*9c5db199SXin Li """Check if servo board is detected on servo_v3""" 1531*9c5db199SXin Li logging.debug('Started to detect servo board on servo_v3') 1532*9c5db199SXin Li vid_pids = ['18d1:5004', '0403:6014'] 1533*9c5db199SXin Li not_detected = 'The servo board is not detected on servo_v3' 1534*9c5db199SXin Li try: 1535*9c5db199SXin Li cmd = 'lsusb | grep "%s"' % "\|".join(vid_pids) 1536*9c5db199SXin Li result = self.run(cmd, ignore_status=True, timeout=30) 1537*9c5db199SXin Li if result.exit_status == 0 and self._to_str(result.stdout.strip()): 1538*9c5db199SXin Li logging.debug('The servo board is detected on servo_v3') 1539*9c5db199SXin Li return True 1540*9c5db199SXin Li logging.debug('%s; %s', not_detected, result) 1541*9c5db199SXin Li return False 1542*9c5db199SXin Li except Exception as e: 1543*9c5db199SXin Li # can be triggered by timeout issue due running the script 1544*9c5db199SXin Li metrics.Counter( 1545*9c5db199SXin Li 'chromeos/autotest/repair/servo_detection/timeout' 1546*9c5db199SXin Li ).increment(fields=self._get_host_metrics_data()) 1547*9c5db199SXin Li logging.error('%s; %s', not_detected, str(e)) 1548*9c5db199SXin Li return None 1549*9c5db199SXin Li 1550*9c5db199SXin Li def _require_cr50_servod_config(self): 1551*9c5db199SXin Li """Check whether we need start servod with CONFIG=cr50.xml""" 1552*9c5db199SXin Li dut_host_info = self.get_dut_host_info() 1553*9c5db199SXin Li if not dut_host_info: 1554*9c5db199SXin Li return False 1555*9c5db199SXin Li for pool in dut_host_info.pools: 1556*9c5db199SXin Li if pool.startswith(servo_constants.CR50_CONFIG_POOL_PREFIX): 1557*9c5db199SXin Li return True 1558*9c5db199SXin Li return False 1559*9c5db199SXin Li 1560*9c5db199SXin Li def get_verifier_state(self, tag): 1561*9c5db199SXin Li """Return the state of servo verifier. 1562*9c5db199SXin Li 1563*9c5db199SXin Li @returns: bool or None 1564*9c5db199SXin Li """ 1565*9c5db199SXin Li return self._repair_strategy.verifier_is_good(tag) 1566*9c5db199SXin Li 1567*9c5db199SXin Li def get_repair_strategy_node(self, tag): 1568*9c5db199SXin Li """Return the instance of verifier/repair node for host by tag. 1569*9c5db199SXin Li 1570*9c5db199SXin Li @returns: _DependencyNode or None 1571*9c5db199SXin Li """ 1572*9c5db199SXin Li return self._repair_strategy.node_by_tag(tag) 1573*9c5db199SXin Li 1574*9c5db199SXin Li def determine_servo_state(self): 1575*9c5db199SXin Li """Determine servo state based on the failed verifier. 1576*9c5db199SXin Li 1577*9c5db199SXin Li @returns: servo state value 1578*9c5db199SXin Li The state detecting based on first fail verifier or collecting of 1579*9c5db199SXin Li them. 1580*9c5db199SXin Li """ 1581*9c5db199SXin Li ssh = self.get_verifier_state('connection') 1582*9c5db199SXin Li servo_root_present = self.get_verifier_state('servo_root_present') 1583*9c5db199SXin Li servo_root_present_node = self.get_repair_strategy_node( 1584*9c5db199SXin Li 'servo_root_present') 1585*9c5db199SXin Li servo_v3_present = self.get_verifier_state('servo_v3_root_present') 1586*9c5db199SXin Li servo_fw = self.get_verifier_state('servo_fw') 1587*9c5db199SXin Li servo_fw_update = self.get_repair_strategy_node('servo_fw_update') 1588*9c5db199SXin Li servod_dut_controller_missing = self.get_repair_strategy_node( 1589*9c5db199SXin Li 'servod_dut_controller_missing') 1590*9c5db199SXin Li disk_space = self.get_verifier_state('servo_disk_space') 1591*9c5db199SXin Li start_servod = self.get_verifier_state('start_servod') 1592*9c5db199SXin Li servod_started = self.get_verifier_state('servod_started') 1593*9c5db199SXin Li servod_echo = self.get_verifier_state('servod_echo') 1594*9c5db199SXin Li create_servo = self.get_verifier_state('servod_connection') 1595*9c5db199SXin Li init_servo = self.get_verifier_state('servod_control') 1596*9c5db199SXin Li cr50_low_sbu = self.get_verifier_state('servo_cr50_low_sbu') 1597*9c5db199SXin Li cr50_off = self.get_verifier_state('servo_cr50_off') 1598*9c5db199SXin Li servo_topology = self.get_verifier_state('servo_topology') 1599*9c5db199SXin Li dut_connected = self.get_verifier_state('servo_dut_connected') 1600*9c5db199SXin Li hub_connected = self.get_verifier_state('servo_hub_connected') 1601*9c5db199SXin Li pwr_button = self.get_verifier_state('servo_pwr_button') 1602*9c5db199SXin Li lid_open = self.get_verifier_state('servo_lid_open') 1603*9c5db199SXin Li ec_board = self.get_verifier_state('servo_ec_console') 1604*9c5db199SXin Li cr50_console = self.get_verifier_state('servo_cr50_console') 1605*9c5db199SXin Li ccd_testlab = self.get_verifier_state('servo_ccd_testlab') 1606*9c5db199SXin Li 1607*9c5db199SXin Li if not ssh: 1608*9c5db199SXin Li return servo_constants.SERVO_STATE_NO_SSH 1609*9c5db199SXin Li if start_servod == hosts.VERIFY_FAILED: 1610*9c5db199SXin Li return servo_constants.SERVO_STATE_SERVO_HOST_ISSUE 1611*9c5db199SXin Li if servo_root_present == hosts.VERIFY_FAILED: 1612*9c5db199SXin Li if not self.servo_serial: 1613*9c5db199SXin Li return servo_constants.SERVO_STATE_WRONG_CONFIG 1614*9c5db199SXin Li if hasattr(servo_root_present_node, 'serial_mismatch'): 1615*9c5db199SXin Li return servo_constants.SERVO_STATE_SERIAL_MISMATCH 1616*9c5db199SXin Li return servo_constants.SERVO_STATE_NOT_CONNECTED 1617*9c5db199SXin Li if servo_v3_present == hosts.VERIFY_FAILED: 1618*9c5db199SXin Li # if we cannot find required board on servo_v3 1619*9c5db199SXin Li return servo_constants.SERVO_STATE_NEED_REPLACEMENT 1620*9c5db199SXin Li if servo_fw == hosts.VERIFY_FAILED: 1621*9c5db199SXin Li logging.info(servo_fw_update) 1622*9c5db199SXin Li if hasattr(servo_fw_update, 'servo_updater_issue_detected'): 1623*9c5db199SXin Li return servo_constants.SERVO_STATE_SERVO_UPDATER_ISSUE 1624*9c5db199SXin Li return servo_constants.SERVO_STATE_NEED_REPLACEMENT 1625*9c5db199SXin Li 1626*9c5db199SXin Li if dut_connected == hosts.VERIFY_FAILED: 1627*9c5db199SXin Li return servo_constants.SERVO_STATE_DUT_NOT_CONNECTED 1628*9c5db199SXin Li if hub_connected == hosts.VERIFY_FAILED: 1629*9c5db199SXin Li logging.info('Servo HUB not connected') 1630*9c5db199SXin Li return servo_constants.SERVO_STATE_DUT_NOT_CONNECTED 1631*9c5db199SXin Li 1632*9c5db199SXin Li if cr50_low_sbu == hosts.VERIFY_FAILED: 1633*9c5db199SXin Li return servo_constants.SERVO_STATE_SBU_LOW_VOLTAGE 1634*9c5db199SXin Li if cr50_off == hosts.VERIFY_FAILED: 1635*9c5db199SXin Li return servo_constants.SERVO_STATE_CR50_NOT_ENUMERATED 1636*9c5db199SXin Li 1637*9c5db199SXin Li if servod_dut_controller_missing == hosts.VERIFY_FAILED: 1638*9c5db199SXin Li return servo_constants.SERVO_STATE_SERVOD_DUT_CONTROLLER_MISSING 1639*9c5db199SXin Li if servo_topology == hosts.VERIFY_FAILED: 1640*9c5db199SXin Li return servo_constants.SERVO_STATE_TOPOLOGY_ISSUE 1641*9c5db199SXin Li 1642*9c5db199SXin Li # TODO(otabek@): detect special cases detected by pwr_button 1643*9c5db199SXin Li if dut_connected == hosts.VERIFY_SUCCESS: 1644*9c5db199SXin Li if pwr_button == hosts.VERIFY_FAILED: 1645*9c5db199SXin Li metrics.Counter( 1646*9c5db199SXin Li 'chromeos/autotest/repair/servo_unexpected/pwr_button2' 1647*9c5db199SXin Li ).increment(fields=self._get_host_metrics_data()) 1648*9c5db199SXin Li 1649*9c5db199SXin Li if (servod_started == hosts.VERIFY_FAILED 1650*9c5db199SXin Li or servod_echo == hosts.VERIFY_FAILED): 1651*9c5db199SXin Li return servo_constants.SERVO_STATE_SERVOD_ISSUE 1652*9c5db199SXin Li 1653*9c5db199SXin Li # one of the reason why servo can not initialized 1654*9c5db199SXin Li if cr50_console == hosts.VERIFY_FAILED: 1655*9c5db199SXin Li return servo_constants.SERVO_STATE_CR50_CONSOLE_MISSING 1656*9c5db199SXin Li if ccd_testlab == hosts.VERIFY_FAILED: 1657*9c5db199SXin Li return servo_constants.SERVO_STATE_CCD_TESTLAB_ISSUE 1658*9c5db199SXin Li 1659*9c5db199SXin Li if (create_servo == hosts.VERIFY_FAILED 1660*9c5db199SXin Li or init_servo == hosts.VERIFY_FAILED): 1661*9c5db199SXin Li return servo_constants.SERVO_STATE_SERVOD_PROXY_ISSUE 1662*9c5db199SXin Li 1663*9c5db199SXin Li if ec_board == hosts.VERIFY_FAILED: 1664*9c5db199SXin Li return servo_constants.SERVO_STATE_EC_BROKEN 1665*9c5db199SXin Li if pwr_button == hosts.VERIFY_FAILED: 1666*9c5db199SXin Li return servo_constants.SERVO_STATE_BAD_RIBBON_CABLE 1667*9c5db199SXin Li if lid_open == hosts.VERIFY_FAILED: 1668*9c5db199SXin Li return servo_constants.SERVO_STATE_LID_OPEN_FAILED 1669*9c5db199SXin Li 1670*9c5db199SXin Li metrics.Counter( 1671*9c5db199SXin Li 'chromeos/autotest/repair/unknown_servo_state' 1672*9c5db199SXin Li ).increment(fields=self._get_host_metrics_data()) 1673*9c5db199SXin Li logging.info('We do not have special state for this failure yet :)') 1674*9c5db199SXin Li return servo_constants.SERVO_STATE_BROKEN 1675*9c5db199SXin Li 1676*9c5db199SXin Li def is_servo_topology_supported(self): 1677*9c5db199SXin Li """Check if servo_topology is supported.""" 1678*9c5db199SXin Li if self.is_containerized_servod(): 1679*9c5db199SXin Li logging.info('Servod is running within a container.') 1680*9c5db199SXin Li return True 1681*9c5db199SXin Li if not self.is_up_fast(): 1682*9c5db199SXin Li logging.info('Servo-Host is not reachable.') 1683*9c5db199SXin Li return False 1684*9c5db199SXin Li if not self.servo_serial: 1685*9c5db199SXin Li logging.info('Servo-topology required a servo serial.') 1686*9c5db199SXin Li return False 1687*9c5db199SXin Li if self.is_labstation(): 1688*9c5db199SXin Li logging.info('Servod is running within labstation.') 1689*9c5db199SXin Li return True 1690*9c5db199SXin Li return False 1691*9c5db199SXin Li 1692*9c5db199SXin Li def get_topology(self): 1693*9c5db199SXin Li """Get servo topology.""" 1694*9c5db199SXin Li if not self._topology: 1695*9c5db199SXin Li self._topology = servo_topology.ServoTopology(self) 1696*9c5db199SXin Li return self._topology 1697*9c5db199SXin Li 1698*9c5db199SXin Li def is_dual_setup(self): 1699*9c5db199SXin Li """Check is servo will run in dual setup. 1700*9c5db199SXin Li 1701*9c5db199SXin Li Dual setup used only for servo_v4 when used ccd_cr50 and servo_micro 1702*9c5db199SXin Li at the same time. 1703*9c5db199SXin Li """ 1704*9c5db199SXin Li return self.servo_setup == servo_constants.SERVO_SETUP_VALUE_DUAL_V4 1705*9c5db199SXin Li 1706*9c5db199SXin Li def set_dut_health_profile(self, dut_health_profile): 1707*9c5db199SXin Li """ 1708*9c5db199SXin Li @param dut_health_profile: A DeviceHealthProfile object. 1709*9c5db199SXin Li """ 1710*9c5db199SXin Li logging.debug('setting dut_health_profile field to (%s)', 1711*9c5db199SXin Li dut_health_profile) 1712*9c5db199SXin Li self._dut_health_profile = dut_health_profile 1713*9c5db199SXin Li 1714*9c5db199SXin Li def get_dut_health_profile(self): 1715*9c5db199SXin Li """ 1716*9c5db199SXin Li @return A DeviceHealthProfile object. 1717*9c5db199SXin Li """ 1718*9c5db199SXin Li return self._dut_health_profile 1719*9c5db199SXin Li 1720*9c5db199SXin Li def print_all_servo_of_host(self): 1721*9c5db199SXin Li """Print all servos detected on the host.""" 1722*9c5db199SXin Li try: 1723*9c5db199SXin Li logging.info('\tDevices detected on the host:') 1724*9c5db199SXin Li devices = self.get_topology().get_list_available_servos() 1725*9c5db199SXin Li for device in devices: 1726*9c5db199SXin Li logging.info('\t%s', device) 1727*9c5db199SXin Li except Exception as e: 1728*9c5db199SXin Li logging.debug('(Not critical) Fail list all servos: %s', e) 1729*9c5db199SXin Li 1730*9c5db199SXin Li 1731*9c5db199SXin Lidef make_servo_hostname(dut_hostname): 1732*9c5db199SXin Li """Given a DUT's hostname, return the hostname of its servo. 1733*9c5db199SXin Li 1734*9c5db199SXin Li @param dut_hostname: hostname of a DUT. 1735*9c5db199SXin Li 1736*9c5db199SXin Li @return hostname of the DUT's servo. 1737*9c5db199SXin Li 1738*9c5db199SXin Li """ 1739*9c5db199SXin Li host_parts = dut_hostname.split('.') 1740*9c5db199SXin Li host_parts[0] = host_parts[0] + '-servo' 1741*9c5db199SXin Li return '.'.join(host_parts) 1742*9c5db199SXin Li 1743*9c5db199SXin Li 1744*9c5db199SXin Lidef _map_afe_board_to_servo_board(afe_board): 1745*9c5db199SXin Li """Map a board we get from the AFE to a servo appropriate value. 1746*9c5db199SXin Li 1747*9c5db199SXin Li Many boards are identical to other boards for servo's purposes. 1748*9c5db199SXin Li This function makes that mapping. 1749*9c5db199SXin Li 1750*9c5db199SXin Li @param afe_board string board name received from AFE. 1751*9c5db199SXin Li @return board we expect servo to have. 1752*9c5db199SXin Li 1753*9c5db199SXin Li """ 1754*9c5db199SXin Li KNOWN_SUFFIXES = ['-freon', '_freon', '_moblab', '-cheets'] 1755*9c5db199SXin Li BOARD_MAP = {'gizmo': 'panther'} 1756*9c5db199SXin Li mapped_board = afe_board 1757*9c5db199SXin Li if afe_board in BOARD_MAP: 1758*9c5db199SXin Li mapped_board = BOARD_MAP[afe_board] 1759*9c5db199SXin Li else: 1760*9c5db199SXin Li for suffix in KNOWN_SUFFIXES: 1761*9c5db199SXin Li if afe_board.endswith(suffix): 1762*9c5db199SXin Li mapped_board = afe_board[0:-len(suffix)] 1763*9c5db199SXin Li break 1764*9c5db199SXin Li if mapped_board != afe_board: 1765*9c5db199SXin Li logging.info('Mapping AFE board=%s to %s', afe_board, mapped_board) 1766*9c5db199SXin Li return mapped_board 1767*9c5db199SXin Li 1768*9c5db199SXin Li 1769*9c5db199SXin Lidef get_servo_args_for_host(dut_host): 1770*9c5db199SXin Li """Return servo data associated with a given DUT. 1771*9c5db199SXin Li 1772*9c5db199SXin Li @param dut_host Instance of `Host` on which to find the servo 1773*9c5db199SXin Li attributes. 1774*9c5db199SXin Li @return `servo_args` dict with host and an optional port. 1775*9c5db199SXin Li """ 1776*9c5db199SXin Li info = dut_host.host_info_store.get() 1777*9c5db199SXin Li servo_args = {k: v for k, v in six.iteritems(info.attributes) 1778*9c5db199SXin Li if k in servo_constants.SERVO_ATTR_KEYS} 1779*9c5db199SXin Li 1780*9c5db199SXin Li if servo_constants.SERVO_HOST_SSH_PORT_ATTR in servo_args: 1781*9c5db199SXin Li try: 1782*9c5db199SXin Li servo_args[servo_constants.SERVO_HOST_SSH_PORT_ATTR] = int( 1783*9c5db199SXin Li servo_args[servo_constants.SERVO_HOST_SSH_PORT_ATTR]) 1784*9c5db199SXin Li except ValueError: 1785*9c5db199SXin Li logging.error('servo host port is not an int: %s', 1786*9c5db199SXin Li servo_args[servo_constants.SERVO_HOST_SSH_PORT_ATTR]) 1787*9c5db199SXin Li # Reset servo_args because we don't want to use an invalid port. 1788*9c5db199SXin Li servo_args.pop(servo_constants.SERVO_HOST_SSH_PORT_ATTR, None) 1789*9c5db199SXin Li 1790*9c5db199SXin Li if servo_constants.SERVO_PORT_ATTR in servo_args: 1791*9c5db199SXin Li try: 1792*9c5db199SXin Li servo_args[servo_constants.SERVO_PORT_ATTR] = int( 1793*9c5db199SXin Li servo_args[servo_constants.SERVO_PORT_ATTR]) 1794*9c5db199SXin Li except ValueError: 1795*9c5db199SXin Li logging.error('servo port is not an int: %s', 1796*9c5db199SXin Li servo_args[servo_constants.SERVO_PORT_ATTR]) 1797*9c5db199SXin Li # Reset servo_args because we don't want to use an invalid port. 1798*9c5db199SXin Li servo_args.pop(servo_constants.SERVO_HOST_ATTR, None) 1799*9c5db199SXin Li 1800*9c5db199SXin Li if info.board: 1801*9c5db199SXin Li servo_board = _map_afe_board_to_servo_board(info.board) 1802*9c5db199SXin Li servo_args[servo_constants.SERVO_BOARD_ATTR] = servo_board 1803*9c5db199SXin Li if info.model: 1804*9c5db199SXin Li servo_args[servo_constants.SERVO_MODEL_ATTR] = info.model 1805*9c5db199SXin Li return servo_args if servo_constants.SERVO_HOST_ATTR in servo_args else None 1806*9c5db199SXin Li 1807*9c5db199SXin Li 1808*9c5db199SXin Lidef _tweak_args_for_ssp_moblab(servo_args): 1809*9c5db199SXin Li if (servo_args[servo_constants.SERVO_HOST_ATTR] 1810*9c5db199SXin Li in ['localhost', '127.0.0.1']): 1811*9c5db199SXin Li servo_args[servo_constants.SERVO_HOST_ATTR] = _CONFIG.get_config_value( 1812*9c5db199SXin Li 'SSP', 'host_container_ip', type=str, default=None) 1813*9c5db199SXin Li 1814*9c5db199SXin Li 1815*9c5db199SXin Lidef create_servo_host(dut, 1816*9c5db199SXin Li servo_args, 1817*9c5db199SXin Li try_lab_servo=False, 1818*9c5db199SXin Li try_servo_repair=False, 1819*9c5db199SXin Li try_servo_recovery=True, 1820*9c5db199SXin Li dut_host_info=None, 1821*9c5db199SXin Li dut_health_profile=None): 1822*9c5db199SXin Li """Create a ServoHost object for a given DUT, if appropriate. 1823*9c5db199SXin Li 1824*9c5db199SXin Li This function attempts to create and verify or repair a `ServoHost` 1825*9c5db199SXin Li object for a servo connected to the given `dut`, subject to various 1826*9c5db199SXin Li constraints imposed by the parameters: 1827*9c5db199SXin Li * When the `servo_args` parameter is not `None`, a servo 1828*9c5db199SXin Li host must be created, and must be checked with `repair()`. 1829*9c5db199SXin Li * Otherwise, if a servo exists in the lab and `try_lab_servo` is 1830*9c5db199SXin Li true: 1831*9c5db199SXin Li * If `try_servo_repair` is true, then create a servo host and 1832*9c5db199SXin Li check it with `repair()`. 1833*9c5db199SXin Li * Otherwise, if the servo responds to `ping` then create a 1834*9c5db199SXin Li servo host and check it with `verify()`. 1835*9c5db199SXin Li 1836*9c5db199SXin Li In cases where `servo_args` was not `None`, repair failure 1837*9c5db199SXin Li exceptions are passed back to the caller; otherwise, exceptions 1838*9c5db199SXin Li are logged and then discarded. Note that this only happens in cases 1839*9c5db199SXin Li where we're called from a test (not special task) control file that 1840*9c5db199SXin Li has an explicit dependency on servo. In that case, we require that 1841*9c5db199SXin Li repair not write to `status.log`, so as to avoid polluting test 1842*9c5db199SXin Li results. 1843*9c5db199SXin Li 1844*9c5db199SXin Li TODO(jrbarnette): The special handling for servo in test control 1845*9c5db199SXin Li files is a thorn in my flesh; I dearly hope to see it cut out before 1846*9c5db199SXin Li my retirement. 1847*9c5db199SXin Li 1848*9c5db199SXin Li Parameters for a servo host consist of a host name, port number, and 1849*9c5db199SXin Li DUT board, and are determined from one of these sources, in order of 1850*9c5db199SXin Li priority: 1851*9c5db199SXin Li * Servo attributes from the `dut` parameter take precedence over 1852*9c5db199SXin Li all other sources of information. 1853*9c5db199SXin Li * If a DNS entry for the servo based on the DUT hostname exists in 1854*9c5db199SXin Li the CrOS lab network, that hostname is used with the default 1855*9c5db199SXin Li port and the DUT's board. 1856*9c5db199SXin Li * If no other options are found, the parameters will be taken 1857*9c5db199SXin Li from the `servo_args` dict passed in from the caller. 1858*9c5db199SXin Li 1859*9c5db199SXin Li @param dut: An instance of `Host` from which to take 1860*9c5db199SXin Li servo parameters (if available). 1861*9c5db199SXin Li @param servo_args: A dictionary with servo parameters to use if 1862*9c5db199SXin Li they can't be found from `dut`. If this 1863*9c5db199SXin Li argument is supplied, unrepaired exceptions 1864*9c5db199SXin Li from `verify()` will be passed back to the 1865*9c5db199SXin Li caller. 1866*9c5db199SXin Li @param try_lab_servo: If not true, servo host creation will be 1867*9c5db199SXin Li skipped unless otherwise required by the 1868*9c5db199SXin Li caller. 1869*9c5db199SXin Li @param try_servo_repair: If true, check a servo host with 1870*9c5db199SXin Li `repair()` instead of `verify()`. 1871*9c5db199SXin Li @param try_servo_recovery: If true, start servod in recovery mode. 1872*9c5db199SXin Li Default value is True. 1873*9c5db199SXin Li @param dut_host_info: A HostInfo object of the DUT that connected 1874*9c5db199SXin Li to this servo. 1875*9c5db199SXin Li @param dut_health_profile: DUT repair info with history. 1876*9c5db199SXin Li 1877*9c5db199SXin Li @returns: A ServoHost object or None. See comments above. 1878*9c5db199SXin Li 1879*9c5db199SXin Li """ 1880*9c5db199SXin Li # We are explicitly looking for if servo_args is None here(which means 1881*9c5db199SXin Li # servo not needed), as servo_args == {} means servo is needed and 1882*9c5db199SXin Li # we expect load servo_args from host_info_store. 1883*9c5db199SXin Li if servo_args is None: 1884*9c5db199SXin Li servo_dependency = False 1885*9c5db199SXin Li local_run = False 1886*9c5db199SXin Li else: 1887*9c5db199SXin Li servo_dependency = True 1888*9c5db199SXin Li # If servo_args pass in directly, then this is a local test run. 1889*9c5db199SXin Li local_run = servo_constants.SERVO_HOST_ATTR in servo_args 1890*9c5db199SXin Li 1891*9c5db199SXin Li if local_run: 1892*9c5db199SXin Li logging.warning('User input servo_args detected, will attempt' 1893*9c5db199SXin Li ' to start servod and initialize servo conncetion' 1894*9c5db199SXin Li ' directly. All servo/servohost verify and repair' 1895*9c5db199SXin Li ' steps will be skipped.') 1896*9c5db199SXin Li 1897*9c5db199SXin Li # Loading servo args from host_info_store. 1898*9c5db199SXin Li if dut is not None and (try_lab_servo or servo_dependency): 1899*9c5db199SXin Li servo_args_override = get_servo_args_for_host(dut) 1900*9c5db199SXin Li if servo_args_override is not None: 1901*9c5db199SXin Li if utils.in_moblab_ssp(): 1902*9c5db199SXin Li _tweak_args_for_ssp_moblab(servo_args_override) 1903*9c5db199SXin Li logging.debug( 1904*9c5db199SXin Li 'Overriding provided servo_args (%s) with arguments' 1905*9c5db199SXin Li ' determined from the host (%s)', 1906*9c5db199SXin Li servo_args, 1907*9c5db199SXin Li servo_args_override, 1908*9c5db199SXin Li ) 1909*9c5db199SXin Li servo_args = servo_args_override 1910*9c5db199SXin Li 1911*9c5db199SXin Li if not servo_args: 1912*9c5db199SXin Li logging.debug('No servo_args provided, and failed to find overrides.') 1913*9c5db199SXin Li if try_lab_servo or servo_dependency: 1914*9c5db199SXin Li return None, servo_constants.SERVO_STATE_MISSING_CONFIG 1915*9c5db199SXin Li else: 1916*9c5db199SXin Li # For regular test case which not required the servo 1917*9c5db199SXin Li return None, None 1918*9c5db199SXin Li 1919*9c5db199SXin Li servo_hostname = servo_args.get(servo_constants.SERVO_HOST_ATTR) 1920*9c5db199SXin Li servo_port = servo_args.get(servo_constants.SERVO_PORT_ATTR) 1921*9c5db199SXin Li if not local_run: 1922*9c5db199SXin Li if not _is_servo_host_information_exist(servo_hostname, servo_port): 1923*9c5db199SXin Li logging.debug( 1924*9c5db199SXin Li 'Servo connection info missed hostname: %s , port: %s', 1925*9c5db199SXin Li servo_hostname, servo_port) 1926*9c5db199SXin Li return None, servo_constants.SERVO_STATE_MISSING_CONFIG 1927*9c5db199SXin Li if not is_servo_host_information_valid(servo_hostname, servo_port): 1928*9c5db199SXin Li logging.debug( 1929*9c5db199SXin Li 'Servo connection info is incorrect hostname: %s , port: %s', 1930*9c5db199SXin Li servo_hostname, servo_port) 1931*9c5db199SXin Li return None, servo_constants.SERVO_STATE_WRONG_CONFIG 1932*9c5db199SXin Li 1933*9c5db199SXin Li if try_servo_recovery == True: 1934*9c5db199SXin Li servo_args[servo_constants.SERVO_RECOVERY_MODE] = True 1935*9c5db199SXin Li 1936*9c5db199SXin Li newhost = ServoHost(**servo_args) 1937*9c5db199SXin Li if local_run: 1938*9c5db199SXin Li try: 1939*9c5db199SXin Li newhost.start_servod() 1940*9c5db199SXin Li except: 1941*9c5db199SXin Li # If we failed to start servod here, we can assume the servod 1942*9c5db199SXin Li # either already started or the test is running against a 1943*9c5db199SXin Li # non-standard servohost so the user will resiponsble for ensure 1944*9c5db199SXin Li # servod is running. 1945*9c5db199SXin Li pass 1946*9c5db199SXin Li try: 1947*9c5db199SXin Li newhost.initialize_servo() 1948*9c5db199SXin Li newhost.initialize_dut_for_servo() 1949*9c5db199SXin Li newhost._servo_state = servo_constants.SERVO_STATE_WORKING 1950*9c5db199SXin Li return newhost, newhost.get_servo_state() 1951*9c5db199SXin Li except Exception as e: 1952*9c5db199SXin Li logging.error('Failed to initialize servo. %s', e) 1953*9c5db199SXin Li return None, servo_constants.SERVO_STATE_BROKEN 1954*9c5db199SXin Li 1955*9c5db199SXin Li if newhost.is_containerized_servod(): 1956*9c5db199SXin Li # TODO(otabek@): Update for servod-manager. 1957*9c5db199SXin Li # Servod docker is not available for access. 1958*9c5db199SXin Li newhost.start_containerized_servod() 1959*9c5db199SXin Li # use_icmp is always set to true in autotest, but in labpack 1960*9c5db199SXin Li # this is determined by autoserv args. We need to make this consistent 1961*9c5db199SXin Li # across labpack and autotest b/205855910 1962*9c5db199SXin Li elif newhost.use_icmp and not newhost.is_up_fast(count=3): 1963*9c5db199SXin Li # ServoHost has internal check to wait if servo-host is in reboot 1964*9c5db199SXin Li # process. If servo-host still is not available this check will stop 1965*9c5db199SXin Li # further attempts as we do not have any option to recover servo_host. 1966*9c5db199SXin Li return None, servo_constants.SERVO_STATE_NO_SSH 1967*9c5db199SXin Li 1968*9c5db199SXin Li # Reset or reboot servo device only during AdminRepair tasks. 1969*9c5db199SXin Li if try_servo_repair: 1970*9c5db199SXin Li if newhost._is_locked: 1971*9c5db199SXin Li # Print available servos on the host for debugging. 1972*9c5db199SXin Li newhost.print_all_servo_of_host() 1973*9c5db199SXin Li # Reset servo if the servo is locked, as we check if the servohost 1974*9c5db199SXin Li # is up, if the servohost is labstation and if the servohost is in 1975*9c5db199SXin Li # lab inside the locking logic. 1976*9c5db199SXin Li newhost.reset_servo() 1977*9c5db199SXin Li else: 1978*9c5db199SXin Li try: 1979*9c5db199SXin Li newhost.reboot_servo_v3_on_need() 1980*9c5db199SXin Li except Exception as e: 1981*9c5db199SXin Li logging.info('[Non-critical] Unexpected error while trying to' 1982*9c5db199SXin Li ' reboot servo_v3, skipping the reboot; %s', e) 1983*9c5db199SXin Li 1984*9c5db199SXin Li if dut: 1985*9c5db199SXin Li newhost.set_dut_hostname(dut.hostname) 1986*9c5db199SXin Li if dut_host_info: 1987*9c5db199SXin Li newhost.set_dut_host_info(dut_host_info) 1988*9c5db199SXin Li if (dut_health_profile and (try_lab_servo or try_servo_repair)): 1989*9c5db199SXin Li try: 1990*9c5db199SXin Li if newhost.is_localhost(): 1991*9c5db199SXin Li logging.info('Servohost is a localhost, skip device' 1992*9c5db199SXin Li ' health profile setup...') 1993*9c5db199SXin Li else: 1994*9c5db199SXin Li dut_health_profile.init_profile(newhost) 1995*9c5db199SXin Li newhost.set_dut_health_profile(dut_health_profile) 1996*9c5db199SXin Li except Exception as e: 1997*9c5db199SXin Li logging.info( 1998*9c5db199SXin Li '[Non-critical] Unexpected error while trying to' 1999*9c5db199SXin Li ' load device health profile; %s', e) 2000*9c5db199SXin Li 2001*9c5db199SXin Li # Note that the logic of repair() includes everything done 2002*9c5db199SXin Li # by verify(). It's sufficient to call one or the other; 2003*9c5db199SXin Li # we don't need both. 2004*9c5db199SXin Li if servo_dependency: 2005*9c5db199SXin Li newhost.repair(silent=True) 2006*9c5db199SXin Li return newhost, newhost.get_servo_state() 2007*9c5db199SXin Li 2008*9c5db199SXin Li if try_servo_repair: 2009*9c5db199SXin Li try: 2010*9c5db199SXin Li newhost.repair() 2011*9c5db199SXin Li except Exception: 2012*9c5db199SXin Li logging.exception('servo repair failed for %s', newhost.hostname) 2013*9c5db199SXin Li else: 2014*9c5db199SXin Li try: 2015*9c5db199SXin Li newhost.verify() 2016*9c5db199SXin Li except Exception: 2017*9c5db199SXin Li logging.exception('servo verify failed for %s', newhost.hostname) 2018*9c5db199SXin Li return newhost, newhost.get_servo_state() 2019*9c5db199SXin Li 2020*9c5db199SXin Li 2021*9c5db199SXin Lidef _is_servo_host_information_exist(hostname, port): 2022*9c5db199SXin Li if hostname is None or len(hostname.strip()) == 0: 2023*9c5db199SXin Li return False 2024*9c5db199SXin Li if port is None: 2025*9c5db199SXin Li return False 2026*9c5db199SXin Li if not type(port) is int: 2027*9c5db199SXin Li try: 2028*9c5db199SXin Li int(port) 2029*9c5db199SXin Li except ValueError: 2030*9c5db199SXin Li return False 2031*9c5db199SXin Li return True 2032*9c5db199SXin Li 2033*9c5db199SXin Li 2034*9c5db199SXin Lidef is_servo_host_information_valid(hostname, port): 2035*9c5db199SXin Li """Check if provided servo attributes are valid. 2036*9c5db199SXin Li 2037*9c5db199SXin Li @param hostname Hostname of the servohost. 2038*9c5db199SXin Li @param port servo port number. 2039*9c5db199SXin Li 2040*9c5db199SXin Li @returns: A bool value to indicate if provided servo attribute valid. 2041*9c5db199SXin Li """ 2042*9c5db199SXin Li if not _is_servo_host_information_exist(hostname, port): 2043*9c5db199SXin Li return False 2044*9c5db199SXin Li # checking range and correct of the port 2045*9c5db199SXin Li port_int = int(port) 2046*9c5db199SXin Li if port_int < 1 or port_int > 65000: 2047*9c5db199SXin Li return False 2048*9c5db199SXin Li # we expecting host contain only latters, digits and '-' or '_' 2049*9c5db199SXin Li if not re.match('[a-zA-Z0-9-_\.:]*$', hostname) or len(hostname) < 5: 2050*9c5db199SXin Li return False 2051*9c5db199SXin Li return True 2052