xref: /aosp_15_r20/external/autotest/server/hosts/servo_host.py (revision 9c5db1993ded3edbeafc8092d69fe5de2ee02df7)
1*9c5db199SXin Li# Lint as: python2, python3
2*9c5db199SXin Li# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
3*9c5db199SXin Li# Use of this source code is governed by a BSD-style license that can be
4*9c5db199SXin Li# found in the LICENSE file.
5*9c5db199SXin Li#
6*9c5db199SXin Li# Expects to be run in an environment with sudo and no interactive password
7*9c5db199SXin Li# prompt, such as within the Chromium OS development chroot.
8*9c5db199SXin Li
9*9c5db199SXin Li
10*9c5db199SXin Li"""This file provides core logic for servo verify/repair process."""
11*9c5db199SXin Li
12*9c5db199SXin Li
13*9c5db199SXin Lifrom __future__ import absolute_import
14*9c5db199SXin Lifrom __future__ import division
15*9c5db199SXin Lifrom __future__ import print_function
16*9c5db199SXin Li
17*9c5db199SXin Liimport logging
18*9c5db199SXin Liimport os
19*9c5db199SXin Liimport re
20*9c5db199SXin Liimport tarfile
21*9c5db199SXin Liimport threading
22*9c5db199SXin Liimport json
23*9c5db199SXin Liimport tempfile
24*9c5db199SXin Liimport time
25*9c5db199SXin Liimport six
26*9c5db199SXin Liimport six.moves.xmlrpc_client
27*9c5db199SXin Li
28*9c5db199SXin Litry:
29*9c5db199SXin Li    import docker
30*9c5db199SXin Li    from autotest_lib.site_utils.docker import utils as docker_utils
31*9c5db199SXin Liexcept ImportError:
32*9c5db199SXin Li    logging.info("Docker API is not installed in this environment")
33*9c5db199SXin Li
34*9c5db199SXin Lifrom autotest_lib.client.bin import utils
35*9c5db199SXin Lifrom autotest_lib.client.common_lib import error
36*9c5db199SXin Lifrom autotest_lib.client.common_lib import hosts
37*9c5db199SXin Lifrom autotest_lib.client.common_lib import lsbrelease_utils
38*9c5db199SXin Lifrom autotest_lib.client.common_lib.cros import retry
39*9c5db199SXin Lifrom autotest_lib.server import crashcollect
40*9c5db199SXin Lifrom autotest_lib.server.cros.servo import servo
41*9c5db199SXin Lifrom autotest_lib.server.hosts import servo_repair
42*9c5db199SXin Lifrom autotest_lib.server.hosts import base_servohost
43*9c5db199SXin Lifrom autotest_lib.server.hosts import servo_constants
44*9c5db199SXin Lifrom autotest_lib.server.cros.faft.utils import config
45*9c5db199SXin Lifrom autotest_lib.client.common_lib import global_config
46*9c5db199SXin Lifrom autotest_lib.server.cros.servo.topology import servo_topology
47*9c5db199SXin Li
48*9c5db199SXin Litry:
49*9c5db199SXin Li    from autotest_lib.utils.frozen_chromite.lib import metrics
50*9c5db199SXin Liexcept ImportError:
51*9c5db199SXin Li    metrics = utils.metrics_mock
52*9c5db199SXin Li
53*9c5db199SXin Li_CONFIG = global_config.global_config
54*9c5db199SXin Li
55*9c5db199SXin LiSERVOD_CONTAINER_IMAGE_PATH = "us-docker.pkg.dev/chromeos-partner-moblab/common-core"
56*9c5db199SXin Li
57*9c5db199SXin Li# Create the flag on drone or inside the ssp container to enable servod debug mode.
58*9c5db199SXin Li# The servod container will not be deleted after being stopped.
59*9c5db199SXin LiSERVOD_DEBUG_FLAG = '/servod_debug'
60*9c5db199SXin Li
61*9c5db199SXin LiDOCKER_SERVOD_DEBUG_MODE = os.environ.get('DOCKER_SERVOD_DEBUG_MODE', '0')
62*9c5db199SXin Li
63*9c5db199SXin Li
64*9c5db199SXin Liclass ServoHost(base_servohost.BaseServoHost):
65*9c5db199SXin Li    """Host class for a servo host(e.g. beaglebone, labstation)
66*9c5db199SXin Li     that with a servo instance for a specific port.
67*9c5db199SXin Li
68*9c5db199SXin Li     @type _servo: servo.Servo | None
69*9c5db199SXin Li     """
70*9c5db199SXin Li
71*9c5db199SXin Li    DEFAULT_PORT = int(os.getenv('SERVOD_PORT', '9999'))
72*9c5db199SXin Li
73*9c5db199SXin Li    # Timeout for initializing servo signals.
74*9c5db199SXin Li    INITIALIZE_SERVO_TIMEOUT_SECS = 60
75*9c5db199SXin Li
76*9c5db199SXin Li    # Default timeout for run terminal command.
77*9c5db199SXin Li    DEFAULT_TERMINAL_TIMEOUT = 30
78*9c5db199SXin Li
79*9c5db199SXin Li    # Ready test function
80*9c5db199SXin Li    SERVO_READY_METHOD = 'get_version'
81*9c5db199SXin Li
82*9c5db199SXin Li    # Directory prefix on the servo host where the servod logs are stored.
83*9c5db199SXin Li    SERVOD_LOG_PREFIX = '/var/log/servod'
84*9c5db199SXin Li
85*9c5db199SXin Li    # Exit code to use when symlinks for servod logs are not found.
86*9c5db199SXin Li    NO_SYMLINKS_CODE = 9
87*9c5db199SXin Li
88*9c5db199SXin Li    # Directory in the job's results directory to dump the logs into.
89*9c5db199SXin Li    LOG_DIR = 'servod'
90*9c5db199SXin Li
91*9c5db199SXin Li    # Prefix for joint loglevel files in the logs.
92*9c5db199SXin Li    JOINT_LOG_PREFIX = 'log'
93*9c5db199SXin Li
94*9c5db199SXin Li    # Regex group to extract timestamp from logfile name.
95*9c5db199SXin Li    TS_GROUP = 'ts'
96*9c5db199SXin Li
97*9c5db199SXin Li    # This regex is used to extract the timestamp from servod logs.
98*9c5db199SXin Li    # files always start with log.
99*9c5db199SXin Li    TS_RE = (r'log.'
100*9c5db199SXin Li             # The timestamp is of format %Y-%m-%d--%H-%M-%S.MS
101*9c5db199SXin Li             r'(?P<%s>\d{4}(\-\d{2}){2}\-(-\d{2}){3}.\d{3})'
102*9c5db199SXin Li             # The loglevel is optional depending on labstation version.
103*9c5db199SXin Li             r'(.(INFO|DEBUG|WARNING))?' % TS_GROUP)
104*9c5db199SXin Li    TS_EXTRACTOR = re.compile(TS_RE)
105*9c5db199SXin Li
106*9c5db199SXin Li    # Regex group to extract MCU name from logline in servod logs.
107*9c5db199SXin Li    MCU_GROUP = 'mcu'
108*9c5db199SXin Li
109*9c5db199SXin Li    # Regex group to extract logline from MCU logline in servod logs.
110*9c5db199SXin Li    LINE_GROUP = 'line'
111*9c5db199SXin Li
112*9c5db199SXin Li    # This regex is used to extract the mcu and the line content from an
113*9c5db199SXin Li    # MCU logline in servod logs. e.g. EC or servo_v4 console logs.
114*9c5db199SXin Li    # Here is an example log-line:
115*9c5db199SXin Li    #
116*9c5db199SXin Li    # 2020-01-23 13:15:12,223 - servo_v4 - EC3PO.Console - DEBUG -
117*9c5db199SXin Li    # console.py:219:LogConsoleOutput - /dev/pts/9 - cc polarity: cc1
118*9c5db199SXin Li    #
119*9c5db199SXin Li    # Here is conceptually how they are formatted:
120*9c5db199SXin Li    #
121*9c5db199SXin Li    #  <time> - <MCU> - EC3PO.Console - <LVL> - <file:line:func> - <pts> -
122*9c5db199SXin Li    #  <output>
123*9c5db199SXin Li    #
124*9c5db199SXin Li    # The log format starts with a timestamp
125*9c5db199SXin Li    MCU_RE = (r'[\d\-]+ [\d:,]+ '
126*9c5db199SXin Li              # The mcu that is logging this is next.
127*9c5db199SXin Li              r'- (?P<%s>[\w/]+) - '
128*9c5db199SXin Li              # Next, we have more log outputs before the actual line.
129*9c5db199SXin Li              # Information about the file line, logging function etc.
130*9c5db199SXin Li              # Anchor on EC3PO Console, LogConsoleOutput and dev/pts.
131*9c5db199SXin Li              # NOTE: if the log format changes, this regex needs to be
132*9c5db199SXin Li              # adjusted.
133*9c5db199SXin Li              r'EC3PO\.Console[\s\-\w\d:.]+LogConsoleOutput - /dev/pts/\d+ - '
134*9c5db199SXin Li              # Lastly, we get the MCU's console line.
135*9c5db199SXin Li              r'(?P<%s>.+$)' % (MCU_GROUP, LINE_GROUP))
136*9c5db199SXin Li    MCU_EXTRACTOR = re.compile(MCU_RE)
137*9c5db199SXin Li
138*9c5db199SXin Li    # Regex to detect timeout messages when USBC pigtail has timeout issue.
139*9c5db199SXin Li    # e.g.:  [475635.427072 PD TMOUT RX 1/1]
140*9c5db199SXin Li    USBC_PIGTAIL_TIMEOUT_RE = r'\[[\d \.]{1,20}(PD TMOUT RX 1\/1)\]'
141*9c5db199SXin Li
142*9c5db199SXin Li    # Suffix to identify compressed logfiles.
143*9c5db199SXin Li    COMPRESSION_SUFFIX = '.tbz2'
144*9c5db199SXin Li
145*9c5db199SXin Li    # A suffix to mark servod log directories that came from instance that
146*9c5db199SXin Li    # ran during this servo_host, but are not the last one running e.g. when
147*9c5db199SXin Li    # an instance (on purpose, or due to a bug) restarted in the middle of the
148*9c5db199SXin Li    # run.
149*9c5db199SXin Li    OLD_LOG_SUFFIX = 'old'
150*9c5db199SXin Li
151*9c5db199SXin Li    def _init_attributes(self):
152*9c5db199SXin Li        self._servo_state = None
153*9c5db199SXin Li        self.servo_port = None
154*9c5db199SXin Li        self.servo_board = None
155*9c5db199SXin Li        self._ec_supported = None
156*9c5db199SXin Li        self.servo_model = None
157*9c5db199SXin Li        self.servo_serial = None
158*9c5db199SXin Li        self.servo_setup = None
159*9c5db199SXin Li        self.servo_recovery = None
160*9c5db199SXin Li        self.servo_fw_channel = None
161*9c5db199SXin Li        self.additional_servod_args = None
162*9c5db199SXin Li        self._dut_health_profile = None
163*9c5db199SXin Li        # The flag that indicate if a servo is connected to a smart usbhub.
164*9c5db199SXin Li        # TODO(xianuowang@) remove this flag once all usbhubs in the lab
165*9c5db199SXin Li        # get replaced.
166*9c5db199SXin Li        self.smart_usbhub = None
167*9c5db199SXin Li        self._servo = None
168*9c5db199SXin Li        self._topology = None
169*9c5db199SXin Li        self._tunnel_proxy = None
170*9c5db199SXin Li        self._tunnel_proxy_lock = threading.Lock()
171*9c5db199SXin Li        self._initial_instance_ts = None
172*9c5db199SXin Li        # Flag to make sure that multiple calls to close do not result in the
173*9c5db199SXin Li        # logic executing multiple times.
174*9c5db199SXin Li        self._closed = False
175*9c5db199SXin Li        # Per-thread local data
176*9c5db199SXin Li        self._local = threading.local()
177*9c5db199SXin Li
178*9c5db199SXin Li    def _initialize(self,
179*9c5db199SXin Li                    servo_host='localhost',
180*9c5db199SXin Li                    servo_port=DEFAULT_PORT,
181*9c5db199SXin Li                    servo_board=None,
182*9c5db199SXin Li                    servo_model=None,
183*9c5db199SXin Li                    servo_serial=None,
184*9c5db199SXin Li                    servo_setup=None,
185*9c5db199SXin Li                    servo_recovery=None,
186*9c5db199SXin Li                    servo_fw_channel=None,
187*9c5db199SXin Li                    additional_servod_args=None,
188*9c5db199SXin Li                    is_in_lab=None,
189*9c5db199SXin Li                    *args,
190*9c5db199SXin Li                    **dargs):
191*9c5db199SXin Li        """Initialize a ServoHost instance.
192*9c5db199SXin Li
193*9c5db199SXin Li        A ServoHost instance represents a host that controls a servo.
194*9c5db199SXin Li
195*9c5db199SXin Li        @param servo_host: Name of the host where the servod process
196*9c5db199SXin Li                           is running.
197*9c5db199SXin Li        @param servo_port: Port the servod process is listening on. Defaults
198*9c5db199SXin Li                           to the SERVOD_PORT environment variable if set,
199*9c5db199SXin Li                           otherwise 9999.
200*9c5db199SXin Li        @param servo_board: Board that the servo is connected to.
201*9c5db199SXin Li        @param servo_model: Model that the servo is connected to.
202*9c5db199SXin Li        @param servo_serial: Serial number of the servo device.
203*9c5db199SXin Li        @param servo_setup: Type of servo setup, e.g. REGULAR or DUAL_V4.
204*9c5db199SXin Li        @param additional_servod_args: Additional args that will append to
205*9c5db199SXin Li                                       servod start command.
206*9c5db199SXin Li        @param is_in_lab: True if the servo host is in Cros Lab. Default is set
207*9c5db199SXin Li                          to None, for which utils.host_is_in_lab_zone will be
208*9c5db199SXin Li                          called to check if the servo host is in Cros lab.
209*9c5db199SXin Li
210*9c5db199SXin Li        """
211*9c5db199SXin Li        super(ServoHost, self)._initialize(hostname=servo_host,
212*9c5db199SXin Li                                           is_in_lab=is_in_lab, *args, **dargs)
213*9c5db199SXin Li        self._init_attributes()
214*9c5db199SXin Li        self.servo_port = int(servo_port)
215*9c5db199SXin Li        self.servo_board = servo_board
216*9c5db199SXin Li        self.servo_model = servo_model
217*9c5db199SXin Li        self.servo_serial = servo_serial
218*9c5db199SXin Li        self.servo_setup = servo_setup
219*9c5db199SXin Li        self.servo_recovery = servo_recovery
220*9c5db199SXin Li        self.servo_fw_channel = servo_fw_channel
221*9c5db199SXin Li        self.additional_servod_args = additional_servod_args
222*9c5db199SXin Li
223*9c5db199SXin Li        # The location of the log files on the servo host for this instance.
224*9c5db199SXin Li        self.remote_log_dir = '%s_%s' % (self.SERVOD_LOG_PREFIX,
225*9c5db199SXin Li                                         self.servo_port)
226*9c5db199SXin Li        # Path of the servo host lock file.
227*9c5db199SXin Li        self._lock_file = (self.TEMP_FILE_DIR + str(self.servo_port) +
228*9c5db199SXin Li                           self.LOCK_FILE_POSTFIX)
229*9c5db199SXin Li        # File path to declare a reboot request.
230*9c5db199SXin Li        self._reboot_file = (self.TEMP_FILE_DIR + str(self.servo_port) +
231*9c5db199SXin Li                             self.REBOOT_FILE_POSTFIX)
232*9c5db199SXin Li
233*9c5db199SXin Li        # Lock the servo host if it's an in-lab labstation to prevent other
234*9c5db199SXin Li        # task to reboot it until current task completes. We also wait and
235*9c5db199SXin Li        # make sure the labstation is up here, in the case of the labstation is
236*9c5db199SXin Li        # in the middle of reboot.
237*9c5db199SXin Li        self._is_locked = False
238*9c5db199SXin Li        if (not self.is_containerized_servod()
239*9c5db199SXin Li                    and self.wait_up(self.REBOOT_TIMEOUT) and self.is_in_lab()
240*9c5db199SXin Li                    and self.is_labstation()):
241*9c5db199SXin Li            self._lock()
242*9c5db199SXin Li            try:
243*9c5db199SXin Li                self.wait_ready()
244*9c5db199SXin Li            except Exception as e:
245*9c5db199SXin Li                logging.info(
246*9c5db199SXin Li                        'Unexpected error while ensure labstation'
247*9c5db199SXin Li                        ' readiness; %s', str(e))
248*9c5db199SXin Li
249*9c5db199SXin Li        self._repair_strategy = (
250*9c5db199SXin Li                servo_repair.create_servo_repair_strategy())
251*9c5db199SXin Li
252*9c5db199SXin Li    def __str__(self):
253*9c5db199SXin Li        return "<%s '%s:%s'>" % (
254*9c5db199SXin Li                type(self).__name__, self.hostname, self.servo_port)
255*9c5db199SXin Li
256*9c5db199SXin Li    def connect_servo(self):
257*9c5db199SXin Li        """ Initialize and setup servo for later use.
258*9c5db199SXin Li        """
259*9c5db199SXin Li        self.initialize_servo()
260*9c5db199SXin Li        self.initialize_dut_for_servo()
261*9c5db199SXin Li
262*9c5db199SXin Li    def initialize_servo(self):
263*9c5db199SXin Li        """Establish a connection to the servod server on this host.
264*9c5db199SXin Li
265*9c5db199SXin Li        Initializes `self._servo` and then verifies that all network
266*9c5db199SXin Li        connections are working.  This will create an ssh tunnel if
267*9c5db199SXin Li        it's required.
268*9c5db199SXin Li        """
269*9c5db199SXin Li        self._servo = servo.Servo(servo_host=self,
270*9c5db199SXin Li                                  servo_serial=self.servo_serial)
271*9c5db199SXin Li
272*9c5db199SXin Li    def initialize_dut_for_servo(self):
273*9c5db199SXin Li        """This method will do some setup for dut control, e.g. setup
274*9c5db199SXin Li        main servo_v4 device, and also testing the connection between servo
275*9c5db199SXin Li        and DUT. As a side effect of testing the connection, all signals on
276*9c5db199SXin Li        the target servo are reset to default values, and the USB stick is
277*9c5db199SXin Li        set to the neutral (off) position.
278*9c5db199SXin Li        """
279*9c5db199SXin Li        if not self._servo:
280*9c5db199SXin Li            raise hosts.AutoservVerifyError('Servo object needs to be'
281*9c5db199SXin Li                                            ' initialized before initialize'
282*9c5db199SXin Li                                            ' DUT.')
283*9c5db199SXin Li        timeout, _ = retry.timeout(
284*9c5db199SXin Li            self._servo.initialize_dut,
285*9c5db199SXin Li            timeout_sec=self.INITIALIZE_SERVO_TIMEOUT_SECS)
286*9c5db199SXin Li        if timeout:
287*9c5db199SXin Li            raise hosts.AutoservVerifyError('Initialize dut for servo timed'
288*9c5db199SXin Li                                            ' out.')
289*9c5db199SXin Li
290*9c5db199SXin Li    def disconnect_servo(self):
291*9c5db199SXin Li        """Disconnect our servo if it exists.
292*9c5db199SXin Li
293*9c5db199SXin Li        If we've previously successfully connected to our servo,
294*9c5db199SXin Li        disconnect any established ssh tunnel, and set `self._servo`
295*9c5db199SXin Li        back to `None`.
296*9c5db199SXin Li        """
297*9c5db199SXin Li        if self._servo:
298*9c5db199SXin Li            # N.B. This call is safe even without a tunnel:
299*9c5db199SXin Li            # rpc_server_tracker.disconnect() silently ignores
300*9c5db199SXin Li            # unknown ports.
301*9c5db199SXin Li            self.rpc_server_tracker.disconnect(self.servo_port)
302*9c5db199SXin Li            self._servo = None
303*9c5db199SXin Li
304*9c5db199SXin Li    def _to_str(self, maybe_str):
305*9c5db199SXin Li        """If the param is bytes, convert it to a str."""
306*9c5db199SXin Li        # b/217780680, Make this compatible with python3,
307*9c5db199SXin Li        if isinstance(maybe_str, bytes):
308*9c5db199SXin Li            return maybe_str.decode('utf-8', errors='replace')
309*9c5db199SXin Li        return maybe_str
310*9c5db199SXin Li
311*9c5db199SXin Li    def _maybe_create_servod_ssh_tunnel_proxy(self):
312*9c5db199SXin Li        """Create a xmlrpc proxy for use with a ssh tunnel.
313*9c5db199SXin Li        A lock is used to safely create a singleton proxy.
314*9c5db199SXin Li        """
315*9c5db199SXin Li        with self._tunnel_proxy_lock:
316*9c5db199SXin Li            if self._tunnel_proxy is None:
317*9c5db199SXin Li                self._tunnel_proxy = self.rpc_server_tracker.xmlrpc_connect(
318*9c5db199SXin Li                       None,
319*9c5db199SXin Li                       self.servo_port,
320*9c5db199SXin Li                       ready_test_name=self.SERVO_READY_METHOD,
321*9c5db199SXin Li                       timeout_seconds=60,
322*9c5db199SXin Li                       request_timeout_seconds=3600,
323*9c5db199SXin Li                       server_desc=str(self))
324*9c5db199SXin Li
325*9c5db199SXin Li    def get_servod_server_proxy(self):
326*9c5db199SXin Li        """Return a proxy if it exists; otherwise, create a new one.
327*9c5db199SXin Li        A proxy can either be a ssh tunnel based proxy, or a httplib
328*9c5db199SXin Li        based proxy.
329*9c5db199SXin Li
330*9c5db199SXin Li        @returns: An xmlrpclib.ServerProxy that is connected to the servod
331*9c5db199SXin Li                  server on the host.
332*9c5db199SXin Li        """
333*9c5db199SXin Li        if (servo_constants.ENABLE_SSH_TUNNEL_FOR_SERVO
334*9c5db199SXin Li                    and not self.is_localhost()
335*9c5db199SXin Li                    and not self.is_containerized_servod()):
336*9c5db199SXin Li            # Check for existing ssh tunnel proxy.
337*9c5db199SXin Li            if self._tunnel_proxy is None:
338*9c5db199SXin Li                self._maybe_create_servod_ssh_tunnel_proxy()
339*9c5db199SXin Li            return self._tunnel_proxy
340*9c5db199SXin Li        else:
341*9c5db199SXin Li            # xmlrpc/httplib is not thread-safe, so each thread must have its
342*9c5db199SXin Li            # own separate proxy connection.
343*9c5db199SXin Li            if not hasattr(self._local, "_per_thread_proxy"):
344*9c5db199SXin Li                if self.is_containerized_servod():
345*9c5db199SXin Li                    ip_addr = docker_utils.get_container_ip(self.hostname)
346*9c5db199SXin Li                    remote = 'http://%s:%s' % (ip_addr, self.servo_port)
347*9c5db199SXin Li                else:
348*9c5db199SXin Li                    remote = 'http://%s:%s' % (self.hostname, self.servo_port)
349*9c5db199SXin Li                logging.debug('Servo Proxy RPC URL: %s', remote)
350*9c5db199SXin Li                self._local._per_thread_proxy = six.moves.xmlrpc_client.ServerProxy(remote)
351*9c5db199SXin Li            return self._local._per_thread_proxy
352*9c5db199SXin Li
353*9c5db199SXin Li    def verify(self, silent=False):
354*9c5db199SXin Li        """Update the servo host and verify it's in a good state.
355*9c5db199SXin Li
356*9c5db199SXin Li        @param silent   If true, suppress logging in `status.log`.
357*9c5db199SXin Li        """
358*9c5db199SXin Li        message = 'Beginning verify for servo host %s port %s serial %s'
359*9c5db199SXin Li        message %= (self.hostname, self.servo_port, self.servo_serial)
360*9c5db199SXin Li        self.record('INFO', None, None, message)
361*9c5db199SXin Li        try:
362*9c5db199SXin Li            self._repair_strategy.verify(self, silent)
363*9c5db199SXin Li            self._servo_state = servo_constants.SERVO_STATE_WORKING
364*9c5db199SXin Li            self.record('INFO', None, None,
365*9c5db199SXin Li                        'ServoHost verify set servo_state as WORKING')
366*9c5db199SXin Li        except Exception as e:
367*9c5db199SXin Li            if not self.is_localhost():
368*9c5db199SXin Li                self._servo_state = self.determine_servo_state()
369*9c5db199SXin Li                self.record('INFO', None, None,
370*9c5db199SXin Li                            'ServoHost verify set servo_state as %s'
371*9c5db199SXin Li                            % self._servo_state)
372*9c5db199SXin Li            if self._is_critical_error(e):
373*9c5db199SXin Li                raise
374*9c5db199SXin Li
375*9c5db199SXin Li    def _get_default_usbkey_mount_path(self):
376*9c5db199SXin Li        return '/media/servo_usb/%s' % self.servo_port
377*9c5db199SXin Li
378*9c5db199SXin Li    def get_image_name_from_usbkey(self, usbkey_dev):
379*9c5db199SXin Li        """Mount usb drive and check ChromeOS image name on it if there is
380*9c5db199SXin Li        one. This method assumes the image_usbkey_direction is already set
381*9c5db199SXin Li        to servo side.
382*9c5db199SXin Li
383*9c5db199SXin Li        @param usbkey_dev: usbkey dev path(e.g. /dev/sdb).
384*9c5db199SXin Li
385*9c5db199SXin Li        @returns: image_name on the usbkey, e.g. nami-release/R82.10138.0.0,
386*9c5db199SXin Li                  or empty string if no test image detected, or unexpected
387*9c5db199SXin Li                  error occurred.
388*9c5db199SXin Li        """
389*9c5db199SXin Li        logging.info('Checking ChromeOS image name on usbkey.')
390*9c5db199SXin Li        mount_dst = self._get_default_usbkey_mount_path()
391*9c5db199SXin Li        # Unmount if there is an existing stale mount.
392*9c5db199SXin Li        self._unmount_drive(mount_dst)
393*9c5db199SXin Li        # ChromeOS root fs is in /dev/sdx3
394*9c5db199SXin Li        mount_src = usbkey_dev + '3'
395*9c5db199SXin Li        try:
396*9c5db199SXin Li            if not self._mount_drive(mount_src, mount_dst):
397*9c5db199SXin Li                logging.debug('Unexpected error occurred on mount usb drive.')
398*9c5db199SXin Li                return ''
399*9c5db199SXin Li
400*9c5db199SXin Li            release_content = self._to_str(
401*9c5db199SXin Li                    self.run('cat %s/etc/lsb-release' % mount_dst,
402*9c5db199SXin Li                             ignore_status=True).stdout.strip())
403*9c5db199SXin Li
404*9c5db199SXin Li            if not re.search(r'RELEASE_TRACK=.*test', release_content):
405*9c5db199SXin Li                logging.info('The image on usbkey is not a test image')
406*9c5db199SXin Li                return ''
407*9c5db199SXin Li
408*9c5db199SXin Li            return lsbrelease_utils.get_chromeos_release_builder_path(
409*9c5db199SXin Li                lsb_release_content=release_content)
410*9c5db199SXin Li        finally:
411*9c5db199SXin Li            logging.debug('Image check compeleted, unmounting the usb drive.')
412*9c5db199SXin Li            self._unmount_drive(mount_dst)
413*9c5db199SXin Li
414*9c5db199SXin Li    def _extract_firmware_image_from_usbkey(self, fw_dst):
415*9c5db199SXin Li        """Extract firmware images from the usbkey on servo, this method
416*9c5db199SXin Li        assumes there is already a ChromeOS test image staged on servo.
417*9c5db199SXin Li
418*9c5db199SXin Li        @param fw_dst: the path that we'll copy firmware images to.
419*9c5db199SXin Li
420*9c5db199SXin Li        @returns: a json format string of firmware manifest data.
421*9c5db199SXin Li        """
422*9c5db199SXin Li        usbkey_dev = self._probe_and_validate_usb_dev()
423*9c5db199SXin Li        if not usbkey_dev:
424*9c5db199SXin Li            raise hosts.AutoservRepairError('Unexpected error occurred when'
425*9c5db199SXin Li                      ' probe usbkey dev path, please check logs for detail.')
426*9c5db199SXin Li
427*9c5db199SXin Li        mount_dst = self._get_default_usbkey_mount_path()
428*9c5db199SXin Li        # Unmount if there is an existing stale mount.
429*9c5db199SXin Li        self._unmount_drive(mount_dst)
430*9c5db199SXin Li        # ChromeOS root fs is in /dev/sdx3
431*9c5db199SXin Li        mount_src = usbkey_dev + '3'
432*9c5db199SXin Li        try:
433*9c5db199SXin Li            if not self._mount_drive(mount_src, mount_dst):
434*9c5db199SXin Li                raise hosts.AutoservRepairError('Failed to extract firmware'
435*9c5db199SXin Li                          ' image; Unable to mount %s.' % usbkey_dev,
436*9c5db199SXin Li                          'unable to mount usbkey')
437*9c5db199SXin Li            updater_bin = os.path.join(mount_dst,
438*9c5db199SXin Li                                       'usr/sbin/chromeos-firmwareupdate')
439*9c5db199SXin Li            self.run('%s --unpack %s' % (updater_bin, fw_dst))
440*9c5db199SXin Li            return self._to_str(self.run('%s --manifest' % updater_bin).stdout)
441*9c5db199SXin Li
442*9c5db199SXin Li        finally:
443*9c5db199SXin Li            self._unmount_drive(mount_dst)
444*9c5db199SXin Li
445*9c5db199SXin Li    def prepare_repair_firmware_image(self, fw_dst=None):
446*9c5db199SXin Li        """Prepare firmware image on the servohost for auto repair process
447*9c5db199SXin Li        to consume.
448*9c5db199SXin Li
449*9c5db199SXin Li        @param fw_dst: the path that we want to store firmware image on
450*9c5db199SXin Li                       the servohost.
451*9c5db199SXin Li
452*9c5db199SXin Li        @returns: A tuple that containes ec firmware image path and bios
453*9c5db199SXin Li                  firmware image path on the servohost, or None if type of
454*9c5db199SXin Li                  image is not available based on manifest and dut's model.
455*9c5db199SXin Li        """
456*9c5db199SXin Li        model = self.servo_model or self._dut_host_info.model
457*9c5db199SXin Li        if not model:
458*9c5db199SXin Li            raise hosts.AutoservRepairError(
459*9c5db199SXin Li                      'Could not determine DUT\'s model.',
460*9c5db199SXin Li                      'model infomation unknown')
461*9c5db199SXin Li
462*9c5db199SXin Li        if not fw_dst:
463*9c5db199SXin Li            fw_dst = '/tmp/firmware_image/%s' % self.servo_port
464*9c5db199SXin Li        # Cleanup and re-create dst path to have a fresh start.
465*9c5db199SXin Li        self.run('rm -rf %s' % fw_dst)
466*9c5db199SXin Li        self.run('mkdir -p %s' % fw_dst)
467*9c5db199SXin Li
468*9c5db199SXin Li        manifest = json.loads(self._extract_firmware_image_from_usbkey(fw_dst))
469*9c5db199SXin Li        # For models that have packed $MODEL_signed variant, we want use the
470*9c5db199SXin Li        # 'signed' variant once we get DVT devices, so try to read manifest
471*9c5db199SXin Li        # from $MODEL_signed first.
472*9c5db199SXin Li        build = manifest.get('%s_signed' % model) or manifest.get(model)
473*9c5db199SXin Li        if not build:
474*9c5db199SXin Li            raise hosts.AutoservRepairError('Could not find firmware manifest'
475*9c5db199SXin Li                      ' for model:%s' % model, 'model manifest not found')
476*9c5db199SXin Li        try:
477*9c5db199SXin Li            ec_image = os.path.join(fw_dst, build['ec']['image'])
478*9c5db199SXin Li        except KeyError:
479*9c5db199SXin Li            ec_image = None
480*9c5db199SXin Li        try:
481*9c5db199SXin Li            bios_image = os.path.join(fw_dst, build['host']['image'])
482*9c5db199SXin Li        except KeyError:
483*9c5db199SXin Li            bios_image = None
484*9c5db199SXin Li        if not ec_image and not bios_image:
485*9c5db199SXin Li            raise hosts.AutoservRepairError('Could not find any firmware image'
486*9c5db199SXin Li                      ' for model:%s' % model, 'cannot find firmware image')
487*9c5db199SXin Li        return ec_image, bios_image
488*9c5db199SXin Li
489*9c5db199SXin Li    def flash_ap_firmware_via_servo(self, image):
490*9c5db199SXin Li        """Flash AP firmware by use a provided image.
491*9c5db199SXin Li
492*9c5db199SXin Li        This is will be a short term enhanment for infra repair use, it use
493*9c5db199SXin Li        'futility update' which will automatically determine various parameters
494*9c5db199SXin Li        needed for flashrom, and will preserve the GBB, VPD, and HWID for
495*9c5db199SXin Li        AP firmware update.
496*9c5db199SXin Li        @TODO(xianuowang@) Remove this method once b/148403277 implemented.
497*9c5db199SXin Li
498*9c5db199SXin Li        @param image: the firmware image path on servohost.
499*9c5db199SXin Li        """
500*9c5db199SXin Li        cmd = 'futility update -i %s --servo_port=%s'
501*9c5db199SXin Li        self.run(cmd % (image, self.servo_port), timeout=900)
502*9c5db199SXin Li
503*9c5db199SXin Li    def _probe_and_validate_usb_dev(self):
504*9c5db199SXin Li        """This method probe the usb dev path by talking to servo, and then
505*9c5db199SXin Li        validate the dev path is valid block device to servohost.
506*9c5db199SXin Li        Possible output:
507*9c5db199SXin Li        1. Encounter error during probe usb dev, returns empty string.
508*9c5db199SXin Li        2. probe usb dev completed without error but cannot find usb dev,
509*9c5db199SXin Li           raise AutoservRepairError.
510*9c5db199SXin Li        3. probe usb dev find a usb dev path, but failed validation in this
511*9c5db199SXin Li           method, raise AutoservRepairError.
512*9c5db199SXin Li
513*9c5db199SXin Li        @returns: A string of usb dev path(e.g. '/dev/sdb'), or empty string
514*9c5db199SXin Li        if unexpected error occurred during probe.
515*9c5db199SXin Li        @raises: AutoservRepairError if servo couldn't probe the usb dev path
516*9c5db199SXin Li        (servo.probe_host_usb_dev() returns empty string), or the dev path is
517*9c5db199SXin Li        not valid block device to servohost.
518*9c5db199SXin Li        """
519*9c5db199SXin Li        logging.info('Validating image usbkey on servo.')
520*9c5db199SXin Li        try:
521*9c5db199SXin Li            usb_dev = self._servo.probe_host_usb_dev()
522*9c5db199SXin Li        except Exception as e:
523*9c5db199SXin Li            # We don't want any unexpected or transient servo communicating
524*9c5db199SXin Li            # failure block usb repair, so capture all errors here.
525*9c5db199SXin Li            logging.error(e, exc_info=True)
526*9c5db199SXin Li            logging.error('Unexpected error occurred on get usbkey dev path,'
527*9c5db199SXin Li                          ' skipping usbkey validation.')
528*9c5db199SXin Li            return ''
529*9c5db199SXin Li
530*9c5db199SXin Li        if usb_dev:
531*9c5db199SXin Li            # probe_host_usb_dev() sometimes return stale record,
532*9c5db199SXin Li            # so we need to make sure the path exists in fdisk.
533*9c5db199SXin Li            validate_cmd = 'fdisk -l %s' % usb_dev
534*9c5db199SXin Li            try:
535*9c5db199SXin Li                resp = self.run(validate_cmd, ignore_status=True, timeout=30)
536*9c5db199SXin Li                if resp.exit_status == 0:
537*9c5db199SXin Li                    return usb_dev
538*9c5db199SXin Li                logging.error('%s is reported from "image_usbkey_dev" control'
539*9c5db199SXin Li                              ' but not detected by fdisk!', usb_dev)
540*9c5db199SXin Li            except error.AutoservRunError as e:
541*9c5db199SXin Li                if 'Timeout encountered' in str(e):
542*9c5db199SXin Li                    logging.warning('Timeout encountered during fdisk run,'
543*9c5db199SXin Li                                    ' skipping usbkey validation.')
544*9c5db199SXin Li                    return ''
545*9c5db199SXin Li                raise
546*9c5db199SXin Li
547*9c5db199SXin Li        raise hosts.AutoservRepairError(
548*9c5db199SXin Li              'No usbkey detected on servo, the usbkey may be either missing'
549*9c5db199SXin Li              ' or broken. Please replace usbkey on the servo and retry.',
550*9c5db199SXin Li              'missing usbkey')
551*9c5db199SXin Li
552*9c5db199SXin Li    def is_ec_supported(self):
553*9c5db199SXin Li        """Check if ec is supported on the servo_board"""
554*9c5db199SXin Li        if self.servo_board:
555*9c5db199SXin Li            if self._ec_supported is not None:
556*9c5db199SXin Li                return self._ec_supported
557*9c5db199SXin Li            try:
558*9c5db199SXin Li                frm_config = config.Config(self.servo_board, self.servo_model)
559*9c5db199SXin Li                self._ec_supported = getattr(frm_config, 'chrome_ec', False)
560*9c5db199SXin Li                return self._ec_supported
561*9c5db199SXin Li            except Exception as e:
562*9c5db199SXin Li                logging.error(
563*9c5db199SXin Li                        'Unexpected error when read from firmware'
564*9c5db199SXin Li                        ' configs; %s', e)
565*9c5db199SXin Li        else:
566*9c5db199SXin Li            logging.debug('Cannot detect if DUT has EC as board unknown.')
567*9c5db199SXin Li        return False
568*9c5db199SXin Li
569*9c5db199SXin Li    def validate_image_usbkey(self):
570*9c5db199SXin Li        """This method first validate if there is a recover usbkey on servo
571*9c5db199SXin Li        that accessible to servohost, and second check if a ChromeOS image is
572*9c5db199SXin Li        already on the usb drive and return the image_name so we can avoid
573*9c5db199SXin Li        unnecessary download and flash to the recover usbkey on servo.
574*9c5db199SXin Li
575*9c5db199SXin Li        Please note that, there is special error handling logic here:
576*9c5db199SXin Li        1. If unexpected error happens, we return empty string. So repair
577*9c5db199SXin Li           actions will not get blocked.
578*9c5db199SXin Li        2. If no working usbkey present on servo, but no errors, we'll raise
579*9c5db199SXin Li           AutoservRepairError here.
580*9c5db199SXin Li
581*9c5db199SXin Li        @returns: image_name on the usbkey, e.g. nami-release/R82.10138.0.0,
582*9c5db199SXin Li                  or empty string if no test image detected, or unexpected
583*9c5db199SXin Li                  error occurred.
584*9c5db199SXin Li        @raises:  AutoservRepairError if the usbkey is not detected on servo.
585*9c5db199SXin Li        """
586*9c5db199SXin Li        usb_dev = self._probe_and_validate_usb_dev()
587*9c5db199SXin Li        if usb_dev:
588*9c5db199SXin Li            return self.get_image_name_from_usbkey(usb_dev)
589*9c5db199SXin Li        else:
590*9c5db199SXin Li            return ''
591*9c5db199SXin Li
592*9c5db199SXin Li    def repair(self, silent=False):
593*9c5db199SXin Li        """Attempt to repair servo host.
594*9c5db199SXin Li
595*9c5db199SXin Li        @param silent   If true, suppress logging in `status.log`.
596*9c5db199SXin Li        """
597*9c5db199SXin Li        message = 'Beginning repair for servo host %s port %s serial %s'
598*9c5db199SXin Li        message %= (self.hostname, self.servo_port, self.servo_serial)
599*9c5db199SXin Li        self.record('INFO', None, None, message)
600*9c5db199SXin Li        try:
601*9c5db199SXin Li            self._repair_strategy.repair(self, silent)
602*9c5db199SXin Li            self._servo_state = servo_constants.SERVO_STATE_WORKING
603*9c5db199SXin Li            self.record('INFO', None, None,
604*9c5db199SXin Li                        'ServoHost repair set servo_state as WORKING')
605*9c5db199SXin Li            # If target is a labstation then try to withdraw any existing
606*9c5db199SXin Li            # reboot request created by this servo because it passed repair.
607*9c5db199SXin Li            if self.is_labstation():
608*9c5db199SXin Li                self.withdraw_reboot_request()
609*9c5db199SXin Li        except Exception as e:
610*9c5db199SXin Li            if not self.is_localhost():
611*9c5db199SXin Li                self._servo_state = self.determine_servo_state()
612*9c5db199SXin Li                self.record('INFO', None, None,
613*9c5db199SXin Li                            'ServoHost repair set servo_state as %s'
614*9c5db199SXin Li                            % self._servo_state)
615*9c5db199SXin Li            if self._is_critical_error(e):
616*9c5db199SXin Li                self.disconnect_servo()
617*9c5db199SXin Li                self.stop_servod()
618*9c5db199SXin Li                raise
619*9c5db199SXin Li
620*9c5db199SXin Li    def _is_critical_error(self, error):
621*9c5db199SXin Li        if (isinstance(error, hosts.AutoservVerifyDependencyError)
622*9c5db199SXin Li            and not error.is_critical()):
623*9c5db199SXin Li            logging.warning('Non-critical verify failure(s) detected during'
624*9c5db199SXin Li                            ' verify/repair servo, servo connection will'
625*9c5db199SXin Li                            ' still be up but may not be fully functional.'
626*9c5db199SXin Li                            ' Some repair actions and servo dependent'
627*9c5db199SXin Li                            ' tests may not run.')
628*9c5db199SXin Li            return False
629*9c5db199SXin Li        logging.info(
630*9c5db199SXin Li                'Critical verify failure(s) detected during repair/verify '
631*9c5db199SXin Li                'servo. Disconnecting servo and running `stop servod`, all'
632*9c5db199SXin Li                ' repair actions and tests that depends on servo will not '
633*9c5db199SXin Li                'run.')
634*9c5db199SXin Li        return True
635*9c5db199SXin Li
636*9c5db199SXin Li    def get_servo(self):
637*9c5db199SXin Li        """Get the cached servo.Servo object.
638*9c5db199SXin Li
639*9c5db199SXin Li        @return: a servo.Servo object.
640*9c5db199SXin Li        @rtype: autotest_lib.server.cros.servo.servo.Servo
641*9c5db199SXin Li        """
642*9c5db199SXin Li        return self._servo
643*9c5db199SXin Li
644*9c5db199SXin Li    def request_reboot(self):
645*9c5db199SXin Li        """Request servohost to be rebooted when it's safe to by touch a file.
646*9c5db199SXin Li        """
647*9c5db199SXin Li        logging.debug('Request to reboot servohost %s has been created by '
648*9c5db199SXin Li                      'servo with port # %s', self.hostname, self.servo_port)
649*9c5db199SXin Li        self.run('touch %s' % self._reboot_file, ignore_status=True)
650*9c5db199SXin Li
651*9c5db199SXin Li    def withdraw_reboot_request(self):
652*9c5db199SXin Li        """Withdraw a servohost reboot request if exists by remove the flag
653*9c5db199SXin Li        file.
654*9c5db199SXin Li        """
655*9c5db199SXin Li        logging.debug('Withdrawing request to reboot servohost %s that created'
656*9c5db199SXin Li                      ' by servo with port # %s if exists.',
657*9c5db199SXin Li                      self.hostname, self.servo_port)
658*9c5db199SXin Li        self.run('rm -f %s' % self._reboot_file, ignore_status=True)
659*9c5db199SXin Li
660*9c5db199SXin Li    def start_servod(self, quick_startup=False):
661*9c5db199SXin Li        """Start the servod process on servohost.
662*9c5db199SXin Li        """
663*9c5db199SXin Li        # Skip if running on the localhost.(crbug.com/1038168)
664*9c5db199SXin Li        if self.is_localhost():
665*9c5db199SXin Li            logging.debug("Servohost is a localhost, skipping start servod.")
666*9c5db199SXin Li            return
667*9c5db199SXin Li
668*9c5db199SXin Li        if self.is_containerized_servod():
669*9c5db199SXin Li            return self.start_containerized_servod()
670*9c5db199SXin Li
671*9c5db199SXin Li        cmd = 'start servod'
672*9c5db199SXin Li        if self.servo_board:
673*9c5db199SXin Li            cmd += ' BOARD=%s' % self.servo_board
674*9c5db199SXin Li            if self.servo_model:
675*9c5db199SXin Li                cmd += ' MODEL=%s' % self.servo_model
676*9c5db199SXin Li        else:
677*9c5db199SXin Li            logging.warning('Board for DUT is unknown; starting servod'
678*9c5db199SXin Li                            ' assuming a pre-configured board.')
679*9c5db199SXin Li
680*9c5db199SXin Li        cmd += ' PORT=%d' % self.servo_port
681*9c5db199SXin Li        if self.servo_serial:
682*9c5db199SXin Li            cmd += ' SERIAL=%s' % self.servo_serial
683*9c5db199SXin Li
684*9c5db199SXin Li        # Start servod with dual_v4 based on servo_setup.
685*9c5db199SXin Li        if self.is_dual_setup():
686*9c5db199SXin Li            cmd += ' DUAL_V4=1'
687*9c5db199SXin Li
688*9c5db199SXin Li        # Start servod with CONFIG=cr50.xml which required for some pools.
689*9c5db199SXin Li        if self._require_cr50_servod_config():
690*9c5db199SXin Li            cmd += ' CONFIG=cr50.xml'
691*9c5db199SXin Li
692*9c5db199SXin Li        if self.servo_recovery == True:
693*9c5db199SXin Li            cmd += ' REC_MODE=1'
694*9c5db199SXin Li
695*9c5db199SXin Li        # Adding customized args if any.
696*9c5db199SXin Li        if self.additional_servod_args:
697*9c5db199SXin Li            cmd += ' ' + self.additional_servod_args
698*9c5db199SXin Li
699*9c5db199SXin Li        # Remove the symbolic links from the logs. This helps ensure that
700*9c5db199SXin Li        # a failed servod instantiation does not cause us to grab old logs
701*9c5db199SXin Li        # by mistake.
702*9c5db199SXin Li        self.remove_latest_log_symlinks()
703*9c5db199SXin Li        self.run(cmd, timeout=60)
704*9c5db199SXin Li
705*9c5db199SXin Li        # There's a lag between when `start servod` completes and when
706*9c5db199SXin Li        # the _ServodConnectionVerifier trigger can actually succeed.
707*9c5db199SXin Li        # The call to time.sleep() below gives time to make sure that
708*9c5db199SXin Li        # the trigger won't fail after we return.
709*9c5db199SXin Li
710*9c5db199SXin Li        # Normally servod on servo_v3 and labstation take ~10 seconds to ready,
711*9c5db199SXin Li        # But in the rare case all servo on a labstation are in heavy use they
712*9c5db199SXin Li        # may take ~30 seconds. So the timeout value will double these value,
713*9c5db199SXin Li        # and we'll try quick start up when first time initialize servohost,
714*9c5db199SXin Li        # and use standard start up timeout in repair.
715*9c5db199SXin Li        if quick_startup:
716*9c5db199SXin Li            timeout = servo_constants.SERVOD_QUICK_STARTUP_TIMEOUT
717*9c5db199SXin Li        else:
718*9c5db199SXin Li            timeout = servo_constants.SERVOD_STARTUP_TIMEOUT
719*9c5db199SXin Li        logging.debug('Wait %s seconds for servod process fully up.', timeout)
720*9c5db199SXin Li        time.sleep(timeout)
721*9c5db199SXin Li        # Cache the initial instance timestamp to check against servod restarts
722*9c5db199SXin Li        self._initial_instance_ts = self.get_instance_logs_ts()
723*9c5db199SXin Li
724*9c5db199SXin Li    def stop_servod(self):
725*9c5db199SXin Li        """Stop the servod process on servohost.
726*9c5db199SXin Li        """
727*9c5db199SXin Li        # Skip if running on the localhost.(crbug.com/1038168)
728*9c5db199SXin Li        if self.is_localhost():
729*9c5db199SXin Li            logging.debug("Servohost is a localhost, skipping stop servod.")
730*9c5db199SXin Li            return
731*9c5db199SXin Li
732*9c5db199SXin Li        if self.is_containerized_servod():
733*9c5db199SXin Li            # TODO(gregorynisbet): Remove this message in 2022Q2.
734*9c5db199SXin Li            logging.debug("ServoHost: Detected containerized servod.")
735*9c5db199SXin Li            remove_container = True
736*9c5db199SXin Li            if DOCKER_SERVOD_DEBUG_MODE == '1' or os.path.exists(
737*9c5db199SXin Li                    SERVOD_DEBUG_FLAG):
738*9c5db199SXin Li                remove_container = False
739*9c5db199SXin Li            self.stop_containerized_servod(remove_container=remove_container)
740*9c5db199SXin Li            return
741*9c5db199SXin Li
742*9c5db199SXin Li        logging.debug('Stopping servod on port %s', self.servo_port)
743*9c5db199SXin Li        self.run('stop servod PORT=%d' % self.servo_port,
744*9c5db199SXin Li                 timeout=60, ignore_status=True)
745*9c5db199SXin Li        logging.debug('Wait %s seconds for servod process fully teardown.',
746*9c5db199SXin Li                      servo_constants.SERVOD_TEARDOWN_TIMEOUT)
747*9c5db199SXin Li        time.sleep(servo_constants.SERVOD_TEARDOWN_TIMEOUT)
748*9c5db199SXin Li
749*9c5db199SXin Li    def wait_for_init_servod_in_container(self, container):
750*9c5db199SXin Li        """Waits for servod process to be ready to listen inside container."""
751*9c5db199SXin Li        ready_output = "Instance associated with id %s ready" % self.servo_port
752*9c5db199SXin Li        if not container:
753*9c5db199SXin Li            logging.debug("Container object is None.")
754*9c5db199SXin Li            return False
755*9c5db199SXin Li        try:
756*9c5db199SXin Li            # Executes servodtool command to wait for servod to be active.
757*9c5db199SXin Li            exit_code, output = container.exec_run(
758*9c5db199SXin Li                    cmd="servodtool instance wait-for-active -p %s" %
759*9c5db199SXin Li                    self.servo_port,
760*9c5db199SXin Li                    stdout=True)
761*9c5db199SXin Li            # b/217780680, Make this compatible with python3,
762*9c5db199SXin Li            if isinstance(output, bytes):
763*9c5db199SXin Li                output = output.decode(errors='replace')
764*9c5db199SXin Li            if exit_code != 0 or ready_output not in output:
765*9c5db199SXin Li                logging.debug(
766*9c5db199SXin Li                        'Failed to start servod process inside container,'
767*9c5db199SXin Li                        'exit_code=%s, output=%s.', exit_code, output)
768*9c5db199SXin Li                return False
769*9c5db199SXin Li        except docker.errors.APIError as e:
770*9c5db199SXin Li            logging.error('%s', e)
771*9c5db199SXin Li        return True
772*9c5db199SXin Li
773*9c5db199SXin Li    def start_containerized_servod(self, with_servod=True):
774*9c5db199SXin Li        """Start the servod process on servohost."""
775*9c5db199SXin Li        logging.info("Starting servod container %s.",
776*9c5db199SXin Li                     self.servod_container_name)
777*9c5db199SXin Li        client = docker_utils.get_docker_client()
778*9c5db199SXin Li        logging.debug("Docker deamon ping %s", client.ping())
779*9c5db199SXin Li        labels = {'WITH_SERVOD': str(with_servod)}
780*9c5db199SXin Li        try:
781*9c5db199SXin Li            if self.is_up(with_servod=with_servod):
782*9c5db199SXin Li                logging.warning("Container already exists - not starting")
783*9c5db199SXin Li                return
784*9c5db199SXin Li            logging.info(
785*9c5db199SXin Li                    'Servod container either does not exist or is not running.'
786*9c5db199SXin Li            )
787*9c5db199SXin Li            self.stop_containerized_servod()
788*9c5db199SXin Li        except docker.errors.APIError:
789*9c5db199SXin Li            # Container exists but is not running
790*9c5db199SXin Li            logging.info("Cleanup of non functional container.")
791*9c5db199SXin Li            self.stop_containerized_servod()
792*9c5db199SXin Li
793*9c5db199SXin Li        label = os.environ.get("SERVOD_CONTAINER_LABEL", "release")
794*9c5db199SXin Li        registry = os.environ.get("REGISTRY_URI", SERVOD_CONTAINER_IMAGE_PATH)
795*9c5db199SXin Li        image = "%s/servod:%s" % (registry, label)
796*9c5db199SXin Li        logging.info("Servod container image: %s", image)
797*9c5db199SXin Li
798*9c5db199SXin Li        try:
799*9c5db199SXin Li            client.images.pull(image)
800*9c5db199SXin Li        except docker.errors.APIError:
801*9c5db199SXin Li            logging.exception("Failed to pull servod container image.")
802*9c5db199SXin Li
803*9c5db199SXin Li        environment = [
804*9c5db199SXin Li                "BOARD=%s" % self.servo_board,
805*9c5db199SXin Li                "MODEL=%s" % self.servo_model,
806*9c5db199SXin Li                "SERIAL=%s" % self.servo_serial,
807*9c5db199SXin Li                "PORT=%s" % self.servo_port,
808*9c5db199SXin Li        ]
809*9c5db199SXin Li        # Start servod with dual_v4 based on servo_setup.
810*9c5db199SXin Li        if self.is_dual_setup():
811*9c5db199SXin Li            environment.append("DUAL_V4=1")
812*9c5db199SXin Li        # Start servod with CONFIG=cr50.xml which required for some pools.
813*9c5db199SXin Li        if self._require_cr50_servod_config():
814*9c5db199SXin Li            environment.append("CONFIG=cr50.xml")
815*9c5db199SXin Li        if self.servo_recovery == True:
816*9c5db199SXin Li            environment.append("REC_MODE=1")
817*9c5db199SXin Li
818*9c5db199SXin Li        container_network = os.environ.get("DOCKER_DEFAULT_NETWORK", None)
819*9c5db199SXin Li        # In case the network environment is not set, fallback to default network
820*9c5db199SXin Li        # for moblab or satlab based on the TLE.
821*9c5db199SXin Li        if not container_network:
822*9c5db199SXin Li            container_network = "default_moblab"
823*9c5db199SXin Li            if 'drone' in docker_utils.get_running_containers(client=client):
824*9c5db199SXin Li                container_network = "default_satlab"
825*9c5db199SXin Li
826*9c5db199SXin Li        logging.info('Servod container will use %s network', container_network)
827*9c5db199SXin Li        logging.info('Servod container environment: %s', environment)
828*9c5db199SXin Li        try:
829*9c5db199SXin Li            start_cmds = ["bash", "/start_servod.sh"]
830*9c5db199SXin Li            if not with_servod:
831*9c5db199SXin Li                # In some cases we do not need container without running servod.
832*9c5db199SXin Li                start_cmds = ["tail", "-f", "/dev/null"]
833*9c5db199SXin Li                logging.debug(
834*9c5db199SXin Li                        'Attempting to start Servod container without servod')
835*9c5db199SXin Li            container = client.containers.run(
836*9c5db199SXin Li                    image,
837*9c5db199SXin Li                    remove=False,
838*9c5db199SXin Li                    privileged=True,
839*9c5db199SXin Li                    labels=labels,
840*9c5db199SXin Li                    name=self.servod_container_name,
841*9c5db199SXin Li                    hostname=self.servod_container_name,
842*9c5db199SXin Li                    network=container_network,
843*9c5db199SXin Li                    cap_add=["NET_ADMIN"],
844*9c5db199SXin Li                    detach=True,
845*9c5db199SXin Li                    volumes=[
846*9c5db199SXin Li                            "/dev:/dev",
847*9c5db199SXin Li                            "%s_log:/var/log/servod_%s/" %
848*9c5db199SXin Li                            (self.servod_container_name, self.servo_port)
849*9c5db199SXin Li                    ],
850*9c5db199SXin Li                    environment=environment,
851*9c5db199SXin Li                    command=start_cmds,
852*9c5db199SXin Li            )
853*9c5db199SXin Li            # Probing servod ready state fails.
854*9c5db199SXin Li            if with_servod:
855*9c5db199SXin Li                current_time = 0
856*9c5db199SXin Li                while not self.wait_for_init_servod_in_container(
857*9c5db199SXin Li                        container
858*9c5db199SXin Li                ) and current_time <= servo_constants.SERVOD_STARTUP_TIMEOUT:
859*9c5db199SXin Li                    time.sleep(10)
860*9c5db199SXin Li                    current_time += 10
861*9c5db199SXin Li
862*9c5db199SXin Li                if not self.wait_for_init_servod_in_container(container):
863*9c5db199SXin Li                    logging.info(
864*9c5db199SXin Li                            'Servod process is not up within the servod container after %s seconds.'
865*9c5db199SXin Li                            % servo_constants.SERVOD_STARTUP_TIMEOUT)
866*9c5db199SXin Li                else:
867*9c5db199SXin Li                    logging.info(
868*9c5db199SXin Li                            'Servod process is up within the servod container after %s seconds.'
869*9c5db199SXin Li                            % current_time)
870*9c5db199SXin Li            else:
871*9c5db199SXin Li                logging.info(
872*9c5db199SXin Li                        "Servod container %s up and running without servod process.",
873*9c5db199SXin Li                        self.servod_container_name)
874*9c5db199SXin Li
875*9c5db199SXin Li        except docker.errors.ContainerError as e:
876*9c5db199SXin Li            logging.exception("Failed to start servod container. %s", e)
877*9c5db199SXin Li            raise
878*9c5db199SXin Li        except docker.errors.ImageNotFound:
879*9c5db199SXin Li            logging.exception("Servod container image %s not found.", image)
880*9c5db199SXin Li
881*9c5db199SXin Li    def stop_containerized_servod(self, remove_container=True):
882*9c5db199SXin Li        """Stop the container running servod."""
883*9c5db199SXin Li        logging.info("Stopping servod container %s.",
884*9c5db199SXin Li                     self.servod_container_name)
885*9c5db199SXin Li        client = docker_utils.get_docker_client()
886*9c5db199SXin Li        try:
887*9c5db199SXin Li            cont = client.containers.get(self.servod_container_name)
888*9c5db199SXin Li        except docker.errors.NotFound:
889*9c5db199SXin Li            logging.info("Servod container %s not found no need to stop it.",
890*9c5db199SXin Li                         self.servod_container_name)
891*9c5db199SXin Li        except docker.errors.APIError:
892*9c5db199SXin Li            logging.exception(
893*9c5db199SXin Li                    "Stopping servod container %s caused a docker error.",
894*9c5db199SXin Li                    self.servod_container_name)
895*9c5db199SXin Li        else:
896*9c5db199SXin Li            if remove_container == True:
897*9c5db199SXin Li                cont.remove(force=True)
898*9c5db199SXin Li                logging.debug('Servod container instance removed')
899*9c5db199SXin Li
900*9c5db199SXin Li    def restart_servod(self, quick_startup=False):
901*9c5db199SXin Li        """Restart the servod process on servohost.
902*9c5db199SXin Li        """
903*9c5db199SXin Li        self.stop_servod()
904*9c5db199SXin Li        self.start_servod(quick_startup)
905*9c5db199SXin Li
906*9c5db199SXin Li    def _process_servodtool_error(self, response):
907*9c5db199SXin Li        """Helper function to handle non-zero servodtool response.
908*9c5db199SXin Li        """
909*9c5db199SXin Li        if re.search(servo_constants.ERROR_MESSAGE_USB_HUB_NOT_COMPATIBLE,
910*9c5db199SXin Li                     self._to_str(response.stdout)):
911*9c5db199SXin Li            logging.error('The servo is not plugged on a usb hub that supports'
912*9c5db199SXin Li                          ' power-cycle!')
913*9c5db199SXin Li            # change the flag so we can update this label in later process.
914*9c5db199SXin Li            self.smart_usbhub = False
915*9c5db199SXin Li            return
916*9c5db199SXin Li
917*9c5db199SXin Li        if re.search(
918*9c5db199SXin Li                servo_constants.ERROR_MESSAGE_DEVICE_NOT_FOUND %
919*9c5db199SXin Li                self.servo_serial, self._to_str(response.stdout)):
920*9c5db199SXin Li            logging.error('No servo with serial %s found!', self.servo_serial)
921*9c5db199SXin Li            return
922*9c5db199SXin Li
923*9c5db199SXin Li        logging.error('Unexpected error occurred from usbhub control, please'
924*9c5db199SXin Li                      ' file a bug and inform chrome-fleet-software@ team!')
925*9c5db199SXin Li
926*9c5db199SXin Li    def get_main_servo_usb_path(self):
927*9c5db199SXin Li        """Helper function to collect current usb-path to main servo.
928*9c5db199SXin Li
929*9c5db199SXin Li        The usb-path is path to the folder where usb-device was enumerated.
930*9c5db199SXin Li        If fail then will return an empty string ('').
931*9c5db199SXin Li
932*9c5db199SXin Li        @returns: string, usb-path to the main servo device.
933*9c5db199SXin Li            e.g.: '/sys/bus/usb/devices/1-6.1.3.1'
934*9c5db199SXin Li        """
935*9c5db199SXin Li        # TODO remove try-except when fix crbug.com/1087964
936*9c5db199SXin Li        try:
937*9c5db199SXin Li            cmd = 'servodtool device -s %s usb-path' % self.servo_serial
938*9c5db199SXin Li            resp = self.run(cmd, ignore_status=True, timeout=30)
939*9c5db199SXin Li        except Exception as e:
940*9c5db199SXin Li            # Here we catch only timeout errors.
941*9c5db199SXin Li            # Other errors is filtered by ignore_status=True
942*9c5db199SXin Li            logging.debug('Attempt to get servo usb-path failed due to '
943*9c5db199SXin Li                          'timeout; %s', e)
944*9c5db199SXin Li            return ''
945*9c5db199SXin Li        if resp.exit_status != 0:
946*9c5db199SXin Li            self._process_servodtool_error(resp)
947*9c5db199SXin Li            return ''
948*9c5db199SXin Li        usb_path = self._to_str(resp.stdout.strip())
949*9c5db199SXin Li
950*9c5db199SXin Li        logging.info('Usb path of servo %s is %s', self.servo_serial, usb_path)
951*9c5db199SXin Li        return usb_path
952*9c5db199SXin Li
953*9c5db199SXin Li    def _get_servo_usb_devnum(self):
954*9c5db199SXin Li        """Helper function to collect current usb devnum of servo."""
955*9c5db199SXin Li        usb_path = self.get_main_servo_usb_path()
956*9c5db199SXin Li        if not usb_path:
957*9c5db199SXin Li            return ''
958*9c5db199SXin Li        resp = self.run('cat %s/devnum' % usb_path, ignore_status=True)
959*9c5db199SXin Li        if resp.exit_status != 0:
960*9c5db199SXin Li            self._process_servodtool_error(resp)
961*9c5db199SXin Li            return ''
962*9c5db199SXin Li        return self._to_str(resp.stdout.strip())
963*9c5db199SXin Li
964*9c5db199SXin Li    def reboot_servo_v3_on_need(self):
965*9c5db199SXin Li        """Check and reboot servo_v3 based on below conditions.
966*9c5db199SXin Li               1. If there is an update pending on reboot.
967*9c5db199SXin Li               2. Servo_v3 has been up for more than 96 hours.
968*9c5db199SXin Li        """
969*9c5db199SXin Li        if self.get_board() != 'beaglebone_servo':
970*9c5db199SXin Li            logging.info('Servo reboot is only applicable for servo V3.')
971*9c5db199SXin Li            return
972*9c5db199SXin Li
973*9c5db199SXin Li        update_pending_reboot = (self._check_update_status() ==
974*9c5db199SXin Li                                 self.UPDATE_STATE.PENDING_REBOOT)
975*9c5db199SXin Li        uptime_hours = float(self.check_uptime())/3600
976*9c5db199SXin Li        logging.info('Uptime of servo_v3: %s hour(s)', uptime_hours)
977*9c5db199SXin Li        long_up_time = uptime_hours > 96
978*9c5db199SXin Li
979*9c5db199SXin Li        # Skip reboot if neither condition are met.
980*9c5db199SXin Li        if not (update_pending_reboot or long_up_time):
981*9c5db199SXin Li            return
982*9c5db199SXin Li
983*9c5db199SXin Li        if update_pending_reboot:
984*9c5db199SXin Li            message = 'Starting reboot servo_v3 because an update is pending.'
985*9c5db199SXin Li            reboot_method = self._post_update_reboot
986*9c5db199SXin Li        elif long_up_time:
987*9c5db199SXin Li            message = 'Starting reboot servo_v3 because uptime > 96 hours.'
988*9c5db199SXin Li            reboot_method = self._servo_host_reboot
989*9c5db199SXin Li        self.record('INFO', None, None, message)
990*9c5db199SXin Li        logging.info(message)
991*9c5db199SXin Li        try:
992*9c5db199SXin Li            reboot_method()
993*9c5db199SXin Li            message = 'Servo_v3 reboot completed successfully.'
994*9c5db199SXin Li        except Exception as e:
995*9c5db199SXin Li            logging.debug("Fail to reboot servo_v3; %s", e)
996*9c5db199SXin Li            message = ('Servo_v3 reboot failed, please check debug log '
997*9c5db199SXin Li                       'for details.')
998*9c5db199SXin Li        logging.info(message)
999*9c5db199SXin Li        self.record('INFO', None, None, message)
1000*9c5db199SXin Li
1001*9c5db199SXin Li    def _reset_servo(self):
1002*9c5db199SXin Li        logging.info('Resetting servo through smart usbhub.')
1003*9c5db199SXin Li        # TODO remove try-except when fix crbug.com/1087964
1004*9c5db199SXin Li        try:
1005*9c5db199SXin Li            resp = self.run('servodtool device -s %s power-cycle' %
1006*9c5db199SXin Li                            self.servo_serial, ignore_status=True,
1007*9c5db199SXin Li                            timeout=30)
1008*9c5db199SXin Li            if resp.exit_status != 0:
1009*9c5db199SXin Li                self._process_servodtool_error(resp)
1010*9c5db199SXin Li                return False
1011*9c5db199SXin Li        except Exception as e:
1012*9c5db199SXin Li            # Here we catch only timeout errors.
1013*9c5db199SXin Li            # Other errors is filtered by ignore_status=True
1014*9c5db199SXin Li            logging.debug('Attempt to reset servo failed due to timeout;'
1015*9c5db199SXin Li                          ' %s', e)
1016*9c5db199SXin Li            return False
1017*9c5db199SXin Li
1018*9c5db199SXin Li        logging.debug('Wait %s seconds for servo to come back from reset.',
1019*9c5db199SXin Li                      servo_constants.SERVO_RESET_TIMEOUT_SECONDS)
1020*9c5db199SXin Li        time.sleep(servo_constants.SERVO_RESET_TIMEOUT_SECONDS)
1021*9c5db199SXin Li        # change the flag so we can update this label in later process.
1022*9c5db199SXin Li        self.smart_usbhub = True
1023*9c5db199SXin Li        return True
1024*9c5db199SXin Li
1025*9c5db199SXin Li    def reset_servo(self):
1026*9c5db199SXin Li        """Reset(power-cycle) the servo via smart usbhub.
1027*9c5db199SXin Li        """
1028*9c5db199SXin Li        if not self.is_labstation():
1029*9c5db199SXin Li            logging.info('Servo reset is not applicable to servo_v3.')
1030*9c5db199SXin Li            return
1031*9c5db199SXin Li
1032*9c5db199SXin Li        pre_reset_devnum = self._get_servo_usb_devnum()
1033*9c5db199SXin Li        logging.info('Servo usb devnum before reset: %s', pre_reset_devnum)
1034*9c5db199SXin Li        result = self._reset_servo()
1035*9c5db199SXin Li        if not result:
1036*9c5db199SXin Li            message = ('Failed to reset servo with serial: %s. (Please ignore'
1037*9c5db199SXin Li                       ' this error if the DUT is not connected to a smart'
1038*9c5db199SXin Li                       ' usbhub).' % self.servo_serial)
1039*9c5db199SXin Li            logging.warning(message)
1040*9c5db199SXin Li            self.record('INFO', None, None, message)
1041*9c5db199SXin Li            return
1042*9c5db199SXin Li
1043*9c5db199SXin Li        post_reset_devnum = self._get_servo_usb_devnum()
1044*9c5db199SXin Li        logging.info('Servo usb devnum after reset: %s', post_reset_devnum)
1045*9c5db199SXin Li        if not (pre_reset_devnum and post_reset_devnum):
1046*9c5db199SXin Li            message = ('Servo reset completed but unable to verify'
1047*9c5db199SXin Li                       ' devnum change!')
1048*9c5db199SXin Li        elif pre_reset_devnum != post_reset_devnum:
1049*9c5db199SXin Li            message = ('Reset servo with serial %s completed successfully!'
1050*9c5db199SXin Li                       % self.servo_serial)
1051*9c5db199SXin Li        else:
1052*9c5db199SXin Li            message = 'Servo reset completed but devnum is still not changed!'
1053*9c5db199SXin Li        logging.info(message)
1054*9c5db199SXin Li        self.record('INFO', None, None, message)
1055*9c5db199SXin Li
1056*9c5db199SXin Li    def _extract_compressed_logs(self, logdir, relevant_files):
1057*9c5db199SXin Li        """Decompress servod logs in |logdir|.
1058*9c5db199SXin Li
1059*9c5db199SXin Li        @param logdir: directory containing compressed servod logs.
1060*9c5db199SXin Li        @param relevant_files: list of files in |logdir| to consider.
1061*9c5db199SXin Li
1062*9c5db199SXin Li        @returns: tuple, (tarfiles, files) where
1063*9c5db199SXin Li                  tarfiles: list of the compressed filenames that have been
1064*9c5db199SXin Li                            extracted and deleted
1065*9c5db199SXin Li                  files:  list of the uncompressed files that were generated
1066*9c5db199SXin Li        """
1067*9c5db199SXin Li        # For all tar-files, first extract them to the directory, and
1068*9c5db199SXin Li        # then let the common flow handle them.
1069*9c5db199SXin Li        tarfiles = [cf for cf in relevant_files if
1070*9c5db199SXin Li                    cf.endswith(self.COMPRESSION_SUFFIX)]
1071*9c5db199SXin Li        files = []
1072*9c5db199SXin Li        for f in tarfiles:
1073*9c5db199SXin Li            norm_name = os.path.basename(f)[:-len(self.COMPRESSION_SUFFIX)]
1074*9c5db199SXin Li            with tarfile.open(f) as tf:
1075*9c5db199SXin Li                # Each tarfile has only one member, as
1076*9c5db199SXin Li                # that's the compressed log.
1077*9c5db199SXin Li                member = tf.members[0]
1078*9c5db199SXin Li                # Manipulate so that it only extracts the basename, and not
1079*9c5db199SXin Li                # the directories etc.
1080*9c5db199SXin Li                member.name = norm_name
1081*9c5db199SXin Li                files.append(os.path.join(logdir, member.name))
1082*9c5db199SXin Li                tf.extract(member, logdir)
1083*9c5db199SXin Li            # File has been extracted: remove the compressed file.
1084*9c5db199SXin Li            os.remove(f)
1085*9c5db199SXin Li        return tarfiles, files
1086*9c5db199SXin Li
1087*9c5db199SXin Li    def _extract_mcu_logs(self, log_subdir):
1088*9c5db199SXin Li        """Extract MCU (EC, Cr50, etc) console output from servod debug logs.
1089*9c5db199SXin Li
1090*9c5db199SXin Li        Using the MCU_EXTRACTOR regex (above) extract and split out MCU console
1091*9c5db199SXin Li        lines from the logs to generate invidiual console logs e.g. after
1092*9c5db199SXin Li        this method, you can find an ec.txt and servo_v4.txt in |log_dir| if
1093*9c5db199SXin Li        those MCUs had any console input/output.
1094*9c5db199SXin Li
1095*9c5db199SXin Li        @param log_subdir: directory with log.DEBUG.txt main servod debug logs.
1096*9c5db199SXin Li        """
1097*9c5db199SXin Li        # Extract the MCU for each one. The MCU logs are only in the .DEBUG
1098*9c5db199SXin Li        # files
1099*9c5db199SXin Li        mcu_lines_file = os.path.join(log_subdir, 'log.DEBUG.txt')
1100*9c5db199SXin Li        if not os.path.exists(mcu_lines_file):
1101*9c5db199SXin Li            logging.info('No DEBUG logs found to extract MCU logs from.')
1102*9c5db199SXin Li            return
1103*9c5db199SXin Li        mcu_files = {}
1104*9c5db199SXin Li        mcu_file_template = '%s.txt'
1105*9c5db199SXin Li        with open(mcu_lines_file, 'r') as f:
1106*9c5db199SXin Li            for line in f:
1107*9c5db199SXin Li                match = self.MCU_EXTRACTOR.match(line)
1108*9c5db199SXin Li                if match:
1109*9c5db199SXin Li                    mcu = match.group(self.MCU_GROUP).lower()
1110*9c5db199SXin Li                    # The |mcu| might contain a '/' in it. Replace it with '.'
1111*9c5db199SXin Li                    # to avoid generating bad filepaths.
1112*9c5db199SXin Li                    mcu = mcu.replace('/', '.')
1113*9c5db199SXin Li                    line = match.group(self.LINE_GROUP)
1114*9c5db199SXin Li                    if mcu not in mcu_files:
1115*9c5db199SXin Li                        mcu_file = os.path.join(log_subdir,
1116*9c5db199SXin Li                                                mcu_file_template % mcu)
1117*9c5db199SXin Li                        mcu_files[mcu] = open(mcu_file, 'a')
1118*9c5db199SXin Li                    fd = mcu_files[mcu]
1119*9c5db199SXin Li                    fd.write(line + '\n')
1120*9c5db199SXin Li        for f in mcu_files:
1121*9c5db199SXin Li            mcu_files[f].close()
1122*9c5db199SXin Li
1123*9c5db199SXin Li    def remove_latest_log_symlinks(self):
1124*9c5db199SXin Li        """Remove the conveninence symlinks 'latest' servod logs."""
1125*9c5db199SXin Li        symlink_wildcard = '%s/latest*' % self.remote_log_dir
1126*9c5db199SXin Li        cmd = 'rm ' + symlink_wildcard
1127*9c5db199SXin Li        self.run(cmd, stderr_tee=None, ignore_status=True)
1128*9c5db199SXin Li
1129*9c5db199SXin Li    def probe_servod_restart(self, instance_ts, outdir):
1130*9c5db199SXin Li        """Grab servod logs from previous instances if part of this session.
1131*9c5db199SXin Li
1132*9c5db199SXin Li        If since the last time this host called start_servod() servod crashed
1133*9c5db199SXin Li        and restarted, this helper finds those logs as well, and stores them
1134*9c5db199SXin Li        with the |OLD_LOG_SUFFIX| to investigate if necessary.
1135*9c5db199SXin Li
1136*9c5db199SXin Li        It also issues a panicinfo command to servo devices after the restart
1137*9c5db199SXin Li        to try and collect reboot information for debugging.
1138*9c5db199SXin Li
1139*9c5db199SXin Li        @param instance_ts: the log timestamp that the current instance uses
1140*9c5db199SXin Li        @param outdir: directory to create a subdirectory into to place the
1141*9c5db199SXin Li                       servod logs into.
1142*9c5db199SXin Li        """
1143*9c5db199SXin Li        if self._initial_instance_ts is None:
1144*9c5db199SXin Li            logging.info('No log timestamp grabbed successfully on servod '
1145*9c5db199SXin Li                         'startup. Cannot check device restarts. Ignoring.')
1146*9c5db199SXin Li            return
1147*9c5db199SXin Li        if instance_ts == self._initial_instance_ts:
1148*9c5db199SXin Li            logging.debug('Servod appears to have run without restarting')
1149*9c5db199SXin Li            return
1150*9c5db199SXin Li        # Servod seems to have restarted (at least once). |_initial_instance_ts|
1151*9c5db199SXin Li        # is the first timestamp, and instance_ts is the current timestamp. Find
1152*9c5db199SXin Li        # all timestamps in between them, and grab the logs for each.
1153*9c5db199SXin Li        tss = self._find_instance_timestamps_between(self._initial_instance_ts,
1154*9c5db199SXin Li                                                     instance_ts)
1155*9c5db199SXin Li        logging.info('Servod has restarted %d times between the start and the '
1156*9c5db199SXin Li                     'end of this servo_host.', len(tss))
1157*9c5db199SXin Li        logging.info('This might be an issue. Will extract all logs from each '
1158*9c5db199SXin Li                     'instance.')
1159*9c5db199SXin Li        logging.info('Logs that are not the currently running (about to turn '
1160*9c5db199SXin Li                     'down) instance are maked with a .%s in their folder.',
1161*9c5db199SXin Li                     self.OLD_LOG_SUFFIX)
1162*9c5db199SXin Li        for ts in tss:
1163*9c5db199SXin Li            self.get_instance_logs(ts, outdir, old=True)
1164*9c5db199SXin Li        # Lastly, servod has restarted due to a potential issue. Try to get
1165*9c5db199SXin Li        # panic information from servo micro and servo v4 for the current logs.
1166*9c5db199SXin Li        # This can only happen if the |_servo| attribute is initialized.
1167*9c5db199SXin Li        if self._servo:
1168*9c5db199SXin Li            for mcu in ['servo_micro', 'servo_v4', 'servo_v4p1']:
1169*9c5db199SXin Li                ctrl = '%s_uart_cmd' % mcu
1170*9c5db199SXin Li                if self._servo.has_control(ctrl):
1171*9c5db199SXin Li                    logging.info('Trying to retrieve %r panicinfo into logs',
1172*9c5db199SXin Li                                 mcu)
1173*9c5db199SXin Li                    try:
1174*9c5db199SXin Li                        self._servo.set_nocheck(ctrl, 'panicinfo')
1175*9c5db199SXin Li                    except error.TestFail as e:
1176*9c5db199SXin Li                        logging.error('Failed to generate panicinfo for %r '
1177*9c5db199SXin Li                                      'logs. %s', mcu, str(e))
1178*9c5db199SXin Li
1179*9c5db199SXin Li    def _find_instance_timestamps_between(self, start_ts, end_ts):
1180*9c5db199SXin Li        """Find all log timestamps between [start_ts, end_ts).
1181*9c5db199SXin Li
1182*9c5db199SXin Li        @param start_ts: str, earliest log timestamp of interest
1183*9c5db199SXin Li        @param end_ts: str, latest log timestamp of interest
1184*9c5db199SXin Li
1185*9c5db199SXin Li        @returns: list, all timestamps between start_ts and end_ts, end_ts
1186*9c5db199SXin Li                  exclusive, on the servo_host. An empty list on errors
1187*9c5db199SXin Li        """
1188*9c5db199SXin Li        # Simply get all timestamp, and then sort and remove
1189*9c5db199SXin Li        cmd = 'ls %s' % self.remote_log_dir
1190*9c5db199SXin Li        res = self.run(cmd, stderr_tee=None, ignore_status=True)
1191*9c5db199SXin Li        if res.exit_status != 0:
1192*9c5db199SXin Li            # Here we failed to find anything.
1193*9c5db199SXin Li            logging.info('Failed to find remote servod logs. Ignoring.')
1194*9c5db199SXin Li            return []
1195*9c5db199SXin Li        logfiles = self._to_str(res.stdout.strip().split())
1196*9c5db199SXin Li        timestamps = set()
1197*9c5db199SXin Li        for logfile in logfiles:
1198*9c5db199SXin Li            ts_match = self.TS_EXTRACTOR.match(logfile)
1199*9c5db199SXin Li            if not ts_match:
1200*9c5db199SXin Li                # Simply ignore files that fail the check. It might be the
1201*9c5db199SXin Li                # 'latest' symlinks or random files.
1202*9c5db199SXin Li                continue
1203*9c5db199SXin Li            timestamps.add(ts_match.group(self.TS_GROUP))
1204*9c5db199SXin Li        # At this point we have all unique timestamps.
1205*9c5db199SXin Li        timestamps = sorted(timestamps)
1206*9c5db199SXin Li        for ts in [start_ts, end_ts]:
1207*9c5db199SXin Li            if ts not in timestamps:
1208*9c5db199SXin Li                logging.error('Timestamp %r not in servod logs. Cannot query '
1209*9c5db199SXin Li                              'for timestamps in between %r and %r', ts,
1210*9c5db199SXin Li                              start_ts, end_ts)
1211*9c5db199SXin Li                return []
1212*9c5db199SXin Li        return timestamps[timestamps.index(start_ts):timestamps.index(end_ts)]
1213*9c5db199SXin Li
1214*9c5db199SXin Li    def get_instance_logs_ts(self):
1215*9c5db199SXin Li        """Retrieve the currently running servod instance's log timestamp
1216*9c5db199SXin Li
1217*9c5db199SXin Li        @returns: str, timestamp for current instance, or None on failure
1218*9c5db199SXin Li        """
1219*9c5db199SXin Li        # First, extract the timestamp. This cmd gives the real filename of
1220*9c5db199SXin Li        # the latest aka current log file.
1221*9c5db199SXin Li        cmd = ('if [ -f %(dir)s/latest.DEBUG ];'
1222*9c5db199SXin Li               'then realpath %(dir)s/latest.DEBUG;'
1223*9c5db199SXin Li               'elif [ -f %(dir)s/latest ];'
1224*9c5db199SXin Li               'then realpath %(dir)s/latest;'
1225*9c5db199SXin Li               'else exit %(code)d;'
1226*9c5db199SXin Li               'fi' % {'dir': self.remote_log_dir,
1227*9c5db199SXin Li                       'code': self.NO_SYMLINKS_CODE})
1228*9c5db199SXin Li        res = self.run(cmd, stderr_tee=None, ignore_status=True)
1229*9c5db199SXin Li        if res.exit_status != 0:
1230*9c5db199SXin Li            if res.exit_status == self.NO_SYMLINKS_CODE:
1231*9c5db199SXin Li                logging.warning('servod log latest symlinks not found. '
1232*9c5db199SXin Li                                'This is likely due to an error starting up '
1233*9c5db199SXin Li                                'servod. Ignoring..')
1234*9c5db199SXin Li            else:
1235*9c5db199SXin Li                logging.warning('Failed to find servod logs on servo host.')
1236*9c5db199SXin Li                logging.warning(res.stderr.strip())
1237*9c5db199SXin Li            return None
1238*9c5db199SXin Li        fname = os.path.basename(self._to_str(res.stdout.strip()))
1239*9c5db199SXin Li        # From the fname, ought to extract the timestamp using the TS_EXTRACTOR
1240*9c5db199SXin Li        if type(fname) == type(b' '):
1241*9c5db199SXin Li            fname = fname.decode("utf-8")
1242*9c5db199SXin Li        ts_match = self.TS_EXTRACTOR.match(fname)
1243*9c5db199SXin Li        if not ts_match:
1244*9c5db199SXin Li            logging.warning('Failed to extract timestamp from servod log file '
1245*9c5db199SXin Li                            '%r. Skipping. The servo host is using outdated '
1246*9c5db199SXin Li                            'servod logging and needs to be updated.', fname)
1247*9c5db199SXin Li            return None
1248*9c5db199SXin Li        return ts_match.group(self.TS_GROUP)
1249*9c5db199SXin Li
1250*9c5db199SXin Li    def get_servohost_logs(self, outdir):
1251*9c5db199SXin Li        """Get logs that can help debugging servo/servod problem from
1252*9c5db199SXin Li        the servohost
1253*9c5db199SXin Li
1254*9c5db199SXin Li        @param outdir: directory to create a subdirectory into to place the
1255*9c5db199SXin Li                       servod logs into.
1256*9c5db199SXin Li        """
1257*9c5db199SXin Li        log_dir = os.path.join(outdir, 'servohost_%s' % self.hostname)
1258*9c5db199SXin Li        if os.path.isdir(log_dir):
1259*9c5db199SXin Li            # In multi-DUTs testing, each DUTs will may their own servohost
1260*9c5db199SXin Li            # instance, where could cause duplicate efforts if they share a
1261*9c5db199SXin Li            # same servohost, so we can just skip the collect if the log
1262*9c5db199SXin Li            # dir already exists.
1263*9c5db199SXin Li            logging.info(
1264*9c5db199SXin Li                    'Skip dmesg and messages logs collecting as %s'
1265*9c5db199SXin Li                    ' already exists.', log_dir)
1266*9c5db199SXin Li            return
1267*9c5db199SXin Li        logging.info('Collecting dmesg and messages from servohost %s',
1268*9c5db199SXin Li                     self.hostname)
1269*9c5db199SXin Li        os.mkdir(log_dir)
1270*9c5db199SXin Li        logging.info('Saving servohost logs to %s.', log_dir)
1271*9c5db199SXin Li        # First collect dmesg from the servohost.
1272*9c5db199SXin Li        crashcollect.collect_command(self, 'dmesg -H',
1273*9c5db199SXin Li                                     os.path.join(log_dir, 'dmesg'))
1274*9c5db199SXin Li        # Collect messages log from the servohost.
1275*9c5db199SXin Li        if not self.is_containerized_servod():
1276*9c5db199SXin Li            try:
1277*9c5db199SXin Li                self.get_file('/var/log/messages', log_dir, try_rsync=False)
1278*9c5db199SXin Li            except error.AutoservRunError:
1279*9c5db199SXin Li                logging.warning(
1280*9c5db199SXin Li                        'Failed to collect messages log from servohost.')
1281*9c5db199SXin Li
1282*9c5db199SXin Li    def get_servod_startup_log(self, outdir):
1283*9c5db199SXin Li        """Get servod start_up log, this log is available even servod was
1284*9c5db199SXin Li        not started successfully.
1285*9c5db199SXin Li
1286*9c5db199SXin Li        @param outdir: directory to create a subdirectory into to place the
1287*9c5db199SXin Li                       servod logs into.
1288*9c5db199SXin Li        """
1289*9c5db199SXin Li        if self.is_containerized_servod():
1290*9c5db199SXin Li            return
1291*9c5db199SXin Li        log_dir = os.path.join(outdir, 'servod_startup_%s' % self.servo_port)
1292*9c5db199SXin Li        os.mkdir(log_dir)
1293*9c5db199SXin Li        start_up_log = '/var/log/servod_%s.STARTUP.log' % self.servo_port
1294*9c5db199SXin Li        try:
1295*9c5db199SXin Li            self.get_file(start_up_log, log_dir, try_rsync=False)
1296*9c5db199SXin Li        except error.AutoservRunError:
1297*9c5db199SXin Li            logging.warning('Failed to collect servod start up log'
1298*9c5db199SXin Li                            ' from servohost.')
1299*9c5db199SXin Li
1300*9c5db199SXin Li    def get_instance_logs(self, instance_ts, outdir, old=False):
1301*9c5db199SXin Li        """Collect all logs with |instance_ts| and dump into a dir in |outdir|
1302*9c5db199SXin Li
1303*9c5db199SXin Li        This method first collects all logs on the servo_host side pertaining
1304*9c5db199SXin Li        to this servod instance (port, instatiation). It glues them together
1305*9c5db199SXin Li        into combined log.[level].txt files and extracts all available MCU
1306*9c5db199SXin Li        console I/O from the logs into individual files e.g. servo_v4.txt
1307*9c5db199SXin Li
1308*9c5db199SXin Li        All the output can be found in a directory inside |outdir| that
1309*9c5db199SXin Li        this generates based on |LOG_DIR|, the servod port, and the instance
1310*9c5db199SXin Li        timestamp on the servo_host side.
1311*9c5db199SXin Li
1312*9c5db199SXin Li        @param instance_ts: log timestamp to grab logfiles for
1313*9c5db199SXin Li        @param outdir: directory to create a subdirectory into to place the
1314*9c5db199SXin Li                       servod logs into.
1315*9c5db199SXin Li        @param old: bool, whether to append |OLD_LOG_SUFFIX| to output dir
1316*9c5db199SXin Li        """
1317*9c5db199SXin Li        # Create the local results log dir.
1318*9c5db199SXin Li        log_dir = os.path.join(outdir, '%s_%s.%s' % (self.LOG_DIR,
1319*9c5db199SXin Li                                                     str(self.servo_port),
1320*9c5db199SXin Li                                                     instance_ts))
1321*9c5db199SXin Li        if old:
1322*9c5db199SXin Li            log_dir = '%s.%s' % (log_dir, self.OLD_LOG_SUFFIX)
1323*9c5db199SXin Li        logging.info('Saving servod logs to %r.', log_dir)
1324*9c5db199SXin Li        os.mkdir(log_dir)
1325*9c5db199SXin Li        # Now, get all files with that timestamp.
1326*9c5db199SXin Li        cmd = 'find %s -maxdepth 1 -name "log.%s*"' % (self.remote_log_dir,
1327*9c5db199SXin Li                                                       instance_ts)
1328*9c5db199SXin Li        res = self.run(cmd, stderr_tee=None, ignore_status=True)
1329*9c5db199SXin Li        files = self._to_str(res.stdout.strip()).split()
1330*9c5db199SXin Li        try:
1331*9c5db199SXin Li            if self.is_containerized_servod():
1332*9c5db199SXin Li                client = docker_utils.get_docker_client()
1333*9c5db199SXin Li                container = client.containers.get(self.servod_container_name)
1334*9c5db199SXin Li                for f in files:
1335*9c5db199SXin Li                    file_stream, stat = container.get_archive(f)
1336*9c5db199SXin Li                    tf = tempfile.NamedTemporaryFile(delete=False)
1337*9c5db199SXin Li                    for block in file_stream:
1338*9c5db199SXin Li                        tf.write(block)
1339*9c5db199SXin Li                    tf.close()
1340*9c5db199SXin Li                    pw_tar = tarfile.TarFile(tf.name)
1341*9c5db199SXin Li                    pw_tar.extractall(log_dir)
1342*9c5db199SXin Li                    os.remove(tf.name)
1343*9c5db199SXin Li            else:
1344*9c5db199SXin Li                self.get_file(files, log_dir, try_rsync=False)
1345*9c5db199SXin Li
1346*9c5db199SXin Li            if not os.listdir(log_dir):
1347*9c5db199SXin Li                logging.info('No servod logs retrieved. Ignoring, and removing '
1348*9c5db199SXin Li                             '%r again.', log_dir)
1349*9c5db199SXin Li                os.rmdir(log_dir)
1350*9c5db199SXin Li                return
1351*9c5db199SXin Li        except docker.errors.NotFound:
1352*9c5db199SXin Li            logging.info("Servod container %s not found no need to stop it.",
1353*9c5db199SXin Li                         self.hostname)
1354*9c5db199SXin Li        except error.AutoservRunError as e:
1355*9c5db199SXin Li            result = e.result_obj
1356*9c5db199SXin Li            if result.exit_status != 0:
1357*9c5db199SXin Li                stderr = result.stderr.strip()
1358*9c5db199SXin Li                logging.warning("Couldn't retrieve servod logs. Ignoring: %s",
1359*9c5db199SXin Li                                stderr or '\n%s' % result)
1360*9c5db199SXin Li            # Remove the log_dir as nothing was added to it.
1361*9c5db199SXin Li            os.rmdir(log_dir)
1362*9c5db199SXin Li            return
1363*9c5db199SXin Li        local_files = [os.path.join(log_dir, f) for f in os.listdir(log_dir)]
1364*9c5db199SXin Li        # TODO(crrev.com/c/1793030): remove no-level case once CL is pushed
1365*9c5db199SXin Li        for level_name in ('DEBUG', 'INFO', 'WARNING', ''):
1366*9c5db199SXin Li            # Create the joint files for each loglevel. i.e log.DEBUG
1367*9c5db199SXin Li            joint_file = self.JOINT_LOG_PREFIX
1368*9c5db199SXin Li            if level_name:
1369*9c5db199SXin Li                joint_file = '%s.%s' % (self.JOINT_LOG_PREFIX, level_name)
1370*9c5db199SXin Li            # This helps with some online tools to avoid complaints about an
1371*9c5db199SXin Li            # unknown filetype.
1372*9c5db199SXin Li            joint_file = joint_file + '.txt'
1373*9c5db199SXin Li            joint_path = os.path.join(log_dir, joint_file)
1374*9c5db199SXin Li            files = [f for f in local_files if level_name in f]
1375*9c5db199SXin Li            if not files:
1376*9c5db199SXin Li                # TODO(crrev.com/c/1793030): remove no-level case once CL
1377*9c5db199SXin Li                # is pushed
1378*9c5db199SXin Li                continue
1379*9c5db199SXin Li            # Extract compressed logs if any.
1380*9c5db199SXin Li            compressed, extracted = self._extract_compressed_logs(log_dir,
1381*9c5db199SXin Li                                                                  files)
1382*9c5db199SXin Li            files = list(set(files) - set(compressed))
1383*9c5db199SXin Li            files.extend(extracted)
1384*9c5db199SXin Li            # Need to sort. As they all share the same timestamp, and
1385*9c5db199SXin Li            # loglevel, the index itself is sufficient. The highest index
1386*9c5db199SXin Li            # is the oldest file, therefore we need a descending sort.
1387*9c5db199SXin Li            def sortkey(f, level=level_name):
1388*9c5db199SXin Li                """Custom sortkey to sort based on rotation number int."""
1389*9c5db199SXin Li                if f.endswith(level_name): return 0
1390*9c5db199SXin Li                return int(f.split('.')[-1])
1391*9c5db199SXin Li
1392*9c5db199SXin Li            files.sort(reverse=True, key=sortkey)
1393*9c5db199SXin Li            # Just rename the first file rather than building from scratch.
1394*9c5db199SXin Li            os.rename(files[0], joint_path)
1395*9c5db199SXin Li            with open(joint_path, 'a') as joint_f:
1396*9c5db199SXin Li                for logfile in files[1:]:
1397*9c5db199SXin Li                    # Transfer the file to the joint file line by line.
1398*9c5db199SXin Li                    with open(logfile, 'r') as log_f:
1399*9c5db199SXin Li                        for line in log_f:
1400*9c5db199SXin Li                            joint_f.write(line)
1401*9c5db199SXin Li                    # File has been written over. Delete safely.
1402*9c5db199SXin Li                    os.remove(logfile)
1403*9c5db199SXin Li            # Need to remove all files form |local_files| so we don't
1404*9c5db199SXin Li            # analyze them again.
1405*9c5db199SXin Li            local_files = list(set(local_files) - set(files) - set(compressed))
1406*9c5db199SXin Li        # Lastly, extract MCU logs from the joint logs.
1407*9c5db199SXin Li        self._extract_mcu_logs(log_dir)
1408*9c5db199SXin Li
1409*9c5db199SXin Li    def _lock(self):
1410*9c5db199SXin Li        """lock servohost by touching a file.
1411*9c5db199SXin Li        """
1412*9c5db199SXin Li        logging.debug('Locking servohost %s by touching %s file',
1413*9c5db199SXin Li                      self.hostname, self._lock_file)
1414*9c5db199SXin Li        self.run('touch %s' % self._lock_file, ignore_status=True)
1415*9c5db199SXin Li        self._is_locked = True
1416*9c5db199SXin Li
1417*9c5db199SXin Li    def _unlock(self):
1418*9c5db199SXin Li        """Unlock servohost by removing the lock file.
1419*9c5db199SXin Li        """
1420*9c5db199SXin Li        logging.debug('Unlocking servohost by removing %s file',
1421*9c5db199SXin Li                      self._lock_file)
1422*9c5db199SXin Li        self.run('rm %s' % self._lock_file, ignore_status=True)
1423*9c5db199SXin Li        self._is_locked = False
1424*9c5db199SXin Li
1425*9c5db199SXin Li    def close(self):
1426*9c5db199SXin Li        """Close the associated servo and the host object."""
1427*9c5db199SXin Li        # NOTE: throughout this method there are multiple attempts to catch
1428*9c5db199SXin Li        # all errors. This is WAI as log grabbing should not fail tests.
1429*9c5db199SXin Li        # However, the goal is to catch and handle/process all errors, thus
1430*9c5db199SXin Li        # we print the traceback and ask for a bug.
1431*9c5db199SXin Li        if self._closed:
1432*9c5db199SXin Li            logging.debug('ServoHost is already closed.')
1433*9c5db199SXin Li            return
1434*9c5db199SXin Li
1435*9c5db199SXin Li        # Only attempt ssh related actions if servohost is sshable. We call
1436*9c5db199SXin Li        # check_cached_up_status() first because it's lightweighted and return
1437*9c5db199SXin Li        # much faster in the case servohost is down, however, we still want
1438*9c5db199SXin Li        # to call is_up() later since check_cached_up_status() is ping based check
1439*9c5db199SXin Li        # and not guarantee the servohost is sshable.
1440*9c5db199SXin Li        servo_host_ready = self.check_cached_up_status() and self.is_up()
1441*9c5db199SXin Li
1442*9c5db199SXin Li        # If the dockerized servod is in used, we can start a new servod container
1443*9c5db199SXin Li        # with out the servod process for log collection.
1444*9c5db199SXin Li        if (not self.is_localhost() and not servo_host_ready
1445*9c5db199SXin Li                    and self.is_containerized_servod()):
1446*9c5db199SXin Li            logging.info(
1447*9c5db199SXin Li                    'Start servod container without servod for log collection.'
1448*9c5db199SXin Li            )
1449*9c5db199SXin Li            self.start_containerized_servod(with_servod=False)
1450*9c5db199SXin Li            servo_host_ready = True
1451*9c5db199SXin Li
1452*9c5db199SXin Li        # TODO(crbug.com/1011516): once enabled, remove the check against
1453*9c5db199SXin Li        # localhost and instead check against log-rotiation enablement.
1454*9c5db199SXin Li        should_collect_log = (servo_host_ready and self.job
1455*9c5db199SXin Li                              and not self.is_localhost())
1456*9c5db199SXin Li        if should_collect_log:
1457*9c5db199SXin Li            instance_ts = self.get_instance_logs_ts()
1458*9c5db199SXin Li        else:
1459*9c5db199SXin Li            logging.info('Servohost is down, will skip servod log collecting.')
1460*9c5db199SXin Li            instance_ts = None
1461*9c5db199SXin Li        servod_logs_available = instance_ts is not None
1462*9c5db199SXin Li        if servod_logs_available:
1463*9c5db199SXin Li            # Probe whether there was a servod restart, and grab those old
1464*9c5db199SXin Li            # logs as well.
1465*9c5db199SXin Li            try:
1466*9c5db199SXin Li                self.probe_servod_restart(instance_ts, self.job.resultdir)
1467*9c5db199SXin Li            except (error.AutoservRunError, error.TestFail) as e:
1468*9c5db199SXin Li                logging.info('Failed to grab servo logs due to: %s. '
1469*9c5db199SXin Li                             'This error is forgiven.', str(e))
1470*9c5db199SXin Li            except Exception as e:
1471*9c5db199SXin Li                logging.error('Unexpected error probing for old logs. %s. '
1472*9c5db199SXin Li                              'Forgiven. Please file a bug and fix or catch '
1473*9c5db199SXin Li                              'in log probing function', str(e),
1474*9c5db199SXin Li                              exc_info=True)
1475*9c5db199SXin Li        if self._servo:
1476*9c5db199SXin Li            outdir = None if not self.job else self.job.resultdir
1477*9c5db199SXin Li            # In some cases when we run as lab-tools, the job object is None.
1478*9c5db199SXin Li            self._servo.close(outdir)
1479*9c5db199SXin Li        try:
1480*9c5db199SXin Li            if should_collect_log:
1481*9c5db199SXin Li                self.get_servod_startup_log(self.job.resultdir)
1482*9c5db199SXin Li                self.get_servohost_logs(self.job.resultdir)
1483*9c5db199SXin Li            # Grab current (not old like above) logs after the servo instance
1484*9c5db199SXin Li            # was closed out.
1485*9c5db199SXin Li            if servod_logs_available:
1486*9c5db199SXin Li                self.get_instance_logs(instance_ts, self.job.resultdir)
1487*9c5db199SXin Li        except error.AutoservRunError as e:
1488*9c5db199SXin Li            logging.info(
1489*9c5db199SXin Li                    'Failed to grab servo logs due to: %s. '
1490*9c5db199SXin Li                    'This error is forgiven.', str(e))
1491*9c5db199SXin Li        except Exception as e:
1492*9c5db199SXin Li            logging.error(
1493*9c5db199SXin Li                    'Unexpected error grabbing servod logs. %s. '
1494*9c5db199SXin Li                    'Forgiven. Please file a bug and fix or catch '
1495*9c5db199SXin Li                    'in log grabbing function',
1496*9c5db199SXin Li                    str(e),
1497*9c5db199SXin Li                    exc_info=True)
1498*9c5db199SXin Li
1499*9c5db199SXin Li        if self._is_locked and servo_host_ready:
1500*9c5db199SXin Li            # Remove the lock if the servohost has been locked.
1501*9c5db199SXin Li            try:
1502*9c5db199SXin Li                self._unlock()
1503*9c5db199SXin Li            except error.AutoservSSHTimeout:
1504*9c5db199SXin Li                logging.error('Unlock servohost failed due to ssh timeout.'
1505*9c5db199SXin Li                              ' It may caused by servohost went down during'
1506*9c5db199SXin Li                              ' the task.')
1507*9c5db199SXin Li        # We want always stop servod after task to minimum the impact of bad
1508*9c5db199SXin Li        # servod process interfere other servods.(see crbug.com/1028665)
1509*9c5db199SXin Li        if servo_host_ready:
1510*9c5db199SXin Li            try:
1511*9c5db199SXin Li                self.stop_servod()
1512*9c5db199SXin Li            except error.AutoservRunError as e:
1513*9c5db199SXin Li                logging.info(
1514*9c5db199SXin Li                        "Failed to stop servod due to:\n%s\n"
1515*9c5db199SXin Li                        "This error is forgiven.", str(e))
1516*9c5db199SXin Li
1517*9c5db199SXin Li        super(ServoHost, self).close()
1518*9c5db199SXin Li        # Mark closed.
1519*9c5db199SXin Li        self._closed = True
1520*9c5db199SXin Li
1521*9c5db199SXin Li    def get_servo_state(self):
1522*9c5db199SXin Li        return self._servo_state
1523*9c5db199SXin Li
1524*9c5db199SXin Li    def _get_host_metrics_data(self):
1525*9c5db199SXin Li        return {'port': self.servo_port,
1526*9c5db199SXin Li                'host': self.get_dut_hostname() or self.hostname,
1527*9c5db199SXin Li                'board': self.servo_board or ''}
1528*9c5db199SXin Li
1529*9c5db199SXin Li    def is_servo_board_present_on_servo_v3(self):
1530*9c5db199SXin Li        """Check if servo board is detected on servo_v3"""
1531*9c5db199SXin Li        logging.debug('Started to detect servo board on servo_v3')
1532*9c5db199SXin Li        vid_pids = ['18d1:5004', '0403:6014']
1533*9c5db199SXin Li        not_detected = 'The servo board is not detected on servo_v3'
1534*9c5db199SXin Li        try:
1535*9c5db199SXin Li            cmd = 'lsusb | grep "%s"' % "\|".join(vid_pids)
1536*9c5db199SXin Li            result = self.run(cmd, ignore_status=True, timeout=30)
1537*9c5db199SXin Li            if result.exit_status == 0 and self._to_str(result.stdout.strip()):
1538*9c5db199SXin Li                logging.debug('The servo board is detected on servo_v3')
1539*9c5db199SXin Li                return True
1540*9c5db199SXin Li            logging.debug('%s; %s', not_detected, result)
1541*9c5db199SXin Li            return False
1542*9c5db199SXin Li        except Exception as e:
1543*9c5db199SXin Li            # can be triggered by timeout issue due running the script
1544*9c5db199SXin Li            metrics.Counter(
1545*9c5db199SXin Li                'chromeos/autotest/repair/servo_detection/timeout'
1546*9c5db199SXin Li                ).increment(fields=self._get_host_metrics_data())
1547*9c5db199SXin Li            logging.error('%s; %s', not_detected, str(e))
1548*9c5db199SXin Li        return None
1549*9c5db199SXin Li
1550*9c5db199SXin Li    def _require_cr50_servod_config(self):
1551*9c5db199SXin Li        """Check whether we need start servod with CONFIG=cr50.xml"""
1552*9c5db199SXin Li        dut_host_info = self.get_dut_host_info()
1553*9c5db199SXin Li        if not dut_host_info:
1554*9c5db199SXin Li            return False
1555*9c5db199SXin Li        for pool in dut_host_info.pools:
1556*9c5db199SXin Li            if pool.startswith(servo_constants.CR50_CONFIG_POOL_PREFIX):
1557*9c5db199SXin Li                return True
1558*9c5db199SXin Li        return False
1559*9c5db199SXin Li
1560*9c5db199SXin Li    def get_verifier_state(self, tag):
1561*9c5db199SXin Li        """Return the state of servo verifier.
1562*9c5db199SXin Li
1563*9c5db199SXin Li        @returns: bool or None
1564*9c5db199SXin Li        """
1565*9c5db199SXin Li        return self._repair_strategy.verifier_is_good(tag)
1566*9c5db199SXin Li
1567*9c5db199SXin Li    def get_repair_strategy_node(self, tag):
1568*9c5db199SXin Li        """Return the instance of verifier/repair node for host by tag.
1569*9c5db199SXin Li
1570*9c5db199SXin Li        @returns: _DependencyNode or None
1571*9c5db199SXin Li        """
1572*9c5db199SXin Li        return self._repair_strategy.node_by_tag(tag)
1573*9c5db199SXin Li
1574*9c5db199SXin Li    def determine_servo_state(self):
1575*9c5db199SXin Li        """Determine servo state based on the failed verifier.
1576*9c5db199SXin Li
1577*9c5db199SXin Li        @returns: servo state value
1578*9c5db199SXin Li        The state detecting based on first fail verifier or collecting of
1579*9c5db199SXin Li        them.
1580*9c5db199SXin Li        """
1581*9c5db199SXin Li        ssh = self.get_verifier_state('connection')
1582*9c5db199SXin Li        servo_root_present = self.get_verifier_state('servo_root_present')
1583*9c5db199SXin Li        servo_root_present_node = self.get_repair_strategy_node(
1584*9c5db199SXin Li                'servo_root_present')
1585*9c5db199SXin Li        servo_v3_present = self.get_verifier_state('servo_v3_root_present')
1586*9c5db199SXin Li        servo_fw = self.get_verifier_state('servo_fw')
1587*9c5db199SXin Li        servo_fw_update = self.get_repair_strategy_node('servo_fw_update')
1588*9c5db199SXin Li        servod_dut_controller_missing = self.get_repair_strategy_node(
1589*9c5db199SXin Li                'servod_dut_controller_missing')
1590*9c5db199SXin Li        disk_space = self.get_verifier_state('servo_disk_space')
1591*9c5db199SXin Li        start_servod = self.get_verifier_state('start_servod')
1592*9c5db199SXin Li        servod_started = self.get_verifier_state('servod_started')
1593*9c5db199SXin Li        servod_echo = self.get_verifier_state('servod_echo')
1594*9c5db199SXin Li        create_servo = self.get_verifier_state('servod_connection')
1595*9c5db199SXin Li        init_servo = self.get_verifier_state('servod_control')
1596*9c5db199SXin Li        cr50_low_sbu = self.get_verifier_state('servo_cr50_low_sbu')
1597*9c5db199SXin Li        cr50_off = self.get_verifier_state('servo_cr50_off')
1598*9c5db199SXin Li        servo_topology = self.get_verifier_state('servo_topology')
1599*9c5db199SXin Li        dut_connected = self.get_verifier_state('servo_dut_connected')
1600*9c5db199SXin Li        hub_connected = self.get_verifier_state('servo_hub_connected')
1601*9c5db199SXin Li        pwr_button = self.get_verifier_state('servo_pwr_button')
1602*9c5db199SXin Li        lid_open = self.get_verifier_state('servo_lid_open')
1603*9c5db199SXin Li        ec_board = self.get_verifier_state('servo_ec_console')
1604*9c5db199SXin Li        cr50_console = self.get_verifier_state('servo_cr50_console')
1605*9c5db199SXin Li        ccd_testlab = self.get_verifier_state('servo_ccd_testlab')
1606*9c5db199SXin Li
1607*9c5db199SXin Li        if not ssh:
1608*9c5db199SXin Li            return servo_constants.SERVO_STATE_NO_SSH
1609*9c5db199SXin Li        if start_servod == hosts.VERIFY_FAILED:
1610*9c5db199SXin Li            return servo_constants.SERVO_STATE_SERVO_HOST_ISSUE
1611*9c5db199SXin Li        if servo_root_present == hosts.VERIFY_FAILED:
1612*9c5db199SXin Li            if not self.servo_serial:
1613*9c5db199SXin Li                return servo_constants.SERVO_STATE_WRONG_CONFIG
1614*9c5db199SXin Li            if hasattr(servo_root_present_node, 'serial_mismatch'):
1615*9c5db199SXin Li                return servo_constants.SERVO_STATE_SERIAL_MISMATCH
1616*9c5db199SXin Li            return servo_constants.SERVO_STATE_NOT_CONNECTED
1617*9c5db199SXin Li        if servo_v3_present == hosts.VERIFY_FAILED:
1618*9c5db199SXin Li            # if we cannot find required board on servo_v3
1619*9c5db199SXin Li            return servo_constants.SERVO_STATE_NEED_REPLACEMENT
1620*9c5db199SXin Li        if servo_fw == hosts.VERIFY_FAILED:
1621*9c5db199SXin Li            logging.info(servo_fw_update)
1622*9c5db199SXin Li            if hasattr(servo_fw_update, 'servo_updater_issue_detected'):
1623*9c5db199SXin Li                return servo_constants.SERVO_STATE_SERVO_UPDATER_ISSUE
1624*9c5db199SXin Li            return servo_constants.SERVO_STATE_NEED_REPLACEMENT
1625*9c5db199SXin Li
1626*9c5db199SXin Li        if dut_connected == hosts.VERIFY_FAILED:
1627*9c5db199SXin Li            return servo_constants.SERVO_STATE_DUT_NOT_CONNECTED
1628*9c5db199SXin Li        if hub_connected == hosts.VERIFY_FAILED:
1629*9c5db199SXin Li            logging.info('Servo HUB not connected')
1630*9c5db199SXin Li            return servo_constants.SERVO_STATE_DUT_NOT_CONNECTED
1631*9c5db199SXin Li
1632*9c5db199SXin Li        if cr50_low_sbu == hosts.VERIFY_FAILED:
1633*9c5db199SXin Li            return servo_constants.SERVO_STATE_SBU_LOW_VOLTAGE
1634*9c5db199SXin Li        if cr50_off == hosts.VERIFY_FAILED:
1635*9c5db199SXin Li            return servo_constants.SERVO_STATE_CR50_NOT_ENUMERATED
1636*9c5db199SXin Li
1637*9c5db199SXin Li        if servod_dut_controller_missing == hosts.VERIFY_FAILED:
1638*9c5db199SXin Li            return servo_constants.SERVO_STATE_SERVOD_DUT_CONTROLLER_MISSING
1639*9c5db199SXin Li        if servo_topology == hosts.VERIFY_FAILED:
1640*9c5db199SXin Li            return servo_constants.SERVO_STATE_TOPOLOGY_ISSUE
1641*9c5db199SXin Li
1642*9c5db199SXin Li        # TODO(otabek@): detect special cases detected by pwr_button
1643*9c5db199SXin Li        if dut_connected == hosts.VERIFY_SUCCESS:
1644*9c5db199SXin Li            if pwr_button == hosts.VERIFY_FAILED:
1645*9c5db199SXin Li                metrics.Counter(
1646*9c5db199SXin Li                        'chromeos/autotest/repair/servo_unexpected/pwr_button2'
1647*9c5db199SXin Li                ).increment(fields=self._get_host_metrics_data())
1648*9c5db199SXin Li
1649*9c5db199SXin Li        if (servod_started == hosts.VERIFY_FAILED
1650*9c5db199SXin Li                    or servod_echo == hosts.VERIFY_FAILED):
1651*9c5db199SXin Li            return servo_constants.SERVO_STATE_SERVOD_ISSUE
1652*9c5db199SXin Li
1653*9c5db199SXin Li        # one of the reason why servo can not initialized
1654*9c5db199SXin Li        if cr50_console == hosts.VERIFY_FAILED:
1655*9c5db199SXin Li            return servo_constants.SERVO_STATE_CR50_CONSOLE_MISSING
1656*9c5db199SXin Li        if ccd_testlab == hosts.VERIFY_FAILED:
1657*9c5db199SXin Li            return servo_constants.SERVO_STATE_CCD_TESTLAB_ISSUE
1658*9c5db199SXin Li
1659*9c5db199SXin Li        if (create_servo == hosts.VERIFY_FAILED
1660*9c5db199SXin Li                    or init_servo == hosts.VERIFY_FAILED):
1661*9c5db199SXin Li            return servo_constants.SERVO_STATE_SERVOD_PROXY_ISSUE
1662*9c5db199SXin Li
1663*9c5db199SXin Li        if ec_board == hosts.VERIFY_FAILED:
1664*9c5db199SXin Li            return servo_constants.SERVO_STATE_EC_BROKEN
1665*9c5db199SXin Li        if pwr_button == hosts.VERIFY_FAILED:
1666*9c5db199SXin Li            return servo_constants.SERVO_STATE_BAD_RIBBON_CABLE
1667*9c5db199SXin Li        if lid_open == hosts.VERIFY_FAILED:
1668*9c5db199SXin Li            return servo_constants.SERVO_STATE_LID_OPEN_FAILED
1669*9c5db199SXin Li
1670*9c5db199SXin Li        metrics.Counter(
1671*9c5db199SXin Li            'chromeos/autotest/repair/unknown_servo_state'
1672*9c5db199SXin Li            ).increment(fields=self._get_host_metrics_data())
1673*9c5db199SXin Li        logging.info('We do not have special state for this failure yet :)')
1674*9c5db199SXin Li        return servo_constants.SERVO_STATE_BROKEN
1675*9c5db199SXin Li
1676*9c5db199SXin Li    def is_servo_topology_supported(self):
1677*9c5db199SXin Li        """Check if servo_topology is supported."""
1678*9c5db199SXin Li        if self.is_containerized_servod():
1679*9c5db199SXin Li            logging.info('Servod is running within a container.')
1680*9c5db199SXin Li            return True
1681*9c5db199SXin Li        if not self.is_up_fast():
1682*9c5db199SXin Li            logging.info('Servo-Host is not reachable.')
1683*9c5db199SXin Li            return False
1684*9c5db199SXin Li        if not self.servo_serial:
1685*9c5db199SXin Li            logging.info('Servo-topology required a servo serial.')
1686*9c5db199SXin Li            return False
1687*9c5db199SXin Li        if self.is_labstation():
1688*9c5db199SXin Li            logging.info('Servod is running within labstation.')
1689*9c5db199SXin Li            return True
1690*9c5db199SXin Li        return False
1691*9c5db199SXin Li
1692*9c5db199SXin Li    def get_topology(self):
1693*9c5db199SXin Li        """Get servo topology."""
1694*9c5db199SXin Li        if not self._topology:
1695*9c5db199SXin Li            self._topology = servo_topology.ServoTopology(self)
1696*9c5db199SXin Li        return self._topology
1697*9c5db199SXin Li
1698*9c5db199SXin Li    def is_dual_setup(self):
1699*9c5db199SXin Li        """Check is servo will run in dual setup.
1700*9c5db199SXin Li
1701*9c5db199SXin Li        Dual setup used only for servo_v4 when used ccd_cr50 and servo_micro
1702*9c5db199SXin Li        at the same time.
1703*9c5db199SXin Li        """
1704*9c5db199SXin Li        return self.servo_setup == servo_constants.SERVO_SETUP_VALUE_DUAL_V4
1705*9c5db199SXin Li
1706*9c5db199SXin Li    def set_dut_health_profile(self, dut_health_profile):
1707*9c5db199SXin Li        """
1708*9c5db199SXin Li        @param dut_health_profile: A DeviceHealthProfile object.
1709*9c5db199SXin Li        """
1710*9c5db199SXin Li        logging.debug('setting dut_health_profile field to (%s)',
1711*9c5db199SXin Li                      dut_health_profile)
1712*9c5db199SXin Li        self._dut_health_profile = dut_health_profile
1713*9c5db199SXin Li
1714*9c5db199SXin Li    def get_dut_health_profile(self):
1715*9c5db199SXin Li        """
1716*9c5db199SXin Li        @return A DeviceHealthProfile object.
1717*9c5db199SXin Li        """
1718*9c5db199SXin Li        return self._dut_health_profile
1719*9c5db199SXin Li
1720*9c5db199SXin Li    def print_all_servo_of_host(self):
1721*9c5db199SXin Li        """Print all servos detected on the host."""
1722*9c5db199SXin Li        try:
1723*9c5db199SXin Li            logging.info('\tDevices detected on the host:')
1724*9c5db199SXin Li            devices = self.get_topology().get_list_available_servos()
1725*9c5db199SXin Li            for device in devices:
1726*9c5db199SXin Li                logging.info('\t%s', device)
1727*9c5db199SXin Li        except Exception as e:
1728*9c5db199SXin Li            logging.debug('(Not critical) Fail list all servos: %s', e)
1729*9c5db199SXin Li
1730*9c5db199SXin Li
1731*9c5db199SXin Lidef make_servo_hostname(dut_hostname):
1732*9c5db199SXin Li    """Given a DUT's hostname, return the hostname of its servo.
1733*9c5db199SXin Li
1734*9c5db199SXin Li    @param dut_hostname: hostname of a DUT.
1735*9c5db199SXin Li
1736*9c5db199SXin Li    @return hostname of the DUT's servo.
1737*9c5db199SXin Li
1738*9c5db199SXin Li    """
1739*9c5db199SXin Li    host_parts = dut_hostname.split('.')
1740*9c5db199SXin Li    host_parts[0] = host_parts[0] + '-servo'
1741*9c5db199SXin Li    return '.'.join(host_parts)
1742*9c5db199SXin Li
1743*9c5db199SXin Li
1744*9c5db199SXin Lidef _map_afe_board_to_servo_board(afe_board):
1745*9c5db199SXin Li    """Map a board we get from the AFE to a servo appropriate value.
1746*9c5db199SXin Li
1747*9c5db199SXin Li    Many boards are identical to other boards for servo's purposes.
1748*9c5db199SXin Li    This function makes that mapping.
1749*9c5db199SXin Li
1750*9c5db199SXin Li    @param afe_board string board name received from AFE.
1751*9c5db199SXin Li    @return board we expect servo to have.
1752*9c5db199SXin Li
1753*9c5db199SXin Li    """
1754*9c5db199SXin Li    KNOWN_SUFFIXES = ['-freon', '_freon', '_moblab', '-cheets']
1755*9c5db199SXin Li    BOARD_MAP = {'gizmo': 'panther'}
1756*9c5db199SXin Li    mapped_board = afe_board
1757*9c5db199SXin Li    if afe_board in BOARD_MAP:
1758*9c5db199SXin Li        mapped_board = BOARD_MAP[afe_board]
1759*9c5db199SXin Li    else:
1760*9c5db199SXin Li        for suffix in KNOWN_SUFFIXES:
1761*9c5db199SXin Li            if afe_board.endswith(suffix):
1762*9c5db199SXin Li                mapped_board = afe_board[0:-len(suffix)]
1763*9c5db199SXin Li                break
1764*9c5db199SXin Li    if mapped_board != afe_board:
1765*9c5db199SXin Li        logging.info('Mapping AFE board=%s to %s', afe_board, mapped_board)
1766*9c5db199SXin Li    return mapped_board
1767*9c5db199SXin Li
1768*9c5db199SXin Li
1769*9c5db199SXin Lidef get_servo_args_for_host(dut_host):
1770*9c5db199SXin Li    """Return servo data associated with a given DUT.
1771*9c5db199SXin Li
1772*9c5db199SXin Li    @param dut_host   Instance of `Host` on which to find the servo
1773*9c5db199SXin Li                      attributes.
1774*9c5db199SXin Li    @return `servo_args` dict with host and an optional port.
1775*9c5db199SXin Li    """
1776*9c5db199SXin Li    info = dut_host.host_info_store.get()
1777*9c5db199SXin Li    servo_args = {k: v for k, v in six.iteritems(info.attributes)
1778*9c5db199SXin Li                  if k in servo_constants.SERVO_ATTR_KEYS}
1779*9c5db199SXin Li
1780*9c5db199SXin Li    if servo_constants.SERVO_HOST_SSH_PORT_ATTR in servo_args:
1781*9c5db199SXin Li        try:
1782*9c5db199SXin Li            servo_args[servo_constants.SERVO_HOST_SSH_PORT_ATTR] = int(
1783*9c5db199SXin Li                    servo_args[servo_constants.SERVO_HOST_SSH_PORT_ATTR])
1784*9c5db199SXin Li        except ValueError:
1785*9c5db199SXin Li            logging.error('servo host port is not an int: %s',
1786*9c5db199SXin Li                          servo_args[servo_constants.SERVO_HOST_SSH_PORT_ATTR])
1787*9c5db199SXin Li            # Reset servo_args because we don't want to use an invalid port.
1788*9c5db199SXin Li            servo_args.pop(servo_constants.SERVO_HOST_SSH_PORT_ATTR, None)
1789*9c5db199SXin Li
1790*9c5db199SXin Li    if servo_constants.SERVO_PORT_ATTR in servo_args:
1791*9c5db199SXin Li        try:
1792*9c5db199SXin Li            servo_args[servo_constants.SERVO_PORT_ATTR] = int(
1793*9c5db199SXin Li                servo_args[servo_constants.SERVO_PORT_ATTR])
1794*9c5db199SXin Li        except ValueError:
1795*9c5db199SXin Li            logging.error('servo port is not an int: %s',
1796*9c5db199SXin Li                          servo_args[servo_constants.SERVO_PORT_ATTR])
1797*9c5db199SXin Li            # Reset servo_args because we don't want to use an invalid port.
1798*9c5db199SXin Li            servo_args.pop(servo_constants.SERVO_HOST_ATTR, None)
1799*9c5db199SXin Li
1800*9c5db199SXin Li    if info.board:
1801*9c5db199SXin Li        servo_board = _map_afe_board_to_servo_board(info.board)
1802*9c5db199SXin Li        servo_args[servo_constants.SERVO_BOARD_ATTR] = servo_board
1803*9c5db199SXin Li    if info.model:
1804*9c5db199SXin Li        servo_args[servo_constants.SERVO_MODEL_ATTR] = info.model
1805*9c5db199SXin Li    return servo_args if servo_constants.SERVO_HOST_ATTR in servo_args else None
1806*9c5db199SXin Li
1807*9c5db199SXin Li
1808*9c5db199SXin Lidef _tweak_args_for_ssp_moblab(servo_args):
1809*9c5db199SXin Li    if (servo_args[servo_constants.SERVO_HOST_ATTR]
1810*9c5db199SXin Li            in ['localhost', '127.0.0.1']):
1811*9c5db199SXin Li        servo_args[servo_constants.SERVO_HOST_ATTR] = _CONFIG.get_config_value(
1812*9c5db199SXin Li                'SSP', 'host_container_ip', type=str, default=None)
1813*9c5db199SXin Li
1814*9c5db199SXin Li
1815*9c5db199SXin Lidef create_servo_host(dut,
1816*9c5db199SXin Li                      servo_args,
1817*9c5db199SXin Li                      try_lab_servo=False,
1818*9c5db199SXin Li                      try_servo_repair=False,
1819*9c5db199SXin Li                      try_servo_recovery=True,
1820*9c5db199SXin Li                      dut_host_info=None,
1821*9c5db199SXin Li                      dut_health_profile=None):
1822*9c5db199SXin Li    """Create a ServoHost object for a given DUT, if appropriate.
1823*9c5db199SXin Li
1824*9c5db199SXin Li    This function attempts to create and verify or repair a `ServoHost`
1825*9c5db199SXin Li    object for a servo connected to the given `dut`, subject to various
1826*9c5db199SXin Li    constraints imposed by the parameters:
1827*9c5db199SXin Li      * When the `servo_args` parameter is not `None`, a servo
1828*9c5db199SXin Li        host must be created, and must be checked with `repair()`.
1829*9c5db199SXin Li      * Otherwise, if a servo exists in the lab and `try_lab_servo` is
1830*9c5db199SXin Li        true:
1831*9c5db199SXin Li          * If `try_servo_repair` is true, then create a servo host and
1832*9c5db199SXin Li            check it with `repair()`.
1833*9c5db199SXin Li          * Otherwise, if the servo responds to `ping` then create a
1834*9c5db199SXin Li            servo host and check it with `verify()`.
1835*9c5db199SXin Li
1836*9c5db199SXin Li    In cases where `servo_args` was not `None`, repair failure
1837*9c5db199SXin Li    exceptions are passed back to the caller; otherwise, exceptions
1838*9c5db199SXin Li    are logged and then discarded.  Note that this only happens in cases
1839*9c5db199SXin Li    where we're called from a test (not special task) control file that
1840*9c5db199SXin Li    has an explicit dependency on servo.  In that case, we require that
1841*9c5db199SXin Li    repair not write to `status.log`, so as to avoid polluting test
1842*9c5db199SXin Li    results.
1843*9c5db199SXin Li
1844*9c5db199SXin Li    TODO(jrbarnette):  The special handling for servo in test control
1845*9c5db199SXin Li    files is a thorn in my flesh; I dearly hope to see it cut out before
1846*9c5db199SXin Li    my retirement.
1847*9c5db199SXin Li
1848*9c5db199SXin Li    Parameters for a servo host consist of a host name, port number, and
1849*9c5db199SXin Li    DUT board, and are determined from one of these sources, in order of
1850*9c5db199SXin Li    priority:
1851*9c5db199SXin Li      * Servo attributes from the `dut` parameter take precedence over
1852*9c5db199SXin Li        all other sources of information.
1853*9c5db199SXin Li      * If a DNS entry for the servo based on the DUT hostname exists in
1854*9c5db199SXin Li        the CrOS lab network, that hostname is used with the default
1855*9c5db199SXin Li        port and the DUT's board.
1856*9c5db199SXin Li      * If no other options are found, the parameters will be taken
1857*9c5db199SXin Li        from the `servo_args` dict passed in from the caller.
1858*9c5db199SXin Li
1859*9c5db199SXin Li    @param dut:                 An instance of `Host` from which to take
1860*9c5db199SXin Li                                servo parameters (if available).
1861*9c5db199SXin Li    @param servo_args:          A dictionary with servo parameters to use if
1862*9c5db199SXin Li                                they can't be found from `dut`.  If this
1863*9c5db199SXin Li                                argument is supplied, unrepaired exceptions
1864*9c5db199SXin Li                                from `verify()` will be passed back to the
1865*9c5db199SXin Li                                caller.
1866*9c5db199SXin Li    @param try_lab_servo:       If not true, servo host creation will be
1867*9c5db199SXin Li                                skipped unless otherwise required by the
1868*9c5db199SXin Li                                caller.
1869*9c5db199SXin Li    @param try_servo_repair:    If true, check a servo host with
1870*9c5db199SXin Li                                `repair()` instead of `verify()`.
1871*9c5db199SXin Li    @param try_servo_recovery:  If true, start servod in recovery mode.
1872*9c5db199SXin Li                                Default value is True.
1873*9c5db199SXin Li    @param dut_host_info:       A HostInfo object of the DUT that connected
1874*9c5db199SXin Li                                to this servo.
1875*9c5db199SXin Li    @param dut_health_profile:  DUT repair info with history.
1876*9c5db199SXin Li
1877*9c5db199SXin Li    @returns: A ServoHost object or None. See comments above.
1878*9c5db199SXin Li
1879*9c5db199SXin Li    """
1880*9c5db199SXin Li    # We are explicitly looking for if servo_args is None here(which means
1881*9c5db199SXin Li    # servo not needed), as servo_args == {} means servo is needed and
1882*9c5db199SXin Li    # we expect load servo_args from host_info_store.
1883*9c5db199SXin Li    if servo_args is None:
1884*9c5db199SXin Li        servo_dependency = False
1885*9c5db199SXin Li        local_run = False
1886*9c5db199SXin Li    else:
1887*9c5db199SXin Li        servo_dependency = True
1888*9c5db199SXin Li        # If servo_args pass in directly, then this is a local test run.
1889*9c5db199SXin Li        local_run = servo_constants.SERVO_HOST_ATTR in servo_args
1890*9c5db199SXin Li
1891*9c5db199SXin Li    if local_run:
1892*9c5db199SXin Li        logging.warning('User input servo_args detected, will attempt'
1893*9c5db199SXin Li                        ' to start servod and initialize servo conncetion'
1894*9c5db199SXin Li                        ' directly. All servo/servohost verify and repair'
1895*9c5db199SXin Li                        ' steps will be skipped.')
1896*9c5db199SXin Li
1897*9c5db199SXin Li    # Loading servo args from host_info_store.
1898*9c5db199SXin Li    if dut is not None and (try_lab_servo or servo_dependency):
1899*9c5db199SXin Li        servo_args_override = get_servo_args_for_host(dut)
1900*9c5db199SXin Li        if servo_args_override is not None:
1901*9c5db199SXin Li            if utils.in_moblab_ssp():
1902*9c5db199SXin Li                _tweak_args_for_ssp_moblab(servo_args_override)
1903*9c5db199SXin Li            logging.debug(
1904*9c5db199SXin Li                    'Overriding provided servo_args (%s) with arguments'
1905*9c5db199SXin Li                    ' determined from the host (%s)',
1906*9c5db199SXin Li                    servo_args,
1907*9c5db199SXin Li                    servo_args_override,
1908*9c5db199SXin Li            )
1909*9c5db199SXin Li            servo_args = servo_args_override
1910*9c5db199SXin Li
1911*9c5db199SXin Li    if not servo_args:
1912*9c5db199SXin Li        logging.debug('No servo_args provided, and failed to find overrides.')
1913*9c5db199SXin Li        if try_lab_servo or servo_dependency:
1914*9c5db199SXin Li            return None, servo_constants.SERVO_STATE_MISSING_CONFIG
1915*9c5db199SXin Li        else:
1916*9c5db199SXin Li            # For regular test case which not required the servo
1917*9c5db199SXin Li            return None, None
1918*9c5db199SXin Li
1919*9c5db199SXin Li    servo_hostname = servo_args.get(servo_constants.SERVO_HOST_ATTR)
1920*9c5db199SXin Li    servo_port = servo_args.get(servo_constants.SERVO_PORT_ATTR)
1921*9c5db199SXin Li    if not local_run:
1922*9c5db199SXin Li        if not _is_servo_host_information_exist(servo_hostname, servo_port):
1923*9c5db199SXin Li            logging.debug(
1924*9c5db199SXin Li                    'Servo connection info missed hostname: %s , port: %s',
1925*9c5db199SXin Li                    servo_hostname, servo_port)
1926*9c5db199SXin Li            return None, servo_constants.SERVO_STATE_MISSING_CONFIG
1927*9c5db199SXin Li        if not is_servo_host_information_valid(servo_hostname, servo_port):
1928*9c5db199SXin Li            logging.debug(
1929*9c5db199SXin Li                    'Servo connection info is incorrect hostname: %s , port: %s',
1930*9c5db199SXin Li                    servo_hostname, servo_port)
1931*9c5db199SXin Li            return None, servo_constants.SERVO_STATE_WRONG_CONFIG
1932*9c5db199SXin Li
1933*9c5db199SXin Li        if try_servo_recovery == True:
1934*9c5db199SXin Li            servo_args[servo_constants.SERVO_RECOVERY_MODE] = True
1935*9c5db199SXin Li
1936*9c5db199SXin Li    newhost = ServoHost(**servo_args)
1937*9c5db199SXin Li    if local_run:
1938*9c5db199SXin Li        try:
1939*9c5db199SXin Li            newhost.start_servod()
1940*9c5db199SXin Li        except:
1941*9c5db199SXin Li            # If we failed to start servod here, we can assume the servod
1942*9c5db199SXin Li            # either already started or the test is running against a
1943*9c5db199SXin Li            # non-standard servohost so the user will resiponsble for ensure
1944*9c5db199SXin Li            # servod is running.
1945*9c5db199SXin Li            pass
1946*9c5db199SXin Li        try:
1947*9c5db199SXin Li            newhost.initialize_servo()
1948*9c5db199SXin Li            newhost.initialize_dut_for_servo()
1949*9c5db199SXin Li            newhost._servo_state = servo_constants.SERVO_STATE_WORKING
1950*9c5db199SXin Li            return newhost, newhost.get_servo_state()
1951*9c5db199SXin Li        except Exception as e:
1952*9c5db199SXin Li            logging.error('Failed to initialize servo. %s', e)
1953*9c5db199SXin Li            return None, servo_constants.SERVO_STATE_BROKEN
1954*9c5db199SXin Li
1955*9c5db199SXin Li    if newhost.is_containerized_servod():
1956*9c5db199SXin Li        # TODO(otabek@): Update for servod-manager.
1957*9c5db199SXin Li        # Servod docker is not available for access.
1958*9c5db199SXin Li        newhost.start_containerized_servod()
1959*9c5db199SXin Li    # use_icmp is always set to true in autotest, but in labpack
1960*9c5db199SXin Li    # this is determined by autoserv args. We need to make this consistent
1961*9c5db199SXin Li    # across labpack and autotest b/205855910
1962*9c5db199SXin Li    elif newhost.use_icmp and not newhost.is_up_fast(count=3):
1963*9c5db199SXin Li        # ServoHost has internal check to wait if servo-host is in reboot
1964*9c5db199SXin Li        # process. If servo-host still is not available this check will stop
1965*9c5db199SXin Li        # further attempts as we do not have any option to recover servo_host.
1966*9c5db199SXin Li        return None, servo_constants.SERVO_STATE_NO_SSH
1967*9c5db199SXin Li
1968*9c5db199SXin Li    # Reset or reboot servo device only during AdminRepair tasks.
1969*9c5db199SXin Li    if try_servo_repair:
1970*9c5db199SXin Li        if newhost._is_locked:
1971*9c5db199SXin Li            # Print available servos on the host for debugging.
1972*9c5db199SXin Li            newhost.print_all_servo_of_host()
1973*9c5db199SXin Li            # Reset servo if the servo is locked, as we check if the servohost
1974*9c5db199SXin Li            # is up, if the servohost is labstation and if the servohost is in
1975*9c5db199SXin Li            # lab inside the locking logic.
1976*9c5db199SXin Li            newhost.reset_servo()
1977*9c5db199SXin Li        else:
1978*9c5db199SXin Li            try:
1979*9c5db199SXin Li                newhost.reboot_servo_v3_on_need()
1980*9c5db199SXin Li            except Exception as e:
1981*9c5db199SXin Li                logging.info('[Non-critical] Unexpected error while trying to'
1982*9c5db199SXin Li                              ' reboot servo_v3, skipping the reboot; %s', e)
1983*9c5db199SXin Li
1984*9c5db199SXin Li    if dut:
1985*9c5db199SXin Li        newhost.set_dut_hostname(dut.hostname)
1986*9c5db199SXin Li    if dut_host_info:
1987*9c5db199SXin Li        newhost.set_dut_host_info(dut_host_info)
1988*9c5db199SXin Li    if (dut_health_profile and (try_lab_servo or try_servo_repair)):
1989*9c5db199SXin Li        try:
1990*9c5db199SXin Li            if newhost.is_localhost():
1991*9c5db199SXin Li                logging.info('Servohost is a localhost, skip device'
1992*9c5db199SXin Li                             ' health profile setup...')
1993*9c5db199SXin Li            else:
1994*9c5db199SXin Li                dut_health_profile.init_profile(newhost)
1995*9c5db199SXin Li                newhost.set_dut_health_profile(dut_health_profile)
1996*9c5db199SXin Li        except Exception as e:
1997*9c5db199SXin Li            logging.info(
1998*9c5db199SXin Li                    '[Non-critical] Unexpected error while trying to'
1999*9c5db199SXin Li                    ' load device health profile; %s', e)
2000*9c5db199SXin Li
2001*9c5db199SXin Li    # Note that the logic of repair() includes everything done
2002*9c5db199SXin Li    # by verify().  It's sufficient to call one or the other;
2003*9c5db199SXin Li    # we don't need both.
2004*9c5db199SXin Li    if servo_dependency:
2005*9c5db199SXin Li        newhost.repair(silent=True)
2006*9c5db199SXin Li        return newhost, newhost.get_servo_state()
2007*9c5db199SXin Li
2008*9c5db199SXin Li    if try_servo_repair:
2009*9c5db199SXin Li        try:
2010*9c5db199SXin Li            newhost.repair()
2011*9c5db199SXin Li        except Exception:
2012*9c5db199SXin Li            logging.exception('servo repair failed for %s', newhost.hostname)
2013*9c5db199SXin Li    else:
2014*9c5db199SXin Li        try:
2015*9c5db199SXin Li            newhost.verify()
2016*9c5db199SXin Li        except Exception:
2017*9c5db199SXin Li            logging.exception('servo verify failed for %s', newhost.hostname)
2018*9c5db199SXin Li    return newhost, newhost.get_servo_state()
2019*9c5db199SXin Li
2020*9c5db199SXin Li
2021*9c5db199SXin Lidef _is_servo_host_information_exist(hostname, port):
2022*9c5db199SXin Li    if hostname is None or len(hostname.strip()) == 0:
2023*9c5db199SXin Li        return False
2024*9c5db199SXin Li    if port is None:
2025*9c5db199SXin Li        return False
2026*9c5db199SXin Li    if not type(port) is int:
2027*9c5db199SXin Li        try:
2028*9c5db199SXin Li            int(port)
2029*9c5db199SXin Li        except ValueError:
2030*9c5db199SXin Li            return False
2031*9c5db199SXin Li    return True
2032*9c5db199SXin Li
2033*9c5db199SXin Li
2034*9c5db199SXin Lidef is_servo_host_information_valid(hostname, port):
2035*9c5db199SXin Li    """Check if provided servo attributes are valid.
2036*9c5db199SXin Li
2037*9c5db199SXin Li    @param hostname Hostname of the servohost.
2038*9c5db199SXin Li    @param port     servo port number.
2039*9c5db199SXin Li
2040*9c5db199SXin Li    @returns: A bool value to indicate if provided servo attribute valid.
2041*9c5db199SXin Li    """
2042*9c5db199SXin Li    if not _is_servo_host_information_exist(hostname, port):
2043*9c5db199SXin Li        return False
2044*9c5db199SXin Li    # checking range and correct of the port
2045*9c5db199SXin Li    port_int = int(port)
2046*9c5db199SXin Li    if port_int < 1 or port_int > 65000:
2047*9c5db199SXin Li        return False
2048*9c5db199SXin Li    # we expecting host contain only latters, digits and '-' or '_'
2049*9c5db199SXin Li    if not re.match('[a-zA-Z0-9-_\.:]*$', hostname) or len(hostname) < 5:
2050*9c5db199SXin Li        return False
2051*9c5db199SXin Li    return True
2052