xref: /aosp_15_r20/external/autotest/server/cros/clique_lib/clique_runner.py (revision 9c5db1993ded3edbeafc8092d69fe5de2ee02df7)
1# Copyright 2015 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import datetime
6import logging
7import pprint
8import time
9
10import common
11from autotest_lib.client.common_lib import error
12from autotest_lib.client.common_lib.cros.network import ap_constants
13from autotest_lib.server import site_linux_system
14from autotest_lib.server.cros import host_lock_manager
15from autotest_lib.server.cros.ap_configurators import ap_batch_locker
16from autotest_lib.server.cros.network import chaos_clique_utils as utils
17from autotest_lib.server.cros.network import connection_worker
18from autotest_lib.server.cros.clique_lib import clique_dut_locker
19from autotest_lib.server.cros.clique_lib import clique_dut_log_collector
20from autotest_lib.server.cros.clique_lib import clique_dut_updater
21
22
23class CliqueRunner(object):
24    """Object to run a network_WiFi_CliqueXXX test."""
25
26    def __init__(self, test, dut_pool_spec, ap_specs):
27        """Initializes and runs test.
28
29        @param test: a string, test name.
30        @param dut_pool_spec: a list of pool sets. Each set contains a list of
31                              board: <board_name> labels to chose the required
32                              DUT's.
33        @param ap_specs: a list of APSpec objects corresponding to the APs
34                         needed for the test.
35        """
36        self._test = test
37        self._ap_specs = ap_specs
38        self._dut_pool_spec = dut_pool_spec
39        self._dut_pool = []
40        # Log server and DUT times
41        dt = datetime.datetime.now()
42        logging.info('Server time: %s', dt.strftime('%a %b %d %H:%M:%S %Y'))
43
44    def _allocate_dut_pool(self, dut_locker):
45        """Allocate the required DUT's from the spec for the test.
46        The DUT objects are stored in a list of sets in |_dut_pool| attribute.
47
48        @param dut_locker: DUTBatchLocker object used to allocate the DUTs
49                           for the test pool.
50
51        @return: Returns a list of DUTObjects allocated.
52        """
53        self._dut_pool  = dut_locker.get_dut_pool()
54        # Flatten the list of DUT objects into a single list.
55        dut_objects = sum(self._dut_pool, [])
56        return dut_objects
57
58    @staticmethod
59    def _update_dut_pool(dut_objects, release_version):
60        """Allocate the required DUT's from the spec for the test.
61
62        @param dut_objects: A list of DUTObjects for all DUTs allocated for the
63                            test.
64        @param release_version: A chromeOS release version.
65
66        @return: True if all the DUT's successfully upgraded, False otherwise.
67        """
68        dut_updater = clique_dut_updater.CliqueDUTUpdater()
69        return dut_updater.update_dut_pool(dut_objects, release_version)
70
71    @staticmethod
72    def _collect_dut_pool_logs(dut_objects, job):
73        """Allocate the required DUT's from the spec for the test.
74        The DUT objects are stored in a list of sets in |_dut_pool| attribute.
75
76        @param dut_objects: A list of DUTObjects for all DUTs allocated for the
77                            test.
78        @param job: Autotest job object to be used for log collection.
79
80        @return: Returns a list of DUTObjects allocated.
81        """
82        log_collector = clique_dut_log_collector.CliqueDUTLogCollector()
83        log_collector.collect_logs(dut_objects, job)
84
85    @staticmethod
86    def _are_all_duts_healthy(dut_objects, ap):
87        """Returns if iw scan is not working on any of the DUTs.
88
89        Sometimes iw scan will die, especially on the Atheros chips.
90        This works around that bug.  See crbug.com/358716.
91
92        @param dut_objects: A list of DUTObjects for all DUTs allocated for the
93                            test.
94        @param ap: ap_configurator object
95
96        @returns True if all the DUTs are healthy, False otherwise.
97        """
98        healthy = True
99        for dut in dut_objects:
100            if not utils.is_dut_healthy(dut.wifi_client, ap):
101                logging.error('DUT %s not healthy.', dut.host.hostname)
102                healthy = False
103        return healthy
104
105    @staticmethod
106    def _sanitize_all_duts(dut_objects):
107        """Clean up logs and reboot all the DUTs.
108
109        @param dut_objects: A list of DUTObjects for all DUTs allocated for the
110                            test.
111        """
112        for dut in dut_objects:
113            utils.sanitize_client(dut.host)
114
115    @staticmethod
116    def _sync_time_on_all_duts(dut_objects):
117        """Syncs time on all the DUTs in the pool to the time on the host.
118
119        @param dut_objects: A list of DUTObjects for all DUTs allocated for the
120                            test.
121        """
122        # Let's get the timestamp once on the host and then set it on all
123        # the duts.
124        epoch_seconds = time.time()
125        logging.info('Syncing epoch time on DUTs to %d seconds.', epoch_seconds)
126        for dut in dut_objects:
127            dut.wifi_client.shill.sync_time_to(epoch_seconds)
128
129    @staticmethod
130    def _get_debug_string(dut_objects, aps):
131        """Gets the debug info for all the DUT's and APs in the pool.
132
133        This is printed in the logs at the end of each test scenario for
134        debugging.
135        @param dut_objects: A list of DUTObjects for all DUTs allocated for the
136                            test.
137        @param aps: A list of APConfigurator for all APs allocated for
138                    the test.
139
140        @returns a string with the list of information for each DUT and AP
141                 in the pool.
142        """
143        debug_string = ""
144        for dut in dut_objects:
145            kernel_ver = dut.host.get_kernel_ver()
146            firmware_ver = utils.get_firmware_ver(dut.host)
147            if not firmware_ver:
148                firmware_ver = "Unknown"
149            debug_dict = {'host_name': dut.host.hostname,
150                          'kernel_versions': kernel_ver,
151                          'wifi_firmware_versions': firmware_ver}
152            debug_string += pprint.pformat(debug_dict)
153        for ap in aps:
154            debug_string += pprint.pformat({'ap_name': ap.name})
155        return debug_string
156
157    @staticmethod
158    def _are_all_conn_workers_healthy(workers, aps, assoc_params_list, job):
159        """Returns if all the connection workers are working properly.
160
161        From time to time the connection worker will fail to establish a
162        connection to the APs.
163
164        @param workers: a list of conn_worker objects.
165        @param aps: a list of an ap_configurator objects.
166        @param assoc_params_list: list of connection association parameters.
167        @param job: the Autotest job object.
168
169        @returns True if all the workers are healthy, False otherwise.
170        """
171        healthy = True
172        for worker, ap, assoc_params in zip(workers, aps, assoc_params_list):
173            if not utils.is_conn_worker_healthy(worker, ap, assoc_params, job):
174                logging.error('Connection worker %s not healthy.',
175                              worker.host.hostname)
176                healthy = False
177        return healthy
178
179    def _cleanup(self, dut_objects, dut_locker, ap_locker, capturer,
180                 conn_workers):
181        """Cleans up after the test is complete.
182
183        @param dut_objects: A list of DUTObjects for all DUTs allocated for the
184                            test.
185        @param dut_locker: DUTBatchLocker object used to allocate the DUTs
186                           for the test pool.
187        @param ap_locker: the AP batch locker object.
188        @param capturer: a packet capture device.
189        @param conn_workers: a list of conn_worker objects.
190        """
191        self._collect_dut_pool_logs(dut_objects)
192        for worker in conn_workers:
193            if worker: worker.cleanup()
194        capturer.close()
195        ap_locker.unlock_aps()
196        dut_locker.unlock_and_close_duts()
197
198    def run(self, job, tries=10, capturer_hostname=None,
199            conn_worker_hostnames=[], release_version="",
200            disabled_sysinfo=False):
201        """Executes Clique test.
202
203        @param job: an Autotest job object.
204        @param tries: an integer, number of iterations to run per AP.
205        @param capturer_hostname: a string or None, hostname or IP of capturer.
206        @param conn_worker_hostnames: a list of string, hostname of
207                                      connection workers.
208        @param release_version: the DUT cros image version to use for testing.
209        @param disabled_sysinfo: a bool, disable collection of logs from DUT.
210        """
211        lock_manager = host_lock_manager.HostLockManager()
212        with host_lock_manager.HostsLockedBy(lock_manager):
213            dut_locker = clique_dut_locker.CliqueDUTBatchLocker(
214                    lock_manager, self._dut_pool_spec)
215            dut_objects = self._allocate_dut_pool(dut_locker)
216            if not dut_objects:
217                raise error.TestError('No DUTs allocated for test.')
218            update_status = self._update_dut_pool(dut_objects, release_version)
219            if not update_status:
220                raise error.TestError('DUT pool update failed. Bailing!')
221
222            capture_host = utils.allocate_packet_capturer(
223                    lock_manager, hostname=capturer_hostname)
224            capturer = site_linux_system.LinuxSystem(
225                    capture_host, {}, 'packet_capturer')
226
227            conn_workers = []
228            for hostname in conn_worker_hostnames:
229                conn_worker_host = utils.allocate_packet_capturer(
230                        lock_manager, hostname=hostname)
231                # Let's create generic connection workers and make them connect
232                # to the corresponding AP. The DUT role will recast each of
233                # these connection workers based on the role we want them to
234                # perform.
235                conn_worker = connection_worker.ConnectionWorker()
236                conn_worker.prepare_work_client(conn_worker_host)
237                conn_workers.append(conn_worker)
238
239            aps = []
240            for ap_spec in self._ap_specs:
241                ap_locker = ap_batch_locker.ApBatchLocker(
242                        lock_manager, ap_spec,
243                        ap_test_type=ap_constants.AP_TEST_TYPE_CLIQUE)
244                ap = ap_locker.get_ap_batch(batch_size=1)
245                if not ap:
246                    raise error.TestError('AP matching spec not found.')
247                aps.append(ap)
248
249            # Reset all the DUTs before the test starts and configure all the
250            # APs.
251            self._sanitize_all_duts(dut_objects)
252            utils.configure_aps(aps, self._ap_specs)
253
254            # This is a list of association parameters for the test for all the
255            # APs in the test.
256            assoc_params_list = []
257            # Check if all our APs, DUTs and connection workers are in good
258            # state before we proceed.
259            for ap, ap_spec in zip(aps, self._ap_specs):
260                if ap.ssid == None:
261                    self._cleanup(dut_objects, dut_locker, ap_locker,
262                                  capturer, conn_workers)
263                    raise error.TestError('SSID not set for the AP: %s.' %
264                                          ap.configurator.host_name)
265                networks = utils.return_available_networks(
266                        ap, ap_spec, capturer, job)
267                if ((networks is None) or (networks == list())):
268                    self._cleanup(dut_objects, dut_locker, ap_locker,
269                                  capturer, conn_workers)
270                    raise error.TestError('Scanning error on the AP %s.' %
271                                          ap.configurator.host_name)
272
273                assoc_params = ap.get_association_parameters()
274                assoc_params_list.append(assoc_params)
275
276            if not self._are_all_duts_healthy(dut_objects, ap):
277                self._cleanup(dut_objects, dut_locker, ap_locker,
278                              capturer, conn_workers)
279                raise error.TestError('Not all DUTs healthy.')
280
281            if not self._are_all_conn_workers_healthy(
282                    conn_workers, aps, assoc_params_list, job):
283                self._cleanup(dut_objects, dut_locker, ap_locker,
284                              capturer, conn_workers)
285                raise error.TestError('Not all connection workers healthy.')
286
287            debug_string = self._get_debug_string(dut_objects, aps)
288            self._sync_time_on_all_duts(dut_objects)
289
290            result = job.run_test(
291                    self._test,
292                    capturer=capturer,
293                    capturer_frequency=networks[0].frequency,
294                    capturer_ht_type=networks[0].width,
295                    dut_pool=self._dut_pool,
296                    assoc_params_list=assoc_params_list,
297                    tries=tries,
298                    debug_info=debug_string,
299                    conn_workers=conn_workers,
300                    # Copy all logs from the system
301                    disabled_sysinfo=disabled_sysinfo)
302
303            # Reclaim all the APs, DUTs and capturers used in the test and
304            # collect the required logs.
305            self._cleanup(dut_objects, dut_locker, ap_locker,
306                          capturer, conn_workers)
307