xref: /aosp_15_r20/external/autotest/site_utils/lxc/container_bucket.py (revision 9c5db1993ded3edbeafc8092d69fe5de2ee02df7)
1*9c5db199SXin Li# Copyright 2017 The Chromium OS Authors. All rights reserved.
2*9c5db199SXin Li# Use of this source code is governed by a BSD-style license that can be
3*9c5db199SXin Li# found in the LICENSE file.
4*9c5db199SXin Li
5*9c5db199SXin Liimport logging
6*9c5db199SXin Liimport os
7*9c5db199SXin Liimport time
8*9c5db199SXin Li
9*9c5db199SXin Liimport common
10*9c5db199SXin Li
11*9c5db199SXin Lifrom autotest_lib.client.bin import utils
12*9c5db199SXin Lifrom autotest_lib.client.common_lib import error
13*9c5db199SXin Lifrom autotest_lib.site_utils.lxc import config as lxc_config
14*9c5db199SXin Lifrom autotest_lib.site_utils.lxc import constants
15*9c5db199SXin Lifrom autotest_lib.site_utils.lxc import lxc
16*9c5db199SXin Lifrom autotest_lib.site_utils.lxc import utils as lxc_utils
17*9c5db199SXin Lifrom autotest_lib.site_utils.lxc.cleanup_if_fail import cleanup_if_fail
18*9c5db199SXin Lifrom autotest_lib.site_utils.lxc.base_image import BaseImage
19*9c5db199SXin Lifrom autotest_lib.site_utils.lxc.constants import \
20*9c5db199SXin Li    CONTAINER_POOL_METRICS_PREFIX as METRICS_PREFIX
21*9c5db199SXin Lifrom autotest_lib.site_utils.lxc.container import Container
22*9c5db199SXin Lifrom autotest_lib.site_utils.lxc.container_factory import ContainerFactory
23*9c5db199SXin Li
24*9c5db199SXin Litry:
25*9c5db199SXin Li    from autotest_lib.utils.frozen_chromite.lib import metrics
26*9c5db199SXin Li    from infra_libs import ts_mon
27*9c5db199SXin Liexcept ImportError:
28*9c5db199SXin Li    import mock
29*9c5db199SXin Li    metrics = utils.metrics_mock
30*9c5db199SXin Li    ts_mon = mock.Mock()
31*9c5db199SXin Li
32*9c5db199SXin Li
class ContainerBucket(object):
    """A wrapper class to interact with containers in a specific container path.

    The bucket keeps a cache (`container_cache`) of the Container objects it
    has discovered, keyed by their container ID objects.
    """

    def __init__(self,
                 container_path=constants.DEFAULT_CONTAINER_PATH,
                 base_name=constants.BASE,
                 container_factory=None,
                 base_container_path=constants.DEFAULT_BASE_CONTAINER_PATH):
        """Initialize a ContainerBucket.

        @param container_path: Path to the directory used to store containers.
                               Default is set to AUTOSERV/container_path in
                               global config.
        @param base_name: Name of the base container image. Used to initialize a
                          ContainerFactory unless one is provided via the
                          arguments. Defaults to value set via
                          AUTOSERV/container_base_name in global config.
        @param container_factory: A factory for creating Containers.
        @param base_container_path: Path to the directory used for the base
                                    container. Default is
                                    AUTOSERV/base_container_path in global
                                    config.

        @raise ContainerError: If no factory is given and the base image
                               cannot be obtained.
        """
        self.container_path = os.path.realpath(container_path)
        if container_factory is not None:
            self._factory = container_factory
        else:
            # Pass the base container path and the bucket's own container path
            # explicitly so that the bucket is hermetic (i.e. a customized
            # path never silently falls back to the defaults).
            base_image_ok = True
            try:
                container = BaseImage(base_container_path, base_name).get()
            except error.ContainerError:
                base_image_ok = False
                raise
            finally:
                # Report the base image health whether or not it loaded.
                metrics.Counter(METRICS_PREFIX + '/base_image',
                                field_spec=[ts_mon.BooleanField('corrupted')]
                                ).increment(
                                    fields={'corrupted': not base_image_ok})
            self._factory = ContainerFactory(
                base_container=container,
                lxc_path=self.container_path)
        self.container_cache = {}


    def get_all(self, force_update=False):
        """Get details of all containers.

        Retrieves all containers owned by the bucket.  Note that this doesn't
        include the base container, or any containers owned by the container
        pool.

        @param force_update: Boolean, ignore cached values if set.

        @return: A dictionary of all containers with detailed attributes,
                 indexed by container ID.
        """
        logging.debug("Fetching all extant LXC containers")
        info_collection = lxc.get_container_info(self.container_path)
        if force_update:
            logging.debug("Clearing cached container info")
            containers = {}
        else:
            containers = self.container_cache

        # The keys of `containers` are container.ContainerId objects, not
        # strings, so compare against their string form to detect containers
        # that are already cached.
        cached_names = set(str(k) for k in containers)
        for info in info_collection:
            if info['name'] in cached_names:
                # Already cached; keep the existing Container object.
                continue
            container = Container.create_from_existing_dir(self.container_path,
                                                           **info)
            # Active containers have an ID.  Zygotes and base containers, don't.
            if container.id is not None:
                containers[container.id] = container
        logging.debug('All containers found: %s',
                      [(repr(k), str(k)) for k in containers])
        self.container_cache = containers
        return containers


    def get_container(self, container_id):
        """Get a container with matching ID.

        The cache is consulted first; on a miss the container list is
        refreshed and searched by ID object, then by the ID's string form.

        @param container_id: ID of the container.

        @return: A container object with matching ID. Returns None if no
                 container matches the given ID.
        """
        logging.debug("Fetching LXC container with id %s", container_id)
        if container_id in self.container_cache:
            logging.debug("Found container %s in cache", container_id)
            return self.container_cache[container_id]

        all_containers = self.get_all()
        container = all_containers.get(container_id, None)
        if container is not None:
            return container

        logging.debug(
                "Could not find container by container id object: %s (%s)",
                container_id, repr(container_id))
        # When load container Ids from disk, we cast job_id from NoneType to a
        # string 'None' (crrev/c/1056366). This causes problems if the input id
        # has not been casted.
        logging.debug('Try to get container by the id string: %s',
                      container_id)
        wanted = str(container_id)
        for k, v in all_containers.items():
            if str(k) == wanted:
                return v

        logging.debug('Could not find container by id string: %s',
                      container_id)
        return None


    def exist(self, container_id):
        """Check if a container exists with the given ID.

        @param container_id: ID of the container.

        @return: True if the container with the given ID exists, otherwise
                 returns False.
        """
        return self.get_container(container_id) is not None


    def destroy_all(self):
        """Destroy all containers, base must be destroyed at the last.
        """
        containers = self.get_all().values()
        # sorted() materializes a list, so removing cache entries inside the
        # loop does not disturb the iteration.
        for container in sorted(
                containers, key=lambda n: 1 if n.name == constants.BASE else 0):
            key = container.id
            logging.info('Destroy container %s.', container.name)
            container.destroy()
            self.container_cache.pop(key, None)

    def scrub_container_location(self, name,
                                 timeout=constants.LXC_SCRUB_TIMEOUT):
        """Destroy a possibly-nonexistent, possibly-malformed container.

        This exists to clean up an unreachable container which may or may not
        exist and is probably but not definitely malformed if it does exist. It
        is accordingly scorched-earth and force-destroys the container with all
        associated snapshots. Also accordingly, this will not raise an
        exception if the destruction fails.

        @param name: ID of the container.
        @param timeout: Seconds to wait for removal.

        @returns: CmdResult object from the shell command, or None if the
                  command timed out.
        """
        logging.debug(
            "Force-destroying container %s if it exists, with timeout %s sec",
            name, timeout)
        try:
            result = lxc_utils.destroy(self.container_path,
                                       name,
                                       force=True,
                                       snapshots=True,
                                       ignore_status=True,
                                       timeout=timeout)
        except error.CmdTimeoutError:
            logging.warning("Force-destruction of container %s timed out.",
                            name)
            # There is no command result to report on a timeout.
            return None
        logging.debug("Force-destruction exit code %s", result.exit_status)
        return result



    @metrics.SecondsTimerDecorator(
        '%s/setup_test_duration' % constants.STATS_KEY)
    @cleanup_if_fail()
    def setup_test(self,
                   container_id,
                   job_id,
                   server_package_url,
                   result_path,
                   control=None,
                   skip_cleanup=False,
                   job_folder=None,
                   dut_name=None):
        """Setup test container for the test job to run.

        The setup includes:
        1. Install autotest_server package from given url.
        2. Copy over local shadow_config.ini.
        3. Mount local site-packages.
        4. Mount test result directory.

        TODO(dshi): Setup also needs to include test control file for autoserv
                    to run in container.

        @param container_id: ID to assign to the test container.
        @param job_id: Job id for the test job to run in the test container.
        @param server_package_url: Url to download autotest_server package.
        @param result_path: Directory to be mounted to container to store test
                            results.
        @param control: Path to the control file to run the test job. Default is
                        set to None.
        @param skip_cleanup: Set to True to skip cleanup, used to troubleshoot
                             container failures.
        @param job_folder: Folder name of the job, e.g., 123-debug_user.
        @param dut_name: Name of the dut to run test, used as the hostname of
                         the container. Default is None.
        @return: A Container object for the test container.

        @raise ContainerError: If the result directory does not exist, or if
                               the container does not exist or is not running.
        """
        if not os.path.exists(result_path):
            raise error.ContainerError(
                    'Result directory does not exist: %s' % result_path)
        result_path = os.path.abspath(result_path)

        # Save control file to result_path temporarily. The reason is that the
        # control file in drone_tmp folder can be deleted during scheduler
        # restart. For test not using SSP, the window between test starts and
        # control file being picked up by the test is very small (< 2 seconds).
        # However, for tests using SSP, it takes around 1 minute before the
        # container is setup. If scheduler is restarted during that period, the
        # control file will be deleted, and the test will fail.
        if control:
            control_file_name = os.path.basename(control)
            safe_control = os.path.join(result_path, control_file_name)
            utils.run('cp %s %s' % (control, safe_control))

        # Create test container from the base container.
        container = self._factory.create_container(container_id)

        # Deploy server side package
        container.install_ssp(server_package_url)

        deploy_config_manager = lxc_config.DeployConfigManager(container)
        deploy_config_manager.deploy_pre_start()

        # Copy over control file to run the test job.
        if control:
            container.install_control_file(safe_control)

        # Use a pre-packaged Trusty-compatible Autotest site_packages
        # instead if it exists.  crbug.com/1013241
        if os.path.exists(constants.TRUSTY_SITE_PACKAGES_PATH):
            site_packages_source = constants.TRUSTY_SITE_PACKAGES_PATH
        else:
            site_packages_source = constants.SITE_PACKAGES_PATH
        # Each entry is (source, destination, readonly).
        mount_entries = [
                (site_packages_source,
                 constants.CONTAINER_SITE_PACKAGES_PATH,
                 True),
                (result_path,
                 constants.RESULT_DIR_FMT % job_folder,
                 False),
        ]

        # Update container config to mount directories.
        for source, destination, readonly in mount_entries:
            container.mount_dir(source, destination, readonly)

        # Update file permissions.
        # TODO(dshi): crbug.com/459344 Skip following action when test container
        # can be unprivileged container.
        autotest_path = os.path.join(
                container.rootfs,
                constants.CONTAINER_AUTOTEST_DIR.lstrip(os.path.sep))
        utils.run('sudo chown -R root "%s"' % autotest_path)
        utils.run('sudo chgrp -R root "%s"' % autotest_path)

        container.start(wait_for_network=True, log_dir=result_path)
        deploy_config_manager.deploy_post_start()

        # Update the hostname of the test container to be `dut-name`.
        # Some TradeFed tests use hostname in test results, which is used to
        # group test results in dashboard. The default container name is set to
        # be the name of the folder, which is unique (as it is composed of job
        # id and timestamp). For better result view, the container's hostname
        # is set to be a string containing the dut hostname.
        if dut_name:
            container.set_hostname(constants.CONTAINER_UTSNAME_FORMAT %
                                   dut_name.replace('.', '-'))

        container.modify_import_order()

        container.verify_autotest_setup(job_folder)

        logging.debug('Test container %s is set up.', container.name)
        return container
322