xref: /aosp_15_r20/external/autotest/site_utils/lxc/container_bucket.py (revision 9c5db1993ded3edbeafc8092d69fe5de2ee02df7)
1# Copyright 2017 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import logging
6import os
7import time
8
9import common
10
11from autotest_lib.client.bin import utils
12from autotest_lib.client.common_lib import error
13from autotest_lib.site_utils.lxc import config as lxc_config
14from autotest_lib.site_utils.lxc import constants
15from autotest_lib.site_utils.lxc import lxc
16from autotest_lib.site_utils.lxc import utils as lxc_utils
17from autotest_lib.site_utils.lxc.cleanup_if_fail import cleanup_if_fail
18from autotest_lib.site_utils.lxc.base_image import BaseImage
19from autotest_lib.site_utils.lxc.constants import \
20    CONTAINER_POOL_METRICS_PREFIX as METRICS_PREFIX
21from autotest_lib.site_utils.lxc.container import Container
22from autotest_lib.site_utils.lxc.container_factory import ContainerFactory
23
24try:
25    from autotest_lib.utils.frozen_chromite.lib import metrics
26    from infra_libs import ts_mon
27except ImportError:
28    import mock
29    metrics = utils.metrics_mock
30    ts_mon = mock.Mock()
31
32
class ContainerBucket(object):
    """A wrapper class to interact with containers in a specific container path.

    The bucket keeps an in-memory cache (`container_cache`) of the containers
    it has discovered, keyed by container ID, and uses a container factory to
    create new test containers.
    """

    def __init__(self,
                 container_path=constants.DEFAULT_CONTAINER_PATH,
                 base_name=constants.BASE,
                 container_factory=None,
                 base_container_path=constants.DEFAULT_BASE_CONTAINER_PATH):
        """Initialize a ContainerBucket.

        @param container_path: Path to the directory used to store containers.
                               Default is set to AUTOSERV/container_path in
                               global config.
        @param base_name: Name of the base container image. Used to initialize a
                          ContainerFactory unless one is provided via the
                          arguments. Defaults to value set via
                          AUTOSERV/container_base_name in global config.
        @param container_factory: A factory for creating Containers. When it
                                  is given, the base image is not loaded and
                                  no base-image metric is emitted.
        @param base_container_path: Path to the directory used for the base container.
                                    Default is AUTOSERV/base_container_path in
                                    global config.

        @raise ContainerError: Re-raised if obtaining the base image fails
                               with it (only when no factory is supplied).
        """
        self.container_path = os.path.realpath(container_path)
        if container_factory is not None:
            self._factory = container_factory
        else:
            # The base image is looked up under base_container_path, while the
            # factory creates containers under this bucket's own (resolved)
            # container path.
            base_image_ok = True
            try:
                container = BaseImage(base_container_path, base_name).get()
            except error.ContainerError:
                base_image_ok = False
                raise
            finally:
                # Count every attempt. 'corrupted' is True only when
                # ContainerError was raised while getting the base image.
                metrics.Counter(
                        METRICS_PREFIX + '/base_image',
                        field_spec=[ts_mon.BooleanField('corrupted')]
                ).increment(fields={'corrupted': not base_image_ok})
            self._factory = ContainerFactory(
                    base_container=container,
                    lxc_path=self.container_path)
        # Maps container IDs to Container objects; filled in by get_all().
        self.container_cache = {}


    def get_all(self, force_update=False):
        """Get details of all containers.

        Retrieves all containers owned by the bucket.  Note that this doesn't
        include the base container, or any containers owned by the container
        pool.

        Containers that are already in the cache are reused as they are
        instead of being re-created from disk. Pass force_update=True to
        rebuild every entry.

        @param force_update: Boolean, ignore cached values if set.

        @return: A dictionary of all containers with detailed attributes,
                 indexed by container ID.
        """
        logging.debug("Fetching all extant LXC containers")
        info_collection = lxc.get_container_info(self.container_path)
        if force_update:
            logging.debug("Clearing cached container info")
            containers = {}
        else:
            containers = self.container_cache
        # The keys of `containers` are container.ContainerId objects, not
        # strings, so cached entries are matched against the reported
        # container names through their string form.
        cached_names = set(str(k) for k in containers)
        for info in info_collection:
            if info['name'] in cached_names:
                # Already cached; do not build a second Container object.
                continue
            container = Container.create_from_existing_dir(self.container_path,
                                                           **info)
            # Active containers have an ID.  Zygotes and base containers don't.
            if container.id is not None:
                containers[container.id] = container
        logging.debug('All containers found: %s',
                      [(repr(k), str(k)) for k in containers])
        self.container_cache = containers
        return containers


    def get_container(self, container_id):
        """Get the container with the given ID.

        The cache is consulted first. On a miss, the containers are re-read
        once through get_all() and searched by the ID object, then by the
        string form of the ID.

        @param container_id: ID of the container.

        @return: The matching container object. Returns None if no container
                 matches the given ID.
        """
        logging.debug("Fetching LXC container with id %s", container_id)
        if container_id in self.container_cache:
            logging.debug("Found container %s in cache", container_id)
            return self.container_cache[container_id]

        # Scan the container directory once and reuse the result for both
        # lookups below.
        containers = self.get_all()
        container = containers.get(container_id)
        if container is not None:
            return container

        logging.debug(
                "Could not find container by container id object: %s (%s)",
                container_id, repr(container_id))
        # When container IDs are loaded from disk, job_id is cast from
        # NoneType to the string 'None' (crrev/c/1056366). This causes problems
        # if the input id has not been cast the same way, so fall back to
        # comparing string forms.
        logging.debug('Try to get container by the id string: %s',
                      container_id)
        wanted = str(container_id)
        for k, v in containers.items():
            if str(k) == wanted:
                return v

        logging.debug('Could not find container by id string: %s',
                      container_id)
        return None


    def exist(self, container_id):
        """Check if a container exists with the given ID.

        @param container_id: ID of the container.

        @return: True if the container with the given ID exists, otherwise
                 returns False.
        """
        # Identity comparison: the answer must not depend on how a container
        # object implements equality.
        return self.get_container(container_id) is not None


    def destroy_all(self):
        """Destroy all containers; the base container must be destroyed last.

        Each destroyed container is also removed from the cache.
        """
        containers = self.get_all().values()
        # sorted() returns a new list, so deleting from the cache inside the
        # loop is safe. The sort is stable and only moves a container named
        # constants.BASE to the end.
        for container in sorted(
                containers, key=lambda n: 1 if n.name == constants.BASE else 0):
            key = container.id
            logging.info('Destroy container %s.', container.name)
            container.destroy()
            del self.container_cache[key]

    def scrub_container_location(self, name,
                                 timeout=constants.LXC_SCRUB_TIMEOUT):
        """Destroy a possibly-nonexistent, possibly-malformed container.

        This exists to clean up an unreachable container which may or may not
        exist and is probably but not definitely malformed if it does exist. It
        is accordingly scorched-earth and force-destroys the container with all
        associated snapshots. Also accordingly, this will not raise an
        exception if the destruction fails or times out.

        @param name: ID of the container.
        @param timeout: Seconds to wait for removal.

        @returns: CmdResult object from the shell command, or None if the
                  command timed out.
        """
        logging.debug(
            "Force-destroying container %s if it exists, with timeout %s sec",
            name, timeout)
        try:
            result = lxc_utils.destroy(self.container_path,
                                       name,
                                       force=True,
                                       snapshots=True,
                                       ignore_status=True,
                                       timeout=timeout)
        except error.CmdTimeoutError:
            # There is no CmdResult to report on a timeout, so return early
            # instead of reading an unbound `result` below.
            logging.warning("Force-destruction of container %s timed out.",
                            name)
            return None
        logging.debug("Force-destruction exit code %s", result.exit_status)
        return result



    @metrics.SecondsTimerDecorator(
        '%s/setup_test_duration' % constants.STATS_KEY)
    @cleanup_if_fail()
    def setup_test(self,
                   container_id,
                   job_id,
                   server_package_url,
                   result_path,
                   control=None,
                   skip_cleanup=False,
                   job_folder=None,
                   dut_name=None):
        """Setup test container for the test job to run.

        The setup includes:
        1. Install autotest_server package from given url.
        2. Copy over local shadow_config.ini.
        3. Mount local site-packages.
        4. Mount test result directory.

        TODO(dshi): Setup also needs to include test control file for autoserv
                    to run in container.

        @param container_id: ID to assign to the test container.
        @param job_id: Job id for the test job to run in the test container.
                       Not read by this method body.
        @param server_package_url: Url to download autotest_server package.
        @param result_path: Directory to be mounted to container to store test
                            results. Must already exist.
        @param control: Path to the control file to run the test job. Default is
                        set to None.
        @param skip_cleanup: Set to True to skip cleanup, used to troubleshoot
                             container failures. Not read by this method body;
                             presumably consumed by the cleanup_if_fail
                             decorator (TODO: confirm).
        @param job_folder: Folder name of the job, e.g., 123-debug_user.
        @param dut_name: Name of the dut to run test, used as the hostname of
                         the container. Default is None.
        @return: A Container object for the test container.

        @raise ContainerError: If the result directory does not exist, or if
                               the container does not exist or is not running.
        """
        if not os.path.exists(result_path):
            # Format the message here; exception arguments are not
            # interpolated the way logging arguments are.
            raise error.ContainerError(
                    'Result directory does not exist: %s' % result_path)
        result_path = os.path.abspath(result_path)

        # Save control file to result_path temporarily. The reason is that the
        # control file in drone_tmp folder can be deleted during scheduler
        # restart. For test not using SSP, the window between test starts and
        # control file being picked up by the test is very small (< 2 seconds).
        # However, for tests using SSP, it takes around 1 minute before the
        # container is setup. If scheduler is restarted during that period, the
        # control file will be deleted, and the test will fail.
        if control:
            control_file_name = os.path.basename(control)
            safe_control = os.path.join(result_path, control_file_name)
            utils.run('cp %s %s' % (control, safe_control))

        # Create test container from the base container.
        container = self._factory.create_container(container_id)

        # Deploy server side package
        container.install_ssp(server_package_url)

        deploy_config_manager = lxc_config.DeployConfigManager(container)
        deploy_config_manager.deploy_pre_start()

        # Copy over control file to run the test job.
        if control:
            container.install_control_file(safe_control)

        # Use a pre-packaged Trusty-compatible Autotest site_packages
        # instead if it exists.  crbug.com/1013241
        if os.path.exists(constants.TRUSTY_SITE_PACKAGES_PATH):
            site_packages_source = constants.TRUSTY_SITE_PACKAGES_PATH
        else:
            site_packages_source = constants.SITE_PACKAGES_PATH
        # Each entry is (source, destination, readonly).
        mount_entries = [
                (site_packages_source,
                 constants.CONTAINER_SITE_PACKAGES_PATH,
                 True),
                (result_path,
                 constants.RESULT_DIR_FMT % job_folder,
                 False),
        ]

        # Update container config to mount directories.
        for source, destination, readonly in mount_entries:
            container.mount_dir(source, destination, readonly)

        # Update file permissions.
        # TODO(dshi): crbug.com/459344 Skip following action when test container
        # can be unprivileged container.
        autotest_path = os.path.join(
                container.rootfs,
                constants.CONTAINER_AUTOTEST_DIR.lstrip(os.path.sep))
        utils.run('sudo chown -R root "%s"' % autotest_path)
        utils.run('sudo chgrp -R root "%s"' % autotest_path)

        container.start(wait_for_network=True, log_dir=result_path)
        deploy_config_manager.deploy_post_start()

        # Update the hostname of the test container to be `dut-name`.
        # Some TradeFed tests use hostname in test results, which is used to
        # group test results in dashboard. The default container name is set to
        # be the name of the folder, which is unique (as it is composed of job
        # id and timestamp). For better result view, the container's hostname
        # is set to be a string containing the dut hostname.
        if dut_name:
            container.set_hostname(constants.CONTAINER_UTSNAME_FORMAT %
                                   dut_name.replace('.', '-'))

        container.modify_import_order()

        container.verify_autotest_setup(job_folder)

        logging.debug('Test container %s is set up.', container.name)
        return container
322