# Copyright 2017 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import logging
import os
import time

import common

from autotest_lib.client.bin import utils
from autotest_lib.client.common_lib import error
from autotest_lib.site_utils.lxc import config as lxc_config
from autotest_lib.site_utils.lxc import constants
from autotest_lib.site_utils.lxc import lxc
from autotest_lib.site_utils.lxc import utils as lxc_utils
from autotest_lib.site_utils.lxc.cleanup_if_fail import cleanup_if_fail
from autotest_lib.site_utils.lxc.base_image import BaseImage
from autotest_lib.site_utils.lxc.constants import \
    CONTAINER_POOL_METRICS_PREFIX as METRICS_PREFIX
from autotest_lib.site_utils.lxc.container import Container
from autotest_lib.site_utils.lxc.container_factory import ContainerFactory

try:
    from autotest_lib.utils.frozen_chromite.lib import metrics
    from infra_libs import ts_mon
except ImportError:
    import mock
    metrics = utils.metrics_mock
    ts_mon = mock.Mock()


class ContainerBucket(object):
    """A wrapper class to interact with containers in a specific container path.

    The bucket keeps a cache (`container_cache`) of Container objects keyed by
    their container ID, and uses a container factory to create new test
    containers.
    """

    def __init__(self,
                 container_path=constants.DEFAULT_CONTAINER_PATH,
                 base_name=constants.BASE,
                 container_factory=None,
                 base_container_path=constants.DEFAULT_BASE_CONTAINER_PATH):
        """Initialize a ContainerBucket.

        @param container_path: Path to the directory used to store containers.
                               Default is set to AUTOSERV/container_path in
                               global config.
        @param base_name: Name of the base container image. Used to initialize a
                          ContainerFactory unless one is provided via the
                          arguments. Defaults to value set via
                          AUTOSERV/container_base_name in global config.
        @param container_factory: A factory for creating Containers.
        @param base_container_path: Path to the directory used for the base
                                    container. Default is
                                    AUTOSERV/base_container_path in global
                                    config.

        @raise ContainerError: If no factory is given and the base image
                               cannot be retrieved.
        """
        self.container_path = os.path.realpath(container_path)
        if container_factory is not None:
            self._factory = container_factory
        else:
            # Pass in the container path so that the bucket is hermetic (i.e. so
            # that if the container path is customized, the base image doesn't
            # fall back to using the default container path).
            base_image_ok = True
            try:
                container = BaseImage(base_container_path, base_name).get()
            except error.ContainerError:
                base_image_ok = False
                raise
            finally:
                # Record whether the base image was usable, on success and on
                # failure alike.
                metrics.Counter(
                        METRICS_PREFIX + '/base_image',
                        field_spec=[ts_mon.BooleanField('corrupted')]
                ).increment(fields={'corrupted': not base_image_ok})
            self._factory = ContainerFactory(
                    base_container=container,
                    lxc_path=self.container_path)
        self.container_cache = {}


    def get_all(self, force_update=False):
        """Get details of all containers.

        Retrieves all containers owned by the bucket.  Note that this doesn't
        include the base container, or any containers owned by the container
        pool.

        @param force_update: Boolean, ignore cached values if set.

        @return: A dictionary of all containers with detailed attributes,
                 indexed by container ID.
        """
        logging.debug("Fetching all extant LXC containers")
        info_collection = lxc.get_container_info(self.container_path)
        if force_update:
            logging.debug("Clearing cached container info")
        containers = {} if force_update else self.container_cache
        # The keys of `containers` are container.ContainerId objects, not
        # strings, so compare the reported names against their string form.
        cached_names = {str(k) for k in containers}
        for info in info_collection:
            # Reuse the cached Container object instead of re-creating it.
            if info['name'] in cached_names:
                continue
            container = Container.create_from_existing_dir(self.container_path,
                                                           **info)
            # Active containers have an ID.  Zygotes and base containers don't.
            if container.id is not None:
                containers[container.id] = container
        logging.debug('All containers found: %s',
                      [(repr(k), str(k)) for k in containers])
        self.container_cache = containers
        return containers


    def get_container(self, container_id):
        """Get a container with matching ID.

        @param container_id: ID of the container.

        @return: A container object with matching ID.  Returns None if no
                 container matches the given ID.
        """
        logging.debug("Fetching LXC container with id %s", container_id)
        if container_id in self.container_cache:
            logging.debug("Found container %s in cache", container_id)
            return self.container_cache[container_id]

        # Scan the containers once and reuse the result for both lookups.
        containers = self.get_all()
        container = containers.get(container_id)
        if container is not None:
            return container

        logging.debug(
                "Could not find container by container id object: %s (%s)",
                container_id, repr(container_id))
        # When loading container IDs from disk, job_id is cast from NoneType
        # to the string 'None' (crrev/c/1056366).  This causes problems if the
        # input ID has not been cast the same way, so fall back to comparing
        # the string forms.
        logging.debug('Try to get container by the id string: %s',
                      container_id)
        wanted = str(container_id)
        for key, value in containers.items():
            if str(key) == wanted:
                return value

        logging.debug('Could not find container by id string: %s',
                      container_id)
        return None


    def exist(self, container_id):
        """Check if a container exists with the given ID.

        @param container_id: ID of the container.

        @return: True if the container with the given ID exists, otherwise
                 returns False.
        """
        return self.get_container(container_id) is not None


    def destroy_all(self):
        """Destroy all containers; the base container must be destroyed last.
        """
        containers = self.get_all().values()
        # sorted() copies the values into a list, so removing entries from the
        # cache while iterating is safe.  Anything named constants.BASE sorts
        # to the end.
        for container in sorted(
                containers, key=lambda n: 1 if n.name == constants.BASE else 0):
            key = container.id
            logging.info('Destroy container %s.', container.name)
            container.destroy()
            self.container_cache.pop(key, None)


    def scrub_container_location(self, name,
                                 timeout=constants.LXC_SCRUB_TIMEOUT):
        """Destroy a possibly-nonexistent, possibly-malformed container.

        This exists to clean up an unreachable container which may or may not
        exist and is probably but not definitely malformed if it does exist. It
        is accordingly scorched-earth and force-destroys the container with all
        associated snapshots. Also accordingly, this will not raise an
        exception if the destruction fails.

        @param name: ID of the container.
        @param timeout: Seconds to wait for removal.

        @returns: CmdResult object from the shell command, or None if the
                  command timed out.
        """
        logging.debug(
                "Force-destroying container %s if it exists, with timeout %s "
                "sec", name, timeout)
        try:
            result = lxc_utils.destroy(self.container_path,
                                       name,
                                       force=True,
                                       snapshots=True,
                                       ignore_status=True,
                                       timeout=timeout)
        except error.CmdTimeoutError:
            logging.warning("Force-destruction of container %s timed out.",
                            name)
            # There is no CmdResult to report on a timeout.
            return None
        logging.debug("Force-destruction exit code %s", result.exit_status)
        return result


    @metrics.SecondsTimerDecorator(
            '%s/setup_test_duration' % constants.STATS_KEY)
    @cleanup_if_fail()
    def setup_test(self,
                   container_id,
                   job_id,
                   server_package_url,
                   result_path,
                   control=None,
                   skip_cleanup=False,
                   job_folder=None,
                   dut_name=None):
        """Setup test container for the test job to run.

        The setup includes:
        1. Install autotest_server package from given url.
        2. Copy over local shadow_config.ini.
        3. Mount local site-packages.
        4. Mount test result directory.

        TODO(dshi): Setup also needs to include test control file for autoserv
                    to run in container.

        @param container_id: ID to assign to the test container.
        @param job_id: Job id for the test job to run in the test container.
        @param server_package_url: Url to download autotest_server package.
        @param result_path: Directory to be mounted to container to store test
                            results.
        @param control: Path to the control file to run the test job. Default is
                        set to None.
        @param skip_cleanup: Set to True to skip cleanup, used to troubleshoot
                             container failures.
        @param job_folder: Folder name of the job, e.g., 123-debug_user.
        @param dut_name: Name of the dut to run test, used as the hostname of
                         the container. Default is None.
        @return: A Container object for the test container.

        @raise ContainerError: If the result directory does not exist, or if
                               the container does not exist or is not running.
        """
        if not os.path.exists(result_path):
            raise error.ContainerError(
                    'Result directory does not exist: %s' % result_path)
        result_path = os.path.abspath(result_path)

        # Save control file to result_path temporarily. The reason is that the
        # control file in drone_tmp folder can be deleted during scheduler
        # restart. For test not using SSP, the window between test starts and
        # control file being picked up by the test is very small (< 2 seconds).
        # However, for tests using SSP, it takes around 1 minute before the
        # container is setup. If scheduler is restarted during that period, the
        # control file will be deleted, and the test will fail.
        if control:
            control_file_name = os.path.basename(control)
            safe_control = os.path.join(result_path, control_file_name)
            # Quote the paths so that whitespace in them does not split the
            # command's arguments.
            utils.run('cp "%s" "%s"' % (control, safe_control))

        # Create test container from the base container.
        container = self._factory.create_container(container_id)

        # Deploy server side package
        container.install_ssp(server_package_url)

        deploy_config_manager = lxc_config.DeployConfigManager(container)
        deploy_config_manager.deploy_pre_start()

        # Copy over control file to run the test job.
        if control:
            container.install_control_file(safe_control)

        # Use a pre-packaged Trusty-compatible Autotest site_packages
        # instead if it exists.  crbug.com/1013241
        if os.path.exists(constants.TRUSTY_SITE_PACKAGES_PATH):
            site_packages_source = constants.TRUSTY_SITE_PACKAGES_PATH
        else:
            site_packages_source = constants.SITE_PACKAGES_PATH
        # Each entry is (source, destination, readonly).
        mount_entries = [
                (site_packages_source,
                 constants.CONTAINER_SITE_PACKAGES_PATH,
                 True),
                (result_path,
                 constants.RESULT_DIR_FMT % job_folder,
                 False),
        ]

        # Update container config to mount directories.
        for source, destination, readonly in mount_entries:
            container.mount_dir(source, destination, readonly)

        # Update file permissions.
        # TODO(dshi): crbug.com/459344 Skip following action when test container
        # can be unprivileged container.
        autotest_path = os.path.join(
                container.rootfs,
                constants.CONTAINER_AUTOTEST_DIR.lstrip(os.path.sep))
        utils.run('sudo chown -R root "%s"' % autotest_path)
        utils.run('sudo chgrp -R root "%s"' % autotest_path)

        container.start(wait_for_network=True, log_dir=result_path)
        deploy_config_manager.deploy_post_start()

        # Update the hostname of the test container to be `dut-name`.
        # Some TradeFed tests use hostname in test results, which is used to
        # group test results in dashboard. The default container name is set to
        # be the name of the folder, which is unique (as it is composed of job
        # id and timestamp). For better result view, the container's hostname
        # is set to be a string containing the dut hostname.
        if dut_name:
            container.set_hostname(constants.CONTAINER_UTSNAME_FORMAT %
                                   dut_name.replace('.', '-'))

        container.modify_import_order()

        container.verify_autotest_setup(job_folder)

        logging.debug('Test container %s is set up.', container.name)
        return container