xref: /aosp_15_r20/external/autotest/site_utils/lxc/container.py (revision 9c5db1993ded3edbeafc8092d69fe5de2ee02df7)
1# Copyright 2015 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5from __future__ import absolute_import
6from __future__ import division
7from __future__ import print_function
8
9import collections
10import json
11import logging
12import os
13import re
14import shutil
15import tempfile
16import time
17
18import common
19from autotest_lib.client.bin import utils
20from autotest_lib.client.common_lib import error
21from autotest_lib.site_utils.lxc import constants
22from autotest_lib.site_utils.lxc import lxc
23from autotest_lib.site_utils.lxc import utils as lxc_utils
24import six
25
26try:
27    from autotest_lib.utils.frozen_chromite.lib import metrics
28except ImportError:
29    metrics = utils.metrics_mock
30
# Naming convention of test container, e.g., test_300_1422862512_2424, where:
# 300:        The test job ID.
# 1422862512: The tick when container is created (ContainerId.create uses
#             the integer epoch timestamp by default).
# 2424:       The PID of autoserv that starts the container.
_TEST_CONTAINER_NAME_FMT = 'test_%s_%d_%d'
# Name of the container ID file.
_CONTAINER_ID_FILENAME = 'container_id.json'


class ContainerId(collections.namedtuple('ContainerId',
                                         ['job_id', 'creation_time', 'pid'])):
    """An identifier for containers.

    The ID is a (job_id, creation_time, pid) tuple.  It can be rendered as a
    container name via str(), and persisted to / restored from a JSON file
    inside a directory via save() / load().
    """

    # Optimization.  Avoids __dict__ creation.  Empty because this subclass has
    # no instance vars of its own.
    __slots__ = ()


    def __str__(self):
        # NOTE: The `creation_time` may be a float, but the name format uses
        # %d so it is rendered as an integer.  The stored tuple value is
        # untouched, so comparing and hashing still use the original value.
        return _TEST_CONTAINER_NAME_FMT % self


    def save(self, path):
        """Saves the ID to the given path.

        The ID is written as a JSON array to the container ID file inside
        `path`.

        @param path: Path to a directory where the container ID will be
                     serialized.
        """
        dst = os.path.join(path, _CONTAINER_ID_FILENAME)
        # Serialize once and reuse the text for both the file and the log
        # message, instead of reading the file back just to log it.
        content = json.dumps(self)
        with open(dst, 'w') as f:
            f.write(content)

        logging.debug('Container id saved to %s (content: %s)', dst, content)


    @classmethod
    def load(cls, path):
        """Reads the ID from the given path.

        @param path: Path to check for a serialized container ID.

        @return: A container ID if one is found on the given path, or None
                 otherwise.

        @raise ValueError: If a JSON load error occurred, or the JSON array
                           does not have exactly three elements.
        @raise TypeError: If the file was valid JSON but didn't contain a
                          JSON array.
        """
        src = os.path.join(path, _CONTAINER_ID_FILENAME)

        try:
            with open(src, 'r') as f:
                fields = json.load(f)
        except IOError as err:
            # File not found, or couldn't be opened for some other reason.
            # Treat all these cases as no ID.
            logging.warning('Load container id file "%s" error: %s', src, err)
            return None

        # save() always writes a JSON array.  Reject any other JSON value
        # explicitly: unpacking a 3-key object or a 3-character string would
        # otherwise "succeed" and yield a bogus ID.
        if not isinstance(fields, list):
            raise TypeError(
                    'Container id file "%s" does not contain a JSON array.' %
                    src)
        # Raises ValueError if the array has the wrong number of elements.
        job_id, ctime, pid = fields
        # TODO(pprabhu, crbug.com/842343) Remove this once all persistent
        # container ids have migrated to str.
        job_id = str(job_id)
        return cls(job_id, ctime, pid)


    @classmethod
    def create(cls, job_id, ctime=None, pid=None):
        """Creates a new container ID.

        @param job_id: The first field in the ID.  It is stored as a str.
        @param ctime: The second field in the ID.  Optional. If not provided,
                      the current epoch timestamp (truncated to an integer)
                      is used.
        @param pid: The third field in the ID.  Optional.  If not provided, the
                    PID of the current process is used.

        @return: The new ContainerId.
        """
        if ctime is None:
            ctime = int(time.time())
        if pid is None:
            pid = os.getpid()
        # TODO(pprabhu) Drop str() cast once
        # job_directories.get_job_id_or_task_id() starts returning str directly.
        return cls(str(job_id), ctime, pid)
116
117
class Container(object):
    """A wrapper class of an LXC container.

    The wrapper class provides methods to interact with a container, e.g.,
    start, stop, destroy, run a command. It also has attributes of the
    container, including:
    name: Name of the container.
    state: State of the container, e.g., ABORTING, RUNNING, STARTING, STOPPED,
           or STOPPING.

    lxc-ls can also collect other attributes of a container including:
    ipv4: IP address for IPv4.
    ipv6: IP address for IPv6.
    autostart: If the container will autostart at system boot.
    pid: Process ID of the container.
    memory: Memory used by the container, as a string, e.g., "6.2MB"
    ram: Physical ram used by the container, as a string, e.g., "6.2MB"
    swap: swap used by the container, as a string, e.g., "1.0MB"

    For performance reason, such info is not collected for now.

    The attributes available are defined in ATTRIBUTES constant.
    """

    # LXC version, detected once by the first constructed instance and shared
    # by all instances afterwards.
    _LXC_VERSION = None

    def __init__(self, container_path, name, attribute_values, src=None,
                 snapshot=False):
        """Initialize an object of LXC container with given attribute values.

        @param container_path: Directory that stores the container.
        @param name: Name of the container.
        @param attribute_values: A dictionary of attribute values for the
                                 container.  Each entry is set as an attribute
                                 on this object.
        @param src: An optional source container.  If provided, the source
                    container is cloned, and the new container will point to
                    the clone.
        @param snapshot: If a source container was specified, this argument
                         specifies whether or not to create a snapshot clone.
                         The default (False) requests a full clone.  The flag
                         is passed through to lxc_utils.clone, which decides
                         what happens if a snapshot cannot be created.
        """
        self.container_path = os.path.realpath(container_path)
        # Path to the rootfs of the container. This will be initialized when
        # property rootfs is retrieved.
        self._rootfs = None
        self.name = name
        for attribute, value in six.iteritems(attribute_values):
            setattr(self, attribute, value)

        # Clone the container
        if src is not None:
            # Clone the source container to initialize this one.
            lxc_utils.clone(src.container_path, src.name, self.container_path,
                            self.name, snapshot)
            # Newly cloned containers have no ID.
            self._id = None
        else:
            # This may be an existing container.  Try to read the ID.
            try:
                self._id = ContainerId.load(
                        os.path.join(self.container_path, self.name))
                logging.debug('Container %s has id: "%s"', self.name, self._id)
            except (ValueError, TypeError) as e:
                # Ignore load errors.  ContainerBucket currently queries every
                # container quite frequently, and emitting exceptions here would
                # cause any invalid containers on a server to block all
                # ContainerBucket.get_all calls (see crbug/783865).
                logging.warning('Unable to determine ID for container %s: %s',
                                self.name, e)
                self._id = None

        if not Container._LXC_VERSION:
            Container._LXC_VERSION = lxc_utils.get_lxc_version()


    @classmethod
    def create_from_existing_dir(cls, lxc_path, name, **kwargs):
        """Creates a new container instance for an lxc container that already
        exists on disk.

        No cloning is performed.  This method does not itself verify that the
        container exists; callers are expected to check beforehand.

        @param lxc_path: The LXC path for the container.
        @param name: The container name.
        @param kwargs: Attribute values to set on the new container object.

        @return: The new container.
        """
        return cls(lxc_path, name, kwargs)


    # Containers have a name and an ID.  The name is simply the name of the LXC
    # container.  The ID is the actual key that is used to identify the
    # container to the autoserv system.  In the case of a JIT-created container,
    # we have the ID at the container's creation time so we use that to name the
    # container.  This may not be the case for other types of containers.
    @classmethod
    def clone(cls, src, new_name=None, new_path=None, snapshot=False,
              cleanup=False):
        """Creates a clone of this container.

        @param src: The original container.
        @param new_name: Name for the cloned container.  If this is not
                         provided, a random unique container name will be
                         generated.
        @param new_path: LXC path for the cloned container (optional; if not
                         specified, the new container is created in the same
                         directory as the source container).
        @param snapshot: Whether to snapshot, or create a full clone.  Note that
                         snapshot cloning is not supported on all platforms.  If
                         this code is running on a platform that does not
                         support snapshot clones, this flag is ignored.
        @param cleanup: If a container with the given name and path already
                        exist, clean it up first.

        @return: The new container.

        @raise error.ContainerError: If a container with the given name
                                     already exists and cleanup is False.
        """
        if new_path is None:
            new_path = src.container_path

        if new_name is None:
            # mkdtemp both reserves a unique name and creates the directory.
            _, new_name = os.path.split(
                tempfile.mkdtemp(dir=new_path, prefix='container.'))
            logging.debug('Generating new name for container: %s', new_name)
        else:
            # If a container exists at this location, clean it up first
            container_folder = os.path.join(new_path, new_name)
            if lxc_utils.path_exists(container_folder):
                if not cleanup:
                    raise error.ContainerError('Container %s already exists.' %
                                               new_name)
                container = Container.create_from_existing_dir(new_path,
                                                               new_name)
                try:
                    container.destroy()
                except error.CmdError as e:
                    # The container could be created in an incomplete
                    # state. Delete the container folder instead.
                    logging.warning('Failed to destroy container %s, error: %s',
                                    new_name, e)
                    utils.run('sudo rm -rf "%s"' % container_folder)
            # Create the directory prior to creating the new container.  This
            # puts the ownership of the container under the current process's
            # user, rather than root.  This is necessary to enable the
            # ContainerId to serialize properly.
            os.mkdir(container_folder)

        # Create and return the new container.
        return cls(new_path, new_name, {}, src, snapshot)


    def refresh_status(self):
        """Refresh the status information of the container.

        Every attribute reported by lxc.get_container_info for this container
        is copied onto this object.

        @raise error.ContainerError: If no matching container is found.
        """
        containers = lxc.get_container_info(self.container_path, name=self.name)
        if not containers:
            raise error.ContainerError(
                    'No container found in directory %s with name of %s.' %
                    (self.container_path, self.name))
        for attribute, value in six.iteritems(containers[0]):
            setattr(self, attribute, value)


    @property
    def rootfs(self):
        """Path to the rootfs of the container.

        This property returns the path to the rootfs of the container, that is,
        the folder where the container stores its local files. It reads the
        attribute lxc.rootfs (lxc.rootfs.path for lxc version 3 and above) from
        the config of the container, e.g.,
            lxc.rootfs = /usr/local/autotest/containers/t4/rootfs
        If the container is created with snapshot, the rootfs is a chain of
        folders, separated by `:` and ordered by how the snapshot is created,
        e.g.,
            lxc.rootfs = overlayfs:/usr/local/autotest/containers/base/rootfs:
            /usr/local/autotest/containers/t4_s/delta0
        This function returns the last folder in the chain, in above example,
        that is `/usr/local/autotest/containers/t4_s/delta0`

        Files in the rootfs will be accessible directly within container. For
        example, a folder in host "[rootfs]/usr/local/file1", can be accessed
        inside container by path "/usr/local/file1". Note that symlink in the
        host can not across host/container boundary, instead, directory mount
        should be used, refer to function mount_dir.

        The value is resolved on first access and cached afterwards.

        @return: Path to the rootfs of the container.
        """
        if not self._rootfs:
            lxc_rootfs_config_name = 'lxc.rootfs'
            # Check to see if the major lxc version is 3 or greater.  This is
            # only needed (and only logged) when the path is not cached yet.
            if Container._LXC_VERSION:
                logging.info("Detected lxc version %s", Container._LXC_VERSION)
                if Container._LXC_VERSION[0] >= 3:
                    lxc_rootfs_config_name = 'lxc.rootfs.path'
            lxc_rootfs = self._get_lxc_config(lxc_rootfs_config_name)[0]
            # For a snapshot clone the value is a `:` separated chain; the
            # last element is this container's own folder.  Without any `:`
            # the split yields the value itself.
            self._rootfs = lxc_rootfs.split(':')[-1]
        return self._rootfs


    def attach_run(self, command, bash=True):
        """Attach to a given container and run the given command.

        @param command: Command to run in the container.
        @param bash: Run the command through bash -c "command". This allows
                     pipes to be used in command. Default is set to True.
                     Commands that already start with `bash -c` are not
                     wrapped again.

        @return: The result of utils.run for the lxc-attach command (callers
                 in this class read its `stdout`).

        @raise error.CmdError: If container does not exist, or not running.
        """
        cmd = 'sudo lxc-attach -P %s -n %s' % (self.container_path, self.name)
        if bash and not command.startswith('bash -c'):
            command = 'bash -c "%s"' % utils.sh_escape(command)
        cmd += ' -- %s' % command
        # TODO(dshi): crbug.com/459344 Set sudo to default to False when test
        # container can be unprivileged container.
        return utils.run(cmd)


    def is_network_up(self):
        """Check if network is up in the container by curl base container url.

        @return: True if the network is up, otherwise False.
        """
        # TODO(b/184304822) Remove the extra logging.
        try:
            with open('/proc/net/udp') as f:
                logging.debug('Checking UDP on drone:\n %s', f.read())
        except Exception as e:
            # The extra logging is best effort and must never fail the check.
            logging.debug(e)

        try:
            # The commands are chained with `;`, so the first three only
            # produce diagnostic output and the final curl decides the
            # outcome.
            self.attach_run('ifconfig eth0 ;'
                            'ping -c 1 8.8.8.8 ;'
                            'cat /proc/net/udp ;'
                            'curl --head %s' % constants.CONTAINER_BASE_URL)
            return True
        except error.CmdError as e:
            logging.debug(e)
            return False


    def _save_syslog(self, log_dir):
        """Copies the host syslog into the debug log folder, best effort.

        This is kept in its own method so that an error raised by the copy is
        fully handled here and cannot replace the exception the caller is
        about to re-raise.

        @param log_dir: Base log directory, or None.  Nothing is copied if it
                        is not set.
        """
        if not log_dir:
            return
        try:
            shutil.copy('/var/log/syslog',
                        os.path.join(log_dir, 'ssp_logs', 'debug'))
        except (IOError, OSError, shutil.Error) as e:
            logging.warning('Failed to save syslog for debugging: %s', e)


    @metrics.SecondsTimerDecorator(
        '%s/container_start_duration' % constants.STATS_KEY)
    def start(self, wait_for_network=True, log_dir=None):
        """Start the container.

        @param wait_for_network: True to wait for network to be up. Default is
                                 set to True.
        @param log_dir: Optional base log directory.  If set, lxc-start debug
                        and console logs are written under
                        <log_dir>/ssp_logs/debug, and the host syslog is
                        copied there if waiting for the network fails.

        @raise ContainerError: If container does not exist, or fails to start.
        """
        log_addendum = ""
        if log_dir:
            log_addendum = "--logpriority=DEBUG --logfile={} --console-log={}".format(
                    os.path.join(log_dir, 'ssp_logs/debug/lxc-start.log'),
                    os.path.join(log_dir, 'ssp_logs/debug/lxc-console.log'))

        cmd = 'sudo lxc-start -P %s -n %s -d %s' % (self.container_path,
                                                    self.name, log_addendum)
        output = utils.run(cmd).stdout
        if not self.is_running():
            raise error.ContainerError(
                    'Container %s failed to start. lxc command output:\n%s' %
                    (os.path.join(self.container_path, self.name),
                     output))

        if wait_for_network:
            logging.debug('Wait for network to be up.')
            start_time = time.time()
            try:
                utils.poll_for_condition(
                        condition=self.is_network_up,
                        timeout=constants.NETWORK_INIT_TIMEOUT,
                        sleep_interval=constants.NETWORK_INIT_CHECK_INTERVAL,
                        desc='network is up')
            except Exception:
                # Save and upload syslog for network issues debugging.  This
                # is best effort: it is skipped without a log_dir and never
                # masks the original failure.
                self._save_syslog(log_dir)
                raise
            logging.debug('Network is up after %.2f seconds.',
                          time.time() - start_time)


    @metrics.SecondsTimerDecorator(
        '%s/container_stop_duration' % constants.STATS_KEY)
    def stop(self):
        """Stop the container.

        @raise ContainerError: If container does not exist, or fails to stop.
        """
        cmd = 'sudo lxc-stop -P %s -n %s' % (self.container_path, self.name)
        output = utils.run(cmd).stdout
        self.refresh_status()
        if self.state != 'STOPPED':
            raise error.ContainerError(
                    'Container %s failed to be stopped. lxc command output:\n'
                    '%s' % (os.path.join(self.container_path, self.name),
                            output))


    @metrics.SecondsTimerDecorator(
        '%s/container_destroy_duration' % constants.STATS_KEY)
    def destroy(self, force=True):
        """Destroy the container.

        @param force: Set to True to force to destroy the container even if it's
                      running. This is faster than stop a container first then
                      try to destroy it. Default is set to True.

        @raise ContainerError: If container does not exist or failed to destroy
                               the container.
        """
        logging.debug('Destroying container %s/%s',
                      self.container_path,
                      self.name)
        lxc_utils.destroy(self.container_path, self.name, force=force)


    def mount_dir(self, source, destination, readonly=False):
        """Mount a directory in host to a directory in the container.

        The mount is recorded as an lxc.mount.entry in the container config,
        so this must be called while the container is not running.

        @param source: Directory in host to be mounted.
        @param destination: Directory in container to mount the source directory
        @param readonly: Set to True to make a readonly mount, default is False.
        """
        # Destination path in container must be relative.
        destination = destination.lstrip('/')
        # Create directory in container for mount.  Changes to container rootfs
        # require sudo.
        utils.run('sudo mkdir -p %s' % os.path.join(self.rootfs, destination))
        mount = ('%s %s none bind%s 0 0' %
                 (source, destination, ',ro' if readonly else ''))
        self._set_lxc_config('lxc.mount.entry', mount)

    def verify_autotest_setup(self, job_folder):
        """Verify autotest code is set up properly in the container.

        @param job_folder: Name of the job result folder.

        @raise ContainerError: If autotest code is not set up properly.
        """
        # Test autotest code is setup by verifying a list of
        # (directory, minimum file count)
        directories_to_check = [
                (constants.CONTAINER_AUTOTEST_DIR, 3),
                (constants.RESULT_DIR_FMT % job_folder, 0),
                (constants.CONTAINER_SITE_PACKAGES_PATH, 3)]
        for directory, count in directories_to_check:
            result = self.attach_run(command=(constants.COUNT_FILE_CMD %
                                              {'dir': directory})).stdout
            # Parse the command output once and reuse the number.
            entries = int(result)
            logging.debug('%s entries in %s.', entries, directory)
            if entries < count:
                raise error.ContainerError('%s is not properly set up.' %
                                           directory)
        # lxc-attach and run command does not run in shell, thus .bashrc is not
        # loaded. Following command creates a symlink in /usr/bin/ for gsutil
        # if it's installed.
        # TODO(dshi): Remove this code after lab container is updated with
        # gsutil installed in /usr/bin/
        self.attach_run('test -f /root/gsutil/gsutil && '
                        'ln -s /root/gsutil/gsutil /usr/bin/gsutil || true')


    def modify_import_order(self):
        """Swap the python import order of lib and local/lib.

        In Moblab, the host's python modules located in
        /usr/lib64/python2.7/site-packages is mounted to following folder inside
        container: /usr/local/lib/python2.7/dist-packages/. The modules include
        an old version of requests module, which is used in autotest
        site-packages. For test, the module is only used in
        dev_server/symbolicate_dump for requests.call and requests.codes.OK.
        When pip is installed inside the container, it installs requests module
        with version of 2.2.1 in /usr/lib/python2.7/dist-packages/. The version
        is newer than the one used in autotest site-packages, but not the latest
        either.
        According to /usr/lib/python2.7/site.py, modules in /usr/local/lib are
        imported before the ones in /usr/lib. That leads to pip to use the older
        version of requests (0.11.2), and it will fail. On the other hand,
        requests module 2.2.1 can't be installed in CrOS (refer to CL:265759),
        and higher version of requests module can't work with pip.
        The only fix to resolve this is to switch the import order, so modules
        in /usr/lib can be imported before /usr/local/lib.
        """
        site_module = '/usr/lib/python2.7/site.py'
        # The swap is done in three steps through a temporary placeholder:
        # "local/lib" -> "lib_placeholder", "lib" -> "local/lib", and finally
        # "lib_placeholder" -> "lib".
        # NOTE: `\\/` is spelled with an escaped backslash so that the
        # literals contain no invalid `\/` escape sequence; the command text
        # sent to the container still contains `\/`.
        self.attach_run("sed -i ':a;N;$!ba;s/\"local\\/lib\",\\n/"
                        "\"lib_placeholder\",\\n/g' %s" % site_module)
        self.attach_run("sed -i ':a;N;$!ba;s/\"lib\",\\n/"
                        "\"local\\/lib\",\\n/g' %s" % site_module)
        self.attach_run('sed -i "s/lib_placeholder/lib/g" %s' %
                        site_module)


    def is_running(self):
        """Returns whether or not this container is currently running.

        The container status is refreshed before the state is checked.
        """
        self.refresh_status()
        return self.state == 'RUNNING'


    def set_hostname(self, hostname):
        """Sets the hostname within the container.

        This method can only be called on a running container.  Besides
        setting the hostname, an entry mapping it to 127.0.0.1 is appended to
        /etc/hosts in the container.

        @param hostname The new container hostname.

        @raise ContainerError: If the container is not running.
        """
        if not self.is_running():
            raise error.ContainerError(
                    'set_hostname can only be called on running containers.')

        self.attach_run('hostname %s' % (hostname))
        self.attach_run(constants.APPEND_CMD_FMT % {
                'content': '127.0.0.1 %s' % (hostname),
                'file': '/etc/hosts'})


    def install_ssp(self, ssp_url):
        """Downloads and installs the given server package.

        The package is downloaded to and extracted in usr/local under the
        container's rootfs.

        @param ssp_url: The URL of the ssp to download and install.
        """
        usr_local_path = os.path.join(self.rootfs, 'usr', 'local')
        autotest_pkg_path = os.path.join(usr_local_path,
                                         'autotest_server_package.tar.bz2')
        # Changes within the container rootfs require sudo.
        utils.run('sudo mkdir -p %s'% usr_local_path)

        lxc.download_extract(ssp_url, autotest_pkg_path, usr_local_path)


    def install_control_file(self, control_file):
        """Installs the given control file.

        The given file will be copied into the container, under
        constants.CONTROL_TEMP_PATH, keeping its base name.

        @param control_file: Path to the control file to install.
        """
        dst = os.path.join(constants.CONTROL_TEMP_PATH,
                           os.path.basename(control_file))
        self.copy(control_file, dst)


    def copy(self, host_path, container_path):
        """Copies files into the container.

        @param host_path: Path to the source file/dir to be copied.
        @param container_path: Path to the destination dir (in the container).
        """
        # Strip the leading separator so that the join stays inside rootfs.
        dst_path = os.path.join(self.rootfs,
                                container_path.lstrip(os.path.sep))
        self._do_copy(src=host_path, dst=dst_path)


    @property
    def id(self):
        """Returns the container ID."""
        return self._id


    @id.setter
    def id(self, new_id):
        """Sets the container ID.

        @param new_id: The new ID.  It must provide a save() method (as
                       ContainerId does), because it is persisted right away.
        """
        self._id = new_id
        # Persist the ID so other container objects can pick it up.
        self._id.save(os.path.join(self.container_path, self.name))


    def _do_copy(self, src, dst):
        """Copies files and directories on the host system.

        @param src: The source file or directory.
        @param dst: The destination file or directory.  If the path to the
                    destination does not exist, it will be created.
        """
        # Create the dst dir. mkdir -p will not fail if dst_dir exists.
        dst_dir = os.path.dirname(dst)
        # Make sure the source ends with `/.` if it's a directory. Otherwise
        # command cp will not work.
        if os.path.isdir(src) and os.path.split(src)[1] != '.':
            src = os.path.join(src, '.')
        utils.run("sudo sh -c 'mkdir -p \"%s\" && cp -RL \"%s\" \"%s\"'" %
                  (dst_dir, src, dst))

    def _set_lxc_config(self, key, value):
        """Sets an LXC config value for this container.

        Configuration changes made while a container is running don't take
        effect until the container is restarted.  Since this isn't a scenario
        that should ever come up in our use cases, calling this method on a
        running container will cause a ContainerError.

        @param key: The LXC config key to set.
        @param value: The value to use for the given key.

        @raise error.ContainerError: If the container is already started.
        """
        if self.is_running():
            raise error.ContainerError(
                '_set_lxc_config(%s, %s) called on a running container.' %
                (key, value))
        config_file = os.path.join(self.container_path, self.name, 'config')
        config = '%s = %s' % (key, value)
        utils.run(
            constants.APPEND_CMD_FMT % {'content': config, 'file': config_file})


    def _get_lxc_config(self, key):
        """Retrieves an LXC config value from the container.

        @param key The key of the config value to retrieve.

        @return: The lines printed by lxc-info for the key, as a list.  The
                 `<key> = ` decoration is stripped from the first line.

        @raise error.ContainerError: If lxc-info printed nothing, or the first
                                     line does not start with `<key> = `.
        """
        cmd = ('sudo lxc-info -P %s -n %s -c %s' %
               (self.container_path, self.name, key))
        config = utils.run(cmd).stdout.strip().splitlines()

        # Strip the decoration from line 1 of the output.  The key is escaped
        # because config keys contain `.`, which would otherwise match any
        # character.  Empty output is reported as a missing config too.
        match = None
        if config:
            match = re.match('%s = (.*)' % re.escape(key), config[0])
        if not match:
            raise error.ContainerError(
                    'Config %s not found for container %s. (%s)' %
                    (key, self.name, ','.join(config)))
        config[0] = match.group(1)
        return config
651