autotest/server/autotest.py

*9c5db199SXin Li# Lint as: python2, python3
*9c5db199SXin Li# Copyright 2007 Google Inc. Released under the GPL v2
*9c5db199SXin Li#pylint: disable-msg=C0111
*9c5db199SXin Li
*9c5db199SXin Lifrom __future__ import absolute_import
*9c5db199SXin Lifrom __future__ import division
*9c5db199SXin Lifrom __future__ import print_function
*9c5db199SXin Li
*9c5db199SXin Liimport glob
*9c5db199SXin Liimport logging
*9c5db199SXin Liimport os
*9c5db199SXin Liimport re
*9c5db199SXin Liimport sys
*9c5db199SXin Liimport tempfile
*9c5db199SXin Liimport time
*9c5db199SXin Liimport traceback
*9c5db199SXin Li
*9c5db199SXin Liimport common
*9c5db199SXin Lifrom autotest_lib.client.bin.result_tools import runner as result_tools_runner
*9c5db199SXin Lifrom autotest_lib.client.common_lib import autotemp
*9c5db199SXin Lifrom autotest_lib.client.common_lib import base_job
*9c5db199SXin Lifrom autotest_lib.client.common_lib import error
*9c5db199SXin Lifrom autotest_lib.client.common_lib import global_config
*9c5db199SXin Lifrom autotest_lib.client.common_lib import packages
*9c5db199SXin Lifrom autotest_lib.client.common_lib import utils as client_utils
*9c5db199SXin Lifrom autotest_lib.server import installable_object
*9c5db199SXin Lifrom autotest_lib.server import utils
*9c5db199SXin Lifrom autotest_lib.server import utils as server_utils
*9c5db199SXin Lifrom autotest_lib.server.cros.dynamic_suite.constants import JOB_REPO_URL
*9c5db199SXin Liimport six
*9c5db199SXin Lifrom six.moves import map
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Litry:
*9c5db199SXin Li    from autotest_lib.utils.frozen_chromite.lib import metrics
*9c5db199SXin Liexcept ImportError:
*9c5db199SXin Li    metrics = client_utils.metrics_mock
*9c5db199SXin Li
*9c5db199SXin Li
# This is assumed to be the value by tests, do not change it.
# NOTE(review): presumably the name of an environment variable, per the
# identifier -- it is not read in the visible part of this file.
OFFLOAD_ENVVAR = "SYNCHRONOUS_OFFLOAD_DIR"

# Upstream locations of the autotest client. Autotest._install() uses them as
# the last-resort 'svn checkout' sources (SVN first, then HTTP).
AUTOTEST_SVN = 'svn://test.kernel.org/autotest/trunk/client'
AUTOTEST_HTTP = 'http://test.kernel.org/svn/autotest/trunk/client'

# Shorthand for the process-wide configuration object.
_CONFIG = global_config.global_config
# AUTOSERV/enable_server_prebuild from the global config; False when unset.
AUTOSERV_PREBUILD = _CONFIG.get_config_value(
        'AUTOSERV', 'enable_server_prebuild', type=bool, default=False)

# Match on a line like this:
# FAIL test_name  test_name timestamp=1 localtime=Nov 15 12:43:10 <fail_msg>
# Whatever follows the HH:MM:SS time is captured in the 'fail_msg' group.
_FAIL_STATUS_RE = re.compile(
    r'\s*FAIL.*localtime=.*\s*.*\s*[0-9]+:[0-9]+:[0-9]+\s*(?P<fail_msg>.*)')

# NOTE(review): not referenced in the visible part of this file; presumably a
# log buffer size in bytes, per the name -- confirm against its users.
LOG_BUFFER_SIZE_BYTES = 64
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Lidef _set_py_version():
*9c5db199SXin Li    """As of ~R102 (aka when this merges), DUTs only have Python 3."""
*9c5db199SXin Li    return '--py_version=3'
*9c5db199SXin Li
*9c5db199SXin Li
class AutodirNotFoundError(Exception):
    """No Autotest installation could be found.

    Raised by Autotest.get_installed_autodir() when the host has no recorded
    autodir and none of the configured client paths holds a usable install.
    """
*9c5db199SXin Li
*9c5db199SXin Li
class AutotestFailure(Exception):
    """Generic exception class for failures during a test run."""
*9c5db199SXin Li
*9c5db199SXin Li
class AutotestAbort(AutotestFailure):
    """
    AutotestAborts are thrown when the DUT seems fine
    and the test doesn't give us an explicit reason for
    failure; in this case we have no choice but to abort.
    """
*9c5db199SXin Li
*9c5db199SXin Li
class AutotestDeviceError(AutotestFailure):
    """
    Exceptions that inherit from AutotestDeviceError
    are thrown when we can determine the current
    state of the DUT and conclude that it probably
    led to the test failing; these exceptions lead
    to failures instead of aborts.
    """
*9c5db199SXin Li
*9c5db199SXin Li
class AutotestDeviceNotPingable(AutotestDeviceError):
    """Device error raised when a DUT becomes unpingable."""
*9c5db199SXin Li
*9c5db199SXin Li
class AutotestDeviceNotSSHable(AutotestDeviceError):
    """Device error raised when a DUT is pingable but not SSHable."""
*9c5db199SXin Li
*9c5db199SXin Li
class AutotestDeviceRebooted(AutotestDeviceError):
    """Device error raised when a DUT rebooted unexpectedly."""
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Liclass Autotest(installable_object.InstallableObject):
*9c5db199SXin Li    """
*9c5db199SXin Li    This class represents the Autotest program.
*9c5db199SXin Li
*9c5db199SXin Li    Autotest is used to run tests automatically and collect the results.
*9c5db199SXin Li    It also supports profilers.
*9c5db199SXin Li
*9c5db199SXin Li    Implementation details:
*9c5db199SXin Li    This is a leaf class in an abstract class hierarchy, it must
*9c5db199SXin Li    implement the unimplemented methods in parent classes.
*9c5db199SXin Li    """
*9c5db199SXin Li
*9c5db199SXin Li    def __init__(self, host=None):
*9c5db199SXin Li        self.host = host
*9c5db199SXin Li        self.got = False
*9c5db199SXin Li        self.installed = False
*9c5db199SXin Li        self.serverdir = utils.get_server_dir()
*9c5db199SXin Li        super(Autotest, self).__init__()
*9c5db199SXin Li
*9c5db199SXin Li
    # Class-wide default. When true, get_install_dir() returns a temporary
    # directory created under the install location it resolved.
    install_in_tmpdir = False
*9c5db199SXin Li    @classmethod
*9c5db199SXin Li    def set_install_in_tmpdir(cls, flag):
*9c5db199SXin Li        """ Sets a flag that controls whether or not Autotest should by
*9c5db199SXin Li        default be installed in a "standard" directory (e.g.
*9c5db199SXin Li        /home/autotest, /usr/local/autotest) or a temporary directory. """
*9c5db199SXin Li        cls.install_in_tmpdir = flag
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Li    @classmethod
*9c5db199SXin Li    def get_client_autodir_paths(cls, host):
*9c5db199SXin Li        return global_config.global_config.get_config_value(
*9c5db199SXin Li                'AUTOSERV', 'client_autodir_paths', type=list)
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Li    @classmethod
*9c5db199SXin Li    def get_installed_autodir(cls, host):
*9c5db199SXin Li        """
*9c5db199SXin Li        Find where the Autotest client is installed on the host.
*9c5db199SXin Li        @returns an absolute path to an installed Autotest client root.
*9c5db199SXin Li        @raises AutodirNotFoundError if no Autotest installation can be found.
*9c5db199SXin Li        """
*9c5db199SXin Li        autodir = host.get_autodir()
*9c5db199SXin Li        if autodir:
*9c5db199SXin Li            logging.debug('Using existing host autodir: %s', autodir)
*9c5db199SXin Li            return autodir
*9c5db199SXin Li
*9c5db199SXin Li        for path in Autotest.get_client_autodir_paths(host):
*9c5db199SXin Li            try:
*9c5db199SXin Li                autotest_binary = os.path.join(path, 'bin', 'autotest')
*9c5db199SXin Li                host.run('test -x %s' % utils.sh_escape(autotest_binary))
*9c5db199SXin Li                host.run('test -w %s' % utils.sh_escape(path))
*9c5db199SXin Li                logging.debug('Found existing autodir at %s', path)
*9c5db199SXin Li                return path
*9c5db199SXin Li            except error.GenericHostRunError:
*9c5db199SXin Li                logging.debug('%s does not exist on %s', autotest_binary,
*9c5db199SXin Li                              host.hostname)
*9c5db199SXin Li        raise AutodirNotFoundError
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Li    @classmethod
*9c5db199SXin Li    def get_install_dir(cls, host):
*9c5db199SXin Li        """
*9c5db199SXin Li        Determines the location where autotest should be installed on
*9c5db199SXin Li        host. If self.install_in_tmpdir is set, it will return a unique
*9c5db199SXin Li        temporary directory that autotest can be installed in. Otherwise, looks
*9c5db199SXin Li        for an existing installation to use; if none is found, looks for a
*9c5db199SXin Li        usable directory in the global config client_autodir_paths.
*9c5db199SXin Li        """
*9c5db199SXin Li        try:
*9c5db199SXin Li            install_dir = cls.get_installed_autodir(host)
*9c5db199SXin Li        except AutodirNotFoundError:
*9c5db199SXin Li            install_dir = cls._find_installable_dir(host)
*9c5db199SXin Li
*9c5db199SXin Li        if cls.install_in_tmpdir:
*9c5db199SXin Li            return host.get_tmp_dir(parent=install_dir)
*9c5db199SXin Li        return install_dir
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Li    @classmethod
*9c5db199SXin Li    def _find_installable_dir(cls, host):
*9c5db199SXin Li        client_autodir_paths = cls.get_client_autodir_paths(host)
*9c5db199SXin Li        for path in client_autodir_paths:
*9c5db199SXin Li            try:
*9c5db199SXin Li                host.run('mkdir -p %s' % utils.sh_escape(path))
*9c5db199SXin Li                host.run('test -w %s' % utils.sh_escape(path))
*9c5db199SXin Li                return path
*9c5db199SXin Li            except error.AutoservRunError:
*9c5db199SXin Li                logging.debug('Failed to create %s', path)
*9c5db199SXin Li        metrics.Counter(
*9c5db199SXin Li            'chromeos/autotest/errors/no_autotest_install_path').increment(
*9c5db199SXin Li                fields={'dut_host_name': host.hostname})
*9c5db199SXin Li        raise error.AutoservInstallError(
*9c5db199SXin Li                'Unable to find a place to install Autotest; tried %s' %
*9c5db199SXin Li                ', '.join(client_autodir_paths))
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Li    def get_fetch_location(self):
*9c5db199SXin Li        """Generate list of locations where autotest can look for packages.
*9c5db199SXin Li
*9c5db199SXin Li        Hosts are tagged with an attribute containing the URL from which
*9c5db199SXin Li        to source packages when running a test on that host.
*9c5db199SXin Li
*9c5db199SXin Li        @returns the list of candidate locations to check for packages.
*9c5db199SXin Li        """
*9c5db199SXin Li        c = global_config.global_config
*9c5db199SXin Li        repos = c.get_config_value("PACKAGES", 'fetch_location', type=list,
*9c5db199SXin Li                                   default=[])
*9c5db199SXin Li        repos.reverse()
*9c5db199SXin Li
*9c5db199SXin Li        if not server_utils.is_inside_chroot():
*9c5db199SXin Li            # Only try to get fetch location from host attribute if the
*9c5db199SXin Li            # test is not running inside chroot.
*9c5db199SXin Li            #
*9c5db199SXin Li            # Look for the repo url via the host attribute. If we are
*9c5db199SXin Li            # not running with a full AFE autoserv will fall back to
*9c5db199SXin Li            # serving packages itself from whatever source version it is
*9c5db199SXin Li            # sync'd to rather than using the proper artifacts for the
*9c5db199SXin Li            # build on the host.
*9c5db199SXin Li            found_repo = self._get_fetch_location_from_host_attribute()
*9c5db199SXin Li            if found_repo is not None:
*9c5db199SXin Li                # Add our new repo to the end, the package manager will
*9c5db199SXin Li                # later reverse the list of repositories resulting in ours
*9c5db199SXin Li                # being first
*9c5db199SXin Li                repos.append(found_repo)
*9c5db199SXin Li
*9c5db199SXin Li        return repos
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Li    def _get_fetch_location_from_host_attribute(self):
*9c5db199SXin Li        """Get repo to use for packages from host attribute, if possible.
*9c5db199SXin Li
*9c5db199SXin Li        Hosts are tagged with an attribute containing the URL
*9c5db199SXin Li        from which to source packages when running a test on that host.
*9c5db199SXin Li        If self.host is set, attempt to look this attribute in the host info.
*9c5db199SXin Li
*9c5db199SXin Li        @returns value of the 'job_repo_url' host attribute, if present.
*9c5db199SXin Li        """
*9c5db199SXin Li        if not self.host:
*9c5db199SXin Li            return None
*9c5db199SXin Li
*9c5db199SXin Li        try:
*9c5db199SXin Li            info = self.host.host_info_store.get()
*9c5db199SXin Li        except Exception as e:
*9c5db199SXin Li            # TODO(pprabhu): We really want to catch host_info.StoreError here,
*9c5db199SXin Li            # but we can't import host_info from this module.
*9c5db199SXin Li            #   - autotest_lib.hosts.host_info pulls in (naturally)
*9c5db199SXin Li            #   autotest_lib.hosts.__init__
*9c5db199SXin Li            #   - This pulls in all the host classes ever defined
*9c5db199SXin Li            #   - That includes abstract_ssh, which depends on autotest
*9c5db199SXin Li            logging.warning('Failed to obtain host info: %r', e)
*9c5db199SXin Li            logging.warning('Skipping autotest fetch location based on %s',
*9c5db199SXin Li                            JOB_REPO_URL)
*9c5db199SXin Li            return None
*9c5db199SXin Li
*9c5db199SXin Li        job_repo_url = info.attributes.get(JOB_REPO_URL, '')
*9c5db199SXin Li        if not job_repo_url:
*9c5db199SXin Li            logging.warning("No %s for %s", JOB_REPO_URL, self.host)
*9c5db199SXin Li            return None
*9c5db199SXin Li
*9c5db199SXin Li        logging.info('Got job repo url from host attributes: %s',
*9c5db199SXin Li                        job_repo_url)
*9c5db199SXin Li        return job_repo_url
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Li    def install(self, host=None, autodir=None, use_packaging=True):
*9c5db199SXin Li        """Install autotest.  If |host| is not None, stores it in |self.host|.
*9c5db199SXin Li
*9c5db199SXin Li        @param host A Host instance on which autotest will be installed
*9c5db199SXin Li        @param autodir Location on the remote host to install to
*9c5db199SXin Li        @param use_packaging Enable install modes that use the packaging system.
*9c5db199SXin Li
*9c5db199SXin Li        """
*9c5db199SXin Li        if host:
*9c5db199SXin Li            self.host = host
*9c5db199SXin Li        self._install(host=host, autodir=autodir, use_packaging=use_packaging)
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Li    def install_full_client(self, host=None, autodir=None):
*9c5db199SXin Li        self._install(host=host, autodir=autodir, use_autoserv=False,
*9c5db199SXin Li                      use_packaging=False)
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Li    def install_no_autoserv(self, host=None, autodir=None):
*9c5db199SXin Li        self._install(host=host, autodir=autodir, use_autoserv=False)
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Li    def _install_using_packaging(self, host, autodir):
*9c5db199SXin Li        repos = self.get_fetch_location()
*9c5db199SXin Li        if not repos:
*9c5db199SXin Li            raise error.PackageInstallError("No repos to install an "
*9c5db199SXin Li                                            "autotest client from")
*9c5db199SXin Li        # Make sure devserver has the autotest package staged
*9c5db199SXin Li        host.verify_job_repo_url()
*9c5db199SXin Li        pkgmgr = packages.PackageManager(autodir, hostname=host.hostname,
*9c5db199SXin Li                                         repo_urls=repos,
*9c5db199SXin Li                                         do_locking=False,
*9c5db199SXin Li                                         run_function=host.run,
*9c5db199SXin Li                                         run_function_dargs=dict(timeout=600))
*9c5db199SXin Li        # The packages dir is used to store all the packages that
*9c5db199SXin Li        # are fetched on that client. (for the tests,deps etc.
*9c5db199SXin Li        # too apart from the client)
*9c5db199SXin Li        pkg_dir = os.path.join(autodir, 'packages')
*9c5db199SXin Li        # clean up the autodir except for the packages and result_tools
*9c5db199SXin Li        # directory.
*9c5db199SXin Li        host.run('cd %s && ls | grep -v "^packages$" | grep -v "^result_tools$"'
*9c5db199SXin Li                 ' | xargs rm -rf && rm -rf .[!.]*' % autodir)
*9c5db199SXin Li        pkgmgr.install_pkg('autotest', 'client', pkg_dir, autodir,
*9c5db199SXin Li                           preserve_install_dir=True)
*9c5db199SXin Li        self.installed = True
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Li    def _install_using_send_file(self, host, autodir):
*9c5db199SXin Li        dirs_to_exclude = set(["tests", "site_tests", "deps", "profilers",
*9c5db199SXin Li                               "packages"])
*9c5db199SXin Li        light_files = [os.path.join(self.source_material, f)
*9c5db199SXin Li                       for f in os.listdir(self.source_material)
*9c5db199SXin Li                       if f not in dirs_to_exclude]
*9c5db199SXin Li        host.send_file(light_files, autodir, delete_dest=True)
*9c5db199SXin Li
*9c5db199SXin Li        # create empty dirs for all the stuff we excluded
*9c5db199SXin Li        commands = []
*9c5db199SXin Li        for path in dirs_to_exclude:
*9c5db199SXin Li            abs_path = os.path.join(autodir, path)
*9c5db199SXin Li            abs_path = utils.sh_escape(abs_path)
*9c5db199SXin Li            commands.append("mkdir -p '%s'" % abs_path)
*9c5db199SXin Li            commands.append("touch '%s'/__init__.py" % abs_path)
*9c5db199SXin Li        host.run(';'.join(commands))
*9c5db199SXin Li
*9c5db199SXin Li
    def _install(self, host=None, autodir=None, use_autoserv=True,
                 use_packaging=True):
        """
        Install autotest on a host.

        Tries, in order: the packaging system (when use_packaging is set),
        copying self.source_material to the host, and finally an svn
        checkout of the autotest client. If get() was not called
        previously, it is called first with its default location.

        @param host A Host instance on which autotest will be installed;
            defaults to self.host
        @param autodir Location on the remote host to install to; defaults
            to the result of get_install_dir(host)
        @param use_autoserv Enable install modes that depend on the client
            running with the autoserv harness
        @param use_packaging Enable install modes that use the packaging system

        @exception AutoservError if there is no source material and svn
            could not be found
        """
        if not host:
            host = self.host
        if not self.got:
            self.get()
        host.wait_up(timeout=30)
        host.setup()
        # B/203609358 something is removing telemetry. Adding this to check the
        # status of the folder as early as possible.
        # NOTE(review): no folder check follows here, only the log line below.
        logging.info("Installing autotest on %s", host.hostname)

        # set up the autotest directory on the remote machine
        if not autodir:
            autodir = self.get_install_dir(host)
        logging.info('Using installation dir %s', autodir)
        host.set_autodir(autodir)
        host.run('mkdir -p %s' % utils.sh_escape(autodir))

        # make sure there are no files in $AUTODIR/results
        results_path = os.path.join(autodir, 'results')
        host.run('rm -rf %s/*' % utils.sh_escape(results_path),
                 ignore_status=True)

        # Fetch the autotest client from the nearest repository
        if use_packaging:
            try:
                self._install_using_packaging(host, autodir)
                logging.info("Installation of autotest completed using the "
                             "packaging system.")
                # Packaging succeeded; none of the fallbacks below run.
                return
            except (error.PackageInstallError, error.AutoservRunError,
                    global_config.ConfigError) as e:
                # Not fatal: fall through to the other install methods.
                logging.info("Could not install autotest using the packaging "
                             "system: %s. Trying other methods", e)
        else:
            # Delete the package checksum file to force dut updating local
            # packages.
            command = ('rm -f "%s"' %
                       (os.path.join(autodir, packages.CHECKSUM_FILE)))
            host.run(command)

        # try to install from file or directory
        if self.source_material:
            c = global_config.global_config
            supports_autoserv_packaging = c.get_config_value(
                "PACKAGES", "serve_packages_from_autoserv", type=bool)
            # Copy autotest recursively
            if supports_autoserv_packaging and use_autoserv:
                # Light copy: tests, deps, profilers etc. are left out.
                self._install_using_send_file(host, autodir)
            else:
                host.send_file(self.source_material, autodir, delete_dest=True)
            logging.info("Installation of autotest completed from %s",
                         self.source_material)
            self.installed = True
        else:
            # if that fails try to install using svn
            # NOTE(review): the lookup goes through utils.run rather than
            # host.run, while the error text names the target host -- confirm
            # which machine is meant to be checked.
            if utils.run('which svn').exit_status:
                raise error.AutoservError(
                        'svn not found on target machine: %s' %
                        host.hostname)
            try:
                host.run('svn checkout %s %s' % (AUTOTEST_SVN, autodir))
            except error.AutoservRunError as e:
                # The svn:// checkout failed; retry over HTTP.
                host.run('svn checkout %s %s' % (AUTOTEST_HTTP, autodir))
            logging.info("Installation of autotest completed using SVN.")
            self.installed = True

        # TODO(milleral): http://crbug.com/258161
        # Send over the most recent global_config.ini after installation if one
        # is available.
        # This code is a bit duplicated from
        # _Run._create_client_config_file, but oh well.
        if self.installed and self.source_material:
            self._send_shadow_config()

        # sync the disk, to avoid getting 0-byte files if a test resets the DUT
        host.run(os.path.join(autodir, 'bin', 'fs_sync.py'),
                 ignore_status=True)
*9c5db199SXin Li
*9c5db199SXin Li    def _send_shadow_config(self):
*9c5db199SXin Li        logging.info('Installing updated global_config.ini.')
*9c5db199SXin Li        destination = os.path.join(self.host.get_autodir(),
*9c5db199SXin Li                                   'global_config.ini')
*9c5db199SXin Li        with tempfile.NamedTemporaryFile(mode='w') as client_config:
*9c5db199SXin Li            config = global_config.global_config
*9c5db199SXin Li            client_section = config.get_section_values('CLIENT')
*9c5db199SXin Li            client_section.write(client_config)
*9c5db199SXin Li            client_config.flush()
*9c5db199SXin Li            self.host.send_file(client_config.name, destination)
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Li    def uninstall(self, host=None):
*9c5db199SXin Li        """
*9c5db199SXin Li        Uninstall (i.e. delete) autotest. Removes the autotest client install
*9c5db199SXin Li        from the specified host.
*9c5db199SXin Li
*9c5db199SXin Li        @params host a Host instance from which the client will be removed
*9c5db199SXin Li        """
*9c5db199SXin Li        if not self.installed:
*9c5db199SXin Li            return
*9c5db199SXin Li        if not host:
*9c5db199SXin Li            host = self.host
*9c5db199SXin Li        autodir = host.get_autodir()
*9c5db199SXin Li        if not autodir:
*9c5db199SXin Li            return
*9c5db199SXin Li
*9c5db199SXin Li        # perform the actual uninstall
*9c5db199SXin Li        host.run("rm -rf %s" % utils.sh_escape(autodir), ignore_status=True)
*9c5db199SXin Li        host.set_autodir(None)
*9c5db199SXin Li        self.installed = False
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Li    def get(self, location=None):
*9c5db199SXin Li        if not location:
*9c5db199SXin Li            location = os.path.join(self.serverdir, '../client')
*9c5db199SXin Li            location = os.path.abspath(location)
*9c5db199SXin Li        installable_object.InstallableObject.get(self, location)
*9c5db199SXin Li        self.got = True
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Li    def run(self, control_file, results_dir='.', host=None, timeout=None,
*9c5db199SXin Li            tag=None, parallel_flag=False, background=False,
*9c5db199SXin Li            client_disconnect_timeout=None, use_packaging=True):
*9c5db199SXin Li        """
*9c5db199SXin Li        Run an autotest job on the remote machine.
*9c5db199SXin Li
*9c5db199SXin Li        @param control_file: An open file-like-obj of the control file.
*9c5db199SXin Li        @param results_dir: A str path where the results should be stored
*9c5db199SXin Li                on the local filesystem.
*9c5db199SXin Li        @param host: A Host instance on which the control file should
*9c5db199SXin Li                be run.
*9c5db199SXin Li        @param timeout: Maximum number of seconds to wait for the run or None.
*9c5db199SXin Li        @param tag: Tag name for the client side instance of autotest.
*9c5db199SXin Li        @param parallel_flag: Flag set when multiple jobs are run at the
*9c5db199SXin Li                same time.
*9c5db199SXin Li        @param background: Indicates that the client should be launched as
*9c5db199SXin Li                a background job; the code calling run will be responsible
*9c5db199SXin Li                for monitoring the client and collecting the results.
*9c5db199SXin Li        @param client_disconnect_timeout: Seconds to wait for the remote host
*9c5db199SXin Li                to come back after a reboot. Defaults to the host setting for
*9c5db199SXin Li                DEFAULT_REBOOT_TIMEOUT.
*9c5db199SXin Li
*9c5db199SXin Li        @raises AutotestRunError: If there is a problem executing
*9c5db199SXin Li                the control file.
*9c5db199SXin Li        """
*9c5db199SXin Li        host = self._get_host_and_setup(host, use_packaging=use_packaging)
*9c5db199SXin Li        logging.debug('Autotest job starts on remote host: %s',
*9c5db199SXin Li                      host.hostname)
*9c5db199SXin Li        results_dir = os.path.abspath(results_dir)
*9c5db199SXin Li
*9c5db199SXin Li        if client_disconnect_timeout is None:
*9c5db199SXin Li            client_disconnect_timeout = host.DEFAULT_REBOOT_TIMEOUT
*9c5db199SXin Li
*9c5db199SXin Li        if tag:
*9c5db199SXin Li            results_dir = os.path.join(results_dir, tag)
*9c5db199SXin Li
*9c5db199SXin Li        atrun = _Run(host, results_dir, tag, parallel_flag, background)
*9c5db199SXin Li        self._do_run(control_file, results_dir, host, atrun, timeout,
*9c5db199SXin Li                     client_disconnect_timeout, use_packaging=use_packaging)
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Li    def _get_host_and_setup(self, host, use_packaging=True):
*9c5db199SXin Li        if not host:
*9c5db199SXin Li            host = self.host
*9c5db199SXin Li        if not self.installed:
*9c5db199SXin Li            self.install(host, use_packaging=use_packaging)
*9c5db199SXin Li
*9c5db199SXin Li        host.wait_up(timeout=30)
*9c5db199SXin Li        return host
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Li    def _do_run(self, control_file, results_dir, host, atrun, timeout,
*9c5db199SXin Li                client_disconnect_timeout, use_packaging=True):
*9c5db199SXin Li        try:
*9c5db199SXin Li            atrun.verify_machine()
*9c5db199SXin Li        except:
*9c5db199SXin Li            logging.error("Verify failed on %s. Reinstalling autotest",
*9c5db199SXin Li                          host.hostname)
*9c5db199SXin Li            self.install(host)
*9c5db199SXin Li            atrun.verify_machine()
*9c5db199SXin Li        debug = os.path.join(results_dir, 'debug')
*9c5db199SXin Li        try:
*9c5db199SXin Li            os.makedirs(debug)
*9c5db199SXin Li        except Exception:
*9c5db199SXin Li            pass
*9c5db199SXin Li
*9c5db199SXin Li        delete_file_list = [atrun.remote_control_file,
*9c5db199SXin Li                            atrun.remote_control_file + '.state',
*9c5db199SXin Li                            atrun.manual_control_file,
*9c5db199SXin Li                            atrun.manual_control_file + '.state']
*9c5db199SXin Li        cmd = ';'.join('rm -f ' + control for control in delete_file_list)
*9c5db199SXin Li        host.run(cmd, ignore_status=True)
*9c5db199SXin Li
*9c5db199SXin Li        tmppath = utils.get(control_file, local_copy=True)
*9c5db199SXin Li
*9c5db199SXin Li        # build up the initialization prologue for the control file
*9c5db199SXin Li        prologue_lines = []
*9c5db199SXin Li
*9c5db199SXin Li        # Add the additional user arguments
*9c5db199SXin Li        prologue_lines.append("args = %r\n" % self.job.args)
*9c5db199SXin Li
*9c5db199SXin Li        # If the packaging system is being used, add the repository list.
*9c5db199SXin Li        repos = None
*9c5db199SXin Li        try:
*9c5db199SXin Li            if use_packaging:
*9c5db199SXin Li                repos = self.get_fetch_location()
*9c5db199SXin Li                prologue_lines.append('job.add_repository(%s)\n' % repos)
*9c5db199SXin Li            else:
*9c5db199SXin Li                logging.debug('use_packaging is set to False, do not add any '
*9c5db199SXin Li                              'repository.')
*9c5db199SXin Li        except global_config.ConfigError as e:
*9c5db199SXin Li            # If repos is defined packaging is enabled so log the error
*9c5db199SXin Li            if repos:
*9c5db199SXin Li                logging.error(e)
*9c5db199SXin Li
*9c5db199SXin Li        # on full-size installs, turn on any profilers the server is using
*9c5db199SXin Li        if not atrun.background:
*9c5db199SXin Li            running_profilers = six.iteritems(host.job.profilers.add_log)
*9c5db199SXin Li            for profiler, (args, dargs) in running_profilers:
*9c5db199SXin Li                call_args = [repr(profiler)]
*9c5db199SXin Li                call_args += [repr(arg) for arg in args]
*9c5db199SXin Li                call_args += ["%s=%r" % item for item in six.iteritems(dargs)]
*9c5db199SXin Li                prologue_lines.append("job.profilers.add(%s)\n"
*9c5db199SXin Li                                      % ", ".join(call_args))
*9c5db199SXin Li        cfile = "".join(prologue_lines)
*9c5db199SXin Li
*9c5db199SXin Li        cfile += open(tmppath).read()
*9c5db199SXin Li        open(tmppath, "w").write(cfile)
*9c5db199SXin Li
*9c5db199SXin Li        # Create and copy state file to remote_control_file + '.state'
*9c5db199SXin Li        state_file = host.job.preprocess_client_state()
*9c5db199SXin Li        host.send_file(state_file, atrun.remote_control_file + '.init.state')
*9c5db199SXin Li        os.remove(state_file)
*9c5db199SXin Li
*9c5db199SXin Li        # Copy control_file to remote_control_file on the host
*9c5db199SXin Li        host.send_file(tmppath, atrun.remote_control_file)
*9c5db199SXin Li        if os.path.abspath(tmppath) != os.path.abspath(control_file):
*9c5db199SXin Li            os.remove(tmppath)
*9c5db199SXin Li
*9c5db199SXin Li        atrun.execute_control(
*9c5db199SXin Li                timeout=timeout,
*9c5db199SXin Li                client_disconnect_timeout=client_disconnect_timeout)
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Li    @staticmethod
*9c5db199SXin Li    def extract_test_failure_msg(failure_status_line):
*9c5db199SXin Li        """Extract the test failure message from the status line.
*9c5db199SXin Li
*9c5db199SXin Li        @param failure_status_line:  String of test failure status line, it will
*9c5db199SXin Li            look like:
*9c5db199SXin Li          FAIL <test name>  <test name> timestamp=<ts> localtime=<lt> <reason>
*9c5db199SXin Li
*9c5db199SXin Li        @returns String of the reason, return empty string if we can't regex out
*9c5db199SXin Li            reason.
*9c5db199SXin Li        """
*9c5db199SXin Li        fail_msg = ''
*9c5db199SXin Li        match = _FAIL_STATUS_RE.match(failure_status_line)
*9c5db199SXin Li        if match:
*9c5db199SXin Li            fail_msg = match.group('fail_msg')
*9c5db199SXin Li        return fail_msg
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Li    @classmethod
*9c5db199SXin Li    def _check_client_test_result(cls, host, test_name):
*9c5db199SXin Li        """
*9c5db199SXin Li        Check result of client test.
*9c5db199SXin Li        Autotest will store results in the file name status.
*9c5db199SXin Li        We check that second to last line in that file begins with 'END GOOD'
*9c5db199SXin Li
*9c5db199SXin Li        @raises TestFail: If client test does not pass.
*9c5db199SXin Li        """
*9c5db199SXin Li        client_result_dir = '%s/results/default' % host.autodir
*9c5db199SXin Li        command = 'tail -2 %s/status | head -1' % client_result_dir
*9c5db199SXin Li        status = host.run(command).stdout.strip()
*9c5db199SXin Li        logging.info(status)
*9c5db199SXin Li        if status[:8] != 'END GOOD':
*9c5db199SXin Li            test_fail_status_line_cmd = (
*9c5db199SXin Li                    'grep "^\s*FAIL\s*%s" %s/status | tail -n 1' %
*9c5db199SXin Li                    (test_name, client_result_dir))
*9c5db199SXin Li            test_fail_msg = cls.extract_test_failure_msg(
*9c5db199SXin Li                    host.run(test_fail_status_line_cmd).stdout.strip())
*9c5db199SXin Li            test_fail_msg_reason = ('' if not test_fail_msg
*9c5db199SXin Li                                    else ' (reason: %s)' % test_fail_msg)
*9c5db199SXin Li            test_fail_status = '%s client test did not pass%s.' % (
*9c5db199SXin Li                    test_name, test_fail_msg_reason)
*9c5db199SXin Li            raise error.TestFail(test_fail_status)
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Li    def run_timed_test(self, test_name, results_dir='.', host=None,
*9c5db199SXin Li                       timeout=None, parallel_flag=False, background=False,
*9c5db199SXin Li                       client_disconnect_timeout=None, *args, **dargs):
*9c5db199SXin Li        """
*9c5db199SXin Li        Assemble a tiny little control file to just run one test,
*9c5db199SXin Li        and run it as an autotest client-side test
*9c5db199SXin Li        """
*9c5db199SXin Li        if not host:
*9c5db199SXin Li            host = self.host
*9c5db199SXin Li        if not self.installed:
*9c5db199SXin Li            self.install(host)
*9c5db199SXin Li
*9c5db199SXin Li        opts = ["%s=%s" % (o[0], repr(o[1])) for o in dargs.items()]
*9c5db199SXin Li        cmd = ", ".join([repr(test_name)] + list(map(repr, args)) + opts)
*9c5db199SXin Li        control = "job.run_test(%s)\n" % cmd
*9c5db199SXin Li        self.run(control, results_dir, host, timeout=timeout,
*9c5db199SXin Li                 parallel_flag=parallel_flag, background=background,
*9c5db199SXin Li                 client_disconnect_timeout=client_disconnect_timeout)
*9c5db199SXin Li
*9c5db199SXin Li        if dargs.get('check_client_result', False):
*9c5db199SXin Li            self._check_client_test_result(host, test_name)
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Li    def run_test(self,
*9c5db199SXin Li                 test_name,
*9c5db199SXin Li                 results_dir='.',
*9c5db199SXin Li                 host=None,
*9c5db199SXin Li                 parallel_flag=False,
*9c5db199SXin Li                 background=False,
*9c5db199SXin Li                 client_disconnect_timeout=None,
*9c5db199SXin Li                 timeout=None,
*9c5db199SXin Li                 *args,
*9c5db199SXin Li                 **dargs):
*9c5db199SXin Li        self.run_timed_test(
*9c5db199SXin Li                test_name,
*9c5db199SXin Li                results_dir,
*9c5db199SXin Li                host,
*9c5db199SXin Li                timeout=timeout,
*9c5db199SXin Li                parallel_flag=parallel_flag,
*9c5db199SXin Li                background=background,
*9c5db199SXin Li                client_disconnect_timeout=client_disconnect_timeout,
*9c5db199SXin Li                *args,
*9c5db199SXin Li                **dargs)
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Li    def run_static_method(self, module, method, results_dir='.', host=None,
*9c5db199SXin Li                          *args):
*9c5db199SXin Li        """Runs a non-instance method with |args| from |module| on the client.
*9c5db199SXin Li
*9c5db199SXin Li        This method runs a static/class/module autotest method on the client.
*9c5db199SXin Li        For example:
*9c5db199SXin Li          run_static_method("autotest_lib.client.cros.cros_ui", "reboot")
*9c5db199SXin Li
*9c5db199SXin Li        Will run autotest_lib.client.cros.cros_ui.reboot() on the client.
*9c5db199SXin Li
*9c5db199SXin Li        @param module: module name as you would refer to it when importing in a
*9c5db199SXin Li            control file. e.g. autotest_lib.client.common_lib.module_name.
*9c5db199SXin Li        @param method: the method you want to call.
*9c5db199SXin Li        @param results_dir: A str path where the results should be stored
*9c5db199SXin Li            on the local filesystem.
*9c5db199SXin Li        @param host: A Host instance on which the control file should
*9c5db199SXin Li            be run.
*9c5db199SXin Li        @param args: args to pass to the method.
*9c5db199SXin Li        """
*9c5db199SXin Li        control = "\n".join(["import %s" % module,
*9c5db199SXin Li                             "%s.%s(%s)\n" % (module, method,
*9c5db199SXin Li                                              ','.join(map(repr, args)))])
*9c5db199SXin Li        self.run(control, results_dir=results_dir, host=host)
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Liclass _Run(object):
*9c5db199SXin Li    """
*9c5db199SXin Li    Represents a run of autotest control file.  This class maintains
*9c5db199SXin Li    all the state necessary as an autotest control file is executed.
*9c5db199SXin Li
*9c5db199SXin Li    It is not intended to be used directly, rather control files
*9c5db199SXin Li    should be run using the run method in Autotest.
*9c5db199SXin Li    """
*9c5db199SXin Li    def __init__(self, host, results_dir, tag, parallel_flag, background):
*9c5db199SXin Li        self.host = host
*9c5db199SXin Li        self.results_dir = results_dir
*9c5db199SXin Li        self.tag = tag
*9c5db199SXin Li        self.parallel_flag = parallel_flag
*9c5db199SXin Li        self.background = background
*9c5db199SXin Li        self.autodir = Autotest.get_installed_autodir(self.host)
*9c5db199SXin Li        control = os.path.join(self.autodir, 'control')
*9c5db199SXin Li        if tag:
*9c5db199SXin Li            control += '.' + tag
*9c5db199SXin Li        self.manual_control_file = control
*9c5db199SXin Li        self.remote_control_file = control + '.autoserv'
*9c5db199SXin Li        self.config_file = os.path.join(self.autodir, 'global_config.ini')
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Li    def verify_machine(self):
*9c5db199SXin Li        binary = os.path.join(self.autodir, 'bin/autotest')
*9c5db199SXin Li        at_check = "test -e {} && echo True || echo False".format(binary)
*9c5db199SXin Li        if not self.parallel_flag:
*9c5db199SXin Li            tmpdir = os.path.join(self.autodir, 'tmp')
*9c5db199SXin Li            download = os.path.join(self.autodir, 'tests/download')
*9c5db199SXin Li            at_check += "; umount {}; umount {}".format(tmpdir, download)
*9c5db199SXin Li        # Check if the test dir is missing.
*9c5db199SXin Li        if "False" in str(self.host.run(at_check, ignore_status=True).stdout):
*9c5db199SXin Li            raise error.AutoservInstallError(
*9c5db199SXin Li                "Autotest does not appear to be installed")
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Li    def get_base_cmd_args(self, section):
*9c5db199SXin Li        args = ['--verbose']
*9c5db199SXin Li        if section > 0:
*9c5db199SXin Li            args.append('-c')
*9c5db199SXin Li        if self.tag:
*9c5db199SXin Li            args.append('-t %s' % self.tag)
*9c5db199SXin Li        if self.host.job.use_external_logging():
*9c5db199SXin Li            args.append('-l')
*9c5db199SXin Li        if self.host.hostname:
*9c5db199SXin Li            args.append('--hostname=%s' % self.host.hostname)
*9c5db199SXin Li        args.append('--user=%s' % self.host.job.user)
*9c5db199SXin Li
*9c5db199SXin Li        args.append(self.remote_control_file)
*9c5db199SXin Li        return args
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Li    def get_background_cmd(self, section):
*9c5db199SXin Li        cmd = [
*9c5db199SXin Li                'nohup',
*9c5db199SXin Li                os.path.join(self.autodir, 'bin/autotest_client'),
*9c5db199SXin Li                _set_py_version()
*9c5db199SXin Li        ]
*9c5db199SXin Li        cmd += self.get_base_cmd_args(section)
*9c5db199SXin Li        cmd += ['>/dev/null', '2>/dev/null', '&']
*9c5db199SXin Li        return ' '.join(cmd)
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Li    def get_daemon_cmd(self, section, monitor_dir):
*9c5db199SXin Li        cmd = [
*9c5db199SXin Li                'nohup',
*9c5db199SXin Li                os.path.join(self.autodir, 'bin/autotestd'), monitor_dir,
*9c5db199SXin Li                '-H autoserv',
*9c5db199SXin Li                _set_py_version()
*9c5db199SXin Li        ]
*9c5db199SXin Li        cmd += self.get_base_cmd_args(section)
*9c5db199SXin Li        cmd += ['>/dev/null', '2>/dev/null', '&']
*9c5db199SXin Li        return ' '.join(cmd)
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Li    def get_monitor_cmd(self, monitor_dir, stdout_read, stderr_read):
*9c5db199SXin Li        cmd = [
*9c5db199SXin Li                os.path.join(self.autodir, 'bin', 'autotestd_monitor'),
*9c5db199SXin Li                monitor_dir,
*9c5db199SXin Li                str(stdout_read),
*9c5db199SXin Li                str(stderr_read),
*9c5db199SXin Li                _set_py_version()
*9c5db199SXin Li        ]
*9c5db199SXin Li        return ' '.join(cmd)
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Li    def get_client_log(self):
*9c5db199SXin Li        """Find what the "next" client.* prefix should be
*9c5db199SXin Li
*9c5db199SXin Li        @returns A string of the form client.INTEGER that should be prefixed
*9c5db199SXin Li            to all client debug log files.
*9c5db199SXin Li        """
*9c5db199SXin Li        max_digit = -1
*9c5db199SXin Li        debug_dir = os.path.join(self.results_dir, 'debug')
*9c5db199SXin Li        client_logs = glob.glob(os.path.join(debug_dir, 'client.*.*'))
*9c5db199SXin Li        for log in client_logs:
*9c5db199SXin Li            _, number, _ = log.split('.', 2)
*9c5db199SXin Li            if number.isdigit():
*9c5db199SXin Li                max_digit = max(max_digit, int(number))
*9c5db199SXin Li        return 'client.%d' % (max_digit + 1)
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Li    def copy_client_config_file(self, client_log_prefix=None):
*9c5db199SXin Li        """
*9c5db199SXin Li        Create and copy the client config file based on the server config.
*9c5db199SXin Li
*9c5db199SXin Li        @param client_log_prefix: Optional prefix to prepend to log files.
*9c5db199SXin Li        """
*9c5db199SXin Li        client_config_file = self._create_client_config_file(client_log_prefix)
*9c5db199SXin Li        self.host.send_file(client_config_file, self.config_file)
*9c5db199SXin Li        os.remove(client_config_file)
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Li    def _create_client_config_file(self, client_log_prefix=None):
*9c5db199SXin Li        """
*9c5db199SXin Li        Create a temporary file with the [CLIENT] section configuration values
*9c5db199SXin Li        taken from the server global_config.ini.
*9c5db199SXin Li
*9c5db199SXin Li        @param client_log_prefix: Optional prefix to prepend to log files.
*9c5db199SXin Li
*9c5db199SXin Li        @return: Path of the temporary file generated.
*9c5db199SXin Li        """
*9c5db199SXin Li        config = global_config.global_config.get_section_values('CLIENT')
*9c5db199SXin Li        if client_log_prefix:
*9c5db199SXin Li            config.set('CLIENT', 'default_logging_name', client_log_prefix)
*9c5db199SXin Li        return self._create_aux_file(config.write)
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Li    def _create_aux_file(self, func, *args):
*9c5db199SXin Li        """
*9c5db199SXin Li        Creates a temporary file and writes content to it according to a
*9c5db199SXin Li        content creation function. The file object is appended to *args, which
*9c5db199SXin Li        is then passed to the content creation function
*9c5db199SXin Li
*9c5db199SXin Li        @param func: Function that will be used to write content to the
*9c5db199SXin Li                temporary file.
*9c5db199SXin Li        @param *args: List of parameters that func takes.
*9c5db199SXin Li        @return: Path to the temporary file that was created.
*9c5db199SXin Li        """
*9c5db199SXin Li        fd, path = tempfile.mkstemp(dir=self.host.job.tmpdir)
*9c5db199SXin Li        aux_file = os.fdopen(fd, "w")
*9c5db199SXin Li        try:
*9c5db199SXin Li            list_args = list(args)
*9c5db199SXin Li            list_args.append(aux_file)
*9c5db199SXin Li            func(*list_args)
*9c5db199SXin Li        finally:
*9c5db199SXin Li            aux_file.close()
*9c5db199SXin Li        return path
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Li    @staticmethod
*9c5db199SXin Li    def is_client_job_finished(last_line):
*9c5db199SXin Li        return bool(re.match(r'^\t*END .*\t[\w.-]+\t[\w.-]+\t.*$', last_line))
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Li    @staticmethod
*9c5db199SXin Li    def is_client_job_rebooting(last_line):
*9c5db199SXin Li        return bool(re.match(r'^\t*GOOD\t[\w.-]+\treboot\.start.*$', last_line))
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Li    # Roughly ordered list from concrete to less specific reboot causes.
*9c5db199SXin Li    _failure_reasons = [
*9c5db199SXin Li        # Try to find possible reasons leading towards failure.
*9c5db199SXin Li        ('ethernet recovery methods have failed. Rebooting.',
*9c5db199SXin Li         'dead ethernet dongle crbug/1031035'),
*9c5db199SXin Li        # GPU hangs are not always recovered from.
*9c5db199SXin Li        ('[drm:amdgpu_job_timedout] \*ERROR\* ring gfx timeout',
*9c5db199SXin Li         'drm ring gfx timeout'),
*9c5db199SXin Li        ('[drm:do_aquire_global_lock] \*ERROR(.*)hw_done or flip_done timed',
*9c5db199SXin Li         'drm hw/flip timeout'),
*9c5db199SXin Li        ('[drm:i915_hangcheck_hung] \*ERROR\* Hangcheck(.*)GPU hung',
*9c5db199SXin Li         'drm GPU hung'),
*9c5db199SXin Li        # TODO(ihf): try to get a better magic signature for kernel crashes.
*9c5db199SXin Li        ('BUG: unable to handle kernel paging request', 'kernel paging'),
*9c5db199SXin Li        ('Kernel panic - not syncing: Out of memory', 'kernel out of memory'),
*9c5db199SXin Li        ('Kernel panic - not syncing', 'kernel panic'),
*9c5db199SXin Li        # Fish for user mode killing OOM messages. Shows unstable system.
*9c5db199SXin Li        ('out_of_memory', 'process out of memory'),
*9c5db199SXin Li        # Reboot was bad enough to have truncated the logs.
*9c5db199SXin Li        ('crash_reporter(.*)Stored kcrash', 'kcrash'),
*9c5db199SXin Li        ('crash_reporter(.*)Last shutdown was not clean', 'not clean'),
*9c5db199SXin Li    ]
*9c5db199SXin Li
*9c5db199SXin Li    def _diagnose_reboot(self):
*9c5db199SXin Li        """
*9c5db199SXin Li        Runs diagnostic check on a rebooted DUT.
*9c5db199SXin Li
*9c5db199SXin Li        TODO(ihf): if this analysis is useful consider moving the code to the
*9c5db199SXin Li                   DUT into a script and call it from here. This is more
*9c5db199SXin Li                   powerful and might be cleaner to grow in functionality. But
*9c5db199SXin Li                   it may also be less robust if stateful is damaged during the
*9c5db199SXin Li                   reboot.
*9c5db199SXin Li
*9c5db199SXin Li        @returns msg describing reboot reason.
*9c5db199SXin Li        """
*9c5db199SXin Li        reasons = []
*9c5db199SXin Li        for (message, bucket) in self._failure_reasons:
*9c5db199SXin Li            # Use -a option for grep to avoid "binary file" warning to stdout.
*9c5db199SXin Li            # The grep -v is added to not match itself in the log (across jobs).
*9c5db199SXin Li            # Using grep is slightly problematic as it finds any reason, not
*9c5db199SXin Li            # just the most recent reason (since 2 boots ago), so it may guess
*9c5db199SXin Li            # wrong. Multiple reboots are unusual in the lab setting though and
*9c5db199SXin Li            # it is better to have a reasonable guess than no reason at all.
*9c5db199SXin Li            found = self.host.run(
*9c5db199SXin Li                "grep -aE '" + message + "' /var/log/messages | grep -av grep",
*9c5db199SXin Li                ignore_status=True
*9c5db199SXin Li            ).stdout
*9c5db199SXin Li            if found and found.strip():
*9c5db199SXin Li                reasons.append(bucket)
*9c5db199SXin Li        signature = 'reason unknown'
*9c5db199SXin Li        if reasons:
*9c5db199SXin Li            # Concatenate possible reasons found to obtain a magic signature.
*9c5db199SXin Li            signature = ', '.join(reasons)
*9c5db199SXin Li        return ('DUT rebooted during the test run. (%s)\n' % signature)
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Li    def _diagnose_dut(self, old_boot_id=None):
*9c5db199SXin Li        """
*9c5db199SXin Li        Run diagnostic checks on a DUT.
*9c5db199SXin Li
*9c5db199SXin Li        1. ping: A dead host will not respond to pings.
*9c5db199SXin Li        2. ssh (happens with 3.): DUT hangs usually fail in authentication
*9c5db199SXin Li            but respond to pings.
*9c5db199SXin Li        3. Check if a reboot occured: A healthy but unexpected reboot leaves the
*9c5db199SXin Li            host running with a new boot id.
*9c5db199SXin Li
*9c5db199SXin Li        This method will always raise an exception from the AutotestFailure
*9c5db199SXin Li        family and should only get called when the reason for a test failing
*9c5db199SXin Li        is ambiguous.
*9c5db199SXin Li
*9c5db199SXin Li        @raises AutotestDeviceNotPingable: If the DUT doesn't respond to ping.
*9c5db199SXin Li        @raises AutotestDeviceNotSSHable: If we cannot SSH into the DUT.
*9c5db199SXin Li        @raises AutotestDeviceRebooted: If the boot id changed.
*9c5db199SXin Li        @raises AutotestAbort: If none of the above exceptions were raised.
*9c5db199SXin Li            Since we have no recourse we must abort at this stage.
*9c5db199SXin Li        """
*9c5db199SXin Li        msg = 'Autotest client terminated unexpectedly: '
*9c5db199SXin Li        if utils.ping(self.host.hostname, tries=1, deadline=1) != 0:
*9c5db199SXin Li            msg += 'DUT is no longer pingable, it may have rebooted or hung.\n'
*9c5db199SXin Li            raise AutotestDeviceNotPingable(msg)
*9c5db199SXin Li
*9c5db199SXin Li        if old_boot_id:
*9c5db199SXin Li            try:
*9c5db199SXin Li                new_boot_id = self.host.get_boot_id(timeout=60)
*9c5db199SXin Li            except Exception as e:
*9c5db199SXin Li                msg += ('DUT is pingable but not SSHable, it most likely'
*9c5db199SXin Li                        ' sporadically rebooted during testing. %s\n' % str(e))
*9c5db199SXin Li                raise AutotestDeviceNotSSHable(msg)
*9c5db199SXin Li            else:
*9c5db199SXin Li                if new_boot_id != old_boot_id:
*9c5db199SXin Li                    msg += self._diagnose_reboot()
*9c5db199SXin Li                    raise AutotestDeviceRebooted(msg)
*9c5db199SXin Li
*9c5db199SXin Li            msg += ('DUT is pingable, SSHable and did NOT restart '
*9c5db199SXin Li                    'un-expectedly. We probably lost connectivity during the '
*9c5db199SXin Li                    'test.')
*9c5db199SXin Li        else:
*9c5db199SXin Li            msg += ('DUT is pingable, could not determine if an un-expected '
*9c5db199SXin Li                    'reboot occured during the test.')
*9c5db199SXin Li
*9c5db199SXin Li        raise AutotestAbort(msg)
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Li    def log_unexpected_abort(self, stderr_redirector, old_boot_id=None):
*9c5db199SXin Li        """
*9c5db199SXin Li        Logs that something unexpected happened, then tries to diagnose the
*9c5db199SXin Li        failure. The purpose of this function is only to close out the status
*9c5db199SXin Li        log with the appropriate error message, not to critically terminate
*9c5db199SXin Li        the program.
*9c5db199SXin Li
*9c5db199SXin Li        @param stderr_redirector: log stream.
*9c5db199SXin Li        @param old_boot_id: boot id used to infer if a reboot occured.
*9c5db199SXin Li        """
*9c5db199SXin Li        stderr_redirector.flush_all_buffers()
*9c5db199SXin Li        try:
*9c5db199SXin Li            self._diagnose_dut(old_boot_id)
*9c5db199SXin Li        except AutotestFailure as e:
*9c5db199SXin Li            self.host.job.record('END ABORT', None, None, str(e))
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Li    def _execute_in_background(self, section, timeout):
*9c5db199SXin Li        full_cmd = self.get_background_cmd(section)
*9c5db199SXin Li        devnull = open(os.devnull, "w")
*9c5db199SXin Li
*9c5db199SXin Li        self.copy_client_config_file(self.get_client_log())
*9c5db199SXin Li
*9c5db199SXin Li        self.host.job.push_execution_context(self.results_dir)
*9c5db199SXin Li        try:
*9c5db199SXin Li            result = self.host.run(full_cmd, ignore_status=True,
*9c5db199SXin Li                                   timeout=timeout,
*9c5db199SXin Li                                   stdout_tee=devnull,
*9c5db199SXin Li                                   stderr_tee=devnull)
*9c5db199SXin Li        finally:
*9c5db199SXin Li            self.host.job.pop_execution_context()
*9c5db199SXin Li
*9c5db199SXin Li        return result
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Li    @staticmethod
*9c5db199SXin Li    def _strip_stderr_prologue(stderr, monitor_cmd):
*9c5db199SXin Li        """Strips the 'standard' prologue that get pre-pended to every
*9c5db199SXin Li        remote command and returns the text that was actually written to
*9c5db199SXin Li        stderr by the remote command.
*9c5db199SXin Li
*9c5db199SXin Li        This will always strip atleast the first line ('standard' prologue),
*9c5db199SXin Li        and strip any extra messages prior. The following are common 'extra'
*9c5db199SXin Li        messages which could appear.
*9c5db199SXin Li
*9c5db199SXin Li        1.) Any warnings. For example, on CrOS version R90, any script running
*9c5db199SXin Li            in python2 result in the following warning in the stderr:
*9c5db199SXin Li            "warning: Python 2.7 is deprecated and will be removed from CrOS by
*9c5db199SXin Li            end of 2021. All users must migrate ASAP"
*9c5db199SXin Li        2.) The actual command used to launch autotestd_monitor (monitor_cmd)
*9c5db199SXin Li
*9c5db199SXin Li        Additionally there is a NOTE line that could be present needing also to
*9c5db199SXin Li        be stripped.
*9c5db199SXin Li        """
*9c5db199SXin Li        stderr_lines = stderr.split("\n")
*9c5db199SXin Li        if not stderr_lines:
*9c5db199SXin Li            return ""
*9c5db199SXin Li
*9c5db199SXin Li        # If no warnings/monitor_cmd, strip only the first line
*9c5db199SXin Li        skipn = 1
*9c5db199SXin Li        for i, line in enumerate(stderr_lines):
*9c5db199SXin Li            if monitor_cmd in line:
*9c5db199SXin Li                # add *2* (1 for the index, 1 for the 'standard prolouge'
*9c5db199SXin Li                # which follows this line).
*9c5db199SXin Li                skipn = i + 2
*9c5db199SXin Li                break
*9c5db199SXin Li
*9c5db199SXin Li        stderr_lines = stderr_lines[skipn:]
*9c5db199SXin Li
*9c5db199SXin Li        if stderr_lines[0].startswith("NOTE: autotestd_monitor"):
*9c5db199SXin Li            del stderr_lines[0]
*9c5db199SXin Li        return "\n".join(stderr_lines)
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Li    def _execute_daemon(self, section, timeout, stderr_redirector,
*9c5db199SXin Li                        client_disconnect_timeout):
*9c5db199SXin Li        monitor_dir = self.host.get_tmp_dir()
*9c5db199SXin Li        daemon_cmd = self.get_daemon_cmd(section, monitor_dir)
*9c5db199SXin Li
*9c5db199SXin Li        # grab the location for the server-side client log file
*9c5db199SXin Li        client_log_prefix = self.get_client_log()
*9c5db199SXin Li        client_log_path = os.path.join(self.results_dir, 'debug',
*9c5db199SXin Li                                       client_log_prefix + '.log')
*9c5db199SXin Li        client_log = open(client_log_path, 'w', LOG_BUFFER_SIZE_BYTES)
*9c5db199SXin Li        self.copy_client_config_file(client_log_prefix)
*9c5db199SXin Li
*9c5db199SXin Li        stdout_read = stderr_read = 0
*9c5db199SXin Li        self.host.job.push_execution_context(self.results_dir)
*9c5db199SXin Li        try:
*9c5db199SXin Li            self.host.run(daemon_cmd, ignore_status=True, timeout=timeout)
*9c5db199SXin Li            disconnect_warnings = []
*9c5db199SXin Li            while True:
*9c5db199SXin Li                monitor_cmd = self.get_monitor_cmd(monitor_dir, stdout_read,
*9c5db199SXin Li                                                   stderr_read)
*9c5db199SXin Li                try:
*9c5db199SXin Li                    result = self.host.run(monitor_cmd, ignore_status=True,
*9c5db199SXin Li                                           timeout=timeout,
*9c5db199SXin Li                                           stdout_tee=client_log,
*9c5db199SXin Li                                           stderr_tee=stderr_redirector)
*9c5db199SXin Li                except error.AutoservRunError as e:
*9c5db199SXin Li                    result = e.result_obj
*9c5db199SXin Li                    result.exit_status = None
*9c5db199SXin Li                    disconnect_warnings.append(e.description)
*9c5db199SXin Li
*9c5db199SXin Li                    stderr_redirector.log_warning(
*9c5db199SXin Li                        "Autotest client was disconnected: %s" % e.description,
*9c5db199SXin Li                        "NETWORK")
*9c5db199SXin Li                except error.AutoservSSHTimeout:
*9c5db199SXin Li                    result = utils.CmdResult(monitor_cmd, "", "", None, 0)
*9c5db199SXin Li                    stderr_redirector.log_warning(
*9c5db199SXin Li                        "Attempt to connect to Autotest client timed out",
*9c5db199SXin Li                        "NETWORK")
*9c5db199SXin Li
*9c5db199SXin Li                stdout_read += len(result.stdout)
*9c5db199SXin Li                stderr_read += len(
*9c5db199SXin Li                        self._strip_stderr_prologue(result.stderr,
*9c5db199SXin Li                                                    monitor_cmd))
*9c5db199SXin Li
*9c5db199SXin Li                if result.exit_status is not None:
*9c5db199SXin Li                    # TODO (crosbug.com/38224)- sbasi: Remove extra logging.
*9c5db199SXin Li                    logging.debug('Result exit status is %d.',
*9c5db199SXin Li                                  result.exit_status)
*9c5db199SXin Li                    return result
*9c5db199SXin Li                elif not self.host.wait_up(client_disconnect_timeout):
*9c5db199SXin Li                    raise error.AutoservSSHTimeout(
*9c5db199SXin Li                        "client was disconnected, reconnect timed out")
*9c5db199SXin Li        finally:
*9c5db199SXin Li            client_log.close()
*9c5db199SXin Li            self.host.job.pop_execution_context()
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Li    def execute_section(self, section, timeout, stderr_redirector,
*9c5db199SXin Li                        client_disconnect_timeout, boot_id=None):
*9c5db199SXin Li        # TODO(crbug.com/684311) The claim is that section is never more than 0
*9c5db199SXin Li        # in pratice. After validating for a week or so, delete all support of
*9c5db199SXin Li        # multiple sections.
*9c5db199SXin Li        metrics.Counter('chromeos/autotest/autotest/sections').increment(
*9c5db199SXin Li                fields={'is_first_section': (section == 0)})
*9c5db199SXin Li        logging.info("Executing %s/bin/autotest %s/control phase %d",
*9c5db199SXin Li                     self.autodir, self.autodir, section)
*9c5db199SXin Li
*9c5db199SXin Li        if self.background:
*9c5db199SXin Li            result = self._execute_in_background(section, timeout)
*9c5db199SXin Li        else:
*9c5db199SXin Li            result = self._execute_daemon(section, timeout, stderr_redirector,
*9c5db199SXin Li                                          client_disconnect_timeout)
*9c5db199SXin Li
*9c5db199SXin Li        last_line = stderr_redirector.last_line
*9c5db199SXin Li
*9c5db199SXin Li        # check if we failed hard enough to warrant an exception
*9c5db199SXin Li        if result.exit_status == 1:
*9c5db199SXin Li            err = error.AutotestRunError("client job was aborted")
*9c5db199SXin Li        elif not self.background and not result.stderr:
*9c5db199SXin Li            err = error.AutotestRunError(
*9c5db199SXin Li                "execute_section %s failed to return anything\n"
*9c5db199SXin Li                "stdout:%s\n" % (section, result.stdout))
*9c5db199SXin Li        else:
*9c5db199SXin Li            err = None
*9c5db199SXin Li
*9c5db199SXin Li        # log something if the client failed AND never finished logging
*9c5db199SXin Li        if err and not self.is_client_job_finished(last_line):
*9c5db199SXin Li            self.log_unexpected_abort(stderr_redirector, old_boot_id=boot_id)
*9c5db199SXin Li
*9c5db199SXin Li        if err:
*9c5db199SXin Li            raise err
*9c5db199SXin Li        else:
*9c5db199SXin Li            return stderr_redirector.last_line
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Li    def _wait_for_reboot(self, old_boot_id):
*9c5db199SXin Li        logging.info("Client is rebooting")
*9c5db199SXin Li        logging.info("Waiting for client to halt")
*9c5db199SXin Li        if not self.host.wait_down(self.host.WAIT_DOWN_REBOOT_TIMEOUT,
*9c5db199SXin Li                                   old_boot_id=old_boot_id):
*9c5db199SXin Li            err = "%s failed to shutdown after %d"
*9c5db199SXin Li            err %= (self.host.hostname, self.host.WAIT_DOWN_REBOOT_TIMEOUT)
*9c5db199SXin Li            raise error.AutotestRunError(err)
*9c5db199SXin Li        logging.info("Client down, waiting for restart")
*9c5db199SXin Li        if not self.host.wait_up(self.host.DEFAULT_REBOOT_TIMEOUT):
*9c5db199SXin Li            # since reboot failed
*9c5db199SXin Li            # hardreset the machine once if possible
*9c5db199SXin Li            # before failing this control file
*9c5db199SXin Li            warning = "%s did not come back up, hard resetting"
*9c5db199SXin Li            warning %= self.host.hostname
*9c5db199SXin Li            logging.warning(warning)
*9c5db199SXin Li            try:
*9c5db199SXin Li                self.host.hardreset(wait=False)
*9c5db199SXin Li            except (AttributeError, error.AutoservUnsupportedError):
*9c5db199SXin Li                warning = "Hard reset unsupported on %s"
*9c5db199SXin Li                warning %= self.host.hostname
*9c5db199SXin Li                logging.warning(warning)
*9c5db199SXin Li            raise error.AutotestRunError("%s failed to boot after %ds" %
*9c5db199SXin Li                                         (self.host.hostname,
*9c5db199SXin Li                                          self.host.DEFAULT_REBOOT_TIMEOUT))
*9c5db199SXin Li        self.host.reboot_followup()
*9c5db199SXin Li
*9c5db199SXin Li
    def execute_control(self, timeout=None, client_disconnect_timeout=None):
        """Run the client job section by section until it completes.

        Each loop iteration runs one section through execute_section() and
        inspects the last status line it returned: a finished job ends the
        call, a rebooting job waits for the reboot and starts the next
        section, and anything else is handed to _diagnose_dut().

        When self.background is set, only the first section is started and
        the method returns right after it; no log collection is set up or
        torn down in that mode.

        @param timeout: Overall budget in seconds for all sections together;
                a falsy value means no limit.
        @param client_disconnect_timeout: Passed through to
                execute_section() unchanged.

        @raises error.AutotestRunError: if waiting for a reboot fails (the
                error is re-raised after being recorded) or if
                _diagnose_dut() raises AutotestAbort.
        @raises error.AutotestTimeoutError: if the loop ends because timeout
                elapsed.
        """
        if not self.background:
            # Register the client log location with the job for the duration
            # of the run; the finally block below undoes this.
            collector = log_collector(self.host, self.tag, self.results_dir)
            hostname = self.host.hostname
            remote_results = collector.client_results_dir
            local_results = collector.server_results_dir
            self.host.job.add_client_log(hostname, remote_results,
                                         local_results)
            job_record_context = self.host.job.get_record_context()

        section = 0
        start_time = time.time()

        logger = client_logger(self.host, self.tag, self.results_dir)
        try:
            while not timeout or time.time() < start_time + timeout:
                # Each section only gets whatever is left of the overall
                # budget.
                if timeout:
                    section_timeout = start_time + timeout - time.time()
                else:
                    section_timeout = None
                boot_id = self.host.get_boot_id()
                last = self.execute_section(section, section_timeout,
                                            logger, client_disconnect_timeout,
                                            boot_id=boot_id)
                if self.background:
                    return
                section += 1
                if self.is_client_job_finished(last):
                    logging.info("Client complete")
                    return
                elif self.is_client_job_rebooting(last):
                    try:
                        self._wait_for_reboot(boot_id)
                    except error.AutotestRunError as e:
                        self.host.job.record("ABORT", None, "reboot", str(e))
                        self.host.job.record("END ABORT", None, None, str(e))
                        raise
                    continue

                # If a test fails without probable cause we try to bucket its
                # failure into one of 2 categories. If we can determine the
                # current state of the device and it is suspicious, we close the
                # status lines indicating a failure. If we either cannot
                # determine the state of the device, or it appears totally
                # healthy, we give up and abort.
                try:
                    self._diagnose_dut(boot_id)
                except AutotestDeviceError as e:
                    # The status lines of the test are pretty much tailed to
                    # our log, with indentation, from the client job on the DUT.
                    # So if the DUT goes down unexpectedly we'll end up with a
                    # malformed status log unless we manually unwind the status
                    # stack. Ideally we would want to write a nice wrapper like
                    # server_job methods run_reboot, run_group but they expect
                    # reboots and we don't.
                    self.host.job.record('FAIL', None, None, str(e))
                    self.host.job.record('END FAIL', None, None)
                    self.host.job.record('END GOOD', None, None)
                    self.host.job.failed_with_device_error = True
                    return
                except AutotestAbort as e:
                    self.host.job.record('ABORT', None, None, str(e))
                    self.host.job.record('END ABORT', None, None)

                    # give the client machine a chance to recover from a crash
                    self.host.wait_up(
                        self.host.HOURS_TO_WAIT_FOR_RECOVERY * 3600)
                    logging.debug('Unexpected final status message from '
                                  'client %s: %s', self.host.hostname, last)
                    # The line 'last' may have sensitive phrases, like
                    # 'END GOOD', which breaks the tko parser. So the error
                    # message will exclude it, since it will be recorded to
                    # status.log.
                    msg = ("Aborting - unexpected final status message from "
                           "client on %s\n") % self.host.hostname
                    raise error.AutotestRunError(msg)
        finally:
            # B/203609358 something is removing telemetry. Adding this to
            # check the status of the folder as late as possible.
            logging.debug('Autotest job finishes running. Below is the '
                          'post-processing operations.')
            # Runs on every exit path (return, raise, or timeout): flush the
            # logger first, then collect results and undo the registration
            # made at the top of the method.
            logger.close()
            if not self.background:
                collector.collect_client_job_results()
                collector.remove_redundant_client_logs()
                state_file = os.path.basename(self.remote_control_file
                                              + '.state')
                state_path = os.path.join(self.results_dir, state_file)
                self.host.job.postprocess_client_state(state_path)
                self.host.job.remove_client_log(hostname, remote_results,
                                                local_results)
                job_record_context.restore()

            logging.debug('Autotest job finishes.')

        # should only get here if we timed out
        assert timeout
        raise error.AutotestTimeoutError()
*9c5db199SXin Li
*9c5db199SXin Li
class log_collector(object):
    """Copies the results of a client job from the host to the server.

    client_results_dir is the results directory on the host for the given
    client tag; server_results_dir is the local directory they are copied
    into.
    """

    def __init__(self, host, client_tag, results_dir):
        """
        @param host: Host object the client job runs on.
        @param client_tag: Tag of the client job; a falsy value selects
                "default".
        @param results_dir: Local directory that receives the results.
        """
        self.host = host
        if not client_tag:
            client_tag = "default"
        self.client_results_dir = os.path.join(host.get_autodir(), "results",
                                               client_tag)
        self.server_results_dir = results_dir


    def collect_client_job_results(self):
        """Copy the client's results directory into the server results dir.

        This is best effort: the host is given a short while to come up, a
        result summary is built on the client, and the results are then
        copied over. Any failure while summarizing or copying is logged and
        otherwise ignored.
        """
        # make an effort to wait for the machine to come up
        try:
            self.host.wait_up(timeout=30)
        except error.AutoservError:
            # don't worry about any errors, we'll try and
            # get the results anyway
            pass

        # Copy all dirs in default to results_dir
        try:
            # Build test result directory summary
            result_tools_runner.run_on_client(
                    self.host, self.client_results_dir)

            with metrics.SecondsTimer(
                    'chromeos/autotest/job/log_collection_duration',
                    fields={'dut_host_name': self.host.hostname}):
                self.host.get_file(
                        self.client_results_dir + '/',
                        self.server_results_dir,
                        preserve_symlinks=True)
        except Exception:
            # well, don't stop running just because we couldn't get logs
            e_msg = "Unexpected error copying test result logs, continuing ..."
            logging.error(e_msg)
            traceback.print_exc(file=sys.stdout)


    def remove_redundant_client_logs(self):
        """Remove client.*.log files in favour of client.*.DEBUG files.

        Does nothing when the server results dir has no 'debug' directory.
        """
        debug_dir = os.path.join(self.server_results_dir, 'debug')
        if not os.path.isdir(debug_dir):
            # os.listdir() would raise OSError here. This method runs as
            # part of post-job cleanup, where such an error would hide the
            # real failure and cut the rest of the cleanup short.
            return
        for debug_file in os.listdir(debug_dir):
            if not re.search(r'^client\.\d+\.DEBUG$', debug_file):
                continue
            log_file = os.path.join(debug_dir,
                                    debug_file.replace('DEBUG', 'log'))
            if os.path.exists(log_file):
                os.remove(log_file)
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Li# a file-like object for catching stderr from an autotest client and
*9c5db199SXin Li# extracting status logs from it
*9c5db199SXin Liclass client_logger(object):
*9c5db199SXin Li    """Partial file object to write to both stdout and
*9c5db199SXin Li    the status log file.  We only implement those methods
*9c5db199SXin Li    utils.run() actually calls.
*9c5db199SXin Li    """
*9c5db199SXin Li    status_parser = re.compile(r"^AUTOTEST_STATUS:([^:]*):(.*)$")
*9c5db199SXin Li    test_complete_parser = re.compile(r"^AUTOTEST_TEST_COMPLETE:(.*)$")
*9c5db199SXin Li    fetch_package_parser = re.compile(
*9c5db199SXin Li        r"^AUTOTEST_FETCH_PACKAGE:([^:]*):([^:]*):(.*)$")
*9c5db199SXin Li    extract_indent = re.compile(r"^(\t*).*$")
*9c5db199SXin Li    extract_timestamp = re.compile(r".*\ttimestamp=(\d+)\t.*$")
*9c5db199SXin Li
*9c5db199SXin Li    def __init__(self, host, tag, server_results_dir):
*9c5db199SXin Li        self.host = host
*9c5db199SXin Li        self.job = host.job
*9c5db199SXin Li        self.log_collector = log_collector(host, tag, server_results_dir)
*9c5db199SXin Li        self.leftover = ""
*9c5db199SXin Li        self.last_line = ""
*9c5db199SXin Li        self.logs = {}
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Li    def _process_log_dict(self, log_dict):
*9c5db199SXin Li        log_list = log_dict.pop("logs", [])
*9c5db199SXin Li        for key in sorted(six.iterkeys(log_dict)):
*9c5db199SXin Li            log_list += self._process_log_dict(log_dict.pop(key))
*9c5db199SXin Li        return log_list
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Li    def _process_logs(self):
*9c5db199SXin Li        """Go through the accumulated logs in self.log and print them
*9c5db199SXin Li        out to stdout and the status log. Note that this processes
*9c5db199SXin Li        logs in an ordering where:
*9c5db199SXin Li
*9c5db199SXin Li        1) logs to different tags are never interleaved
*9c5db199SXin Li        2) logs to x.y come before logs to x.y.z for all z
*9c5db199SXin Li        3) logs to x.y come before x.z whenever y < z
*9c5db199SXin Li
*9c5db199SXin Li        Note that this will in general not be the same as the
*9c5db199SXin Li        chronological ordering of the logs. However, if a chronological
*9c5db199SXin Li        ordering is desired that one can be reconstructed from the
*9c5db199SXin Li        status log by looking at timestamp lines."""
*9c5db199SXin Li        log_list = self._process_log_dict(self.logs)
*9c5db199SXin Li        for entry in log_list:
*9c5db199SXin Li            self.job.record_entry(entry, log_in_subdir=False)
*9c5db199SXin Li        if log_list:
*9c5db199SXin Li            self.last_line = log_list[-1].render()
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Li    def _process_quoted_line(self, tag, line):
*9c5db199SXin Li        """Process a line quoted with an AUTOTEST_STATUS flag. If the
*9c5db199SXin Li        tag is blank then we want to push out all the data we've been
*9c5db199SXin Li        building up in self.logs, and then the newest line. If the
*9c5db199SXin Li        tag is not blank, then push the line into the logs for handling
*9c5db199SXin Li        later."""
*9c5db199SXin Li        entry = base_job.status_log_entry.parse(line)
*9c5db199SXin Li        if entry is None:
*9c5db199SXin Li            return  # the line contains no status lines
*9c5db199SXin Li        if tag == "":
*9c5db199SXin Li            self._process_logs()
*9c5db199SXin Li            self.job.record_entry(entry, log_in_subdir=False)
*9c5db199SXin Li            self.last_line = line
*9c5db199SXin Li        else:
*9c5db199SXin Li            tag_parts = [int(x) for x in tag.split(".")]
*9c5db199SXin Li            log_dict = self.logs
*9c5db199SXin Li            for part in tag_parts:
*9c5db199SXin Li                log_dict = log_dict.setdefault(part, {})
*9c5db199SXin Li            log_list = log_dict.setdefault("logs", [])
*9c5db199SXin Li            log_list.append(entry)
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Li    def _process_info_line(self, line):
*9c5db199SXin Li        """Check if line is an INFO line, and if it is, interpret any control
*9c5db199SXin Li        messages (e.g. enabling/disabling warnings) that it may contain."""
*9c5db199SXin Li        match = re.search(r"^\t*INFO\t----\t----(.*)\t[^\t]*$", line)
*9c5db199SXin Li        if not match:
*9c5db199SXin Li            return   # not an INFO line
*9c5db199SXin Li        for field in match.group(1).split('\t'):
*9c5db199SXin Li            if field.startswith("warnings.enable="):
*9c5db199SXin Li                func = self.job.warning_manager.enable_warnings
*9c5db199SXin Li            elif field.startswith("warnings.disable="):
*9c5db199SXin Li                func = self.job.warning_manager.disable_warnings
*9c5db199SXin Li            else:
*9c5db199SXin Li                continue
*9c5db199SXin Li            warning_type = field.split("=", 1)[1]
*9c5db199SXin Li            func(warning_type)
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Li    def _process_line(self, line):
*9c5db199SXin Li        """Write out a line of data to the appropriate stream.
*9c5db199SXin Li
*9c5db199SXin Li        Returns the package checksum file if it exists.
*9c5db199SXin Li
*9c5db199SXin Li        Status lines sent by autotest will be prepended with
*9c5db199SXin Li        "AUTOTEST_STATUS", and all other lines are ssh error messages.
*9c5db199SXin Li        """
*9c5db199SXin Li        logging.debug(line)
*9c5db199SXin Li        fetch_package_match = self.fetch_package_parser.search(line)
*9c5db199SXin Li        if fetch_package_match:
*9c5db199SXin Li            pkg_name, dest_path, fifo_path = fetch_package_match.groups()
*9c5db199SXin Li            serve_packages = _CONFIG.get_config_value(
*9c5db199SXin Li                "PACKAGES", "serve_packages_from_autoserv", type=bool)
*9c5db199SXin Li            if serve_packages and pkg_name == 'packages.checksum':
*9c5db199SXin Li                try:
*9c5db199SXin Li                    checksum_file = os.path.join(
*9c5db199SXin Li                        self.job.pkgmgr.pkgmgr_dir, 'packages', pkg_name)
*9c5db199SXin Li                    if os.path.exists(checksum_file):
*9c5db199SXin Li                        self.host.send_file(checksum_file, dest_path)
*9c5db199SXin Li                except error.AutoservRunError:
*9c5db199SXin Li                    msg = "Package checksum file not found, continuing anyway"
*9c5db199SXin Li                    logging.exception(msg)
*9c5db199SXin Li
*9c5db199SXin Li                try:
*9c5db199SXin Li                    # When fetching a package, the client expects to be
*9c5db199SXin Li                    # notified when the fetching is complete. Autotest
*9c5db199SXin Li                    # does this pushing a B to a fifo queue to the client.
*9c5db199SXin Li                    self.host.run("echo B > %s" % fifo_path)
*9c5db199SXin Li                except error.AutoservRunError:
*9c5db199SXin Li                    msg = "Checksum installation failed, continuing anyway"
*9c5db199SXin Li                    logging.exception(msg)
*9c5db199SXin Li                finally:
*9c5db199SXin Li                    return
*9c5db199SXin Li
*9c5db199SXin Li        status_match = self.status_parser.search(line)
*9c5db199SXin Li        test_complete_match = self.test_complete_parser.search(line)
*9c5db199SXin Li        fetch_package_match = self.fetch_package_parser.search(line)
*9c5db199SXin Li        if status_match:
*9c5db199SXin Li            tag, line = status_match.groups()
*9c5db199SXin Li            self._process_info_line(line)
*9c5db199SXin Li            self._process_quoted_line(tag, line)
*9c5db199SXin Li        elif test_complete_match:
*9c5db199SXin Li            self._process_logs()
*9c5db199SXin Li            fifo_path, = test_complete_match.groups()
*9c5db199SXin Li            try:
*9c5db199SXin Li                self.log_collector.collect_client_job_results()
*9c5db199SXin Li                self.host.run("echo A > %s" % fifo_path)
*9c5db199SXin Li            except Exception:
*9c5db199SXin Li                msg = "Post-test log collection failed, continuing anyway"
*9c5db199SXin Li                logging.exception(msg)
*9c5db199SXin Li        elif fetch_package_match:
*9c5db199SXin Li            pkg_name, dest_path, fifo_path = fetch_package_match.groups()
*9c5db199SXin Li            serve_packages = global_config.global_config.get_config_value(
*9c5db199SXin Li                "PACKAGES", "serve_packages_from_autoserv", type=bool)
*9c5db199SXin Li            if serve_packages and pkg_name.endswith(".tar.bz2"):
*9c5db199SXin Li                try:
*9c5db199SXin Li                    self._send_tarball(pkg_name, dest_path)
*9c5db199SXin Li                except Exception:
*9c5db199SXin Li                    msg = "Package tarball creation failed, continuing anyway"
*9c5db199SXin Li                    logging.exception(msg)
*9c5db199SXin Li            try:
*9c5db199SXin Li                self.host.run("echo B > %s" % fifo_path)
*9c5db199SXin Li            except Exception:
*9c5db199SXin Li                msg = "Package tarball installation failed, continuing anyway"
*9c5db199SXin Li                logging.exception(msg)
*9c5db199SXin Li        else:
*9c5db199SXin Li            logging.info(line)
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Li    def _send_tarball(self, pkg_name, remote_dest):
*9c5db199SXin Li        """Uses tarballs in package manager by default."""
*9c5db199SXin Li        try:
*9c5db199SXin Li            server_package = os.path.join(self.job.pkgmgr.pkgmgr_dir,
*9c5db199SXin Li                                          'packages', pkg_name)
*9c5db199SXin Li            if os.path.exists(server_package):
*9c5db199SXin Li                self.host.send_file(server_package, remote_dest)
*9c5db199SXin Li                return
*9c5db199SXin Li
*9c5db199SXin Li        except error.AutoservRunError:
*9c5db199SXin Li            msg = ("Package %s could not be sent from the package cache." %
*9c5db199SXin Li                   pkg_name)
*9c5db199SXin Li            logging.exception(msg)
*9c5db199SXin Li
*9c5db199SXin Li        name, pkg_type = self.job.pkgmgr.parse_tarball_name(pkg_name)
*9c5db199SXin Li        src_dirs = []
*9c5db199SXin Li        if pkg_type == 'test':
*9c5db199SXin Li            for test_dir in ['site_tests', 'tests']:
*9c5db199SXin Li                src_dir = os.path.join(self.job.clientdir, test_dir, name)
*9c5db199SXin Li                if os.path.exists(src_dir):
*9c5db199SXin Li                    src_dirs += [src_dir]
*9c5db199SXin Li                    break
*9c5db199SXin Li        elif pkg_type == 'profiler':
*9c5db199SXin Li            src_dirs += [os.path.join(self.job.clientdir, 'profilers', name)]
*9c5db199SXin Li        elif pkg_type == 'dep':
*9c5db199SXin Li            src_dirs += [os.path.join(self.job.clientdir, 'deps', name)]
*9c5db199SXin Li        elif pkg_type == 'client':
*9c5db199SXin Li            return  # you must already have a client to hit this anyway
*9c5db199SXin Li        else:
*9c5db199SXin Li            return  # no other types are supported
*9c5db199SXin Li
*9c5db199SXin Li        # iterate over src_dirs until we find one that exists, then tar it
*9c5db199SXin Li        for src_dir in src_dirs:
*9c5db199SXin Li            if os.path.exists(src_dir):
*9c5db199SXin Li                try:
*9c5db199SXin Li                    logging.info('Bundling %s into %s', src_dir, pkg_name)
*9c5db199SXin Li                    temp_dir = autotemp.tempdir(unique_id='autoserv-packager',
*9c5db199SXin Li                                                dir=self.job.tmpdir)
*9c5db199SXin Li                    tarball_path = self.job.pkgmgr.tar_package(
*9c5db199SXin Li                        pkg_name, src_dir, temp_dir.name, " .")
*9c5db199SXin Li                    self.host.send_file(tarball_path, remote_dest)
*9c5db199SXin Li                finally:
*9c5db199SXin Li                    temp_dir.clean()
*9c5db199SXin Li                return
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Li    def log_warning(self, msg, warning_type):
*9c5db199SXin Li        """Injects a WARN message into the current status logging stream."""
*9c5db199SXin Li        timestamp = int(time.time())
*9c5db199SXin Li        if self.job.warning_manager.is_valid(timestamp, warning_type):
*9c5db199SXin Li            self.job.record('WARN', None, None, msg)
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Li    def write(self, data):
*9c5db199SXin Li        # now start processing the existing buffer and the new data
*9c5db199SXin Li        data = self.leftover + data
*9c5db199SXin Li        lines = data.split('\n')
*9c5db199SXin Li        processed_lines = 0
*9c5db199SXin Li        try:
*9c5db199SXin Li            # process all the buffered data except the last line
*9c5db199SXin Li            # ignore the last line since we may not have all of it yet
*9c5db199SXin Li            for line in lines[:-1]:
*9c5db199SXin Li                self._process_line(line)
*9c5db199SXin Li                processed_lines += 1
*9c5db199SXin Li        finally:
*9c5db199SXin Li            # save any unprocessed lines for future processing
*9c5db199SXin Li            self.leftover = '\n'.join(lines[processed_lines:])
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Li    def flush(self):
*9c5db199SXin Li        sys.stdout.flush()
*9c5db199SXin Li
*9c5db199SXin Li
*9c5db199SXin Li    def flush_all_buffers(self):
*9c5db199SXin Li        if self.leftover:
*9c5db199SXin Li            self._process_line(self.leftover)
*9c5db199SXin Li            self.leftover = ""
*9c5db199SXin Li        self._process_logs()
*9c5db199SXin Li        self.flush()
*9c5db199SXin Li
*9c5db199SXin Li
    def close(self):
        """Flush everything still buffered by calling flush_all_buffers()."""
        self.flush_all_buffers()