1*9c5db199SXin Li# Lint as: python2, python3 2*9c5db199SXin Li# Copyright 2007 Google Inc. Released under the GPL v2 3*9c5db199SXin Li#pylint: disable-msg=C0111 4*9c5db199SXin Li 5*9c5db199SXin Lifrom __future__ import absolute_import 6*9c5db199SXin Lifrom __future__ import division 7*9c5db199SXin Lifrom __future__ import print_function 8*9c5db199SXin Li 9*9c5db199SXin Liimport glob 10*9c5db199SXin Liimport logging 11*9c5db199SXin Liimport os 12*9c5db199SXin Liimport re 13*9c5db199SXin Liimport sys 14*9c5db199SXin Liimport tempfile 15*9c5db199SXin Liimport time 16*9c5db199SXin Liimport traceback 17*9c5db199SXin Li 18*9c5db199SXin Liimport common 19*9c5db199SXin Lifrom autotest_lib.client.bin.result_tools import runner as result_tools_runner 20*9c5db199SXin Lifrom autotest_lib.client.common_lib import autotemp 21*9c5db199SXin Lifrom autotest_lib.client.common_lib import base_job 22*9c5db199SXin Lifrom autotest_lib.client.common_lib import error 23*9c5db199SXin Lifrom autotest_lib.client.common_lib import global_config 24*9c5db199SXin Lifrom autotest_lib.client.common_lib import packages 25*9c5db199SXin Lifrom autotest_lib.client.common_lib import utils as client_utils 26*9c5db199SXin Lifrom autotest_lib.server import installable_object 27*9c5db199SXin Lifrom autotest_lib.server import utils 28*9c5db199SXin Lifrom autotest_lib.server import utils as server_utils 29*9c5db199SXin Lifrom autotest_lib.server.cros.dynamic_suite.constants import JOB_REPO_URL 30*9c5db199SXin Liimport six 31*9c5db199SXin Lifrom six.moves import map 32*9c5db199SXin Li 33*9c5db199SXin Li 34*9c5db199SXin Litry: 35*9c5db199SXin Li from autotest_lib.utils.frozen_chromite.lib import metrics 36*9c5db199SXin Liexcept ImportError: 37*9c5db199SXin Li metrics = client_utils.metrics_mock 38*9c5db199SXin Li 39*9c5db199SXin Li 40*9c5db199SXin Li# This is assumed to be the value by tests, do not change it. 41*9c5db199SXin LiOFFLOAD_ENVVAR = "SYNCHRONOUS_OFFLOAD_DIR" 42*9c5db199SXin Li 43*9c5db199SXin LiAUTOTEST_SVN = 'svn://test.kernel.org/autotest/trunk/client' 44*9c5db199SXin LiAUTOTEST_HTTP = 'http://test.kernel.org/svn/autotest/trunk/client' 45*9c5db199SXin Li 46*9c5db199SXin Li_CONFIG = global_config.global_config 47*9c5db199SXin LiAUTOSERV_PREBUILD = _CONFIG.get_config_value( 48*9c5db199SXin Li 'AUTOSERV', 'enable_server_prebuild', type=bool, default=False) 49*9c5db199SXin Li 50*9c5db199SXin Li# Match on a line like this: 51*9c5db199SXin Li# FAIL test_name test_name timestamp=1 localtime=Nov 15 12:43:10 <fail_msg> 52*9c5db199SXin Li_FAIL_STATUS_RE = re.compile( 53*9c5db199SXin Li r'\s*FAIL.*localtime=.*\s*.*\s*[0-9]+:[0-9]+:[0-9]+\s*(?P<fail_msg>.*)') 54*9c5db199SXin Li 55*9c5db199SXin LiLOG_BUFFER_SIZE_BYTES = 64 56*9c5db199SXin Li 57*9c5db199SXin Li 58*9c5db199SXin Lidef _set_py_version(): 59*9c5db199SXin Li """As of ~R102 (aka when this merges), DUTs only have Python 3.""" 60*9c5db199SXin Li return '--py_version=3' 61*9c5db199SXin Li 62*9c5db199SXin Li 63*9c5db199SXin Liclass AutodirNotFoundError(Exception): 64*9c5db199SXin Li """No Autotest installation could be found.""" 65*9c5db199SXin Li 66*9c5db199SXin Li 67*9c5db199SXin Liclass AutotestFailure(Exception): 68*9c5db199SXin Li """Gereric exception class for failures during a test run.""" 69*9c5db199SXin Li 70*9c5db199SXin Li 71*9c5db199SXin Liclass AutotestAbort(AutotestFailure): 72*9c5db199SXin Li """ 73*9c5db199SXin Li AutotestAborts are thrown when the DUT seems fine, 74*9c5db199SXin Li and the test doesn't give us an explicit reason for 75*9c5db199SXin Li failure; In this case we have no choice but to abort. 76*9c5db199SXin Li """ 77*9c5db199SXin Li 78*9c5db199SXin Li 79*9c5db199SXin Liclass AutotestDeviceError(AutotestFailure): 80*9c5db199SXin Li """ 81*9c5db199SXin Li Exceptions that inherit from AutotestDeviceError 82*9c5db199SXin Li are thrown when we can determine the current 83*9c5db199SXin Li state of the DUT and conclude that it probably 84*9c5db199SXin Li lead to the test failing; these exceptions lead 85*9c5db199SXin Li to failures instead of aborts. 86*9c5db199SXin Li """ 87*9c5db199SXin Li 88*9c5db199SXin Li 89*9c5db199SXin Liclass AutotestDeviceNotPingable(AutotestDeviceError): 90*9c5db199SXin Li """Error for when a DUT becomes unpingable.""" 91*9c5db199SXin Li 92*9c5db199SXin Li 93*9c5db199SXin Liclass AutotestDeviceNotSSHable(AutotestDeviceError): 94*9c5db199SXin Li """Error for when a DUT is pingable but not SSHable.""" 95*9c5db199SXin Li 96*9c5db199SXin Li 97*9c5db199SXin Liclass AutotestDeviceRebooted(AutotestDeviceError): 98*9c5db199SXin Li """Error for when a DUT rebooted unexpectedly.""" 99*9c5db199SXin Li 100*9c5db199SXin Li 101*9c5db199SXin Liclass Autotest(installable_object.InstallableObject): 102*9c5db199SXin Li """ 103*9c5db199SXin Li This class represents the Autotest program. 104*9c5db199SXin Li 105*9c5db199SXin Li Autotest is used to run tests automatically and collect the results. 106*9c5db199SXin Li It also supports profilers. 107*9c5db199SXin Li 108*9c5db199SXin Li Implementation details: 109*9c5db199SXin Li This is a leaf class in an abstract class hierarchy, it must 110*9c5db199SXin Li implement the unimplemented methods in parent classes. 111*9c5db199SXin Li """ 112*9c5db199SXin Li 113*9c5db199SXin Li def __init__(self, host=None): 114*9c5db199SXin Li self.host = host 115*9c5db199SXin Li self.got = False 116*9c5db199SXin Li self.installed = False 117*9c5db199SXin Li self.serverdir = utils.get_server_dir() 118*9c5db199SXin Li super(Autotest, self).__init__() 119*9c5db199SXin Li 120*9c5db199SXin Li 121*9c5db199SXin Li install_in_tmpdir = False 122*9c5db199SXin Li @classmethod 123*9c5db199SXin Li def set_install_in_tmpdir(cls, flag): 124*9c5db199SXin Li """ Sets a flag that controls whether or not Autotest should by 125*9c5db199SXin Li default be installed in a "standard" directory (e.g. 126*9c5db199SXin Li /home/autotest, /usr/local/autotest) or a temporary directory. """ 127*9c5db199SXin Li cls.install_in_tmpdir = flag 128*9c5db199SXin Li 129*9c5db199SXin Li 130*9c5db199SXin Li @classmethod 131*9c5db199SXin Li def get_client_autodir_paths(cls, host): 132*9c5db199SXin Li return global_config.global_config.get_config_value( 133*9c5db199SXin Li 'AUTOSERV', 'client_autodir_paths', type=list) 134*9c5db199SXin Li 135*9c5db199SXin Li 136*9c5db199SXin Li @classmethod 137*9c5db199SXin Li def get_installed_autodir(cls, host): 138*9c5db199SXin Li """ 139*9c5db199SXin Li Find where the Autotest client is installed on the host. 140*9c5db199SXin Li @returns an absolute path to an installed Autotest client root. 141*9c5db199SXin Li @raises AutodirNotFoundError if no Autotest installation can be found. 142*9c5db199SXin Li """ 143*9c5db199SXin Li autodir = host.get_autodir() 144*9c5db199SXin Li if autodir: 145*9c5db199SXin Li logging.debug('Using existing host autodir: %s', autodir) 146*9c5db199SXin Li return autodir 147*9c5db199SXin Li 148*9c5db199SXin Li for path in Autotest.get_client_autodir_paths(host): 149*9c5db199SXin Li try: 150*9c5db199SXin Li autotest_binary = os.path.join(path, 'bin', 'autotest') 151*9c5db199SXin Li host.run('test -x %s' % utils.sh_escape(autotest_binary)) 152*9c5db199SXin Li host.run('test -w %s' % utils.sh_escape(path)) 153*9c5db199SXin Li logging.debug('Found existing autodir at %s', path) 154*9c5db199SXin Li return path 155*9c5db199SXin Li except error.GenericHostRunError: 156*9c5db199SXin Li logging.debug('%s does not exist on %s', autotest_binary, 157*9c5db199SXin Li host.hostname) 158*9c5db199SXin Li raise AutodirNotFoundError 159*9c5db199SXin Li 160*9c5db199SXin Li 161*9c5db199SXin Li @classmethod 162*9c5db199SXin Li def get_install_dir(cls, host): 163*9c5db199SXin Li """ 164*9c5db199SXin Li Determines the location where autotest should be installed on 165*9c5db199SXin Li host. If self.install_in_tmpdir is set, it will return a unique 166*9c5db199SXin Li temporary directory that autotest can be installed in. Otherwise, looks 167*9c5db199SXin Li for an existing installation to use; if none is found, looks for a 168*9c5db199SXin Li usable directory in the global config client_autodir_paths. 169*9c5db199SXin Li """ 170*9c5db199SXin Li try: 171*9c5db199SXin Li install_dir = cls.get_installed_autodir(host) 172*9c5db199SXin Li except AutodirNotFoundError: 173*9c5db199SXin Li install_dir = cls._find_installable_dir(host) 174*9c5db199SXin Li 175*9c5db199SXin Li if cls.install_in_tmpdir: 176*9c5db199SXin Li return host.get_tmp_dir(parent=install_dir) 177*9c5db199SXin Li return install_dir 178*9c5db199SXin Li 179*9c5db199SXin Li 180*9c5db199SXin Li @classmethod 181*9c5db199SXin Li def _find_installable_dir(cls, host): 182*9c5db199SXin Li client_autodir_paths = cls.get_client_autodir_paths(host) 183*9c5db199SXin Li for path in client_autodir_paths: 184*9c5db199SXin Li try: 185*9c5db199SXin Li host.run('mkdir -p %s' % utils.sh_escape(path)) 186*9c5db199SXin Li host.run('test -w %s' % utils.sh_escape(path)) 187*9c5db199SXin Li return path 188*9c5db199SXin Li except error.AutoservRunError: 189*9c5db199SXin Li logging.debug('Failed to create %s', path) 190*9c5db199SXin Li metrics.Counter( 191*9c5db199SXin Li 'chromeos/autotest/errors/no_autotest_install_path').increment( 192*9c5db199SXin Li fields={'dut_host_name': host.hostname}) 193*9c5db199SXin Li raise error.AutoservInstallError( 194*9c5db199SXin Li 'Unable to find a place to install Autotest; tried %s' % 195*9c5db199SXin Li ', '.join(client_autodir_paths)) 196*9c5db199SXin Li 197*9c5db199SXin Li 198*9c5db199SXin Li def get_fetch_location(self): 199*9c5db199SXin Li """Generate list of locations where autotest can look for packages. 200*9c5db199SXin Li 201*9c5db199SXin Li Hosts are tagged with an attribute containing the URL from which 202*9c5db199SXin Li to source packages when running a test on that host. 203*9c5db199SXin Li 204*9c5db199SXin Li @returns the list of candidate locations to check for packages. 205*9c5db199SXin Li """ 206*9c5db199SXin Li c = global_config.global_config 207*9c5db199SXin Li repos = c.get_config_value("PACKAGES", 'fetch_location', type=list, 208*9c5db199SXin Li default=[]) 209*9c5db199SXin Li repos.reverse() 210*9c5db199SXin Li 211*9c5db199SXin Li if not server_utils.is_inside_chroot(): 212*9c5db199SXin Li # Only try to get fetch location from host attribute if the 213*9c5db199SXin Li # test is not running inside chroot. 214*9c5db199SXin Li # 215*9c5db199SXin Li # Look for the repo url via the host attribute. If we are 216*9c5db199SXin Li # not running with a full AFE autoserv will fall back to 217*9c5db199SXin Li # serving packages itself from whatever source version it is 218*9c5db199SXin Li # sync'd to rather than using the proper artifacts for the 219*9c5db199SXin Li # build on the host. 220*9c5db199SXin Li found_repo = self._get_fetch_location_from_host_attribute() 221*9c5db199SXin Li if found_repo is not None: 222*9c5db199SXin Li # Add our new repo to the end, the package manager will 223*9c5db199SXin Li # later reverse the list of repositories resulting in ours 224*9c5db199SXin Li # being first 225*9c5db199SXin Li repos.append(found_repo) 226*9c5db199SXin Li 227*9c5db199SXin Li return repos 228*9c5db199SXin Li 229*9c5db199SXin Li 230*9c5db199SXin Li def _get_fetch_location_from_host_attribute(self): 231*9c5db199SXin Li """Get repo to use for packages from host attribute, if possible. 232*9c5db199SXin Li 233*9c5db199SXin Li Hosts are tagged with an attribute containing the URL 234*9c5db199SXin Li from which to source packages when running a test on that host. 235*9c5db199SXin Li If self.host is set, attempt to look this attribute in the host info. 236*9c5db199SXin Li 237*9c5db199SXin Li @returns value of the 'job_repo_url' host attribute, if present. 238*9c5db199SXin Li """ 239*9c5db199SXin Li if not self.host: 240*9c5db199SXin Li return None 241*9c5db199SXin Li 242*9c5db199SXin Li try: 243*9c5db199SXin Li info = self.host.host_info_store.get() 244*9c5db199SXin Li except Exception as e: 245*9c5db199SXin Li # TODO(pprabhu): We really want to catch host_info.StoreError here, 246*9c5db199SXin Li # but we can't import host_info from this module. 247*9c5db199SXin Li # - autotest_lib.hosts.host_info pulls in (naturally) 248*9c5db199SXin Li # autotest_lib.hosts.__init__ 249*9c5db199SXin Li # - This pulls in all the host classes ever defined 250*9c5db199SXin Li # - That includes abstract_ssh, which depends on autotest 251*9c5db199SXin Li logging.warning('Failed to obtain host info: %r', e) 252*9c5db199SXin Li logging.warning('Skipping autotest fetch location based on %s', 253*9c5db199SXin Li JOB_REPO_URL) 254*9c5db199SXin Li return None 255*9c5db199SXin Li 256*9c5db199SXin Li job_repo_url = info.attributes.get(JOB_REPO_URL, '') 257*9c5db199SXin Li if not job_repo_url: 258*9c5db199SXin Li logging.warning("No %s for %s", JOB_REPO_URL, self.host) 259*9c5db199SXin Li return None 260*9c5db199SXin Li 261*9c5db199SXin Li logging.info('Got job repo url from host attributes: %s', 262*9c5db199SXin Li job_repo_url) 263*9c5db199SXin Li return job_repo_url 264*9c5db199SXin Li 265*9c5db199SXin Li 266*9c5db199SXin Li def install(self, host=None, autodir=None, use_packaging=True): 267*9c5db199SXin Li """Install autotest. If |host| is not None, stores it in |self.host|. 268*9c5db199SXin Li 269*9c5db199SXin Li @param host A Host instance on which autotest will be installed 270*9c5db199SXin Li @param autodir Location on the remote host to install to 271*9c5db199SXin Li @param use_packaging Enable install modes that use the packaging system. 272*9c5db199SXin Li 273*9c5db199SXin Li """ 274*9c5db199SXin Li if host: 275*9c5db199SXin Li self.host = host 276*9c5db199SXin Li self._install(host=host, autodir=autodir, use_packaging=use_packaging) 277*9c5db199SXin Li 278*9c5db199SXin Li 279*9c5db199SXin Li def install_full_client(self, host=None, autodir=None): 280*9c5db199SXin Li self._install(host=host, autodir=autodir, use_autoserv=False, 281*9c5db199SXin Li use_packaging=False) 282*9c5db199SXin Li 283*9c5db199SXin Li 284*9c5db199SXin Li def install_no_autoserv(self, host=None, autodir=None): 285*9c5db199SXin Li self._install(host=host, autodir=autodir, use_autoserv=False) 286*9c5db199SXin Li 287*9c5db199SXin Li 288*9c5db199SXin Li def _install_using_packaging(self, host, autodir): 289*9c5db199SXin Li repos = self.get_fetch_location() 290*9c5db199SXin Li if not repos: 291*9c5db199SXin Li raise error.PackageInstallError("No repos to install an " 292*9c5db199SXin Li "autotest client from") 293*9c5db199SXin Li # Make sure devserver has the autotest package staged 294*9c5db199SXin Li host.verify_job_repo_url() 295*9c5db199SXin Li pkgmgr = packages.PackageManager(autodir, hostname=host.hostname, 296*9c5db199SXin Li repo_urls=repos, 297*9c5db199SXin Li do_locking=False, 298*9c5db199SXin Li run_function=host.run, 299*9c5db199SXin Li run_function_dargs=dict(timeout=600)) 300*9c5db199SXin Li # The packages dir is used to store all the packages that 301*9c5db199SXin Li # are fetched on that client. (for the tests,deps etc. 302*9c5db199SXin Li # too apart from the client) 303*9c5db199SXin Li pkg_dir = os.path.join(autodir, 'packages') 304*9c5db199SXin Li # clean up the autodir except for the packages and result_tools 305*9c5db199SXin Li # directory. 306*9c5db199SXin Li host.run('cd %s && ls | grep -v "^packages$" | grep -v "^result_tools$"' 307*9c5db199SXin Li ' | xargs rm -rf && rm -rf .[!.]*' % autodir) 308*9c5db199SXin Li pkgmgr.install_pkg('autotest', 'client', pkg_dir, autodir, 309*9c5db199SXin Li preserve_install_dir=True) 310*9c5db199SXin Li self.installed = True 311*9c5db199SXin Li 312*9c5db199SXin Li 313*9c5db199SXin Li def _install_using_send_file(self, host, autodir): 314*9c5db199SXin Li dirs_to_exclude = set(["tests", "site_tests", "deps", "profilers", 315*9c5db199SXin Li "packages"]) 316*9c5db199SXin Li light_files = [os.path.join(self.source_material, f) 317*9c5db199SXin Li for f in os.listdir(self.source_material) 318*9c5db199SXin Li if f not in dirs_to_exclude] 319*9c5db199SXin Li host.send_file(light_files, autodir, delete_dest=True) 320*9c5db199SXin Li 321*9c5db199SXin Li # create empty dirs for all the stuff we excluded 322*9c5db199SXin Li commands = [] 323*9c5db199SXin Li for path in dirs_to_exclude: 324*9c5db199SXin Li abs_path = os.path.join(autodir, path) 325*9c5db199SXin Li abs_path = utils.sh_escape(abs_path) 326*9c5db199SXin Li commands.append("mkdir -p '%s'" % abs_path) 327*9c5db199SXin Li commands.append("touch '%s'/__init__.py" % abs_path) 328*9c5db199SXin Li host.run(';'.join(commands)) 329*9c5db199SXin Li 330*9c5db199SXin Li 331*9c5db199SXin Li def _install(self, host=None, autodir=None, use_autoserv=True, 332*9c5db199SXin Li use_packaging=True): 333*9c5db199SXin Li """ 334*9c5db199SXin Li Install autotest. If get() was not called previously, an 335*9c5db199SXin Li attempt will be made to install from the autotest svn 336*9c5db199SXin Li repository. 337*9c5db199SXin Li 338*9c5db199SXin Li @param host A Host instance on which autotest will be installed 339*9c5db199SXin Li @param autodir Location on the remote host to install to 340*9c5db199SXin Li @param use_autoserv Enable install modes that depend on the client 341*9c5db199SXin Li running with the autoserv harness 342*9c5db199SXin Li @param use_packaging Enable install modes that use the packaging system 343*9c5db199SXin Li 344*9c5db199SXin Li @exception AutoservError if a tarball was not specified and 345*9c5db199SXin Li the target host does not have svn installed in its path 346*9c5db199SXin Li """ 347*9c5db199SXin Li if not host: 348*9c5db199SXin Li host = self.host 349*9c5db199SXin Li if not self.got: 350*9c5db199SXin Li self.get() 351*9c5db199SXin Li host.wait_up(timeout=30) 352*9c5db199SXin Li host.setup() 353*9c5db199SXin Li # B/203609358 someting is removing telemetry. Adding this to check the 354*9c5db199SXin Li # status of the folder as early as possible. 355*9c5db199SXin Li logging.info("Installing autotest on %s", host.hostname) 356*9c5db199SXin Li 357*9c5db199SXin Li # set up the autotest directory on the remote machine 358*9c5db199SXin Li if not autodir: 359*9c5db199SXin Li autodir = self.get_install_dir(host) 360*9c5db199SXin Li logging.info('Using installation dir %s', autodir) 361*9c5db199SXin Li host.set_autodir(autodir) 362*9c5db199SXin Li host.run('mkdir -p %s' % utils.sh_escape(autodir)) 363*9c5db199SXin Li 364*9c5db199SXin Li # make sure there are no files in $AUTODIR/results 365*9c5db199SXin Li results_path = os.path.join(autodir, 'results') 366*9c5db199SXin Li host.run('rm -rf %s/*' % utils.sh_escape(results_path), 367*9c5db199SXin Li ignore_status=True) 368*9c5db199SXin Li 369*9c5db199SXin Li # Fetch the autotest client from the nearest repository 370*9c5db199SXin Li if use_packaging: 371*9c5db199SXin Li try: 372*9c5db199SXin Li self._install_using_packaging(host, autodir) 373*9c5db199SXin Li logging.info("Installation of autotest completed using the " 374*9c5db199SXin Li "packaging system.") 375*9c5db199SXin Li return 376*9c5db199SXin Li except (error.PackageInstallError, error.AutoservRunError, 377*9c5db199SXin Li global_config.ConfigError) as e: 378*9c5db199SXin Li logging.info("Could not install autotest using the packaging " 379*9c5db199SXin Li "system: %s. Trying other methods", e) 380*9c5db199SXin Li else: 381*9c5db199SXin Li # Delete the package checksum file to force dut updating local 382*9c5db199SXin Li # packages. 383*9c5db199SXin Li command = ('rm -f "%s"' % 384*9c5db199SXin Li (os.path.join(autodir, packages.CHECKSUM_FILE))) 385*9c5db199SXin Li host.run(command) 386*9c5db199SXin Li 387*9c5db199SXin Li # try to install from file or directory 388*9c5db199SXin Li if self.source_material: 389*9c5db199SXin Li c = global_config.global_config 390*9c5db199SXin Li supports_autoserv_packaging = c.get_config_value( 391*9c5db199SXin Li "PACKAGES", "serve_packages_from_autoserv", type=bool) 392*9c5db199SXin Li # Copy autotest recursively 393*9c5db199SXin Li if supports_autoserv_packaging and use_autoserv: 394*9c5db199SXin Li self._install_using_send_file(host, autodir) 395*9c5db199SXin Li else: 396*9c5db199SXin Li host.send_file(self.source_material, autodir, delete_dest=True) 397*9c5db199SXin Li logging.info("Installation of autotest completed from %s", 398*9c5db199SXin Li self.source_material) 399*9c5db199SXin Li self.installed = True 400*9c5db199SXin Li else: 401*9c5db199SXin Li # if that fails try to install using svn 402*9c5db199SXin Li if utils.run('which svn').exit_status: 403*9c5db199SXin Li raise error.AutoservError( 404*9c5db199SXin Li 'svn not found on target machine: %s' % 405*9c5db199SXin Li host.hostname) 406*9c5db199SXin Li try: 407*9c5db199SXin Li host.run('svn checkout %s %s' % (AUTOTEST_SVN, autodir)) 408*9c5db199SXin Li except error.AutoservRunError as e: 409*9c5db199SXin Li host.run('svn checkout %s %s' % (AUTOTEST_HTTP, autodir)) 410*9c5db199SXin Li logging.info("Installation of autotest completed using SVN.") 411*9c5db199SXin Li self.installed = True 412*9c5db199SXin Li 413*9c5db199SXin Li # TODO(milleral): http://crbug.com/258161 414*9c5db199SXin Li # Send over the most recent global_config.ini after installation if one 415*9c5db199SXin Li # is available. 416*9c5db199SXin Li # This code is a bit duplicated from 417*9c5db199SXin Li # _Run._create_client_config_file, but oh well. 418*9c5db199SXin Li if self.installed and self.source_material: 419*9c5db199SXin Li self._send_shadow_config() 420*9c5db199SXin Li 421*9c5db199SXin Li # sync the disk, to avoid getting 0-byte files if a test resets the DUT 422*9c5db199SXin Li host.run(os.path.join(autodir, 'bin', 'fs_sync.py'), 423*9c5db199SXin Li ignore_status=True) 424*9c5db199SXin Li 425*9c5db199SXin Li def _send_shadow_config(self): 426*9c5db199SXin Li logging.info('Installing updated global_config.ini.') 427*9c5db199SXin Li destination = os.path.join(self.host.get_autodir(), 428*9c5db199SXin Li 'global_config.ini') 429*9c5db199SXin Li with tempfile.NamedTemporaryFile(mode='w') as client_config: 430*9c5db199SXin Li config = global_config.global_config 431*9c5db199SXin Li client_section = config.get_section_values('CLIENT') 432*9c5db199SXin Li client_section.write(client_config) 433*9c5db199SXin Li client_config.flush() 434*9c5db199SXin Li self.host.send_file(client_config.name, destination) 435*9c5db199SXin Li 436*9c5db199SXin Li 437*9c5db199SXin Li def uninstall(self, host=None): 438*9c5db199SXin Li """ 439*9c5db199SXin Li Uninstall (i.e. delete) autotest. Removes the autotest client install 440*9c5db199SXin Li from the specified host. 441*9c5db199SXin Li 442*9c5db199SXin Li @params host a Host instance from which the client will be removed 443*9c5db199SXin Li """ 444*9c5db199SXin Li if not self.installed: 445*9c5db199SXin Li return 446*9c5db199SXin Li if not host: 447*9c5db199SXin Li host = self.host 448*9c5db199SXin Li autodir = host.get_autodir() 449*9c5db199SXin Li if not autodir: 450*9c5db199SXin Li return 451*9c5db199SXin Li 452*9c5db199SXin Li # perform the actual uninstall 453*9c5db199SXin Li host.run("rm -rf %s" % utils.sh_escape(autodir), ignore_status=True) 454*9c5db199SXin Li host.set_autodir(None) 455*9c5db199SXin Li self.installed = False 456*9c5db199SXin Li 457*9c5db199SXin Li 458*9c5db199SXin Li def get(self, location=None): 459*9c5db199SXin Li if not location: 460*9c5db199SXin Li location = os.path.join(self.serverdir, '../client') 461*9c5db199SXin Li location = os.path.abspath(location) 462*9c5db199SXin Li installable_object.InstallableObject.get(self, location) 463*9c5db199SXin Li self.got = True 464*9c5db199SXin Li 465*9c5db199SXin Li 466*9c5db199SXin Li def run(self, control_file, results_dir='.', host=None, timeout=None, 467*9c5db199SXin Li tag=None, parallel_flag=False, background=False, 468*9c5db199SXin Li client_disconnect_timeout=None, use_packaging=True): 469*9c5db199SXin Li """ 470*9c5db199SXin Li Run an autotest job on the remote machine. 471*9c5db199SXin Li 472*9c5db199SXin Li @param control_file: An open file-like-obj of the control file. 473*9c5db199SXin Li @param results_dir: A str path where the results should be stored 474*9c5db199SXin Li on the local filesystem. 475*9c5db199SXin Li @param host: A Host instance on which the control file should 476*9c5db199SXin Li be run. 477*9c5db199SXin Li @param timeout: Maximum number of seconds to wait for the run or None. 478*9c5db199SXin Li @param tag: Tag name for the client side instance of autotest. 479*9c5db199SXin Li @param parallel_flag: Flag set when multiple jobs are run at the 480*9c5db199SXin Li same time. 481*9c5db199SXin Li @param background: Indicates that the client should be launched as 482*9c5db199SXin Li a background job; the code calling run will be responsible 483*9c5db199SXin Li for monitoring the client and collecting the results. 484*9c5db199SXin Li @param client_disconnect_timeout: Seconds to wait for the remote host 485*9c5db199SXin Li to come back after a reboot. Defaults to the host setting for 486*9c5db199SXin Li DEFAULT_REBOOT_TIMEOUT. 487*9c5db199SXin Li 488*9c5db199SXin Li @raises AutotestRunError: If there is a problem executing 489*9c5db199SXin Li the control file. 490*9c5db199SXin Li """ 491*9c5db199SXin Li host = self._get_host_and_setup(host, use_packaging=use_packaging) 492*9c5db199SXin Li logging.debug('Autotest job starts on remote host: %s', 493*9c5db199SXin Li host.hostname) 494*9c5db199SXin Li results_dir = os.path.abspath(results_dir) 495*9c5db199SXin Li 496*9c5db199SXin Li if client_disconnect_timeout is None: 497*9c5db199SXin Li client_disconnect_timeout = host.DEFAULT_REBOOT_TIMEOUT 498*9c5db199SXin Li 499*9c5db199SXin Li if tag: 500*9c5db199SXin Li results_dir = os.path.join(results_dir, tag) 501*9c5db199SXin Li 502*9c5db199SXin Li atrun = _Run(host, results_dir, tag, parallel_flag, background) 503*9c5db199SXin Li self._do_run(control_file, results_dir, host, atrun, timeout, 504*9c5db199SXin Li client_disconnect_timeout, use_packaging=use_packaging) 505*9c5db199SXin Li 506*9c5db199SXin Li 507*9c5db199SXin Li def _get_host_and_setup(self, host, use_packaging=True): 508*9c5db199SXin Li if not host: 509*9c5db199SXin Li host = self.host 510*9c5db199SXin Li if not self.installed: 511*9c5db199SXin Li self.install(host, use_packaging=use_packaging) 512*9c5db199SXin Li 513*9c5db199SXin Li host.wait_up(timeout=30) 514*9c5db199SXin Li return host 515*9c5db199SXin Li 516*9c5db199SXin Li 517*9c5db199SXin Li def _do_run(self, control_file, results_dir, host, atrun, timeout, 518*9c5db199SXin Li client_disconnect_timeout, use_packaging=True): 519*9c5db199SXin Li try: 520*9c5db199SXin Li atrun.verify_machine() 521*9c5db199SXin Li except: 522*9c5db199SXin Li logging.error("Verify failed on %s. Reinstalling autotest", 523*9c5db199SXin Li host.hostname) 524*9c5db199SXin Li self.install(host) 525*9c5db199SXin Li atrun.verify_machine() 526*9c5db199SXin Li debug = os.path.join(results_dir, 'debug') 527*9c5db199SXin Li try: 528*9c5db199SXin Li os.makedirs(debug) 529*9c5db199SXin Li except Exception: 530*9c5db199SXin Li pass 531*9c5db199SXin Li 532*9c5db199SXin Li delete_file_list = [atrun.remote_control_file, 533*9c5db199SXin Li atrun.remote_control_file + '.state', 534*9c5db199SXin Li atrun.manual_control_file, 535*9c5db199SXin Li atrun.manual_control_file + '.state'] 536*9c5db199SXin Li cmd = ';'.join('rm -f ' + control for control in delete_file_list) 537*9c5db199SXin Li host.run(cmd, ignore_status=True) 538*9c5db199SXin Li 539*9c5db199SXin Li tmppath = utils.get(control_file, local_copy=True) 540*9c5db199SXin Li 541*9c5db199SXin Li # build up the initialization prologue for the control file 542*9c5db199SXin Li prologue_lines = [] 543*9c5db199SXin Li 544*9c5db199SXin Li # Add the additional user arguments 545*9c5db199SXin Li prologue_lines.append("args = %r\n" % self.job.args) 546*9c5db199SXin Li 547*9c5db199SXin Li # If the packaging system is being used, add the repository list. 548*9c5db199SXin Li repos = None 549*9c5db199SXin Li try: 550*9c5db199SXin Li if use_packaging: 551*9c5db199SXin Li repos = self.get_fetch_location() 552*9c5db199SXin Li prologue_lines.append('job.add_repository(%s)\n' % repos) 553*9c5db199SXin Li else: 554*9c5db199SXin Li logging.debug('use_packaging is set to False, do not add any ' 555*9c5db199SXin Li 'repository.') 556*9c5db199SXin Li except global_config.ConfigError as e: 557*9c5db199SXin Li # If repos is defined packaging is enabled so log the error 558*9c5db199SXin Li if repos: 559*9c5db199SXin Li logging.error(e) 560*9c5db199SXin Li 561*9c5db199SXin Li # on full-size installs, turn on any profilers the server is using 562*9c5db199SXin Li if not atrun.background: 563*9c5db199SXin Li running_profilers = six.iteritems(host.job.profilers.add_log) 564*9c5db199SXin Li for profiler, (args, dargs) in running_profilers: 565*9c5db199SXin Li call_args = [repr(profiler)] 566*9c5db199SXin Li call_args += [repr(arg) for arg in args] 567*9c5db199SXin Li call_args += ["%s=%r" % item for item in six.iteritems(dargs)] 568*9c5db199SXin Li prologue_lines.append("job.profilers.add(%s)\n" 569*9c5db199SXin Li % ", ".join(call_args)) 570*9c5db199SXin Li cfile = "".join(prologue_lines) 571*9c5db199SXin Li 572*9c5db199SXin Li cfile += open(tmppath).read() 573*9c5db199SXin Li open(tmppath, "w").write(cfile) 574*9c5db199SXin Li 575*9c5db199SXin Li # Create and copy state file to remote_control_file + '.state' 576*9c5db199SXin Li state_file = host.job.preprocess_client_state() 577*9c5db199SXin Li host.send_file(state_file, atrun.remote_control_file + '.init.state') 578*9c5db199SXin Li os.remove(state_file) 579*9c5db199SXin Li 580*9c5db199SXin Li # Copy control_file to remote_control_file on the host 581*9c5db199SXin Li host.send_file(tmppath, atrun.remote_control_file) 582*9c5db199SXin Li if os.path.abspath(tmppath) != os.path.abspath(control_file): 583*9c5db199SXin Li os.remove(tmppath) 584*9c5db199SXin Li 585*9c5db199SXin Li atrun.execute_control( 586*9c5db199SXin Li timeout=timeout, 587*9c5db199SXin Li client_disconnect_timeout=client_disconnect_timeout) 588*9c5db199SXin Li 589*9c5db199SXin Li 590*9c5db199SXin Li @staticmethod 591*9c5db199SXin Li def extract_test_failure_msg(failure_status_line): 592*9c5db199SXin Li """Extract the test failure message from the status line. 593*9c5db199SXin Li 594*9c5db199SXin Li @param failure_status_line: String of test failure status line, it will 595*9c5db199SXin Li look like: 596*9c5db199SXin Li FAIL <test name> <test name> timestamp=<ts> localtime=<lt> <reason> 597*9c5db199SXin Li 598*9c5db199SXin Li @returns String of the reason, return empty string if we can't regex out 599*9c5db199SXin Li reason. 600*9c5db199SXin Li """ 601*9c5db199SXin Li fail_msg = '' 602*9c5db199SXin Li match = _FAIL_STATUS_RE.match(failure_status_line) 603*9c5db199SXin Li if match: 604*9c5db199SXin Li fail_msg = match.group('fail_msg') 605*9c5db199SXin Li return fail_msg 606*9c5db199SXin Li 607*9c5db199SXin Li 608*9c5db199SXin Li @classmethod 609*9c5db199SXin Li def _check_client_test_result(cls, host, test_name): 610*9c5db199SXin Li """ 611*9c5db199SXin Li Check result of client test. 612*9c5db199SXin Li Autotest will store results in the file name status. 613*9c5db199SXin Li We check that second to last line in that file begins with 'END GOOD' 614*9c5db199SXin Li 615*9c5db199SXin Li @raises TestFail: If client test does not pass. 616*9c5db199SXin Li """ 617*9c5db199SXin Li client_result_dir = '%s/results/default' % host.autodir 618*9c5db199SXin Li command = 'tail -2 %s/status | head -1' % client_result_dir 619*9c5db199SXin Li status = host.run(command).stdout.strip() 620*9c5db199SXin Li logging.info(status) 621*9c5db199SXin Li if status[:8] != 'END GOOD': 622*9c5db199SXin Li test_fail_status_line_cmd = ( 623*9c5db199SXin Li 'grep "^\s*FAIL\s*%s" %s/status | tail -n 1' % 624*9c5db199SXin Li (test_name, client_result_dir)) 625*9c5db199SXin Li test_fail_msg = cls.extract_test_failure_msg( 626*9c5db199SXin Li host.run(test_fail_status_line_cmd).stdout.strip()) 627*9c5db199SXin Li test_fail_msg_reason = ('' if not test_fail_msg 628*9c5db199SXin Li else ' (reason: %s)' % test_fail_msg) 629*9c5db199SXin Li test_fail_status = '%s client test did not pass%s.' % ( 630*9c5db199SXin Li test_name, test_fail_msg_reason) 631*9c5db199SXin Li raise error.TestFail(test_fail_status) 632*9c5db199SXin Li 633*9c5db199SXin Li 634*9c5db199SXin Li def run_timed_test(self, test_name, results_dir='.', host=None, 635*9c5db199SXin Li timeout=None, parallel_flag=False, background=False, 636*9c5db199SXin Li client_disconnect_timeout=None, *args, **dargs): 637*9c5db199SXin Li """ 638*9c5db199SXin Li Assemble a tiny little control file to just run one test, 639*9c5db199SXin Li and run it as an autotest client-side test 640*9c5db199SXin Li """ 641*9c5db199SXin Li if not host: 642*9c5db199SXin Li host = self.host 643*9c5db199SXin Li if not self.installed: 644*9c5db199SXin Li self.install(host) 645*9c5db199SXin Li 646*9c5db199SXin Li opts = ["%s=%s" % (o[0], repr(o[1])) for o in dargs.items()] 647*9c5db199SXin Li cmd = ", ".join([repr(test_name)] + list(map(repr, args)) + opts) 648*9c5db199SXin Li control = "job.run_test(%s)\n" % cmd 649*9c5db199SXin Li self.run(control, results_dir, host, timeout=timeout, 650*9c5db199SXin Li parallel_flag=parallel_flag, background=background, 651*9c5db199SXin Li client_disconnect_timeout=client_disconnect_timeout) 652*9c5db199SXin Li 653*9c5db199SXin Li if dargs.get('check_client_result', False): 654*9c5db199SXin Li self._check_client_test_result(host, test_name) 655*9c5db199SXin Li 656*9c5db199SXin Li 657*9c5db199SXin Li def run_test(self, 658*9c5db199SXin Li test_name, 659*9c5db199SXin Li results_dir='.', 660*9c5db199SXin Li host=None, 661*9c5db199SXin Li parallel_flag=False, 662*9c5db199SXin Li background=False, 663*9c5db199SXin Li client_disconnect_timeout=None, 664*9c5db199SXin Li timeout=None, 665*9c5db199SXin Li *args, 666*9c5db199SXin Li **dargs): 667*9c5db199SXin Li self.run_timed_test( 668*9c5db199SXin Li test_name, 669*9c5db199SXin Li results_dir, 670*9c5db199SXin Li host, 671*9c5db199SXin Li timeout=timeout, 672*9c5db199SXin Li parallel_flag=parallel_flag, 673*9c5db199SXin Li background=background, 674*9c5db199SXin Li client_disconnect_timeout=client_disconnect_timeout, 675*9c5db199SXin Li *args, 676*9c5db199SXin Li **dargs) 677*9c5db199SXin Li 678*9c5db199SXin Li 679*9c5db199SXin Li def run_static_method(self, module, method, results_dir='.', host=None, 680*9c5db199SXin Li *args): 681*9c5db199SXin Li """Runs a non-instance method with |args| from |module| on the client. 682*9c5db199SXin Li 683*9c5db199SXin Li This method runs a static/class/module autotest method on the client. 684*9c5db199SXin Li For example: 685*9c5db199SXin Li run_static_method("autotest_lib.client.cros.cros_ui", "reboot") 686*9c5db199SXin Li 687*9c5db199SXin Li Will run autotest_lib.client.cros.cros_ui.reboot() on the client. 688*9c5db199SXin Li 689*9c5db199SXin Li @param module: module name as you would refer to it when importing in a 690*9c5db199SXin Li control file. e.g. autotest_lib.client.common_lib.module_name. 691*9c5db199SXin Li @param method: the method you want to call. 692*9c5db199SXin Li @param results_dir: A str path where the results should be stored 693*9c5db199SXin Li on the local filesystem. 694*9c5db199SXin Li @param host: A Host instance on which the control file should 695*9c5db199SXin Li be run. 696*9c5db199SXin Li @param args: args to pass to the method. 697*9c5db199SXin Li """ 698*9c5db199SXin Li control = "\n".join(["import %s" % module, 699*9c5db199SXin Li "%s.%s(%s)\n" % (module, method, 700*9c5db199SXin Li ','.join(map(repr, args)))]) 701*9c5db199SXin Li self.run(control, results_dir=results_dir, host=host) 702*9c5db199SXin Li 703*9c5db199SXin Li 704*9c5db199SXin Liclass _Run(object): 705*9c5db199SXin Li """ 706*9c5db199SXin Li Represents a run of autotest control file. This class maintains 707*9c5db199SXin Li all the state necessary as an autotest control file is executed. 708*9c5db199SXin Li 709*9c5db199SXin Li It is not intended to be used directly, rather control files 710*9c5db199SXin Li should be run using the run method in Autotest. 711*9c5db199SXin Li """ 712*9c5db199SXin Li def __init__(self, host, results_dir, tag, parallel_flag, background): 713*9c5db199SXin Li self.host = host 714*9c5db199SXin Li self.results_dir = results_dir 715*9c5db199SXin Li self.tag = tag 716*9c5db199SXin Li self.parallel_flag = parallel_flag 717*9c5db199SXin Li self.background = background 718*9c5db199SXin Li self.autodir = Autotest.get_installed_autodir(self.host) 719*9c5db199SXin Li control = os.path.join(self.autodir, 'control') 720*9c5db199SXin Li if tag: 721*9c5db199SXin Li control += '.' + tag 722*9c5db199SXin Li self.manual_control_file = control 723*9c5db199SXin Li self.remote_control_file = control + '.autoserv' 724*9c5db199SXin Li self.config_file = os.path.join(self.autodir, 'global_config.ini') 725*9c5db199SXin Li 726*9c5db199SXin Li 727*9c5db199SXin Li def verify_machine(self): 728*9c5db199SXin Li binary = os.path.join(self.autodir, 'bin/autotest') 729*9c5db199SXin Li at_check = "test -e {} && echo True || echo False".format(binary) 730*9c5db199SXin Li if not self.parallel_flag: 731*9c5db199SXin Li tmpdir = os.path.join(self.autodir, 'tmp') 732*9c5db199SXin Li download = os.path.join(self.autodir, 'tests/download') 733*9c5db199SXin Li at_check += "; umount {}; umount {}".format(tmpdir, download) 734*9c5db199SXin Li # Check if the test dir is missing. 735*9c5db199SXin Li if "False" in str(self.host.run(at_check, ignore_status=True).stdout): 736*9c5db199SXin Li raise error.AutoservInstallError( 737*9c5db199SXin Li "Autotest does not appear to be installed") 738*9c5db199SXin Li 739*9c5db199SXin Li 740*9c5db199SXin Li 741*9c5db199SXin Li def get_base_cmd_args(self, section): 742*9c5db199SXin Li args = ['--verbose'] 743*9c5db199SXin Li if section > 0: 744*9c5db199SXin Li args.append('-c') 745*9c5db199SXin Li if self.tag: 746*9c5db199SXin Li args.append('-t %s' % self.tag) 747*9c5db199SXin Li if self.host.job.use_external_logging(): 748*9c5db199SXin Li args.append('-l') 749*9c5db199SXin Li if self.host.hostname: 750*9c5db199SXin Li args.append('--hostname=%s' % self.host.hostname) 751*9c5db199SXin Li args.append('--user=%s' % self.host.job.user) 752*9c5db199SXin Li 753*9c5db199SXin Li args.append(self.remote_control_file) 754*9c5db199SXin Li return args 755*9c5db199SXin Li 756*9c5db199SXin Li 757*9c5db199SXin Li def get_background_cmd(self, section): 758*9c5db199SXin Li cmd = [ 759*9c5db199SXin Li 'nohup', 760*9c5db199SXin Li os.path.join(self.autodir, 'bin/autotest_client'), 761*9c5db199SXin Li _set_py_version() 762*9c5db199SXin Li ] 763*9c5db199SXin Li cmd += self.get_base_cmd_args(section) 764*9c5db199SXin Li cmd += ['>/dev/null', '2>/dev/null', '&'] 765*9c5db199SXin Li return ' '.join(cmd) 766*9c5db199SXin Li 767*9c5db199SXin Li 768*9c5db199SXin Li def get_daemon_cmd(self, section, monitor_dir): 769*9c5db199SXin Li cmd = [ 770*9c5db199SXin Li 'nohup', 771*9c5db199SXin Li os.path.join(self.autodir, 'bin/autotestd'), monitor_dir, 772*9c5db199SXin Li '-H autoserv', 773*9c5db199SXin Li _set_py_version() 774*9c5db199SXin Li ] 775*9c5db199SXin Li cmd += self.get_base_cmd_args(section) 776*9c5db199SXin Li cmd += ['>/dev/null', '2>/dev/null', '&'] 777*9c5db199SXin Li return ' '.join(cmd) 778*9c5db199SXin Li 779*9c5db199SXin Li 780*9c5db199SXin Li def get_monitor_cmd(self, monitor_dir, stdout_read, stderr_read): 781*9c5db199SXin Li cmd = [ 782*9c5db199SXin Li os.path.join(self.autodir, 'bin', 'autotestd_monitor'), 783*9c5db199SXin Li monitor_dir, 784*9c5db199SXin Li str(stdout_read), 785*9c5db199SXin Li str(stderr_read), 786*9c5db199SXin Li _set_py_version() 787*9c5db199SXin Li ] 788*9c5db199SXin Li return ' '.join(cmd) 789*9c5db199SXin Li 790*9c5db199SXin Li 791*9c5db199SXin Li def get_client_log(self): 792*9c5db199SXin Li """Find what the "next" client.* prefix should be 793*9c5db199SXin Li 794*9c5db199SXin Li @returns A string of the form client.INTEGER that should be prefixed 795*9c5db199SXin Li to all client debug log files. 796*9c5db199SXin Li """ 797*9c5db199SXin Li max_digit = -1 798*9c5db199SXin Li debug_dir = os.path.join(self.results_dir, 'debug') 799*9c5db199SXin Li client_logs = glob.glob(os.path.join(debug_dir, 'client.*.*')) 800*9c5db199SXin Li for log in client_logs: 801*9c5db199SXin Li _, number, _ = log.split('.', 2) 802*9c5db199SXin Li if number.isdigit(): 803*9c5db199SXin Li max_digit = max(max_digit, int(number)) 804*9c5db199SXin Li return 'client.%d' % (max_digit + 1) 805*9c5db199SXin Li 806*9c5db199SXin Li 807*9c5db199SXin Li def copy_client_config_file(self, client_log_prefix=None): 808*9c5db199SXin Li """ 809*9c5db199SXin Li Create and copy the client config file based on the server config. 810*9c5db199SXin Li 811*9c5db199SXin Li @param client_log_prefix: Optional prefix to prepend to log files. 812*9c5db199SXin Li """ 813*9c5db199SXin Li client_config_file = self._create_client_config_file(client_log_prefix) 814*9c5db199SXin Li self.host.send_file(client_config_file, self.config_file) 815*9c5db199SXin Li os.remove(client_config_file) 816*9c5db199SXin Li 817*9c5db199SXin Li 818*9c5db199SXin Li def _create_client_config_file(self, client_log_prefix=None): 819*9c5db199SXin Li """ 820*9c5db199SXin Li Create a temporary file with the [CLIENT] section configuration values 821*9c5db199SXin Li taken from the server global_config.ini. 822*9c5db199SXin Li 823*9c5db199SXin Li @param client_log_prefix: Optional prefix to prepend to log files. 824*9c5db199SXin Li 825*9c5db199SXin Li @return: Path of the temporary file generated. 826*9c5db199SXin Li """ 827*9c5db199SXin Li config = global_config.global_config.get_section_values('CLIENT') 828*9c5db199SXin Li if client_log_prefix: 829*9c5db199SXin Li config.set('CLIENT', 'default_logging_name', client_log_prefix) 830*9c5db199SXin Li return self._create_aux_file(config.write) 831*9c5db199SXin Li 832*9c5db199SXin Li 833*9c5db199SXin Li def _create_aux_file(self, func, *args): 834*9c5db199SXin Li """ 835*9c5db199SXin Li Creates a temporary file and writes content to it according to a 836*9c5db199SXin Li content creation function. The file object is appended to *args, which 837*9c5db199SXin Li is then passed to the content creation function 838*9c5db199SXin Li 839*9c5db199SXin Li @param func: Function that will be used to write content to the 840*9c5db199SXin Li temporary file. 841*9c5db199SXin Li @param *args: List of parameters that func takes. 842*9c5db199SXin Li @return: Path to the temporary file that was created. 843*9c5db199SXin Li """ 844*9c5db199SXin Li fd, path = tempfile.mkstemp(dir=self.host.job.tmpdir) 845*9c5db199SXin Li aux_file = os.fdopen(fd, "w") 846*9c5db199SXin Li try: 847*9c5db199SXin Li list_args = list(args) 848*9c5db199SXin Li list_args.append(aux_file) 849*9c5db199SXin Li func(*list_args) 850*9c5db199SXin Li finally: 851*9c5db199SXin Li aux_file.close() 852*9c5db199SXin Li return path 853*9c5db199SXin Li 854*9c5db199SXin Li 855*9c5db199SXin Li @staticmethod 856*9c5db199SXin Li def is_client_job_finished(last_line): 857*9c5db199SXin Li return bool(re.match(r'^\t*END .*\t[\w.-]+\t[\w.-]+\t.*$', last_line)) 858*9c5db199SXin Li 859*9c5db199SXin Li 860*9c5db199SXin Li @staticmethod 861*9c5db199SXin Li def is_client_job_rebooting(last_line): 862*9c5db199SXin Li return bool(re.match(r'^\t*GOOD\t[\w.-]+\treboot\.start.*$', last_line)) 863*9c5db199SXin Li 864*9c5db199SXin Li 865*9c5db199SXin Li # Roughly ordered list from concrete to less specific reboot causes. 866*9c5db199SXin Li _failure_reasons = [ 867*9c5db199SXin Li # Try to find possible reasons leading towards failure. 868*9c5db199SXin Li ('ethernet recovery methods have failed. Rebooting.', 869*9c5db199SXin Li 'dead ethernet dongle crbug/1031035'), 870*9c5db199SXin Li # GPU hangs are not always recovered from. 871*9c5db199SXin Li ('[drm:amdgpu_job_timedout] \*ERROR\* ring gfx timeout', 872*9c5db199SXin Li 'drm ring gfx timeout'), 873*9c5db199SXin Li ('[drm:do_aquire_global_lock] \*ERROR(.*)hw_done or flip_done timed', 874*9c5db199SXin Li 'drm hw/flip timeout'), 875*9c5db199SXin Li ('[drm:i915_hangcheck_hung] \*ERROR\* Hangcheck(.*)GPU hung', 876*9c5db199SXin Li 'drm GPU hung'), 877*9c5db199SXin Li # TODO(ihf): try to get a better magic signature for kernel crashes. 878*9c5db199SXin Li ('BUG: unable to handle kernel paging request', 'kernel paging'), 879*9c5db199SXin Li ('Kernel panic - not syncing: Out of memory', 'kernel out of memory'), 880*9c5db199SXin Li ('Kernel panic - not syncing', 'kernel panic'), 881*9c5db199SXin Li # Fish for user mode killing OOM messages. Shows unstable system. 882*9c5db199SXin Li ('out_of_memory', 'process out of memory'), 883*9c5db199SXin Li # Reboot was bad enough to have truncated the logs. 884*9c5db199SXin Li ('crash_reporter(.*)Stored kcrash', 'kcrash'), 885*9c5db199SXin Li ('crash_reporter(.*)Last shutdown was not clean', 'not clean'), 886*9c5db199SXin Li ] 887*9c5db199SXin Li 888*9c5db199SXin Li def _diagnose_reboot(self): 889*9c5db199SXin Li """ 890*9c5db199SXin Li Runs diagnostic check on a rebooted DUT. 891*9c5db199SXin Li 892*9c5db199SXin Li TODO(ihf): if this analysis is useful consider moving the code to the 893*9c5db199SXin Li DUT into a script and call it from here. This is more 894*9c5db199SXin Li powerful and might be cleaner to grow in functionality. But 895*9c5db199SXin Li it may also be less robust if stateful is damaged during the 896*9c5db199SXin Li reboot. 897*9c5db199SXin Li 898*9c5db199SXin Li @returns msg describing reboot reason. 899*9c5db199SXin Li """ 900*9c5db199SXin Li reasons = [] 901*9c5db199SXin Li for (message, bucket) in self._failure_reasons: 902*9c5db199SXin Li # Use -a option for grep to avoid "binary file" warning to stdout. 903*9c5db199SXin Li # The grep -v is added to not match itself in the log (across jobs). 904*9c5db199SXin Li # Using grep is slightly problematic as it finds any reason, not 905*9c5db199SXin Li # just the most recent reason (since 2 boots ago), so it may guess 906*9c5db199SXin Li # wrong. Multiple reboots are unusual in the lab setting though and 907*9c5db199SXin Li # it is better to have a reasonable guess than no reason at all. 908*9c5db199SXin Li found = self.host.run( 909*9c5db199SXin Li "grep -aE '" + message + "' /var/log/messages | grep -av grep", 910*9c5db199SXin Li ignore_status=True 911*9c5db199SXin Li ).stdout 912*9c5db199SXin Li if found and found.strip(): 913*9c5db199SXin Li reasons.append(bucket) 914*9c5db199SXin Li signature = 'reason unknown' 915*9c5db199SXin Li if reasons: 916*9c5db199SXin Li # Concatenate possible reasons found to obtain a magic signature. 917*9c5db199SXin Li signature = ', '.join(reasons) 918*9c5db199SXin Li return ('DUT rebooted during the test run. (%s)\n' % signature) 919*9c5db199SXin Li 920*9c5db199SXin Li 921*9c5db199SXin Li def _diagnose_dut(self, old_boot_id=None): 922*9c5db199SXin Li """ 923*9c5db199SXin Li Run diagnostic checks on a DUT. 924*9c5db199SXin Li 925*9c5db199SXin Li 1. ping: A dead host will not respond to pings. 926*9c5db199SXin Li 2. ssh (happens with 3.): DUT hangs usually fail in authentication 927*9c5db199SXin Li but respond to pings. 928*9c5db199SXin Li 3. Check if a reboot occured: A healthy but unexpected reboot leaves the 929*9c5db199SXin Li host running with a new boot id. 930*9c5db199SXin Li 931*9c5db199SXin Li This method will always raise an exception from the AutotestFailure 932*9c5db199SXin Li family and should only get called when the reason for a test failing 933*9c5db199SXin Li is ambiguous. 934*9c5db199SXin Li 935*9c5db199SXin Li @raises AutotestDeviceNotPingable: If the DUT doesn't respond to ping. 936*9c5db199SXin Li @raises AutotestDeviceNotSSHable: If we cannot SSH into the DUT. 937*9c5db199SXin Li @raises AutotestDeviceRebooted: If the boot id changed. 938*9c5db199SXin Li @raises AutotestAbort: If none of the above exceptions were raised. 939*9c5db199SXin Li Since we have no recourse we must abort at this stage. 940*9c5db199SXin Li """ 941*9c5db199SXin Li msg = 'Autotest client terminated unexpectedly: ' 942*9c5db199SXin Li if utils.ping(self.host.hostname, tries=1, deadline=1) != 0: 943*9c5db199SXin Li msg += 'DUT is no longer pingable, it may have rebooted or hung.\n' 944*9c5db199SXin Li raise AutotestDeviceNotPingable(msg) 945*9c5db199SXin Li 946*9c5db199SXin Li if old_boot_id: 947*9c5db199SXin Li try: 948*9c5db199SXin Li new_boot_id = self.host.get_boot_id(timeout=60) 949*9c5db199SXin Li except Exception as e: 950*9c5db199SXin Li msg += ('DUT is pingable but not SSHable, it most likely' 951*9c5db199SXin Li ' sporadically rebooted during testing. %s\n' % str(e)) 952*9c5db199SXin Li raise AutotestDeviceNotSSHable(msg) 953*9c5db199SXin Li else: 954*9c5db199SXin Li if new_boot_id != old_boot_id: 955*9c5db199SXin Li msg += self._diagnose_reboot() 956*9c5db199SXin Li raise AutotestDeviceRebooted(msg) 957*9c5db199SXin Li 958*9c5db199SXin Li msg += ('DUT is pingable, SSHable and did NOT restart ' 959*9c5db199SXin Li 'un-expectedly. We probably lost connectivity during the ' 960*9c5db199SXin Li 'test.') 961*9c5db199SXin Li else: 962*9c5db199SXin Li msg += ('DUT is pingable, could not determine if an un-expected ' 963*9c5db199SXin Li 'reboot occured during the test.') 964*9c5db199SXin Li 965*9c5db199SXin Li raise AutotestAbort(msg) 966*9c5db199SXin Li 967*9c5db199SXin Li 968*9c5db199SXin Li def log_unexpected_abort(self, stderr_redirector, old_boot_id=None): 969*9c5db199SXin Li """ 970*9c5db199SXin Li Logs that something unexpected happened, then tries to diagnose the 971*9c5db199SXin Li failure. The purpose of this function is only to close out the status 972*9c5db199SXin Li log with the appropriate error message, not to critically terminate 973*9c5db199SXin Li the program. 974*9c5db199SXin Li 975*9c5db199SXin Li @param stderr_redirector: log stream. 976*9c5db199SXin Li @param old_boot_id: boot id used to infer if a reboot occured. 977*9c5db199SXin Li """ 978*9c5db199SXin Li stderr_redirector.flush_all_buffers() 979*9c5db199SXin Li try: 980*9c5db199SXin Li self._diagnose_dut(old_boot_id) 981*9c5db199SXin Li except AutotestFailure as e: 982*9c5db199SXin Li self.host.job.record('END ABORT', None, None, str(e)) 983*9c5db199SXin Li 984*9c5db199SXin Li 985*9c5db199SXin Li def _execute_in_background(self, section, timeout): 986*9c5db199SXin Li full_cmd = self.get_background_cmd(section) 987*9c5db199SXin Li devnull = open(os.devnull, "w") 988*9c5db199SXin Li 989*9c5db199SXin Li self.copy_client_config_file(self.get_client_log()) 990*9c5db199SXin Li 991*9c5db199SXin Li self.host.job.push_execution_context(self.results_dir) 992*9c5db199SXin Li try: 993*9c5db199SXin Li result = self.host.run(full_cmd, ignore_status=True, 994*9c5db199SXin Li timeout=timeout, 995*9c5db199SXin Li stdout_tee=devnull, 996*9c5db199SXin Li stderr_tee=devnull) 997*9c5db199SXin Li finally: 998*9c5db199SXin Li self.host.job.pop_execution_context() 999*9c5db199SXin Li 1000*9c5db199SXin Li return result 1001*9c5db199SXin Li 1002*9c5db199SXin Li 1003*9c5db199SXin Li @staticmethod 1004*9c5db199SXin Li def _strip_stderr_prologue(stderr, monitor_cmd): 1005*9c5db199SXin Li """Strips the 'standard' prologue that get pre-pended to every 1006*9c5db199SXin Li remote command and returns the text that was actually written to 1007*9c5db199SXin Li stderr by the remote command. 1008*9c5db199SXin Li 1009*9c5db199SXin Li This will always strip atleast the first line ('standard' prologue), 1010*9c5db199SXin Li and strip any extra messages prior. The following are common 'extra' 1011*9c5db199SXin Li messages which could appear. 1012*9c5db199SXin Li 1013*9c5db199SXin Li 1.) Any warnings. For example, on CrOS version R90, any script running 1014*9c5db199SXin Li in python2 result in the following warning in the stderr: 1015*9c5db199SXin Li "warning: Python 2.7 is deprecated and will be removed from CrOS by 1016*9c5db199SXin Li end of 2021. All users must migrate ASAP" 1017*9c5db199SXin Li 2.) The actual command used to launch autotestd_monitor (monitor_cmd) 1018*9c5db199SXin Li 1019*9c5db199SXin Li Additionally there is a NOTE line that could be present needing also to 1020*9c5db199SXin Li be stripped. 1021*9c5db199SXin Li """ 1022*9c5db199SXin Li stderr_lines = stderr.split("\n") 1023*9c5db199SXin Li if not stderr_lines: 1024*9c5db199SXin Li return "" 1025*9c5db199SXin Li 1026*9c5db199SXin Li # If no warnings/monitor_cmd, strip only the first line 1027*9c5db199SXin Li skipn = 1 1028*9c5db199SXin Li for i, line in enumerate(stderr_lines): 1029*9c5db199SXin Li if monitor_cmd in line: 1030*9c5db199SXin Li # add *2* (1 for the index, 1 for the 'standard prolouge' 1031*9c5db199SXin Li # which follows this line). 1032*9c5db199SXin Li skipn = i + 2 1033*9c5db199SXin Li break 1034*9c5db199SXin Li 1035*9c5db199SXin Li stderr_lines = stderr_lines[skipn:] 1036*9c5db199SXin Li 1037*9c5db199SXin Li if stderr_lines[0].startswith("NOTE: autotestd_monitor"): 1038*9c5db199SXin Li del stderr_lines[0] 1039*9c5db199SXin Li return "\n".join(stderr_lines) 1040*9c5db199SXin Li 1041*9c5db199SXin Li 1042*9c5db199SXin Li def _execute_daemon(self, section, timeout, stderr_redirector, 1043*9c5db199SXin Li client_disconnect_timeout): 1044*9c5db199SXin Li monitor_dir = self.host.get_tmp_dir() 1045*9c5db199SXin Li daemon_cmd = self.get_daemon_cmd(section, monitor_dir) 1046*9c5db199SXin Li 1047*9c5db199SXin Li # grab the location for the server-side client log file 1048*9c5db199SXin Li client_log_prefix = self.get_client_log() 1049*9c5db199SXin Li client_log_path = os.path.join(self.results_dir, 'debug', 1050*9c5db199SXin Li client_log_prefix + '.log') 1051*9c5db199SXin Li client_log = open(client_log_path, 'w', LOG_BUFFER_SIZE_BYTES) 1052*9c5db199SXin Li self.copy_client_config_file(client_log_prefix) 1053*9c5db199SXin Li 1054*9c5db199SXin Li stdout_read = stderr_read = 0 1055*9c5db199SXin Li self.host.job.push_execution_context(self.results_dir) 1056*9c5db199SXin Li try: 1057*9c5db199SXin Li self.host.run(daemon_cmd, ignore_status=True, timeout=timeout) 1058*9c5db199SXin Li disconnect_warnings = [] 1059*9c5db199SXin Li while True: 1060*9c5db199SXin Li monitor_cmd = self.get_monitor_cmd(monitor_dir, stdout_read, 1061*9c5db199SXin Li stderr_read) 1062*9c5db199SXin Li try: 1063*9c5db199SXin Li result = self.host.run(monitor_cmd, ignore_status=True, 1064*9c5db199SXin Li timeout=timeout, 1065*9c5db199SXin Li stdout_tee=client_log, 1066*9c5db199SXin Li stderr_tee=stderr_redirector) 1067*9c5db199SXin Li except error.AutoservRunError as e: 1068*9c5db199SXin Li result = e.result_obj 1069*9c5db199SXin Li result.exit_status = None 1070*9c5db199SXin Li disconnect_warnings.append(e.description) 1071*9c5db199SXin Li 1072*9c5db199SXin Li stderr_redirector.log_warning( 1073*9c5db199SXin Li "Autotest client was disconnected: %s" % e.description, 1074*9c5db199SXin Li "NETWORK") 1075*9c5db199SXin Li except error.AutoservSSHTimeout: 1076*9c5db199SXin Li result = utils.CmdResult(monitor_cmd, "", "", None, 0) 1077*9c5db199SXin Li stderr_redirector.log_warning( 1078*9c5db199SXin Li "Attempt to connect to Autotest client timed out", 1079*9c5db199SXin Li "NETWORK") 1080*9c5db199SXin Li 1081*9c5db199SXin Li stdout_read += len(result.stdout) 1082*9c5db199SXin Li stderr_read += len( 1083*9c5db199SXin Li self._strip_stderr_prologue(result.stderr, 1084*9c5db199SXin Li monitor_cmd)) 1085*9c5db199SXin Li 1086*9c5db199SXin Li if result.exit_status is not None: 1087*9c5db199SXin Li # TODO (crosbug.com/38224)- sbasi: Remove extra logging. 1088*9c5db199SXin Li logging.debug('Result exit status is %d.', 1089*9c5db199SXin Li result.exit_status) 1090*9c5db199SXin Li return result 1091*9c5db199SXin Li elif not self.host.wait_up(client_disconnect_timeout): 1092*9c5db199SXin Li raise error.AutoservSSHTimeout( 1093*9c5db199SXin Li "client was disconnected, reconnect timed out") 1094*9c5db199SXin Li finally: 1095*9c5db199SXin Li client_log.close() 1096*9c5db199SXin Li self.host.job.pop_execution_context() 1097*9c5db199SXin Li 1098*9c5db199SXin Li 1099*9c5db199SXin Li def execute_section(self, section, timeout, stderr_redirector, 1100*9c5db199SXin Li client_disconnect_timeout, boot_id=None): 1101*9c5db199SXin Li # TODO(crbug.com/684311) The claim is that section is never more than 0 1102*9c5db199SXin Li # in pratice. After validating for a week or so, delete all support of 1103*9c5db199SXin Li # multiple sections. 1104*9c5db199SXin Li metrics.Counter('chromeos/autotest/autotest/sections').increment( 1105*9c5db199SXin Li fields={'is_first_section': (section == 0)}) 1106*9c5db199SXin Li logging.info("Executing %s/bin/autotest %s/control phase %d", 1107*9c5db199SXin Li self.autodir, self.autodir, section) 1108*9c5db199SXin Li 1109*9c5db199SXin Li if self.background: 1110*9c5db199SXin Li result = self._execute_in_background(section, timeout) 1111*9c5db199SXin Li else: 1112*9c5db199SXin Li result = self._execute_daemon(section, timeout, stderr_redirector, 1113*9c5db199SXin Li client_disconnect_timeout) 1114*9c5db199SXin Li 1115*9c5db199SXin Li last_line = stderr_redirector.last_line 1116*9c5db199SXin Li 1117*9c5db199SXin Li # check if we failed hard enough to warrant an exception 1118*9c5db199SXin Li if result.exit_status == 1: 1119*9c5db199SXin Li err = error.AutotestRunError("client job was aborted") 1120*9c5db199SXin Li elif not self.background and not result.stderr: 1121*9c5db199SXin Li err = error.AutotestRunError( 1122*9c5db199SXin Li "execute_section %s failed to return anything\n" 1123*9c5db199SXin Li "stdout:%s\n" % (section, result.stdout)) 1124*9c5db199SXin Li else: 1125*9c5db199SXin Li err = None 1126*9c5db199SXin Li 1127*9c5db199SXin Li # log something if the client failed AND never finished logging 1128*9c5db199SXin Li if err and not self.is_client_job_finished(last_line): 1129*9c5db199SXin Li self.log_unexpected_abort(stderr_redirector, old_boot_id=boot_id) 1130*9c5db199SXin Li 1131*9c5db199SXin Li if err: 1132*9c5db199SXin Li raise err 1133*9c5db199SXin Li else: 1134*9c5db199SXin Li return stderr_redirector.last_line 1135*9c5db199SXin Li 1136*9c5db199SXin Li 1137*9c5db199SXin Li def _wait_for_reboot(self, old_boot_id): 1138*9c5db199SXin Li logging.info("Client is rebooting") 1139*9c5db199SXin Li logging.info("Waiting for client to halt") 1140*9c5db199SXin Li if not self.host.wait_down(self.host.WAIT_DOWN_REBOOT_TIMEOUT, 1141*9c5db199SXin Li old_boot_id=old_boot_id): 1142*9c5db199SXin Li err = "%s failed to shutdown after %d" 1143*9c5db199SXin Li err %= (self.host.hostname, self.host.WAIT_DOWN_REBOOT_TIMEOUT) 1144*9c5db199SXin Li raise error.AutotestRunError(err) 1145*9c5db199SXin Li logging.info("Client down, waiting for restart") 1146*9c5db199SXin Li if not self.host.wait_up(self.host.DEFAULT_REBOOT_TIMEOUT): 1147*9c5db199SXin Li # since reboot failed 1148*9c5db199SXin Li # hardreset the machine once if possible 1149*9c5db199SXin Li # before failing this control file 1150*9c5db199SXin Li warning = "%s did not come back up, hard resetting" 1151*9c5db199SXin Li warning %= self.host.hostname 1152*9c5db199SXin Li logging.warning(warning) 1153*9c5db199SXin Li try: 1154*9c5db199SXin Li self.host.hardreset(wait=False) 1155*9c5db199SXin Li except (AttributeError, error.AutoservUnsupportedError): 1156*9c5db199SXin Li warning = "Hard reset unsupported on %s" 1157*9c5db199SXin Li warning %= self.host.hostname 1158*9c5db199SXin Li logging.warning(warning) 1159*9c5db199SXin Li raise error.AutotestRunError("%s failed to boot after %ds" % 1160*9c5db199SXin Li (self.host.hostname, 1161*9c5db199SXin Li self.host.DEFAULT_REBOOT_TIMEOUT)) 1162*9c5db199SXin Li self.host.reboot_followup() 1163*9c5db199SXin Li 1164*9c5db199SXin Li 1165*9c5db199SXin Li def execute_control(self, timeout=None, client_disconnect_timeout=None): 1166*9c5db199SXin Li if not self.background: 1167*9c5db199SXin Li collector = log_collector(self.host, self.tag, self.results_dir) 1168*9c5db199SXin Li hostname = self.host.hostname 1169*9c5db199SXin Li remote_results = collector.client_results_dir 1170*9c5db199SXin Li local_results = collector.server_results_dir 1171*9c5db199SXin Li self.host.job.add_client_log(hostname, remote_results, 1172*9c5db199SXin Li local_results) 1173*9c5db199SXin Li job_record_context = self.host.job.get_record_context() 1174*9c5db199SXin Li 1175*9c5db199SXin Li section = 0 1176*9c5db199SXin Li start_time = time.time() 1177*9c5db199SXin Li 1178*9c5db199SXin Li logger = client_logger(self.host, self.tag, self.results_dir) 1179*9c5db199SXin Li try: 1180*9c5db199SXin Li while not timeout or time.time() < start_time + timeout: 1181*9c5db199SXin Li if timeout: 1182*9c5db199SXin Li section_timeout = start_time + timeout - time.time() 1183*9c5db199SXin Li else: 1184*9c5db199SXin Li section_timeout = None 1185*9c5db199SXin Li boot_id = self.host.get_boot_id() 1186*9c5db199SXin Li last = self.execute_section(section, section_timeout, 1187*9c5db199SXin Li logger, client_disconnect_timeout, 1188*9c5db199SXin Li boot_id=boot_id) 1189*9c5db199SXin Li if self.background: 1190*9c5db199SXin Li return 1191*9c5db199SXin Li section += 1 1192*9c5db199SXin Li if self.is_client_job_finished(last): 1193*9c5db199SXin Li logging.info("Client complete") 1194*9c5db199SXin Li return 1195*9c5db199SXin Li elif self.is_client_job_rebooting(last): 1196*9c5db199SXin Li try: 1197*9c5db199SXin Li self._wait_for_reboot(boot_id) 1198*9c5db199SXin Li except error.AutotestRunError as e: 1199*9c5db199SXin Li self.host.job.record("ABORT", None, "reboot", str(e)) 1200*9c5db199SXin Li self.host.job.record("END ABORT", None, None, str(e)) 1201*9c5db199SXin Li raise 1202*9c5db199SXin Li continue 1203*9c5db199SXin Li 1204*9c5db199SXin Li # If a test fails without probable cause we try to bucket it's 1205*9c5db199SXin Li # failure into one of 2 categories. If we can determine the 1206*9c5db199SXin Li # current state of the device and it is suspicious, we close the 1207*9c5db199SXin Li # status lines indicating a failure. If we either cannot 1208*9c5db199SXin Li # determine the state of the device, or it appears totally 1209*9c5db199SXin Li # healthy, we give up and abort. 1210*9c5db199SXin Li try: 1211*9c5db199SXin Li self._diagnose_dut(boot_id) 1212*9c5db199SXin Li except AutotestDeviceError as e: 1213*9c5db199SXin Li # The status lines of the test are pretty much tailed to 1214*9c5db199SXin Li # our log, with indentation, from the client job on the DUT. 1215*9c5db199SXin Li # So if the DUT goes down unexpectedly we'll end up with a 1216*9c5db199SXin Li # malformed status log unless we manually unwind the status 1217*9c5db199SXin Li # stack. Ideally we would want to write a nice wrapper like 1218*9c5db199SXin Li # server_job methods run_reboot, run_group but they expect 1219*9c5db199SXin Li # reboots and we don't. 1220*9c5db199SXin Li self.host.job.record('FAIL', None, None, str(e)) 1221*9c5db199SXin Li self.host.job.record('END FAIL', None, None) 1222*9c5db199SXin Li self.host.job.record('END GOOD', None, None) 1223*9c5db199SXin Li self.host.job.failed_with_device_error = True 1224*9c5db199SXin Li return 1225*9c5db199SXin Li except AutotestAbort as e: 1226*9c5db199SXin Li self.host.job.record('ABORT', None, None, str(e)) 1227*9c5db199SXin Li self.host.job.record('END ABORT', None, None) 1228*9c5db199SXin Li 1229*9c5db199SXin Li # give the client machine a chance to recover from a crash 1230*9c5db199SXin Li self.host.wait_up( 1231*9c5db199SXin Li self.host.HOURS_TO_WAIT_FOR_RECOVERY * 3600) 1232*9c5db199SXin Li logging.debug('Unexpected final status message from ' 1233*9c5db199SXin Li 'client %s: %s', self.host.hostname, last) 1234*9c5db199SXin Li # The line 'last' may have sensitive phrases, like 1235*9c5db199SXin Li # 'END GOOD', which breaks the tko parser. So the error 1236*9c5db199SXin Li # message will exclude it, since it will be recorded to 1237*9c5db199SXin Li # status.log. 1238*9c5db199SXin Li msg = ("Aborting - unexpected final status message from " 1239*9c5db199SXin Li "client on %s\n") % self.host.hostname 1240*9c5db199SXin Li raise error.AutotestRunError(msg) 1241*9c5db199SXin Li finally: 1242*9c5db199SXin Li # B/203609358 someting is removing telemetry. Adding this to check the 1243*9c5db199SXin Li # status of the folder as late as possible. 1244*9c5db199SXin Li logging.debug('Autotest job finishes running. Below is the ' 1245*9c5db199SXin Li 'post-processing operations.') 1246*9c5db199SXin Li logger.close() 1247*9c5db199SXin Li if not self.background: 1248*9c5db199SXin Li collector.collect_client_job_results() 1249*9c5db199SXin Li collector.remove_redundant_client_logs() 1250*9c5db199SXin Li state_file = os.path.basename(self.remote_control_file 1251*9c5db199SXin Li + '.state') 1252*9c5db199SXin Li state_path = os.path.join(self.results_dir, state_file) 1253*9c5db199SXin Li self.host.job.postprocess_client_state(state_path) 1254*9c5db199SXin Li self.host.job.remove_client_log(hostname, remote_results, 1255*9c5db199SXin Li local_results) 1256*9c5db199SXin Li job_record_context.restore() 1257*9c5db199SXin Li 1258*9c5db199SXin Li logging.debug('Autotest job finishes.') 1259*9c5db199SXin Li 1260*9c5db199SXin Li # should only get here if we timed out 1261*9c5db199SXin Li assert timeout 1262*9c5db199SXin Li raise error.AutotestTimeoutError() 1263*9c5db199SXin Li 1264*9c5db199SXin Li 1265*9c5db199SXin Liclass log_collector(object): 1266*9c5db199SXin Li def __init__(self, host, client_tag, results_dir): 1267*9c5db199SXin Li self.host = host 1268*9c5db199SXin Li if not client_tag: 1269*9c5db199SXin Li client_tag = "default" 1270*9c5db199SXin Li self.client_results_dir = os.path.join(host.get_autodir(), "results", 1271*9c5db199SXin Li client_tag) 1272*9c5db199SXin Li self.server_results_dir = results_dir 1273*9c5db199SXin Li 1274*9c5db199SXin Li 1275*9c5db199SXin Li def collect_client_job_results(self): 1276*9c5db199SXin Li """ A method that collects all the current results of a running 1277*9c5db199SXin Li client job into the results dir. By default does nothing as no 1278*9c5db199SXin Li client job is running, but when running a client job you can override 1279*9c5db199SXin Li this with something that will actually do something. """ 1280*9c5db199SXin Li # make an effort to wait for the machine to come up 1281*9c5db199SXin Li try: 1282*9c5db199SXin Li self.host.wait_up(timeout=30) 1283*9c5db199SXin Li except error.AutoservError: 1284*9c5db199SXin Li # don't worry about any errors, we'll try and 1285*9c5db199SXin Li # get the results anyway 1286*9c5db199SXin Li pass 1287*9c5db199SXin Li 1288*9c5db199SXin Li # Copy all dirs in default to results_dir 1289*9c5db199SXin Li try: 1290*9c5db199SXin Li # Build test result directory summary 1291*9c5db199SXin Li result_tools_runner.run_on_client( 1292*9c5db199SXin Li self.host, self.client_results_dir) 1293*9c5db199SXin Li 1294*9c5db199SXin Li with metrics.SecondsTimer( 1295*9c5db199SXin Li 'chromeos/autotest/job/log_collection_duration', 1296*9c5db199SXin Li fields={'dut_host_name': self.host.hostname}): 1297*9c5db199SXin Li self.host.get_file( 1298*9c5db199SXin Li self.client_results_dir + '/', 1299*9c5db199SXin Li self.server_results_dir, 1300*9c5db199SXin Li preserve_symlinks=True) 1301*9c5db199SXin Li except Exception: 1302*9c5db199SXin Li # well, don't stop running just because we couldn't get logs 1303*9c5db199SXin Li e_msg = "Unexpected error copying test result logs, continuing ..." 1304*9c5db199SXin Li logging.error(e_msg) 1305*9c5db199SXin Li traceback.print_exc(file=sys.stdout) 1306*9c5db199SXin Li 1307*9c5db199SXin Li 1308*9c5db199SXin Li def remove_redundant_client_logs(self): 1309*9c5db199SXin Li """Remove client.*.log files in favour of client.*.DEBUG files.""" 1310*9c5db199SXin Li debug_dir = os.path.join(self.server_results_dir, 'debug') 1311*9c5db199SXin Li debug_files = [f for f in os.listdir(debug_dir) 1312*9c5db199SXin Li if re.search(r'^client\.\d+\.DEBUG$', f)] 1313*9c5db199SXin Li for debug_file in debug_files: 1314*9c5db199SXin Li log_file = debug_file.replace('DEBUG', 'log') 1315*9c5db199SXin Li log_file = os.path.join(debug_dir, log_file) 1316*9c5db199SXin Li if os.path.exists(log_file): 1317*9c5db199SXin Li os.remove(log_file) 1318*9c5db199SXin Li 1319*9c5db199SXin Li 1320*9c5db199SXin Li# a file-like object for catching stderr from an autotest client and 1321*9c5db199SXin Li# extracting status logs from it 1322*9c5db199SXin Liclass client_logger(object): 1323*9c5db199SXin Li """Partial file object to write to both stdout and 1324*9c5db199SXin Li the status log file. We only implement those methods 1325*9c5db199SXin Li utils.run() actually calls. 1326*9c5db199SXin Li """ 1327*9c5db199SXin Li status_parser = re.compile(r"^AUTOTEST_STATUS:([^:]*):(.*)$") 1328*9c5db199SXin Li test_complete_parser = re.compile(r"^AUTOTEST_TEST_COMPLETE:(.*)$") 1329*9c5db199SXin Li fetch_package_parser = re.compile( 1330*9c5db199SXin Li r"^AUTOTEST_FETCH_PACKAGE:([^:]*):([^:]*):(.*)$") 1331*9c5db199SXin Li extract_indent = re.compile(r"^(\t*).*$") 1332*9c5db199SXin Li extract_timestamp = re.compile(r".*\ttimestamp=(\d+)\t.*$") 1333*9c5db199SXin Li 1334*9c5db199SXin Li def __init__(self, host, tag, server_results_dir): 1335*9c5db199SXin Li self.host = host 1336*9c5db199SXin Li self.job = host.job 1337*9c5db199SXin Li self.log_collector = log_collector(host, tag, server_results_dir) 1338*9c5db199SXin Li self.leftover = "" 1339*9c5db199SXin Li self.last_line = "" 1340*9c5db199SXin Li self.logs = {} 1341*9c5db199SXin Li 1342*9c5db199SXin Li 1343*9c5db199SXin Li def _process_log_dict(self, log_dict): 1344*9c5db199SXin Li log_list = log_dict.pop("logs", []) 1345*9c5db199SXin Li for key in sorted(six.iterkeys(log_dict)): 1346*9c5db199SXin Li log_list += self._process_log_dict(log_dict.pop(key)) 1347*9c5db199SXin Li return log_list 1348*9c5db199SXin Li 1349*9c5db199SXin Li 1350*9c5db199SXin Li def _process_logs(self): 1351*9c5db199SXin Li """Go through the accumulated logs in self.log and print them 1352*9c5db199SXin Li out to stdout and the status log. Note that this processes 1353*9c5db199SXin Li logs in an ordering where: 1354*9c5db199SXin Li 1355*9c5db199SXin Li 1) logs to different tags are never interleaved 1356*9c5db199SXin Li 2) logs to x.y come before logs to x.y.z for all z 1357*9c5db199SXin Li 3) logs to x.y come before x.z whenever y < z 1358*9c5db199SXin Li 1359*9c5db199SXin Li Note that this will in general not be the same as the 1360*9c5db199SXin Li chronological ordering of the logs. However, if a chronological 1361*9c5db199SXin Li ordering is desired that one can be reconstructed from the 1362*9c5db199SXin Li status log by looking at timestamp lines.""" 1363*9c5db199SXin Li log_list = self._process_log_dict(self.logs) 1364*9c5db199SXin Li for entry in log_list: 1365*9c5db199SXin Li self.job.record_entry(entry, log_in_subdir=False) 1366*9c5db199SXin Li if log_list: 1367*9c5db199SXin Li self.last_line = log_list[-1].render() 1368*9c5db199SXin Li 1369*9c5db199SXin Li 1370*9c5db199SXin Li def _process_quoted_line(self, tag, line): 1371*9c5db199SXin Li """Process a line quoted with an AUTOTEST_STATUS flag. If the 1372*9c5db199SXin Li tag is blank then we want to push out all the data we've been 1373*9c5db199SXin Li building up in self.logs, and then the newest line. If the 1374*9c5db199SXin Li tag is not blank, then push the line into the logs for handling 1375*9c5db199SXin Li later.""" 1376*9c5db199SXin Li entry = base_job.status_log_entry.parse(line) 1377*9c5db199SXin Li if entry is None: 1378*9c5db199SXin Li return # the line contains no status lines 1379*9c5db199SXin Li if tag == "": 1380*9c5db199SXin Li self._process_logs() 1381*9c5db199SXin Li self.job.record_entry(entry, log_in_subdir=False) 1382*9c5db199SXin Li self.last_line = line 1383*9c5db199SXin Li else: 1384*9c5db199SXin Li tag_parts = [int(x) for x in tag.split(".")] 1385*9c5db199SXin Li log_dict = self.logs 1386*9c5db199SXin Li for part in tag_parts: 1387*9c5db199SXin Li log_dict = log_dict.setdefault(part, {}) 1388*9c5db199SXin Li log_list = log_dict.setdefault("logs", []) 1389*9c5db199SXin Li log_list.append(entry) 1390*9c5db199SXin Li 1391*9c5db199SXin Li 1392*9c5db199SXin Li def _process_info_line(self, line): 1393*9c5db199SXin Li """Check if line is an INFO line, and if it is, interpret any control 1394*9c5db199SXin Li messages (e.g. enabling/disabling warnings) that it may contain.""" 1395*9c5db199SXin Li match = re.search(r"^\t*INFO\t----\t----(.*)\t[^\t]*$", line) 1396*9c5db199SXin Li if not match: 1397*9c5db199SXin Li return # not an INFO line 1398*9c5db199SXin Li for field in match.group(1).split('\t'): 1399*9c5db199SXin Li if field.startswith("warnings.enable="): 1400*9c5db199SXin Li func = self.job.warning_manager.enable_warnings 1401*9c5db199SXin Li elif field.startswith("warnings.disable="): 1402*9c5db199SXin Li func = self.job.warning_manager.disable_warnings 1403*9c5db199SXin Li else: 1404*9c5db199SXin Li continue 1405*9c5db199SXin Li warning_type = field.split("=", 1)[1] 1406*9c5db199SXin Li func(warning_type) 1407*9c5db199SXin Li 1408*9c5db199SXin Li 1409*9c5db199SXin Li def _process_line(self, line): 1410*9c5db199SXin Li """Write out a line of data to the appropriate stream. 1411*9c5db199SXin Li 1412*9c5db199SXin Li Returns the package checksum file if it exists. 1413*9c5db199SXin Li 1414*9c5db199SXin Li Status lines sent by autotest will be prepended with 1415*9c5db199SXin Li "AUTOTEST_STATUS", and all other lines are ssh error messages. 1416*9c5db199SXin Li """ 1417*9c5db199SXin Li logging.debug(line) 1418*9c5db199SXin Li fetch_package_match = self.fetch_package_parser.search(line) 1419*9c5db199SXin Li if fetch_package_match: 1420*9c5db199SXin Li pkg_name, dest_path, fifo_path = fetch_package_match.groups() 1421*9c5db199SXin Li serve_packages = _CONFIG.get_config_value( 1422*9c5db199SXin Li "PACKAGES", "serve_packages_from_autoserv", type=bool) 1423*9c5db199SXin Li if serve_packages and pkg_name == 'packages.checksum': 1424*9c5db199SXin Li try: 1425*9c5db199SXin Li checksum_file = os.path.join( 1426*9c5db199SXin Li self.job.pkgmgr.pkgmgr_dir, 'packages', pkg_name) 1427*9c5db199SXin Li if os.path.exists(checksum_file): 1428*9c5db199SXin Li self.host.send_file(checksum_file, dest_path) 1429*9c5db199SXin Li except error.AutoservRunError: 1430*9c5db199SXin Li msg = "Package checksum file not found, continuing anyway" 1431*9c5db199SXin Li logging.exception(msg) 1432*9c5db199SXin Li 1433*9c5db199SXin Li try: 1434*9c5db199SXin Li # When fetching a package, the client expects to be 1435*9c5db199SXin Li # notified when the fetching is complete. Autotest 1436*9c5db199SXin Li # does this pushing a B to a fifo queue to the client. 1437*9c5db199SXin Li self.host.run("echo B > %s" % fifo_path) 1438*9c5db199SXin Li except error.AutoservRunError: 1439*9c5db199SXin Li msg = "Checksum installation failed, continuing anyway" 1440*9c5db199SXin Li logging.exception(msg) 1441*9c5db199SXin Li finally: 1442*9c5db199SXin Li return 1443*9c5db199SXin Li 1444*9c5db199SXin Li status_match = self.status_parser.search(line) 1445*9c5db199SXin Li test_complete_match = self.test_complete_parser.search(line) 1446*9c5db199SXin Li fetch_package_match = self.fetch_package_parser.search(line) 1447*9c5db199SXin Li if status_match: 1448*9c5db199SXin Li tag, line = status_match.groups() 1449*9c5db199SXin Li self._process_info_line(line) 1450*9c5db199SXin Li self._process_quoted_line(tag, line) 1451*9c5db199SXin Li elif test_complete_match: 1452*9c5db199SXin Li self._process_logs() 1453*9c5db199SXin Li fifo_path, = test_complete_match.groups() 1454*9c5db199SXin Li try: 1455*9c5db199SXin Li self.log_collector.collect_client_job_results() 1456*9c5db199SXin Li self.host.run("echo A > %s" % fifo_path) 1457*9c5db199SXin Li except Exception: 1458*9c5db199SXin Li msg = "Post-test log collection failed, continuing anyway" 1459*9c5db199SXin Li logging.exception(msg) 1460*9c5db199SXin Li elif fetch_package_match: 1461*9c5db199SXin Li pkg_name, dest_path, fifo_path = fetch_package_match.groups() 1462*9c5db199SXin Li serve_packages = global_config.global_config.get_config_value( 1463*9c5db199SXin Li "PACKAGES", "serve_packages_from_autoserv", type=bool) 1464*9c5db199SXin Li if serve_packages and pkg_name.endswith(".tar.bz2"): 1465*9c5db199SXin Li try: 1466*9c5db199SXin Li self._send_tarball(pkg_name, dest_path) 1467*9c5db199SXin Li except Exception: 1468*9c5db199SXin Li msg = "Package tarball creation failed, continuing anyway" 1469*9c5db199SXin Li logging.exception(msg) 1470*9c5db199SXin Li try: 1471*9c5db199SXin Li self.host.run("echo B > %s" % fifo_path) 1472*9c5db199SXin Li except Exception: 1473*9c5db199SXin Li msg = "Package tarball installation failed, continuing anyway" 1474*9c5db199SXin Li logging.exception(msg) 1475*9c5db199SXin Li else: 1476*9c5db199SXin Li logging.info(line) 1477*9c5db199SXin Li 1478*9c5db199SXin Li 1479*9c5db199SXin Li def _send_tarball(self, pkg_name, remote_dest): 1480*9c5db199SXin Li """Uses tarballs in package manager by default.""" 1481*9c5db199SXin Li try: 1482*9c5db199SXin Li server_package = os.path.join(self.job.pkgmgr.pkgmgr_dir, 1483*9c5db199SXin Li 'packages', pkg_name) 1484*9c5db199SXin Li if os.path.exists(server_package): 1485*9c5db199SXin Li self.host.send_file(server_package, remote_dest) 1486*9c5db199SXin Li return 1487*9c5db199SXin Li 1488*9c5db199SXin Li except error.AutoservRunError: 1489*9c5db199SXin Li msg = ("Package %s could not be sent from the package cache." % 1490*9c5db199SXin Li pkg_name) 1491*9c5db199SXin Li logging.exception(msg) 1492*9c5db199SXin Li 1493*9c5db199SXin Li name, pkg_type = self.job.pkgmgr.parse_tarball_name(pkg_name) 1494*9c5db199SXin Li src_dirs = [] 1495*9c5db199SXin Li if pkg_type == 'test': 1496*9c5db199SXin Li for test_dir in ['site_tests', 'tests']: 1497*9c5db199SXin Li src_dir = os.path.join(self.job.clientdir, test_dir, name) 1498*9c5db199SXin Li if os.path.exists(src_dir): 1499*9c5db199SXin Li src_dirs += [src_dir] 1500*9c5db199SXin Li break 1501*9c5db199SXin Li elif pkg_type == 'profiler': 1502*9c5db199SXin Li src_dirs += [os.path.join(self.job.clientdir, 'profilers', name)] 1503*9c5db199SXin Li elif pkg_type == 'dep': 1504*9c5db199SXin Li src_dirs += [os.path.join(self.job.clientdir, 'deps', name)] 1505*9c5db199SXin Li elif pkg_type == 'client': 1506*9c5db199SXin Li return # you must already have a client to hit this anyway 1507*9c5db199SXin Li else: 1508*9c5db199SXin Li return # no other types are supported 1509*9c5db199SXin Li 1510*9c5db199SXin Li # iterate over src_dirs until we find one that exists, then tar it 1511*9c5db199SXin Li for src_dir in src_dirs: 1512*9c5db199SXin Li if os.path.exists(src_dir): 1513*9c5db199SXin Li try: 1514*9c5db199SXin Li logging.info('Bundling %s into %s', src_dir, pkg_name) 1515*9c5db199SXin Li temp_dir = autotemp.tempdir(unique_id='autoserv-packager', 1516*9c5db199SXin Li dir=self.job.tmpdir) 1517*9c5db199SXin Li tarball_path = self.job.pkgmgr.tar_package( 1518*9c5db199SXin Li pkg_name, src_dir, temp_dir.name, " .") 1519*9c5db199SXin Li self.host.send_file(tarball_path, remote_dest) 1520*9c5db199SXin Li finally: 1521*9c5db199SXin Li temp_dir.clean() 1522*9c5db199SXin Li return 1523*9c5db199SXin Li 1524*9c5db199SXin Li 1525*9c5db199SXin Li def log_warning(self, msg, warning_type): 1526*9c5db199SXin Li """Injects a WARN message into the current status logging stream.""" 1527*9c5db199SXin Li timestamp = int(time.time()) 1528*9c5db199SXin Li if self.job.warning_manager.is_valid(timestamp, warning_type): 1529*9c5db199SXin Li self.job.record('WARN', None, None, msg) 1530*9c5db199SXin Li 1531*9c5db199SXin Li 1532*9c5db199SXin Li def write(self, data): 1533*9c5db199SXin Li # now start processing the existing buffer and the new data 1534*9c5db199SXin Li data = self.leftover + data 1535*9c5db199SXin Li lines = data.split('\n') 1536*9c5db199SXin Li processed_lines = 0 1537*9c5db199SXin Li try: 1538*9c5db199SXin Li # process all the buffered data except the last line 1539*9c5db199SXin Li # ignore the last line since we may not have all of it yet 1540*9c5db199SXin Li for line in lines[:-1]: 1541*9c5db199SXin Li self._process_line(line) 1542*9c5db199SXin Li processed_lines += 1 1543*9c5db199SXin Li finally: 1544*9c5db199SXin Li # save any unprocessed lines for future processing 1545*9c5db199SXin Li self.leftover = '\n'.join(lines[processed_lines:]) 1546*9c5db199SXin Li 1547*9c5db199SXin Li 1548*9c5db199SXin Li def flush(self): 1549*9c5db199SXin Li sys.stdout.flush() 1550*9c5db199SXin Li 1551*9c5db199SXin Li 1552*9c5db199SXin Li def flush_all_buffers(self): 1553*9c5db199SXin Li if self.leftover: 1554*9c5db199SXin Li self._process_line(self.leftover) 1555*9c5db199SXin Li self.leftover = "" 1556*9c5db199SXin Li self._process_logs() 1557*9c5db199SXin Li self.flush() 1558*9c5db199SXin Li 1559*9c5db199SXin Li 1560*9c5db199SXin Li def close(self): 1561*9c5db199SXin Li self.flush_all_buffers() 1562