# Lint as: python2, python3
# Copyright 2016 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

# repohooks/pre-upload.py currently does not run pylint. But for developers who
# want to check their code manually we disable several harmless pylint warnings
# which just distract from more serious remaining issues.
#
# The instance variables _host and _install_paths are not defined in __init__().
# pylint: disable=attribute-defined-outside-init
#
# Many short variable names don't follow the naming convention.
# pylint: disable=invalid-name
#
# _parse_result() and _dir_size() don't access self and could be functions.
# pylint: disable=no-self-use

from collections import namedtuple
import errno
import glob
import hashlib
import logging
import os
import pipes
import re
import shutil
import stat
import subprocess
import tempfile
import time
import six.moves.urllib_parse as urlparse

from autotest_lib.client.bin import utils as client_utils
from autotest_lib.client.common_lib import error
from autotest_lib.server import test
from autotest_lib.server import utils
from autotest_lib.server.cros.tradefed import adb as adb_utils
from autotest_lib.server.cros.tradefed import cts_expected_failure_parser
from autotest_lib.server.cros.tradefed import tradefed_chromelogin as login
from autotest_lib.server.cros.tradefed import tradefed_constants as constants
from autotest_lib.server.cros.tradefed import tradefed_utils
from autotest_lib.server.cros.tradefed import tradefed_prerequisite
from autotest_lib.server.autotest import OFFLOAD_ENVVAR

# TODO(kinaba): Move to tradefed_utils together with the setup/cleanup methods.
MediaAsset = namedtuple('MediaAssetInfo', ['uri', 'localpath'])


class TradefedTest(test.test):
    """Base class to prepare DUT to run tests via tradefed."""
    version = 1

    # Default and upper bounds of max_retry, based on board and revision
    # after branching (that is, 'y' of R74-12345.y.z).
    #
    # By default, 0<=y<1 does 5 retries and 1<=y does 10. The |max_retry|
    # parameter in control files can override the count, within the
    # _BRANCH_MAX_RETRY limit below.
    _BRANCH_DEFAULT_RETRY = [(0, 5), (1, 10)]  # dev=5, beta=stable=10
    _BRANCH_MAX_RETRY = [(0, 12), (1, 30),  # dev=12, beta=30, stable=99
                         (constants.APPROXIMATE_STABLE_BRANCH_NUMBER, 99)]
    # TODO(kinaba): betty-arcnext
    _BOARD_MAX_RETRY = {'betty': 0}

    _SHARD_CMD = None
    _board_arch = None
    _board_name = None
    _model_name = None
    _release_branch_number = None  # The 'y' of OS version Rxx-xxxxx.y.z
    _android_version = None
    _first_api_level = None
    _num_media_bundles = 0
    _abilist = []

    # A job will be aborted after 16h. Subtract 30m for setup/teardown.
    _MAX_LAB_JOB_LENGTH_IN_SEC = 16 * 60 * 60 - 30 * 60
    _job_deadline = None

    # Currently this is only used for dependency injection for testing.
    def __init__(self, *args, **kwargs):
        super().__init__(*args)
        self._adb = kwargs.get('adb', adb_utils.Adb())

    def _log_java_version(self):
        """Log java version to debug failures due to version mismatch."""
        utils.run(
                'java',
                args=('-version',),
                ignore_status=False,
                verbose=True,
                stdout_tee=utils.TEE_TO_LOGS,
                stderr_tee=utils.TEE_TO_LOGS)

    def initialize(self,
                   bundle=None,
                   uri=None,
                   host=None,
                   hosts=None,
                   max_retry=None,
                   load_waivers=True,
                   retry_manual_tests=False,
                   warn_on_test_retry=True,
                   hard_reboot_on_failure=False,
                   use_jdk9=False,
                   use_old_adb=False):
        """Sets up the tools and binary bundles for the test."""
        if utils.is_in_container() and not client_utils.is_moblab():
            self._job_deadline = time.time() + self._MAX_LAB_JOB_LENGTH_IN_SEC

        self._install_paths = []
        # TODO(pwang): Remove host if we enable multiple hosts everywhere.
        self._hosts = [host] if host else hosts
        for host in self._hosts:
            logging.info('Hostname: %s', host.host_port)
        self._verify_hosts()

        self._max_retry = self._get_max_retry(max_retry)
        self._warn_on_test_retry = warn_on_test_retry
        # Tests in the lab run within individual lxc container instances.
        if utils.is_in_container():
            cache_root = constants.TRADEFED_CACHE_CONTAINER
        else:
            cache_root = constants.TRADEFED_CACHE_LOCAL

        # The content of the cache survives across jobs.
        self._safe_makedirs(cache_root)
        self._tradefed_cache = os.path.join(cache_root, 'cache')
        self._tradefed_cache_lock = os.path.join(cache_root, 'lock')
        self._tradefed_cache_dirty = os.path.join(cache_root, 'dirty')
        # The content of the install location does not survive across jobs and
        # is isolated (by using a unique path) against other autotest
        # instances. This is not needed for the lab, but is useful if somebody
        # wants to run multiple TradefedTest instances.
        self._tradefed_install = tempfile.mkdtemp(
                prefix=constants.TRADEFED_PREFIX)
        # Under lxc the cache is shared between multiple autotest/tradefed
        # instances. We need to synchronize access to it. All binaries are
        # installed through the (shared) cache into the local (unshared)
        # lxc/autotest instance storage.
        # If the cache is to be cleared, it must happen before all downloads.
        self._clean_download_cache_if_needed()
        # Set permissions (rwxr-xr-x) to the executable binaries.
        permission = (
                stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH
                | stat.S_IXOTH)

        adb_dir = constants.ADB_DIR_OLD if use_old_adb else constants.ADB_DIR
        self._install_files(adb_dir, constants.ADB_FILES, permission)
        self._install_files(constants.SDK_TOOLS_DIR,
                            constants.SDK_TOOLS_FILES, permission)

        # If use_jdk9 is set true, use jdk9 instead of the default jdk8.
        if use_jdk9:
            if utils.is_in_container() and not client_utils.is_moblab():
                logging.info('Lab: switching to JDK9')
                try:
                    os.environ['JAVA_HOME'] = '/usr/lib/jvm/jdk-9.0.4'
                    os.environ['PATH'] = os.environ['JAVA_HOME']\
                            + '/bin:' + os.environ['PATH']
                    logging.info(
                            subprocess.check_output(['java', '-version'],
                                                    stderr=subprocess.STDOUT))
                except OSError:
                    logging.error('Can\'t change current PATH directory')
            else:
                logging.info('Non-lab environment: should be using JDK9+')

        # TODO(kinaba): Remove the hack and fully enable the feature.
        # For release branches (Rx-yyyyy.3.0 or above), always use the
        # official build instead of the release build. See b/210369548
        if uri == 'DEV' and self._get_release_branch_number() >= 3:
            uri = 'LATEST'
        # Install the tradefed bundle.
        bundle_install_path = self._install_bundle(
                self._get_bundle_url(uri, bundle))
        self._repository = os.path.join(bundle_install_path,
                                        self._get_tradefed_base_dir())

        # Load expected test failures to exclude them from re-runs.
        self._waivers = set()
        if load_waivers:
            self._waivers.update(
                    self._get_expected_failures('expectations', bundle))
        if not retry_manual_tests:
            self._waivers.update(
                    self._get_expected_failures('manual_tests', bundle))

        # Load modules with no tests.
        self._notest_modules = self._get_expected_failures('notest_modules',
                                                           bundle)
        self._hard_reboot_on_failure = hard_reboot_on_failure

    def _output_perf(self):
        """Output performance values."""
        base = self._default_tradefed_base_dir()
        path = tradefed_utils.get_test_result_xml_path(base)
        if path:
            for metric in tradefed_utils.get_perf_metrics_from_test_result_xml(
                    path, self.resultsdir):
                self.output_perf_value(**metric)

    def _prepare_synchronous_offloads(self):
        """
        Copy files needed for APFE to the synchronous offload dir, with some
        structure to make the post-job postprocessing simpler.
        """
        testname = os.path.basename(self.outputdir)
        # This is yyyy.mm.dd_hh.mm.ss (start time)
        timestamp_pattern = ("[0-9][0-9][0-9][0-9].[0-9][0-9].[0-9][0-9]" +
                             "_[0-9][0-9].[0-9][0-9].[0-9][0-9]")
        time_glob = os.path.join(
                self._default_tradefed_base_dir(), timestamp_pattern
        )
        for dirpath in glob.glob(time_glob):
            timestamp = os.path.basename(dirpath)
            locs = [os.path.join(dirpath, f) for f in ["test_result.xml",
                                                       "testResult.xml"]]
            for f in locs:
                if os.path.exists(f):
                    subdirs = self._subdirs(f, testname, timestamp)
                    self._copy_to_offload_dir(f, subdirs)
        for z in glob.glob(time_glob + ".zip"):
            self._copy_to_offload_dir(z, self._subdirs(z, testname))

    def _copy_to_offload_dir(self, src_path, subdirs, recursive=True):
        target = os.path.join(os.getenv(OFFLOAD_ENVVAR), *subdirs)
        self._safe_makedirs(target)
        if not recursive or os.path.isfile(src_path):
            return shutil.copy2(src_path, str(target))
        return shutil.copytree(src_path, str(target))

    def _subdirs(self, path, testname, timestamp=""):
        # CTS results from bvt-arc suites need to be sent to the
        # specially-designated bucket for early EDI entries in APFE,
        # but only there.
        dest = "BVT" if 'bvt-arc' in path else "CTS"
        return ["APFE", dest, testname, timestamp]

    def cleanup(self):
        """Cleans up any dirtied state."""

        # We also run result postprocessing and performance data offloading
        # here so that WARN and FAIL runs also run these steps. The
        # postprocess() method only runs for PASSing jobs.
        self._prepare_synchronous_offloads()
        self._output_perf()

        try:
            # Clean up test data that may not be deletable on previous
            # ChromeOS versions. See b/170276268.
            self._run_commands(
                    ['cryptohome --action=remove --force [email protected]'],
                    ignore_status=True)
        except:
            logging.error('Failed to clean up the test account.')

        self._kill_adb_server()

        if hasattr(self, '_tradefed_install'):
            logging.info('Cleaning up %s.', self._tradefed_install)
            try:
                shutil.rmtree(self._tradefed_install)
            except IOError:
                pass

    def _kill_adb_server(self):
        # Kill any lingering adb servers.
        try:
            self._adb.run(None,
                          verbose=True,
                          args=('kill-server', ),
                          timeout=constants.ADB_KILL_SERVER_TIMEOUT_SECONDS)
        except error.CmdTimeoutError as e:
            logging.warn(e)
            # `adb kill-server` sometimes hangs up. Kill it more brutally.
            try:
                client_utils.system(
                        'killall adb',
                        ignore_status=True,
                        timeout=constants.ADB_KILL_SERVER_TIMEOUT_SECONDS)
            except error.CmdTimeoutError as e:
                # The timeout is ignored, since the only known failure pattern
                # b/142828365 is due to a zombie process that does not prevent
                # starting a new server with a new adb key.
                logging.warn(e)
        except (error.CmdError, AttributeError):
            pass

    def _verify_hosts(self):
        """Verify all hosts' ChromeOS consistency."""
        # Check release builder path. E.g. cave-release/R66-10435.0.0
        release_builder_path = set(host.get_release_builder_path()
                                   for host in self._hosts)
        if len(release_builder_path) > 1:
            raise error.TestFail('Hosts\' CHROMEOS_RELEASE_BUILDER_PATH is '
                                 'different: %s' % release_builder_path)

        # Check ChromeOS ARC VERSION. E.g.
        arc_version = set(host.get_arc_version() for host in self._hosts)
        if len(arc_version) > 1:
            raise error.TestFail('Hosts\' CHROMEOS_ARC_VERSION is different: '
                                 '%s' % arc_version)

        # Check ChromeOS model for unibuild.
        # TODO(pwang): Add a check once we find how to detect the host's model.

    def _verify_arc_hosts(self):
        """Verify all hosts' Android configuration consistency.

        This method should only be called after all hosts' Android has been
        successfully booted up."""
        # Check all hosts have the same Android fingerprint.
        fingerprint = set(
                self._adb.run(host,
                              args=('shell', 'getprop',
                                    'ro.build.fingerprint')).stdout
                for host in self._hosts)
        if len(fingerprint) > 1:
            raise error.TestFail('Hosts\' supported fingerprint is different: '
                                 '%s' % fingerprint)

    def _calculate_test_count_factor(self, bundle):
        """Calculate the multiplicative factor for the test case number.

        The value equals the number of times each test case is run, which is
        determined by the intersection of the supported ABIs of the CTS/GTS
        bundle and those of the tested device."""
        # This is only a conservative approximation. Some suites only run the
        # primary ABI, so to be fully precise, those have to be counted as 1.
        arm_abis = set(('armeabi-v7a', 'arm64-v8a'))
        x86_abis = set(('x86', 'x86_64'))
        if bundle and bundle.startswith('arm'):
            tradefed_abis = arm_abis
        elif bundle and bundle.startswith('x86'):
            tradefed_abis = x86_abis
        else:
            tradefed_abis = arm_abis | x86_abis
        self._test_count_factor = len(set(self._get_abilist()) & tradefed_abis)
        # Avoid setting timeout=0 (None) in any cases.
        self._timeout_factor = max(1, self._test_count_factor)

    def _try_adb_connect(self, host):
        """Attempts to connect to adb on the DUT.

        @param host: DUT that needs to be connected.
        @return boolean indicating if adb connected successfully.
        """
        # Add ADB_TRACE=all for debugging adb connection failures.
        env = os.environ.copy()
        env['ADB_TRACE'] = 'all'
        try:
            # This may return failure due to a race condition in adb
            # connect (b/29370989). If adb is already connected, this command
            # will immediately return success.
            host_port = adb_utils.get_adb_target(host)
            result = self._adb.run(
                    host,
                    args=('connect', host_port),
                    verbose=True,
                    env=env,
                    ignore_status=True,
                    timeout=constants.ADB_CONNECT_TIMEOUT_SECONDS)
            if result.exit_status != 0:
                return False

            result = self._adb.run(
                    host,
                    args=('devices', ),
                    env=env,
                    timeout=constants.ADB_CONNECT_TIMEOUT_SECONDS)
            if not re.search(r'{}\s+(device|unauthorized)'.format(
                    re.escape(host_port)), result.stdout):
                logging.info('No result found with pattern: %s',
                             r'{}\s+(device|unauthorized)'.format(
                                     re.escape(host_port)))
                return False

            # Actually test the connection with an adb command as there can be
            # a race between detecting the connected device and actually being
            # able to run a command with authenticated adb.
            result = self._adb.run(
                    host,
                    args=('shell', 'exit'),
                    env=env,
                    ignore_status=True,
                    timeout=constants.ADB_CONNECT_TIMEOUT_SECONDS)
            return result.exit_status == 0
        except error.CmdTimeoutError as e:
            logging.warning(e)
            return False

    def _android_shell(self, host, command):
        """Run a command remotely on the device in an android shell.

        This function is strictly for internal use only, as commands do not run
        in a fully consistent Android environment. Prefer adb shell instead.
        """
        host.run('android-sh -c ' + pipes.quote(command))

    def _connect_adb(self, host):
        """Sets up ADB connection to the ARC container.

        @param host: DUT that should be connected to.
        """
        logging.info('Setting up adb connection.')

        # adbd may take some time to come up. Repeatedly try to connect to adb.
        utils.poll_for_condition(
                lambda: self._try_adb_connect(host),
                timeout=constants.ADB_READY_TIMEOUT_SECONDS,
                sleep_interval=constants.ADB_POLLING_INTERVAL_SECONDS)

        logging.info('Successfully set up adb connection.')

    def _wait_for_arc_boot(self, host):
        """Wait until ARC is fully booted.

        Tests for the presence of the intent helper app to determine whether
        ARC has finished booting.
        @param host: DUT that needs to be connected to.
        """

        def _intent_helper_running():
            result = self._adb.run(host,
                                   args=('shell', 'pgrep', '-f',
                                         'org.chromium.arc.intent_helper'),
                                   ignore_status=True)
            return bool(result.stdout)

        utils.poll_for_condition(
                _intent_helper_running,
                exception=error.TestFail(
                        'Error: Timed out waiting for intent helper.'),
                timeout=constants.ARC_READY_TIMEOUT_SECONDS,
                sleep_interval=constants.ARC_POLLING_INTERVAL_SECONDS)

    def _disable_adb_install_dialog(self, host):
        """Disables a dialog shown on adb install execution.

        By default, on adb install execution, the "Allow Google to regularly
        check device activity ..." dialog is shown. It requires manual user
        action, so tests are blocked at that point.
        This method disables it.
        """
        logging.info('Disabling the adb install dialog.')
        result = self._adb.run(host,
                               verbose=True,
                               args=('shell', 'settings', 'put', 'global',
                                     'verifier_verify_adb_installs', '0'))
        logging.info('Disable adb dialog: %s', result.stdout)

        # Android "RescueParty" feature can reset the above settings when the
        # device crashes often. Disable the rescue during testing.
        # Keeping only for P and below since R has SELinux restrictions.
        if self._get_android_version() < 29:
            self._android_shell(host,
                                'setprop persist.sys.disable_rescue true')

    def _ready_arc(self):
        """Ready ARC and adb in parallel for running tests via tradefed."""
        key_path = os.path.join(self.tmpdir, 'test_key')
        with open(key_path, 'w') as f:
            f.write(constants.PRIVATE_KEY)
        os.environ['ADB_VENDOR_KEYS'] = key_path

        for _ in range(2):
            try:
                # Kill existing adb server to ensure that the env var is picked
                # up, and reset any previous bad state.
                self._kill_adb_server()

                # TODO(pwang): connect_adb takes 10+ seconds on a single DUT.
                # Parallelize it if it becomes a bottleneck.
                for host in self._hosts:
                    self._connect_adb(host)
                    self._disable_adb_install_dialog(host)
                    self._wait_for_arc_boot(host)
                self._verify_arc_hosts()
                return
            except (utils.TimeoutError, error.CmdTimeoutError):
                logging.error('Failed to set up adb connection. Retrying...')
        raise error.TestFail('Error: Failed to set up adb connection')

    def _safe_makedirs(self, path):
        """Creates a directory at |path| and its ancestors.

        Unlike os.makedirs(), ignore errors even if directories exist.
        """
        try:
            os.makedirs(path)
        except OSError as e:
            if not (e.errno == errno.EEXIST and os.path.isdir(path)):
                raise

    def _unzip(self, filename):
        """Unzip the file.

        The destination directory name will be the stem of filename.
        E.g., _unzip('foo/bar/baz.zip') will create a directory at
        'foo/bar/baz', and then will inflate the zip's content under the
        directory. If there is already a directory at the stem, that directory
        will be used.

        @param filename: Path to the zip archive.
        @return Path to the inflated directory.
        """
        destination = os.path.splitext(filename)[0]
        if os.path.isdir(destination):
            logging.info('Skipping unzip %s, reusing content of %s', filename,
                         destination)
            return destination
        tmp = tempfile.mkdtemp(dir=os.path.dirname(filename))
        logging.info('Begin unzip %s', filename)
        try:
            utils.run('unzip', args=('-d', tmp, filename))
        except:
            logging.error('Failed unzip, cleaning up.')
            # Clean up just created files.
            shutil.rmtree(tmp, ignore_errors=True)
            raise
        logging.info('End unzip %s', filename)
        try:
            os.renames(tmp, destination)
        except:
            logging.error('Failed rename, cleaning up.')
            shutil.rmtree(destination, ignore_errors=True)
            shutil.rmtree(tmp, ignore_errors=True)
            raise
        return destination

    def _dir_size(self, directory):
        """Compute recursive size in bytes of directory."""
        size = 0
        for root, _, files in os.walk(directory):
            for name in files:
                try:
                    size += os.path.getsize(os.path.join(root, name))
                except OSError:
                    logging.error('Inaccessible path (crbug/793696): %s/%s',
                                  root, name)
        return size

    def _invalidate_download_cache(self):
        """Marks the download cache for deferred deletion.

        Used to make cache file operations atomic across failures and reboots.
        The caller is responsible for holding the lock to the cache.
        """
        if not os.path.exists(self._tradefed_cache_dirty):
            os.mkdir(self._tradefed_cache_dirty)

    def _validate_download_cache(self):
        """Validates and unmarks the download cache from deletion.

        Used to make cache file operations atomic across failures and reboots.
        The caller is responsible for holding the lock to the cache.
        """
        shutil.rmtree(self._tradefed_cache_dirty, ignore_errors=True)

    def _clean_download_cache_if_needed(self, force=False):
        """Invalidates the cache to prevent it from growing too large."""
        # If the cache is large enough to hold a working set, we can simply
        # delete everything without thrashing.
        # TODO(ihf): Investigate strategies like LRU.
        clean = force
        with tradefed_utils.lock(self._tradefed_cache_lock):
            size = self._dir_size(self._tradefed_cache)
            if size > constants.TRADEFED_CACHE_MAX_SIZE:
                logging.info(
                        'Current cache size=%d got too large. Clearing %s.',
                        size, self._tradefed_cache)
                clean = True
            else:
                logging.info('Current cache size=%d of %s.', size,
                             self._tradefed_cache)
            if os.path.exists(self._tradefed_cache_dirty):
                logging.info('Found dirty cache.')
                clean = True
            if clean:
                logging.warning('Cleaning download cache.')
                shutil.rmtree(self._tradefed_cache, ignore_errors=True)
                self._safe_makedirs(self._tradefed_cache)
                shutil.rmtree(self._tradefed_cache_dirty, ignore_errors=True)

    def _download_to_cache(self, uri):
        """Downloads the uri from the storage server.

        It always checks the cache for available binaries first and skips
        download if binaries are already in the cache.

        The caller of this function is responsible for holding the cache lock.

        @param uri: The Google Storage, dl.google.com or local uri.
        @return Path to the downloaded object, name.
        """
        # We are hashing the uri instead of the binary. This is acceptable, as
        # the uris are supposed to contain version information and an object
        # is not supposed to be changed once created.
        output_dir = os.path.join(self._tradefed_cache,
                                  hashlib.md5(uri.encode()).hexdigest())
        # Check for existence of the cache entry. We check for directory
        # existence instead of file existence, so that _install_bundle can
        # delete original zip files to save disk space.
        if os.path.exists(output_dir):
            # TODO(crbug.com/800657): Mitigation for the invalid state.
            # Normally this should not happen, but when a lock is forcibly
            # broken due to high IO load, multiple processes may enter the
            # critical section and leave a bad state permanently.
            if os.listdir(output_dir):
                logging.info('Skipping download of %s, reusing content of %s.',
                             uri, output_dir)
                return os.path.join(
                        output_dir,
                        os.path.basename(urlparse.urlparse(uri).path))
            logging.error('Empty cache entry detected %s', output_dir)
        return self._download_to_dir(uri, output_dir)

    def _download_to_dir(self, uri, output_dir):
        """Downloads the gs|http|https|file uri from the storage server.

        @param uri: The Google Storage, dl.google.com or local uri.
        @param output_dir: The directory where the downloaded file should be
                placed.
        @return Path to the downloaded object, name.
        """
        # Split uri into 3 pieces for use by gsutil and also by wget.
        parsed = urlparse.urlparse(uri)
        filename = os.path.basename(parsed.path)
        output = os.path.join(output_dir, filename)

        self._safe_makedirs(output_dir)
        if parsed.scheme not in ['gs', 'http', 'https', 'file']:
            raise error.TestFail(
                    'Error: Unknown download scheme %s' % parsed.scheme)
        if parsed.scheme in ['http', 'https']:
            logging.info('Using wget to download %s to %s.', uri, output_dir)
            # We are downloading 1 file at a time, hence using -O over -P.
            utils.run(
                    'wget',
                    args=('--report-speed=bits', '-O', output, uri),
                    verbose=True)
            return output

        if parsed.scheme in ['file']:
            logging.info('Copy the local file from %s to %s.', parsed.path,
                         output_dir)
            utils.run(
                    'cp',
                    args=('-f', parsed.path, output),
                    verbose=True)
            return output

        # If the machine can access the storage server directly, defer to
        # "gsutil" for downloading.
        logging.info('Downloading %s directly to %s.', uri, output)
        # b/17445576: gsutil rsync of individual files is not implemented.
        res = utils.run('gsutil',
                        args=('cp', uri, output),
                        verbose=True,
                        ignore_status=True)
        if not res or res.exit_status != 0:
            logging.warning('Retrying download...')
            utils.run('gsutil', args=('cp', uri, output), verbose=True)
        return output

    def _instance_copyfile(self, cache_path):
        """Makes a copy of a file from the (shared) cache to a wholly owned
        local instance. Also copies one level of cache directory (MD5 named).
        """
        filename = os.path.basename(cache_path)
        dirname = os.path.basename(os.path.dirname(cache_path))
        instance_dir = os.path.join(self._tradefed_install, dirname)
        # Make sure the destination directory is named the same.
        self._safe_makedirs(instance_dir)
        instance_path = os.path.join(instance_dir, filename)
        shutil.copyfile(cache_path, instance_path)
        return instance_path

    def _instance_copytree(self, cache_path):
        """Makes a copy of a directory from the (shared and writable) cache to
        a wholly owned local instance.

        TODO(ihf): Consider using cp -al to only copy links. Not sure if this
        is really a benefit across the container boundary, but it is risky due
        to the possibility of corrupting the original files by an lxc instance.
        """
        # We keep the top 2 names from the cache_path = .../dir1/dir2.
        dir2 = os.path.basename(cache_path)
        dir1 = os.path.basename(os.path.dirname(cache_path))
        instance_path = os.path.join(self._tradefed_install, dir1, dir2)
        # TODO(kinaba): Fix in a safer way.
        # Below is a workaround to avoid copying the large CTS/GTS tree in the
        # test lab. Contents of $cache_path/android-cts are symlinked to the
        # destination rather than copied.
        # 1) Why not symlink 'android-cts' itself? Because the tests will
        #    create results/ logs/ subplans/ subdirectories there. We do not
        #    want to write to the shared cache.
        # 2) Why not hardlink? Cache and the local directory may be on
        #    different mount points, so hardlink may not work.
        # 3) Why isn't this safe? The cache is cleared when it becomes full,
        #    even while a test is running on an instance.
        # 4) Why is this acceptable despite the unsafety? Cache clearance is
        #    a rare event (once in 6 months). Skylab drones won't usually
        #    live that long, and even if they did, the failure is once in 6
        #    months after all.
        special_src = None
        special_dest = None
        if utils.is_in_container() and not client_utils.is_moblab():
            for xts_name in ['android-cts', 'android-gts', 'android-sts']:
                xts_root = os.path.join(cache_path, xts_name)
                if os.path.exists(xts_root):
                    special_src = xts_root
                    special_dest = os.path.join(instance_path, xts_name)
                    break
        if special_src:
            logging.info('Symlinking contents of %s to instance %s',
                         cache_path, instance_path)
            self._safe_makedirs(special_dest)
            for entry in os.listdir(special_src):
                # Subdirectories are created by relative path from
                # tools/cts_tradefed. So for the 'tools' dir we copy.
                if entry == 'tools':
                    shutil.copytree(os.path.join(special_src, entry),
                                    os.path.join(special_dest, entry))
                elif entry == 'testcases':
                    # Directory structure in testcases/ needs to be
                    # instantiated, because CTS tries the `find` command
                    # in the directory without following symlinks.
                    for subdir, _, files in os.walk(
                            os.path.join(special_src, entry)):
                        rel = os.path.relpath(subdir, special_src)
                        os.mkdir(os.path.join(special_dest, rel))
                        for file in files:
                            os.symlink(os.path.join(special_src, rel, file),
                                       os.path.join(special_dest, rel, file))
                else:
                    os.symlink(os.path.join(special_src, entry),
                               os.path.join(special_dest, entry))
        else:
            logging.info('Copying %s to instance %s', cache_path,
                         instance_path)
            shutil.copytree(cache_path, instance_path)
        return instance_path

    def _install_bundle(self, gs_uri):
        """Downloads a zip file, installs it and returns the local path.

        @param gs_uri: GS bucket that contains the necessary files.
        """
        if not gs_uri.endswith('.zip'):
            raise error.TestFail('Error: Not a .zip file %s.' % gs_uri)
        # Atomic write through of file.
        with tradefed_utils.lock(self._tradefed_cache_lock):
            # Atomic operations.
            self._invalidate_download_cache()
            # Download is lazy (cache_path may not actually exist if
            # cache_unzipped does).
            cache_path = self._download_to_cache(gs_uri)
            # Unzip is lazy as well (but cache_unzipped is guaranteed to
            # exist).
            cache_unzipped = self._unzip(cache_path)
            # To save space we delete the original zip file. This works as
            # _download only checks existence of the cache directory for
            # lazily skipping download, and unzip itself will bail if the
            # unzipped destination exists. Hence we don't need the original
            # anymore.
            if os.path.exists(cache_path):
                logging.info('Deleting original %s', cache_path)
                os.remove(cache_path)
            # Erase dirty marker from disk.
            self._validate_download_cache()
            # We always copy files to give tradefed a clean copy of the
            # bundle.
            unzipped_local = self._instance_copytree(cache_unzipped)
        return unzipped_local

    def _install_files(self, gs_dir, files, permission):
        """Installs binary tools."""
        for filename in files:
            gs_uri = os.path.join(gs_dir, filename)
            # Atomic write through of file.
            with tradefed_utils.lock(self._tradefed_cache_lock):
                # We don't want to leave a corrupt cache for other jobs.
                self._invalidate_download_cache()
                cache_path = self._download_to_cache(gs_uri)
                # Mark cache as clean again.
                self._validate_download_cache()
            # This only affects the current job, so not part of cache
            # validation.
            local = self._instance_copyfile(cache_path)
            os.chmod(local, permission)
            # Keep track of PATH.
            local_dir = os.path.dirname(local)
            self._install_paths.append(local_dir)
            self._adb.add_path(local_dir)

    def _prepare_media(self, media_asset):
        """Downloads and offers the cached media files to tradefed."""
        if media_asset.uri:
            media = self._install_bundle(media_asset.uri)
            if os.path.islink(media_asset.localpath):
                os.unlink(media_asset.localpath)
            if os.path.isdir(media_asset.localpath):
                shutil.rmtree(media_asset.localpath)
            self._safe_makedirs(os.path.dirname(media_asset.localpath))
            os.symlink(media, media_asset.localpath)

            logging.info('Offered %s as a media directory in %s',
                         media, media_asset.localpath)

        # Records the number of existing media bundles, to check later.
        if os.path.isdir(media_asset.localpath):
            self._num_media_bundles = len(
                    os.listdir(media_asset.localpath))

    def _cleanup_media(self, media_asset):
        """Clean up the local copy of cached media files."""
        self._fail_on_unexpected_media_download(media_asset)
        if os.path.islink(media_asset.localpath):
            path = os.readlink(media_asset.localpath)
            os.unlink(media_asset.localpath)
            if os.path.isdir(path):
                logging.info('Cleaning up media files in %s', path)
                shutil.rmtree(path)

    def _fail_on_unexpected_media_download(self, media_asset):
        if os.path.isdir(media_asset.localpath):
            contents = os.listdir(media_asset.localpath)
            # Ignore a table-of-contents file created by newer xTS.
            TOC_FILE = 'contents.toc'
            if TOC_FILE in contents:
                contents.remove(TOC_FILE)
            if len(contents) > self._num_media_bundles:
                raise error.TestFail(
                        'Failed: Unexpected media bundle was added %s' %
                        contents)

    def _should_push_mediastress_asset(self, target_module, board):
        """Returns whether we should manually push mediastress assets.

        TODO(b/210801048): Remove this workaround once ARCVM storage
        performance on ARM becomes good enough.
        """
        return (target_module and 'CtsMediaStressTestCases' in target_module
                and board in ['kukui-arc-r'])

    def _push_mediastress_asset(self, media_asset):
        """Pushes mediastress assets to the DUT for the upcoming test."""
        logging.info(
                'Pushing mediastress assets in advance to work around slow '
                'storage on ARM boards (b/210801048)')

        media_dir = os.path.join(media_asset.localpath,
                                 'android-cts-media-1.5')
        copy_media_sh = os.path.join(media_dir, 'copy_media.sh')
        os.chmod(copy_media_sh, 0o755)

        old_cwd = os.getcwd()
        os.chdir(media_dir)
        try:
            for host in self._hosts:
                host_port = adb_utils.get_adb_target(host)
                self._run(
                        copy_media_sh,
                        args=('all', '-s', host_port),
                        timeout=constants.ADB_PUSH_MEDIASTRESS_TIMEOUT_SECONDS,
                        verbose=True,
                        ignore_status=False,
                        stdout_tee=utils.TEE_TO_LOGS,
                        stderr_tee=utils.TEE_TO_LOGS)
        finally:
            os.chdir(old_cwd)

    def _fetch_helpers_from_dut(self):
        """Fetches the CTS helpers from the DUT and installs them into the
        testcases subdirectory of our local autotest copy.
        """
        tf_testcases = os.path.join(self._repository, 'testcases')

        # Earlier checks enforce that each host has the same build fingerprint,
        # so we can assume that the packages from the first host will work
        # across the whole set.
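        # The property value is a colon-separated list of helper package
        # names; each entry maps to a <package>.apk file fetched below.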
        package_list = self._adb.run(
                self._hosts[0],
                args=('shell', 'getprop',
                      constants.TRADEFED_CTS_HELPERS_PROPERTY)).stdout.strip()
        for pkg in package_list.split(':'):
            if not pkg:
                continue
            apk_name = pkg + '.apk'
            logging.info('Installing CTS helper package %s to %s', apk_name,
                         tf_testcases)
            self._hosts[0].get_file(
                    os.path.join(constants.BOARD_CTS_HELPERS_DIR, apk_name),
                    tf_testcases)

    def _run(self, *args, **kwargs):
        """Executes the given command line.

        To support SDK tools, such as adb or aapt, this adds _install_paths
        to the extra_paths. Before invoking this, ensure _install_files() has
        been called.
        """
        kwargs['extra_paths'] = (
                kwargs.get('extra_paths', []) + self._install_paths)
        return utils.run(*args, **kwargs)

    def _collect_tradefed_global_log(self, result, destination):
        """Collects the tradefed global log.

        @param result: The result object from utils.run.
        @param destination: Autotest result directory (destination of logs).
        """
        match = re.search(r'Saved log to /tmp/(tradefed_global_log_.*\.txt)',
                          result.stdout)
        if not match:
            logging.debug(result.stdout)
            logging.error('no tradefed_global_log file is found')
            return

        name = match.group(1)
        dest = os.path.join(destination, 'logs', 'tmp')
        self._safe_makedirs(dest)
        shutil.copy(os.path.join('/tmp', name), os.path.join(dest, name))

    def _get_expected_failures(self, directory, bundle_abi):
        """Return a list of expected failures or no-test modules.

        @param directory: A directory containing expected-failure or no-test
                files.
        @param bundle_abi: 'arm' or 'x86' if the test is for the particular
                ABI. None otherwise (like GTS, built for multi-ABI.)
        @return: A list of expected failures or no-test modules for the
                current testing device.
        """
        # Load waivers and manual tests so TF doesn't re-run them.
        expected_fail_files = []
        test_board = self._get_board_name()
        test_model = self._get_model_name()
        test_arch = self._get_board_arch()
        sdk_ver = self._get_android_version()
        first_api_level = self._get_first_api_level()
        expected_fail_dir = os.path.join(self.bindir, directory)
        if os.path.exists(expected_fail_dir):
            expected_fail_files += glob.glob(expected_fail_dir + '/*.yaml')

        waivers = cts_expected_failure_parser.ParseKnownCTSFailures(
                expected_fail_files)
        return waivers.find_waivers(test_arch, test_board, test_model,
                                    bundle_abi, sdk_ver, first_api_level,
                                    self._hosts[0])

    def _get_abilist(self):
        """Return the ABI list supported by the device, queried via adb.

        This method should only be called after the android environment is
        successfully initialized."""
        if not self._abilist:
            for _ in range(3):
                abilist_str = self._adb.run(
                        self._hosts[0],
                        args=('shell', 'getprop',
                              'ro.product.cpu.abilist')).stdout.strip()
                if abilist_str:
                    self._abilist = abilist_str.split(',')
                    break
                else:
                    # TODO(kinaba): Sometimes getprop returns an empty string.
                    # Investigate why. For now we mitigate the bug by retries.
                    logging.error('Empty abilist.')
        return self._abilist

    def _get_release_branch_number(self):
        """Returns the DUT branch number (z of Rxx-yyyyy.z.w) or 0 on error."""
        if not self._release_branch_number:
            ver = (self._hosts[0].get_release_version() or '').split('.')
            self._release_branch_number = (int(ver[1]) if len(ver) >= 3 else 0)
        return self._release_branch_number

    def _get_board_arch(self):
        """Return target DUT arch name."""
        if not self._board_arch:
            self._board_arch = ('arm' if self._hosts[0].get_cpu_arch() == 'arm'
                                else 'x86')
        return self._board_arch

    def _get_board_name(self):
        """Return target DUT board name."""
        if not self._board_name:
            self._board_name = self._hosts[0].get_board().split(':')[1]
        return self._board_name

    def _get_model_name(self):
        """Return target DUT model name."""
        if not self._model_name:
            self._model_name = self._hosts[0].get_model_from_cros_config()
        return self._model_name

    def _get_android_version(self):
        """Return target DUT Android SDK version."""
        # TODO(kinaba): factor this out to server/hosts/cros_host.py
        if not self._android_version:
            self._android_version = self._hosts[0].run(
                    'grep ANDROID_SDK /etc/lsb-release',
                    ignore_status=True).stdout.rstrip().split('=')[1]
        return int(self._android_version)

    def _get_first_api_level(self):
        """Return target DUT Android first API level."""
        if not self._first_api_level:
            self._first_api_level = self._hosts[0].get_arc_first_api_level()
        return int(self._first_api_level)

    def _get_max_retry(self, max_retry):
        """Return the maximum number of retries.

        @param max_retry: max_retry specified in the control file.
        @return: number of retries for this specific host.
        """
        if max_retry is None:
            max_retry = self._get_branch_retry(self._BRANCH_DEFAULT_RETRY)
        candidate = [max_retry]
        candidate.append(self._get_board_retry())
        candidate.append(self._get_branch_retry(self._BRANCH_MAX_RETRY))
        return min(x for x in candidate if x is not None)

    def _get_board_retry(self):
        """Return the maximum number of retries for the DUT board name.

        @return: number of max_retry or None.
        """
        board = self._get_board_name()
        if board in self._BOARD_MAX_RETRY:
            return self._BOARD_MAX_RETRY[board]
        logging.info('No board retry specified for board: %s', board)
        return None

    def _get_branch_retry(self, table):
        """Returns the retry count for the DUT branch number defined in
        |table|."""
        number = self._get_release_branch_number()
        for lowerbound, retry in reversed(table):
            if lowerbound <= number:
                return retry
        logging.warning('Could not establish channel. Using retry=0.')
        return 0

    def _is_tablet_mode_device(self):
        """Returns whether the test runs on a tablet mode device."""
        # TODO(kinaba): consider adding per-model check
        board = self._get_board_name()
        return any(board.startswith(b) for b in constants.TABLET_MODE_BOARDS)

    def _run_commands(self, commands, **kwargs):
        """Run commands on all the hosts."""
        # We need to copy the ADB key to the device to run adb on it.
        pre_commands = []
        if any(command.startswith('adb ') for command in commands):
            key_path = '/tmp/arc.adb_key'
            for host in self._hosts:
                host.env['ADB_VENDOR_KEYS'] = key_path
            pre_commands = [
                    'adb kill-server',
                    'echo %s > %s' %
                    (pipes.quote(constants.PRIVATE_KEY), key_path)
            ]

        for host in self._hosts:
            if pre_commands:
                logging.info('Running DUT adb setup')
                for command in pre_commands:
                    host.run(command, ignore_status=True, verbose=False)
            for command in commands:
                logging.info('RUN: %s\n', command)
                output = host.run(command, **kwargs)
                logging.info('END: %s\n', command)
                logging.debug(output)

    def _override_powerd_prefs(self):
        """Overrides powerd prefs to prevent the screen from turning off,
        complying with CTS requirements.

        This is a remote version of PowerPrefChanger which ensures overridden
        policies won't persist across reboots by bind-mounting onto the config
        directory.
        """
        pref_dir = constants.POWERD_PREF_DIR
        temp_dir = constants.POWERD_TEMP_DIR
        commands = (
                'cp -r %s %s' % (pref_dir, temp_dir),
                'echo 1 > %s/ignore_external_policy' % temp_dir,
                'echo 0 | tee %s/{,un}plugged_{dim,off,suspend}_ms' % temp_dir,
                'mount --bind %s %s' % (temp_dir, pref_dir),
                'restart powerd',
        )
        try:
            self._run_commands(commands)
        except (error.AutoservRunError, error.AutoservSSHTimeout):
            logging.warning('Failed to override powerd policy, tests depending '
                            'on screen being always on may fail.')

    def _restore_powerd_prefs(self):
        """Restores powerd prefs overridden by _override_powerd_prefs()."""
        pref_dir = constants.POWERD_PREF_DIR
        temp_dir = constants.POWERD_TEMP_DIR
        commands = (
                'umount %s' % pref_dir,
                'restart powerd',
                'rm -rf %s' % temp_dir,
        )
        try:
            self._run_commands(commands)
        except (error.AutoservRunError, error.AutoservSSHTimeout):
            logging.warning('Failed to restore powerd policy, overridden '
                            'policy will persist until device reboot.')

    def _should_set_cpu_governor(self, target_module, board):
        """Returns whether we should set the performance governor."""
        # TODO(kinaba): The current restore logic only applies to Kukui
        # and Trogdor. Please update the logic when expanding the scope.
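        # Only dEQP modules on the ARM boards listed below get the
        # performance governor.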
        return (target_module and "CtsDeqp" in target_module) and (board in [
                'kukui-arc-r', 'trogdor-arc-r'
        ])

    def _set_cpu_governor(self, governor):
        """Set the specified CPU governor."""
        self._run_commands([('for i in /sys/devices/system/cpu/cpufreq/*; do'
                             ' echo %s > $i/scaling_governor; done') % governor
                            ])

    def _override_cpu_governor(self):
        """Override the CPU governor for performance mode."""
        try:
            self._set_cpu_governor('performance')
        except (error.AutoservRunError, error.AutoservSSHTimeout):
            logging.warning('Failed to override CPU governor, tests depending '
                            'on boosted performance may fail.')

    def _restore_cpu_governor(self):
        """Restore the CPU governor to the default value."""
        try:
            self._set_cpu_governor('schedutil')
        except (error.AutoservRunError, error.AutoservSSHTimeout):
            logging.warning('Failed to restore CPU governor, overridden '
                            'policy will persist until device reboot.')

    def _mute_device(self):
        """Mutes the device to avoid noise while running tests."""
        try:
            self._run_commands(['cras_test_client --mute 1'],
                               ignore_status=True)
        except:
            logging.warning('Failed to mute the device')

    def _clean_crash_logs(self):
        try:
            self._run_commands(['rm -f /home/chronos/crash/*'])
        except (error.AutoservRunError, error.AutoservSSHTimeout):
            logging.warning('Failed to clean up crash logs.')

    def _run_and_parse_tradefed(self, command):
        """Kick off the tradefed command.

        @param command: Lists of command tokens.
        @raise TestFail: when a test failure is detected.
        @return: tuple of (tests, pass, fail, notexecuted) counts.
        """
        target_argument = []
        for host in self._hosts:
            target_argument += ['-s', adb_utils.get_adb_target(host)]
        shard_argument = []
        if len(self._hosts) > 1:
            if self._SHARD_CMD:
                shard_argument = [self._SHARD_CMD, str(len(self._hosts))]
            else:
                logging.warning('cts-tradefed shard command isn\'t defined, '
                                'falling back to use a single device.')
        command = command + target_argument + shard_argument

        try:
            output = self._run_tradefed(command)
        except Exception as e:
            self._log_java_version()
            if not isinstance(e, error.CmdTimeoutError):
                # In case this happened due to file corruption, try to force
                # recreating the cache.
                logging.error('Failed to run tradefed! Cleaning up now.')
                self._clean_download_cache_if_needed(force=True)
            raise

        result_destination = self._default_tradefed_base_dir()
        # Gather the global log first. Datetime parsing below can abort the
        # test if tradefed startup had failed. Even then the global log is
        # useful.
        self._collect_tradefed_global_log(output, result_destination)
        # Result parsing must come after all other essential operations as test
        # warnings, errors and failures can be raised here.
        base = self._default_tradefed_base_dir()
        path = tradefed_utils.get_test_result_xml_path(base)
        return tradefed_utils.parse_tradefed_testresults_xml(
                test_result_xml_path=path,
                waivers=self._waivers)

    def _setup_result_directories(self):
        """Sets up the results and logs directories for tradefed.

        Tradefed saves the logs and results at:
          self._repository/results/$datetime/
          self._repository/results/$datetime.zip
          self._repository/logs/$datetime/
        Because other tools rely on the currently chosen Google storage paths
        we need to keep destination_results in:
          self.resultsdir/android-cts/results/$datetime/
          self.resultsdir/android-cts/results/$datetime.zip
          self.resultsdir/android-cts/results/logs/$datetime/
        To bridge between them, create symlinks from the former to the latter.
        """
        logging.info('Setting up tradefed results and logs directories.')

        results_destination = self._default_tradefed_base_dir()
        logs_destination = os.path.join(results_destination, 'logs')
        directory_mapping = [
                (os.path.join(self._repository, 'results'),
                 results_destination),
                (os.path.join(self._repository, 'logs'), logs_destination),
        ]

        for (tradefed_path, final_path) in directory_mapping:
            if os.path.exists(tradefed_path):
                shutil.rmtree(tradefed_path)
            self._safe_makedirs(final_path)
            os.symlink(final_path, tradefed_path)

    def _default_tradefed_base_dir(self):
        return os.path.join(self.resultsdir, self._get_tradefed_base_dir())

    def _install_plan(self, subplan):
        """Copy test subplan to CTS-TF.

        @param subplan: CTS subplan to be copied into TF.
        """
        logging.info('Install subplan: %s', subplan)
        subplans_tf_dir = os.path.join(self._repository, 'subplans')
        if not os.path.exists(subplans_tf_dir):
            os.makedirs(subplans_tf_dir)
        test_subplan_file = os.path.join(self.bindir, 'subplans',
                                         '%s.xml' % subplan)
        try:
            shutil.copy(test_subplan_file, subplans_tf_dir)
        except (shutil.Error, OSError, IOError) as e:
            raise error.TestFail(
                    'Error: failed to copy test subplan %s to CTS bundle. %s' %
                    (test_subplan_file, e))

    def _should_skip_test(self, _bundle):
        """Some tests are expected to fail and are skipped.

        Subclasses should override with specific details.
        """
        return False

    def _should_reboot(self, steps):
        """Oracle to decide if the DUT should reboot or just restart Chrome.

        For now we will not reboot after the first two iterations, but on all
        iterations afterward as before. In particular this means that most CTS
        tests will now not get a "clean" machine, but one on which tests ran
        before. But we will still reboot after persistent failures, hopefully
        not causing too many flakes down the line.
        """
        if steps < 3:
            return False
        return True

    def _copy_extra_artifacts_dut(self, extra_artifacts, host, output_dir):
        """Upload the custom artifacts from the DUT."""
        self._safe_makedirs(output_dir)

        for artifact in extra_artifacts:
            logging.info('Copying extra artifacts from "%s" to "%s".',
                         artifact, output_dir)
            try:
                self._adb.run(host,
                              verbose=True,
                              timeout=120,
                              args=('pull', artifact, output_dir))
            except:
                # Maybe the ADB connection failed, or the artifacts don't
                # exist.
                logging.exception('Copying extra artifacts failed.')

    def _copy_extra_artifacts_host(self, extra_artifacts, host, output_dir):
        """Upload the custom artifacts from the host."""
        self._safe_makedirs(output_dir)

        for artifact in extra_artifacts:
            logging.info('Copying extra artifacts from "%s" to "%s".',
                         artifact, output_dir)
            for extracted_path in glob.glob(artifact):
                logging.info('... %s', extracted_path)
                # Move it so it is not collected again in future retries.
                shutil.move(extracted_path, output_dir)

    def _run_tradefed_list_results(self):
        """Run the `tradefed list results` command.

        @return: tuple of the last (session_id, pass, fail, all_done?).
        """

        # Fix b/143580192: We set the timeout to 3 min. It never takes more
        # than 10s on light disk load.
        output = self._run_tradefed_with_timeout(['list', 'results'], 180)

        # Parses the last session from the output that looks like:
        #
        # Session  Pass  Fail  Modules Complete ...
        # 0        90    10    1 of 2
        # 1        199   1     2 of 2
        # ...
        lastmatch = None
        for m in re.finditer(r'^(\d+)\s+(\d+)\s+(\d+)\s+(\d+) of (\d+)',
                             output.stdout, re.MULTILINE):
            session, passed, failed, done, total = map(int,
                                                       m.group(1, 2, 3, 4, 5))
            lastmatch = (session, passed, failed, done == total)
        return lastmatch

    def _get_bundle_url(self, uri, bundle):
        # TODO: Replace with NotImplementedError once all subclasses are done.
        return self._get_latest_bundle_url(bundle) if uri == 'LATEST' else (
                uri or self._get_default_bundle_url(bundle))

    def _tradefed_retry_command(self, template, session_id):
        raise NotImplementedError('Subclass should override this function')

    def _tradefed_run_command(self, template):
        raise NotImplementedError('Subclass should override this function')

    def _tradefed_cmd_path(self):
        raise NotImplementedError('Subclass should override this function')

    def _tradefed_env(self):
        return None

    def _run_tradefed_with_timeout(self, command, timeout):
        tradefed = self._tradefed_cmd_path()
        with tradefed_utils.adb_keepalive(
                adb_utils.get_adb_targets(self._hosts), self._install_paths):
            logging.info('RUN(timeout=%d): %s', timeout,
                         ' '.join([tradefed] + command))
            output = self._run(
                    tradefed,
                    args=tuple(command),
                    env=self._tradefed_env(),
                    timeout=timeout,
                    verbose=True,
                    ignore_status=False,
                    # Make sure to tee tradefed stdout/stderr to autotest logs
                    # continuously during the test run.
                    stdout_tee=utils.TEE_TO_LOGS,
                    stderr_tee=utils.TEE_TO_LOGS)
            logging.info('END: %s\n', ' '.join([tradefed] + command))
            return output

    def _run_tradefed(self, command):
        timeout = self._timeout * self._timeout_factor
        if self._job_deadline is not None:
            clipped = int(min(timeout, self._job_deadline - time.time()))
            # Even the shortest tradefed run takes 1.5 minutes. We use twice
            # that value as the threshold below which no meaningful test can
            # run.
            if clipped < 3 * 60:
                raise error.TestError(
                        'Hitting job time limit: only %s seconds left' %
                        clipped)
            timeout = clipped
        return self._run_tradefed_with_timeout(command, timeout)

    def _run_tradefed_with_retries(self,
                                   test_name,
                                   run_template,
                                   retry_template,
                                   timeout,
                                   media_asset=None,
                                   enable_default_apps=False,
                                   target_module=None,
                                   target_plan=None,
                                   executable_test_count=None,
                                   bundle=None,
                                   use_helpers=False,
                                   extra_artifacts=[],
                                   extra_artifacts_host=[],
                                   login_precondition_commands=[],
                                   precondition_commands=[],
                                   prerequisites=[]):
        """Run CTS/GTS with retry logic.

        We first kick off the specified module. Then rerun just the failures
        on the next MAX_RETRY iterations.
        """
        for prereq in prerequisites:
            result = tradefed_prerequisite.check(prereq, self._hosts)
            if not result[0]:
                raise error.TestError(result[1])

        # On dev and beta channels timeouts are sharp, lenient on stable.
        self._timeout = timeout
        if (self._get_release_branch_number() >=
                constants.APPROXIMATE_STABLE_BRANCH_NUMBER):
            self._timeout += 3600

        if self._should_skip_test(bundle):
            logging.warning('Skipped test %s', ' '.join(test_name))
            return

        steps = -1  # For historic reasons the first iteration is not counted.
        self.summary = ''
        board = self._get_board_name()
        session_id = None

        self._setup_result_directories()
        if media_asset:
            self._prepare_media(media_asset)

        # This loop retries failures. For this reason please do not raise
        # TestFail in this loop if you suspect the failure might be fixed
        # in the next loop iteration.
        while steps < self._max_retry:
            steps += 1
            keep_media = media_asset and media_asset.uri and steps >= 1
            self._run_commands(login_precondition_commands, ignore_status=True)
            # TODO(kinaba): Make it a general config (per-model choice
            # of tablet,clamshell,default) if the code below works.
            if utils.is_in_container() and not client_utils.is_moblab():
                # Force laptop mode for non TABLET_MODE_BOARDS
                if not self._is_tablet_mode_device():
                    self._run_commands(
                            ['inject_powerd_input_event --code=tablet --value=0'],
                            ignore_status=True)

            session_log_dir = os.path.join(self.resultsdir,
                                           'login_session_log',
                                           'step%02d' % steps)
            with login.login_chrome(hosts=self._hosts,
                                    board=board,
                                    dont_override_profile=keep_media,
                                    enable_default_apps=enable_default_apps,
                                    log_dir=session_log_dir) as current_logins:
                if self._should_reboot(steps):
                    # TODO(rohitbm): Evaluate if power cycle really helps with
                    # Bluetooth test failures, and then make the implementation
                    # more strict by first running complete restart and reboot
                    # retries and then perform power cycle.
                    #
                    # Currently, (steps + 1 == self._max_retry) means that
                    # hard_reboot is attempted after "this" cycle failed. Then,
                    # the last remaining 1 step will be run on the rebooted DUT.
                    hard_reboot = (self._hard_reboot_on_failure
                                   and steps + 1 == self._max_retry)
                    for current_login in current_logins:
                        current_login.need_reboot(hard_reboot=hard_reboot)
                self._ready_arc()
                self._calculate_test_count_factor(bundle)

                # Check the ABI list and skip (pass) the tests if not
                # applicable. This needs to be done after _ready_arc() for
                # reading the device's ABI list from the booted ARC instance.
                if '--abi' in run_template:
                    abi = run_template[run_template.index('--abi') + 1]
                    abilist = self._get_abilist()
                    if abilist and abi not in abilist:
                        logging.info(
                                'Specified ABI %s is not in the device ABI '
                                'list %s. Skipping.', abi, abilist)
                        return

                # For CtsMediaStressTestCases, push media assets in advance
                # if applicable.
                if (not keep_media and media_asset
                            and self._should_push_mediastress_asset(
                                    target_module, board)):
                    self._push_mediastress_asset(media_asset)

                self._run_commands(precondition_commands, ignore_status=True)
                if use_helpers:
                    self._fetch_helpers_from_dut()

                # Run tradefed.
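                # The first iteration launches the full run; later iterations
                # retry only the failures recorded under session_id.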
                if session_id == None:
                    if target_plan is not None:
                        self._install_plan(target_plan)

                    logging.info('Running %s:', test_name)
                    command = self._tradefed_run_command(run_template)
                else:
                    logging.info('Retrying failures of %s with session_id %d:',
                                 test_name, session_id)
                    command = self._tradefed_retry_command(retry_template,
                                                           session_id)

                if media_asset and media_asset.uri:
                    # Clean up crash logs from previous sessions to ensure
                    # enough disk space for 16GB storage devices: b/156075084.
                    if not keep_media:
                        self._clean_crash_logs()
                # b/196748125. Mute before running tests to avoid noises.
                self._mute_device()
                set_performance_governor = self._should_set_cpu_governor(
                        target_module, board)
                # TODO(b/182397469): speculatively disable the "screen-on"
                # handler for dEQP. Revert when the issue is resolved.
                keep_screen_on = not (target_module
                                      and "CtsDeqpTestCases" in target_module)
                if set_performance_governor:
                    self._override_cpu_governor()
                if keep_screen_on:
                    self._override_powerd_prefs()
                try:
                    waived_tests = self._run_and_parse_tradefed(command)
                finally:
                    if keep_screen_on:
                        self._restore_powerd_prefs()
                    if set_performance_governor:
                        self._restore_cpu_governor()
                if media_asset:
                    self._fail_on_unexpected_media_download(media_asset)
                result = self._run_tradefed_list_results()
                if not result:
                    logging.error('Did not find any test results. Retry.')
                    for current_login in current_logins:
                        current_login.need_reboot()
                    continue

                last_waived = len(waived_tests)
                last_session_id, last_passed, last_failed, last_all_done =\
                    result

                if last_failed > last_waived or not utils.is_in_container():
                    for host in self._hosts:
                        dir_name = "%s-step%02d" % (host.hostname, steps)
                        output_dir = os.path.join(
                                self.resultsdir, 'extra_artifacts', dir_name)
                        self._copy_extra_artifacts_dut(
                                extra_artifacts, host, output_dir)
                        self._copy_extra_artifacts_host(
                                extra_artifacts_host, host, output_dir)

                if last_passed + last_failed > 0:
                    # At least one test had run, which means the media push
                    # step of tradefed didn't fail. To free up the storage
                    # earlier, delete the copy on the server side. See
                    # crbug.com/970881.
                    if media_asset:
                        self._cleanup_media(media_asset)

                if last_failed < last_waived:
                    logging.error(
                            'Error: Internal waiver bookkeeping has become '
                            'inconsistent (f=%d, w=%d)', last_failed,
                            last_waived)

                msg = 'run' if session_id == None else ' retry'
                msg += '(p=%s, f=%s, w=%s)' % (last_passed, last_failed,
                                               last_waived)
                self.summary += msg
                logging.info('RESULT: %s %s', msg, result)

                # Overwrite last_all_done if the executed test count is equal
                # to the known test count of the job.
                if (not last_all_done and executable_test_count != None and
                        (last_passed + last_failed in executable_test_count)):
                    logging.warning('Overwriting all_done as True, since the '
                                    'explicitly set executable_test_count '
                                    'tests have run.')
                    last_all_done = True

                # Check for no-test modules. We use the "all_done" indicator
                # provided by list_results to decide if there are outstanding
                # modules to iterate over (similar to missing tests just on a
                # per-module basis).
                notest = (last_passed + last_failed == 0 and last_all_done)
                if target_module in self._notest_modules:
                    if notest:
                        logging.info('Package has no tests as expected.')
                        return
                    else:
                        # We expected no tests, but the new bundle drop must
                        # have added some for us. Alert us to the situation.
                        raise error.TestFail(
                                'Failed: Remove module %s from '
                                'notest_modules directory!' % target_module)
                elif notest:
                    logging.error('Did not find any tests in module. Hoping '
                                  'this is transient. Retry after reboot.')
                    for current_login in current_logins:
                        current_login.need_reboot()
                    continue

                # After the no-test check, commit the pass/fail count.
                waived = last_waived
                session_id, passed, failed, all_done =\
                    last_session_id, last_passed, last_failed, last_all_done

                # Check if all the tests passed.
                if failed <= waived and all_done:
                    break

                # TODO(b/127908450) Tradefed loses track of not-executed tests
                # when the commandline pattern included '*', and retry run for
                # them wrongly declares all tests passed. This is misleading.
                # Rather, we give up the retry and report the result as FAIL.
                if not all_done and '*' in ''.join(run_template):
                    break

        if session_id == None:
            raise error.TestFail('Error: Could not find any tests in module.')

        if failed <= waived and all_done:
            # TODO(ihf): Make this error.TestPass('...') once
            # available.
            if steps > 0 and self._warn_on_test_retry:
                raise error.TestWarn(
                        'Passed: after %d retries passing %d tests, '
                        'waived=%d. %s' % (steps, passed, waived,
                                           self.summary))
            return

        raise error.TestFail(
                'Failed: after %d retries giving up. '
                'passed=%d, failed=%d, waived=%d%s. %s' %
                (steps, passed, failed, waived,
                 '' if all_done else ', notexec>=1', self.summary))
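
# Illustrative sketch: a concrete suite wrapper is expected to subclass
# TradefedTest and fill in the hooks that raise NotImplementedError above.
# The class name and return values below are hypothetical and only show the
# expected shape; the overridden method names and _SHARD_CMD are the real
# extension points defined in this file.
#
#   class cheets_ExampleSuite(TradefedTest):
#       _SHARD_CMD = '--shard-count'
#
#       def _tradefed_cmd_path(self):
#           return os.path.join(self._repository, 'tools', 'example-tradefed')
#
#       def _tradefed_run_command(self, template):
#           return ['run', 'commandAndExit'] + template
#
#       def _tradefed_retry_command(self, template, session_id):
#           return ['run', 'commandAndExit', 'retry',
#                   '--retry', '%d' % session_id] + template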