xref: /aosp_15_r20/external/autotest/server/cros/tradefed/tradefed_test.py (revision 9c5db1993ded3edbeafc8092d69fe5de2ee02df7)
1# Lint as: python2, python3
2# Copyright 2016 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6# repohooks/pre-upload.py currently does not run pylint. But for developers who
7# want to check their code manually we disable several harmless pylint warnings
8# which just distract from more serious remaining issues.
9#
10# The instance variables _host and _install_paths are not defined in __init__().
11# pylint: disable=attribute-defined-outside-init
12#
13# Many short variable names don't follow the naming convention.
14# pylint: disable=invalid-name
15#
16# _parse_result() and _dir_size() don't access self and could be functions.
17# pylint: disable=no-self-use
18
19from collections import namedtuple
20import errno
21import glob
22import hashlib
23import logging
24import os
25import pipes
26import re
27import shutil
28import stat
29import subprocess
30import tempfile
31import time
32import six.moves.urllib_parse as urlparse
33
34from autotest_lib.client.bin import utils as client_utils
35from autotest_lib.client.common_lib import error
36from autotest_lib.server import test
37from autotest_lib.server import utils
38from autotest_lib.server.cros.tradefed import adb as adb_utils
39from autotest_lib.server.cros.tradefed import cts_expected_failure_parser
40from autotest_lib.server.cros.tradefed import tradefed_chromelogin as login
41from autotest_lib.server.cros.tradefed import tradefed_constants as constants
42from autotest_lib.server.cros.tradefed import tradefed_utils
43from autotest_lib.server.cros.tradefed import tradefed_prerequisite
44from autotest_lib.server.autotest import OFFLOAD_ENVVAR
45
# TODO(kinaba): Move to tradefed_utils together with the setup/cleanup methods.
# Lightweight record with two fields, |uri| and |localpath| (presumably the
# remote location of a media asset and where it is stored locally -- verify
# against the callers).
# NOTE(review): the generated type is named 'MediaAssetInfo' while it is bound
# to the name MediaAsset here; the repr of instances shows the former.
MediaAsset = namedtuple('MediaAssetInfo', ['uri', 'localpath'])
48
49
class TradefedTest(test.test):
    """Base class to prepare DUT to run tests via tradefed."""
    version = 1

    # Default and upperbounds of max_retry, based on board and revision
    # after branching (that is, 'y' of R74-12345.y.z).
    #
    # By default, 0<=y<1 does 5 retries and 1<=y does 10. The |max_retry|
    # parameter in control files can override the count, within the
    # _BRANCH_MAX_RETRY limit below.
    #
    # Both tables are lists of (lowest branch number, retry count) pairs in
    # ascending branch-number order.
    _BRANCH_DEFAULT_RETRY = [(0, 5), (1, 10)]  # dev=5, beta=stable=10
    _BRANCH_MAX_RETRY = [(0, 12), (1, 30),      # dev=12, beta=30, stable=99
        (constants.APPROXIMATE_STABLE_BRANCH_NUMBER, 99)]
    # Per-board upper bound of the retry count, keyed by board name.
    # TODO(kinaba): betty-arcnext
    _BOARD_MAX_RETRY = {'betty': 0}

    # Class-level defaults. They are presumably filled in lazily by the
    # corresponding getters -- verify against the rest of the class.
    _SHARD_CMD = None
    _board_arch = None
    _board_name = None
    _model_name = None
    _release_branch_number = None  # The 'y' of OS version Rxx-xxxxx.y.z
    _android_version = None
    _first_api_level = None
    _num_media_bundles = 0
    # NOTE(review): this list object is shared by every instance until the
    # attribute is rebound; confirm it is only reassigned, never mutated in
    # place.
    _abilist = []

    # A job will be aborted after 16h. Subtract 30m for setup/teardown.
    _MAX_LAB_JOB_LENGTH_IN_SEC = 16 * 60 * 60 - 30 * 60
    # Absolute deadline (in time.time() seconds) set by initialize() when
    # running in a lab container; stays None otherwise.
    _job_deadline = None
79
80    # Currently this is only used for dependency injection for testing.
81    def __init__(self, *args, **kwargs):
82        super().__init__(*args)
83        self._adb = kwargs.get('adb', adb_utils.Adb())
84
85    def _log_java_version(self):
86        """Log java version to debug failures due to version mismatch."""
87        utils.run(
88            'java',
89            args=('-version',),
90            ignore_status=False,
91            verbose=True,
92            stdout_tee=utils.TEE_TO_LOGS,
93            stderr_tee=utils.TEE_TO_LOGS)
94
95    def initialize(self,
96                   bundle=None,
97                   uri=None,
98                   host=None,
99                   hosts=None,
100                   max_retry=None,
101                   load_waivers=True,
102                   retry_manual_tests=False,
103                   warn_on_test_retry=True,
104                   hard_reboot_on_failure=False,
105                   use_jdk9=False,
106                   use_old_adb=False):
107        """Sets up the tools and binary bundles for the test."""
108        if utils.is_in_container() and not client_utils.is_moblab():
109            self._job_deadline = time.time() + self._MAX_LAB_JOB_LENGTH_IN_SEC
110
111        self._install_paths = []
112        # TODO(pwang): Remove host if we enable multiple hosts everywhere.
113        self._hosts = [host] if host else hosts
114        for host in self._hosts:
115            logging.info('Hostname: %s', host.host_port)
116        self._verify_hosts()
117
118        self._max_retry = self._get_max_retry(max_retry)
119        self._warn_on_test_retry = warn_on_test_retry
120        # Tests in the lab run within individual lxc container instances.
121        if utils.is_in_container():
122            cache_root = constants.TRADEFED_CACHE_CONTAINER
123        else:
124            cache_root = constants.TRADEFED_CACHE_LOCAL
125
126        # The content of the cache survives across jobs.
127        self._safe_makedirs(cache_root)
128        self._tradefed_cache = os.path.join(cache_root, 'cache')
129        self._tradefed_cache_lock = os.path.join(cache_root, 'lock')
130        self._tradefed_cache_dirty = os.path.join(cache_root, 'dirty')
131        # The content of the install location does not survive across jobs and
132        # is isolated (by using a unique path)_against other autotest instances.
133        # This is not needed for the lab, but if somebody wants to run multiple
134        # TradedefTest instance.
135        self._tradefed_install = tempfile.mkdtemp(
136            prefix=constants.TRADEFED_PREFIX)
137        # Under lxc the cache is shared between multiple autotest/tradefed
138        # instances. We need to synchronize access to it. All binaries are
139        # installed through the (shared) cache into the local (unshared)
140        # lxc/autotest instance storage.
141        # If clearing the cache it must happen before all downloads.
142        self._clean_download_cache_if_needed()
143        # Set permissions (rwxr-xr-x) to the executable binaries.
144        permission = (
145            stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH
146            | stat.S_IXOTH)
147
148        adb_dir = constants.ADB_DIR_OLD if use_old_adb else constants.ADB_DIR
149        self._install_files(adb_dir, constants.ADB_FILES, permission)
150        self._install_files(constants.SDK_TOOLS_DIR,
151                            constants.SDK_TOOLS_FILES, permission)
152
153        # If use_jdk9 is set true, use jdk9 than default jdk8.
154        if use_jdk9:
155            if utils.is_in_container() and not client_utils.is_moblab():
156                logging.info('Lab: switching to JDK9')
157                try:
158                    os.environ['JAVA_HOME'] = '/usr/lib/jvm/jdk-9.0.4'
159                    os.environ['PATH'] = os.environ['JAVA_HOME']\
160                                      + '/bin:' + os.environ['PATH']
161                    logging.info(
162                            subprocess.check_output(['java', '-version'],
163                                                    stderr=subprocess.STDOUT))
164                except OSError:
165                    logging.error('Can\'t change current PATH directory')
166            else:
167                logging.info('Non-lab environment: should be using JDK9+')
168
169        # TODO(kinaba): Remove the hack and fully enable the feature.
170        # For release branches (Rx-yyyyy.3.0 or above), always use the
171        # official build instead of the release build. See b/210369548
172        if uri == 'DEV' and self._get_release_branch_number() >= 3:
173            uri = 'LATEST'
174        # Install the tradefed bundle.
175        bundle_install_path = self._install_bundle(
176                self._get_bundle_url(uri, bundle))
177        self._repository = os.path.join(bundle_install_path,
178                                        self._get_tradefed_base_dir())
179
180        # Load expected test failures to exclude them from re-runs.
181        self._waivers = set()
182        if load_waivers:
183            self._waivers.update(
184                    self._get_expected_failures('expectations', bundle))
185        if not retry_manual_tests:
186            self._waivers.update(
187                    self._get_expected_failures('manual_tests', bundle))
188
189        # Load modules with no tests.
190        self._notest_modules = self._get_expected_failures('notest_modules',
191                bundle)
192        self._hard_reboot_on_failure = hard_reboot_on_failure
193
194    def _output_perf(self):
195        """Output performance values."""
196        base = self._default_tradefed_base_dir()
197        path = tradefed_utils.get_test_result_xml_path(base)
198        if path:
199            for metric in tradefed_utils.get_perf_metrics_from_test_result_xml(
200                path, self.resultsdir):
201                self.output_perf_value(**metric)
202
203    def _prepare_synchronous_offloads(self):
204        """
205        Copy files needed for APFE to synchronous offload dir,  with some
206        structure to make the post-job postprocessing simpler.
207        """
208        testname = os.path.basename(self.outputdir)
209        # This is yyyy.mm.dd_hh.mm.ss  (start time)
210        timestamp_pattern = ("[0-9][0-9][0-9][0-9].[0-9][0-9].[0-9][0-9]" +
211                             "_[0-9][0-9].[0-9][0-9].[0-9][0-9]")
212        time_glob = os.path.join(
213            self._default_tradefed_base_dir(), timestamp_pattern
214        )
215        for dirpath in glob.glob(time_glob):
216            timestamp = os.path.basename(dirpath)
217            locs = [os.path.join(dirpath, f) for f in ["test_result.xml",
218                                                       "testResult.xml"]]
219            for f in locs:
220                if os.path.exists(f):
221                    subdirs = self._subdirs(f, testname, timestamp)
222                    self._copy_to_offload_dir(f, subdirs)
223        for z in glob.glob(time_glob+".zip"):
224            self._copy_to_offload_dir(z, self._subdirs(z, testname))
225
226    def _copy_to_offload_dir(self, src_path, subdirs, recursive=True):
227        target = os.path.join(os.getenv(OFFLOAD_ENVVAR), *subdirs)
228        self._safe_makedirs(target)
229        if not recursive or os.path.isfile(src_path):
230            return shutil.copy2(src_path, str(target))
231        return shutil.copytree(src_path, str(target))
232
233    def _subdirs(self, path, testname, timestamp=""):
234        # CTS results from bvt-arc suites need to be sent to the
235        # specially-designated bucket for early EDI entries in APFE,
236        # but only there.
237        dest = "BVT" if 'bvt-arc' in path else "CTS"
238        return ["APFE", dest, testname, timestamp]
239
240    def cleanup(self):
241        """Cleans up any dirtied state."""
242
243        # We also run a postprocess result and performance data
244        # offloading here so that WARN and FAIL runs also run the
245        # steps. postprocess() method only runs for PASSing jobs.
246        self._prepare_synchronous_offloads()
247        self._output_perf()
248
249        try:
250            # Clean up test data that may not be deletable on previous
251            # ChromeOS versions. See b/170276268.
252            self._run_commands([
253                    'cryptohome --action=remove --force [email protected]'
254            ],
255                               ignore_status=True)
256        except:
257            logging.error('Failed to clean up the test account.')
258
259        self._kill_adb_server()
260
261        if hasattr(self, '_tradefed_install'):
262            logging.info('Cleaning up %s.', self._tradefed_install)
263            try:
264                shutil.rmtree(self._tradefed_install)
265            except IOError:
266                pass
267
268    def _kill_adb_server(self):
269        # Kill any lingering adb servers.
270        try:
271            self._adb.run(None,
272                          verbose=True,
273                          args=('kill-server', ),
274                          timeout=constants.ADB_KILL_SERVER_TIMEOUT_SECONDS)
275        except error.CmdTimeoutError as e:
276            logging.warn(e)
277            # `adb kill-server` sometimes hangs up. Kill it more brutally.
278            try:
279                client_utils.system(
280                    'killall adb',
281                    ignore_status=True,
282                    timeout=constants.ADB_KILL_SERVER_TIMEOUT_SECONDS)
283            except error.CmdTimeoutError as e:
284                # The timeout is ignored, since the only known failure pattern
285                # b/142828365 is due to a zombie process that does not prevent
286                # starting a new server with a new adb key.
287                logging.warn(e)
288        except (error.CmdError, AttributeError):
289            pass
290
291    def _verify_hosts(self):
292        """Verify all hosts' ChromeOS consistency."""
293        # Check release builder path. E.g. cave-release/R66-10435.0.0
294        release_builder_path = set(host.get_release_builder_path()
295                                   for host in self._hosts)
296        if len(release_builder_path) > 1:
297            raise error.TestFail('Hosts\' CHROMEOS_RELEASE_BUILDER_PATH is '
298                                 'different: %s', release_builder_path)
299
300        # Check ChromeOS ARC VERSION. E.g.
301        arc_version = set(host.get_arc_version() for host in self._hosts)
302        if len(arc_version) > 1:
303            raise error.TestFail('Hosts\' CHROMEOS_ARC_VERSION is different: '
304                                 '%s', arc_version)
305
306        # Check ChromeOS model for unibuild.
307        # TODO(pwang): Adding a check if we found how to detect host's model.
308
309    def _verify_arc_hosts(self):
310        """Verify all hosts' Android configuration consistency.
311
312        This method should only be called after all hosts' Android has been
313        successfully booted up."""
314        # Check all hosts have same Android fingerprint.
315        fingerprint = set(
316                self._adb.run(host,
317                              args=('shell', 'getprop',
318                                    'ro.build.fingerprint')).stdout
319                for host in self._hosts)
320        if len(fingerprint) > 1:
321            raise error.TestFail('Hosts\' supported fingerprint is different: '
322                                 '%s', fingerprint)
323
324    def _calculate_test_count_factor(self, bundle):
325        """ Calculate the multiplicative factor for the test case number.
326
327        The value equals to the times each test case is run, which is determined
328        by the intersection of the supported ABIs of the CTS/GTS bundle and that
329        of the tested device."""
330        # This is only a conservative approximation. Some suites only run the
331        # primary ABI, so to be fully precise, those have to be counted as 1.
332        arm_abis = set(('armeabi-v7a', 'arm64-v8a'))
333        x86_abis = set(('x86', 'x86_64'))
334        if bundle and bundle.startswith('arm'):
335            tradefed_abis = arm_abis
336        elif bundle and bundle.startswith('x86'):
337            tradefed_abis = x86_abis
338        else:
339            tradefed_abis = arm_abis | x86_abis
340        self._test_count_factor = len(set(self._get_abilist()) & tradefed_abis)
341        # Avoid setting timeout=0 (None) in any cases.
342        self._timeout_factor = max(1, self._test_count_factor)
343
344    def _try_adb_connect(self, host):
345        """Attempts to connect to adb on the DUT.
346
347        @param host: DUT that need to be connected.
348        @return boolean indicating if adb connected successfully.
349        """
350        # Add ADB_TRACE=all for debugging adb connection failures.
351        env = os.environ.copy()
352        env['ADB_TRACE'] = 'all'
353        try:
354            # This may fail return failure due to a race condition in adb
355            # connect (b/29370989). If adb is already connected, this command
356            # will immediately return success.
357            host_port = adb_utils.get_adb_target(host)
358            result = self._adb.run(
359                    host,
360                    args=('connect', host_port),
361                    verbose=True,
362                    env=env,
363                    ignore_status=True,
364                    timeout=constants.ADB_CONNECT_TIMEOUT_SECONDS)
365            if result.exit_status != 0:
366                return False
367
368            result = self._adb.run(
369                    host,
370                    args=('devices', ),
371                    env=env,
372                    timeout=constants.ADB_CONNECT_TIMEOUT_SECONDS)
373            if not re.search(r'{}\s+(device|unauthorized)'.format(
374                    re.escape(host_port)), result.stdout):
375                logging.info('No result found in with pattern: %s',
376                             r'{}\s+(device|unauthorized)'.format(
377                                 re.escape(host_port)))
378                return False
379
380            # Actually test the connection with an adb command as there can be
381            # a race between detecting the connected device and actually being
382            # able to run a command with authenticated adb.
383            result = self._adb.run(
384                    host,
385                    args=('shell', 'exit'),
386                    env=env,
387                    ignore_status=True,
388                    timeout=constants.ADB_CONNECT_TIMEOUT_SECONDS)
389            return result.exit_status == 0
390        except error.CmdTimeoutError as e:
391            logging.warning(e)
392            return False
393
394    def _android_shell(self, host, command):
395        """Run a command remotely on the device in an android shell
396
397        This function is strictly for internal use only, as commands do not run
398        in a fully consistent Android environment. Prefer adb shell instead.
399        """
400        host.run('android-sh -c ' + pipes.quote(command))
401
402    def _connect_adb(self, host):
403        """Sets up ADB connection to the ARC container.
404
405        @param host: DUT that should be connected to.
406        """
407        logging.info('Setting up adb connection.')
408
409        # adbd may take some time to come up. Repeatedly try to connect to adb.
410        utils.poll_for_condition(
411            lambda: self._try_adb_connect(host),
412            timeout=constants.ADB_READY_TIMEOUT_SECONDS,
413            sleep_interval=constants.ADB_POLLING_INTERVAL_SECONDS)
414
415        logging.info('Successfully setup adb connection.')
416
417    def _wait_for_arc_boot(self, host):
418        """Wait until ARC is fully booted.
419
420        Tests for the presence of the intent helper app to determine whether ARC
421        has finished booting.
422        @param host: DUT that need to be connected to.
423        """
424
425        def _intent_helper_running():
426            result = self._adb.run(host,
427                                   args=('shell', 'pgrep', '-f',
428                                         'org.chromium.arc.intent_helper'),
429                                   ignore_status=True)
430            return bool(result.stdout)
431
432        utils.poll_for_condition(
433            _intent_helper_running,
434            exception=error.TestFail(
435                'Error: Timed out waiting for intent helper.'),
436            timeout=constants.ARC_READY_TIMEOUT_SECONDS,
437            sleep_interval=constants.ARC_POLLING_INTERVAL_SECONDS)
438
439    def _disable_adb_install_dialog(self, host):
440        """Disables a dialog shown on adb install execution.
441
442        By default, on adb install execution, "Allow Google to regularly check
443        device activity ... " dialog is shown. It requires manual user action
444        so that tests are blocked at the point.
445        This method disables it.
446        """
447        logging.info('Disabling the adb install dialog.')
448        result = self._adb.run(host,
449                               verbose=True,
450                               args=('shell', 'settings', 'put', 'global',
451                                     'verifier_verify_adb_installs', '0'))
452        logging.info('Disable adb dialog: %s', result.stdout)
453
454        # Android "RescueParty" feature can reset the above settings when the
455        # device crashes often. Disable the rescue during testing.
456        # Keeping only for P and below since R has SELinux restrictions.
457        if self._get_android_version() < 29:
458            self._android_shell(host, 'setprop persist.sys.disable_rescue true')
459
460    def _ready_arc(self):
461        """Ready ARC and adb in parallel for running tests via tradefed."""
462        key_path = os.path.join(self.tmpdir, 'test_key')
463        with open(key_path, 'w') as f:
464            f.write(constants.PRIVATE_KEY)
465        os.environ['ADB_VENDOR_KEYS'] = key_path
466
467        for _ in range(2):
468            try:
469                # Kill existing adb server to ensure that the env var is picked
470                # up, and reset any previous bad state.
471                self._kill_adb_server()
472
473                # TODO(pwang): connect_adb takes 10+ seconds on a single DUT.
474                #              Parallelize it if it becomes a bottleneck.
475                for host in self._hosts:
476                    self._connect_adb(host)
477                    self._disable_adb_install_dialog(host)
478                    self._wait_for_arc_boot(host)
479                self._verify_arc_hosts()
480                return
481            except (utils.TimeoutError, error.CmdTimeoutError):
482                logging.error('Failed to set up adb connection. Retrying...')
483        raise error.TestFail('Error: Failed to set up adb connection')
484
485    def _safe_makedirs(self, path):
486        """Creates a directory at |path| and its ancestors.
487
488        Unlike os.makedirs(), ignore errors even if directories exist.
489        """
490        try:
491            os.makedirs(path)
492        except OSError as e:
493            if not (e.errno == errno.EEXIST and os.path.isdir(path)):
494                raise
495
496    def _unzip(self, filename):
497        """Unzip the file.
498
499        The destination directory name will be the stem of filename.
500        E.g., _unzip('foo/bar/baz.zip') will create directory at
501        'foo/bar/baz', and then will inflate zip's content under the directory.
502        If here is already a directory at the stem, that directory will be used.
503
504        @param filename: Path to the zip archive.
505        @return Path to the inflated directory.
506        """
507        destination = os.path.splitext(filename)[0]
508        if os.path.isdir(destination):
509            logging.info('Skipping unzip %s, reusing content of %s', filename,
510                         destination)
511            return destination
512        tmp = tempfile.mkdtemp(dir=os.path.dirname(filename))
513        logging.info('Begin unzip %s', filename)
514        try:
515            utils.run('unzip', args=('-d', tmp, filename))
516        except:
517            logging.error('Failed unzip, cleaning up.')
518            # Clean up just created files.
519            shutil.rmtree(tmp, ignore_errors=True)
520            raise
521        logging.info('End unzip %s', filename)
522        try:
523            os.renames(tmp, destination)
524        except:
525            logging.error('Failed rename, cleaning up.')
526            shutil.rmtree(destination, ignore_errors=True)
527            shutil.rmtree(tmp, ignore_errors=True)
528            raise
529        return destination
530
531    def _dir_size(self, directory):
532        """Compute recursive size in bytes of directory."""
533        size = 0
534        for root, _, files in os.walk(directory):
535            for name in files:
536                try:
537                    size += os.path.getsize(os.path.join(root, name))
538                except OSError:
539                    logging.error('Inaccessible path (crbug/793696): %s/%s',
540                                  root, name)
541        return size
542
543    def _invalidate_download_cache(self):
544        """Marks the download cache for deferred deletion.
545
546        Used to make cache file operations atomic across failures and reboots.
547        The caller is responsible to hold the lock to the cache.
548        """
549        if not os.path.exists(self._tradefed_cache_dirty):
550            os.mkdir(self._tradefed_cache_dirty)
551
552    def _validate_download_cache(self):
553        """Validates and unmarks the download cache from deletion.
554
555        Used to make cache file operations atomic across failures and reboots.
556        The caller is responsible to hold the lock to the cache.
557        """
558        shutil.rmtree(self._tradefed_cache_dirty, ignore_errors=True)
559
560    def _clean_download_cache_if_needed(self, force=False):
561        """Invalidates cache to prevent it from growing too large."""
562        # If the cache is large enough to hold a working set, we can simply
563        # delete everything without thrashing.
564        # TODO(ihf): Investigate strategies like LRU.
565        clean = force
566        with tradefed_utils.lock(self._tradefed_cache_lock):
567            size = self._dir_size(self._tradefed_cache)
568            if size > constants.TRADEFED_CACHE_MAX_SIZE:
569                logging.info(
570                    'Current cache size=%d got too large. Clearing %s.', size,
571                    self._tradefed_cache)
572                clean = True
573            else:
574                logging.info('Current cache size=%d of %s.', size,
575                             self._tradefed_cache)
576            if os.path.exists(self._tradefed_cache_dirty):
577                logging.info('Found dirty cache.')
578                clean = True
579            if clean:
580                logging.warning('Cleaning download cache.')
581                shutil.rmtree(self._tradefed_cache, ignore_errors=True)
582                self._safe_makedirs(self._tradefed_cache)
583                shutil.rmtree(self._tradefed_cache_dirty, ignore_errors=True)
584
585    def _download_to_cache(self, uri):
586        """Downloads the uri from the storage server.
587
588        It always checks the cache for available binaries first and skips
589        download if binaries are already in cache.
590
591        The caller of this function is responsible for holding the cache lock.
592
593        @param uri: The Google Storage, dl.google.com or local uri.
594        @return Path to the downloaded object, name.
595        """
596        # We are hashing the uri instead of the binary. This is acceptable, as
597        # the uris are supposed to contain version information and an object is
598        # not supposed to be changed once created.
599        output_dir = os.path.join(self._tradefed_cache,
600                                  hashlib.md5(uri.encode()).hexdigest())
601        # Check for existence of cache entry. We check for directory existence
602        # instead of file existence, so that _install_bundle can delete original
603        # zip files to save disk space.
604        if os.path.exists(output_dir):
605            # TODO(crbug.com/800657): Mitigation for the invalid state. Normally
606            # this should not happen, but when a lock is force borken due to
607            # high IO load, multiple processes may enter the critical section
608            # and leave a bad state permanently.
609            if os.listdir(output_dir):
610                logging.info('Skipping download of %s, reusing content of %s.',
611                             uri, output_dir)
612                return os.path.join(output_dir,
613                    os.path.basename(urlparse.urlparse(uri).path))
614            logging.error('Empty cache entry detected %s', output_dir)
615        return self._download_to_dir(uri, output_dir)
616
617    def _download_to_dir(self, uri, output_dir):
618        """Downloads the gs|http|https|file uri from the storage server.
619
620        @param uri: The Google Storage, dl.google.com or local uri.
621        @output_dir: The directory where the downloaded file should be placed.
622        @return Path to the downloaded object, name.
623        """
624        # Split uri into 3 pieces for use by gsutil and also by wget.
625        parsed = urlparse.urlparse(uri)
626        filename = os.path.basename(parsed.path)
627        output = os.path.join(output_dir, filename)
628
629        self._safe_makedirs(output_dir)
630        if parsed.scheme not in ['gs', 'http', 'https', 'file']:
631            raise error.TestFail(
632                'Error: Unknown download scheme %s' % parsed.scheme)
633        if parsed.scheme in ['http', 'https']:
634            logging.info('Using wget to download %s to %s.', uri, output_dir)
635            # We are downloading 1 file at a time, hence using -O over -P.
636            utils.run(
637                'wget',
638                args=('--report-speed=bits', '-O', output, uri),
639                verbose=True)
640            return output
641
642        if parsed.scheme in ['file']:
643            logging.info('Copy the local file from %s to %s.', parsed.path,
644                         output_dir)
645            utils.run(
646                'cp',
647                args=('-f', parsed.path, output),
648                verbose=True)
649            return output
650
651        # If the machine can access to the storage server directly,
652        # defer to "gsutil" for downloading.
653        logging.info('Downloading %s directly to %s.', uri, output)
654        # b/17445576: gsutil rsync of individual files is not implemented.
655        res = utils.run('gsutil',
656                        args=('cp', uri, output),
657                        verbose=True,
658                        ignore_status=True)
659        if not res or res.exit_status != 0:
660            logging.warning('Retrying download...')
661            utils.run('gsutil', args=('cp', uri, output), verbose=True)
662        return output
663
664    def _instance_copyfile(self, cache_path):
665        """Makes a copy of a file from the (shared) cache to a wholy owned
666        local instance. Also copies one level of cache directoy (MD5 named).
667        """
668        filename = os.path.basename(cache_path)
669        dirname = os.path.basename(os.path.dirname(cache_path))
670        instance_dir = os.path.join(self._tradefed_install, dirname)
671        # Make sure destination directory is named the same.
672        self._safe_makedirs(instance_dir)
673        instance_path = os.path.join(instance_dir, filename)
674        shutil.copyfile(cache_path, instance_path)
675        return instance_path
676
677    def _instance_copytree(self, cache_path):
678        """Makes a copy of a directory from the (shared and writable) cache to
679        a wholy owned local instance.
680
681        TODO(ihf): Consider using cp -al to only copy links. Not sure if this
682        is really a benefit across the container boundary, but it is risky due
683        to the possibility of corrupting the original files by an lxc instance.
684        """
685        # We keep the top 2 names from the cache_path = .../dir1/dir2.
686        dir2 = os.path.basename(cache_path)
687        dir1 = os.path.basename(os.path.dirname(cache_path))
688        instance_path = os.path.join(self._tradefed_install, dir1, dir2)
689        # TODO(kinaba): Fix in a safer way.
690        # Below is a workaround to avoid copying large CTS/GTS tree in test lab.
691        # Contents of $cache_path/android-cts are symlinked to the destination
692        # rather than copied.
693        #  1) Why not symlink 'android-cts' itself? Because the tests will
694        #     create results/ logs/ subplans/ subdirectory there. We do not
695        #     want to write to the shared cache.
696        #  2) Why not hardlink? Cache and the local directory may be on
697        #     different mount points, so hardlink may not work.
698        #  3) Why this isn't safe? Cache is cleared when it became full, even
699        #     during the test is run on an instance.
700        #  4) Why this is acceptable despite the unsatefy? Cache clearance is
701        #     a rare event (once in 6 months). Skylab drones won't usually
702        #     live that long, and even if it did, the failure is once in 6
703        #     months after all.
704        special_src = None
705        special_dest = None
706        if utils.is_in_container() and not client_utils.is_moblab():
707            for xts_name in ['android-cts', 'android-gts', 'android-sts']:
708                xts_root = os.path.join(cache_path, xts_name)
709                if os.path.exists(xts_root):
710                    special_src = xts_root
711                    special_dest = os.path.join(instance_path, xts_name)
712                    break
713        if special_src:
714            logging.info('SYMLINK&COPY contents of %s to instance %s',
715                         cache_path, instance_path)
716            self._safe_makedirs(special_dest)
717            for entry in os.listdir(special_src):
718                # Subdirectories are created by relative path from
719                # tools/cts_tradefed. So for 'tools' dir we copy.
720                if entry == 'tools':
721                    shutil.copytree(os.path.join(special_src, entry),
722                                    os.path.join(special_dest, entry))
723                elif entry == 'testcases':
724                    # Directory structure in testcases/ needs to be
725                    # instantiated, because CTS tries `find` command
726                    # in the directory without following symlinks
727                    for subdir, _, files in os.walk(
728                            os.path.join(special_src, entry)):
729                        rel = os.path.relpath(subdir, special_src)
730                        os.mkdir(os.path.join(special_dest, rel))
731                        for file in files:
732                            os.symlink(os.path.join(special_src, rel, file),
733                                       os.path.join(special_dest, rel, file))
734                else:
735                    os.symlink(os.path.join(special_src, entry),
736                               os.path.join(special_dest, entry))
737        else:
738            logging.info('Copying %s to instance %s', cache_path,
739                         instance_path)
740            shutil.copytree(cache_path, instance_path)
741        return instance_path
742
743    def _install_bundle(self, gs_uri):
744        """Downloads a zip file, installs it and returns the local path.
745
746        @param gs_uri: GS bucket that contains the necessary files.
747        """
748        if not gs_uri.endswith('.zip'):
749            raise error.TestFail('Error: Not a .zip file %s.', gs_uri)
750        # Atomic write through of file.
751        with tradefed_utils.lock(self._tradefed_cache_lock):
752            # Atomic operations.
753            self._invalidate_download_cache()
754            # Download is lazy (cache_path may not actually exist if
755            # cache_unzipped does).
756            cache_path = self._download_to_cache(gs_uri)
757            # Unzip is lazy as well (but cache_unzipped guaranteed to
758            # exist).
759            cache_unzipped = self._unzip(cache_path)
760            # To save space we delete the original zip file. This works as
761            # _download only checks existence of the cache directory for
762            # lazily skipping download, and unzip itself will bail if the
763            # unzipped destination exists. Hence we don't need the original
764            # anymore.
765            if os.path.exists(cache_path):
766                logging.info('Deleting original %s', cache_path)
767                os.remove(cache_path)
768            # Erase dirty marker from disk.
769            self._validate_download_cache()
770            # We always copy files to give tradefed a clean copy of the
771            # bundle.
772            unzipped_local = self._instance_copytree(cache_unzipped)
773        return unzipped_local
774
775    def _install_files(self, gs_dir, files, permission):
776        """Installs binary tools."""
777        for filename in files:
778            gs_uri = os.path.join(gs_dir, filename)
779            # Atomic write through of file.
780            with tradefed_utils.lock(self._tradefed_cache_lock):
781                # We don't want to leave a corrupt cache for other jobs.
782                self._invalidate_download_cache()
783                cache_path = self._download_to_cache(gs_uri)
784                # Mark cache as clean again.
785                self._validate_download_cache()
786                # This only affects the current job, so not part of cache
787                # validation.
788                local = self._instance_copyfile(cache_path)
789            os.chmod(local, permission)
790            # Keep track of PATH.
791            local_dir = os.path.dirname(local)
792            self._install_paths.append(local_dir)
793            self._adb.add_path(local_dir)
794
795    def _prepare_media(self, media_asset):
796        """Downloads and offers the cached media files to tradefed."""
797        if media_asset.uri:
798            media = self._install_bundle(media_asset.uri)
799            if os.path.islink(media_asset.localpath):
800                os.unlink(media_asset.localpath)
801            if os.path.isdir(media_asset.localpath):
802                shutil.rmtree(media_asset.localpath)
803            self._safe_makedirs(os.path.dirname(media_asset.localpath))
804            os.symlink(media, media_asset.localpath)
805
806            logging.info('Offered %s as a media directory in %s',
807                    media, media_asset.localpath)
808
809        # Records the number of existing media bundles, to check later.
810        if os.path.isdir(media_asset.localpath):
811            self._num_media_bundles = len(
812                    os.listdir(media_asset.localpath))
813
814    def _cleanup_media(self, media_asset):
815        """Clean up the local copy of cached media files."""
816        self._fail_on_unexpected_media_download(media_asset)
817        if os.path.islink(media_asset.localpath):
818            path = os.readlink(media_asset.localpath)
819            os.unlink(media_asset.localpath)
820            if os.path.isdir(path):
821                logging.info('Cleaning up media files in %s', path)
822                shutil.rmtree(path)
823
824    def _fail_on_unexpected_media_download(self, media_asset):
825        if os.path.isdir(media_asset.localpath):
826            contents = os.listdir(media_asset.localpath)
827            # Ignore a table-of-contents file created by newer xTS
828            TOC_FILE = 'contents.toc'
829            if TOC_FILE in contents:
830                contents.remove(TOC_FILE)
831            if len(contents) > self._num_media_bundles:
832                raise error.TestFail(
833                    'Failed: Unexpected media bundle was added %s' % contents)
834
835    def _should_push_mediastress_asset(self, target_module, board):
836        """Returns whether we should manually push mediastress assets.
837
838        TODO(b/210801048): Remove this workaround once ARCVM storage performance
839        on ARM becomes good enough.
840        """
841        return (target_module and 'CtsMediaStressTestCases' in target_module
842                and board in ['kukui-arc-r'])
843
844    def _push_mediastress_asset(self, media_asset):
845        """Pushes mediastress assets to the DUT for the upcoming test."""
846        logging.info(
847                'Pushing mediastress assets in advance to workaround slow '
848                'storage on ARM boards (b/210801048)')
849
850        media_dir = os.path.join(media_asset.localpath,
851                                 'android-cts-media-1.5')
852        copy_media_sh = os.path.join(media_dir, 'copy_media.sh')
853        os.chmod(copy_media_sh, 0o755)
854
855        old_cwd = os.getcwd()
856        os.chdir(media_dir)
857        try:
858            for host in self._hosts:
859                host_port = adb_utils.get_adb_target(host)
860                self._run(
861                        copy_media_sh,
862                        args=('all', '-s', host_port),
863                        timeout=constants.ADB_PUSH_MEDIASTRESS_TIMEOUT_SECONDS,
864                        verbose=True,
865                        ignore_status=False,
866                        stdout_tee=utils.TEE_TO_LOGS,
867                        stderr_tee=utils.TEE_TO_LOGS)
868        finally:
869            os.chdir(old_cwd)
870
871    def _fetch_helpers_from_dut(self):
872        """Fetches the CTS helpers from the dut and installs into the testcases
873           subdirectory of our local autotest copy.
874        """
875        tf_testcases = os.path.join(self._repository, 'testcases')
876
877        # Earlier checks enforce that each host has the same build fingerprint,
878        # so we can assume that the packages from the first host will work
879        # across the whole set.
880        package_list = self._adb.run(
881                self._hosts[0],
882                args=('shell', 'getprop',
883                      constants.TRADEFED_CTS_HELPERS_PROPERTY)).stdout.strip()
884        for pkg in package_list.split(':'):
885            if not pkg:
886                continue
887            apk_name = pkg + '.apk'
888            logging.info('Installing CTS helper package %s to %s', apk_name,
889                         tf_testcases)
890            self._hosts[0].get_file(
891                    os.path.join(constants.BOARD_CTS_HELPERS_DIR, apk_name),
892                    tf_testcases)
893
894    def _run(self, *args, **kwargs):
895        """Executes the given command line.
896
897        To support SDK tools, such as adb or aapt, this adds _install_paths
898        to the extra_paths. Before invoking this, ensure _install_files() has
899        been called.
900        """
901        kwargs['extra_paths'] = (
902            kwargs.get('extra_paths', []) + self._install_paths)
903        return utils.run(*args, **kwargs)
904
905    def _collect_tradefed_global_log(self, result, destination):
906        """Collects the tradefed global log.
907
908        @param result: The result object from utils.run.
909        @param destination: Autotest result directory (destination of logs).
910        """
911        match = re.search(r'Saved log to /tmp/(tradefed_global_log_.*\.txt)',
912                          result.stdout)
913        if not match:
914            logging.debug(result.stdout)
915            logging.error('no tradefed_global_log file is found')
916            return
917
918        name = match.group(1)
919        dest = os.path.join(destination, 'logs', 'tmp')
920        self._safe_makedirs(dest)
921        shutil.copy(os.path.join('/tmp', name), os.path.join(dest, name))
922
923    def _get_expected_failures(self, directory, bundle_abi):
924        """Return a list of expected failures or no test module.
925
926        @param directory: A directory with expected no tests or failures files.
927        @param bundle_abi: 'arm' or 'x86' if the test is for the particular ABI.
928                           None otherwise (like GTS, built for multi-ABI.)
929        @return: A list of expected failures or no test modules for the current
930                 testing device.
931        """
932        # Load waivers and manual tests so TF doesn't re-run them.
933        expected_fail_files = []
934        test_board = self._get_board_name()
935        test_model = self._get_model_name()
936        test_arch = self._get_board_arch()
937        sdk_ver = self._get_android_version()
938        first_api_level = self._get_first_api_level()
939        expected_fail_dir = os.path.join(self.bindir, directory)
940        if os.path.exists(expected_fail_dir):
941            expected_fail_files += glob.glob(expected_fail_dir + '/*.yaml')
942
943        waivers = cts_expected_failure_parser.ParseKnownCTSFailures(
944            expected_fail_files)
945        return waivers.find_waivers(test_arch, test_board, test_model,
946                                    bundle_abi, sdk_ver, first_api_level,
947                                    self._hosts[0])
948
949    def _get_abilist(self):
950        """Return the abilist supported by calling adb command.
951
952        This method should only be called after the android environment is
953        successfully initialized."""
954        if not self._abilist:
955            for _ in range(3):
956                abilist_str = self._adb.run(
957                        self._hosts[0],
958                        args=('shell', 'getprop',
959                              'ro.product.cpu.abilist')).stdout.strip()
960                if abilist_str:
961                    self._abilist = abilist_str.split(',')
962                    break
963                else:
964                    # TODO(kinaba): Sometimes getprop returns an empty string.
965                    # Investigate why. For now we mitigate the bug by retries.
966                    logging.error('Empty abilist.')
967        return self._abilist
968
969    def _get_release_branch_number(self):
970        """Returns the DUT branch number (z of Rxx-yyyyy.z.w) or 0 on error."""
971        if not self._release_branch_number:
972            ver = (self._hosts[0].get_release_version() or '').split('.')
973            self._release_branch_number = (int(ver[1]) if len(ver) >= 3 else 0)
974        return self._release_branch_number
975
976    def _get_board_arch(self):
977        """Return target DUT arch name."""
978        if not self._board_arch:
979            self._board_arch = ('arm' if self._hosts[0].get_cpu_arch() == 'arm'
980                else 'x86')
981        return self._board_arch
982
983    def _get_board_name(self):
984        """Return target DUT board name."""
985        if not self._board_name:
986            self._board_name = self._hosts[0].get_board().split(':')[1]
987        return self._board_name
988
989    def _get_model_name(self):
990        """Return target DUT model name."""
991        if not self._model_name:
992            self._model_name = self._hosts[0].get_model_from_cros_config()
993        return self._model_name
994
995    def _get_android_version(self):
996        """Return target DUT Android SDK version"""
997        # TODO(kinaba): factor this out to server/hosts/cros_host.py
998        if not self._android_version:
999            self._android_version = self._hosts[0].run(
1000                'grep ANDROID_SDK /etc/lsb-release',
1001                ignore_status=True).stdout.rstrip().split('=')[1]
1002        return int(self._android_version)
1003
1004    def _get_first_api_level(self):
1005        """Return target DUT Android first API level."""
1006        if not self._first_api_level:
1007            self._first_api_level = self._hosts[0].get_arc_first_api_level()
1008        return int(self._first_api_level)
1009
1010    def _get_max_retry(self, max_retry):
1011        """Return the maximum number of retries.
1012
1013        @param max_retry: max_retry specified in the control file.
1014        @return: number of retries for this specific host.
1015        """
1016        if max_retry is None:
1017            max_retry = self._get_branch_retry(self._BRANCH_DEFAULT_RETRY)
1018        candidate = [max_retry]
1019        candidate.append(self._get_board_retry())
1020        candidate.append(self._get_branch_retry(self._BRANCH_MAX_RETRY))
1021        return min(x for x in candidate if x is not None)
1022
1023    def _get_board_retry(self):
1024        """Return the maximum number of retries for DUT board name.
1025
1026        @return: number of max_retry or None.
1027        """
1028        board = self._get_board_name()
1029        if board in self._BOARD_MAX_RETRY:
1030            return self._BOARD_MAX_RETRY[board]
1031        logging.info('No board retry specified for board: %s', board)
1032        return None
1033
1034    def _get_branch_retry(self, table):
1035        """Returns the retry count for DUT branch number defined in |table|."""
1036        number = self._get_release_branch_number()
1037        for lowerbound, retry in reversed(table):
1038            if lowerbound <= number:
1039                return retry
1040        logging.warning('Could not establish channel. Using retry=0.')
1041        return 0
1042
1043    def _is_tablet_mode_device(self):
1044        """Returns if running the test on a tabled mode device"""
1045        # TODO(kinaba): consider adding per-model check
1046        board = self._get_board_name()
1047        return any(board.startswith(b) for b in constants.TABLET_MODE_BOARDS)
1048
1049    def _run_commands(self, commands, **kwargs):
1050        """Run commands on all the hosts."""
1051        # We need to copy the ADB key to the device to run adb on it.
1052        pre_commands = []
1053        if any(command.startswith('adb ') for command in commands):
1054            key_path = '/tmp/arc.adb_key'
1055            for host in self._hosts:
1056                host.env['ADB_VENDOR_KEYS'] = key_path
1057            pre_commands = [
1058                    'adb kill-server',
1059                    'echo %s > %s' %
1060                    (pipes.quote(constants.PRIVATE_KEY), key_path)
1061            ]
1062
1063        for host in self._hosts:
1064            if pre_commands:
1065                logging.info('Running DUT adb setup')
1066                for command in pre_commands:
1067                    host.run(command, ignore_status=True, verbose=False)
1068            for command in commands:
1069                logging.info('RUN: %s\n', command)
1070                output = host.run(command, **kwargs)
1071                logging.info('END: %s\n', command)
1072                logging.debug(output)
1073
1074    def _override_powerd_prefs(self):
1075        """Overrides powerd prefs to prevent screen from turning off, complying
1076        with CTS requirements.
1077
1078        This is a remote version of PowerPrefChanger which ensures overrided
1079        policies won't persist across reboots by bind-mounting onto the config
1080        directory.
1081        """
1082        pref_dir = constants.POWERD_PREF_DIR
1083        temp_dir = constants.POWERD_TEMP_DIR
1084        commands = (
1085                'cp -r %s %s' % (pref_dir, temp_dir),
1086                'echo 1 > %s/ignore_external_policy' % temp_dir,
1087                'echo 0 | tee %s/{,un}plugged_{dim,off,suspend}_ms' % temp_dir,
1088                'mount --bind %s %s' % (temp_dir, pref_dir),
1089                'restart powerd',
1090        )
1091        try:
1092            self._run_commands(commands)
1093        except (error.AutoservRunError, error.AutoservSSHTimeout):
1094            logging.warning('Failed to override powerd policy, tests depending '
1095                            'on screen being always on may fail.')
1096
1097    def _restore_powerd_prefs(self):
1098        """Restores powerd prefs overrided by _override_powerd_prefs()."""
1099        pref_dir = constants.POWERD_PREF_DIR
1100        temp_dir = constants.POWERD_TEMP_DIR
1101        commands = (
1102                'umount %s' % pref_dir,
1103                'restart powerd',
1104                'rm -rf %s' % temp_dir,
1105        )
1106        try:
1107            self._run_commands(commands)
1108        except (error.AutoservRunError, error.AutoservSSHTimeout):
1109            logging.warning('Failed to restore powerd policy, overrided policy '
1110                            'will persist until device reboot.')
1111
1112    def _should_set_cpu_governor(self, target_module, board):
1113        """Returns whether we should set performance governor."""
1114        # TODO(kinaba): The current restore logic only applies to Kukui
1115        # and Trogdor. Please update the logic when expanding the scope.
1116        return (target_module and "CtsDeqp" in target_module) and (board in [
1117                'kukui-arc-r', 'trogdor-arc-r'
1118        ])
1119
1120    def _set_cpu_governor(self, governor):
1121        """Set the specified CPU governor."""
1122        self._run_commands([('for i in /sys/devices/system/cpu/cpufreq/*; do'
1123                             ' echo %s > $i/scaling_governor; done') % governor
1124                            ])
1125
1126    def _override_cpu_governor(self):
1127        """Override the CPU governor for performance mode."""
1128        try:
1129            self._set_cpu_governor('performance')
1130        except (error.AutoservRunError, error.AutoservSSHTimeout):
1131            logging.warning('Failed to override CPU governor, tests depending '
1132                            'on boosted performance may fail.')
1133
1134    def _restore_cpu_governor(self):
1135        """Restore the CPU governor to the default value."""
1136        try:
1137            self._set_cpu_governor('schedutil')
1138        except (error.AutoservRunError, error.AutoservSSHTimeout):
1139            logging.warning('Failed to restore CPU governor, overrided policy '
1140                            'will persist until device reboot.')
1141
1142    def _mute_device(self):
1143        """Mutes the device to avoid noises while running tests"""
1144        try:
1145            self._run_commands(['cras_test_client --mute 1'],
1146                               ignore_status=True)
1147        except:
1148            logging.warning('Failed to mute the device')
1149
1150    def _clean_crash_logs(self):
1151        try:
1152            self._run_commands(['rm -f /home/chronos/crash/*'])
1153        except (error.AutoservRunError, error.AutoservSSHTimeout):
1154            logging.warning('Failed to clean up crash logs.')
1155
1156    def _run_and_parse_tradefed(self, command):
1157        """Kick off the tradefed command.
1158
1159        @param command: Lists of command tokens.
1160        @raise TestFail: when a test failure is detected.
1161        @return: tuple of (tests, pass, fail, notexecuted) counts.
1162        """
1163        target_argument = []
1164        for host in self._hosts:
1165            target_argument += ['-s', adb_utils.get_adb_target(host)]
1166        shard_argument = []
1167        if len(self._hosts) > 1:
1168            if self._SHARD_CMD:
1169                shard_argument = [self._SHARD_CMD, str(len(self._hosts))]
1170            else:
1171                logging.warning('cts-tradefed shard command isn\'t defined, '
1172                                'falling back to use single device.')
1173        command = command + target_argument + shard_argument
1174
1175        try:
1176            output = self._run_tradefed(command)
1177        except Exception as e:
1178            self._log_java_version()
1179            if not isinstance(e, error.CmdTimeoutError):
1180                # In case this happened due to file corruptions, try to
1181                # force to recreate the cache.
1182                logging.error('Failed to run tradefed! Cleaning up now.')
1183                self._clean_download_cache_if_needed(force=True)
1184            raise
1185
1186        result_destination = self._default_tradefed_base_dir()
1187        # Gather the global log first. Datetime parsing below can abort the test
1188        # if tradefed startup had failed. Even then the global log is useful.
1189        self._collect_tradefed_global_log(output, result_destination)
1190        # Result parsing must come after all other essential operations as test
1191        # warnings, errors and failures can be raised here.
1192        base = self._default_tradefed_base_dir()
1193        path = tradefed_utils.get_test_result_xml_path(base)
1194        return tradefed_utils.parse_tradefed_testresults_xml(
1195            test_result_xml_path=path,
1196            waivers=self._waivers)
1197
1198    def _setup_result_directories(self):
1199        """Sets up the results and logs directories for tradefed.
1200
1201        Tradefed saves the logs and results at:
1202          self._repository/results/$datetime/
1203          self._repository/results/$datetime.zip
1204          self._repository/logs/$datetime/
1205        Because other tools rely on the currently chosen Google storage paths
1206        we need to keep destination_results in:
1207          self.resultsdir/android-cts/results/$datetime/
1208          self.resultsdir/android-cts/results/$datetime.zip
1209          self.resultsdir/android-cts/results/logs/$datetime/
1210        To bridge between them, create symlinks from the former to the latter.
1211        """
1212        logging.info('Setting up tradefed results and logs directories.')
1213
1214        results_destination = self._default_tradefed_base_dir()
1215        logs_destination = os.path.join(results_destination, 'logs')
1216        directory_mapping = [
1217            (os.path.join(self._repository, 'results'), results_destination),
1218            (os.path.join(self._repository, 'logs'), logs_destination),
1219        ]
1220
1221        for (tradefed_path, final_path) in directory_mapping:
1222            if os.path.exists(tradefed_path):
1223                shutil.rmtree(tradefed_path)
1224            self._safe_makedirs(final_path)
1225            os.symlink(final_path, tradefed_path)
1226
1227    def _default_tradefed_base_dir(self):
1228        return os.path.join(self.resultsdir, self._get_tradefed_base_dir())
1229
1230    def _install_plan(self, subplan):
1231        """Copy test subplan to CTS-TF.
1232
1233        @param subplan: CTS subplan to be copied into TF.
1234        """
1235        logging.info('Install subplan: %s', subplan)
1236        subplans_tf_dir = os.path.join(self._repository, 'subplans')
1237        if not os.path.exists(subplans_tf_dir):
1238            os.makedirs(subplans_tf_dir)
1239        test_subplan_file = os.path.join(self.bindir, 'subplans',
1240                                         '%s.xml' % subplan)
1241        try:
1242            shutil.copy(test_subplan_file, subplans_tf_dir)
1243        except (shutil.Error, OSError, IOError) as e:
1244            raise error.TestFail(
1245                'Error: failed to copy test subplan %s to CTS bundle. %s' %
1246                (test_subplan_file, e))
1247
1248    def _should_skip_test(self, _bundle):
1249        """Some tests are expected to fail and are skipped.
1250
1251        Subclasses should override with specific details.
1252        """
1253        return False
1254
1255    def _should_reboot(self, steps):
1256        """Oracle to decide if DUT should reboot or just restart Chrome.
1257
1258        For now we will not reboot after the first two iterations, but on all
1259        iterations afterward as before. In particular this means that most CTS
1260        tests will now not get a "clean" machine, but one on which tests ran
1261        before. But we will still reboot after persistent failures, hopefully
1262        not causing too many flakes down the line.
1263        """
1264        if steps < 3:
1265            return False
1266        return True
1267
1268    def _copy_extra_artifacts_dut(self, extra_artifacts, host, output_dir):
1269        """ Upload the custom artifacts """
1270        self._safe_makedirs(output_dir)
1271
1272        for artifact in extra_artifacts:
1273            logging.info('Copying extra artifacts from "%s" to "%s".',
1274                         artifact, output_dir)
1275            try:
1276                self._adb.run(host,
1277                              verbose=True,
1278                              timeout=120,
1279                              args=('pull', artifact, output_dir))
1280            except:
1281                # Maybe ADB connection failed, or the artifacts don't exist.
1282                logging.exception('Copying extra artifacts failed.')
1283
1284    def _copy_extra_artifacts_host(self, extra_artifacts, host, output_dir):
1285        """ Upload the custom artifacts """
1286        self._safe_makedirs(output_dir)
1287
1288        for artifact in extra_artifacts:
1289            logging.info('Copying extra artifacts from "%s" to "%s".',
1290                         artifact, output_dir)
1291            for extracted_path in glob.glob(artifact):
1292                logging.info('... %s', extracted_path)
1293                # Move it not to collect it again in future retries.
1294                shutil.move(extracted_path, output_dir)
1295
1296    def _run_tradefed_list_results(self):
1297        """Run the `tradefed list results` command.
1298
1299        @return: tuple of the last (session_id, pass, fail, all_done?).
1300        """
1301
1302        # Fix b/143580192: We set the timeout to 3 min. It never takes more than
1303        # 10s on light disk load.
1304        output = self._run_tradefed_with_timeout(['list', 'results'], 180)
1305
1306        # Parses the last session from the output that looks like:
1307        #
1308        # Session  Pass  Fail  Modules Complete ...
1309        # 0        90    10    1 of 2
1310        # 1        199   1     2 of 2
1311        # ...
1312        lastmatch = None
1313        for m in re.finditer(r'^(\d+)\s+(\d+)\s+(\d+)\s+(\d+) of (\d+)',
1314                             output.stdout, re.MULTILINE):
1315            session, passed, failed, done, total = map(int,
1316                                                       m.group(1, 2, 3, 4, 5))
1317            lastmatch = (session, passed, failed, done == total)
1318        return lastmatch
1319
1320    def _get_bundle_url(self, uri, bundle):
1321        # TODO: Replace with NotImplementedError once all subclasses are done
1322        return self._get_latest_bundle_url(bundle) if uri == 'LATEST' else (
1323                uri or self._get_default_bundle_url(bundle))
1324
1325    def _tradefed_retry_command(self, template, session_id):
1326        raise NotImplementedError('Subclass should override this function')
1327
1328    def _tradefed_run_command(self, template):
1329        raise NotImplementedError('Subclass should override this function')
1330
1331    def _tradefed_cmd_path(self):
1332        raise NotImplementedError('Subclass should override this function')
1333
1334    def _tradefed_env(self):
1335        return None
1336
1337    def _run_tradefed_with_timeout(self, command, timeout):
1338        tradefed = self._tradefed_cmd_path()
1339        with tradefed_utils.adb_keepalive(
1340                adb_utils.get_adb_targets(self._hosts), self._install_paths):
1341            logging.info('RUN(timeout=%d): %s', timeout,
1342                         ' '.join([tradefed] + command))
1343            output = self._run(
1344                tradefed,
1345                args=tuple(command),
1346                env=self._tradefed_env(),
1347                timeout=timeout,
1348                verbose=True,
1349                ignore_status=False,
1350                # Make sure to tee tradefed stdout/stderr to autotest logs
1351                # continuously during the test run.
1352                stdout_tee=utils.TEE_TO_LOGS,
1353                stderr_tee=utils.TEE_TO_LOGS)
1354            logging.info('END: %s\n', ' '.join([tradefed] + command))
1355        return output
1356
1357    def _run_tradefed(self, command):
1358        timeout = self._timeout * self._timeout_factor
1359        if self._job_deadline is not None:
1360            clipped = int(min(timeout, self._job_deadline - time.time()))
1361            # Even the shortest tradefed run takes 1.5 minutes. Took 2x'ed
1362            # value as a threshold that a meaningful test can run.
1363            if clipped < 3 * 60:
1364                raise error.TestError(
1365                        'Hitting job time limit: only %s seconds left' %
1366                        clipped)
1367            timeout = clipped
1368        return self._run_tradefed_with_timeout(command, timeout)
1369
1370    def _run_tradefed_with_retries(self,
1371                                   test_name,
1372                                   run_template,
1373                                   retry_template,
1374                                   timeout,
1375                                   media_asset=None,
1376                                   enable_default_apps=False,
1377                                   target_module=None,
1378                                   target_plan=None,
1379                                   executable_test_count=None,
1380                                   bundle=None,
1381                                   use_helpers=False,
1382                                   extra_artifacts=[],
1383                                   extra_artifacts_host=[],
1384                                   login_precondition_commands=[],
1385                                   precondition_commands=[],
1386                                   prerequisites=[]):
1387        """Run CTS/GTS with retry logic.
1388
1389        We first kick off the specified module. Then rerun just the failures
1390        on the next MAX_RETRY iterations.
1391        """
1392        for prereq in prerequisites:
1393            result = tradefed_prerequisite.check(prereq, self._hosts)
1394            if not result[0]:
1395                raise error.TestError(result[1])
1396
1397        # On dev and beta channels timeouts are sharp, lenient on stable.
1398        self._timeout = timeout
1399        if (self._get_release_branch_number() >=
1400                constants.APPROXIMATE_STABLE_BRANCH_NUMBER):
1401            self._timeout += 3600
1402
1403        if self._should_skip_test(bundle):
1404            logging.warning('Skipped test %s', ' '.join(test_name))
1405            return
1406
1407        steps = -1  # For historic reasons the first iteration is not counted.
1408        self.summary = ''
1409        board = self._get_board_name()
1410        session_id = None
1411
1412        self._setup_result_directories()
1413        if media_asset:
1414            self._prepare_media(media_asset)
1415
1416        # This loop retries failures. For this reason please do not raise
1417        # TestFail in this loop if you suspect the failure might be fixed
1418        # in the next loop iteration.
1419        while steps < self._max_retry:
1420            steps += 1
1421            keep_media = media_asset and media_asset.uri and steps >= 1
1422            self._run_commands(login_precondition_commands, ignore_status=True)
1423            # TODO(kinaba): Make it a general config (per-model choice
1424            # of tablet,clamshell,default) if the code below works.
1425            if utils.is_in_container() and not client_utils.is_moblab():
1426                # Force laptop mode for non TABLET_MODE_BOARDS
1427                if not self._is_tablet_mode_device():
1428                    self._run_commands(
1429                        ['inject_powerd_input_event --code=tablet --value=0'],
1430                        ignore_status=True)
1431
1432            session_log_dir = os.path.join(self.resultsdir,
1433                                           'login_session_log',
1434                                           'step%02d' % steps)
1435            with login.login_chrome(hosts=self._hosts,
1436                                    board=board,
1437                                    dont_override_profile=keep_media,
1438                                    enable_default_apps=enable_default_apps,
1439                                    log_dir=session_log_dir) as current_logins:
1440                if self._should_reboot(steps):
1441                    # TODO(rohitbm): Evaluate if power cycle really helps with
1442                    # Bluetooth test failures, and then make the implementation
1443                    # more strict by first running complete restart and reboot
1444                    # retries and then perform power cycle.
1445                    #
1446                    # Currently, (steps + 1 == self._max_retry) means that
1447                    # hard_reboot is attempted after "this" cycle failed. Then,
1448                    # the last remaining 1 step will be run on the rebooted DUT.
1449                    hard_reboot = (self._hard_reboot_on_failure
1450                        and steps + 1 == self._max_retry)
1451                    for current_login in current_logins:
1452                        current_login.need_reboot(hard_reboot=hard_reboot)
1453                self._ready_arc()
1454                self._calculate_test_count_factor(bundle)
1455
1456                # Check the ABI list and skip (pass) the tests if not applicable.
1457                # This needs to be done after _ready_arc() for reading the device's
1458                # ABI list from the booted ARC instance.
1459                if '--abi' in run_template:
1460                    abi = run_template[run_template.index('--abi') + 1]
1461                    abilist = self._get_abilist()
1462                    if abilist and abi not in abilist:
1463                        logging.info(
1464                                'Specified ABI %s is not in the device ABI list %s. Skipping.',
1465                                abi, abilist)
1466                        return
1467
1468                # For CtsMediaStressTestCases, push media assets in advance if
1469                # applicable.
1470                if (not keep_media and media_asset
1471                            and self._should_push_mediastress_asset(
1472                                    target_module, board)):
1473                    self._push_mediastress_asset(media_asset)
1474
1475                self._run_commands(precondition_commands, ignore_status=True)
1476                if use_helpers:
1477                    self._fetch_helpers_from_dut()
1478
1479                # Run tradefed.
1480                if session_id == None:
1481                    if target_plan is not None:
1482                        self._install_plan(target_plan)
1483
1484                    logging.info('Running %s:', test_name)
1485                    command = self._tradefed_run_command(run_template)
1486                else:
1487                    logging.info('Retrying failures of %s with session_id %d:',
1488                                 test_name, session_id)
1489                    command = self._tradefed_retry_command(retry_template,
1490                                                           session_id)
1491
1492                if media_asset and media_asset.uri:
1493                    # Clean-up crash logs from previous sessions to ensure
1494                    # enough disk space for 16GB storage devices: b/156075084.
1495                    if not keep_media:
1496                        self._clean_crash_logs()
1497                # b/196748125. Mute before running tests to avoid noises.
1498                self._mute_device()
1499                set_performance_governor = self._should_set_cpu_governor(
1500                        target_module, board)
1501                # TODO(b/182397469): speculatively disable the "screen-on"
1502                # handler for dEQP. Revert when the issue is resolved.
1503                keep_screen_on = not (target_module
1504                                      and "CtsDeqpTestCases" in target_module)
1505                if set_performance_governor:
1506                    self._override_cpu_governor()
1507                if keep_screen_on:
1508                    self._override_powerd_prefs()
1509                try:
1510                    waived_tests = self._run_and_parse_tradefed(command)
1511                finally:
1512                    if keep_screen_on:
1513                        self._restore_powerd_prefs()
1514                    if set_performance_governor:
1515                        self._restore_cpu_governor()
1516                if media_asset:
1517                    self._fail_on_unexpected_media_download(media_asset)
1518                result = self._run_tradefed_list_results()
1519                if not result:
1520                    logging.error('Did not find any test results. Retry.')
1521                    for current_login in current_logins:
1522                        current_login.need_reboot()
1523                    continue
1524
1525                last_waived = len(waived_tests)
1526                last_session_id, last_passed, last_failed, last_all_done =\
1527                    result
1528
1529                if last_failed > last_waived or not utils.is_in_container():
1530                    for host in self._hosts:
1531                        dir_name = "%s-step%02d" % (host.hostname, steps)
1532                        output_dir = os.path.join(
1533                            self.resultsdir, 'extra_artifacts', dir_name)
1534                        self._copy_extra_artifacts_dut(
1535                            extra_artifacts, host, output_dir)
1536                        self._copy_extra_artifacts_host(
1537                            extra_artifacts_host, host, output_dir)
1538
1539                if last_passed + last_failed > 0:
1540                    # At least one test had run, which means the media push step
1541                    # of tradefed didn't fail. To free up the storage earlier,
1542                    # delete the copy on the server side. See crbug.com/970881
1543                    if media_asset:
1544                        self._cleanup_media(media_asset)
1545
1546                if last_failed < last_waived:
1547                    logging.error(
1548                        'Error: Internal waiver bookkeeping has become '
1549                        'inconsistent (f=%d, w=%d)', last_failed, last_waived)
1550
1551                msg = 'run' if session_id == None else ' retry'
1552                msg += '(p=%s, f=%s, w=%s)' % (last_passed, last_failed,
1553                                               last_waived)
1554                self.summary += msg
1555                logging.info('RESULT: %s %s', msg, result)
1556
1557                # Overwrite last_all_done if the executed test count is equal
1558                # to the known test count of the job.
1559                if (not last_all_done and executable_test_count != None and
1560                    (last_passed + last_failed in executable_test_count)):
1561                    logging.warning('Overwriting all_done as True, since the '
1562                                    'explicitly set executable_test_count '
1563                                    'tests have run.')
1564                    last_all_done = True
1565
1566                # Check for no-test modules. We use the "all_done" indicator
1567                # provided by list_results to decide if there are outstanding
1568                # modules to iterate over (similar to missing tests just on a
1569                # per-module basis).
1570                notest = (last_passed + last_failed == 0 and last_all_done)
1571                if target_module in self._notest_modules:
1572                    if notest:
1573                        logging.info('Package has no tests as expected.')
1574                        return
1575                    else:
1576                        # We expected no tests, but the new bundle drop must
1577                        # have added some for us. Alert us to the situation.
1578                        raise error.TestFail(
1579                            'Failed: Remove module %s from '
1580                            'notest_modules directory!' % target_module)
1581                elif notest:
1582                    logging.error('Did not find any tests in module. Hoping '
1583                                  'this is transient. Retry after reboot.')
1584                    for current_login in current_logins:
1585                        current_login.need_reboot()
1586                    continue
1587
1588                # After the no-test check, commit the pass/fail count.
1589                waived = last_waived
1590                session_id, passed, failed, all_done =\
1591                    last_session_id, last_passed, last_failed, last_all_done
1592
1593                # Check if all the tests passed.
1594                if failed <= waived and all_done:
1595                    break
1596
1597                # TODO(b/127908450) Tradefed loses track of not-executed tests
1598                # when the commandline pattern included '*', and retry run for
1599                # them wrongly declares all tests passed. This is misleading.
1600                # Rather, we give up the retry and report the result as FAIL.
1601                if not all_done and '*' in ''.join(run_template):
1602                    break
1603
1604        if session_id == None:
1605            raise error.TestFail('Error: Could not find any tests in module.')
1606
1607        if failed <= waived and all_done:
1608            # TODO(ihf): Make this error.TestPass('...') once
1609            # available.
1610            if steps > 0 and self._warn_on_test_retry:
1611                raise error.TestWarn(
1612                    'Passed: after %d retries passing %d tests, '
1613                    'waived=%d. %s' % (steps, passed, waived,
1614                                       self.summary))
1615            return
1616
1617        raise error.TestFail(
1618                'Failed: after %d retries giving up. '
1619                'passed=%d, failed=%d, waived=%d%s. %s' %
1620                (steps, passed, failed, waived,
1621                 '' if all_done else ', notexec>=1', self.summary))
1622