xref: /aosp_15_r20/external/autotest/server/cros/dynamic_suite/suite.py (revision 9c5db1993ded3edbeafc8092d69fe5de2ee02df7)
1# Lint as: python2, python3
2# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6from __future__ import absolute_import
7from __future__ import division
8from __future__ import print_function
9
10import abc
11import datetime
12import difflib
13import functools
14import hashlib
15import logging
16import operator
17import os
18import re
19import six
20import sys
21import warnings
22
23import common
24
25from autotest_lib.frontend.afe.json_rpc import proxy
26from autotest_lib.client.common_lib import autotest_enum
27from autotest_lib.client.common_lib import error
28from autotest_lib.client.common_lib import global_config
29from autotest_lib.client.common_lib import priorities
30from autotest_lib.client.common_lib import time_utils
31from autotest_lib.client.common_lib import utils
32from autotest_lib.frontend.afe import model_attributes
33from autotest_lib.frontend.afe.json_rpc import proxy
34from autotest_lib.server.cros import provision
35from autotest_lib.server.cros.dynamic_suite import constants
36from autotest_lib.server.cros.dynamic_suite import control_file_getter
37from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
38from autotest_lib.server.cros.dynamic_suite import job_status
39from autotest_lib.server.cros.dynamic_suite import suite_common
40from autotest_lib.server.cros.dynamic_suite import tools
41from autotest_lib.server.cros.dynamic_suite.job_status import Status
42
# boolparse_lib is imported on a best-effort basis: when the import fails the
# problem is reported on stdout and module loading carries on. In that case
# the name `boolparse_lib` stays unbound, so code that references it (for
# example the predicate built by matches_attribute_expression_predicate())
# raises NameError when it is actually called.
try:
    from autotest_lib.server.cros.dynamic_suite import boolparse_lib
except ImportError as e:
    print('Unable to import boolparse_lib: %s' % (e,))
    print('This script must be either:')
    print('  - Be run in the chroot.')
    print('  - (not yet supported) be run after running ')
    print('    ../utils/build_externals.py')
51
# Suite names singled out for bug filing, going by the constant's name.
# NOTE(review): no use of this list is visible in this part of the file;
# confirm against its consumers before changing it.
_FILE_BUG_SUITES = [
        'au', 'bvt', 'bvt-cq', 'bvt-inline', 'calibration', 'paygen_au_beta',
        'paygen_au_canary', 'paygen_au_dev', 'paygen_au_stable', 'sanity',
        'push_to_prod'
]
# Value of the 'drone_installation_directory' option in the 'SCHEDULER'
# section of the global config, read once when this module is imported.
_AUTOTEST_DIR = global_config.global_config.get_config_value(
        'SCHEDULER', 'drone_installation_directory')
59
60
class RetryHandler(object):
    """Maintain retry information.

    @var _retry_map: A dictionary that stores retry history.
            The key is afe job id. The value is a dictionary.
            {job_id: {'state':RetryHandler.States, 'retry_max':int}}
            - state:
                The retry state of a job.
                NOT_ATTEMPTED:
                    We haven't done anything about the job.
                ATTEMPTED:
                    We've made an attempt to schedule a retry job. The
                    scheduling may or may not be successful, e.g.
                    it might encounter an rpc error. Note failure
                    in scheduling a retry is different from a retry job failure.
                    For each job, we only attempt to schedule a retry once.
                    For example, assume we have a test with JOB_RETRIES=5 and
                    its second retry job failed. When we attempt to create
                    a third retry job to retry the second, we hit an rpc
                    error. In such case, we will give up on all following
                    retries.
                RETRIED:
                    A retry job has already been successfully
                    scheduled.
            - retry_max:
                The maximum of times the job can still
                be retried, taking into account retries
                that have occurred.
    @var _retry_level: A retry might be triggered only if the result
            is worse than the level.
    @var _max_retries: Maximum retry limit at suite level.
                     Regardless how many times each individual test
                     has been retried, the total number of retries happening in
                     the suite can't exceed _max_retries.
    """

    States = autotest_enum.AutotestEnum('NOT_ATTEMPTED', 'ATTEMPTED', 'RETRIED',
                                        start_value=1, step=1)

    def __init__(self, initial_jobs_to_tests, retry_level='WARN',
                 max_retries=None):
        """Initialize RetryHandler.

        @param initial_jobs_to_tests: A dictionary that maps a job id to
                a ControlData object. This dictionary should contain
                jobs that are originally scheduled by the suite.
        @param retry_level: A retry might be triggered only if the result is
                worse than the level.
        @param max_retries: Integer, maximum total retries allowed
                                  for the suite. Default to None, no max.
        """
        self._retry_map = {}
        self._retry_level = retry_level
        # "No limit" is modelled as a budget large enough never to run out.
        self._max_retries = (max_retries
                             if max_retries is not None else sys.maxsize)
        for job_id, test in initial_jobs_to_tests.items():
            # Only jobs whose test allows retries are tracked at all.
            if test.job_retries > 0:
                self._add_job(new_job_id=job_id,
                              retry_max=test.job_retries)
            else:
                logging.debug("Test %s has no retries", test.name)


    def _add_job(self, new_job_id, retry_max):
        """Add a newly-created job to the retry map.

        @param new_job_id: The afe_job_id of a newly created job.
        @param retry_max: The maximum of times that we could retry
                          the test if the job fails.

        @raises ValueError if new_job_id is already in retry map.

        """
        if new_job_id in self._retry_map:
            raise ValueError('add_job called when job is already in retry map.')

        self._retry_map[new_job_id] = {
                'state': self.States.NOT_ATTEMPTED,
                'retry_max': retry_max}


    def _suite_max_reached(self):
        """Return whether maximum retry limit for a suite has been reached."""
        return self._max_retries <= 0


    def add_retry(self, old_job_id, new_job_id):
        """Record a retry.

        Update retry map with the retry information. The update is
        all-or-nothing: when an error is raised neither the old job's
        record nor the suite-level retry budget has been modified.

        @param old_job_id: The afe_job_id of the job that is retried.
        @param new_job_id: The afe_job_id of the retry job.

        @raises KeyError if old_job_id isn't in the retry map.
        @raises ValueError if we have already retried or made an attempt
                to retry the old job, or if new_job_id is already in the
                retry map.

        """
        old_record = self._retry_map[old_job_id]
        if old_record['state'] != self.States.NOT_ATTEMPTED:
            # %s (not %d) so that a non-integer job id still produces the
            # documented ValueError rather than a formatting TypeError.
            raise ValueError(
                    'We have already retried or attempted to retry job %s' %
                    (old_job_id,))
        # Register the new job before touching the old record: _add_job
        # raises ValueError for an already-tracked id, and in that case the
        # old job must not be left marked as RETRIED.
        self._add_job(new_job_id=new_job_id,
                      retry_max=old_record['retry_max'] - 1)
        old_record['state'] = self.States.RETRIED
        self._max_retries -= 1


    def set_attempted(self, job_id):
        """Set the state of the job to ATTEMPTED.

        @param job_id: afe_job_id of a job.

        @raises KeyError if job_id isn't in the retry map.
        @raises ValueError if the current state is not NOT_ATTEMPTED.

        """
        current_state = self._retry_map[job_id]['state']
        if current_state != self.States.NOT_ATTEMPTED:
            # We are supposed to retry or attempt to retry each job
            # only once. Raise an error if this is not the case.
            raise ValueError('Unexpected state transition: %s -> %s' %
                             (self.States.get_string(current_state),
                              self.States.get_string(self.States.ATTEMPTED)))
        self._retry_map[job_id]['state'] = self.States.ATTEMPTED


    def has_following_retry(self, result):
        """Check whether there will be a following retry.

        We have the following cases for a given job id (result.id),
        - no retry map entry -> retry not required, no following retry
        - has retry map entry:
            - already retried -> has following retry
            - has not retried
                (this branch can be handled by checking should_retry(result))
                - retry_max == 0 --> the last retry job, no more retry
                - retry_max > 0
                   - attempted, but has failed in scheduling a
                     following retry due to rpc error  --> no more retry
                   - has not attempted --> has following retry if test failed.

        @param result: A result, encapsulating the status of the job.

        @returns: True, if there will be a following retry.
                  False otherwise.

        """
        return (result.test_executed
                and result.id in self._retry_map
                and (self._retry_map[result.id]['state'] == self.States.RETRIED
                     or self._should_retry(result)))


    def _should_retry(self, result):
        """Check whether we should retry a job based on its result.

        We will retry the job that corresponds to the result
        when all of the following are true.
        a) The test was actually executed, meaning that if
           a job was aborted before it could ever reach the state
           of 'Running', the job will not be retried.
        b) The result is worse than |self._retry_level| which
           defaults to 'WARN'.
        c) The test requires retry, i.e. the job has an entry in the retry map.
        d) We haven't made any retry attempt yet, i.e. state == NOT_ATTEMPTED
           Note that if a test has JOB_RETRIES=5, and the second time
           it was retried it hit an rpc error, we will give up on
           all following retries.
        e) The job has not reached its retry max, i.e. retry_max > 0
        f) The suite-level retry budget has not been used up.

        @param result: A result, encapsulating the status of the job.

        @returns: True if we should retry the job.

        """
        return (
            result.test_executed
            and result.id in self._retry_map
            and not self._suite_max_reached()
            and result.is_worse_than(
                job_status.Status(self._retry_level, '', 'reason'))
            and self._retry_map[result.id]['state'] == self.States.NOT_ATTEMPTED
            and self._retry_map[result.id]['retry_max'] > 0
        )

    def _should_retry_local_job(self, job_id):
        """Check whether we should retry a job based on information available
        for a local job without a Result object.

        We will retry the job that corresponds to the result
        when all of the following are true.
        a) The suite-level retry budget has not been used up.
        b) The test requires retry, i.e. the job has an entry in the retry map.
        c) We haven't made any retry attempt yet for this job, i.e.
           state == NOT_ATTEMPTED
           If the job is aborted,  we will give up on all following retries,
           regardless of max_retries.
        d) The job has not reached its retry max, i.e. retry_max > 0

        @param job_id: the id for the job, to look up relevant information.

        @returns: True if we should retry the job.

        """
        if self._suite_max_reached():
            logging.debug('suite max_retries reached, not retrying.')
            return False
        if job_id not in self._retry_map:
            logging.debug('job_id not in retry map, not retrying.')
            return False
        if self._retry_map[job_id]['state'] != self.States.NOT_ATTEMPTED:
            logging.debug("job state was %s not 'Not Attempted', not retrying",
                          self._retry_map[job_id]['state'])
            return False
        if self._retry_map[job_id]['retry_max'] <= 0:
            logging.debug('test-level retries exhausted, not retrying')
            return False
        return True


    def job_present(self, job_id):
        """Check whether a job id is present in the retry map.

        @param job_id: afe_job_id of a job.

        @returns: True if the job is present, False if not.
        """
        # Every stored record is a non-empty dict, so plain membership is
        # all that is needed here.
        return job_id in self._retry_map


    def get_retry_max(self, job_id):
        """Get the maximum times the job can still be retried.

        @param job_id: afe_job_id of a job.

        @returns: An int, representing the maximum times the job can still be
                  retried.
        @raises KeyError if job_id isn't in the retry map.

        """
        return self._retry_map[job_id]['retry_max']
306
307
class _SuiteChildJobCreator(object):
    """Schedules the individual test jobs that make up one suite."""

    def __init__(
            self,
            tag,
            builds,
            board,
            afe=None,
            max_runtime_mins=24*60,
            timeout_mins=24*60,
            suite_job_id=None,
            ignore_deps=False,
            extra_deps=(),
            priority=priorities.Priority.DEFAULT,
            offload_failures_only=False,
            test_source_build=None,
            job_keyvals=None,
    ):
        """
        Store the settings shared by every job created for the suite.

        @param tag: string used to tag the jobs run in this suite.
        @param builds: dict of the builds this suite runs against.
        @param board: board this suite runs on.
        @param afe: AFE instance (see server/frontend.py). When omitted a
                    frontend_wrappers.RetryingAFE is created.
        @param max_runtime_mins: maximum suite runtime, in minutes.
        @param timeout_mins: maximum job lifetime, in minutes.
        @param suite_job_id: id of the job acting as parent of all sub
                             jobs. Default: None
        @param ignore_deps: when True the DEPENDENCIES attribute of a test
                            is not turned into dependency labels.
                            (Default: False)
        @param extra_deps: extra DEPENDENCIES strings added to every test
                           that gets scheduled.
        @param priority: integer priority level; higher is more important.
        @param offload_failures_only: enable gs_offloading for failed jobs
                                      only.
        @param test_source_build: build holding the server-side test code.
        @param job_keyvals: general job keyvals; the inheritable ones are
                            copied into each test job's keyvals.
        """
        self._tag = tag
        self._builds = builds
        self._board = board
        if not afe:
            afe = frontend_wrappers.RetryingAFE(timeout_min=30,
                                                delay_sec=10,
                                                debug=False)
        self._afe = afe
        self._max_runtime_mins = max_runtime_mins
        self._timeout_mins = timeout_mins
        self._suite_job_id = suite_job_id
        self._ignore_deps = ignore_deps
        # Stored as a tuple so the caller's sequence is never shared.
        self._extra_deps = tuple(extra_deps)
        self._priority = priority
        self._offload_failures_only = offload_failures_only
        self._test_source_build = test_source_build
        self._job_keyvals = job_keyvals


    @property
    def cros_build(self):
        """The CrOS build, or else the first build found in the builds dict."""
        # TODO(ayatane): the builds dict isn't ordered, so "first" is not
        # well defined. The implications are unclear, but it is probably
        # not a good thing.
        first_build = list(self._builds.values())[0]
        return self._builds.get(provision.CROS_VERSION_PREFIX, first_build)


    def create_job(self, test, retry_for=None):
        """
        Thin wrapper around frontend.AFE.create_job().

        @param test: ControlData object of the test to run.
        @param retry_for: afe_job_id of the job being retried, if the new
                          job is a retry. It is recorded in the new job's
                          keyvals.
        @returns: A frontend.Job object carrying an extra test_name member
                  that preserves the higher level TEST_NAME of the job.
        """
        # The suite priority is used as-is except on moblab. On a system
        # running several suites that share tests, overriding it may lead
        # to an unexpected scheduling order that adds provision jobs.
        if utils.is_moblab():
            job_priority = max(self._priority, test.priority)
        else:
            job_priority = self._priority

        if test.fast:
            reboot_before = model_attributes.RebootBefore.NEVER
        else:
            reboot_before = None

        job_name = tools.create_job_name(
                self._test_source_build or self.cros_build,
                self._tag,
                test.name)

        created_job = self._afe.create_job(
            control_file=test.text,
            name=job_name,
            control_type=test.test_type.capitalize(),
            meta_hosts=[self._board]*test.sync_count,
            dependencies=self._create_job_deps(test),
            keyvals=self._create_keyvals_for_test_job(test, retry_for),
            max_runtime_mins=self._max_runtime_mins,
            timeout_mins=self._timeout_mins,
            parent_job_id=self._suite_job_id,
            reboot_before=reboot_before,
            run_reset=not test.fast,
            priority=job_priority,
            synch_count=test.sync_count,
            require_ssp=test.require_ssp)

        created_job.test_name = test.name
        return created_job


    def _create_job_deps(self, test):
        """Build the dependency list of a test job.

        @param test: ControlData object of the test to run.
        @returns: A list of dependency strings.
        """
        own_deps = [] if self._ignore_deps else list(test.dependencies)
        return own_deps + list(self._extra_deps)


    def _create_keyvals_for_test_job(self, test, retry_for=None):
        """Build the keyvals dict used to create a test job.

        @param test: ControlData object of the test to run.
        @param retry_for: afe_job_id of the job being retried, if the new
                          job is a retry. It is recorded in the keyvals.
        @returns: A keyvals dict for creating the test job.
        """
        cros_build = self.cros_build
        source_build = self._test_source_build

        keyvals = {}
        keyvals[constants.JOB_BUILD_KEY] = cros_build
        keyvals[constants.JOB_SUITE_KEY] = self._tag
        keyvals[constants.JOB_EXPERIMENTAL_KEY] = test.experimental
        keyvals[constants.JOB_BUILDS_KEY] = self._builds

        # test_source_build lives in the job keyvals so the scheduler can
        # fetch the build name from the database when it compiles the
        # autoserv command line, without a new field in afe_jobs.
        #
        # It is only recorded when it differs from the CrOS build or when
        # more than one build is in use (e.g. firmware and CrOS both get
        # updated on the dut). This keeps updated Autotest code able to
        # compile an autoserv command line for an SSP container that uses
        # previous builds.
        needs_source_build = bool(source_build) and (
                cros_build != source_build or len(self._builds) > 1)
        if needs_source_build:
            keyvals[constants.JOB_TEST_SOURCE_BUILD_KEY] = source_build
            for prefix, build in self._builds.items():
                if prefix == provision.FW_RW_VERSION_PREFIX:
                    keyvals[constants.FWRW_BUILD] = build
                elif prefix == provision.FW_RO_VERSION_PREFIX:
                    keyvals[constants.FWRO_BUILD] = build

        # The suite job id goes into the keyval file for the tko parser.
        if self._suite_job_id:
            keyvals[constants.PARENT_JOB_ID] = self._suite_job_id

        # Recording the old job's id lets the tko parser work out the retry
        # relationship later and invalidate the old job's results.
        if retry_for:
            keyvals[constants.RETRY_ORIGINAL_JOB_ID] = retry_for

        if self._offload_failures_only:
            keyvals[constants.JOB_OFFLOAD_FAILURES_KEY] = True

        if self._job_keyvals:
            keyvals.update({
                    key: self._job_keyvals[key]
                    for key in constants.INHERITED_KEYVALS
                    if key in self._job_keyvals})

        return keyvals
488
489
class _ControlFileRetriever(object):
    """Fetches parsed control files.

    Where a control file getter hands back the raw text of a control file,
    this class hands back control data instances.
    """

    def __init__(self, cf_getter, forgiving_parser=True, run_prod_code=False,
                 test_args=None):
        """Initialize instance.

        @param cf_getter: control_file_getter.ControlFileGetter used to list
               control files and fetch their content.
        @param forgiving_parser: If False, ControlVariableExceptions hit
                                 while parsing control files are raised.
                                 This can surface syntax errors in
                                 unrelated files, because files are parsed
                                 before the predicate is applied.
        @param run_prod_code: If true, SSP is disabled for the retrieved
                              tests so that they run the prod test code,
                              i.e. the code currently on the lab servers.
        @param test_args: dict of args seeded into the test control file
                          under the name |args_dict|.
        """
        self._test_args = test_args
        self._run_prod_code = run_prod_code
        self._forgiving_parser = forgiving_parser
        self._cf_getter = cf_getter


    def retrieve_for_test(self, test_name):
        """Fetch the control data of a single test.

        forgiving_parser is not honoured here since there is no forgiving
        value that could be returned.

        @param test_name: Name of the test to retrieve.

        @raises ControlVariableException: There is a syntax error in a
                                          control file.

        @returns a ControlData object
        """
        control_data = suite_common.retrieve_control_data_for_test(
                self._cf_getter, test_name)
        return control_data


    def retrieve_for_suite(self, suite_name=''):
        """Scan the available control files and collect their tests.

        @param suite_name: If given, the search is narrowed to the control
                           files of just this suite where possible.

        @raises ControlVariableException: If forgiving_parser is False and
                                          a control file has a syntax error.

        @returns a dictionary of ControlData objects selected by the given
                 parameters.
        """
        found = suite_common.retrieve_for_suite(
                self._cf_getter, suite_name, self._forgiving_parser,
                self._test_args)
        if not self._run_prod_code:
            return found

        # Running prod code means the tests must not use SSP.
        for control_data in found.values():
            control_data.require_ssp = False
        return found
558
559
def list_all_suites(build, devserver, cf_getter=None):
    """
    Collects every suite name mentioned by any parsed control file.

    @param build: the build on which we're running this suite.
    @param devserver: the devserver which contains the build.
    @param cf_getter: control_file_getter.ControlFileGetter. When None, a
                      getter backed by the devserver is created.

    @return list of suite names, without duplicates.
    """
    getter = cf_getter
    if getter is None:
        getter = _create_ds_getter(build, devserver)

    def _accept_all(control_data):
        """Predicate that keeps every control file."""
        return True

    suite_names = set()
    for control_data in find_and_parse_tests(getter, _accept_all):
        suite_names |= set(control_data.suite_tag_parts)
    return list(suite_names)
580
581
582def test_file_similarity_predicate(test_file_pattern):
583    """Returns predicate that gets the similarity based on a test's file
584    name pattern.
585
586    Builds a predicate that takes in a parsed control file (a ControlData)
587    and returns a tuple of (file path, ratio), where ratio is the
588    similarity between the test file name and the given test_file_pattern.
589
590    @param test_file_pattern: regular expression (string) to match against
591                              control file names.
592    @return a callable that takes a ControlData and and returns a tuple of
593            (file path, ratio), where ratio is the similarity between the
594            test file name and the given test_file_pattern.
595    """
596    return lambda t: ((None, 0) if not hasattr(t, 'path') else
597            (t.path, difflib.SequenceMatcher(a=t.path,
598                                             b=test_file_pattern).ratio()))
599
600
601def test_name_similarity_predicate(test_name):
602    """Returns predicate that matched based on a test's name.
603
604    Builds a predicate that takes in a parsed control file (a ControlData)
605    and returns a tuple of (test name, ratio), where ratio is the similarity
606    between the test name and the given test_name.
607
608    @param test_name: the test name to base the predicate on.
609    @return a callable that takes a ControlData and returns a tuple of
610            (test name, ratio), where ratio is the similarity between the
611            test name and the given test_name.
612    """
613    return lambda t: ((None, 0) if not hasattr(t, 'name') else
614            (t.name,
615             difflib.SequenceMatcher(a=t.name, b=test_name).ratio()))
616
617
def matches_attribute_expression_predicate(test_attr_boolstr):
    """Returns predicate that evaluates a boolean expression over a test's
    attributes.

    The returned callable takes a parsed control file (a ControlData) and
    returns the result of boolparse_lib.BoolstrResult for the given
    expression and the control file's attributes.

    @param test_attr_boolstr: boolean expression over attributes, like
                              'system:all and interval:daily'.

    @return a callable that takes a ControlData and returns the outcome of
            evaluating the expression against its attributes.
    """
    def _attributes_match(control_data):
        return boolparse_lib.BoolstrResult(test_attr_boolstr,
                                           control_data.attributes)

    return _attributes_match
634
635
636def test_file_matches_pattern_predicate(test_file_pattern):
637    """Returns predicate that matches based on a test's file name pattern.
638
639    Builds a predicate that takes in a parsed control file (a ControlData)
640    and returns True if the test's control file name matches the given
641    regular expression.
642
643    @param test_file_pattern: regular expression (string) to match against
644                              control file names.
645    @return a callable that takes a ControlData and and returns
646            True if control file name matches the pattern.
647    """
648    return lambda t: hasattr(t, 'path') and re.match(test_file_pattern,
649                                                     t.path)
650
651
652def test_name_matches_pattern_predicate(test_name_pattern):
653    """Returns predicate that matches based on a test's name pattern.
654
655    Builds a predicate that takes in a parsed control file (a ControlData)
656    and returns True if the test name matches the given regular expression.
657
658    @param test_name_pattern: regular expression (string) to match against
659                              test names.
660    @return a callable that takes a ControlData and returns
661            True if the name fields matches the pattern.
662    """
663    return lambda t: hasattr(t, 'name') and re.match(test_name_pattern,
664                                                     t.name)
665
666
667def test_name_equals_predicate(test_name):
668    """Returns predicate that matched based on a test's name.
669
670    Builds a predicate that takes in a parsed control file (a ControlData)
671    and returns True if the test name is equal to |test_name|.
672
673    @param test_name: the test name to base the predicate on.
674    @return a callable that takes a ControlData and looks for |test_name|
675            in that ControlData's name.
676    """
677    return lambda t: hasattr(t, 'name') and test_name == t.name
678
679
def name_in_tag_similarity_predicate(name):
    """Returns predicate that scores each suite of a control file against
    the given name.

    The returned callable takes a parsed control file (a ControlData) and
    yields a list with one (suite name, ratio) tuple per entry of the
    control file's suite_tag_parts, where ratio is the
    difflib.SequenceMatcher similarity between that suite and |name|. A
    control file that lists no suites yields [(None, 0)].

    @param name: the suite name to compare against.
    @return a callable that takes a ControlData and returns a list of
            (suite name, ratio) tuples.
    """
    def _score_suites(control_data):
        scores = []
        for suite in control_data.suite_tag_parts:
            matcher = difflib.SequenceMatcher(a=suite, b=name)
            scores.append((suite, matcher.ratio()))
        if not scores:
            return [(None, 0)]
        return scores

    return _score_suites
698
699
def name_in_tag_predicate(name):
    """Returns predicate that looks for |name| in a control file's suites.

    This simply hands out the predicate built by
    suite_common.name_in_tag_predicate for |name|.

    @param name: the suite name to base the predicate on.
    @return a callable that takes a ControlData and looks for |name| in that
            ControlData object's suite member.
    """
    suite_predicate = suite_common.name_in_tag_predicate(name)
    return suite_predicate
711
712
def create_fs_getter(autotest_dir):
    """
    @param autotest_dir: the place to find autotests.
    @return a FileSystemGetter instance that looks under |autotest_dir|.
    """
    # The places searched for tests are currently hard-coded.
    search_dirs = []
    for subpath in ('server/site_tests', 'client/site_tests',
                    'server/tests', 'client/tests'):
        search_dirs.append(os.path.join(autotest_dir, subpath))
    return control_file_getter.FileSystemGetter(search_dirs)
723
724
def _create_ds_getter(build, devserver):
    """
    Build a control file getter that fetches from a devserver.

    @param build: the build on which we're running this suite.
    @param devserver: the devserver which contains the build.
    @return a DevServerGetter instance for |build| on |devserver|.
    """
    getter = control_file_getter.DevServerGetter(build, devserver)
    return getter
732
733
def _non_experimental_tests_predicate(test_data):
    """Test predicate for non-experimental tests.

    @param test_data: object exposing an |experimental| attribute.
    @return True if |test_data.experimental| is falsy, False otherwise.
    """
    is_experimental = test_data.experimental
    return not is_experimental
737
738
def find_and_parse_tests(cf_getter, predicate, suite_name='',
                         add_experimental=False, forgiving_parser=True,
                         run_prod_code=False, test_args=None):
    """
    Function to scan through all tests and find eligible tests.

    Retrieves the control files for |suite_name| through |cf_getter| and
    returns those that match |predicate|. Unless |add_experimental| is set,
    tests marked experimental are filtered out as well.

    @param cf_getter: a control_file_getter.ControlFileGetter used to list
           and fetch the content of control files
    @param predicate: a function that should return True when run over a
           ControlData representation of a control file that should be in
           this Suite.
    @param suite_name: If specified, this method will attempt to restrain
                       the search space to just this suite's control files.
    @param add_experimental: add tests with experimental attribute set.
    @param forgiving_parser: If False, will raise ControlVariableExceptions
                             if any are encountered when parsing control
                             files. Note that this can raise an exception
                             for syntax errors in unrelated files, because
                             we parse them before applying the predicate.
    @param run_prod_code: If true, the suite will run the test code that
                          lives in prod aka the test code currently on the
                          lab servers by disabling SSP for the discovered
                          tests.
    @param test_args: A dict of args to be seeded in test control file.

    @raises ControlVariableException: If forgiving_parser is False and there
                                      is a syntax error in a control file.

    @return list of ControlData objects that should be run, with control
            file text added in |text| attribute. The ordering is whatever
            suite_common.filter_tests produces (historically documented as
            sorted by the TIME setting in the control file, slowest test
            first).
    """
    logging.debug('Getting control file list for suite: %s', suite_name)
    retriever_kwargs = {
        'forgiving_parser': forgiving_parser,
        'run_prod_code': run_prod_code,
        'test_args': test_args,
    }
    candidates = _ControlFileRetriever(
            cf_getter, **retriever_kwargs).retrieve_for_suite(suite_name)

    if add_experimental:
        selector = predicate
    else:
        # Require both the caller's predicate and "not experimental".
        selector = _ComposedPredicate(
                [predicate, _non_experimental_tests_predicate])
    return suite_common.filter_tests(candidates, selector)
785
786
def find_possible_tests(cf_getter, predicate, suite_name='', count=10):
    """
    Function to scan through all tests and find possible tests.

    Retrieves the control files for |suite_name| through |cf_getter|, scores
    each one with |predicate| and returns the |count| highest-scoring names.

    @param cf_getter: a control_file_getter.ControlFileGetter used to list
           and fetch the content of control files
    @param predicate: a function that should return a tuple of (name, ratio)
           or a list of such tuples when run over a ControlData
           representation of a control file. `name` is the key to be
           compared, e.g., a suite name or test name. `ratio` is a value
           between [0,1] indicating the similarity of `name` and the value
           to be compared.
    @param suite_name: If specified, this method will attempt to restrain
                       the search space to just this suite's control files.
    @param count: Number of suggestions to return, default to 10.

    @return list of top names that are similar to the given test, sorted by
            match ratio, highest first.
    """
    logging.debug('Getting control file list for suite: %s', suite_name)
    retriever = _ControlFileRetriever(cf_getter)
    parsed = retriever.retrieve_for_suite(suite_name)
    logging.debug('Parsed %s control files.', len(parsed))

    ratio_by_name = {}
    for control_data in six.itervalues(parsed):
        scored = predicate(control_data)
        # Some predicates may return a list of tuples, e.g.,
        # name_in_tag_similarity_predicate. Normalize to a list.
        if not isinstance(scored, list):
            scored = [scored]
        for candidate, ratio in scored:
            # A name seen more than once keeps the most recent ratio.
            ratio_by_name[candidate] = ratio

    ranked = sorted(ratio_by_name.items(), key=lambda pair: pair[1],
                    reverse=True)
    return [candidate for candidate, _ in ranked[:count]]
824
825
def _deprecated_suite_method(func):
    """Decorator for deprecated Suite static methods.

    TODO(ayatane): This is used to decorate functions that are called as
    static methods on Suite.

    @param func: the function to expose as a deprecated static method.
    @return a staticmethod that warns on every call and then calls |func|
            with the same arguments.
    """
    @functools.wraps(func)
    def _warn_and_delegate(*args, **kwargs):
        """Emit the deprecation warning, then forward to |func|."""
        message = ('Calling method "%s" from Suite is deprecated' %
                   func.__name__)
        warnings.warn(message)
        return func(*args, **kwargs)

    as_static = staticmethod(_warn_and_delegate)
    return as_static
839
840
class _BaseSuite(object):
    """
    A suite of tests that can be scheduled as jobs and waited upon.

    Given an iterable of tests, can fire off jobs to run them, and then wait
    for results.

    @var tests: list of the tests in this suite.
    @var _tag: a string with which to tag jobs run in this suite.
    @var _builds: the builds on which we're running this suite.
    @var _afe: an instance of AFE as defined in server/frontend.py.
    @var _tko: an instance of TKO as defined in server/frontend.py.
    @var _jobs: currently scheduled jobs, if any.
    @var _jobs_to_tests: a dictionary that maps job ids to tests represented
                         ControlData objects.
    @var _job_retry: a bool value indicating whether jobs should be retried
                     on failure.
    @var _retry_handler: a RetryHandler object, created in schedule() when
                         |_job_retry| is set; None otherwise.
    @var _dependencies: tuple of |extra_deps| followed by the board, the
                        pool (if any) and |child_dependencies|.

    """


    def __init__(
            self,
            tests,
            tag,
            builds,
            board,
            afe=None,
            tko=None,
            pool=None,
            results_dir=None,
            max_runtime_mins=24*60,
            timeout_mins=24*60,
            file_bugs=False,
            suite_job_id=None,
            ignore_deps=False,
            extra_deps=None,
            priority=priorities.Priority.DEFAULT,
            wait_for_results=True,
            job_retry=False,
            max_retries=sys.maxsize,
            offload_failures_only=False,
            test_source_build=None,
            job_keyvals=None,
            child_dependencies=(),
            result_reporter=None,
    ):
        """Initialize instance.

        @param tests: Iterable of tests to run.
        @param tag: a string with which to tag jobs run in this suite.
        @param builds: the builds on which we're running this suite.
        @param board: the board on which we're running this suite.
        @param afe: an instance of AFE as defined in server/frontend.py.
        @param tko: an instance of TKO as defined in server/frontend.py.
        @param pool: Specify the pool of machines to use for scheduling
                purposes.
        @param results_dir: The directory where the job can write results to.
                            This must be set if you want job_id of sub-jobs
                            list in the job keyvals.
        @param max_runtime_mins: Maximum suite runtime, in minutes.
        @param timeout_mins: Maximum job lifetime, in minutes.
        @param file_bugs: True if failed results should be handed to the
                          result reporter (see _should_report).
        @param suite_job_id: Job id that will act as parent id to all sub jobs.
                             Default: None
        @param ignore_deps: True if jobs should ignore the DEPENDENCIES
                            attribute and skip applying of dependency labels.
                            (Default:False)
        @param extra_deps: A list of strings which are the extra DEPENDENCIES
                           to add to each test being scheduled. The list is
                           copied; the caller's object is not modified.
        @param priority: Integer priority level.  Higher is more important.
        @param wait_for_results: Set to False to run the suite job without
                                 waiting for test jobs to finish. Default is
                                 True.
        @param job_retry: A bool value indicating whether jobs should be retried
                          on failure. If True, the field 'JOB_RETRIES' in
                          control files will be respected. If False, do not
                          retry.
        @param max_retries: Maximum retry limit at suite level.
                            Regardless how many times each individual test
                            has been retried, the total number of retries
                            happening in the suite can't exceed _max_retries.
                            Default to sys.maxsize.
        @param offload_failures_only: Only enable gs_offloading for failed
                                      jobs.
        @param test_source_build: Build that contains the server-side test code.
        @param job_keyvals: General job keyvals to be inserted into keyval file,
                            which will be used by tko/parse later.
        @param child_dependencies: (optional) list of dependency strings
                to be added as dependencies to child jobs.
        @param result_reporter: A _ResultReporter instance to report results. If
                None, an _EmailReporter will be created.
        """

        self.tests = list(tests)
        self._tag = tag
        self._builds = builds
        self._results_dir = results_dir
        self._afe = afe or frontend_wrappers.RetryingAFE(timeout_min=30,
                                                         delay_sec=10,
                                                         debug=False)
        self._tko = tko or frontend_wrappers.RetryingTKO(timeout_min=30,
                                                         delay_sec=10,
                                                         debug=False)
        self._jobs = []
        self._jobs_to_tests = {}

        self._file_bugs = file_bugs
        self._suite_job_id = suite_job_id
        self._job_retry = job_retry
        self._max_retries = max_retries
        # RetryHandler to be initialized in schedule()
        self._retry_handler = None
        self.wait_for_results = wait_for_results
        self._job_keyvals = job_keyvals
        if result_reporter is None:
            self._result_reporter = _EmailReporter(self)
        else:
            self._result_reporter = result_reporter

        # Work on a private copy so that the board/pool/child dependencies
        # appended below never leak into a list owned by the caller.
        extra_deps = list(extra_deps) if extra_deps is not None else []
        extra_deps.append(board)
        if pool:
            extra_deps.append(pool)
        extra_deps.extend(child_dependencies)
        self._dependencies = tuple(extra_deps)

        self._job_creator = _SuiteChildJobCreator(
            tag=tag,
            builds=builds,
            board=board,
            afe=afe,
            max_runtime_mins=max_runtime_mins,
            timeout_mins=timeout_mins,
            suite_job_id=suite_job_id,
            ignore_deps=ignore_deps,
            extra_deps=extra_deps,
            priority=priority,
            offload_failures_only=offload_failures_only,
            test_source_build=test_source_build,
            job_keyvals=job_keyvals,
        )


    def _schedule_test(self, record, test, retry_for=None):
        """Schedule a single test and return the job.

        Schedule a single test by creating a job, and then update relevant
        data structures that are used to keep track of all running jobs.

        Emits a TEST_NA status log entry if it failed to schedule the test due
        to NoEligibleHostException or a non-existent board label.

        Returns a frontend.Job object if the test is successfully scheduled.
        If scheduling failed due to NoEligibleHostException or a non-existent
        board label, returns None.

        @param record: A callable to use for logging.
                       prototype: record(base_job.status_log_entry)
        @param test: ControlData for a test to run.
        @param retry_for: If we are scheduling a test to retry an
                          old job, the afe_job_id of the old job
                          will be passed in as |retry_for|.

        @raises proxy.ValidationError: if job creation failed validation for
                a reason other than a non-existent board label.
        @raises error.RPCException, proxy.JSONRPCException: re-raised after
                marking the retry as attempted (when |retry_for| is set).

        @returns: A frontend.Job object or None
        """
        msg = 'Scheduling %s' % test.name
        if retry_for:
            msg = msg + ', to retry afe job %d' % retry_for
        logging.debug(msg)
        begin_time_str = datetime.datetime.now().strftime(time_utils.TIME_FMT)
        try:
            job = self._job_creator.create_job(test, retry_for=retry_for)
        except (error.NoEligibleHostException, proxy.ValidationError) as e:
            not_applicable = (
                    isinstance(e, error.NoEligibleHostException)
                    or (isinstance(e, proxy.ValidationError)
                        and _is_nonexistent_board_error(e)))
            if not not_applicable:
                # Bare raise keeps the original traceback intact.
                raise
            # Treat a dependency on a non-existent board label the same as
            # a dependency on a board that exists, but for which there's no
            # hardware.
            logging.debug('%s not applicable for this board/pool. '
                          'Emitting TEST_NA.', test.name)
            Status('TEST_NA', test.name,
                   'Skipping:  test not supported on this board/pool.',
                   begin_time_str=begin_time_str).record_all(record)
            return None
        except (error.RPCException, proxy.JSONRPCException):
            if retry_for:
                # Mark that we've attempted to retry the old job.
                logging.debug("RPC exception occurred")
                self._retry_handler.set_attempted(job_id=retry_for)
            raise
        else:
            self._jobs.append(job)
            self._jobs_to_tests[job.id] = test
            if retry_for:
                # A retry job was just created, record it.
                self._retry_handler.add_retry(
                        old_job_id=retry_for, new_job_id=job.id)
                retry_count = (test.job_retries -
                               self._retry_handler.get_retry_max(job.id))
                logging.debug('Job %d created to retry job %d. '
                              'Have retried for %d time(s)',
                              job.id, retry_for, retry_count)
            self._remember_job_keyval(job)
            return job

    def schedule(self, record):
        """
        Schedule jobs using |self._afe|.

        frontend.Job objects representing each scheduled job will be put in
        |self._jobs|.

        Any exception raised while scheduling is logged and recorded as a
        FAIL status for the suite instead of being propagated.

        @param record: A callable to use for logging.
                       prototype: record(base_job.status_log_entry)
        @returns: The number of tests that were scheduled.
        """
        scheduled_test_names = []
        logging.debug('Discovered %d tests.', len(self.tests))

        Status('INFO', 'Start %s' % self._tag).record_result(record)
        try:
            # Write job_keyvals into keyval file.
            if self._job_keyvals:
                utils.write_keyval(self._results_dir, self._job_keyvals)

            # TODO(crbug.com/730885): This is a hack to protect tests that are
            # not usually retried from getting hit by a provision error when run
            # as part of a suite. Remove this hack once provision is separated
            # out in its own suite.
            self._bump_up_test_retries(self.tests)
            for test in self.tests:
                scheduled_job = self._schedule_test(record, test)
                if scheduled_job is not None:
                    scheduled_test_names.append(test.name)

            # Write the num of scheduled tests and name of them to keyval file.
            logging.debug('Scheduled %d tests, writing the total to keyval.',
                          len(scheduled_test_names))
            utils.write_keyval(
                self._results_dir,
                self._make_scheduled_tests_keyvals(scheduled_test_names))
        except Exception:
            logging.exception('Exception while scheduling suite')
            Status('FAIL', self._tag,
                   'Exception while scheduling suite').record_result(record)

        if self._job_retry:
            logging.debug("Initializing RetryHandler for suite %s.", self._tag)
            self._retry_handler = RetryHandler(
                    initial_jobs_to_tests=self._jobs_to_tests,
                    max_retries=self._max_retries)
            logging.debug("retry map created: %s ",
                          self._retry_handler._retry_map)
        else:
            logging.info("Will not retry jobs from suite %s.", self._tag)
        return len(scheduled_test_names)


    def _bump_up_test_retries(self, tests):
        """Bump up individual test retries to match suite retry options.

        Does nothing unless the suite was created with job_retry enabled.

        @param tests: iterable of tests whose |job_retries| may be updated
                      in place.
        """
        if not self._job_retry:
            return

        for test in tests:
            # We do honor if a test insists on JOB_RETRIES = 0.
            if test.job_retries is None:
                logging.debug(
                        'Test %s did not request retries, but suite requires '
                        'retries. Bumping retries up to 1. '
                        '(See crbug.com/730885)',
                        test.name)
                test.job_retries = 1


    def _make_scheduled_tests_keyvals(self, scheduled_test_names):
        """Make a keyvals dict to write for scheduled test names.

        @param scheduled_test_names: A list of scheduled test name strings.

        @returns: A keyvals dict.
        """
        return {
            constants.SCHEDULED_TEST_COUNT_KEY: len(scheduled_test_names),
            constants.SCHEDULED_TEST_NAMES_KEY: repr(scheduled_test_names),
        }


    def _should_report(self, result):
        """
        Returns True if this failure requires to be reported.

        @param result: A result, encapsulating the status of the failed job.
        @return: True if we should report this failure.
        """
        return (self._file_bugs and result.test_executed and
                not result.is_testna() and
                result.is_worse_than(job_status.Status('GOOD', '', 'reason')))


    def _has_retry(self, result):
        """
        Return True if this result gets to retry.

        @param result: A result, encapsulating the status of the failed job.
        @return: bool
        """
        return (self._job_retry
                and self._retry_handler.has_following_retry(result))


    def wait(self, record):
        """
        Polls for the job statuses, using |record| to print status when each
        completes.

        Any exception raised while waiting is logged and recorded as a FAIL
        status for the suite instead of being propagated.

        @param record: callable that records job status.
                 prototype:
                   record(base_job.status_log_entry)
        """
        waiter = job_status.JobResultWaiter(self._afe, self._tko)
        try:
            if self._suite_job_id:
                jobs = self._afe.get_jobs(parent_job_id=self._suite_job_id)
            else:
                logging.warning('Unknown suite_job_id, falling back to less '
                                'efficient results_generator.')
                jobs = self._jobs
            waiter.add_jobs(jobs)
            for result in waiter.wait_for_results():
                self._handle_result(result=result, record=record, waiter=waiter)
                if self._finished_waiting():
                    break
        except Exception:  # pylint: disable=W0703
            logging.exception('Exception waiting for results')
            Status('FAIL', self._tag,
                   'Exception waiting for results').record_result(record)


    def _finished_waiting(self):
        """Return whether the suite is finished waiting for child jobs.

        This base implementation always returns False, so wait() keeps
        consuming results until the waiter is exhausted.
        """
        return False


    def _handle_result(self, result, record, waiter):
        """
        Handle a test job result.

        @param result: Status instance for job.
        @param record: callable that records job status.
                 prototype:
                   record(base_job.status_log_entry)
        @param waiter: JobResultsWaiter instance.

        @instance_param _result_reporter: _ResultReporter instance.
        """
        self._record_result(result, record)
        rescheduled = False
        if self._job_retry and self._retry_handler._should_retry(result):
            rescheduled = self._retry_result(result, record, waiter)
        # TODO (crbug.com/751428): If the suite times out before a retry could
        # finish, we would lose the chance to report errors from the original
        # job.
        if self._has_retry(result) and rescheduled:
            return

        if self._should_report(result):
            self._result_reporter.report(result)

    def _record_result(self, result, record):
        """
        Record a test job result.

        @param result: Status instance for job.
        @param record: callable that records job status.
                 prototype:
                   record(base_job.status_log_entry)
        """
        result.record_all(record)
        self._remember_job_keyval(result)


    def _retry_result(self, result, record, waiter):
        """
        Retry a test job result.

        @param result: Status instance for job.
        @param record: callable that records job status.
                 prototype:
                   record(base_job.status_log_entry)
        @param waiter: JobResultsWaiter instance.
        @returns: True if a job was scheduled for retry, False otherwise.
        """
        test = self._jobs_to_tests[result.id]
        try:
            # It only takes effect for CQ retriable job:
            #   1) in first try, test.fast=True.
            #   2) in second try, test will be run in normal mode, so reset
            #       test.fast=False.
            test.fast = False
            new_job = self._schedule_test(
                    record=record, test=test, retry_for=result.id)
        except (error.RPCException, proxy.JSONRPCException) as e:
            logging.error('Failed to schedule test: %s, Reason: %s',
                          test.name, e)
            return False
        else:
            if new_job is None:
                # _schedule_test returns None when the test turned out to be
                # TEST_NA; there is no job to wait on in that case.
                return False
            waiter.add_job(new_job)
            return bool(new_job)

    @property
    def jobs(self):
        """Give a copy of the associated jobs

        @returns: array of jobs"""
        return list(self._jobs)


    @property
    def _should_file_bugs(self):
        """Return whether bugs should be filed.

        @returns: bool
        """
        # File bug when failure is one of the _FILE_BUG_SUITES,
        # otherwise send an email to the owner and cc.
        return self._tag in _FILE_BUG_SUITES


    def abort(self):
        """
        Abort all scheduled test jobs.
        """
        if self._jobs:
            job_ids = [job.id for job in self._jobs]
            self._afe.run('abort_host_queue_entries', job__id__in=job_ids)


    def _remember_job_keyval(self, job):
        """
        Record provided job as a suite job keyval, for later referencing.

        The keyval key is the MD5 hex digest of the job's test name and the
        value is '<id>-<owner>'. Nothing is written when the results
        directory or any of the job's id, owner or test_name is missing.

        @param job: some representation of a job that has the attributes:
                    id, test_name, and owner
        """
        if self._results_dir and job.id and job.owner and job.test_name:
            job_id_owner = '%s-%s' % (job.id, job.owner)
            logging.debug('Adding job keyval for %s=%s',
                          job.test_name, job_id_owner)
            # hashlib only accepts bytes on Python 3; encode text names so
            # the digest can be computed on both Python 2 and Python 3.
            name_bytes = job.test_name
            if not isinstance(name_bytes, bytes):
                name_bytes = name_bytes.encode('utf-8')
            utils.write_keyval(
                self._results_dir,
                {hashlib.md5(name_bytes).hexdigest(): job_id_owner})
1300
1301
1302class Suite(_BaseSuite):
1303    """
1304    A suite of tests, defined by some predicate over control file variables.
1305
1306    Given a place to search for control files a predicate to match the desired
1307    tests, can gather tests and fire off jobs to run them, and then wait for
1308    results.
1309
1310    @var _predicate: a function that should return True when run over a
1311         ControlData representation of a control file that should be in
1312         this Suite.
1313    @var _tag: a string with which to tag jobs run in this suite.
1314    @var _builds: the builds on which we're running this suite.
1315    @var _afe: an instance of AFE as defined in server/frontend.py.
1316    @var _tko: an instance of TKO as defined in server/frontend.py.
1317    @var _jobs: currently scheduled jobs, if any.
1318    @var _jobs_to_tests: a dictionary that maps job ids to tests represented
1319                         ControlData objects.
1320    @var _cf_getter: a control_file_getter.ControlFileGetter
1321    @var _retry: a bool value indicating whether jobs should be retried on
1322                 failure.
1323    @var _retry_handler: a RetryHandler object.
1324
1325    """
1326
    # TODO(ayatane): These methods are kept on the Suite class for
    # backward compatibility.
    # Each name below is rebound to a static method produced by
    # _deprecated_suite_method, so calling it through Suite still reaches
    # the wrapped module-level or suite_common function, after a warning
    # has been issued.
    find_and_parse_tests = _deprecated_suite_method(find_and_parse_tests)
    find_possible_tests = _deprecated_suite_method(find_possible_tests)
    create_fs_getter = _deprecated_suite_method(create_fs_getter)
    name_in_tag_predicate = _deprecated_suite_method(
            suite_common.name_in_tag_predicate)
    name_in_tag_similarity_predicate = _deprecated_suite_method(
            name_in_tag_similarity_predicate)
    test_name_equals_predicate = _deprecated_suite_method(
            test_name_equals_predicate)
    test_name_in_list_predicate = _deprecated_suite_method(
            suite_common.test_name_in_list_predicate)
    test_name_matches_pattern_predicate = _deprecated_suite_method(
            test_name_matches_pattern_predicate)
    test_file_matches_pattern_predicate = _deprecated_suite_method(
            test_file_matches_pattern_predicate)
    matches_attribute_expression_predicate = _deprecated_suite_method(
            matches_attribute_expression_predicate)
    test_name_similarity_predicate = _deprecated_suite_method(
            test_name_similarity_predicate)
    test_file_similarity_predicate = _deprecated_suite_method(
            test_file_similarity_predicate)
    list_all_suites = _deprecated_suite_method(list_all_suites)
    get_test_source_build = _deprecated_suite_method(
            suite_common.get_test_source_build)
1353
1354
1355    @classmethod
1356    def create_from_predicates(cls, predicates, builds, board, devserver,
1357                               cf_getter=None, name='ad_hoc_suite',
1358                               run_prod_code=False, **dargs):
1359        """
1360        Create a Suite using a given predicate test filters.
1361
1362        Uses supplied predicate(s) to instantiate a Suite. Looks for tests in
1363        |autotest_dir| and will schedule them using |afe|.  Pulls control files
1364        from the default dev server. Results will be pulled from |tko| upon
1365        completion.
1366
1367        @param predicates: A list of callables that accept ControlData
1368                           representations of control files. A test will be
1369                           included in suite if all callables in this list
1370                           return True on the given control file.
1371        @param builds: the builds on which we're running this suite. It's a
1372                       dictionary of version_prefix:build.
1373        @param board: the board on which we're running this suite.
1374        @param devserver: the devserver which contains the build.
1375        @param cf_getter: control_file_getter.ControlFileGetter. Defaults to
1376                          using DevServerGetter.
1377        @param name: name of suite. Defaults to 'ad_hoc_suite'
1378        @param run_prod_code: If true, the suite will run the tests that
1379                              lives in prod aka the test code currently on the
1380                              lab servers.
1381        @param **dargs: Any other Suite constructor parameters, as described
1382                        in Suite.__init__ docstring.
1383        @return a Suite instance.
1384        """
1385        if cf_getter is None:
1386            if run_prod_code:
1387                cf_getter = create_fs_getter(_AUTOTEST_DIR)
1388            else:
1389                build = suite_common.get_test_source_build(builds, **dargs)
1390                cf_getter = _create_ds_getter(build, devserver)
1391
1392        return cls(predicates,
1393                   name, builds, board, cf_getter, run_prod_code, **dargs)
1394
1395
1396    @classmethod
1397    def create_from_name(cls, name, builds, board, devserver, cf_getter=None,
1398                         **dargs):
1399        """
1400        Create a Suite using a predicate based on the SUITE control file var.
1401
1402        Makes a predicate based on |name| and uses it to instantiate a Suite
1403        that looks for tests in |autotest_dir| and will schedule them using
1404        |afe|.  Pulls control files from the default dev server.
1405        Results will be pulled from |tko| upon completion.
1406
1407        @param name: a value of the SUITE control file variable to search for.
1408        @param builds: the builds on which we're running this suite. It's a
1409                       dictionary of version_prefix:build.
1410        @param board: the board on which we're running this suite.
1411        @param devserver: the devserver which contains the build.
1412        @param cf_getter: control_file_getter.ControlFileGetter. Defaults to
1413                          using DevServerGetter.
1414        @param **dargs: Any other Suite constructor parameters, as described
1415                        in Suite.__init__ docstring.
1416        @return a Suite instance.
1417        """
1418        if cf_getter is None:
1419            build = suite_common.get_test_source_build(builds, **dargs)
1420            cf_getter = _create_ds_getter(build, devserver)
1421
1422        return cls([suite_common.name_in_tag_predicate(name)],
1423                   name, builds, board, cf_getter, **dargs)
1424
1425
1426    def __init__(
1427            self,
1428            predicates,
1429            tag,
1430            builds,
1431            board,
1432            cf_getter,
1433            run_prod_code=False,
1434            afe=None,
1435            tko=None,
1436            pool=None,
1437            results_dir=None,
1438            max_runtime_mins=24*60,
1439            timeout_mins=24*60,
1440            file_bugs=False,
1441            suite_job_id=None,
1442            ignore_deps=False,
1443            extra_deps=None,
1444            priority=priorities.Priority.DEFAULT,
1445            forgiving_parser=True,
1446            wait_for_results=True,
1447            job_retry=False,
1448            max_retries=sys.maxsize,
1449            offload_failures_only=False,
1450            test_source_build=None,
1451            job_keyvals=None,
1452            test_args=None,
1453            child_dependencies=(),
1454            result_reporter=None,
1455    ):
1456        """
1457        Constructor
1458
1459        @param predicates: A list of callables that accept ControlData
1460                           representations of control files. A test will be
1461                           included in suite if all callables in this list
1462                           return True on the given control file.
1463        @param tag: a string with which to tag jobs run in this suite.
1464        @param builds: the builds on which we're running this suite.
1465        @param board: the board on which we're running this suite.
1466        @param cf_getter: a control_file_getter.ControlFileGetter
1467        @param afe: an instance of AFE as defined in server/frontend.py.
1468        @param tko: an instance of TKO as defined in server/frontend.py.
1469        @param pool: Specify the pool of machines to use for scheduling
1470                purposes.
1471        @param run_prod_code: If true, the suite will run the test code that
1472                              lives in prod aka the test code currently on the
1473                              lab servers.
1474        @param results_dir: The directory where the job can write results to.
1475                            This must be set if you want job_id of sub-jobs
1476                            list in the job keyvals.
1477        @param max_runtime_mins: Maximum suite runtime, in minutes.
1478        @param timeout: Maximum job lifetime, in hours.
1479        @param suite_job_id: Job id that will act as parent id to all sub jobs.
1480                             Default: None
1481        @param ignore_deps: True if jobs should ignore the DEPENDENCIES
1482                            attribute and skip applying of dependency labels.
1483                            (Default:False)
1484        @param extra_deps: A list of strings which are the extra DEPENDENCIES
1485                           to add to each test being scheduled.
1486        @param priority: Integer priority level.  Higher is more important.
1487        @param wait_for_results: Set to False to run the suite job without
1488                                 waiting for test jobs to finish. Default is
1489                                 True.
1490        @param job_retry: A bool value indicating whether jobs should be retried
1491                          on failure. If True, the field 'JOB_RETRIES' in
1492                          control files will be respected. If False, do not
1493                          retry.
1494        @param max_retries: Maximum retry limit at suite level.
1495                            Regardless how many times each individual test
1496                            has been retried, the total number of retries
1497                            happening in the suite can't exceed _max_retries.
1498                            Default to sys.maxint.
1499        @param offload_failures_only: Only enable gs_offloading for failed
1500                                      jobs.
1501        @param test_source_build: Build that contains the server-side test code.
1502        @param job_keyvals: General job keyvals to be inserted into keyval file,
1503                            which will be used by tko/parse later.
1504        @param test_args: A dict of args passed all the way to each individual
1505                          test that will be actually ran.
1506        @param child_dependencies: (optional) list of dependency strings
1507                to be added as dependencies to child jobs.
1508        @param result_reporter: A _ResultReporter instance to report results. If
1509                None, an _EmailReporter will be created.
1510        """
1511        tests = find_and_parse_tests(
1512                cf_getter,
1513                _ComposedPredicate(predicates),
1514                tag,
1515                forgiving_parser=forgiving_parser,
1516                run_prod_code=run_prod_code,
1517                test_args=test_args,
1518        )
1519        super(Suite, self).__init__(
1520                tests=tests,
1521                tag=tag,
1522                builds=builds,
1523                board=board,
1524                afe=afe,
1525                tko=tko,
1526                pool=pool,
1527                results_dir=results_dir,
1528                max_runtime_mins=max_runtime_mins,
1529                timeout_mins=timeout_mins,
1530                file_bugs=file_bugs,
1531                suite_job_id=suite_job_id,
1532                ignore_deps=ignore_deps,
1533                extra_deps=extra_deps,
1534                priority=priority,
1535                wait_for_results=wait_for_results,
1536                job_retry=job_retry,
1537                max_retries=max_retries,
1538                offload_failures_only=offload_failures_only,
1539                test_source_build=test_source_build,
1540                job_keyvals=job_keyvals,
1541                child_dependencies=child_dependencies,
1542                result_reporter=result_reporter,
1543        )
1544
1545
class ProvisionSuite(_BaseSuite):
    """
    A suite for provisioning DUTs.

    Provisioning is triggered by scheduling copies of the stub_Pass test,
    one per available host that matches the suite's non-provisioning
    dependencies (optionally capped by num_max).
    """


    def __init__(
            self,
            tag,
            builds,
            board,
            devserver,
            num_required,
            num_max=float('inf'),
            cf_getter=None,
            run_prod_code=False,
            test_args=None,
            test_source_build=None,
            **kwargs):
        """
        Constructor

        @param tag: a string with which to tag jobs run in this suite.
        @param builds: the builds on which we're running this suite.
        @param board: the board on which we're running this suite.
        @param devserver: the devserver which contains the build.
        @param num_required: number of tests that must pass.  This is
                             capped by the number of tests that are run.
        @param num_max: max number of tests to make.  By default there
                        is no cap, a test is created for each eligible host.
        @param cf_getter: a control_file_getter.ControlFileGetter.
        @param run_prod_code: Passed to _load_dummy_test() when the stub
                              test is loaded.
        @param test_args: A dict of args passed all the way to each individual
                          test that will be actually ran.
        @param test_source_build: Build that contains the server-side test code.
        @param kwargs: Various keyword arguments passed to
                       _BaseSuite constructor.
        """
        super(ProvisionSuite, self).__init__(
                tests=[], tag=tag, builds=builds, board=board, **kwargs)
        # State used when we bail out early below: no tests are scheduled
        # and none are required to pass.
        self._num_successful = 0
        self._num_required = 0
        self.tests = []

        # Only dependencies that Provision does not act on are used to
        # look up hosts.
        host_labels = [label for label in self._dependencies
                       if not provision.Provision.acts_on(label)]
        if 'pool:suites' in host_labels:
            logging.info('Provision suite is disabled on suites pool')
            return

        logging.debug('Looking for hosts matching %r', host_labels)
        matching_hosts = self._afe.get_hosts(
                invalid=False, multiple_labels=host_labels)
        logging.debug('Found %d matching hosts for ProvisionSuite',
                      len(matching_hosts))
        usable_hosts = [host for host in matching_hosts
                        if host.is_available()]
        logging.debug('Found %d available hosts for ProvisionSuite',
                      len(usable_hosts))

        stub_test = _load_dummy_test(
                builds, devserver, cf_getter,
                run_prod_code, test_args, test_source_build)
        # The same stub test object is repeated once per usable host,
        # up to num_max.
        test_count = min(len(usable_hosts), num_max)
        self.tests = [stub_test] * test_count
        logging.debug('Made %d tests for ProvisionSuite', len(self.tests))

        # Never require more passes than there are tests.
        self._num_required = min(num_required, len(self.tests))
        logging.debug('Expecting %d tests to pass for ProvisionSuite',
                      self._num_required)

    def _handle_result(self, result, record, waiter):
        """Run the parent handling, then count the result if it is good.

        @param result: result object; only its is_good() is consulted here.
        @param record: forwarded to the parent implementation.
        @param waiter: forwarded to the parent implementation.
        """
        super(ProvisionSuite, self)._handle_result(result, record, waiter)
        if not result.is_good():
            return
        self._num_successful += 1

    def _finished_waiting(self):
        """Return True once enough tests have passed.

        @return True if the number of good results seen so far has reached
                the required number of passes.
        """
        return self._num_successful >= self._num_required
1623
1624
def _load_dummy_test(
        builds,
        devserver,
        cf_getter=None,
        run_prod_code=False,
        test_args=None,
        test_source_build=None):
    """
    Load and return the dummy pass test (stub_Pass).

    When no |cf_getter| is supplied, one is created here: from
    _AUTOTEST_DIR if |run_prod_code| is set, otherwise from the devserver
    after staging the control files of the test source build.

    @param builds: the builds on which we're running this suite.
    @param devserver: the devserver which contains the build.
    @param cf_getter: a control_file_getter.ControlFileGetter.
    @param run_prod_code: If true and no cf_getter is given, control files
                          are read via create_fs_getter(_AUTOTEST_DIR)
                          instead of the devserver. Also passed to the
                          control file retriever.
    @param test_args: A dict of args passed all the way to each individual
                      test that will be actually ran.
    @param test_source_build: Build that contains the server-side test code.
    @return whatever the control file retriever returns for 'stub_Pass'.
    """
    if cf_getter is None and run_prod_code:
        cf_getter = create_fs_getter(_AUTOTEST_DIR)
    elif cf_getter is None:
        source_build = suite_common.get_test_source_build(
                builds, test_source_build=test_source_build)
        # The control files must be staged before the devserver getter
        # is created for them.
        devserver.stage_artifacts(image=source_build,
                                  artifacts=['control_files'])
        cf_getter = _create_ds_getter(source_build, devserver)

    return _ControlFileRetriever(
            cf_getter,
            run_prod_code=run_prod_code,
            test_args=test_args).retrieve_for_test('stub_Pass')
1655
1656
class _ComposedPredicate(object):
    """Callable conjunction of several test predicates.

    Predicates are functions that take a test control data object and
    return True if that test is to be included.  Calling an instance
    returns True only when every input predicate returns a true value,
    so the selected set is the intersection of the predicates' sets.
    With no predicates at all, every test is accepted.
    """

    def __init__(self, predicates):
        """Initialize instance.

        @param predicates: Iterable of predicates.  It is copied into a
                           list, so a one-shot iterator may be passed.
        """
        self._predicates = list(predicates)

    def __repr__(self):
        return '%s(%r)' % (type(self).__name__, self._predicates)

    def __call__(self, control_data_):
        """Apply the predicates in order, stopping at the first rejection.

        @param control_data_: test control data object to check.
        @return True if all predicates accept it, False otherwise.
        """
        for predicate in self._predicates:
            if not predicate(control_data_):
                return False
        return True
1681
1682
def _is_nonexistent_board_error(e):
    """Return True if error is caused by nonexistent board label.

    As of this writing, the error we are looking for has these traits:

     1) e.problem_keys is a dictionary
     2) 'meta_hosts' is the one and only key in that dictionary
     3) e.problem_keys['meta_hosts'] matches this pattern:
        "Label "board:.*" not found"

    Only 1) and 2) are checked, on the theory that they are relatively
    immutable.  Checking 3) is deliberately skipped: matching the message
    text seems likely to be a maintenance burden, and for the times when
    this heuristic is wrong, being right shouldn't matter enough
    (we _hope_).

    @param e: proxy.ValidationError instance
    @returns: boolean
    """
    problem_keys = e.problem_keys
    if not isinstance(problem_keys, dict):
        return False
    return len(problem_keys) == 1 and 'meta_hosts' in problem_keys
1707
1708
@six.add_metaclass(abc.ABCMeta)
class _ResultReporter(object):
    """Abstract interface for objects that report test results.

    Usually, this is used to report test failures.  Subclasses must
    override report(); the class cannot be instantiated otherwise.
    """

    @abc.abstractmethod
    def report(self, result):
        """Report a single test result.

        @param result: Status instance for job.
        """
1721
1722
class _EmailReporter(_ResultReporter):
    """Reporter that sends an email for each result passed to report()."""

    def __init__(self, suite, bug_template=None):
        """Initialize instance.

        @param suite: suite whose results are reported; its _tko,
                      _job_creator, _tag and _jobs_to_tests attributes are
                      read when a result is reported.
        @param bug_template: A template dictionary specifying the default bug
                             filing options for failures in this suite.  An
                             empty dict is used when none (or a falsy value)
                             is given.
        """
        self._suite = suite
        self._bug_template = bug_template if bug_template else {}

    def _get_test_bug(self, result):
        """Get TestBug for the given result.

        @param result: Status instance for a test job.
        @returns: TestBug instance.
        """
        # Imported lazily: the reporting modules depend on external packages
        # (e.g. httplib2).  A module-level import would force every importer
        # of suite.py to build site-packages first, even when nothing is
        # ever reported.
        from autotest_lib.server.cros.dynamic_suite import reporting

        suite = self._suite
        job_views = suite._tko.run('get_detailed_test_views',
                                   afe_job_id=result.id)
        cros_build = suite._job_creator.cros_build
        chrome_version = utils.get_chrome_version(job_views)
        return reporting.TestBug(cros_build, chrome_version, suite._tag,
                                 result)

    def _get_bug_template(self, result):
        """Get the bug template to use for a test job.

        The suite-level template given to the constructor is merged with the
        bug template of the test that ran as job |result.id|, when that test
        defines one.

        @param result: Status instance for job.
        @returns: the merged bug template; the suite-level template alone if
                  the test has no bug template; an empty dict if merging
                  fails.
        """
        # Imported lazily for the same reason as in _get_test_bug(): the
        # reporting modules pull in external packages that require
        # site-packages to be built.
        from autotest_lib.server.cros.dynamic_suite import reporting_utils

        # Try to merge with bug template in test control file.
        suite_template = reporting_utils.BugTemplate(self._bug_template)
        try:
            test_data = self._suite._jobs_to_tests[result.id]
            merged = suite_template.finalize_bug_template(
                    test_data.bug_template)
        except AttributeError:
            # Test control file does not have bug template defined.
            return suite_template.bug_template
        except reporting_utils.InvalidBugTemplateException as e:
            logging.error('Merging bug templates failed with '
                          'error: %s An empty bug template will '
                          'be used.', e)
            return {}
        return merged

    def report(self, result):
        """Email the bug for |result| using the merged bug template.

        @param result: Status instance for job.
        """
        # Imported lazily so that importing suite.py does not require the
        # external packages (e.g. httplib2) the reporting modules depend on.
        from autotest_lib.server.cros.dynamic_suite import reporting

        test_bug = self._get_test_bug(result)
        bug_template = self._get_bug_template(result)
        reporting.send_email(test_bug, bug_template)
1795