1# Lint as: python2, python3 2# Copyright (c) 2012 The Chromium OS Authors. All rights reserved. 3# Use of this source code is governed by a BSD-style license that can be 4# found in the LICENSE file. 5 6from __future__ import absolute_import 7from __future__ import division 8from __future__ import print_function 9 10import abc 11import datetime 12import difflib 13import functools 14import hashlib 15import logging 16import operator 17import os 18import re 19import six 20import sys 21import warnings 22 23import common 24 25from autotest_lib.frontend.afe.json_rpc import proxy 26from autotest_lib.client.common_lib import autotest_enum 27from autotest_lib.client.common_lib import error 28from autotest_lib.client.common_lib import global_config 29from autotest_lib.client.common_lib import priorities 30from autotest_lib.client.common_lib import time_utils 31from autotest_lib.client.common_lib import utils 32from autotest_lib.frontend.afe import model_attributes 33from autotest_lib.frontend.afe.json_rpc import proxy 34from autotest_lib.server.cros import provision 35from autotest_lib.server.cros.dynamic_suite import constants 36from autotest_lib.server.cros.dynamic_suite import control_file_getter 37from autotest_lib.server.cros.dynamic_suite import frontend_wrappers 38from autotest_lib.server.cros.dynamic_suite import job_status 39from autotest_lib.server.cros.dynamic_suite import suite_common 40from autotest_lib.server.cros.dynamic_suite import tools 41from autotest_lib.server.cros.dynamic_suite.job_status import Status 42 43try: 44 from autotest_lib.server.cros.dynamic_suite import boolparse_lib 45except ImportError as e: 46 print('Unable to import boolparse_lib: %s' % (e,)) 47 print('This script must be either:') 48 print(' - Be run in the chroot.') 49 print(' - (not yet supported) be run after running ') 50 print(' ../utils/build_externals.py') 51 52_FILE_BUG_SUITES = [ 53 'au', 'bvt', 'bvt-cq', 'bvt-inline', 'calibration', 'paygen_au_beta', 54 'paygen_au_canary', 'paygen_au_dev', 'paygen_au_stable', 'sanity', 55 'push_to_prod' 56] 57_AUTOTEST_DIR = global_config.global_config.get_config_value( 58 'SCHEDULER', 'drone_installation_directory') 59 60 61class RetryHandler(object): 62 """Maintain retry information. 63 64 @var _retry_map: A dictionary that stores retry history. 65 The key is afe job id. The value is a dictionary. 66 {job_id: {'state':RetryHandler.States, 'retry_max':int}} 67 - state: 68 The retry state of a job. 69 NOT_ATTEMPTED: 70 We haven't done anything about the job. 71 ATTEMPTED: 72 We've made an attempt to schedule a retry job. The 73 scheduling may or may not be successful, e.g. 74 it might encounter an rpc error. Note failure 75 in scheduling a retry is different from a retry job failure. 76 For each job, we only attempt to schedule a retry once. 77 For example, assume we have a test with JOB_RETRIES=5 and 78 its second retry job failed. When we attempt to create 79 a third retry job to retry the second, we hit an rpc 80 error. In such case, we will give up on all following 81 retries. 82 RETRIED: 83 A retry job has already been successfully 84 scheduled. 85 - retry_max: 86 The maximum of times the job can still 87 be retried, taking into account retries 88 that have occurred. 89 @var _retry_level: A retry might be triggered only if the result 90 is worse than the level. 91 @var _max_retries: Maximum retry limit at suite level. 92 Regardless how many times each individual test 93 has been retried, the total number of retries happening in 94 the suite can't exceed _max_retries. 95 """ 96 97 States = autotest_enum.AutotestEnum('NOT_ATTEMPTED', 'ATTEMPTED', 'RETRIED', 98 start_value=1, step=1) 99 100 def __init__(self, initial_jobs_to_tests, retry_level='WARN', 101 max_retries=None): 102 """Initialize RetryHandler. 103 104 @param initial_jobs_to_tests: A dictionary that maps a job id to 105 a ControlData object. This dictionary should contain 106 jobs that are originally scheduled by the suite. 107 @param retry_level: A retry might be triggered only if the result is 108 worse than the level. 109 @param max_retries: Integer, maxmium total retries allowed 110 for the suite. Default to None, no max. 111 """ 112 self._retry_map = {} 113 self._retry_level = retry_level 114 self._max_retries = (max_retries 115 if max_retries is not None else sys.maxsize) 116 for job_id, test in initial_jobs_to_tests.items(): 117 if test.job_retries > 0: 118 self._add_job(new_job_id=job_id, 119 retry_max=test.job_retries) 120 else: 121 logging.debug("Test %s has no retries", test.name) 122 123 124 def _add_job(self, new_job_id, retry_max): 125 """Add a newly-created job to the retry map. 126 127 @param new_job_id: The afe_job_id of a newly created job. 128 @param retry_max: The maximum of times that we could retry 129 the test if the job fails. 130 131 @raises ValueError if new_job_id is already in retry map. 132 133 """ 134 if new_job_id in self._retry_map: 135 raise ValueError('add_job called when job is already in retry map.') 136 137 self._retry_map[new_job_id] = { 138 'state': self.States.NOT_ATTEMPTED, 139 'retry_max': retry_max} 140 141 142 def _suite_max_reached(self): 143 """Return whether maximum retry limit for a suite has been reached.""" 144 return self._max_retries <= 0 145 146 147 def add_retry(self, old_job_id, new_job_id): 148 """Record a retry. 149 150 Update retry map with the retry information. 151 152 @param old_job_id: The afe_job_id of the job that is retried. 153 @param new_job_id: The afe_job_id of the retry job. 154 155 @raises KeyError if old_job_id isn't in the retry map. 156 @raises ValueError if we have already retried or made an attempt 157 to retry the old job. 158 159 """ 160 old_record = self._retry_map[old_job_id] 161 if old_record['state'] != self.States.NOT_ATTEMPTED: 162 raise ValueError( 163 'We have already retried or attempted to retry job %d' % 164 old_job_id) 165 old_record['state'] = self.States.RETRIED 166 self._add_job(new_job_id=new_job_id, 167 retry_max=old_record['retry_max'] - 1) 168 self._max_retries -= 1 169 170 171 def set_attempted(self, job_id): 172 """Set the state of the job to ATTEMPTED. 173 174 @param job_id: afe_job_id of a job. 175 176 @raises KeyError if job_id isn't in the retry map. 177 @raises ValueError if the current state is not NOT_ATTEMPTED. 178 179 """ 180 current_state = self._retry_map[job_id]['state'] 181 if current_state != self.States.NOT_ATTEMPTED: 182 # We are supposed to retry or attempt to retry each job 183 # only once. Raise an error if this is not the case. 184 raise ValueError('Unexpected state transition: %s -> %s' % 185 (self.States.get_string(current_state), 186 self.States.get_string(self.States.ATTEMPTED))) 187 else: 188 self._retry_map[job_id]['state'] = self.States.ATTEMPTED 189 190 191 def has_following_retry(self, result): 192 """Check whether there will be a following retry. 193 194 We have the following cases for a given job id (result.id), 195 - no retry map entry -> retry not required, no following retry 196 - has retry map entry: 197 - already retried -> has following retry 198 - has not retried 199 (this branch can be handled by checking should_retry(result)) 200 - retry_max == 0 --> the last retry job, no more retry 201 - retry_max > 0 202 - attempted, but has failed in scheduling a 203 following retry due to rpc error --> no more retry 204 - has not attempped --> has following retry if test failed. 205 206 @param result: A result, encapsulating the status of the job. 207 208 @returns: True, if there will be a following retry. 209 False otherwise. 210 211 """ 212 return (result.test_executed 213 and result.id in self._retry_map 214 and (self._retry_map[result.id]['state'] == self.States.RETRIED 215 or self._should_retry(result))) 216 217 218 def _should_retry(self, result): 219 """Check whether we should retry a job based on its result. 220 221 We will retry the job that corresponds to the result 222 when all of the following are true. 223 a) The test was actually executed, meaning that if 224 a job was aborted before it could ever reach the state 225 of 'Running', the job will not be retried. 226 b) The result is worse than |self._retry_level| which 227 defaults to 'WARN'. 228 c) The test requires retry, i.e. the job has an entry in the retry map. 229 d) We haven't made any retry attempt yet, i.e. state == NOT_ATTEMPTED 230 Note that if a test has JOB_RETRIES=5, and the second time 231 it was retried it hit an rpc error, we will give up on 232 all following retries. 233 e) The job has not reached its retry max, i.e. retry_max > 0 234 235 @param result: A result, encapsulating the status of the job. 236 237 @returns: True if we should retry the job. 238 239 """ 240 return ( 241 result.test_executed 242 and result.id in self._retry_map 243 and not self._suite_max_reached() 244 and result.is_worse_than( 245 job_status.Status(self._retry_level, '', 'reason')) 246 and self._retry_map[result.id]['state'] == self.States.NOT_ATTEMPTED 247 and self._retry_map[result.id]['retry_max'] > 0 248 ) 249 250 def _should_retry_local_job(self, job_id): 251 """Check whether we should retry a job based on information available 252 for a local job without a Result object. 253 254 We will retry the job that corresponds to the result 255 when all of the following are true. 256 a) The test requires retry, i.e. the job has an entry in the retry map. 257 b) We haven't made any retry attempt yet for this job, i.e. 258 state == NOT_ATTEMPTED 259 If the job is aborted, we will give up on all following retries, 260 regardless of max_retries. 261 c) The job has not reached its retry max, i.e. retry_max > 0 262 263 @param job_id: the id for the job, to look up relevant information. 264 265 @returns: True if we should retry the job. 266 267 """ 268 if self._suite_max_reached(): 269 logging.debug('suite max_retries reached, not retrying.') 270 return False 271 if job_id not in self._retry_map: 272 logging.debug('job_id not in retry map, not retrying.') 273 return False 274 if self._retry_map[job_id]['state'] != self.States.NOT_ATTEMPTED: 275 logging.debug("job state was %s not 'Not Attempted', not retrying", 276 self._retry_map[job_id]['state']) 277 return False 278 if self._retry_map[job_id]['retry_max'] <= 0: 279 logging.debug('test-level retries exhausted, not retrying') 280 return False 281 return True 282 283 284 def job_present(self, job_id): 285 """Check whether a job id present in the retry map. 286 287 @param job_id: afe_job_id of a job. 288 289 @returns: A True if the job is present, False if not. 290 """ 291 return bool(self._retry_map.get(job_id)) 292 293 294 295 def get_retry_max(self, job_id): 296 """Get the maximum times the job can still be retried. 297 298 @param job_id: afe_job_id of a job. 299 300 @returns: An int, representing the maximum times the job can still be 301 retried. 302 @raises KeyError if job_id isn't in the retry map. 303 304 """ 305 return self._retry_map[job_id]['retry_max'] 306 307 308class _SuiteChildJobCreator(object): 309 """Create test jobs for a suite.""" 310 311 def __init__( 312 self, 313 tag, 314 builds, 315 board, 316 afe=None, 317 max_runtime_mins=24*60, 318 timeout_mins=24*60, 319 suite_job_id=None, 320 ignore_deps=False, 321 extra_deps=(), 322 priority=priorities.Priority.DEFAULT, 323 offload_failures_only=False, 324 test_source_build=None, 325 job_keyvals=None, 326 ): 327 """ 328 Constructor 329 330 @param tag: a string with which to tag jobs run in this suite. 331 @param builds: the builds on which we're running this suite. 332 @param board: the board on which we're running this suite. 333 @param afe: an instance of AFE as defined in server/frontend.py. 334 @param max_runtime_mins: Maximum suite runtime, in minutes. 335 @param timeout_mins: Maximum job lifetime, in minutes. 336 @param suite_job_id: Job id that will act as parent id to all sub jobs. 337 Default: None 338 @param ignore_deps: True if jobs should ignore the DEPENDENCIES 339 attribute and skip applying of dependency labels. 340 (Default:False) 341 @param extra_deps: A list of strings which are the extra DEPENDENCIES 342 to add to each test being scheduled. 343 @param priority: Integer priority level. Higher is more important. 344 @param offload_failures_only: Only enable gs_offloading for failed 345 jobs. 346 @param test_source_build: Build that contains the server-side test code. 347 @param job_keyvals: General job keyvals to be inserted into keyval file, 348 which will be used by tko/parse later. 349 """ 350 self._tag = tag 351 self._builds = builds 352 self._board = board 353 self._afe = afe or frontend_wrappers.RetryingAFE(timeout_min=30, 354 delay_sec=10, 355 debug=False) 356 self._max_runtime_mins = max_runtime_mins 357 self._timeout_mins = timeout_mins 358 self._suite_job_id = suite_job_id 359 self._ignore_deps = ignore_deps 360 self._extra_deps = tuple(extra_deps) 361 self._priority = priority 362 self._offload_failures_only = offload_failures_only 363 self._test_source_build = test_source_build 364 self._job_keyvals = job_keyvals 365 366 367 @property 368 def cros_build(self): 369 """Return the CrOS build or the first build in the builds dict.""" 370 # TODO(ayatane): Note that the builds dict isn't ordered. I'm not 371 # sure what the implications of this are, but it's probably not a 372 # good thing. 373 return self._builds.get(provision.CROS_VERSION_PREFIX, 374 list(self._builds.values())[0]) 375 376 377 def create_job(self, test, retry_for=None): 378 """ 379 Thin wrapper around frontend.AFE.create_job(). 380 381 @param test: ControlData object for a test to run. 382 @param retry_for: If the to-be-created job is a retry for an 383 old job, the afe_job_id of the old job will 384 be passed in as |retry_for|, which will be 385 recorded in the new job's keyvals. 386 @returns: A frontend.Job object with an added test_name member. 387 test_name is used to preserve the higher level TEST_NAME 388 name of the job. 389 """ 390 # For a system running multiple suites which share tests, the priority 391 # overridden may lead to unexpected scheduling order that adds extra 392 # provision jobs. 393 test_priority = self._priority 394 if utils.is_moblab(): 395 test_priority = max(self._priority, test.priority) 396 397 reboot_before = (model_attributes.RebootBefore.NEVER if test.fast 398 else None) 399 400 test_obj = self._afe.create_job( 401 control_file=test.text, 402 name=tools.create_job_name( 403 self._test_source_build or self.cros_build, 404 self._tag, 405 test.name), 406 control_type=test.test_type.capitalize(), 407 meta_hosts=[self._board]*test.sync_count, 408 dependencies=self._create_job_deps(test), 409 keyvals=self._create_keyvals_for_test_job(test, retry_for), 410 max_runtime_mins=self._max_runtime_mins, 411 timeout_mins=self._timeout_mins, 412 parent_job_id=self._suite_job_id, 413 reboot_before=reboot_before, 414 run_reset=not test.fast, 415 priority=test_priority, 416 synch_count=test.sync_count, 417 require_ssp=test.require_ssp) 418 419 test_obj.test_name = test.name 420 return test_obj 421 422 423 def _create_job_deps(self, test): 424 """Create job deps list for a test job. 425 426 @returns: A list of dependency strings. 427 """ 428 if self._ignore_deps: 429 job_deps = [] 430 else: 431 job_deps = list(test.dependencies) 432 job_deps.extend(self._extra_deps) 433 return job_deps 434 435 436 def _create_keyvals_for_test_job(self, test, retry_for=None): 437 """Create keyvals dict for creating a test job. 438 439 @param test: ControlData object for a test to run. 440 @param retry_for: If the to-be-created job is a retry for an 441 old job, the afe_job_id of the old job will 442 be passed in as |retry_for|, which will be 443 recorded in the new job's keyvals. 444 @returns: A keyvals dict for creating the test job. 445 """ 446 keyvals = { 447 constants.JOB_BUILD_KEY: self.cros_build, 448 constants.JOB_SUITE_KEY: self._tag, 449 constants.JOB_EXPERIMENTAL_KEY: test.experimental, 450 constants.JOB_BUILDS_KEY: self._builds 451 } 452 # test_source_build is saved to job_keyvals so scheduler can retrieve 453 # the build name from database when compiling autoserv commandline. 454 # This avoid a database change to add a new field in afe_jobs. 455 # 456 # Only add `test_source_build` to job keyvals if the build is different 457 # from the CrOS build or the job uses more than one build, e.g., both 458 # firmware and CrOS will be updated in the dut. 459 # This is for backwards compatibility, so the update Autotest code can 460 # compile an autoserv command line to run in a SSP container using 461 # previous builds. 462 if (self._test_source_build and 463 (self.cros_build != self._test_source_build or 464 len(self._builds) > 1)): 465 keyvals[constants.JOB_TEST_SOURCE_BUILD_KEY] = \ 466 self._test_source_build 467 for prefix, build in six.iteritems(self._builds): 468 if prefix == provision.FW_RW_VERSION_PREFIX: 469 keyvals[constants.FWRW_BUILD]= build 470 elif prefix == provision.FW_RO_VERSION_PREFIX: 471 keyvals[constants.FWRO_BUILD] = build 472 # Add suite job id to keyvals so tko parser can read it from keyval 473 # file. 474 if self._suite_job_id: 475 keyvals[constants.PARENT_JOB_ID] = self._suite_job_id 476 # We drop the old job's id in the new job's keyval file so that 477 # later our tko parser can figure out the retry relationship and 478 # invalidate the results of the old job in tko database. 479 if retry_for: 480 keyvals[constants.RETRY_ORIGINAL_JOB_ID] = retry_for 481 if self._offload_failures_only: 482 keyvals[constants.JOB_OFFLOAD_FAILURES_KEY] = True 483 if self._job_keyvals: 484 for key in constants.INHERITED_KEYVALS: 485 if key in self._job_keyvals: 486 keyvals[key] = self._job_keyvals[key] 487 return keyvals 488 489 490class _ControlFileRetriever(object): 491 """Retrieves control files. 492 493 This returns control data instances, unlike control file getters 494 which simply return the control file text contents. 495 """ 496 497 def __init__(self, cf_getter, forgiving_parser=True, run_prod_code=False, 498 test_args=None): 499 """Initialize instance. 500 501 @param cf_getter: a control_file_getter.ControlFileGetter used to list 502 and fetch the content of control files 503 @param forgiving_parser: If False, will raise ControlVariableExceptions 504 if any are encountered when parsing control 505 files. Note that this can raise an exception 506 for syntax errors in unrelated files, because 507 we parse them before applying the predicate. 508 @param run_prod_code: If true, the retrieved tests will run the test 509 code that lives in prod aka the test code 510 currently on the lab servers by disabling 511 SSP for the discovered tests. 512 @param test_args: A dict of args to be seeded in test control file under 513 the name |args_dict|. 514 """ 515 self._cf_getter = cf_getter 516 self._forgiving_parser = forgiving_parser 517 self._run_prod_code = run_prod_code 518 self._test_args = test_args 519 520 521 def retrieve_for_test(self, test_name): 522 """Retrieve a test's control data. 523 524 This ignores forgiving_parser because we cannot return a 525 forgiving value. 526 527 @param test_name: Name of test to retrieve. 528 529 @raises ControlVariableException: There is a syntax error in a 530 control file. 531 532 @returns a ControlData object 533 """ 534 return suite_common.retrieve_control_data_for_test( 535 self._cf_getter, test_name) 536 537 538 def retrieve_for_suite(self, suite_name=''): 539 """Scan through all tests and find all tests. 540 541 @param suite_name: If specified, this method will attempt to restrain 542 the search space to just this suite's control files. 543 544 @raises ControlVariableException: If forgiving_parser is False and there 545 is a syntax error in a control file. 546 547 @returns a dictionary of ControlData objects that based on given 548 parameters. 549 """ 550 tests = suite_common.retrieve_for_suite( 551 self._cf_getter, suite_name, self._forgiving_parser, 552 self._test_args) 553 if self._run_prod_code: 554 for test in six.itervalues(tests): 555 test.require_ssp = False 556 557 return tests 558 559 560def list_all_suites(build, devserver, cf_getter=None): 561 """ 562 Parses all ControlData objects with a SUITE tag and extracts all 563 defined suite names. 564 565 @param build: the build on which we're running this suite. 566 @param devserver: the devserver which contains the build. 567 @param cf_getter: control_file_getter.ControlFileGetter. Defaults to 568 using DevServerGetter. 569 570 @return list of suites 571 """ 572 if cf_getter is None: 573 cf_getter = _create_ds_getter(build, devserver) 574 575 suites = set() 576 predicate = lambda t: True 577 for test in find_and_parse_tests(cf_getter, predicate): 578 suites.update(test.suite_tag_parts) 579 return list(suites) 580 581 582def test_file_similarity_predicate(test_file_pattern): 583 """Returns predicate that gets the similarity based on a test's file 584 name pattern. 585 586 Builds a predicate that takes in a parsed control file (a ControlData) 587 and returns a tuple of (file path, ratio), where ratio is the 588 similarity between the test file name and the given test_file_pattern. 589 590 @param test_file_pattern: regular expression (string) to match against 591 control file names. 592 @return a callable that takes a ControlData and and returns a tuple of 593 (file path, ratio), where ratio is the similarity between the 594 test file name and the given test_file_pattern. 595 """ 596 return lambda t: ((None, 0) if not hasattr(t, 'path') else 597 (t.path, difflib.SequenceMatcher(a=t.path, 598 b=test_file_pattern).ratio())) 599 600 601def test_name_similarity_predicate(test_name): 602 """Returns predicate that matched based on a test's name. 603 604 Builds a predicate that takes in a parsed control file (a ControlData) 605 and returns a tuple of (test name, ratio), where ratio is the similarity 606 between the test name and the given test_name. 607 608 @param test_name: the test name to base the predicate on. 609 @return a callable that takes a ControlData and returns a tuple of 610 (test name, ratio), where ratio is the similarity between the 611 test name and the given test_name. 612 """ 613 return lambda t: ((None, 0) if not hasattr(t, 'name') else 614 (t.name, 615 difflib.SequenceMatcher(a=t.name, b=test_name).ratio())) 616 617 618def matches_attribute_expression_predicate(test_attr_boolstr): 619 """Returns predicate that matches based on boolean expression of 620 attributes. 621 622 Builds a predicate that takes in a parsed control file (a ControlData) 623 ans returns True if the test attributes satisfy the given attribute 624 boolean expression. 625 626 @param test_attr_boolstr: boolean expression of the attributes to be 627 test, like 'system:all and interval:daily'. 628 629 @return a callable that takes a ControlData and returns True if the test 630 attributes satisfy the given boolean expression. 631 """ 632 return lambda t: boolparse_lib.BoolstrResult( 633 test_attr_boolstr, t.attributes) 634 635 636def test_file_matches_pattern_predicate(test_file_pattern): 637 """Returns predicate that matches based on a test's file name pattern. 638 639 Builds a predicate that takes in a parsed control file (a ControlData) 640 and returns True if the test's control file name matches the given 641 regular expression. 642 643 @param test_file_pattern: regular expression (string) to match against 644 control file names. 645 @return a callable that takes a ControlData and and returns 646 True if control file name matches the pattern. 647 """ 648 return lambda t: hasattr(t, 'path') and re.match(test_file_pattern, 649 t.path) 650 651 652def test_name_matches_pattern_predicate(test_name_pattern): 653 """Returns predicate that matches based on a test's name pattern. 654 655 Builds a predicate that takes in a parsed control file (a ControlData) 656 and returns True if the test name matches the given regular expression. 657 658 @param test_name_pattern: regular expression (string) to match against 659 test names. 660 @return a callable that takes a ControlData and returns 661 True if the name fields matches the pattern. 662 """ 663 return lambda t: hasattr(t, 'name') and re.match(test_name_pattern, 664 t.name) 665 666 667def test_name_equals_predicate(test_name): 668 """Returns predicate that matched based on a test's name. 669 670 Builds a predicate that takes in a parsed control file (a ControlData) 671 and returns True if the test name is equal to |test_name|. 672 673 @param test_name: the test name to base the predicate on. 674 @return a callable that takes a ControlData and looks for |test_name| 675 in that ControlData's name. 676 """ 677 return lambda t: hasattr(t, 'name') and test_name == t.name 678 679 680def name_in_tag_similarity_predicate(name): 681 """Returns predicate that takes a control file and gets the similarity 682 of the suites in the control file and the given name. 683 684 Builds a predicate that takes in a parsed control file (a ControlData) 685 and returns a list of tuples of (suite name, ratio), where suite name 686 is each suite listed in the control file, and ratio is the similarity 687 between each suite and the given name. 688 689 @param name: the suite name to base the predicate on. 690 @return a callable that takes a ControlData and returns a list of tuples 691 of (suite name, ratio), where suite name is each suite listed in 692 the control file, and ratio is the similarity between each suite 693 and the given name. 694 """ 695 return lambda t: [(suite, 696 difflib.SequenceMatcher(a=suite, b=name).ratio()) 697 for suite in t.suite_tag_parts] or [(None, 0)] 698 699 700def name_in_tag_predicate(name): 701 """Returns predicate that takes a control file and looks for |name|. 702 703 Builds a predicate that takes in a parsed control file (a ControlData) 704 and returns True if the SUITE tag is present and contains |name|. 705 706 @param name: the suite name to base the predicate on. 707 @return a callable that takes a ControlData and looks for |name| in that 708 ControlData object's suite member. 709 """ 710 return suite_common.name_in_tag_predicate(name) 711 712 713def create_fs_getter(autotest_dir): 714 """ 715 @param autotest_dir: the place to find autotests. 716 @return a FileSystemGetter instance that looks under |autotest_dir|. 717 """ 718 # currently hard-coded places to look for tests. 719 subpaths = ['server/site_tests', 'client/site_tests', 720 'server/tests', 'client/tests'] 721 directories = [os.path.join(autotest_dir, p) for p in subpaths] 722 return control_file_getter.FileSystemGetter(directories) 723 724 725def _create_ds_getter(build, devserver): 726 """ 727 @param build: the build on which we're running this suite. 728 @param devserver: the devserver which contains the build. 729 @return a FileSystemGetter instance that looks under |autotest_dir|. 730 """ 731 return control_file_getter.DevServerGetter(build, devserver) 732 733 734def _non_experimental_tests_predicate(test_data): 735 """Test predicate for non-experimental tests.""" 736 return not test_data.experimental 737 738 739def find_and_parse_tests(cf_getter, predicate, suite_name='', 740 add_experimental=False, forgiving_parser=True, 741 run_prod_code=False, test_args=None): 742 """ 743 Function to scan through all tests and find eligible tests. 744 745 Search through all tests based on given cf_getter, suite_name, 746 add_experimental and forgiving_parser, return the tests that match 747 given predicate. 748 749 @param cf_getter: a control_file_getter.ControlFileGetter used to list 750 and fetch the content of control files 751 @param predicate: a function that should return True when run over a 752 ControlData representation of a control file that should be in 753 this Suite. 754 @param suite_name: If specified, this method will attempt to restrain 755 the search space to just this suite's control files. 756 @param add_experimental: add tests with experimental attribute set. 757 @param forgiving_parser: If False, will raise ControlVariableExceptions 758 if any are encountered when parsing control 759 files. Note that this can raise an exception 760 for syntax errors in unrelated files, because 761 we parse them before applying the predicate. 762 @param run_prod_code: If true, the suite will run the test code that 763 lives in prod aka the test code currently on the 764 lab servers by disabling SSP for the discovered 765 tests. 766 @param test_args: A dict of args to be seeded in test control file. 767 768 @raises ControlVariableException: If forgiving_parser is False and there 769 is a syntax error in a control file. 770 771 @return list of ControlData objects that should be run, with control 772 file text added in |text| attribute. Results are sorted based 773 on the TIME setting in control file, slowest test comes first. 774 """ 775 logging.debug('Getting control file list for suite: %s', suite_name) 776 retriever = _ControlFileRetriever(cf_getter, 777 forgiving_parser=forgiving_parser, 778 run_prod_code=run_prod_code, 779 test_args=test_args) 780 tests = retriever.retrieve_for_suite(suite_name) 781 if not add_experimental: 782 predicate = _ComposedPredicate([predicate, 783 _non_experimental_tests_predicate]) 784 return suite_common.filter_tests(tests, predicate) 785 786 787def find_possible_tests(cf_getter, predicate, suite_name='', count=10): 788 """ 789 Function to scan through all tests and find possible tests. 790 791 Search through all tests based on given cf_getter, suite_name, 792 add_experimental and forgiving_parser. Use the given predicate to 793 calculate the similarity and return the top 10 matches. 794 795 @param cf_getter: a control_file_getter.ControlFileGetter used to list 796 and fetch the content of control files 797 @param predicate: a function that should return a tuple of (name, ratio) 798 when run over a ControlData representation of a control file that 799 should be in this Suite. `name` is the key to be compared, e.g., 800 a suite name or test name. `ratio` is a value between [0,1] 801 indicating the similarity of `name` and the value to be compared. 802 @param suite_name: If specified, this method will attempt to restrain 803 the search space to just this suite's control files. 804 @param count: Number of suggestions to return, default to 10. 805 806 @return list of top names that similar to the given test, sorted by 807 match ratio. 808 """ 809 logging.debug('Getting control file list for suite: %s', suite_name) 810 tests = _ControlFileRetriever(cf_getter).retrieve_for_suite(suite_name) 811 logging.debug('Parsed %s control files.', len(tests)) 812 similarities = {} 813 for test in six.itervalues(tests): 814 ratios = predicate(test) 815 # Some predicates may return a list of tuples, e.g., 816 # name_in_tag_similarity_predicate. Convert all returns to a list. 817 if not isinstance(ratios, list): 818 ratios = [ratios] 819 for name, ratio in ratios: 820 similarities[name] = ratio 821 return [s[0] for s in 822 sorted(list(similarities.items()), key=operator.itemgetter(1), 823 reverse=True)][:count] 824 825 826def _deprecated_suite_method(func): 827 """Decorator for deprecated Suite static methods. 828 829 TODO(ayatane): This is used to decorate functions that are called as 830 static methods on Suite. 831 """ 832 @functools.wraps(func) 833 def wrapper(*args, **kwargs): 834 """Wraps |func| for warning.""" 835 warnings.warn('Calling method "%s" from Suite is deprecated' % 836 func.__name__) 837 return func(*args, **kwargs) 838 return staticmethod(wrapper) 839 840 841class _BaseSuite(object): 842 """ 843 A suite of tests, defined by some predicate over control file variables. 844 845 Given a place to search for control files a predicate to match the desired 846 tests, can gather tests and fire off jobs to run them, and then wait for 847 results. 848 849 @var _predicate: a function that should return True when run over a 850 ControlData representation of a control file that should be in 851 this Suite. 852 @var _tag: a string with which to tag jobs run in this suite. 853 @var _builds: the builds on which we're running this suite. 854 @var _afe: an instance of AFE as defined in server/frontend.py. 855 @var _tko: an instance of TKO as defined in server/frontend.py. 856 @var _jobs: currently scheduled jobs, if any. 857 @var _jobs_to_tests: a dictionary that maps job ids to tests represented 858 ControlData objects. 859 @var _retry: a bool value indicating whether jobs should be retried on 860 failure. 861 @var _retry_handler: a RetryHandler object. 862 863 """ 864 865 866 def __init__( 867 self, 868 tests, 869 tag, 870 builds, 871 board, 872 afe=None, 873 tko=None, 874 pool=None, 875 results_dir=None, 876 max_runtime_mins=24*60, 877 timeout_mins=24*60, 878 file_bugs=False, 879 suite_job_id=None, 880 ignore_deps=False, 881 extra_deps=None, 882 priority=priorities.Priority.DEFAULT, 883 wait_for_results=True, 884 job_retry=False, 885 max_retries=sys.maxsize, 886 offload_failures_only=False, 887 test_source_build=None, 888 job_keyvals=None, 889 child_dependencies=(), 890 result_reporter=None, 891 ): 892 """Initialize instance. 893 894 @param tests: Iterable of tests to run. 895 @param tag: a string with which to tag jobs run in this suite. 896 @param builds: the builds on which we're running this suite. 897 @param board: the board on which we're running this suite. 898 @param afe: an instance of AFE as defined in server/frontend.py. 899 @param tko: an instance of TKO as defined in server/frontend.py. 900 @param pool: Specify the pool of machines to use for scheduling 901 purposes. 902 @param results_dir: The directory where the job can write results to. 903 This must be set if you want job_id of sub-jobs 904 list in the job keyvals. 905 @param max_runtime_mins: Maximum suite runtime, in minutes. 906 @param timeout: Maximum job lifetime, in hours. 907 @param suite_job_id: Job id that will act as parent id to all sub jobs. 908 Default: None 909 @param ignore_deps: True if jobs should ignore the DEPENDENCIES 910 attribute and skip applying of dependency labels. 911 (Default:False) 912 @param extra_deps: A list of strings which are the extra DEPENDENCIES 913 to add to each test being scheduled. 914 @param priority: Integer priority level. Higher is more important. 915 @param wait_for_results: Set to False to run the suite job without 916 waiting for test jobs to finish. Default is 917 True. 918 @param job_retry: A bool value indicating whether jobs should be retried 919 on failure. If True, the field 'JOB_RETRIES' in 920 control files will be respected. If False, do not 921 retry. 922 @param max_retries: Maximum retry limit at suite level. 923 Regardless how many times each individual test 924 has been retried, the total number of retries 925 happening in the suite can't exceed _max_retries. 926 Default to sys.maxint. 927 @param offload_failures_only: Only enable gs_offloading for failed 928 jobs. 929 @param test_source_build: Build that contains the server-side test code. 930 @param job_keyvals: General job keyvals to be inserted into keyval file, 931 which will be used by tko/parse later. 932 @param child_dependencies: (optional) list of dependency strings 933 to be added as dependencies to child jobs. 934 @param result_reporter: A _ResultReporter instance to report results. If 935 None, an _EmailReporter will be created. 936 """ 937 938 self.tests = list(tests) 939 self._tag = tag 940 self._builds = builds 941 self._results_dir = results_dir 942 self._afe = afe or frontend_wrappers.RetryingAFE(timeout_min=30, 943 delay_sec=10, 944 debug=False) 945 self._tko = tko or frontend_wrappers.RetryingTKO(timeout_min=30, 946 delay_sec=10, 947 debug=False) 948 self._jobs = [] 949 self._jobs_to_tests = {} 950 951 self._file_bugs = file_bugs 952 self._suite_job_id = suite_job_id 953 self._job_retry=job_retry 954 self._max_retries = max_retries 955 # RetryHandler to be initialized in schedule() 956 self._retry_handler = None 957 self.wait_for_results = wait_for_results 958 self._job_keyvals = job_keyvals 959 if result_reporter is None: 960 self._result_reporter = _EmailReporter(self) 961 else: 962 self._result_reporter = result_reporter 963 964 if extra_deps is None: 965 extra_deps = [] 966 extra_deps.append(board) 967 if pool: 968 extra_deps.append(pool) 969 extra_deps.extend(child_dependencies) 970 self._dependencies = tuple(extra_deps) 971 972 self._job_creator = _SuiteChildJobCreator( 973 tag=tag, 974 builds=builds, 975 board=board, 976 afe=afe, 977 max_runtime_mins=max_runtime_mins, 978 timeout_mins=timeout_mins, 979 suite_job_id=suite_job_id, 980 ignore_deps=ignore_deps, 981 extra_deps=extra_deps, 982 priority=priority, 983 offload_failures_only=offload_failures_only, 984 test_source_build=test_source_build, 985 job_keyvals=job_keyvals, 986 ) 987 988 989 def _schedule_test(self, record, test, retry_for=None): 990 """Schedule a single test and return the job. 991 992 Schedule a single test by creating a job, and then update relevant 993 data structures that are used to keep track of all running jobs. 994 995 Emits a TEST_NA status log entry if it failed to schedule the test due 996 to NoEligibleHostException or a non-existent board label. 997 998 Returns a frontend.Job object if the test is successfully scheduled. 999 If scheduling failed due to NoEligibleHostException or a non-existent 1000 board label, returns None. 1001 1002 @param record: A callable to use for logging. 1003 prototype: record(base_job.status_log_entry) 1004 @param test: ControlData for a test to run. 1005 @param retry_for: If we are scheduling a test to retry an 1006 old job, the afe_job_id of the old job 1007 will be passed in as |retry_for|. 1008 1009 @returns: A frontend.Job object or None 1010 """ 1011 msg = 'Scheduling %s' % test.name 1012 if retry_for: 1013 msg = msg + ', to retry afe job %d' % retry_for 1014 logging.debug(msg) 1015 begin_time_str = datetime.datetime.now().strftime(time_utils.TIME_FMT) 1016 try: 1017 job = self._job_creator.create_job(test, retry_for=retry_for) 1018 except (error.NoEligibleHostException, proxy.ValidationError) as e: 1019 if (isinstance(e, error.NoEligibleHostException) 1020 or (isinstance(e, proxy.ValidationError) 1021 and _is_nonexistent_board_error(e))): 1022 # Treat a dependency on a non-existent board label the same as 1023 # a dependency on a board that exists, but for which there's no 1024 # hardware. 1025 logging.debug('%s not applicable for this board/pool. ' 1026 'Emitting TEST_NA.', test.name) 1027 Status('TEST_NA', test.name, 1028 'Skipping: test not supported on this board/pool.', 1029 begin_time_str=begin_time_str).record_all(record) 1030 return None 1031 else: 1032 raise e 1033 except (error.RPCException, proxy.JSONRPCException): 1034 if retry_for: 1035 # Mark that we've attempted to retry the old job. 1036 logging.debug("RPC exception occurred") 1037 self._retry_handler.set_attempted(job_id=retry_for) 1038 raise 1039 else: 1040 self._jobs.append(job) 1041 self._jobs_to_tests[job.id] = test 1042 if retry_for: 1043 # A retry job was just created, record it. 1044 self._retry_handler.add_retry( 1045 old_job_id=retry_for, new_job_id=job.id) 1046 retry_count = (test.job_retries - 1047 self._retry_handler.get_retry_max(job.id)) 1048 logging.debug('Job %d created to retry job %d. ' 1049 'Have retried for %d time(s)', 1050 job.id, retry_for, retry_count) 1051 self._remember_job_keyval(job) 1052 return job 1053 1054 def schedule(self, record): 1055 """ 1056 Schedule jobs using |self._afe|. 1057 1058 frontend.Job objects representing each scheduled job will be put in 1059 |self._jobs|. 1060 1061 @param record: A callable to use for logging. 1062 prototype: record(base_job.status_log_entry) 1063 @returns: The number of tests that were scheduled. 1064 """ 1065 scheduled_test_names = [] 1066 logging.debug('Discovered %d tests.', len(self.tests)) 1067 1068 Status('INFO', 'Start %s' % self._tag).record_result(record) 1069 try: 1070 # Write job_keyvals into keyval file. 1071 if self._job_keyvals: 1072 utils.write_keyval(self._results_dir, self._job_keyvals) 1073 1074 # TODO(crbug.com/730885): This is a hack to protect tests that are 1075 # not usually retried from getting hit by a provision error when run 1076 # as part of a suite. Remove this hack once provision is separated 1077 # out in its own suite. 1078 self._bump_up_test_retries(self.tests) 1079 for test in self.tests: 1080 scheduled_job = self._schedule_test(record, test) 1081 if scheduled_job is not None: 1082 scheduled_test_names.append(test.name) 1083 1084 # Write the num of scheduled tests and name of them to keyval file. 1085 logging.debug('Scheduled %d tests, writing the total to keyval.', 1086 len(scheduled_test_names)) 1087 utils.write_keyval( 1088 self._results_dir, 1089 self._make_scheduled_tests_keyvals(scheduled_test_names)) 1090 except Exception: 1091 logging.exception('Exception while scheduling suite') 1092 Status('FAIL', self._tag, 1093 'Exception while scheduling suite').record_result(record) 1094 1095 if self._job_retry: 1096 logging.debug("Initializing RetryHandler for suite %s.", self._tag) 1097 self._retry_handler = RetryHandler( 1098 initial_jobs_to_tests=self._jobs_to_tests, 1099 max_retries=self._max_retries) 1100 logging.debug("retry map created: %s ", 1101 self._retry_handler._retry_map) 1102 else: 1103 logging.info("Will not retry jobs from suite %s.", self._tag) 1104 return len(scheduled_test_names) 1105 1106 1107 def _bump_up_test_retries(self, tests): 1108 """Bump up individual test retries to match suite retry options.""" 1109 if not self._job_retry: 1110 return 1111 1112 for test in tests: 1113 # We do honor if a test insists on JOB_RETRIES = 0. 1114 if test.job_retries is None: 1115 logging.debug( 1116 'Test %s did not request retries, but suite requires ' 1117 'retries. Bumping retries up to 1. ' 1118 '(See crbug.com/730885)', 1119 test.name) 1120 test.job_retries = 1 1121 1122 1123 def _make_scheduled_tests_keyvals(self, scheduled_test_names): 1124 """Make a keyvals dict to write for scheduled test names. 1125 1126 @param scheduled_test_names: A list of scheduled test name strings. 1127 1128 @returns: A keyvals dict. 1129 """ 1130 return { 1131 constants.SCHEDULED_TEST_COUNT_KEY: len(scheduled_test_names), 1132 constants.SCHEDULED_TEST_NAMES_KEY: repr(scheduled_test_names), 1133 } 1134 1135 1136 def _should_report(self, result): 1137 """ 1138 Returns True if this failure requires to be reported. 1139 1140 @param result: A result, encapsulating the status of the failed job. 1141 @return: True if we should report this failure. 1142 """ 1143 return (self._file_bugs and result.test_executed and 1144 not result.is_testna() and 1145 result.is_worse_than(job_status.Status('GOOD', '', 'reason'))) 1146 1147 1148 def _has_retry(self, result): 1149 """ 1150 Return True if this result gets to retry. 1151 1152 @param result: A result, encapsulating the status of the failed job. 1153 @return: bool 1154 """ 1155 return (self._job_retry 1156 and self._retry_handler.has_following_retry(result)) 1157 1158 1159 def wait(self, record): 1160 """ 1161 Polls for the job statuses, using |record| to print status when each 1162 completes. 1163 1164 @param record: callable that records job status. 1165 prototype: 1166 record(base_job.status_log_entry) 1167 """ 1168 waiter = job_status.JobResultWaiter(self._afe, self._tko) 1169 try: 1170 if self._suite_job_id: 1171 jobs = self._afe.get_jobs(parent_job_id=self._suite_job_id) 1172 else: 1173 logging.warning('Unknown suite_job_id, falling back to less ' 1174 'efficient results_generator.') 1175 jobs = self._jobs 1176 waiter.add_jobs(jobs) 1177 for result in waiter.wait_for_results(): 1178 self._handle_result(result=result, record=record, waiter=waiter) 1179 if self._finished_waiting(): 1180 break 1181 except Exception: # pylint: disable=W0703 1182 logging.exception('Exception waiting for results') 1183 Status('FAIL', self._tag, 1184 'Exception waiting for results').record_result(record) 1185 1186 1187 def _finished_waiting(self): 1188 """Return whether the suite is finished waiting for child jobs.""" 1189 return False 1190 1191 1192 def _handle_result(self, result, record, waiter): 1193 """ 1194 Handle a test job result. 1195 1196 @param result: Status instance for job. 1197 @param record: callable that records job status. 1198 prototype: 1199 record(base_job.status_log_entry) 1200 @param waiter: JobResultsWaiter instance. 1201 1202 @instance_param _result_reporter: _ResultReporter instance. 1203 """ 1204 self._record_result(result, record) 1205 rescheduled = False 1206 if self._job_retry and self._retry_handler._should_retry(result): 1207 rescheduled = self._retry_result(result, record, waiter) 1208 # TODO (crbug.com/751428): If the suite times out before a retry could 1209 # finish, we would lose the chance to report errors from the original 1210 # job. 1211 if self._has_retry(result) and rescheduled: 1212 return 1213 1214 if self._should_report(result): 1215 self._result_reporter.report(result) 1216 1217 def _record_result(self, result, record): 1218 """ 1219 Record a test job result. 1220 1221 @param result: Status instance for job. 1222 @param record: callable that records job status. 1223 prototype: 1224 record(base_job.status_log_entry) 1225 """ 1226 result.record_all(record) 1227 self._remember_job_keyval(result) 1228 1229 1230 def _retry_result(self, result, record, waiter): 1231 """ 1232 Retry a test job result. 1233 1234 @param result: Status instance for job. 1235 @param record: callable that records job status. 1236 prototype: 1237 record(base_job.status_log_entry) 1238 @param waiter: JobResultsWaiter instance. 1239 @returns: True if a job was scheduled for retry, False otherwise. 1240 """ 1241 test = self._jobs_to_tests[result.id] 1242 try: 1243 # It only takes effect for CQ retriable job: 1244 # 1) in first try, test.fast=True. 1245 # 2) in second try, test will be run in normal mode, so reset 1246 # test.fast=False. 1247 test.fast = False 1248 new_job = self._schedule_test( 1249 record=record, test=test, retry_for=result.id) 1250 except (error.RPCException, proxy.JSONRPCException) as e: 1251 logging.error('Failed to schedule test: %s, Reason: %s', 1252 test.name, e) 1253 return False 1254 else: 1255 waiter.add_job(new_job) 1256 return bool(new_job) 1257 1258 @property 1259 def jobs(self): 1260 """Give a copy of the associated jobs 1261 1262 @returns: array of jobs""" 1263 return [job for job in self._jobs] 1264 1265 1266 @property 1267 def _should_file_bugs(self): 1268 """Return whether bugs should be filed. 1269 1270 @returns: bool 1271 """ 1272 # File bug when failure is one of the _FILE_BUG_SUITES, 1273 # otherwise send an email to the owner anc cc. 1274 return self._tag in _FILE_BUG_SUITES 1275 1276 1277 def abort(self): 1278 """ 1279 Abort all scheduled test jobs. 1280 """ 1281 if self._jobs: 1282 job_ids = [job.id for job in self._jobs] 1283 self._afe.run('abort_host_queue_entries', job__id__in=job_ids) 1284 1285 1286 def _remember_job_keyval(self, job): 1287 """ 1288 Record provided job as a suite job keyval, for later referencing. 1289 1290 @param job: some representation of a job that has the attributes: 1291 id, test_name, and owner 1292 """ 1293 if self._results_dir and job.id and job.owner and job.test_name: 1294 job_id_owner = '%s-%s' % (job.id, job.owner) 1295 logging.debug('Adding job keyval for %s=%s', 1296 job.test_name, job_id_owner) 1297 utils.write_keyval( 1298 self._results_dir, 1299 {hashlib.md5(job.test_name).hexdigest(): job_id_owner}) 1300 1301 1302class Suite(_BaseSuite): 1303 """ 1304 A suite of tests, defined by some predicate over control file variables. 1305 1306 Given a place to search for control files a predicate to match the desired 1307 tests, can gather tests and fire off jobs to run them, and then wait for 1308 results. 1309 1310 @var _predicate: a function that should return True when run over a 1311 ControlData representation of a control file that should be in 1312 this Suite. 1313 @var _tag: a string with which to tag jobs run in this suite. 1314 @var _builds: the builds on which we're running this suite. 1315 @var _afe: an instance of AFE as defined in server/frontend.py. 1316 @var _tko: an instance of TKO as defined in server/frontend.py. 1317 @var _jobs: currently scheduled jobs, if any. 1318 @var _jobs_to_tests: a dictionary that maps job ids to tests represented 1319 ControlData objects. 1320 @var _cf_getter: a control_file_getter.ControlFileGetter 1321 @var _retry: a bool value indicating whether jobs should be retried on 1322 failure. 1323 @var _retry_handler: a RetryHandler object. 1324 1325 """ 1326 1327 # TODO(ayatane): These methods are kept on the Suite class for 1328 # backward compatibility. 1329 find_and_parse_tests = _deprecated_suite_method(find_and_parse_tests) 1330 find_possible_tests = _deprecated_suite_method(find_possible_tests) 1331 create_fs_getter = _deprecated_suite_method(create_fs_getter) 1332 name_in_tag_predicate = _deprecated_suite_method( 1333 suite_common.name_in_tag_predicate) 1334 name_in_tag_similarity_predicate = _deprecated_suite_method( 1335 name_in_tag_similarity_predicate) 1336 test_name_equals_predicate = _deprecated_suite_method( 1337 test_name_equals_predicate) 1338 test_name_in_list_predicate = _deprecated_suite_method( 1339 suite_common.test_name_in_list_predicate) 1340 test_name_matches_pattern_predicate = _deprecated_suite_method( 1341 test_name_matches_pattern_predicate) 1342 test_file_matches_pattern_predicate = _deprecated_suite_method( 1343 test_file_matches_pattern_predicate) 1344 matches_attribute_expression_predicate = _deprecated_suite_method( 1345 matches_attribute_expression_predicate) 1346 test_name_similarity_predicate = _deprecated_suite_method( 1347 test_name_similarity_predicate) 1348 test_file_similarity_predicate = _deprecated_suite_method( 1349 test_file_similarity_predicate) 1350 list_all_suites = _deprecated_suite_method(list_all_suites) 1351 get_test_source_build = _deprecated_suite_method( 1352 suite_common.get_test_source_build) 1353 1354 1355 @classmethod 1356 def create_from_predicates(cls, predicates, builds, board, devserver, 1357 cf_getter=None, name='ad_hoc_suite', 1358 run_prod_code=False, **dargs): 1359 """ 1360 Create a Suite using a given predicate test filters. 1361 1362 Uses supplied predicate(s) to instantiate a Suite. Looks for tests in 1363 |autotest_dir| and will schedule them using |afe|. Pulls control files 1364 from the default dev server. Results will be pulled from |tko| upon 1365 completion. 1366 1367 @param predicates: A list of callables that accept ControlData 1368 representations of control files. A test will be 1369 included in suite if all callables in this list 1370 return True on the given control file. 1371 @param builds: the builds on which we're running this suite. It's a 1372 dictionary of version_prefix:build. 1373 @param board: the board on which we're running this suite. 1374 @param devserver: the devserver which contains the build. 1375 @param cf_getter: control_file_getter.ControlFileGetter. Defaults to 1376 using DevServerGetter. 1377 @param name: name of suite. Defaults to 'ad_hoc_suite' 1378 @param run_prod_code: If true, the suite will run the tests that 1379 lives in prod aka the test code currently on the 1380 lab servers. 1381 @param **dargs: Any other Suite constructor parameters, as described 1382 in Suite.__init__ docstring. 1383 @return a Suite instance. 1384 """ 1385 if cf_getter is None: 1386 if run_prod_code: 1387 cf_getter = create_fs_getter(_AUTOTEST_DIR) 1388 else: 1389 build = suite_common.get_test_source_build(builds, **dargs) 1390 cf_getter = _create_ds_getter(build, devserver) 1391 1392 return cls(predicates, 1393 name, builds, board, cf_getter, run_prod_code, **dargs) 1394 1395 1396 @classmethod 1397 def create_from_name(cls, name, builds, board, devserver, cf_getter=None, 1398 **dargs): 1399 """ 1400 Create a Suite using a predicate based on the SUITE control file var. 1401 1402 Makes a predicate based on |name| and uses it to instantiate a Suite 1403 that looks for tests in |autotest_dir| and will schedule them using 1404 |afe|. Pulls control files from the default dev server. 1405 Results will be pulled from |tko| upon completion. 1406 1407 @param name: a value of the SUITE control file variable to search for. 1408 @param builds: the builds on which we're running this suite. It's a 1409 dictionary of version_prefix:build. 1410 @param board: the board on which we're running this suite. 1411 @param devserver: the devserver which contains the build. 1412 @param cf_getter: control_file_getter.ControlFileGetter. Defaults to 1413 using DevServerGetter. 1414 @param **dargs: Any other Suite constructor parameters, as described 1415 in Suite.__init__ docstring. 1416 @return a Suite instance. 1417 """ 1418 if cf_getter is None: 1419 build = suite_common.get_test_source_build(builds, **dargs) 1420 cf_getter = _create_ds_getter(build, devserver) 1421 1422 return cls([suite_common.name_in_tag_predicate(name)], 1423 name, builds, board, cf_getter, **dargs) 1424 1425 1426 def __init__( 1427 self, 1428 predicates, 1429 tag, 1430 builds, 1431 board, 1432 cf_getter, 1433 run_prod_code=False, 1434 afe=None, 1435 tko=None, 1436 pool=None, 1437 results_dir=None, 1438 max_runtime_mins=24*60, 1439 timeout_mins=24*60, 1440 file_bugs=False, 1441 suite_job_id=None, 1442 ignore_deps=False, 1443 extra_deps=None, 1444 priority=priorities.Priority.DEFAULT, 1445 forgiving_parser=True, 1446 wait_for_results=True, 1447 job_retry=False, 1448 max_retries=sys.maxsize, 1449 offload_failures_only=False, 1450 test_source_build=None, 1451 job_keyvals=None, 1452 test_args=None, 1453 child_dependencies=(), 1454 result_reporter=None, 1455 ): 1456 """ 1457 Constructor 1458 1459 @param predicates: A list of callables that accept ControlData 1460 representations of control files. A test will be 1461 included in suite if all callables in this list 1462 return True on the given control file. 1463 @param tag: a string with which to tag jobs run in this suite. 1464 @param builds: the builds on which we're running this suite. 1465 @param board: the board on which we're running this suite. 1466 @param cf_getter: a control_file_getter.ControlFileGetter 1467 @param afe: an instance of AFE as defined in server/frontend.py. 1468 @param tko: an instance of TKO as defined in server/frontend.py. 1469 @param pool: Specify the pool of machines to use for scheduling 1470 purposes. 1471 @param run_prod_code: If true, the suite will run the test code that 1472 lives in prod aka the test code currently on the 1473 lab servers. 1474 @param results_dir: The directory where the job can write results to. 1475 This must be set if you want job_id of sub-jobs 1476 list in the job keyvals. 1477 @param max_runtime_mins: Maximum suite runtime, in minutes. 1478 @param timeout: Maximum job lifetime, in hours. 1479 @param suite_job_id: Job id that will act as parent id to all sub jobs. 1480 Default: None 1481 @param ignore_deps: True if jobs should ignore the DEPENDENCIES 1482 attribute and skip applying of dependency labels. 1483 (Default:False) 1484 @param extra_deps: A list of strings which are the extra DEPENDENCIES 1485 to add to each test being scheduled. 1486 @param priority: Integer priority level. Higher is more important. 1487 @param wait_for_results: Set to False to run the suite job without 1488 waiting for test jobs to finish. Default is 1489 True. 1490 @param job_retry: A bool value indicating whether jobs should be retried 1491 on failure. If True, the field 'JOB_RETRIES' in 1492 control files will be respected. If False, do not 1493 retry. 1494 @param max_retries: Maximum retry limit at suite level. 1495 Regardless how many times each individual test 1496 has been retried, the total number of retries 1497 happening in the suite can't exceed _max_retries. 1498 Default to sys.maxint. 1499 @param offload_failures_only: Only enable gs_offloading for failed 1500 jobs. 1501 @param test_source_build: Build that contains the server-side test code. 1502 @param job_keyvals: General job keyvals to be inserted into keyval file, 1503 which will be used by tko/parse later. 1504 @param test_args: A dict of args passed all the way to each individual 1505 test that will be actually ran. 1506 @param child_dependencies: (optional) list of dependency strings 1507 to be added as dependencies to child jobs. 1508 @param result_reporter: A _ResultReporter instance to report results. If 1509 None, an _EmailReporter will be created. 1510 """ 1511 tests = find_and_parse_tests( 1512 cf_getter, 1513 _ComposedPredicate(predicates), 1514 tag, 1515 forgiving_parser=forgiving_parser, 1516 run_prod_code=run_prod_code, 1517 test_args=test_args, 1518 ) 1519 super(Suite, self).__init__( 1520 tests=tests, 1521 tag=tag, 1522 builds=builds, 1523 board=board, 1524 afe=afe, 1525 tko=tko, 1526 pool=pool, 1527 results_dir=results_dir, 1528 max_runtime_mins=max_runtime_mins, 1529 timeout_mins=timeout_mins, 1530 file_bugs=file_bugs, 1531 suite_job_id=suite_job_id, 1532 ignore_deps=ignore_deps, 1533 extra_deps=extra_deps, 1534 priority=priority, 1535 wait_for_results=wait_for_results, 1536 job_retry=job_retry, 1537 max_retries=max_retries, 1538 offload_failures_only=offload_failures_only, 1539 test_source_build=test_source_build, 1540 job_keyvals=job_keyvals, 1541 child_dependencies=child_dependencies, 1542 result_reporter=result_reporter, 1543 ) 1544 1545 1546class ProvisionSuite(_BaseSuite): 1547 """ 1548 A suite for provisioning DUTs. 1549 1550 This is done by creating stub_Pass tests. 1551 """ 1552 1553 1554 def __init__( 1555 self, 1556 tag, 1557 builds, 1558 board, 1559 devserver, 1560 num_required, 1561 num_max=float('inf'), 1562 cf_getter=None, 1563 run_prod_code=False, 1564 test_args=None, 1565 test_source_build=None, 1566 **kwargs): 1567 """ 1568 Constructor 1569 1570 @param tag: a string with which to tag jobs run in this suite. 1571 @param builds: the builds on which we're running this suite. 1572 @param board: the board on which we're running this suite. 1573 @param devserver: the devserver which contains the build. 1574 @param num_required: number of tests that must pass. This is 1575 capped by the number of tests that are run. 1576 @param num_max: max number of tests to make. By default there 1577 is no cap, a test is created for each eligible host. 1578 @param cf_getter: a control_file_getter.ControlFileGetter. 1579 @param test_args: A dict of args passed all the way to each individual 1580 test that will be actually ran. 1581 @param test_source_build: Build that contains the server-side test code. 1582 @param kwargs: Various keyword arguments passed to 1583 _BaseSuite constructor. 1584 """ 1585 super(ProvisionSuite, self).__init__( 1586 tests=[], 1587 tag=tag, 1588 builds=builds, 1589 board=board, 1590 **kwargs) 1591 self._num_successful = 0 1592 self._num_required = 0 1593 self.tests = [] 1594 1595 static_deps = [dep for dep in self._dependencies 1596 if not provision.Provision.acts_on(dep)] 1597 if 'pool:suites' in static_deps: 1598 logging.info('Provision suite is disabled on suites pool') 1599 return 1600 logging.debug('Looking for hosts matching %r', static_deps) 1601 hosts = self._afe.get_hosts( 1602 invalid=False, multiple_labels=static_deps) 1603 logging.debug('Found %d matching hosts for ProvisionSuite', len(hosts)) 1604 available_hosts = [h for h in hosts if h.is_available()] 1605 logging.debug('Found %d available hosts for ProvisionSuite', 1606 len(available_hosts)) 1607 dummy_test = _load_dummy_test( 1608 builds, devserver, cf_getter, 1609 run_prod_code, test_args, test_source_build) 1610 self.tests = [dummy_test] * min(len(available_hosts), num_max) 1611 logging.debug('Made %d tests for ProvisionSuite', len(self.tests)) 1612 self._num_required = min(num_required, len(self.tests)) 1613 logging.debug('Expecting %d tests to pass for ProvisionSuite', 1614 self._num_required) 1615 1616 def _handle_result(self, result, record, waiter): 1617 super(ProvisionSuite, self)._handle_result(result, record, waiter) 1618 if result.is_good(): 1619 self._num_successful += 1 1620 1621 def _finished_waiting(self): 1622 return self._num_successful >= self._num_required 1623 1624 1625def _load_dummy_test( 1626 builds, 1627 devserver, 1628 cf_getter=None, 1629 run_prod_code=False, 1630 test_args=None, 1631 test_source_build=None): 1632 """ 1633 Load and return the dummy pass test. 1634 1635 @param builds: the builds on which we're running this suite. 1636 @param devserver: the devserver which contains the build. 1637 @param cf_getter: a control_file_getter.ControlFileGetter. 1638 @param test_args: A dict of args passed all the way to each individual 1639 test that will be actually ran. 1640 @param test_source_build: Build that contains the server-side test code. 1641 """ 1642 if cf_getter is None: 1643 if run_prod_code: 1644 cf_getter = create_fs_getter(_AUTOTEST_DIR) 1645 else: 1646 build = suite_common.get_test_source_build( 1647 builds, test_source_build=test_source_build) 1648 devserver.stage_artifacts(image=build, 1649 artifacts=['control_files']) 1650 cf_getter = _create_ds_getter(build, devserver) 1651 retriever = _ControlFileRetriever(cf_getter, 1652 run_prod_code=run_prod_code, 1653 test_args=test_args) 1654 return retriever.retrieve_for_test('stub_Pass') 1655 1656 1657class _ComposedPredicate(object): 1658 """Return the composition of the predicates. 1659 1660 Predicates are functions that take a test control data object and 1661 return True of that test is to be included. The returned 1662 predicate's set is the intersection of all of the input predicates' 1663 sets (it returns True if all predicates return True). 1664 """ 1665 1666 def __init__(self, predicates): 1667 """Initialize instance. 1668 1669 @param predicates: Iterable of predicates. 1670 """ 1671 self._predicates = list(predicates) 1672 1673 def __repr__(self): 1674 return '{cls}({this._predicates!r})'.format( 1675 cls=type(self).__name__, 1676 this=self, 1677 ) 1678 1679 def __call__(self, control_data_): 1680 return all(f(control_data_) for f in self._predicates) 1681 1682 1683def _is_nonexistent_board_error(e): 1684 """Return True if error is caused by nonexistent board label. 1685 1686 As of this writing, the particular case we want looks like this: 1687 1688 1) e.problem_keys is a dictionary 1689 2) e.problem_keys['meta_hosts'] exists as the only key 1690 in the dictionary. 1691 3) e.problem_keys['meta_hosts'] matches this pattern: 1692 "Label "board:.*" not found" 1693 1694 We check for conditions 1) and 2) on the 1695 theory that they're relatively immutable. 1696 We don't check condition 3) because it seems 1697 likely to be a maintenance burden, and for the 1698 times when we're wrong, being right shouldn't 1699 matter enough (we _hope_). 1700 1701 @param e: proxy.ValidationError instance 1702 @returns: boolean 1703 """ 1704 return (isinstance(e.problem_keys, dict) 1705 and len(e.problem_keys) == 1 1706 and 'meta_hosts' in e.problem_keys) 1707 1708 1709class _ResultReporter(six.with_metaclass(abc.ABCMeta, object)): 1710 """Abstract base class for reporting test results. 1711 1712 Usually, this is used to report test failures. 1713 """ 1714 1715 @abc.abstractmethod 1716 def report(self, result): 1717 """Report test result. 1718 1719 @param result: Status instance for job. 1720 """ 1721 1722 1723class _EmailReporter(_ResultReporter): 1724 """Class that emails based on test failures.""" 1725 1726 def __init__(self, suite, bug_template=None): 1727 self._suite = suite 1728 self._bug_template = bug_template or {} 1729 1730 def _get_test_bug(self, result): 1731 """Get TestBug for the given result. 1732 1733 @param result: Status instance for a test job. 1734 @returns: TestBug instance. 1735 """ 1736 # reporting modules have dependency on external packages, e.g., httplib2 1737 # Such dependency can cause issue to any module tries to import suite.py 1738 # without building site-packages first. Since the reporting modules are 1739 # only used in this function, move the imports here avoid the 1740 # requirement of building site packages to use other functions in this 1741 # module. 1742 from autotest_lib.server.cros.dynamic_suite import reporting 1743 1744 job_views = self._suite._tko.run('get_detailed_test_views', 1745 afe_job_id=result.id) 1746 return reporting.TestBug(self._suite._job_creator.cros_build, 1747 utils.get_chrome_version(job_views), 1748 self._suite._tag, 1749 result) 1750 1751 def _get_bug_template(self, result): 1752 """Get BugTemplate for test job. 1753 1754 @param result: Status instance for job. 1755 @param bug_template: A template dictionary specifying the default bug 1756 filing options for failures in this suite. 1757 @returns: BugTemplate instance 1758 """ 1759 # reporting modules have dependency on external packages, e.g., httplib2 1760 # Such dependency can cause issue to any module tries to import suite.py 1761 # without building site-packages first. Since the reporting modules are 1762 # only used in this function, move the imports here avoid the 1763 # requirement of building site packages to use other functions in this 1764 # module. 1765 from autotest_lib.server.cros.dynamic_suite import reporting_utils 1766 1767 # Try to merge with bug template in test control file. 1768 template = reporting_utils.BugTemplate(self._bug_template) 1769 try: 1770 test_data = self._suite._jobs_to_tests[result.id] 1771 return template.finalize_bug_template( 1772 test_data.bug_template) 1773 except AttributeError: 1774 # Test control file does not have bug template defined. 1775 return template.bug_template 1776 except reporting_utils.InvalidBugTemplateException as e: 1777 logging.error('Merging bug templates failed with ' 1778 'error: %s An empty bug template will ' 1779 'be used.', e) 1780 return {} 1781 1782 def report(self, result): 1783 # reporting modules have dependency on external 1784 # packages, e.g., httplib2 Such dependency can cause 1785 # issue to any module tries to import suite.py without 1786 # building site-packages first. Since the reporting 1787 # modules are only used in this function, move the 1788 # imports here avoid the requirement of building site 1789 # packages to use other functions in this module. 1790 from autotest_lib.server.cros.dynamic_suite import reporting 1791 1792 reporting.send_email( 1793 self._get_test_bug(result), 1794 self._get_bug_template(result)) 1795