1*9c5db199SXin Li#!/usr/bin/python3 -u 2*9c5db199SXin Li 3*9c5db199SXin Lifrom __future__ import absolute_import 4*9c5db199SXin Lifrom __future__ import division 5*9c5db199SXin Lifrom __future__ import print_function 6*9c5db199SXin Li 7*9c5db199SXin Liimport collections 8*9c5db199SXin Liimport errno 9*9c5db199SXin Liimport fcntl 10*9c5db199SXin Liimport json 11*9c5db199SXin Liimport optparse 12*9c5db199SXin Liimport os 13*9c5db199SXin Liimport socket 14*9c5db199SXin Liimport sys 15*9c5db199SXin Liimport time 16*9c5db199SXin Liimport traceback 17*9c5db199SXin Li 18*9c5db199SXin Liimport common 19*9c5db199SXin Lifrom autotest_lib.client.bin.result_tools import utils as result_utils 20*9c5db199SXin Lifrom autotest_lib.client.bin.result_tools import utils_lib as result_utils_lib 21*9c5db199SXin Lifrom autotest_lib.client.bin.result_tools import runner as result_runner 22*9c5db199SXin Lifrom autotest_lib.client.common_lib import control_data 23*9c5db199SXin Lifrom autotest_lib.client.common_lib import global_config 24*9c5db199SXin Lifrom autotest_lib.client.common_lib import mail, pidfile 25*9c5db199SXin Lifrom autotest_lib.client.common_lib import utils 26*9c5db199SXin Lifrom autotest_lib.frontend import setup_django_environment 27*9c5db199SXin Lifrom autotest_lib.frontend.tko import models as tko_models 28*9c5db199SXin Lifrom autotest_lib.server import site_utils 29*9c5db199SXin Lifrom autotest_lib.server.cros.dynamic_suite import constants 30*9c5db199SXin Lifrom autotest_lib.tko import db as tko_db, utils as tko_utils 31*9c5db199SXin Lifrom autotest_lib.tko import models, parser_lib 32*9c5db199SXin Lifrom autotest_lib.tko.perf_upload import perf_uploader 33*9c5db199SXin Lifrom autotest_lib.utils.side_effects import config_loader 34*9c5db199SXin Liimport six 35*9c5db199SXin Li 36*9c5db199SXin Litry: 37*9c5db199SXin Li from autotest_lib.utils.frozen_chromite.lib import metrics 38*9c5db199SXin Liexcept ImportError: 39*9c5db199SXin Li metrics = utils.metrics_mock 
# Bundle of the per-run switches that parse_one() reads.
_ParseOptions = collections.namedtuple(
    'ParseOptions', ['reparse', 'mail_on_failure', 'dry_run', 'suite_report',
                     'datastore_creds', 'export_to_gcloud_path',
                     'disable_perf_upload'])

_HARDCODED_CONTROL_FILE_NAMES = (
        # client side test control, as saved in old Autotest paths.
        'control',
        # server side test control, as saved in old Autotest paths.
        'control.srv',
        # All control files, as saved in skylab.
        'control.from_control_name',
)

# Max size for the parser is 350mb due to large suites getting throttled.
DEFAULT_MAX_RESULT_SIZE_KB = 350000


def parse_args():
    """Build the command line parser and parse sys.argv.

    Prints an error plus the usage text and exits with status 1 when no
    results directory is given on the command line.

    @return: Tuple (options, args) as returned by optparse; args holds the
             positional results directories.
    """
    # build up our options parser and parse sys.argv
    parser = optparse.OptionParser()
    parser.add_option("-m", help="Send mail for FAILED tests",
                      dest="mailit", action="store_true")
    parser.add_option("-r", help="Reparse the results of a job",
                      dest="reparse", action="store_true")
    parser.add_option("-o", help="Parse a single results directory",
                      dest="singledir", action="store_true")
    parser.add_option("-l", help=("Levels of subdirectories to include "
                                  "in the job name"),
                      type="int", dest="level", default=1)
    parser.add_option("-n", help="No blocking on an existing parse",
                      dest="noblock", action="store_true")
    parser.add_option("-s", help="Database server hostname",
                      dest="db_host", action="store")
    parser.add_option("-u", help="Database username", dest="db_user",
                      action="store")
    parser.add_option("-p", help="Database password", dest="db_pass",
                      action="store")
    parser.add_option("-d", help="Database name", dest="db_name",
                      action="store")
    parser.add_option("--dry-run", help="Do not actually commit any results.",
                      dest="dry_run", action="store_true", default=False)
    parser.add_option(
            "--detach", action="store_true",
            help="Detach parsing process from the caller process. Used by "
                 "monitor_db to safely restart without affecting parsing.",
            default=False)
    parser.add_option("--write-pidfile",
                      help="write pidfile (.parser_execute)",
                      dest="write_pidfile", action="store_true",
                      default=False)
    parser.add_option("--record-duration",
                      help="[DEPRECATED] Record timing to metadata db",
                      dest="record_duration", action="store_true",
                      default=False)
    parser.add_option("--suite-report",
                      help=("Allows parsing job to attempt to create a suite "
                            "timeline report, if it detects that the job being "
                            "parsed is a suite job."),
                      dest="suite_report", action="store_true",
                      default=False)
    parser.add_option("--datastore-creds",
                      help=("[DEPRECATED] "
                            "The path to gcloud datastore credentials file, "
                            "which will be used to upload suite timeline "
                            "report to gcloud."),
                      dest="datastore_creds",
                      action="store",
                      default=None)
    parser.add_option(
            "--export-to-gcloud-path",
            help=("[DEPRECATED] "
                  "The path to export_to_gcloud script. Please find "
                  "chromite path on your server. The script is under "
                  "chromite/bin/."),
            dest="export_to_gcloud_path",
            action="store",
            default=None)
    parser.add_option("--disable-perf-upload",
                      help=("Do not upload perf results to chrome perf."),
                      dest="disable_perf_upload", action="store_true",
                      default=False)
    options, args = parser.parse_args()

    # we need a results directory
    if not args:
        tko_utils.dprint("ERROR: at least one results directory must "
                         "be provided")
        parser.print_help()
        sys.exit(1)

    # pass the options back
    return options, args
def format_failure_message(jobname, kernel, testname, status, reason):
    """Format one row of the failure table.

    The first four fields are left-justified and padded to 12, 20, 12 and 10
    characters respectively; longer values are not truncated. The reason is
    appended as-is.

    @param jobname: String representing the job name.
    @param kernel: String representing the kernel.
    @param testname: String representing the test name.
    @param status: String representing the test status.
    @param reason: String representing the reason.

    @return: Failure message as a string.
    """
    format_string = "%-12s %-20s %-12s %-10s %s"
    return format_string % (jobname, kernel, testname, status, reason)
def mailfailure(jobname, job, message):
    """Send an email about the failure.

    The mail body is a fixed header (a link built from this host's name and
    the job name, followed by the column titles of the failure table) with
    |message| appended.

    @param jobname: String representing the job name.
    @param job: A job object; job.user is passed to mail.send as recipient.
    @param message: The message to mail.
    """
    message_lines = [""]
    message_lines.append("The following tests FAILED for this job")
    message_lines.append("http://%s/results/%s" %
                         (socket.gethostname(), jobname))
    message_lines.append("")
    message_lines.append(format_failure_message("Job name", "Kernel",
                                                "Test name", "FAIL/WARN",
                                                "Failure reason"))
    message_lines.append(format_failure_message("=" * 8, "=" * 6, "=" * 8,
                                                "=" * 8, "=" * 14))
    message_header = "\n".join(message_lines)

    subject = "AUTOTEST: FAILED tests from job %s" % jobname
    mail.send("", job.user, "", subject, message_header + message)


def _invalidate_original_tests(orig_job_idx, retry_job_idx):
    """Retry tests invalidates original tests.

    Whenever a retry job is complete, we want to invalidate the original
    job's test results, such that the consumers of the tko database
    (e.g. tko frontend, wmatrix) could figure out which results are the latest.

    When a retry job is parsed, we retrieve the original job's afe_job_id
    from the retry job's keyvals, which is then converted to tko job_idx and
    passed into this method as |orig_job_idx|.

    In this method, we are going to invalidate the rows in tko_tests that are
    associated with the original job by flipping their 'invalid' bit to True.
    In addition, in tko_tests, we also maintain a pointer from the retry results
    to the original results, so that later we can always know which rows in
    tko_tests are retries and which are the corresponding original results.
    This is done by setting the field 'invalidates_test_idx' of the tests
    associated with the retry job.

    For example, assume Job(job_idx=105) are retried by Job(job_idx=108), after
    this method is run, their tko_tests rows will look like:
    __________________________________________________________________________
    test_idx| job_idx | test              | ... | invalid | invalidates_test_idx
    10      | 105     | example_Fail.Error| ... | 1       | NULL
    11      | 105     | example_Fail.Fail | ... | 1       | NULL
    ...
    20      | 108     | example_Fail.Error| ... | 0       | 10
    21      | 108     | example_Fail.Fail | ... | 0       | 11
    __________________________________________________________________________
    Note the invalid bits of the rows for Job(job_idx=105) are set to '1'.
    And the 'invalidates_test_idx' fields of the rows for Job(job_idx=108)
    are set to 10 and 11 (the test_idx of the rows for the original job).

    If either index is missing (falsy), an error is logged and nothing is
    touched in the database.

    @param orig_job_idx: An integer representing the original job's
                         tko job_idx. Tests associated with this job will
                         be marked as 'invalid'.
    @param retry_job_idx: An integer representing the retry job's
                          tko job_idx. The field 'invalidates_test_idx'
                          of the tests associated with this job will be updated.

    """
    msg = 'orig_job_idx: %s, retry_job_idx: %s' % (orig_job_idx, retry_job_idx)
    if not orig_job_idx or not retry_job_idx:
        tko_utils.dprint('ERROR: Could not invalidate tests: ' + msg)
        # Stop here: without both indexes there is nothing valid to update,
        # and falling through would also log a misleading success message.
        return
    # Using django models here makes things easier, but make sure that
    # before this method is called, all other relevant transactions have been
    # committed to avoid race condition. In the long run, we might consider
    # to make the rest of parser use django models.
    orig_tests = tko_models.Test.objects.filter(job__job_idx=orig_job_idx)
    retry_tests = tko_models.Test.objects.filter(job__job_idx=retry_job_idx)

    # Invalidate original tests.
    orig_tests.update(invalid=True)

    # Maintain a dictionary that maps (test, subdir) to original tests.
    # Note that within the scope of a job, (test, subdir) uniquely
    # identifies a test run, but 'test' does not.
    # In a control file, one could run the same test with different
    # 'subdir_tag', for example,
    #     job.run_test('example_Fail', tag='Error', subdir_tag='subdir_1')
    #     job.run_test('example_Fail', tag='Error', subdir_tag='subdir_2')
    # In tko, we will get
    #    (test='example_Fail.Error', subdir='example_Fail.Error.subdir_1')
    #    (test='example_Fail.Error', subdir='example_Fail.Error.subdir_2')
    invalidated_tests = {(orig_test.test, orig_test.subdir): orig_test
                         for orig_test in orig_tests}
    for retry in retry_tests:
        # It is possible that (retry.test, retry.subdir) doesn't exist
        # in invalidated_tests. This could happen when the original job
        # didn't run some of its tests. For example, a dut goes offline
        # since the beginning of the job, in which case invalidated_tests
        # will only have one entry for 'SERVER_JOB'.
        orig_test = invalidated_tests.get((retry.test, retry.subdir), None)
        if orig_test:
            retry.invalidates_test = orig_test
            retry.save()
    tko_utils.dprint('DEBUG: Invalidated tests associated to job: ' + msg)
def _throttle_result_size(path):
    """Limit the total size of test results for the given path.

    Does nothing (beyond logging) when result throttling is disabled. The
    size limit comes from the job's control file when it overrides the
    default, otherwise DEFAULT_MAX_RESULT_SIZE_KB is used. Throttling is
    best effort: a failure is logged with its traceback and not re-raised.

    @param path: Path of the result directory.
    """
    if not result_runner.ENABLE_RESULT_THROTTLING:
        tko_utils.dprint(
                'Result throttling is not enabled. Skipping throttling %s' %
                path)
        return

    max_result_size_KB = _max_result_size_from_control(path)
    if max_result_size_KB is None:
        max_result_size_KB = DEFAULT_MAX_RESULT_SIZE_KB

    try:
        result_utils.execute(path, max_result_size_KB)
    except Exception:
        # Catch Exception rather than everything so that KeyboardInterrupt
        # and SystemExit still propagate.
        tko_utils.dprint(
                'Failed to throttle result size of %s.\nDetails %s' %
                (path, traceback.format_exc()))
def _max_result_size_from_control(path):
    """Gets the max result size set in a control file, if any.

    Each name in _HARDCODED_CONTROL_FILE_NAMES is tried in order under
    |path|. The first control file whose max_result_size_KB differs from
    DEFAULT_MAX_RESULT_SIZE_KB wins. Files that cannot be read or parsed are
    logged and skipped.

    @param path: Path of the result directory.

    @return: The overriding size in KB, or None if no override is found.
    """
    for control_file in _HARDCODED_CONTROL_FILE_NAMES:
        control = os.path.join(path, control_file)
        if not os.path.exists(control):
            continue

        try:
            max_result_size_KB = control_data.parse_control(
                    control, raise_warnings=False).max_result_size_KB
            if max_result_size_KB != DEFAULT_MAX_RESULT_SIZE_KB:
                return max_result_size_KB
        except IOError as e:
            tko_utils.dprint(
                    'Failed to access %s. Error: %s\nDetails %s' %
                    (control, e, traceback.format_exc()))
        except control_data.ControlVariableException as e:
            tko_utils.dprint(
                    'Failed to parse %s. Error: %s\nDetails %s' %
                    (control, e, traceback.format_exc()))
    return None
def export_tko_job_to_file(job, jobname, filename):
    """Exports the tko job to disk file.

    @param job: database object.
    @param jobname: the job name as string.
    @param filename: the serialized binary destination path.
    """
    # Imported here rather than at module level, as in the original code.
    from autotest_lib.tko import job_serializer

    serializer = job_serializer.JobSerializer()
    serializer.serialize_to_binary(job, jobname, filename)


def parse_one(db, pid_file_manager, jobname, path, parse_options):
    """Parse a single job. Optionally send email on failure.

    Steps, in order: skip jobs already in the db unless reparsing; parse the
    status log into a job object; on reparse, match tests against the existing
    rows and delete unmatched ones; fill in job ids and build information;
    record result sizes in the job keyvals; collect failures; then, unless
    this is a dry run, mail the failure report, upload perf values, write the
    job to the db and handle retry jobs. Finally the job is optionally
    serialized to <path>/job.serialize and the gs_offloader instructions are
    updated for successful jobs.

    @param db: database object.
    @param pid_file_manager: pidfile.PidFileManager object; its
            num_tests_failed counter is incremented per failed test.
    @param jobname: the tag used to search for existing job in db,
                    e.g. '1234-chromeos-test/host1'
    @param path: The path to the results to be parsed.
    @param parse_options: _ParseOptions instance.

    @return job: the parsed job object, or None if the job was already
            parsed (and reparse is off) or no status file was found.
    """
    reparse = parse_options.reparse
    mail_on_failure = parse_options.mail_on_failure
    dry_run = parse_options.dry_run
    # Read but not used further in this function.
    suite_report = parse_options.suite_report

    tko_utils.dprint("\nScanning %s (%s)" % (jobname, path))
    old_job_idx = db.find_job(jobname)
    if old_job_idx is not None and not reparse:
        tko_utils.dprint("! Job is already parsed, done")
        return None

    # look up the status version
    job_keyval = models.job.read_keyval(path)
    status_version = job_keyval.get("status_version", 0)

    parser = parser_lib.parser(status_version)
    job = parser.make_job(path)
    tko_utils.dprint("+ Parsing dir=%s, jobname=%s" % (path, jobname))
    status_log_path = _find_status_log_path(path)
    if not status_log_path:
        tko_utils.dprint("! Unable to parse job, no status file")
        return None
    _parse_status_log(parser, job, status_log_path)

    if old_job_idx is not None:
        job.job_idx = old_job_idx
        unmatched_tests = _match_existing_tests(db, job)
        if not dry_run:
            _delete_tests_from_db(db, unmatched_tests)

    job.afe_job_id = tko_utils.get_afe_job_id(jobname)
    job.skylab_task_id = tko_utils.get_skylab_task_id(jobname)
    job.afe_parent_job_id = job_keyval.get(constants.PARENT_JOB_ID)
    job.skylab_parent_task_id = job_keyval.get(constants.PARENT_JOB_ID)
    job.build = None
    job.board = None
    job.build_version = None
    job.suite = None
    if job.label:
        label_info = site_utils.parse_job_name(job.label)
        if label_info:
            job.build = label_info.get('build', None)
            job.build_version = label_info.get('build_version', None)
            job.board = label_info.get('board', None)
            job.suite = label_info.get('suite', None)

    # A 'suite' keyval takes precedence over the suite parsed from the label.
    if 'suite' in job.keyval_dict:
        job.suite = job.keyval_dict['suite']

    result_utils_lib.LOG = tko_utils.dprint

    # Do not throttle results for now (b/207409280)
    # _throttle_result_size(path)

    # Record test result size to job_keyvals
    start_time = time.time()
    result_size_info = site_utils.collect_result_sizes(
            path, log=tko_utils.dprint)
    tko_utils.dprint('Finished collecting result sizes after %s seconds' %
                     (time.time()-start_time))
    job.keyval_dict.update(result_size_info._asdict())

    # TODO(dshi): Update sizes with sponge_invocation.xml and throttle it.

    # check for failures
    message_lines = [""]
    job_successful = True
    for test in job.tests:
        if not test.subdir:
            continue
        tko_utils.dprint("* testname, subdir, status, reason: %s %s %s %s"
                         % (test.testname, test.subdir, test.status,
                            test.reason))
        if test.status not in ('GOOD', 'WARN'):
            job_successful = False
            pid_file_manager.num_tests_failed += 1
            message_lines.append(format_failure_message(
                    jobname, test.kernel.base, test.subdir,
                    test.status, test.reason))

    message = "\n".join(message_lines)

    if not dry_run:
        # send out a email report of failure
        if len(message) > 2 and mail_on_failure:
            tko_utils.dprint("Sending email report of failure on %s to %s"
                             % (jobname, job.user))
            mailfailure(jobname, job, message)

        # Upload perf values to the perf dashboard, if applicable.
        if parse_options.disable_perf_upload:
            tko_utils.dprint("Skipping results upload to chrome perf as it is "
                             "disabled by config")
        else:
            for test in job.tests:
                perf_uploader.upload_test(job, test, jobname)

        _write_job_to_db(db, jobname, job)

        # Verify the job data is written to the database.
        if job.tests:
            tests_in_db = db.find_tests(job.job_idx)
            tests_in_db_count = len(tests_in_db) if tests_in_db else 0
            if tests_in_db_count != len(job.tests):
                tko_utils.dprint(
                        'Failed to find enough tests for job_idx: %d. The '
                        'job should have %d tests, only found %d tests.' %
                        (job.job_idx, len(job.tests), tests_in_db_count))
                metrics.Counter(
                        'chromeos/autotest/result/db_save_failure',
                        description='The number of times parse failed to '
                        'save job to TKO database.').increment()

        # Although the cursor has autocommit, we still need to force it to
        # commit existing changes before we can use django models, otherwise
        # it will go into deadlock when django models try to start a new
        # transaction while the current one has not finished yet.
        db.commit()

        # Handle retry job.
        # NOTE(review): nesting was reconstructed from a whitespace-collapsed
        # listing; this section is assumed to sit inside `if not dry_run`
        # because it writes to the database. Confirm against the original.
        orig_afe_job_id = job_keyval.get(constants.RETRY_ORIGINAL_JOB_ID,
                                         None)
        if orig_afe_job_id:
            orig_job_idx = tko_models.Job.objects.get(
                    afe_job_id=orig_afe_job_id).job_idx
            _invalidate_original_tests(orig_job_idx, job.job_idx)

    # Serializing job into a binary file
    export_tko_to_file = global_config.global_config.get_config_value(
            'AUTOSERV', 'export_tko_job_to_file', type=bool, default=False)

    binary_file_name = os.path.join(path, "job.serialize")
    if export_tko_to_file:
        export_tko_job_to_file(job, jobname, binary_file_name)

    if not dry_run:
        db.commit()

    # Mark GS_OFFLOADER_NO_OFFLOAD in gs_offloader_instructions at the end of
    # the function, so any failure, e.g., db connection error, will stop
    # gs_offloader_instructions being updated, and logs can be uploaded for
    # troubleshooting.
    if job_successful:
        # Check if we should not offload this test's results.
        if job_keyval.get(constants.JOB_OFFLOAD_FAILURES_KEY, False):
            # Update the gs_offloader_instructions json file.
            gs_instructions_file = os.path.join(
                    path, constants.GS_OFFLOADER_INSTRUCTIONS)
            gs_offloader_instructions = {}
            if os.path.exists(gs_instructions_file):
                with open(gs_instructions_file, 'r') as f:
                    gs_offloader_instructions = json.load(f)

            gs_offloader_instructions[constants.GS_OFFLOADER_NO_OFFLOAD] = True
            with open(gs_instructions_file, 'w') as f:
                json.dump(gs_offloader_instructions, f)
    return job
def _write_job_to_db(db, jobname, job):
    """Write all TKO data associated with a job to DB.

    Writes, in order: the machine, the job, the task reference (tagged
    'skylab' or 'afe' depending on the job name), the job keyvals and every
    test in job.tests.

    This updates the job object as a side effect.

    @param db: tko.db.db_sql object.
    @param jobname: Name of the job to write.
    @param job: tko.models.job object.
    """
    db.insert_or_update_machine(job)
    db.insert_job(jobname, job)
    db.insert_or_update_task_reference(
            job,
            'skylab' if tko_utils.is_skylab_task(jobname) else 'afe',
    )
    db.update_job_keyvals(job)
    for test in job.tests:
        db.insert_test(job, test)
def _find_status_log_path(path):
    """Locate the status file of a results directory.

    'status.log' is preferred over 'status' when both exist.

    @param path: The results directory to look in.

    @return: Path to the status file, or "" if neither file exists.
    """
    if os.path.exists(os.path.join(path, "status.log")):
        return os.path.join(path, "status.log")
    if os.path.exists(os.path.join(path, "status")):
        return os.path.join(path, "status")
    return ""


def _parse_status_log(parser, job, status_log_path):
    """Run the status parser over a status log and store the tests on job.

    @param parser: Parser object providing start(job) and end(lines).
    @param job: Job object; its 'tests' attribute is replaced.
    @param status_log_path: Path to the status log to read.
    """
    # Read under a context manager so the file handle is closed promptly
    # instead of being left to the garbage collector.
    with open(status_log_path) as status_log:
        status_lines = status_log.readlines()
    parser.start(job)
    tests = parser.end(status_lines)

    # parser.end can return the same object multiple times, so filter out dups
    job.tests = []
    already_added = set()
    for test in tests:
        if test not in already_added:
            already_added.add(test)
            job.tests.append(test)


def _match_existing_tests(db, job):
    """Find entries in the DB corresponding to the job's tests, update job.

    Tests are matched on (testname, subdir). A matched test gets its
    test_idx set from the existing row; a test with no existing row is only
    logged.

    @param db: database object providing select().
    @param job: Job object with job_idx set to the existing job's index.

    @return: Any unmatched tests in the db, as a dict mapping
             (test, subdir) to test_idx.
    """
    old_job_idx = job.job_idx
    raw_old_tests = db.select("test_idx,subdir,test", "tko_tests",
                              {"job_idx": old_job_idx})
    if raw_old_tests:
        old_tests = {(test, subdir): test_idx
                     for test_idx, subdir, test in raw_old_tests}
    else:
        old_tests = {}

    for test in job.tests:
        # pop() so that whatever is left in old_tests afterwards is unmatched.
        test_idx = old_tests.pop((test.testname, test.subdir), None)
        if test_idx is not None:
            test.test_idx = test_idx
        else:
            tko_utils.dprint("! Reparse returned new test "
                             "testname=%r subdir=%r" %
                             (test.testname, test.subdir))
    return old_tests
def _delete_tests_from_db(db, tests):
    """Delete tests and all rows that reference them from the TKO tables.

    @param db: database object providing delete(table, where).
    @param tests: Dict whose values are the test_idx of the tests to delete.
    """
    for test_idx in six.itervalues(tests):
        where = {'test_idx' : test_idx}
        db.delete('tko_iteration_result', where)
        db.delete('tko_iteration_perf_value', where)
        db.delete('tko_iteration_attributes', where)
        db.delete('tko_test_attributes', where)
        # This table keys on 'test_id' rather than 'test_idx'.
        db.delete('tko_test_labels_tests', {'test_id': test_idx})
        db.delete('tko_tests', where)


def _get_job_subdirs(path):
    """
    Returns a set of job subdirectories at path. Returns None if the test
    is itself a job directory. Does not recurse into the subdirs.

    @param path: The results directory to inspect.

    @return: Set of subdirectory names, or None.
    """
    # if there's a .machines file, use it to get the subdirs
    machine_list = os.path.join(path, ".machines")
    if os.path.exists(machine_list):
        with open(machine_list, 'r') as ml:
            # Skip blank lines. An empty name joins back to |path| itself,
            # which always exists, so it would be reported as a subdirectory
            # and send the caller recursing into the same directory forever.
            subdirs = set(line.strip() for line in ml if line.strip())
        existing_subdirs = set(subdir for subdir in subdirs
                               if os.path.exists(os.path.join(path, subdir)))
        if existing_subdirs:
            return existing_subdirs

    # if this dir contains ONLY subdirectories, return them
    contents = set(os.listdir(path))
    contents.discard(".parse.lock")
    subdirs = set(sub for sub in contents if
                  os.path.isdir(os.path.join(path, sub)))
    if len(contents) == len(subdirs) != 0:
        return subdirs

    # this is a job directory, or something else we don't understand
    return None
def parse_leaf_path(db, pid_file_manager, path, level, parse_options):
    """Parse a leaf path.

    The job name is made of the last |level| '/'-separated components of
    |path|. Parsing itself is done by parse_one, invoked through
    db.run_with_retry.

    @param db: database handle.
    @param pid_file_manager: pidfile.PidFileManager object.
    @param path: The path to the results to be parsed.
    @param level: Integer, level of subdirectories to include in the job name.
    @param parse_options: _ParseOptions instance.

    @returns: The job name of the parsed job, e.g. '123-chromeos-test/host1'
    """
    job_elements = path.split("/")[-level:]
    jobname = "/".join(job_elements)
    db.run_with_retry(parse_one, db, pid_file_manager, jobname, path,
                      parse_options)
    return jobname
'123-chromeos-test/host1' 603*9c5db199SXin Li """ 604*9c5db199SXin Li job_elements = path.split("/")[-level:] 605*9c5db199SXin Li jobname = "/".join(job_elements) 606*9c5db199SXin Li db.run_with_retry(parse_one, db, pid_file_manager, jobname, path, 607*9c5db199SXin Li parse_options) 608*9c5db199SXin Li return jobname 609*9c5db199SXin Li 610*9c5db199SXin Li 611*9c5db199SXin Lidef parse_path(db, pid_file_manager, path, level, parse_options): 612*9c5db199SXin Li """Parse a path 613*9c5db199SXin Li 614*9c5db199SXin Li @param db: database handle. 615*9c5db199SXin Li @param pid_file_manager: pidfile.PidFileManager object. 616*9c5db199SXin Li @param path: The path to the results to be parsed. 617*9c5db199SXin Li @param level: Integer, level of subdirectories to include in the job name. 618*9c5db199SXin Li @param parse_options: _ParseOptions instance. 619*9c5db199SXin Li 620*9c5db199SXin Li @returns: A set of job names of the parsed jobs. 621*9c5db199SXin Li set(['123-chromeos-test/host1', '123-chromeos-test/host2']) 622*9c5db199SXin Li """ 623*9c5db199SXin Li processed_jobs = set() 624*9c5db199SXin Li job_subdirs = _get_job_subdirs(path) 625*9c5db199SXin Li if job_subdirs is not None: 626*9c5db199SXin Li # parse status.log in current directory, if it exists. multi-machine 627*9c5db199SXin Li # synchronous server side tests record output in this directory. without 628*9c5db199SXin Li # this check, we do not parse these results. 
629*9c5db199SXin Li if os.path.exists(os.path.join(path, 'status.log')): 630*9c5db199SXin Li new_job = parse_leaf_path(db, pid_file_manager, path, level, 631*9c5db199SXin Li parse_options) 632*9c5db199SXin Li processed_jobs.add(new_job) 633*9c5db199SXin Li # multi-machine job 634*9c5db199SXin Li for subdir in job_subdirs: 635*9c5db199SXin Li jobpath = os.path.join(path, subdir) 636*9c5db199SXin Li new_jobs = parse_path(db, pid_file_manager, jobpath, level + 1, 637*9c5db199SXin Li parse_options) 638*9c5db199SXin Li processed_jobs.update(new_jobs) 639*9c5db199SXin Li else: 640*9c5db199SXin Li # single machine job 641*9c5db199SXin Li new_job = parse_leaf_path(db, pid_file_manager, path, level, 642*9c5db199SXin Li parse_options) 643*9c5db199SXin Li processed_jobs.add(new_job) 644*9c5db199SXin Li return processed_jobs 645*9c5db199SXin Li 646*9c5db199SXin Li 647*9c5db199SXin Lidef _detach_from_parent_process(): 648*9c5db199SXin Li """Allow reparenting the parse process away from caller. 649*9c5db199SXin Li 650*9c5db199SXin Li When monitor_db is run via upstart, restarting the job sends SIGTERM to 651*9c5db199SXin Li the whole process group. This makes us immune from that. 652*9c5db199SXin Li """ 653*9c5db199SXin Li if os.getpid() != os.getpgid(0): 654*9c5db199SXin Li os.setsid() 655*9c5db199SXin Li 656*9c5db199SXin Li 657*9c5db199SXin Lidef main(): 658*9c5db199SXin Li """tko_parse entry point.""" 659*9c5db199SXin Li options, args = parse_args() 660*9c5db199SXin Li 661*9c5db199SXin Li # We are obliged to use indirect=False, not use the SetupTsMonGlobalState 662*9c5db199SXin Li # context manager, and add a manual flush, because tko/parse is expected to 663*9c5db199SXin Li # be a very short lived (<1 min) script when working effectively, and we 664*9c5db199SXin Li # can't afford to either a) wait for up to 1min for metrics to flush at the 665*9c5db199SXin Li # end or b) drop metrics that were sent within the last minute of execution. 
666*9c5db199SXin Li site_utils.SetupTsMonGlobalState('tko_parse', indirect=False, 667*9c5db199SXin Li short_lived=True) 668*9c5db199SXin Li try: 669*9c5db199SXin Li with metrics.SuccessCounter('chromeos/autotest/tko_parse/runs'): 670*9c5db199SXin Li _main_with_options(options, args) 671*9c5db199SXin Li finally: 672*9c5db199SXin Li metrics.Flush() 673*9c5db199SXin Li 674*9c5db199SXin Li 675*9c5db199SXin Lidef _main_with_options(options, args): 676*9c5db199SXin Li """Entry point with options parsed and metrics already set up.""" 677*9c5db199SXin Li # Record the processed jobs so that 678*9c5db199SXin Li # we can send the duration of parsing to metadata db. 679*9c5db199SXin Li processed_jobs = set() 680*9c5db199SXin Li 681*9c5db199SXin Li if options.detach: 682*9c5db199SXin Li _detach_from_parent_process() 683*9c5db199SXin Li 684*9c5db199SXin Li results_dir = os.path.abspath(args[0]) 685*9c5db199SXin Li assert os.path.exists(results_dir) 686*9c5db199SXin Li 687*9c5db199SXin Li _update_db_config_from_json(options, results_dir) 688*9c5db199SXin Li 689*9c5db199SXin Li parse_options = _ParseOptions(options.reparse, options.mailit, 690*9c5db199SXin Li options.dry_run, options.suite_report, 691*9c5db199SXin Li options.datastore_creds, 692*9c5db199SXin Li options.export_to_gcloud_path, 693*9c5db199SXin Li options.disable_perf_upload) 694*9c5db199SXin Li 695*9c5db199SXin Li pid_file_manager = pidfile.PidFileManager("parser", results_dir) 696*9c5db199SXin Li 697*9c5db199SXin Li if options.write_pidfile: 698*9c5db199SXin Li pid_file_manager.open_file() 699*9c5db199SXin Li 700*9c5db199SXin Li try: 701*9c5db199SXin Li # build up the list of job dirs to parse 702*9c5db199SXin Li if options.singledir: 703*9c5db199SXin Li jobs_list = [results_dir] 704*9c5db199SXin Li else: 705*9c5db199SXin Li jobs_list = [os.path.join(results_dir, subdir) 706*9c5db199SXin Li for subdir in os.listdir(results_dir)] 707*9c5db199SXin Li 708*9c5db199SXin Li # build up the database 709*9c5db199SXin Li db 
= tko_db.db(autocommit=False, host=options.db_host, 710*9c5db199SXin Li user=options.db_user, password=options.db_pass, 711*9c5db199SXin Li database=options.db_name) 712*9c5db199SXin Li 713*9c5db199SXin Li # parse all the jobs 714*9c5db199SXin Li for path in jobs_list: 715*9c5db199SXin Li lockfile = open(os.path.join(path, ".parse.lock"), "w") 716*9c5db199SXin Li flags = fcntl.LOCK_EX 717*9c5db199SXin Li if options.noblock: 718*9c5db199SXin Li flags |= fcntl.LOCK_NB 719*9c5db199SXin Li try: 720*9c5db199SXin Li fcntl.flock(lockfile, flags) 721*9c5db199SXin Li except IOError as e: 722*9c5db199SXin Li # lock is not available and nonblock has been requested 723*9c5db199SXin Li if e.errno == errno.EWOULDBLOCK: 724*9c5db199SXin Li lockfile.close() 725*9c5db199SXin Li continue 726*9c5db199SXin Li else: 727*9c5db199SXin Li raise # something unexpected happened 728*9c5db199SXin Li try: 729*9c5db199SXin Li new_jobs = parse_path(db, pid_file_manager, path, options.level, 730*9c5db199SXin Li parse_options) 731*9c5db199SXin Li processed_jobs.update(new_jobs) 732*9c5db199SXin Li 733*9c5db199SXin Li finally: 734*9c5db199SXin Li fcntl.flock(lockfile, fcntl.LOCK_UN) 735*9c5db199SXin Li lockfile.close() 736*9c5db199SXin Li 737*9c5db199SXin Li except Exception as e: 738*9c5db199SXin Li pid_file_manager.close_file(1) 739*9c5db199SXin Li raise 740*9c5db199SXin Li else: 741*9c5db199SXin Li pid_file_manager.close_file(0) 742*9c5db199SXin Li 743*9c5db199SXin Li 744*9c5db199SXin Lidef _update_db_config_from_json(options, test_results_dir): 745*9c5db199SXin Li """Uptade DB config options using a side_effects_config.json file. 746*9c5db199SXin Li 747*9c5db199SXin Li @param options: parsed args to be updated. 748*9c5db199SXin Li @param test_results_dir: path to test results dir. 749*9c5db199SXin Li 750*9c5db199SXin Li @raises: json_format.ParseError if the file is not a valid JSON. 751*9c5db199SXin Li ValueError if the JSON config is incomplete. 
752*9c5db199SXin Li OSError if some files from the JSON config are missing. 753*9c5db199SXin Li """ 754*9c5db199SXin Li # results_dir passed to tko/parse is a subdir of the root results dir 755*9c5db199SXin Li config_dir = os.path.join(test_results_dir, os.pardir) 756*9c5db199SXin Li tko_utils.dprint("Attempting to read side_effects.Config from %s" % 757*9c5db199SXin Li config_dir) 758*9c5db199SXin Li config = config_loader.load(config_dir) 759*9c5db199SXin Li 760*9c5db199SXin Li if config: 761*9c5db199SXin Li tko_utils.dprint("Validating side_effects.Config.tko") 762*9c5db199SXin Li config_loader.validate_tko(config) 763*9c5db199SXin Li 764*9c5db199SXin Li tko_utils.dprint("Using the following DB config params from " 765*9c5db199SXin Li "side_effects.Config.tko:\n%s" % config.tko) 766*9c5db199SXin Li options.db_host = config.tko.proxy_socket 767*9c5db199SXin Li options.db_user = config.tko.mysql_user 768*9c5db199SXin Li 769*9c5db199SXin Li with open(config.tko.mysql_password_file, 'r') as f: 770*9c5db199SXin Li options.db_pass = f.read().rstrip('\n') 771*9c5db199SXin Li 772*9c5db199SXin Li options.disable_perf_upload = not config.chrome_perf.enabled 773*9c5db199SXin Li else: 774*9c5db199SXin Li tko_utils.dprint("No side_effects.Config found in %s - " 775*9c5db199SXin Li "defaulting to DB config values from shadow config" % config_dir) 776*9c5db199SXin Li 777*9c5db199SXin Li 778*9c5db199SXin Liif __name__ == "__main__": 779*9c5db199SXin Li main() 780