#!/usr/bin/python3 -u
# Copyright 2007-2008 Martin J. Bligh <[email protected]>, Google Inc.
# Released under the GPL v2

"""
Run a control file through the server side engine
"""

import datetime
import contextlib
import getpass
import logging
import os
import re
import shutil
import signal
import socket
import sys
import traceback
import time
import six
from six.moves import urllib

import common
from autotest_lib.client.bin.result_tools import utils as result_utils
from autotest_lib.client.bin.result_tools import view as result_view
from autotest_lib.client.common_lib import control_data
from autotest_lib.client.common_lib import autotest_enum
from autotest_lib.client.common_lib import error
from autotest_lib.client.common_lib import global_config
from autotest_lib.client.common_lib import host_queue_entry_states
from autotest_lib.client.common_lib import host_states
from autotest_lib.client.common_lib import seven
from autotest_lib.server.cros.dynamic_suite import suite

try:
    from autotest_lib.utils.frozen_chromite.lib import metrics
    from autotest_lib.utils.frozen_chromite.lib import cloud_trace
except ImportError as e:
    from autotest_lib.client.common_lib import utils as common_utils
    metrics = common_utils.metrics_mock
    import mock
    cloud_trace = mock.MagicMock()

# Number of seconds to wait before returning if testing mode is enabled
TESTING_MODE_SLEEP_SECS = 1


from autotest_lib.server import frontend
from autotest_lib.server import server_logging_config
from autotest_lib.server import server_job, utils, autoserv_parser, autotest
from autotest_lib.server import utils as server_utils
from autotest_lib.server import site_utils
from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
from autotest_lib.site_utils import job_directories
from autotest_lib.site_utils import lxc
from autotest_lib.site_utils.lxc import utils as lxc_utils
from autotest_lib.client.common_lib import pidfile, logging_manager


# Control segment to stage server-side package.
STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE = server_job._control_segment_path(
        'stage_server_side_package')

# Command line to start servod in a moblab.
START_SERVOD_CMD = 'sudo start servod BOARD=%s PORT=%s'
STOP_SERVOD_CMD = 'sudo stop servod'

_AUTOTEST_ROOT = os.path.realpath(os.path.join(os.path.dirname(__file__), '..'))
_CONTROL_FILE_FROM_CONTROL_NAME = 'control.from_control_name'

_LXC_JOB_FOLDER = 'lxc_job_folder'


def log_alarm(signum, frame):
    logging.error("Received SIGALRM. Exiting.")
    sys.exit(1)


def _get_companions(parser):
    """Get a list of companion devices from command line arg -ch.

    @param parser: Parser for the command line arguments.

    @return: A list of companion devices from command line arg -ch.
    """
    if parser.options.companion_hosts:
        companions = parser.options.companion_hosts.replace(',', ' ').strip().split()
    else:
        companions = []

    if companions:
        for companion in companions:
            if not companion or re.search(r'\s', companion):
                parser.parser.error("Invalid companion: %s" % str(companion))
        companions = list(set(companions))
        companions.sort()
    return companions
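
# Illustrative example of the -ch parsing above (hypothetical host names):
#
#   parser.options.companion_hosts = 'host2,host1 host2'
#   _get_companions(parser)  # -> ['host1', 'host2']
#
# Commas are normalized to spaces, duplicates are dropped, and the result is
# sorted for a stable order.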


def _get_dutservers(parser):
    """Get a list of DUT server addresses from command line arg --dut_servers.

    @param parser: Parser for the command line arguments.

    @return: A list of DUT server addresses from command line arg
             --dut_servers.
    """
    if parser.options.dut_servers:
        dut_servers = parser.options.dut_servers.replace(
                ',', ' ').strip().split()
    else:
        dut_servers = []

    if dut_servers:
        for dut_server in dut_servers:
            if not dut_server or re.search(r'\s', dut_server):
                parser.parser.error(
                        "Invalid DUT Server address: %s" % str(dut_server))
        dut_servers = list(set(dut_servers))
        dut_servers.sort()
    return dut_servers


def _get_machines(parser):
    """Get a list of machine names from command line arg -m or a file.

    @param parser: Parser for the command line arguments.

    @return: A list of machine names from command line arg -m or the
             machines file specified in the command line arg -M.
    """
    if parser.options.machines:
        machines = parser.options.machines.replace(',', ' ').strip().split()
    else:
        machines = []
    machines_file = parser.options.machines_file
    if machines_file:
        machines = []
        with open(machines_file, 'r') as f:
            for m in f.readlines():
                # Remove comments and surrounding whitespace.
                m = re.sub('#.*', '', m).strip()
                if m:
                    machines.append(m)
        logging.debug('Read list of machines from file: %s', machines_file)
        logging.debug('Machines: %s', ','.join(machines))

    if machines:
        for machine in machines:
            if not machine or re.search(r'\s', machine):
                parser.parser.error("Invalid machine: %s" % str(machine))
        machines = list(set(machines))
        machines.sort()
    return machines


def _stage_ssp(parser, resultsdir):
    """Stage server-side package.

    This function calls a control segment to stage the server-side package
    based on the job and the autoserv command line options. The detailed
    implementation can differ for each host type; currently, only CrosHost
    has a stage_server_side_package function defined.

    The function returns None if no server-side package is available.
    However, it may raise an exception if it failed for reasons other than
    the artifact (the server-side package) not being found.

    @param parser: Command line arguments parser passed in the autoserv
            process.
    @param resultsdir: Folder to store results. This could be different from
            parser.options.results: parser.options.results can be set to None
            for results to be stored in a temp folder. resultsdir can be None
            if the autoserv run requires no logging.

    @return: URL to the autotest server-side package. None in case of errors.
    """
    machines_list = _get_machines(parser)
    machines_list = server_job.get_machine_dicts(
            machine_names=machines_list,
            store_dir=os.path.join(resultsdir, parser.options.host_info_subdir),
            in_lab=parser.options.lab,
            use_shadow_store=not parser.options.local_only_host_info,
            host_attributes=parser.options.host_attributes,
    )

    namespace = {'machines': machines_list,
                 'image': parser.options.test_source_build}
    script_locals = {}

    seven.exec_file(
            STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE,
            globals_=namespace,
            locals_=script_locals,
    )
    ssp_url = script_locals['ssp_url']
    if not ssp_url:
        logging.error('Failed to stage SSP package: %s',
                      script_locals['error_msg'])
        logging.error('This job will fail later, when attempting to run with'
                      ' SSP')
    return ssp_url
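
# Sketch of the contract between _stage_ssp and the staging control segment
# (illustrative; the real segment lives at
# STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE and is executed via seven.exec_file):
#
#   Inputs, provided through the globals namespace:
#     machines  - list of machine dicts for the job
#     image     - parser.options.test_source_build
#   Outputs, read back from the segment's locals:
#     ssp_url   - URL of the staged package, or None on failure
#     error_msg - human-readable reason when ssp_url is None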


def _run_with_ssp(job, container_id, job_id, results, parser, ssp_url,
                  machines):
    """Run the server job with server-side packaging.

    @param job: The server job object.
    @param container_id: ID of the container to run the test.
    @param job_id: ID of the test job.
    @param results: Folder to store results. This could be different from
            parser.options.results:
            parser.options.results can be set to None for results to be
            stored in a temp folder.
            results can be None if the autoserv run requires no logging.
    @param parser: Command line parser that contains the options.
    @param ssp_url: URL of the staged server-side package.
    @param machines: A list of machines to run the test.
    """
    if not ssp_url:
        job.record('FAIL', None, None,
                   'Failed to stage server-side package')
        raise error.AutoservError('Failed to stage server-side package')

    bucket = lxc.ContainerBucket(
            base_name=_ssp_base_image_name_or_default(parser.options))
    control = (parser.args[0] if len(parser.args) > 0 and parser.args[0] != ''
               else None)
    try:
        dut_name = machines[0] if len(machines) >= 1 else None
        test_container = bucket.setup_test(container_id, job_id, ssp_url,
                                           results, control=control,
                                           job_folder=_LXC_JOB_FOLDER,
                                           dut_name=dut_name)
    except Exception as e:
        job.record('START', None, None, 'Starting SSP')
        job.record('END ABORT', None, None,
                   'Failed to setup container for test: %s. Check logs in '
                   'ssp_logs folder for more details.' % e)
        raise error.AutoservSSPError

    args = sys.argv[:]
    args.remove('--require-ssp')
    # --parent_job_id is only useful for autoserv running on the host, not in
    # the container. Including this argument will cause the test to fail for
    # builds before CL 286265 was merged.
    if '--parent_job_id' in args:
        index = args.index('--parent_job_id')
        args.remove('--parent_job_id')
        # Remove the actual parent job id from the command line args.
        del args[index]

    # A dictionary of paths to replace in the command line. Each key is a
    # path to be replaced with the path in the corresponding value.
    paths_to_replace = {}
    # Replace the control file path with the one in the container.
    if control:
        container_control_filename = os.path.join(
                lxc.CONTROL_TEMP_PATH, os.path.basename(control))
        paths_to_replace[control] = container_control_filename
    # Update the result directory with the one in the container.
    container_result_dir = lxc.RESULT_DIR_FMT % _LXC_JOB_FOLDER
    if parser.options.results:
        paths_to_replace[parser.options.results] = container_result_dir
    args = [paths_to_replace.get(arg, arg) for arg in args]

    # Apply --use-existing-results: the results directory is already created
    # and mounted in the container. Apply this arg to avoid an exception
    # being raised.
    if '--use-existing-results' not in args:
        args.append('--use-existing-results')

    # Make sure autoserv running in the container uses a different pid file.
    if '--pidfile-label' not in args:
        args.extend(['--pidfile-label', 'container_autoserv'])

    cmd_line = ' '.join(["'%s'" % arg if ' ' in arg else arg for arg in args])
    logging.info('Run command in container: %s', cmd_line)
    success = False
    try:
        test_container.attach_run(cmd_line)
        success = True
    except Exception as e:
        # If the test run inside the container fails without generating any
        # log, write a message to status.log to help troubleshooting.
        debug_files = os.listdir(os.path.join(results, 'debug'))
        if not debug_files:
            job.record('FAIL', None, None,
                       'Failed to run test inside the container: %s. Check '
                       'logs in ssp_logs folder for more details.' % e)
        raise
    finally:
        metrics.Counter(
                'chromeos/autotest/experimental/execute_job_in_ssp').increment(
                        fields={'success': success})
        test_container.destroy()


def correct_results_folder_permission(results):
    """Make sure the results folder has the right permission settings.

    For tests running with server-side packaging, the results folder is owned
    by root. This must be changed to the user running the autoserv process,
    so the parsing job can access the results folder.
    TODO(dshi): crbug.com/459344 Remove this function when the test container
    can be an unprivileged container.

    @param results: Path to the results folder.

    """
    if not results:
        return

    utils.run('sudo -n chown -R %s "%s"' % (os.getuid(), results))
    utils.run('sudo -n chgrp -R %s "%s"' % (os.getgid(), results))


def _start_servod(machine):
    """Try to start servod in moblab if it's not already running or is
    running with a different board or port.

    @param machine: Name of the dut used for test.
    """
    if not utils.is_moblab():
        return

    logging.debug('Trying to start servod.')
    try:
        afe = frontend.AFE()
        board = server_utils.get_board_from_afe(machine, afe)
        hosts = afe.get_hosts(hostname=machine)
        servo_host = hosts[0].attributes.get('servo_host', None)
        servo_port = hosts[0].attributes.get('servo_port', 9999)
        if servo_host not in ['localhost', '127.0.0.1']:
            logging.warning('Aborting servod start: the dut\'s servo_host '
                            'attribute is not set to localhost.')
            return
    except (urllib.error.HTTPError, urllib.error.URLError):
        # Ignore the error if the RPC failed to get the board.
        logging.error('Failed to get board name from AFE; aborting servod '
                      'start.')
        return

    try:
        pid = utils.run('pgrep servod').stdout
        cmd_line = utils.run('ps -fp %s' % pid).stdout
        if ('--board %s' % board in cmd_line and
            '--port %s' % servo_port in cmd_line):
            logging.debug('Servod is already running with the given board and '
                          'port. There is no need to restart servod.')
            return
        logging.debug('Servod is running with a different board or port. '
                      'Stopping existing servod.')
        utils.run(STOP_SERVOD_CMD)
    except error.CmdError:
        # servod is not running.
        pass

    try:
        utils.run(START_SERVOD_CMD % (board, servo_port))
        logging.debug('Servod is started')
    except error.CmdError as e:
        logging.error('Failed to start servod, error: %s', e)
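
# Illustrative walk-through of the servod check above (hypothetical values):
# if the AFE reports board='eve' and servo_port=9999, and
# `ps -fp $(pgrep servod)` already shows '--board eve' and '--port 9999' on
# the command line, the running servod is reused; any mismatch stops it
# (STOP_SERVOD_CMD) and starts a new instance via
# START_SERVOD_CMD % (board, servo_port).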
368 """ 369 cf_getter = suite.create_fs_getter(_AUTOTEST_ROOT) 370 control_name_predicate = suite.test_name_matches_pattern_predicate( 371 '^%s$' % control_name) 372 tests = suite.find_and_parse_tests(cf_getter, control_name_predicate) 373 if not tests: 374 raise error.AutoservError( 375 'Failed to find any control files with NAME %s' % control_name) 376 if len(tests) > 1: 377 logging.error('Found more than one control file with NAME %s: %s', 378 control_name, [t.path for t in tests]) 379 raise error.AutoservError( 380 'Found more than one control file with NAME %s' % control_name) 381 return tests[0].path 382 383 384def _stage_control_file(control_name, results_dir): 385 """Stage the control file to execute from local autotest checkout. 386 387 @param control_name: Name of the control file to stage. 388 @param results_dir: Results directory to stage the control file into. 389 @return: Absolute path to the staged control file. 390 """ 391 control_path = _control_path_on_disk(control_name) 392 new_control = os.path.join(results_dir, _CONTROL_FILE_FROM_CONTROL_NAME) 393 shutil.copy2(control_path, new_control) 394 return new_control 395 396 397def run_autoserv(pid_file_manager, results, parser, ssp_url, use_ssp): 398 """Run server job with given options. 399 400 @param pid_file_manager: PidFileManager used to monitor the autoserv process 401 @param results: Folder to store results. 402 @param parser: Parser for the command line arguments. 403 @param ssp_url: Url to server-side package. 404 @param use_ssp: Set to True to run with server-side packaging. 405 """ 406 # send stdin to /dev/null 407 dev_null = os.open(os.devnull, os.O_RDONLY) 408 os.dup2(dev_null, sys.stdin.fileno()) 409 os.close(dev_null) 410 411 # Create separate process group if the process is not a process group 412 # leader. This allows autoserv process to keep running after the caller 413 # process (drone manager call) exits. 414 if os.getpid() != os.getpgid(0): 415 os.setsid() 416 417 # Container name is predefined so the container can be destroyed in 418 # handle_sigterm. 419 job_or_task_id = job_directories.get_job_id_or_task_id( 420 parser.options.results) 421 container_id = lxc.ContainerId(job_or_task_id, time.time(), os.getpid()) 422 423 # Implement SIGTERM handler 424 def handle_sigterm(signum, frame): 425 logging.debug('Received SIGTERM') 426 if pid_file_manager: 427 pid_file_manager.close_file(1, signal.SIGTERM) 428 logging.debug('Finished writing to pid_file. Killing process.') 429 430 # Update results folder's file permission. This needs to be done ASAP 431 # before the parsing process tries to access the log. 432 if use_ssp and results: 433 correct_results_folder_permission(results) 434 435 # This sleep allows the pending output to be logged before the kill 436 # signal is sent. 437 time.sleep(.1) 438 if use_ssp: 439 logging.debug('Destroy container %s before aborting the autoserv ' 440 'process.', container_id) 441 try: 442 bucket = lxc.ContainerBucket( 443 base_name=_ssp_base_image_name_or_default( 444 parser.options)) 445 container = bucket.get_container(container_id) 446 if container: 447 container.destroy() 448 logging.debug("Container %s destroyed.", container_id) 449 else: 450 logging.debug('Container %s is not found.', container_id) 451 bucket.scrub_container_location(container_id) 452 except: 453 # Handle any exception so the autoserv process can be aborted. 


def run_autoserv(pid_file_manager, results, parser, ssp_url, use_ssp):
    """Run server job with given options.

    @param pid_file_manager: PidFileManager used to monitor the autoserv
            process.
    @param results: Folder to store results.
    @param parser: Parser for the command line arguments.
    @param ssp_url: Url to server-side package.
    @param use_ssp: Set to True to run with server-side packaging.
    """
    # Send stdin to /dev/null.
    dev_null = os.open(os.devnull, os.O_RDONLY)
    os.dup2(dev_null, sys.stdin.fileno())
    os.close(dev_null)

    # Create a separate process group if the process is not a process group
    # leader. This allows the autoserv process to keep running after the
    # caller process (drone manager call) exits.
    if os.getpid() != os.getpgid(0):
        os.setsid()

    # The container name is predefined so the container can be destroyed in
    # handle_sigterm.
    job_or_task_id = job_directories.get_job_id_or_task_id(
            parser.options.results)
    container_id = lxc.ContainerId(job_or_task_id, time.time(), os.getpid())

    # SIGTERM handler.
    def handle_sigterm(signum, frame):
        logging.debug('Received SIGTERM')
        if pid_file_manager:
            pid_file_manager.close_file(1, signal.SIGTERM)
        logging.debug('Finished writing to pid_file. Killing process.')

        # Update the results folder's file permission. This needs to be done
        # ASAP, before the parsing process tries to access the log.
        if use_ssp and results:
            correct_results_folder_permission(results)

        # This sleep allows the pending output to be logged before the kill
        # signal is sent.
        time.sleep(.1)
        if use_ssp:
            logging.debug('Destroy container %s before aborting the autoserv '
                          'process.', container_id)
            try:
                bucket = lxc.ContainerBucket(
                        base_name=_ssp_base_image_name_or_default(
                                parser.options))
                container = bucket.get_container(container_id)
                if container:
                    container.destroy()
                    logging.debug("Container %s destroyed.", container_id)
                else:
                    logging.debug('Container %s is not found.', container_id)
                    bucket.scrub_container_location(container_id)
            except:
                # Handle any exception so the autoserv process can be aborted.
                logging.exception('Failed to destroy container %s.',
                                  container_id)
            # Try to correct the result file permission again after the
            # container is destroyed, as the container might have created
            # some new files in the result folder.
            if results:
                correct_results_folder_permission(results)

        os.killpg(os.getpgrp(), signal.SIGKILL)

    # Set signal handler.
    signal.signal(signal.SIGTERM, handle_sigterm)

    # faulthandler is only needed to debug in the Lab and is not available to
    # be imported in the chroot as part of VMTest, so wrap the import in
    # try/except.
    try:
        import faulthandler
        faulthandler.register(signal.SIGTERM, all_threads=True, chain=True)
        logging.debug('faulthandler registered on SIGTERM.')
    except ImportError:
        # exc_clear() doesn't exist (nor is it needed) in python3.
        if six.PY2:
            sys.exc_clear()

    # Ignore SIGTTOUs generated by output from forked children.
    signal.signal(signal.SIGTTOU, signal.SIG_IGN)

    # If we received a SIGALRM, let's be loud about it.
    signal.signal(signal.SIGALRM, log_alarm)

    # Server side tests that call shell scripts often depend on $USER being
    # set, but depending on how you launch your autotest scheduler it may not
    # be set.
    os.environ['USER'] = getpass.getuser()

    label = parser.options.label
    group_name = parser.options.group_name
    user = parser.options.user
    client = parser.options.client
    server = parser.options.server
    verify = parser.options.verify
    repair = parser.options.repair
    cleanup = parser.options.cleanup
    provision = parser.options.provision
    reset = parser.options.reset
    job_labels = parser.options.job_labels
    no_tee = parser.options.no_tee
    execution_tag = parser.options.execution_tag
    ssh_user = parser.options.ssh_user
    ssh_port = parser.options.ssh_port
    ssh_pass = parser.options.ssh_pass
    collect_crashinfo = parser.options.collect_crashinfo
    control_filename = parser.options.control_filename
    verify_job_repo_url = parser.options.verify_job_repo_url
    skip_crash_collection = parser.options.skip_crash_collection
    ssh_verbosity = int(parser.options.ssh_verbosity)
    ssh_options = parser.options.ssh_options
    no_use_packaging = parser.options.no_use_packaging
    in_lab = bool(parser.options.lab)
    companion_hosts = _get_companions(parser)
    dut_servers = _get_dutservers(parser)
    is_cft = parser.options.cft
    force_full_log_collection = parser.options.force_full_log_collection

    # A test can't be both a client and a server side test.
    if client and server:
        parser.parser.error("Cannot specify a test as both server and client!")

    if provision and client:
        parser.parser.error("Cannot specify provisioning and client!")

    is_special_task = (verify or repair or cleanup or collect_crashinfo or
                       provision or reset)
    use_client_trampoline = False
    if parser.options.control_name:
        if use_ssp:
            # When use_ssp is True, autoserv will be re-executed inside a
            # container, preserving the --control-name argument. The control
            # file will be staged by the re-executed autoserv.
            control = None
        else:
            try:
                control = _stage_control_file(parser.options.control_name,
                                              results)
            except error.AutoservError as e:
                logging.info("Using client trampoline because of: %s", e)
                control = parser.options.control_name
                use_client_trampoline = True

    elif parser.args:
        control = parser.args[0]
    else:
        if not is_special_task:
            parser.parser.error("Missing argument: control file")
        control = None

    if ssh_verbosity > 0:
        # ssh_verbosity is an integer between 0 and 3, inclusive.
        ssh_verbosity_flag = '-' + 'v' * ssh_verbosity
    else:
        ssh_verbosity_flag = ''
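
    # For example (illustrative): ssh_verbosity == 2 yields
    # ssh_verbosity_flag == '-vv', which is passed through to the underlying
    # ssh invocations.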

    machines = _get_machines(parser)
    if group_name and len(machines) < 2:
        parser.parser.error('-G %r may only be supplied with more than one '
                            'machine.' % group_name)

    logging.debug("Parser.args is %r", parser.args)
    try:
        logging.debug("Parser.options.args is %r", parser.options.args)
    except AttributeError:
        logging.debug("No Parser.options.args.")

    try:
        logging.debug("Parser.options is %r", parser.options)
    except AttributeError:
        logging.debug("No Parser.options.")
    job_kwargs = {
            'control': control,
            'args': parser.args[1:],
            'resultdir': results,
            'label': label,
            'user': user,
            'machines': machines,
            'machine_dict_list': server_job.get_machine_dicts(
                    machine_names=machines,
                    store_dir=os.path.join(results,
                                           parser.options.host_info_subdir),
                    in_lab=in_lab,
                    use_shadow_store=not parser.options.local_only_host_info,
                    host_attributes=parser.options.host_attributes,
            ),
            'client': client,
            'ssh_user': ssh_user,
            'ssh_port': ssh_port,
            'ssh_pass': ssh_pass,
            'ssh_verbosity_flag': ssh_verbosity_flag,
            'ssh_options': ssh_options,
            'group_name': group_name,
            'tag': execution_tag,
            'disable_sysinfo': parser.options.disable_sysinfo,
            'in_lab': in_lab,
            'use_client_trampoline': use_client_trampoline,
            'sync_offload_dir': parser.options.sync_offload_dir,
            'companion_hosts': server_job.get_machine_dicts(
                    machine_names=companion_hosts,
                    store_dir=os.path.join(results,
                                           parser.options.host_info_subdir),
                    in_lab=in_lab,
                    use_shadow_store=not parser.options.local_only_host_info,
                    host_attributes=parser.options.host_attributes),
            'dut_servers': dut_servers,
            'is_cft': is_cft,
            'force_full_log_collection': force_full_log_collection
    }
    if parser.options.parent_job_id:
        job_kwargs['parent_job_id'] = int(parser.options.parent_job_id)
    if control_filename:
        job_kwargs['control_filename'] = control_filename
    if parser.options.image_storage_server:
        global_config.global_config.override_config_value(
                'CROS', 'image_storage_server',
                os.path.join(parser.options.image_storage_server, ''))

    job = server_job.server_job(**job_kwargs)

    job.logging.start_logging()

    # Perform checks.
    job.precheck()

    # Run the job.
    exit_code = 0
    auto_start_servod = global_config.global_config.get_config_value(
            'AUTOSERV', 'auto_start_servod', type=bool, default=False)

    if not utils.is_in_container():
        # crbug.com/1054522 -- ts_mon setup is broken inside the SSP container
        # due to a problem in the installed python packages.
        # Trying to clean up an incorrectly initialized ts_mon state adds a 5
        # second overhead in process teardown, so avoid setting up ts_mon
        # entirely inside the SSP container.
        site_utils.SetupTsMonGlobalState('autoserv', indirect=False,
                                         short_lived=True)
    try:
        try:
            if repair:
                if auto_start_servod and len(machines) == 1:
                    _start_servod(machines[0])
                job.repair(job_labels)
            elif verify:
                job.verify(job_labels)
            elif provision:
                job.provision(job_labels)
            elif reset:
                job.reset(job_labels)
            elif cleanup:
                job.cleanup(job_labels)
            else:
                if auto_start_servod and len(machines) == 1:
                    _start_servod(machines[0])
                if use_ssp:
                    try:
                        _run_with_ssp(job, container_id, job_or_task_id,
                                      results, parser, ssp_url, machines)
                    finally:
                        # Update the ownership of files in the result folder.
                        correct_results_folder_permission(results)
                else:
                    if collect_crashinfo:
                        # Update the ownership of files in the result folder.
                        # If the job to collect crashinfo was running inside a
                        # container (SSP) and crashed before correcting the
                        # folder permission, the result folder might have the
                        # wrong permission setting.
                        try:
                            correct_results_folder_permission(results)
                        except:
                            # Ignore any error, as the user may not have root
                            # permission to run the sudo command.
                            pass
                    metric_name = ('chromeos/autotest/experimental/'
                                   'autoserv_job_run_duration')
                    f = {'in_container': utils.is_in_container(),
                         'success': False}
                    with metrics.SecondsTimer(metric_name, fields=f) as c:
                        job.run(verify_job_repo_url=verify_job_repo_url,
                                only_collect_crashinfo=collect_crashinfo,
                                skip_crash_collection=skip_crash_collection,
                                job_labels=job_labels,
                                use_packaging=(not no_use_packaging))
                        c['success'] = True

        finally:
            job.close()
    except error.AutoservSSPError:
        # Due to the complexity of the TKO parsing/stainless connection, this
        # must be 0 so that the "abort" is actually reflected on stainless.
        exit_code = 0
        traceback.print_exc()
    except:
        exit_code = 1
        traceback.print_exc()
    finally:
        metrics.Flush()

    sys.exit(exit_code)


# Job breakdown statuses.
_hs = host_states.Status
_qs = host_queue_entry_states.Status
_status_list = [
        _qs.QUEUED, _qs.RESETTING, _qs.VERIFYING,
        _qs.PROVISIONING, _hs.REPAIRING, _qs.CLEANING,
        _qs.RUNNING, _qs.GATHERING, _qs.PARSING]
_JOB_OVERHEAD_STATUS = autotest_enum.AutotestEnum(*_status_list,
                                                  string_values=True)


def get_job_status(options):
    """Returns the HQE Status for this run.

    @param options: parser options.
    """
    s = _JOB_OVERHEAD_STATUS
    task_mapping = {
            'reset': s.RESETTING, 'verify': s.VERIFYING,
            'provision': s.PROVISIONING, 'repair': s.REPAIRING,
            'cleanup': s.CLEANING, 'collect_crashinfo': s.GATHERING}
    match = [task for task in task_mapping if getattr(options, task, False)]
    return task_mapping[match[0]] if match else s.RUNNING
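
# Illustrative example: for an invocation with --repair, options.repair is
# truthy, so get_job_status(options) returns _JOB_OVERHEAD_STATUS.REPAIRING;
# when no special-task flag is set, it falls through to RUNNING.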
740 """ 741 if not control_name: 742 return True 743 try: 744 path = _control_path_on_disk(control_name) 745 except error.AutoservError as e: 746 sys.stderr.write("autoserv: Could not determine control file path," 747 " assuming we need SSP: %s\n" % e) 748 sys.stderr.flush() 749 return True 750 if not os.path.isfile(path): 751 return True 752 control = control_data.parse_control(path) 753 # There must be explicit directive in the control file to disable SSP. 754 if not control or control.require_ssp is None: 755 return True 756 return control.require_ssp 757 758 759def _ssp_base_image_name_or_default(options): 760 """Extract base image name from autoserv options or the global config.""" 761 if options.ssp_base_image_name: 762 return options.ssp_base_image_name 763 return global_config.global_config.get_config_value('AUTOSERV', 764 'container_base_name') 765 766 767def main(): 768 start_time = datetime.datetime.now() 769 parser = autoserv_parser.autoserv_parser 770 parser.parse_args() 771 772 if len(sys.argv) == 1: 773 parser.parser.print_help() 774 sys.exit(1) 775 776 if parser.options.no_logging: 777 results = None 778 else: 779 results = parser.options.results 780 if not results: 781 results = 'results.' + time.strftime('%Y-%m-%d-%H.%M.%S') 782 results = os.path.abspath(results) 783 resultdir_exists = False 784 for filename in ('control.srv', 'status.log', '.autoserv_execute'): 785 if os.path.exists(os.path.join(results, filename)): 786 resultdir_exists = True 787 if not parser.options.use_existing_results and resultdir_exists: 788 error = "Error: results directory already exists: %s\n" % results 789 sys.stderr.write(error) 790 sys.exit(1) 791 792 # Now that we certified that there's no leftover results dir from 793 # previous jobs, lets create the result dir since the logging system 794 # needs to create the log file in there. 795 if not os.path.isdir(results): 796 os.makedirs(results) 797 798 if parser.options.require_ssp: 799 # This is currently only used for skylab (i.e., when --control-name is 800 # used). 


def _ssp_base_image_name_or_default(options):
    """Extract the base image name from autoserv options or the global
    config."""
    if options.ssp_base_image_name:
        return options.ssp_base_image_name
    return global_config.global_config.get_config_value('AUTOSERV',
                                                        'container_base_name')


def main():
    start_time = datetime.datetime.now()
    parser = autoserv_parser.autoserv_parser
    parser.parse_args()

    if len(sys.argv) == 1:
        parser.parser.print_help()
        sys.exit(1)

    if parser.options.no_logging:
        results = None
    else:
        results = parser.options.results
        if not results:
            results = 'results.' + time.strftime('%Y-%m-%d-%H.%M.%S')
        results = os.path.abspath(results)
        resultdir_exists = False
        for filename in ('control.srv', 'status.log', '.autoserv_execute'):
            if os.path.exists(os.path.join(results, filename)):
                resultdir_exists = True
        if not parser.options.use_existing_results and resultdir_exists:
            error_msg = ("Error: results directory already exists: %s\n" %
                         results)
            sys.stderr.write(error_msg)
            sys.exit(1)

        # Now that we have verified that there is no leftover results dir
        # from previous jobs, let's create the result dir, since the logging
        # system needs to create the log file in there.
        if not os.path.isdir(results):
            os.makedirs(results)

    if parser.options.require_ssp:
        # This is currently only used for skylab (i.e., when --control-name
        # is used).
        use_ssp = _require_ssp_from_control(parser.options.control_name)
    else:
        use_ssp = False

    if use_ssp:
        log_dir = os.path.join(results, 'ssp_logs') if results else None
        if log_dir and not os.path.exists(log_dir):
            os.makedirs(log_dir)
    else:
        log_dir = results

    logging_manager.configure_logging(
            server_logging_config.ServerLoggingConfig(),
            results_dir=log_dir,
            use_console=not parser.options.no_tee,
            verbose=parser.options.verbose,
            no_console_prefix=parser.options.no_console_prefix)

    logging.debug('autoserv is running in drone %s.', socket.gethostname())
    logging.debug('autoserv environment: %r', os.environ)
    logging.debug('autoserv command was: %s', ' '.join(sys.argv))
    logging.debug('autoserv parsed options: %s', parser.options)
    logging.debug('autoserv python version: %s', sys.version)

    if use_ssp:
        ssp_url = _stage_ssp(parser, results)
    else:
        ssp_url = None

    if results:
        logging.info("Results placed in %s", results)

    # Wait until now to perform this check, so it gets properly logged.
    if (parser.options.use_existing_results and not resultdir_exists and
        not utils.is_in_container()):
        logging.error("No existing results directory found: %s", results)
        sys.exit(1)

    if parser.options.write_pidfile and results:
        pid_file_manager = pidfile.PidFileManager(parser.options.pidfile_label,
                                                  results)
        pid_file_manager.open_file()
    else:
        pid_file_manager = None

    autotest.Autotest.set_install_in_tmpdir(
            parser.options.install_in_tmpdir)

    exit_code = 0
    is_task = (parser.options.verify or parser.options.repair or
               parser.options.provision or parser.options.reset or
               parser.options.cleanup or parser.options.collect_crashinfo)

    trace_labels = {
            'job_id': job_directories.get_job_id_or_task_id(
                    parser.options.results)
    }
    trace = cloud_trace.SpanStack(
            labels=trace_labels,
            global_context=parser.options.cloud_trace_context)
    trace.enabled = parser.options.cloud_trace_context_enabled == 'True'
    try:
        try:
            with trace.Span(get_job_status(parser.options)):
                run_autoserv(pid_file_manager, results, parser, ssp_url,
                             use_ssp)
        except SystemExit as e:
            exit_code = e.code
            if exit_code:
                logging.exception('Uncaught SystemExit with code %s',
                                  exit_code)
    except Exception:
        # If we don't know what happened, we'll classify it as
        # an 'abort' and return 1.
        logging.exception('Uncaught Exception, exit_code = 1.')
        exit_code = 1
    finally:
        if pid_file_manager:
            pid_file_manager.close_file(exit_code)
    sys.exit(exit_code)


if __name__ == '__main__':
    main()
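
# Illustrative invocation (hypothetical host and paths; see autoserv_parser
# for the authoritative option list):
#
#   ./server/autoserv -m host1 -r /tmp/results path/to/control
#
# runs the given control file against machine 'host1', writing logs and
# results under /tmp/results.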