1#!/usr/bin/python3 -u
2# Copyright 2007-2008 Martin J. Bligh <[email protected]>, Google Inc.
3# Released under the GPL v2
4
5"""
6Run a control file through the server side engine
7"""
8
9import datetime
10import contextlib
11import getpass
12import logging
13import os
14import re
15import shutil
16import signal
17import socket
18import sys
19import traceback
20import time
21import six
22from six.moves import urllib
23
24import common
25from autotest_lib.client.bin.result_tools import utils as result_utils
26from autotest_lib.client.bin.result_tools import view as result_view
27from autotest_lib.client.common_lib import control_data
28from autotest_lib.client.common_lib import autotest_enum
29from autotest_lib.client.common_lib import error
30from autotest_lib.client.common_lib import global_config
31from autotest_lib.client.common_lib import host_queue_entry_states
32from autotest_lib.client.common_lib import host_states
33from autotest_lib.client.common_lib import seven
34from autotest_lib.server.cros.dynamic_suite import suite
35
36try:
37    from autotest_lib.utils.frozen_chromite.lib import metrics
38    from autotest_lib.utils.frozen_chromite.lib import cloud_trace
39except ImportError as e:
40    from autotest_lib.client.common_lib import utils as common_utils
41    metrics = common_utils.metrics_mock
42    import mock
43    cloud_trace = mock.MagicMock()
44
45# Number of seconds to wait before returning if testing mode is enabled
46TESTING_MODE_SLEEP_SECS = 1
47
48
49from autotest_lib.server import frontend
50from autotest_lib.server import server_logging_config
51from autotest_lib.server import server_job, utils, autoserv_parser, autotest
52from autotest_lib.server import utils as server_utils
53from autotest_lib.server import site_utils
54from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
55from autotest_lib.site_utils import job_directories
56from autotest_lib.site_utils import lxc
57from autotest_lib.site_utils.lxc import utils as lxc_utils
58from autotest_lib.client.common_lib import pidfile, logging_manager
59
60
61# Control segment to stage server-side package.
62STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE = server_job._control_segment_path(
63        'stage_server_side_package')
64
65# Command line to start servod in a moblab.
66START_SERVOD_CMD = 'sudo start servod BOARD=%s PORT=%s'
67STOP_SERVOD_CMD = 'sudo stop servod'
68
69_AUTOTEST_ROOT = os.path.realpath(os.path.join(os.path.dirname(__file__), '..'))
70_CONTROL_FILE_FROM_CONTROL_NAME = 'control.from_control_name'
71
72_LXC_JOB_FOLDER = 'lxc_job_folder'
73
74def log_alarm(signum, frame):
75    logging.error("Received SIGALARM. Ignoring and continuing on.")
76    sys.exit(1)
77
78
79def _get_companions(parser):
80    """Get a list of companion devices from command line arg -ch.
81
82    @param parser: Parser for the command line arguments.
83
84    @return: A list of companion devices from command line arg -ch.
85    """
86    if parser.options.companion_hosts:
87        companions = parser.options.companion_hosts.replace(',', ' ').strip().split()
88    else:
89        companions = []
90
91    if companions:
92        for companion in companions:
93            if not companion or re.search('\s', companion):
94                parser.parser.error("Invalid companion: %s" % str(companion))
95        companions = list(set(companions))
96        companions.sort()
97    return companions
98
99
100def _get_dutservers(parser):
101    """Get a list of DUT server addresses from command line arg --dut_servers.
102
103    @param parser: Parser for the command line arguments.
104
105    @return: A list of DUT server addresses from command line arg
106             --dut_servers.
107    """
108    if parser.options.dut_servers:
109        dut_servers = parser.options.dut_servers.replace(
110            ',', ' ').strip().split()
111    else:
112        dut_servers = []
113
114    if dut_servers:
115        for dut_server in dut_servers:
116            if not dut_server or re.search('\s', dut_server):
117                parser.parser.error(
118                    "Invalid DUT Server address: %s" % str(dut_server))
119        dut_servers = list(set(dut_servers))
120        dut_servers.sort()
121    return dut_servers
122
123
124def _get_machines(parser):
125    """Get a list of machine names from command line arg -m or a file.
126
127    @param parser: Parser for the command line arguments.
128
129    @return: A list of machine names from command line arg -m or the
130             machines file specified in the command line arg -M.
131    """
132    if parser.options.machines:
133        machines = parser.options.machines.replace(',', ' ').strip().split()
134    else:
135        machines = []
136    machines_file = parser.options.machines_file
137    if machines_file:
138        machines = []
139        for m in open(machines_file, 'r').readlines():
140            # remove comments, spaces
141            m = re.sub('#.*', '', m).strip()
142            if m:
143                machines.append(m)
144        logging.debug('Read list of machines from file: %s', machines_file)
145        logging.debug('Machines: %s', ','.join(machines))
146
147    if machines:
148        for machine in machines:
149            if not machine or re.search('\s', machine):
150                parser.parser.error("Invalid machine: %s" % str(machine))
151        machines = list(set(machines))
152        machines.sort()
153    return machines
154
155
156def _stage_ssp(parser, resultsdir):
157    """Stage server-side package.
158
159    This function calls a control segment to stage server-side package based on
160    the job and autoserv command line option. The detail implementation could
161    be different for each host type. Currently, only CrosHost has
162    stage_server_side_package function defined.
163    The script returns None if no server-side package is available. However,
164    it may raise exception if it failed for reasons other than artifact (the
165    server-side package) not found.
166
167    @param parser: Command line arguments parser passed in the autoserv process.
168    @param resultsdir: Folder to store results. This could be different from
169            parser.options.results: parser.options.results  can be set to None
170            for results to be stored in a temp folder. resultsdir can be None
171            for autoserv run requires no logging.
172
173    @return: url to the autotest server-side package. None in case of errors.
174    """
175    machines_list = _get_machines(parser)
176    machines_list = server_job.get_machine_dicts(
177            machine_names=machines_list,
178            store_dir=os.path.join(resultsdir, parser.options.host_info_subdir),
179            in_lab=parser.options.lab,
180            use_shadow_store=not parser.options.local_only_host_info,
181            host_attributes=parser.options.host_attributes,
182    )
183
184    namespace = {'machines': machines_list,
185                 'image': parser.options.test_source_build}
186    script_locals = {}
187
188    seven.exec_file(
189        STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE,
190        globals_=namespace,
191        locals_=script_locals,
192    )
193    ssp_url = script_locals['ssp_url']
194    if not ssp_url:
195        logging.error('Failed to stage SSP package: %s',
196                      script_locals['error_msg'])
197        logging.error('This job will fail later, when attempting to run with'
198                      ' SSP')
199    return ssp_url
200
201
202def _run_with_ssp(job, container_id, job_id, results, parser, ssp_url,
203                  machines):
204    """Run the server job with server-side packaging.
205
206    @param job: The server job object.
207    @param container_id: ID of the container to run the test.
208    @param job_id: ID of the test job.
209    @param results: Folder to store results. This could be different from
210                    parser.options.results:
211                    parser.options.results  can be set to None for results to be
212                    stored in a temp folder.
213                    results can be None if the autoserv run requires no logging.
214    @param parser: Command line parser that contains the options.
215    @param ssp_url: url of the staged server-side package.
216    @param machines: A list of machines to run the test.
217    """
218    if not ssp_url:
219        job.record('FAIL', None, None,
220                   'Failed to stage server-side package')
221        raise error.AutoservError('Failed to stage server-side package')
222
223    bucket = lxc.ContainerBucket(
224            base_name=_ssp_base_image_name_or_default(parser.options))
225    control = (parser.args[0] if len(parser.args) > 0 and parser.args[0] != ''
226               else None)
227    try:
228        dut_name = machines[0] if len(machines) >= 1 else None
229        test_container = bucket.setup_test(container_id, job_id, ssp_url,
230                                           results, control=control,
231                                           job_folder=_LXC_JOB_FOLDER,
232                                           dut_name=dut_name)
233    except Exception as e:
234        job.record('START', None, None, 'Starting SSP')
235        job.record('END ABORT', None, None,
236                   'Failed to setup container for test: %s. Check logs in '
237                   'ssp_logs folder for more details.' % e)
238        raise error.AutoservSSPError
239
240    args = sys.argv[:]
241    args.remove('--require-ssp')
242    # --parent_job_id is only useful in autoserv running in host, not in
243    # container. Include this argument will cause test to fail for builds before
244    # CL 286265 was merged.
245    if '--parent_job_id' in args:
246        index = args.index('--parent_job_id')
247        args.remove('--parent_job_id')
248        # Remove the actual parent job id in command line arg.
249        del args[index]
250
251    # A dictionary of paths to replace in the command line. Key is the path to
252    # be replaced with the one in value.
253    paths_to_replace = {}
254    # Replace the control file path with the one in container.
255    if control:
256        container_control_filename = os.path.join(
257                lxc.CONTROL_TEMP_PATH, os.path.basename(control))
258        paths_to_replace[control] = container_control_filename
259    # Update result directory with the one in container.
260    container_result_dir = os.path.join(lxc.RESULT_DIR_FMT % _LXC_JOB_FOLDER)
261    if parser.options.results:
262        paths_to_replace[parser.options.results] = container_result_dir
263    args = [paths_to_replace.get(arg, arg) for arg in args]
264
265    # Apply --use-existing-results, results directory is aready created and
266    # mounted in container. Apply this arg to avoid exception being raised.
267    if not '--use-existing-results' in args:
268        args.append('--use-existing-results')
269
270    # Make sure autoserv running in container using a different pid file.
271    if not '--pidfile-label' in args:
272        args.extend(['--pidfile-label', 'container_autoserv'])
273
274    cmd_line = ' '.join(["'%s'" % arg if ' ' in arg else arg for arg in args])
275    logging.info('Run command in container: %s', cmd_line)
276    success = False
277    try:
278        test_container.attach_run(cmd_line)
279        success = True
280    except Exception as e:
281        # If the test run inside container fails without generating any log,
282        # write a message to status.log to help troubleshooting.
283        debug_files = os.listdir(os.path.join(results, 'debug'))
284        if not debug_files:
285            job.record('FAIL', None, None,
286                       'Failed to run test inside the container: %s. Check '
287                       'logs in ssp_logs folder for more details.' % e)
288        raise
289    finally:
290        metrics.Counter(
291            'chromeos/autotest/experimental/execute_job_in_ssp').increment(
292                fields={'success': success})
293        test_container.destroy()
294
295
296def correct_results_folder_permission(results):
297    """Make sure the results folder has the right permission settings.
298
299    For tests running with server-side packaging, the results folder has the
300    owner of root. This must be changed to the user running the autoserv
301    process, so parsing job can access the results folder.
302    TODO(dshi): crbug.com/459344 Remove this function when test container can be
303    unprivileged container.
304
305    @param results: Path to the results folder.
306
307    """
308    if not results:
309        return
310
311    utils.run('sudo -n chown -R %s "%s"' % (os.getuid(), results))
312    utils.run('sudo -n chgrp -R %s "%s"' % (os.getgid(), results))
313
314
315def _start_servod(machine):
316    """Try to start servod in moblab if it's not already running or running with
317    different board or port.
318
319    @param machine: Name of the dut used for test.
320    """
321    if not utils.is_moblab():
322        return
323
324    logging.debug('Trying to start servod.')
325    try:
326        afe = frontend.AFE()
327        board = server_utils.get_board_from_afe(machine, afe)
328        hosts = afe.get_hosts(hostname=machine)
329        servo_host = hosts[0].attributes.get('servo_host', None)
330        servo_port = hosts[0].attributes.get('servo_port', 9999)
331        if not servo_host in ['localhost', '127.0.0.1']:
332            logging.warning('Starting servod is aborted. The dut\'s servo_host '
333                         'attribute is not set to localhost.')
334            return
335    except (urllib.error.HTTPError, urllib.error.URLError):
336        # Ignore error if RPC failed to get board
337        logging.error('Failed to get board name from AFE. Start servod is '
338                      'aborted')
339        return
340
341    try:
342        pid = utils.run('pgrep servod').stdout
343        cmd_line = utils.run('ps -fp %s' % pid).stdout
344        if ('--board %s' % board in cmd_line and
345            '--port %s' % servo_port in cmd_line):
346            logging.debug('Servod is already running with given board and port.'
347                          ' There is no need to restart servod.')
348            return
349        logging.debug('Servod is running with different board or port. '
350                      'Stopping existing servod.')
351        utils.run('sudo stop servod')
352    except error.CmdError:
353        # servod is not running.
354        pass
355
356    try:
357        utils.run(START_SERVOD_CMD % (board, servo_port))
358        logging.debug('Servod is started')
359    except error.CmdError as e:
360        logging.error('Servod failed to be started, error: %s', e)
361
362
363def _control_path_on_disk(control_name):
364    """Find the control file corresponding to the given control name, on disk.
365
366    @param control_name: NAME attribute of the control file to fetch.
367    @return: Path to the control file.
368    """
369    cf_getter = suite.create_fs_getter(_AUTOTEST_ROOT)
370    control_name_predicate = suite.test_name_matches_pattern_predicate(
371            '^%s$' % control_name)
372    tests = suite.find_and_parse_tests(cf_getter, control_name_predicate)
373    if not tests:
374        raise error.AutoservError(
375                'Failed to find any control files with NAME %s' % control_name)
376    if len(tests) > 1:
377        logging.error('Found more than one control file with NAME %s: %s',
378                      control_name, [t.path for t in tests])
379        raise error.AutoservError(
380                'Found more than one control file with NAME %s' % control_name)
381    return tests[0].path
382
383
384def _stage_control_file(control_name, results_dir):
385    """Stage the control file to execute from local autotest checkout.
386
387    @param control_name: Name of the control file to stage.
388    @param results_dir: Results directory to stage the control file into.
389    @return: Absolute path to the staged control file.
390    """
391    control_path = _control_path_on_disk(control_name)
392    new_control = os.path.join(results_dir, _CONTROL_FILE_FROM_CONTROL_NAME)
393    shutil.copy2(control_path, new_control)
394    return new_control
395
396
397def run_autoserv(pid_file_manager, results, parser, ssp_url, use_ssp):
398    """Run server job with given options.
399
400    @param pid_file_manager: PidFileManager used to monitor the autoserv process
401    @param results: Folder to store results.
402    @param parser: Parser for the command line arguments.
403    @param ssp_url: Url to server-side package.
404    @param use_ssp: Set to True to run with server-side packaging.
405    """
406    # send stdin to /dev/null
407    dev_null = os.open(os.devnull, os.O_RDONLY)
408    os.dup2(dev_null, sys.stdin.fileno())
409    os.close(dev_null)
410
411    # Create separate process group if the process is not a process group
412    # leader. This allows autoserv process to keep running after the caller
413    # process (drone manager call) exits.
414    if os.getpid() != os.getpgid(0):
415        os.setsid()
416
417    # Container name is predefined so the container can be destroyed in
418    # handle_sigterm.
419    job_or_task_id = job_directories.get_job_id_or_task_id(
420            parser.options.results)
421    container_id = lxc.ContainerId(job_or_task_id, time.time(), os.getpid())
422
423    # Implement SIGTERM handler
424    def handle_sigterm(signum, frame):
425        logging.debug('Received SIGTERM')
426        if pid_file_manager:
427            pid_file_manager.close_file(1, signal.SIGTERM)
428        logging.debug('Finished writing to pid_file. Killing process.')
429
430        # Update results folder's file permission. This needs to be done ASAP
431        # before the parsing process tries to access the log.
432        if use_ssp and results:
433            correct_results_folder_permission(results)
434
435        # This sleep allows the pending output to be logged before the kill
436        # signal is sent.
437        time.sleep(.1)
438        if use_ssp:
439            logging.debug('Destroy container %s before aborting the autoserv '
440                          'process.', container_id)
441            try:
442                bucket = lxc.ContainerBucket(
443                        base_name=_ssp_base_image_name_or_default(
444                                parser.options))
445                container = bucket.get_container(container_id)
446                if container:
447                    container.destroy()
448                    logging.debug("Container %s destroyed.", container_id)
449                else:
450                    logging.debug('Container %s is not found.', container_id)
451                    bucket.scrub_container_location(container_id)
452            except:
453                # Handle any exception so the autoserv process can be aborted.
454                logging.exception('Failed to destroy container %s.',
455                                  container_id)
456            # Try to correct the result file permission again after the
457            # container is destroyed, as the container might have created some
458            # new files in the result folder.
459            if results:
460                correct_results_folder_permission(results)
461
462        os.killpg(os.getpgrp(), signal.SIGKILL)
463
464    # Set signal handler
465    signal.signal(signal.SIGTERM, handle_sigterm)
466
467    # faulthandler is only needed to debug in the Lab and is not avaliable to
468    # be imported in the chroot as part of VMTest, so Try-Except it.
469    try:
470        import faulthandler
471        faulthandler.register(signal.SIGTERM, all_threads=True, chain=True)
472        logging.debug('faulthandler registered on SIGTERM.')
473    except ImportError:
474        # exc_clear() doesn't exist (nor is needed) in python3
475        if six.PY2:
476            sys.exc_clear()
477
478    # Ignore SIGTTOU's generated by output from forked children.
479    signal.signal(signal.SIGTTOU, signal.SIG_IGN)
480
481    # If we received a SIGALARM, let's be loud about it.
482    signal.signal(signal.SIGALRM, log_alarm)
483
484    # Server side tests that call shell scripts often depend on $USER being set
485    # but depending on how you launch your autotest scheduler it may not be set.
486    os.environ['USER'] = getpass.getuser()
487
488    label = parser.options.label
489    group_name = parser.options.group_name
490    user = parser.options.user
491    client = parser.options.client
492    server = parser.options.server
493    verify = parser.options.verify
494    repair = parser.options.repair
495    cleanup = parser.options.cleanup
496    provision = parser.options.provision
497    reset = parser.options.reset
498    job_labels = parser.options.job_labels
499    no_tee = parser.options.no_tee
500    execution_tag = parser.options.execution_tag
501    ssh_user = parser.options.ssh_user
502    ssh_port = parser.options.ssh_port
503    ssh_pass = parser.options.ssh_pass
504    collect_crashinfo = parser.options.collect_crashinfo
505    control_filename = parser.options.control_filename
506    verify_job_repo_url = parser.options.verify_job_repo_url
507    skip_crash_collection = parser.options.skip_crash_collection
508    ssh_verbosity = int(parser.options.ssh_verbosity)
509    ssh_options = parser.options.ssh_options
510    no_use_packaging = parser.options.no_use_packaging
511    in_lab = bool(parser.options.lab)
512    companion_hosts = _get_companions(parser)
513    dut_servers = _get_dutservers(parser)
514    is_cft = parser.options.cft
515    force_full_log_collection = parser.options.force_full_log_collection
516
517    # can't be both a client and a server side test
518    if client and server:
519        parser.parser.error("Can not specify a test as both server and client!")
520
521    if provision and client:
522        parser.parser.error("Cannot specify provisioning and client!")
523
524    is_special_task = (verify or repair or cleanup or collect_crashinfo or
525                       provision or reset)
526    use_client_trampoline = False
527    if parser.options.control_name:
528        if use_ssp:
529            # When use_ssp is True, autoserv will be re-executed inside a
530            # container preserving the --control-name argument. Control file
531            # will be staged inside the rexecuted autoserv.
532            control = None
533        else:
534            try:
535                control = _stage_control_file(parser.options.control_name,
536                                              results)
537            except error.AutoservError as e:
538                logging.info("Using client trampoline because of: %s", e)
539                control = parser.options.control_name
540                use_client_trampoline = True
541
542    elif parser.args:
543        control = parser.args[0]
544    else:
545        if not is_special_task:
546            parser.parser.error("Missing argument: control file")
547        control = None
548
549    if ssh_verbosity > 0:
550        # ssh_verbosity is an integer between 0 and 3, inclusive
551        ssh_verbosity_flag = '-' + 'v' * ssh_verbosity
552    else:
553        ssh_verbosity_flag = ''
554
555    machines = _get_machines(parser)
556    if group_name and len(machines) < 2:
557        parser.parser.error('-G %r may only be supplied with more than one '
558                            'machine.' % group_name)
559
560    logging.debug("Parser.args is %r", parser.args)
561    try:
562      logging.debug("Parser.options.args is %r", parser.options.args)
563    except AttributeError:
564      logging.debug("No Parser.options.args.")
565
566    try:
567      logging.debug("Parser.options is %r", parser.options)
568    except AttributeError:
569      logging.debug("No Parser.options.")
570    job_kwargs = {
571            'control': control,
572            'args': parser.args[1:],
573            'resultdir': results,
574            'label': label,
575            'user': user,
576            'machines': machines,
577            'machine_dict_list': server_job.get_machine_dicts(
578                    machine_names=machines,
579                    store_dir=os.path.join(results,
580                                           parser.options.host_info_subdir),
581                    in_lab=in_lab,
582                    use_shadow_store=not parser.options.local_only_host_info,
583                    host_attributes=parser.options.host_attributes,
584            ),
585            'client': client,
586            'ssh_user': ssh_user,
587            'ssh_port': ssh_port,
588            'ssh_pass': ssh_pass,
589            'ssh_verbosity_flag': ssh_verbosity_flag,
590            'ssh_options': ssh_options,
591            'group_name': group_name,
592            'tag': execution_tag,
593            'disable_sysinfo': parser.options.disable_sysinfo,
594            'in_lab': in_lab,
595            'use_client_trampoline': use_client_trampoline,
596            'sync_offload_dir': parser.options.sync_offload_dir,
597            'companion_hosts': server_job.get_machine_dicts(
598                    machine_names=companion_hosts,
599                    store_dir=os.path.join(results,
600                                           parser.options.host_info_subdir),
601                    in_lab=in_lab,
602                    use_shadow_store=not parser.options.local_only_host_info,
603                    host_attributes=parser.options.host_attributes),
604            'dut_servers': dut_servers,
605            'is_cft': is_cft,
606            'force_full_log_collection': force_full_log_collection
607    }
608    if parser.options.parent_job_id:
609        job_kwargs['parent_job_id'] = int(parser.options.parent_job_id)
610    if control_filename:
611        job_kwargs['control_filename'] = control_filename
612    if parser.options.image_storage_server:
613        global_config.global_config.override_config_value(
614            'CROS', 'image_storage_server',
615            os.path.join(parser.options.image_storage_server, ''))
616
617    job = server_job.server_job(**job_kwargs)
618
619    job.logging.start_logging()
620
621    # perform checks
622    job.precheck()
623
624    # run the job
625    exit_code = 0
626    auto_start_servod = global_config.global_config.get_config_value(
627            'AUTOSERV', 'auto_start_servod', type=bool, default=False)
628
629    if not utils.is_in_container():
630        # crbug.com/1054522 -- ts_mon setup is broken inside the SSP container
631        # due to a problem in the installed python packages.
632        # Trying to clean up an incorrectly initialized ts_mon state adds a 5
633        # second overhead in process teardown, so avoid setting up ts_mon
634        # entirely inside the SSP container.
635        site_utils.SetupTsMonGlobalState('autoserv', indirect=False,
636                                         short_lived=True)
637    try:
638        try:
639            if repair:
640                if auto_start_servod and len(machines) == 1:
641                    _start_servod(machines[0])
642                job.repair(job_labels)
643            elif verify:
644                job.verify(job_labels)
645            elif provision:
646                job.provision(job_labels)
647            elif reset:
648                job.reset(job_labels)
649            elif cleanup:
650                job.cleanup(job_labels)
651            else:
652                if auto_start_servod and len(machines) == 1:
653                    _start_servod(machines[0])
654                if use_ssp:
655                    try:
656                        _run_with_ssp(job, container_id, job_or_task_id,
657                                        results, parser, ssp_url, machines)
658                    finally:
659                        # Update the ownership of files in result folder.
660                        correct_results_folder_permission(results)
661                else:
662                    if collect_crashinfo:
663                        # Update the ownership of files in result folder. If the
664                        # job to collect crashinfo was running inside container
665                        # (SSP) and crashed before correcting folder permission,
666                        # the result folder might have wrong permission setting.
667                        try:
668                            correct_results_folder_permission(results)
669                        except:
670                            # Ignore any error as the user may not have root
671                            # permission to run sudo command.
672                            pass
673                    metric_name = ('chromeos/autotest/experimental/'
674                                   'autoserv_job_run_duration')
675                    f = {'in_container': utils.is_in_container(),
676                         'success': False}
677                    with metrics.SecondsTimer(metric_name, fields=f) as c:
678                        job.run(verify_job_repo_url=verify_job_repo_url,
679                                only_collect_crashinfo=collect_crashinfo,
680                                skip_crash_collection=skip_crash_collection,
681                                job_labels=job_labels,
682                                use_packaging=(not no_use_packaging))
683                        c['success'] = True
684
685        finally:
686            job.close()
687    except error.AutoservSSPError:
688        # Due to the complexity of the TKO parsing/stainless connection, this
689        # must be 0 so that the "abort" is actually reflected on stainless.
690        exit_code = 0
691        traceback.print_exc()
692    except:
693        exit_code = 1
694        traceback.print_exc()
695    finally:
696        metrics.Flush()
697
698    sys.exit(exit_code)
699
700
701# Job breakdown statuses
702_hs = host_states.Status
703_qs = host_queue_entry_states.Status
704_status_list = [
705        _qs.QUEUED, _qs.RESETTING, _qs.VERIFYING,
706        _qs.PROVISIONING, _hs.REPAIRING, _qs.CLEANING,
707        _qs.RUNNING, _qs.GATHERING, _qs.PARSING]
708_JOB_OVERHEAD_STATUS = autotest_enum.AutotestEnum(*_status_list,
709                                                  string_values=True)
710
711
712def get_job_status(options):
713    """Returns the HQE Status for this run.
714
715    @param options: parser options.
716    """
717    s = _JOB_OVERHEAD_STATUS
718    task_mapping = {
719            'reset': s.RESETTING, 'verify': s.VERIFYING,
720            'provision': s.PROVISIONING, 'repair': s.REPAIRING,
721            'cleanup': s.CLEANING, 'collect_crashinfo': s.GATHERING}
722    match = [task for task in task_mapping if getattr(options, task, False)]
723    return task_mapping[match[0]] if match else s.RUNNING
724
725
726def _require_ssp_from_control(control_name):
727    """Read the value of REQUIRE_SSP from test control file.
728
729    This reads the control file from the prod checkout of autotest and uses that
730    to determine whether to even stage the SSP package on a devserver.
731
732    This means:
733    [1] Any change in REQUIRE_SSP directive in a test requires a prod-push to go
734    live.
735    [2] This function may find that the control file does not exist but the SSP
736    package may contain the test file. This function conservatively returns True
737    in that case.
738
739    This function is called very early in autoserv, before logging is setup.
740    """
741    if not control_name:
742        return True
743    try:
744        path = _control_path_on_disk(control_name)
745    except error.AutoservError as e:
746        sys.stderr.write("autoserv: Could not determine control file path,"
747                         " assuming we need SSP: %s\n" % e)
748        sys.stderr.flush()
749        return True
750    if not os.path.isfile(path):
751        return True
752    control = control_data.parse_control(path)
753    # There must be explicit directive in the control file to disable SSP.
754    if not control or control.require_ssp is None:
755        return True
756    return control.require_ssp
757
758
759def _ssp_base_image_name_or_default(options):
760    """Extract base image name from autoserv options or the global config."""
761    if options.ssp_base_image_name:
762        return options.ssp_base_image_name
763    return global_config.global_config.get_config_value('AUTOSERV',
764                                                        'container_base_name')
765
766
767def main():
768    start_time = datetime.datetime.now()
769    parser = autoserv_parser.autoserv_parser
770    parser.parse_args()
771
772    if len(sys.argv) == 1:
773        parser.parser.print_help()
774        sys.exit(1)
775
776    if parser.options.no_logging:
777        results = None
778    else:
779        results = parser.options.results
780        if not results:
781            results = 'results.' + time.strftime('%Y-%m-%d-%H.%M.%S')
782        results = os.path.abspath(results)
783        resultdir_exists = False
784        for filename in ('control.srv', 'status.log', '.autoserv_execute'):
785            if os.path.exists(os.path.join(results, filename)):
786                resultdir_exists = True
787        if not parser.options.use_existing_results and resultdir_exists:
788            error = "Error: results directory already exists: %s\n" % results
789            sys.stderr.write(error)
790            sys.exit(1)
791
792        # Now that we certified that there's no leftover results dir from
793        # previous jobs, lets create the result dir since the logging system
794        # needs to create the log file in there.
795        if not os.path.isdir(results):
796            os.makedirs(results)
797
798    if parser.options.require_ssp:
799        # This is currently only used for skylab (i.e., when --control-name is
800        # used).
801        use_ssp = _require_ssp_from_control(parser.options.control_name)
802    else:
803        use_ssp = False
804
805
806    if use_ssp:
807        log_dir = os.path.join(results, 'ssp_logs') if results else None
808        if log_dir and not os.path.exists(log_dir):
809            os.makedirs(log_dir)
810    else:
811        log_dir = results
812
813    logging_manager.configure_logging(
814            server_logging_config.ServerLoggingConfig(),
815            results_dir=log_dir,
816            use_console=not parser.options.no_tee,
817            verbose=parser.options.verbose,
818            no_console_prefix=parser.options.no_console_prefix)
819
820    logging.debug('autoserv is running in drone %s.', socket.gethostname())
821    logging.debug('autoserv environment: %r', os.environ)
822    logging.debug('autoserv command was: %s', ' '.join(sys.argv))
823    logging.debug('autoserv parsed options: %s', parser.options)
824    logging.debug('autoserv python version: %s', sys.version)
825
826    if use_ssp:
827        ssp_url = _stage_ssp(parser, results)
828    else:
829        ssp_url = None
830
831    if results:
832        logging.info("Results placed in %s" % results)
833
834        # wait until now to perform this check, so it get properly logged
835        if (parser.options.use_existing_results and not resultdir_exists and
836            not utils.is_in_container()):
837            logging.error("No existing results directory found: %s", results)
838            sys.exit(1)
839
840    if parser.options.write_pidfile and results:
841        pid_file_manager = pidfile.PidFileManager(parser.options.pidfile_label,
842                                                  results)
843        pid_file_manager.open_file()
844    else:
845        pid_file_manager = None
846
847    autotest.Autotest.set_install_in_tmpdir(
848        parser.options.install_in_tmpdir)
849
850    exit_code = 0
851    is_task = (parser.options.verify or parser.options.repair or
852               parser.options.provision or parser.options.reset or
853               parser.options.cleanup or parser.options.collect_crashinfo)
854
855    trace_labels = {
856            'job_id': job_directories.get_job_id_or_task_id(
857                    parser.options.results)
858    }
859    trace = cloud_trace.SpanStack(
860            labels=trace_labels,
861            global_context=parser.options.cloud_trace_context)
862    trace.enabled = parser.options.cloud_trace_context_enabled == 'True'
863    try:
864        try:
865            with trace.Span(get_job_status(parser.options)):
866                run_autoserv(pid_file_manager, results, parser, ssp_url,
867                             use_ssp)
868        except SystemExit as e:
869            exit_code = e.code
870            if exit_code:
871                logging.exception('Uncaught SystemExit with code %s', exit_code)
872        except Exception:
873            # If we don't know what happened, we'll classify it as
874            # an 'abort' and return 1.
875            logging.exception('Uncaught Exception, exit_code = 1.')
876            exit_code = 1
877    finally:
878        if pid_file_manager:
879            pid_file_manager.close_file(exit_code)
880    sys.exit(exit_code)
881
882
883if __name__ == '__main__':
884    main()
885