# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import logging
import re
import six
import sys
import time

from autotest_lib.client.common_lib import error
from autotest_lib.client.common_lib import global_config
from autotest_lib.client.common_lib.cros import dev_server
from autotest_lib.server import afe_utils
from autotest_lib.server import test
from autotest_lib.server import utils
from autotest_lib.server.cros import provision
from autotest_lib.server.cros import provisioner

try:
    from autotest_lib.utils.frozen_chromite.lib import metrics
except ImportError:
    metrics = utils.metrics_mock

_CONFIG = global_config.global_config
# pylint: disable-msg=E1120
_IMAGE_URL_PATTERN = _CONFIG.get_config_value('CROS',
                                              'image_url_pattern',
                                              type=str)


def _metric_name(base_name):
    """Return the full metric path for a provisioning metric.

    @param base_name: The trailing component of the metric name.

    @returns: |base_name| prefixed with 'chromeos/autotest/provision/'.
    """
    return 'chromeos/autotest/provision/' + base_name


def _get_build_metrics_fields(build_name):
    """Extract the metrics fields encoded in a build name.

    @param build_name: The build name to hand to utils.ParseBuildName().

    @returns: The first two elements of the parsed build name (the caller
              treats them as board and build type), or a pair of empty
              strings if the name cannot be parsed.
    """
    try:
        return utils.ParseBuildName(build_name)[0:2]
    except utils.ParseBuildNameException:
        # Metrics are best-effort; an unparseable name must not fail the
        # provision itself.
        logging.warning(
                'Unable to parse build name %s for metrics. '
                'Continuing anyway.', build_name)
        return ('', '')


def _emit_updater_metrics(name_prefix, build_name, failure_reason, duration,
                          fields):
    """Emit the build, failure reason and duration metrics for an update.

    @param name_prefix: Prefix used to build each metric name.
    @param build_name: Value recorded in the build string metric.
    @param failure_reason: Value recorded in the failure reason string
                           metric; nothing is recorded if it is falsy.
    @param duration: Value added to the duration distribution.
    @param fields: Dict of metric fields attached to every metric emitted.
    """
    # reset_after=True is required for String gauges events to ensure that
    # the metrics are not repeatedly emitted until the server restarts.
    metrics.String(_metric_name(name_prefix + '_build_by_devserver_dut'),
                   reset_after=True).set(build_name, fields=fields)
    if failure_reason:
        metrics.String(_metric_name(name_prefix +
                                    '_failure_reason_by_devserver_dut'),
                       reset_after=True).set(failure_reason, fields=fields)
    metrics.SecondsDistribution(
            _metric_name(name_prefix + '_duration_by_devserver_dut')).add(
                    duration, fields=fields)


def _emit_provision_metrics(update_url, dut_host_name, exception, duration):
    """Emit the 'provision' and 'auto_update' metrics for one provision.

    @param update_url: The image URL that was installed; the build name and
                       the devserver hostname are derived from it.
    @param dut_host_name: Hostname of the DUT, recorded as a metric field.
    @param exception: The exception raised by the update, or None when the
                      update succeeded.
    @param duration: How long the update took, as passed on to the duration
                     metric.
    """
    # The following is high cardinality, but sparse.
    # Each DUT is of a single board type, and likely build type.
    #
    # TODO(jrbarnette) The devserver-triggered provisioning code
    # includes retries in certain cases.  For that reason, the metrics
    # distinguish 'provision' metrics which summarize across all
    # retries, and 'auto_update' which summarizes an individual update
    # attempt.  ChromiumOSProvisioner doesn't do retries, so we just report
    # the same information twice.  We should replace the metrics with
    # something better tailored to the current implementation.
    build_name = provisioner.url_to_image_name(update_url)
    board, build_type = _get_build_metrics_fields(build_name)
    fields = {
            'board': board,
            'build_type': build_type,
            'dut_host_name': dut_host_name,
            'dev_server': dev_server.get_resolved_hostname(update_url),
            'success': not exception,
    }
    failure_reason = provisioner.get_update_failure_reason(exception)
    _emit_updater_metrics('provision', build_name, failure_reason, duration,
                          fields)
    # The same dict is reused for the per-attempt metrics, with the attempt
    # number added.
    fields['attempt'] = 1
    _emit_updater_metrics('auto_update', build_name, failure_reason, duration,
                          fields)


class provision_QuickProvision(test.test):
    """A test that can provision a machine to the correct ChromeOS version."""
    version = 1

    def initialize(self, host, value, is_test_na=False):
        """Initialize.

        @param host: The host object to update to |value|.
        @param value: The build type and version to install on the host.
        @param is_test_na: boolean, if True, will simply skip the test
                           and emit TestNAError. The control file
                           determines whether the test should be skipped
                           and passes the decision via this argument. Note
                           we can't raise TestNAError in control file as it
                           won't be caught and handled properly.

        @raises error.TestNAError: if |is_test_na| is True.
        @raises error.TestFail: if |value| is empty.
        """
        if is_test_na:
            raise error.TestNAError(
                    'Test not available for test_that. chroot detected, '
                    'you are probably using test_that.')
        # We check value in initialize so that it fails faster.
        if not value:
            raise error.TestFail('No build version specified.')

    def run_once(self, host, value):
        """The method called by the control file to start the test.

        @param host: The host object to update to |value|.
        @param value: The build type and version to install on the host,
                      optionally ending in provision.CHEETS_SUFFIX.

        @raises error.TestFail: if no devserver can be resolved for the
                                image.
        """
        with_cheets = False
        logging.debug('Start provisioning %s to %s.', host, value)
        if value.endswith(provision.CHEETS_SUFFIX):
            # Escape the suffix so it is stripped as literal text, matching
            # the literal endswith() check above.
            image = re.sub(re.escape(provision.CHEETS_SUFFIX) + '$', '',
                           value)
            with_cheets = True
        else:
            image = value

        # If the host is already on the correct build, we have nothing to do.
        # Note that this means we're not doing any sort of stateful-only
        # update, and that we're relying more on cleanup to do cleanup.
        info = host.host_info_store.get()
        if info.build == value:
            # We can't raise a TestNA, as would make sense, as that makes
            # job.run_test return False as if the job failed.  However, it'd
            # still be nice to get this into the status.log, so we manually
            # emit an INFO line instead.
            self.job.record('INFO', None, None,
                            'Host already running %s' % value)
            return

        try:
            ds = dev_server.ImageServer.resolve(image, host.hostname)
        except dev_server.DevServerException as e:
            # six.reraise() needs an exception instance (or None) as its
            # value argument; handing it a bare string breaks on Python 3
            # and would hide the TestFail behind an AttributeError.
            six.reraise(error.TestFail, error.TestFail(str(e)),
                        sys.exc_info()[2])

        url = _IMAGE_URL_PATTERN % (ds.url(), image)

        logging.debug('Installing image from URL: %s', url)
        start_time = time.time()
        failure = None
        try:
            afe_utils.machine_install_and_update_labels(host,
                                                        url,
                                                        with_cheets,
                                                        staging_server=ds)
        except BaseException as e:
            # Remember the failure only so the metrics can report it; the
            # exception itself is always propagated unchanged.
            failure = e
            raise
        finally:
            _emit_provision_metrics(url, host.hostname, failure,
                                    time.time() - start_time)
        logging.debug('Finished provisioning %s to %s', host, value)