xref: /aosp_15_r20/external/autotest/server/cros/provisioner.py (revision 9c5db1993ded3edbeafc8092d69fe5de2ee02df7)
1*9c5db199SXin Li# Lint as: python2, python3
2*9c5db199SXin Li# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
3*9c5db199SXin Li# Use of this source code is governed by a BSD-style license that can be
4*9c5db199SXin Li# found in the LICENSE file.
5*9c5db199SXin Li
6*9c5db199SXin Lifrom __future__ import print_function
7*9c5db199SXin Li
8*9c5db199SXin Liimport logging
9*9c5db199SXin Liimport os
10*9c5db199SXin Liimport re
11*9c5db199SXin Liimport six
12*9c5db199SXin Liimport sys
13*9c5db199SXin Liimport six.moves.urllib.parse
14*9c5db199SXin Li
15*9c5db199SXin Lifrom autotest_lib.client.bin import utils
16*9c5db199SXin Lifrom autotest_lib.client.common_lib import error
17*9c5db199SXin Lifrom autotest_lib.client.common_lib.cros import dev_server
18*9c5db199SXin Lifrom autotest_lib.client.common_lib.cros import kernel_utils
19*9c5db199SXin Lifrom autotest_lib.server import autotest
20*9c5db199SXin Lifrom autotest_lib.server.cros.dynamic_suite import constants as ds_constants
21*9c5db199SXin Lifrom autotest_lib.server.cros.dynamic_suite import tools
22*9c5db199SXin Li
23*9c5db199SXin Litry:
24*9c5db199SXin Li    from autotest_lib.utils.frozen_chromite.lib import metrics
25*9c5db199SXin Liexcept ImportError:
26*9c5db199SXin Li    metrics = utils.metrics_mock
27*9c5db199SXin Li
28*9c5db199SXin Li
def _metric_name(base_name):
    """Build the full name of a provisioning metric.

    @param base_name: Trailing component of the metric name.

    @return `base_name` prefixed with 'chromeos/autotest/provision/'.
    """
    prefix = 'chromeos/autotest/provision/'
    return prefix + base_name
31*9c5db199SXin Li
32*9c5db199SXin Li
# Name of the provisioning script, both under /usr/local/bin on the DUT
# and under /static on the devserver.
_QUICK_PROVISION_SCRIPT = 'quick-provision'

# PROVISION_FAILED - Flag file marking a provision failure.  It is
# created at the start of every AU procedure (see
# `ChromiumOSProvisioner._prepare_host()`).  Because it lives in
# stateful, a successful update removes it.  Finding this file therefore
# means an earlier update attempt was tried and failed.
PROVISION_FAILED = '/var/tmp/provision_failed'

# LAB_MACHINE_FILE - Flag file that enables special handling on lab
# DUTs.  While it is present, parts of a Chromium OS test image behave
# in ways that are convenient for the test lab.  We generally create it
# right after an update completes.
LAB_MACHINE_FILE = '/mnt/stateful_partition/.labmachine'

# _TARGET_VERSION - File holding the version we are about to update to.
# The CrOS shutdown code reads it to detect and handle certain version
# downgrades.  Specifically, a downgrade may trigger an unwanted
# powerwash in the target build when both of these hold:
#  * Source build is a v4.4 kernel with R69-10756.0.0 or later.
#  * Target build predates the R69-10756.0.0 cutoff.
# When the file is present and indicates a downgrade, the OS shutdown
# code on the DUT knows how to prevent the powerwash.
_TARGET_VERSION = '/run/update_target_version'

# _REBOOT_FAILURE_MESSAGE - Standard message text produced when
# Host.reboot() fails.  The text originates in `wait_for_restart()` in
# client/common_lib/hosts/base_classes.py.
_REBOOT_FAILURE_MESSAGE = 'Host did not return from reboot'

# Ports (as strings) of the devserver and of the GS cache service.
DEVSERVER_PORT = '8082'
GS_CACHE_PORT = '8888'
68*9c5db199SXin Li
69*9c5db199SXin Li
class _AttributedUpdateError(error.TestFail):
    """Base class for update failures that carry an attributed cause.

    Subclasses supply `_SUMMARY` (a short description string) and
    `_CLASSIFIERS` (a sequence of `(pattern, label)` pairs); both are
    read when building `failure_summary`.
    """

    def __init__(self, attribution, msg):
        """Create the error.

        @param attribution: Text describing what the failure is
                            attributed to; prefixed to the message.
        @param msg: The underlying failure message.
        """
        full_message = '%s: %s' % (attribution, msg)
        super(_AttributedUpdateError, self).__init__(full_message)
        # Keep the unadorned message; classification matches against it.
        self._message = msg

    def _classify(self):
        """Return the label of the first classifier matching the message.

        Patterns are applied with `re.match`, so they are anchored at
        the start of the message.

        @return The matching label, or `None` when nothing matches.
        """
        matching_labels = (label for pattern, label in self._CLASSIFIERS
                           if re.match(pattern, self._message))
        return next(matching_labels, None)

    @property
    def failure_summary(self):
        """Summarize this error for metrics reporting."""
        label = self._classify()
        if not label:
            return self._SUMMARY
        return '%s: %s' % (self._SUMMARY, label)
92*9c5db199SXin Li
93*9c5db199SXin Li
class HostUpdateError(_AttributedUpdateError):
    """Update failure attributable to the DUT itself.

    Raise this when the most likely cause of the failure is a condition
    that existed on the DUT before the update started, such as a
    hardware problem or a bug in the software already on the DUT.
    """

    # Message used when the DUT cannot be reached over ssh.
    DUT_DOWN = 'No answer to ssh'

    _SUMMARY = 'DUT failed prior to update'
    # (pattern, label) pairs consulted in order by `_classify()`.
    _CLASSIFIERS = [
            (DUT_DOWN, DUT_DOWN),
            (_REBOOT_FAILURE_MESSAGE, 'Reboot failed'),
    ]

    def __init__(self, hostname, msg):
        """Create the error.

        @param hostname: Name of the DUT the failure is attributed to.
        @param msg: The underlying failure message.
        """
        attribution = 'Error on %s prior to update' % hostname
        super(HostUpdateError, self).__init__(attribution, msg)
113*9c5db199SXin Li
114*9c5db199SXin Li
class ImageInstallError(_AttributedUpdateError):
    """Update failure while installing from the devserver.

    Raise this when the target DUT fails to download and install the
    target image from the devserver, and either the devserver or the
    DUT might be at fault.
    """

    _SUMMARY = 'Image failed to download and install'
    # No finer-grained classification is defined for install failures.
    _CLASSIFIERS = []

    def __init__(self, hostname, devserver, msg):
        """Create the error.

        @param hostname: Name of the DUT being updated.
        @param devserver: Name of the devserver serving the image.
        @param msg: The underlying failure message.
        """
        attribution = ('Download and install failed from %s onto %s' %
                       (devserver, hostname))
        super(ImageInstallError, self).__init__(attribution, msg)
130*9c5db199SXin Li
131*9c5db199SXin Li
class NewBuildUpdateError(_AttributedUpdateError):
    """Update failure attributable to the target build.

    Raise this when updating to a new build fails and the most likely
    cause is a bug in the newly installed target build.
    """

    CHROME_FAILURE = 'Chrome failed to reach login screen'
    ROLLBACK_FAILURE = 'System rolled back to previous build'

    _SUMMARY = 'New build failed'
    # (pattern, label) pairs consulted in order by `_classify()`.
    _CLASSIFIERS = [
            (CHROME_FAILURE, 'Chrome did not start'),
            (ROLLBACK_FAILURE, ROLLBACK_FAILURE),
    ]

    def __init__(self, update_version, msg):
        """Create the error.

        @param update_version: Version of the build being installed.
        @param msg: The underlying failure message.
        """
        attribution = 'Failure in build %s' % update_version
        super(NewBuildUpdateError, self).__init__(attribution, msg)

    @property
    def failure_summary(self):
        """Return a fixed summary string for metrics reporting.

        This override always returns the same text; it does not consult
        `_SUMMARY` or `_CLASSIFIERS`.
        """
        return 'Build failed to work after installing'
157*9c5db199SXin Li
158*9c5db199SXin Li
def _url_to_version(update_url):
    """Extract the ChromeOS version from `update_url`.

    @param update_url: url to the image to update to.

    @return The last component of the URL path, with any '/au/...'
            suffix removed first and surrounding whitespace stripped.
    """
    url_path = six.moves.urllib.parse.urlparse(update_url).path
    # The version is generally the last element of the path.  Delta
    # update URLs are the exception: they are rooted under the version,
    # e.g. http://.../update/.../0.14.755.0/au/0.14.754.0, so drop the
    # au section before picking the last element.
    versioned_path = re.sub('/au/.*', '', url_path)
    last_component = versioned_path.rsplit('/', 1)[-1]
    return last_component.strip()
172*9c5db199SXin Li
173*9c5db199SXin Li
def url_to_image_name(update_url):
    """Derive the image name from `update_url`.

    For example, the URL
        http://172.22.50.205:8082/update/lumpy-release/R27-3837.0.0
    yields lumpy-release/R27-3837.0.0

    The first len('/update/') characters of the URL path are dropped
    without checking that the path actually starts with '/update/'.

    @param update_url: url to the image to update to.
    @returns a string representing the image name in the update_url.

    """
    url_path = six.moves.urllib.parse.urlparse(update_url).path
    prefix_length = len('/update/')
    return url_path[prefix_length:]
186*9c5db199SXin Li
187*9c5db199SXin Li
def get_update_failure_reason(exception):
    """Translate an exception into a failure reason for metrics.

    `exception` should be one raised by a failing
    `ChromiumOSProvisioner.run_provision`.  The result is a string
    describing the failure, or `None` when `exception` is not a truish
    value.

    The set of possible return strings is a limited enumeration, so the
    string can be used in Monarch metrics without worrying about the
    cardinality of its range of values.

    @param exception  Exception to be converted to a failure reason.

    @return A string suitable for use in Monarch metrics, or `None`.
    """
    if not exception:
        return None
    if isinstance(exception, _AttributedUpdateError):
        return exception.failure_summary
    return 'Unknown Error: %s' % type(exception).__name__
211*9c5db199SXin Li
212*9c5db199SXin Li
class ChromiumOSProvisioner(object):
    """Chromium OS specific DUT update functionality."""

    def __init__(self,
                 update_url,
                 host=None,
                 interactive=True,
                 is_release_bucket=None,
                 is_servohost=False,
                 public_bucket=False):
        """Initializes the object.

        @param update_url: The URL we want the update to use.
        @param host: A client.common_lib.hosts.Host implementation.
        @param interactive: Bool whether we are doing an interactive update.
        @param is_release_bucket: If True, use release bucket
            gs://chromeos-releases.
        @param is_servohost: Bool whether the update target is a servohost.
        @param public_bucket: True to copy payloads to a public throwaway GS
            bucket. This avoids using a lab cache server, so local test runs
            can provision without any special setup.
        """
        self.update_url = update_url
        self.host = host
        self.interactive = interactive
        self.update_version = _url_to_version(update_url)
        self._is_release_bucket = is_release_bucket
        self._is_servohost = is_servohost
        self._public_bucket = public_bucket

    def _run(self, cmd, *args, **kwargs):
        """Abbreviated form of self.host.run(...)"""
        return self.host.run(cmd, *args, **kwargs)

    def _rootdev(self, options=''):
        """Returns the stripped output of rootdev <options>.

        @param options: options to run rootdev.

        """
        return self._run('rootdev %s' % options).stdout.strip()

    def _reset_update_engine(self):
        """Resets the host to prepare for a clean update regardless of state."""
        self._run('stop ui || true')
        self._run('stop update-engine || true; start update-engine')

    def _reset_stateful_partition(self):
        """Clear any pending stateful update request."""
        stateful_root = '/mnt/stateful_partition'
        pending_update_paths = ('var_new', 'dev_image_new',
                                '.update_available')
        # TODO(b/165024723): This is a temporary measure until we figure out the
        # root cause of this bug.
        tast_paths = ('dev_image/share/tast/data', 'dev_image/libexec/tast',
                      'dev_image/tmp/tast')
        cmd = ['rm', '-rf']
        cmd += [os.path.join(stateful_root, f)
                for f in pending_update_paths + tast_paths]
        cmd += [_TARGET_VERSION, '2>&1']
        # NOTE(review): the command is handed to host.run() as a list, not
        # a string -- confirm the Host implementation accepts that.
        self._run(cmd)

    def _set_target_version(self):
        """Set the "target version" for the update."""
        # Version strings that come from release buckets do not have RXX- at the
        # beginning. So remove this prefix only if the version has it.
        version_number = (self.update_version.split('-')[1] if
                          '-' in self.update_version else self.update_version)
        self._run('echo %s > %s' % (version_number, _TARGET_VERSION))

    def _revert_boot_partition(self):
        """Revert the boot partition.

        @return The result of running `/postinst` on the partition
                reported by `rootdev -s`.
        """
        part = self._rootdev('-s')
        logging.warning('Reverting update; Boot partition will be %s', part)
        return self._run('/postinst %s 2>&1' % part)

    @staticmethod
    def _shebang_interpreter(first_line):
        """Extract the interpreter from a script's first line.

        @param first_line  First line of a script, already stripped.
        @return The text following the leading `#!` with surrounding
                whitespace removed, or `None` if the line is empty,
                does not start with `#!`, or names no interpreter.
        """
        if not first_line or not first_line.startswith('#!'):
            return None
        # Slice off exactly the two-character prefix; `lstrip('#!')` would
        # instead strip any run of '#' and '!' characters.
        return first_line[2:].strip() or None

    def _get_remote_script(self, script_name):
        """Ensure that `script_name` is present on the DUT.

        The given script (e.g. `quick-provision`) may be present in the
        stateful partition under /usr/local/bin, or we may have to
        download it from the devserver.

        Determine whether the script is present or must be downloaded
        and download if necessary.  Then, return a command fragment
        sufficient to run the script from wherever it now lives on the
        DUT.

        @param script_name  The name of the script as expected in
                            /usr/local/bin and on the devserver.
        @return A string with the command (minus arguments) that will
                run the target script, or `None` if the downloaded file
                does not start with a usable `#!` line.
        """
        remote_script = '/usr/local/bin/%s' % script_name
        if self.host.path_exists(remote_script):
            return remote_script
        self.host.run('mkdir -p -m 1777 /usr/local/tmp')
        remote_tmp_script = '/usr/local/tmp/%s' % script_name
        server_name = six.moves.urllib.parse.urlparse(self.update_url)[1]
        script_url = 'http://%s/static/%s' % (server_name, script_name)
        fetch_script = 'curl -Ss -o %s %s && head -1 %s' % (
                remote_tmp_script, script_url, remote_tmp_script)

        first_line = self._run(fetch_script).stdout.strip()

        # The downloaded copy is run through the interpreter named on its
        # `#!` line rather than being executed directly.
        script_interpreter = self._shebang_interpreter(first_line)
        if script_interpreter:
            return '%s %s' % (script_interpreter, remote_tmp_script)
        return None

    def _prepare_host(self):
        """Make sure the target DUT is working and ready for update.

        Initially, the target DUT's state is unknown.  The DUT is
        expected to be online, but we strive to be forgiving if Chrome
        and/or the update engine aren't fully functional.

        @raises HostUpdateError if the DUT is not up.
        """
        # Summary of work, and the rationale:
        #  1. Reboot, because it's a good way to clear out problems.
        #  2. Touch the PROVISION_FAILED file, to allow repair to detect
        #     failure later.
        #  3. Run the hook for host class specific preparation.
        #  4. Stop Chrome, because the system is designed to eventually
        #     reboot if Chrome is stuck in a crash loop.
        #  5. Force `update-engine` to start, because if Chrome failed
        #     to start properly, the status of the `update-engine` job
        #     will be uncertain.
        if not self.host.is_up():
            raise HostUpdateError(self.host.hostname, HostUpdateError.DUT_DOWN)
        self._reset_stateful_partition()
        # Servohost reboot logic is handled by themselves.
        if not self._is_servohost:
            self.host.reboot(timeout=self.host.REBOOT_TIMEOUT)
            self._run('touch %s' % PROVISION_FAILED)
        self.host.prepare_for_update()
        # Servohost will only update via quick provision.
        if not self._is_servohost:
            self._reset_update_engine()
        logging.info('Updating from version %s to %s.',
                     self.host.get_release_version(), self.update_version)

    def _quick_provision_with_gs_cache(self, provision_command, devserver_name,
                                       image_name):
        """Run quick_provision using GsCache server.

        @param provision_command: The path of quick_provision command.
        @param devserver_name: The devserver name and port (optional).
        @param image_name: The image to be installed.
        """
        logging.info('Try quick provision with gs_cache.')
        # If enabled, the GsCache server listens on a different port on the
        # devserver.  Swap only a trailing ':<devserver port>'; a blanket
        # string replace would also rewrite the same digits appearing
        # inside the host name or address.
        devserver_port_suffix = ':' + DEVSERVER_PORT
        if devserver_name.endswith(devserver_port_suffix):
            gs_cache_server = (devserver_name[:-len(DEVSERVER_PORT)] +
                               GS_CACHE_PORT)
        else:
            gs_cache_server = devserver_name
        gs_cache_url = (
                'http://%s/download/%s' %
                (gs_cache_server, 'chromeos-releases'
                 if self._is_release_bucket else 'chromeos-image-archive'))

        # Check if GS_Cache server is enabled on the server.
        self._run('curl -s -o /dev/null %s' % gs_cache_url)

        command = '%s --noreboot %s %s' % (provision_command, image_name,
                                           gs_cache_url)
        self._run(command)
        metrics.Counter(
                _metric_name('quick_provision')).increment(fields={
                        'devserver': devserver_name,
                        'gs_cache': True
                })

    def _quick_provision_with_devserver(self, provision_command,
                                        devserver_name, image_name):
        """Run quick_provision using legacy devserver.

        @param provision_command: The path of quick_provision command.
        @param devserver_name: The devserver name and port (optional).
        @param image_name: The image to be installed.

        @raises error.TestFail if staging artifacts on the devserver
                fails.
        """
        logging.info('Try quick provision with devserver.')
        ds = dev_server.ImageServer('http://%s' % devserver_name)
        archive_url = ('gs://chromeos-releases/%s' %
                       image_name if self._is_release_bucket else None)
        try:
            ds.stage_artifacts(
                    image_name,
                    ['quick_provision', 'stateful', 'autotest_packages'],
                    archive_url=archive_url)
        except dev_server.DevServerException as e:
            # six.reraise() needs an exception instance as its value: on
            # Python 3 it reads `value.__traceback__`, which a plain str
            # does not have.
            six.reraise(error.TestFail, error.TestFail(str(e)),
                        sys.exc_info()[2])

        static_url = 'http://%s/static' % devserver_name
        command = '%s --noreboot %s %s' % (provision_command, image_name,
                                           static_url)
        self._run(command)
        metrics.Counter(
                _metric_name('quick_provision')).increment(fields={
                        'devserver': devserver_name,
                        'gs_cache': False
                })

    def _quick_provision_with_public_bucket(self, provision_command,
                                            image_name):
        """Run quick_provision using public GS bucket.

        @param provision_command: The path of quick_provision command.
        @param image_name: The image to be installed.
        """
        logging.info('Try quick provision with public bucket.')

        # The bucket URL is the part of update_url before image_name, minus
        # the one separator character in between.
        bucket_url = self.update_url[:self.update_url.find(image_name) - 1]
        command = '%s --noreboot %s %s' % (provision_command, image_name,
                                           bucket_url)
        self._run(command)

    def _install_update(self):
        """Install an update using the `quick-provision` script.

        This uses the `quick-provision` script to download and install
        a root FS, kernel and stateful filesystem content.

        @return The kernel expected to be booted next, or `None` if the
                installation failed and the DUT was reverted.
        """
        logging.info('Installing image at %s onto %s', self.update_url,
                     self.host.hostname)
        server_name = six.moves.urllib.parse.urlparse(self.update_url)[1]
        if self._public_bucket:
            image_name = self.update_url.partition('provision/')[2]
        else:
            image_name = url_to_image_name(self.update_url)

        logging.info('Installing image using quick-provision.')
        provision_command = self._get_remote_script(_QUICK_PROVISION_SCRIPT)
        try:
            if not provision_command:
                # Without this check the literal text 'None' would be run
                # as the provisioning command on the DUT.  Fail right away
                # into the cleanup path below instead.
                raise error.TestFail(
                        'Unable to find a runnable %s script on the DUT' %
                        _QUICK_PROVISION_SCRIPT)
            if self._public_bucket:
                self._quick_provision_with_public_bucket(
                        provision_command, image_name)
            else:
                try:
                    self._quick_provision_with_gs_cache(
                            provision_command, server_name, image_name)
                except Exception as e:
                    # GsCache is optional; fall back to the devserver.
                    logging.error(
                            'Failed to quick-provision with gscache with '
                            'error %s', e)
                    self._quick_provision_with_devserver(
                            provision_command, server_name, image_name)

            self._set_target_version()
            return kernel_utils.verify_kernel_state_after_update(self.host)
        except Exception:
            # N.B.  We handle only `Exception` here.  Non-Exception
            # classes (such as KeyboardInterrupt) are handled by our
            # caller.
            #
            # NOTE(review): the failure is logged and then swallowed, so
            # run_provision() carries on with an expected kernel of
            # `None` -- confirm that callers rely on this before changing
            # it to re-raise.
            logging.exception('quick-provision script failed;')
            self._revert_boot_partition()
            self._reset_stateful_partition()
            self._reset_update_engine()
            return None

    def _complete_update(self, expected_kernel):
        """Finish the update, and confirm that it succeeded.

        Initial condition is that the target build has been downloaded
        and installed on the DUT, but has not yet been booted.  This
        function is responsible for rebooting the DUT, and checking that
        the new build is running successfully.

        @param expected_kernel: kernel expected to be active after reboot.
        """
        # Regarding the 'crossystem' command below: In some cases,
        # the update flow puts the TPM into a state such that it
        # fails verification.  We don't know why.  However, this
        # call papers over the problem by clearing the TPM during
        # the reboot.
        #
        # We ignore failures from 'crossystem'.  Although failure
        # here is unexpected, and could signal a bug, the point of
        # the exercise is to paper over problems; allowing this to
        # fail would defeat the purpose.
        self._run('crossystem clear_tpm_owner_request=1', ignore_status=True)
        self.host.reboot(timeout=self.host.REBOOT_TIMEOUT)

        # Touch the lab machine file to leave a marker that
        # distinguishes this image from other test images.
        # Afterwards, we must re-run the autoreboot script because
        # it depends on the LAB_MACHINE_FILE.
        autoreboot_cmd = ('FILE="%s" ; [ -f "$FILE" ] || '
                          '( touch "$FILE" ; start autoreboot )')
        self._run(autoreboot_cmd % LAB_MACHINE_FILE)
        try:
            kernel_utils.verify_boot_expectations(
                    expected_kernel, NewBuildUpdateError.ROLLBACK_FAILURE,
                    self.host)
        except Exception:
            # When the system is rolled back, the provision_failed file is
            # removed. So add it back here and re-raise the exception.
            self._run('touch %s' % PROVISION_FAILED)
            raise

        logging.debug('Cleaning up old autotest directories.')
        try:
            installed_autodir = autotest.Autotest.get_installed_autodir(
                    self.host)
            self._run('rm -rf ' + installed_autodir)
        except autotest.AutodirNotFoundError:
            logging.debug('No autotest installed directory found.')

    def run_provision(self):
        """Perform a full provision of a DUT in the test lab.

        This downloads and installs the root FS and stateful partition
        content needed for the update specified in `self.host` and
        `self.update_url`.  The provision is performed according to the
        requirements for provisioning a DUT for testing the requested
        build.

        At the end of the procedure, metrics are reported describing the
        outcome of the operation.

        @returns A tuple of the form `(image_name, attributes)`, where
            `image_name` is the name of the image installed, and
            `attributes` is new attributes to be applied to the DUT.

        @raises HostUpdateError if preparing the host fails.
        @raises ImageInstallError if `_install_update()` raises.
        @raises NewBuildUpdateError if completing the update fails.
        """
        server_name = ""
        if not self._public_bucket:
            server_name = dev_server.get_resolved_hostname(self.update_url)
            metrics.Counter(_metric_name('install')).increment(
                    fields={'devserver': server_name})

        # In each phase below, errors that already carry an attribution
        # pass through untouched; anything else is wrapped in the error
        # class for that phase.
        try:
            self._prepare_host()
        except _AttributedUpdateError:
            raise
        except Exception as e:
            logging.exception('Failure preparing host prior to update.')
            raise HostUpdateError(self.host.hostname, str(e))

        try:
            expected_kernel = self._install_update()
        except _AttributedUpdateError:
            raise
        except Exception as e:
            logging.exception('Failure during download and install.')
            raise ImageInstallError(self.host.hostname, server_name, str(e))

        # Servohost will handle post update process themselves.
        if not self._is_servohost:
            try:
                self._complete_update(expected_kernel)
            except _AttributedUpdateError:
                raise
            except Exception as e:
                logging.exception('Failure from build after update.')
                raise NewBuildUpdateError(self.update_version, str(e))

        image_name = url_to_image_name(self.update_url)
        # update_url is different from devserver url needed to stage autotest
        # packages, therefore, resolve a new devserver url here.
        devserver_url = dev_server.ImageServer.resolve(
                image_name, self.host.hostname).url()
        repo_url = tools.get_package_url(devserver_url, image_name)
        return image_name, {ds_constants.JOB_REPO_URL: repo_url}
574