xref: /aosp_15_r20/external/autotest/server/hosts/cros_repair.py (revision 9c5db1993ded3edbeafc8092d69fe5de2ee02df7)
1*9c5db199SXin Li# Lint as: python2, python3
2*9c5db199SXin Li# Copyright 2016 The Chromium OS Authors. All rights reserved.
3*9c5db199SXin Li# Use of this source code is governed by a BSD-style license that can be
4*9c5db199SXin Li# found in the LICENSE file.
5*9c5db199SXin Li
6*9c5db199SXin Lifrom __future__ import absolute_import
7*9c5db199SXin Lifrom __future__ import division
8*9c5db199SXin Lifrom __future__ import print_function
9*9c5db199SXin Li
10*9c5db199SXin Liimport logging
11*9c5db199SXin Liimport math
12*9c5db199SXin Liimport six
13*9c5db199SXin Liimport sys
14*9c5db199SXin Liimport time
15*9c5db199SXin Li
16*9c5db199SXin Liimport common
17*9c5db199SXin Lifrom autotest_lib.client.common_lib import error
18*9c5db199SXin Lifrom autotest_lib.client.common_lib import global_config
19*9c5db199SXin Lifrom autotest_lib.client.common_lib import hosts
20*9c5db199SXin Lifrom autotest_lib.client.common_lib import utils
21*9c5db199SXin Lifrom autotest_lib.client.common_lib.cros import dev_server
22*9c5db199SXin Lifrom autotest_lib.client.common_lib.cros import retry
23*9c5db199SXin Lifrom autotest_lib.client.common_lib.cros import tpm_utils
24*9c5db199SXin Lifrom autotest_lib.server import afe_utils
25*9c5db199SXin Lifrom autotest_lib.server import crashcollect
26*9c5db199SXin Lifrom autotest_lib.server.cros import provisioner
27*9c5db199SXin Lifrom autotest_lib.server.cros.dynamic_suite import tools
28*9c5db199SXin Lifrom autotest_lib.server.cros.dynamic_suite import constants as ds_constants
29*9c5db199SXin Lifrom autotest_lib.server.cros.servo.keyboard import servo_keyboard_flasher
30*9c5db199SXin Lifrom autotest_lib.server.cros.repair import mac_address_helper
31*9c5db199SXin Lifrom autotest_lib.server.hosts import cros_constants
32*9c5db199SXin Lifrom autotest_lib.server.hosts import cros_firmware
33*9c5db199SXin Lifrom autotest_lib.server.hosts import repair_utils
34*9c5db199SXin Lifrom autotest_lib.site_utils.admin_audit import verifiers as audit_verify
35*9c5db199SXin Lifrom autotest_lib.site_utils.admin_audit import constants as audit_const
36*9c5db199SXin Lifrom autotest_lib.site_utils.admin_audit import battery_validator
37*9c5db199SXin Lifrom six.moves import range
38*9c5db199SXin Li
39*9c5db199SXin Litry:
40*9c5db199SXin Li    from autotest_lib.utils.frozen_chromite.lib import metrics
41*9c5db199SXin Liexcept ImportError:
42*9c5db199SXin Li    metrics = utils.metrics_mock
43*9c5db199SXin Li
44*9c5db199SXin Lifrom autotest_lib.utils.frozen_chromite.lib import timeout_util
45*9c5db199SXin Li
# Default trigger names for the servo reset repair action.
# NOTE(review): the consumer of this tuple is outside this section; the
# purpose is inferred from the constant's name.
DEFAULT_SERVO_RESET_TRIGGER = ('ping', 'ssh', 'stop_start_ui', 'power')
52*9c5db199SXin Li
53*9c5db199SXin Li
# _DEV_MODE_ALLOWED_POOLS - The set of pools that are allowed to be
# in dev mode (usually, those should be unmanaged devices).
#
# The value is read from the comma-separated `pools_dev_mode_allowed`
# setting of the `CROS` config section.  Blank entries are dropped and
# surrounding whitespace is trimmed, so an unset (empty) setting yields
# an empty set rather than a set holding the empty string.
_DEV_MODE_ALLOWED_POOLS = {
        pool.strip()
        for pool in global_config.global_config.get_config_value(
                'CROS',
                'pools_dev_mode_allowed',
                type=str,
                default='',
                allow_blank=True).split(',')
        if pool.strip()
}
64*9c5db199SXin Li
# When true, the dev mode check is suppressed for every DUT.  Primarily
# used for moblab, where all DUTs are in dev mode.
_DEV_MODE_ALWAYS_ALLOWED = global_config.global_config.get_config_value(
        'CROS', 'dev_mode_allowed', type=bool, default=False)
72*9c5db199SXin Li
73*9c5db199SXin Li# Triggers for the 'provision', 'powerwash', and 'usb' repair actions.
74*9c5db199SXin Li# These are also used as dependencies in the `CrosHost` repair
75*9c5db199SXin Li# sequence, as follows:
76*9c5db199SXin Li#
77*9c5db199SXin Li# provision:
78*9c5db199SXin Li#   - triggers: _CROS_PROVISION_TRIGGERS
79*9c5db199SXin Li#   - depends on: _CROS_USB_TRIGGERS + _CROS_POWERWASH_TRIGGERS
80*9c5db199SXin Li#
81*9c5db199SXin Li# powerwash:
82*9c5db199SXin Li#   - triggers: _CROS_POWERWASH_TRIGGERS + _CROS_PROVISION_TRIGGERS
83*9c5db199SXin Li#   - depends on: _CROS_USB_TRIGGERS
84*9c5db199SXin Li#
85*9c5db199SXin Li# usb:
86*9c5db199SXin Li#   - triggers: _CROS_USB_TRIGGERS + _CROS_POWERWASH_TRIGGERS +
87*9c5db199SXin Li#               _CROS_PROVISION_TRIGGERS
88*9c5db199SXin Li#   - depends on: _CROS_USB_DEPENDENCIES
89*9c5db199SXin Li#
90*9c5db199SXin Li# N.B. AC power detection depends on software on the DUT, and there
91*9c5db199SXin Li# have been bugs where detection failed even though the DUT really
92*9c5db199SXin Li# did have power.  So, we make the 'power' verifier a trigger for
93*9c5db199SXin Li# reinstall repair actions, too.
94*9c5db199SXin Li#
# TODO(jrbarnette):  provision repair can't fix all problems reported by
# the 'cros' verifier; it's listed as a provision trigger as a
# simplification.  The ultimate fix is to split the 'cros' verifier
# into smaller individual verifiers.
# Verifier names that trigger the 'provision' repair action.
_CROS_PROVISION_TRIGGERS = (
        'power', 'rwfw', 'fwstatus', 'python', 'hwid', 'cros',
        'dev_default_boot',
)
# Verifier names that trigger the 'powerwash' repair action.
_CROS_POWERWASH_TRIGGERS = ('tpm', 'good_provision', 'ext4')
# Verifier names that trigger the 'usb' repair action.
_CROS_USB_TRIGGERS = ('ping', 'ssh', 'writable')
# Currently the same names as _CROS_USB_TRIGGERS, kept as a separate tuple.
_JETSTREAM_USB_TRIGGERS = ('ping', 'ssh', 'writable')
_CROS_FIRMWARE_TRIGGERS = ('ping', 'ssh')
_CROS_AC_TRIGGERS = ('ping', 'power')
# Verifier names the 'usb' repair action depends on.
_CROS_USB_DEPENDENCIES = ('usb_drive',)
128*9c5db199SXin Li
129*9c5db199SXin Li
class ACPowerVerifier(hosts.Verifier):
    """Check for AC power and battery charging state.

    The verifier fails when line power is not reported as online, or
    when the battery is discharging while its level is below
    `cros_constants.MIN_BATTERY_LEVEL`.  Missing or unparsable battery
    data is logged and otherwise ignored.
    """

    # Battery discharging state in power_supply_info file.
    BATTERY_DISCHARGING = 'Discharging'
    # Power controller can discharge battery any time till 90% for any model.
    # Setting level to 90% in case we have wearout of it.
    BATTERY_DISCHARGE_MIN = 90

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        # pylint: disable=missing-docstring
        info = self._load_info(host)
        self._validate_ac_plugged(info)
        self._validate_battery(host, info)

    def _load_info(self, host):
        """Read the power supply info from the host.

        @param host  Host object providing `get_power_supply_info()`.

        @returns The value returned by `host.get_power_supply_info()`.
        @raises hosts.AutoservVerifyError if the read command fails.
        """
        try:
            info = host.get_power_supply_info()
        except error.AutoservRunError:
            raise hosts.AutoservVerifyError(
                    'Failed to get power supply info')
        return info

    def _validate_ac_plugged(self, info):
        """Validate that DUT is plugged to the AC.

        @param info  Power supply info as returned by `_load_info()`.

        @raises hosts.AutoservVerifyError if line power is not online,
                or if the info has no line power status at all.
        """
        try:
            if info['Line Power']['online'] != 'yes':
                raise hosts.AutoservVerifyError(
                        'AC power is not plugged in')
        except KeyError:
            raise hosts.AutoservVerifyError(
                    'Cannot determine AC power status')

    def _validate_battery(self, host, info):
        """Validate the battery level and charging state.

        If the battery is discharging below `BATTERY_DISCHARGE_MIN`,
        try once to restore normal charge control before judging the
        state.

        @param host  Host object to inspect (and to run `ectool` on).
        @param info  Power supply info as returned by `_load_info()`.

        @raises hosts.AutoservVerifyError if the battery is discharging
                and its level is below `cros_constants.MIN_BATTERY_LEVEL`.
        """
        host_info = host.host_info_store.get()
        if host_info.get_label_value('power') == 'battery':
            if 'Battery' not in info:
                data = {'host': host.hostname, 'model': host_info.model}
                metrics.Counter('chromeos/autotest/battery_not_detected'
                                ).increment(fields=data)
                logging.info('Battery is not presented but expected!'
                             ' Probably hardware issue.')

        try:
            charging_state = info['Battery']['state']
            battery_level = float(info['Battery']['percentage'])

            # Collect info to determine which battery level is better to call
            # as MIN_BATTERY_LEVEL for DUTs in the lab.
            if battery_level < cros_constants.MIN_BATTERY_LEVEL:
                level_by_10 = int(math.floor(battery_level / 10.0)) * 10
                metrics_data = {
                        'host': host.hostname,
                        'level': level_by_10,
                        'mode': charging_state
                }
                metrics.Counter('chromeos/autotest/battery/state2').increment(
                        fields=metrics_data)

            if (charging_state == self.BATTERY_DISCHARGING
                        and battery_level < self.BATTERY_DISCHARGE_MIN):
                logging.debug('Try to fix discharging state of the battery. '
                              'Possible that a test left wrong state.')
                # Here is the chance that battery is discharging because
                # of some test did not clean up the state.
                # We are going to try to fix it by set charging to normal.
                host.run('ectool chargecontrol normal', ignore_status=True)
                # wait to change state.
                time.sleep(10)
                info = self._load_info(host)
                charging_state = info['Battery']['state']
                fixed = charging_state != self.BATTERY_DISCHARGING
                # Report what actually happened; the attempt is not
                # guaranteed to succeed.
                if fixed:
                    logging.debug('Fixed battery discharge mode.')
                else:
                    logging.debug('Battery is still discharging after'
                                  ' resetting charge control.')
                # TODO (@otabek) remove metrics after research
                metrics_data = {
                        'model': host.host_info_store.get().model,
                        'fixed': fixed
                }
                metrics.Counter(
                    'chromeos/autotest/repair/chargecontrol_fixed'
                ).increment(fields=metrics_data)

            if (battery_level < cros_constants.MIN_BATTERY_LEVEL
                        and charging_state == self.BATTERY_DISCHARGING):
                # TODO(@xianuowang) remove metrics here once we have device
                # health profile to collect history of DUT's metrics.
                metrics_data = {'host': host.hostname,
                                'board': host.host_info_store.get().board}
                metrics.Counter(
                    'chromeos/autotest/repair/verifier/power').increment(
                        fields=metrics_data)
                raise hosts.AutoservVerifyError(
                        'Battery is in discharging state and current level'
                        ' is less than %s%%' %
                        cros_constants.MIN_BATTERY_LEVEL)
        except (KeyError, ValueError):
            # Battery data is absent or not numeric: treat as "unknown"
            # rather than failing the verifier.
            logging.warning('Cannot determine battery state -'
                            ' skipping check.')

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'The DUT is plugged in to AC power and battery is charging'
234*9c5db199SXin Li
235*9c5db199SXin Li
class ProvisioningLabelsVerifier(hosts.Verifier):
    """Confirm that the ChromeOS image on the host matches its
    provision labels.

    A test may change the image on the DUT without updating the
    cros-version label or the job_repo_url attribute.  The next test
    scheduled on that host would then run against unexpected data and
    could yield a false positive result.
    """

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        # pylint: disable=missing-docstring
        self._verify_cros_version(host)
        self._verify_job_repo_url(host)

    def _verify_cros_version(self, host):
        """Check the cros-version label against the image on the host.

        @param host  Host object providing `verify_cros_version_label()`.

        @raises hosts.AutoservVerifyError only when the host reports an
                actual mismatch.
        """
        try:
            versions_agree = host.verify_cros_version_label()
        except Exception as err:
            # Only a real version mismatch should fail this verifier;
            # any other error is reported and treated as "no mismatch"
            # so it does not get in the way of debugging.
            logging.warning(
                    'Unexpected error during verify cros version on %s; %s',
                    host.hostname, err)
            return

        if versions_agree:
            return
        raise hosts.AutoservVerifyError(
                'ChromeOS image on the host does not match to cros-version'
                ' label.')

    def _verify_job_repo_url(self, host):
        """Check the job_repo_url attribute against the image on the host.

        The check is skipped when the attribute is empty or absent.

        @param host  Host object to inspect.

        @raises hosts.AutoservVerifyError if the host's release builder
                path does not occur in job_repo_url.
        """
        attributes = host.host_info_store.get().attributes
        expected_url = attributes.get(ds_constants.JOB_REPO_URL, '')
        if not expected_url:
            logging.debug('job_repo_url is empty. Skip check.')
            return
        if host.get_release_builder_path() in expected_url:
            return
        raise hosts.AutoservVerifyError(
                'ChromeOS image on the host does not match to job_repo_url'
                ' label.')

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'ChromeOS image on host matches cros_version label'
286*9c5db199SXin Li
287*9c5db199SXin Li
class WritableVerifier(hosts.Verifier):
    """
    Confirm the stateful file systems are writable.

    Linux commonly reacts to certain unexpected file system errors
    (hardware errors in block devices among them) by switching the
    file system to read-only.  This verifier checks that this has not
    happened.

    Two file systems must be writable for critical operations like AU,
    and both are covered:
      * The (unencrypted) stateful system which includes
        /mnt/stateful_partition.
      * The encrypted stateful partition, which includes /var.

    Bind mounts are not checked; they are expected to fail the same
    way as their underlying main mounts.  Whether the Linux kernel can
    guarantee that is untested...
    """

    # N.B. Order matters here:  Encrypted stateful is loop-mounted from
    # a file in unencrypted stateful, so we don't test for errors in
    # encrypted stateful if unencrypted fails.
    _TEST_DIRECTORIES = ['/mnt/stateful_partition', '/var/tmp']

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        # pylint: disable=missing-docstring
        # Directories are probed one at a time, in order, and the first
        # failure ends the check on purpose (see _TEST_DIRECTORIES).
        for path in self._TEST_DIRECTORIES:
            if host.is_file_system_writable([path]):
                continue
            raise hosts.AutoservVerifyError(
                    "Can't create a file in %s" % path)

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'The stateful filesystems are writable'
326*9c5db199SXin Li
327*9c5db199SXin Li
class EXT4fsErrorVerifier(hosts.Verifier):
    """
    Confirm we have not seen critical file system kernel errors.

    Two patterns are searched for in the kernel log (`dmesg`) of the
    host: EXT4-fs errors for the device mounted at
    /mnt/stateful_partition, and the "Data will be lost" message.
    """

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        # pylint: disable=missing-docstring
        # grep for stateful FS errors of the type "EXT4-fs error (device sda1):"
        # The $(...) part runs on the DUT and extracts, from mountinfo,
        # the device name of the /mnt/stateful_partition mount.
        # Raw fragments keep the regex backslashes without relying on
        # Python passing unknown escape sequences through.
        command = (r'dmesg | grep -E "EXT4-fs error \(device '
                   "$(cut -d ' ' -f 5,9 /proc/$$/mountinfo | "
                   "grep -e '^/mnt/stateful_partition ' | "
                   "cut -d ' ' -f 2 | cut -d '/' -f 3)"
                   r'\):"')
        output = host.run(command=command, ignore_status=True).stdout
        if output:
            sample = output.splitlines()[0]
            message = 'Saw file system error: %s' % sample
            raise hosts.AutoservVerifyError(message)
        # Check for other critical FS errors.
        command = 'dmesg | grep "This should not happen!!  Data will be lost"'
        output = host.run(command=command, ignore_status=True).stdout
        if output:
            message = 'Saw file system error: Data will be lost'
            raise hosts.AutoservVerifyError(message)
        # Reaching this point means neither pattern matched.  That is
        # the healthy outcome, so it must not be logged as an error.
        logging.debug('No critical file system errors found in dmesg.')

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Did not find critical file system errors'
359*9c5db199SXin Li
360*9c5db199SXin Li
class UpdateSuccessVerifier(hosts.Verifier):
    """
    Checks that the DUT successfully finished its last provision job.

    Any update (e.g. for a Provision job) begins by creating a marker
    file named `PROVISION_FAILED` in a part of the stateful partition
    that is removed when an update finishes successfully.  A marker
    that is still present therefore means a prior update failed.

    The verifier fails if the marker file exists on the host.
    """

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        # pylint: disable=missing-docstring
        # `test -f` exits with 0 only when the marker file exists.
        marker_check = host.run(
                'test -f %s' % provisioner.PROVISION_FAILED,
                ignore_status=True)
        if marker_check.exit_status != 0:
            return
        raise hosts.AutoservVerifyError('Last provision on this DUT failed')

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'The most recent provision attempt on this DUT succeeded'
388*9c5db199SXin Li
389*9c5db199SXin Li
class TPMStatusVerifier(hosts.Verifier):
    """Verify that the host's TPM is in a good state.

    The check is skipped on virtual machines, and whenever the TPM
    status cannot be read or lacks the expected fields.
    """

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        # pylint: disable=missing-docstring
        if _is_virtual_machine(host):
            # Neither the host TPM nor an emulated TPM is forwarded to
            # qemu VMs, so there is nothing to verify there.
            logging.debug('Skipped verification %s on VM', self)
            return

        try:
            tpm_state = TpmStatus(host)
        except hosts.AutoservVerifyError:
            logging.info('Cannot determine the Cryptohome valid status - '
                         'skipping check.')
            return

        try:
            enabled = tpm_state['is_enabled']
            if not enabled:
                raise hosts.AutoservVerifyError(
                        'TPM is not enabled -- Hardware is not working.')
            # An owned TPM must still have the default SRK auth.
            srk_unloadable = (tpm_state['is_owned']
                              and not tpm_state['is_srk_default_auth'])
            if srk_unloadable:
                raise hosts.AutoservVerifyError('Cannot load the TPM SRK')
        except KeyError:
            # A missing status field means "unknown", not "broken".
            logging.info('Cannot determine the TPM valid status - '
                         'skipping check.')

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'The host\'s TPM is available and working'
422*9c5db199SXin Li
423*9c5db199SXin Li
class PythonVerifier(hosts.Verifier):
    """Confirm the presence of a working Python interpreter."""

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        # pylint: disable=missing-docstring
        probe = host.run('python -c "import json"', ignore_status=True)
        if probe.exit_status == 0:
            return

        message = 'The python interpreter is broken'
        # Exit status 127 is the shell's "command not found"; confirm
        # with `which` before reporting the interpreter as missing.
        if probe.exit_status == 127:
            located = host.run('which python', ignore_status=True)
            if located.exit_status != 0 or not located.stdout:
                message = 'Python is missing; may be caused by powerwash'
        raise hosts.AutoservVerifyError(message)

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Python on the host is installed and working'
445*9c5db199SXin Li
446*9c5db199SXin Li
class DevModeVerifier(hosts.Verifier):
    """Verify that the host is not in dev mode.

    The check is skipped when dev mode is allowed globally
    (`_DEV_MODE_ALWAYS_ALLOWED`) or when the host belongs to one of
    the pools in `_DEV_MODE_ALLOWED_POOLS`.
    """

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        # pylint: disable=missing-docstring
        # Some pools are allowed to be in dev mode
        info = host.host_info_store.get()
        if (_DEV_MODE_ALWAYS_ALLOWED or
                bool(info.pools & _DEV_MODE_ALLOWED_POOLS)):
            return

        result = host.run('crossystem devsw_boot', ignore_status=True)
        # Strip the output before comparing so that surrounding
        # whitespace cannot turn a '0' into a false dev mode report.
        devsw_boot = result.stdout.strip()
        if devsw_boot != '0':
            raise hosts.AutoservVerifyError('The host is in dev mode')

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'The host should not be in dev mode'
467*9c5db199SXin Li
468*9c5db199SXin Li
class DevDefaultBootVerifier(hosts.Verifier):
    """Verify that the host is set to boot the internal disk by default."""

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        # pylint: disable=missing-docstring
        boot_target = host.run('crossystem dev_default_boot',
                               ignore_status=True).stdout.strip()
        if boot_target == 'disk':
            return
        raise hosts.AutoservVerifyError(
                'The host has incorrect dev_default_boot value: %r'
                % boot_target)

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'The host should have dev_default_boot=disk'
486*9c5db199SXin Li
487*9c5db199SXin Li
class HWIDVerifier(hosts.Verifier):
    """Verify that the host has HWID & serial number.

    The values read from the host are compared with the ones recorded
    in host_info.  A HWID change is only reported (metric and log); a
    missing or mismatching serial number marks the DUT as needing
    re-deployment.
    """

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        # pylint: disable=missing-docstring
        info = host.host_info_store.get()
        if not info.board or not info.model:
            # if board or model missed in host_info file then it is empty
            # skip verifier
            return
        info_hwid = info.attributes.get('HWID')
        info_serial_number = info.attributes.get('serial_number')

        if not info_hwid or not info_serial_number:
            logging.info('Missing HWID or/and SerialNumber.'
                         ' Probably device was not deployed properly.'
                         ' Marking DUT for need re-deployment.')
            host.set_device_repair_state(
                    cros_constants.DEVICE_STATE_NEEDS_DEPLOY)
            return

        host_hwid = host.run('crossystem hwid', ignore_status=True).stdout
        host_serial_number = self._get_serial_number(host, info_serial_number)
        if not host_hwid or not host_serial_number:
            raise hosts.AutoservVerifyError(
                    'Failed to get HWID & Serial Number for host %s' %
                    host.hostname)

        if host_hwid != info_hwid:
            # We not fail verifier as it not critical for majority tests.
            metrics.Counter('chromeos/autotest/repair/hwid_change').increment(
                    fields={
                            'host': host.hostname,
                            'board': info.board or ''
                    })
            logging.info(
                    'HWID changed to: %s required manual work'
                    ' to fix it.', host_hwid)

        if host_serial_number and host_serial_number != info_serial_number:
            logging.info(
                    'The SerialNumber mismatch detected %s != %s.'
                    ' Probably attempt to replace DUT without deployment.'
                    ' Marking DUT for need re-deployment.', info_serial_number,
                    host_serial_number)
            host.set_device_repair_state(
                    cros_constants.DEVICE_STATE_NEEDS_DEPLOY)

    def _get_serial_number(self, host, serial_number):
        """Read serial_number from VPD.

        If VPD does not have any value for serial_number then it will
        try to restore from host_info.

        @param host             CrosHost
        @param serial_number    Serial-number from host-info

        @returns The stdout of the last `vpd -g serial_number` call; it
                 is empty when the value could not be read or restored.
        """
        req = host.run('vpd -g serial_number', ignore_status=True)
        # serial_number not found in the VPD info
        if not req.stdout and req.exit_status == 3 and serial_number:
            logging.debug('Cannot find serial_number from VPD.')
            # check if vpd working fine without error
            l1 = host.run('vpd -l', ignore_status=True)
            # The pattern is single-quoted for the shell so that grep
            # receives the double quotes literally and looks for
            # "serial_number"= in the listing.  Exit status 1 from the
            # pipeline then means "listing works, key is absent".
            l2 = host.run('vpd -l | grep \'"serial_number"=\'',
                          ignore_status=True)
            if l1.exit_status == 0 and l2.exit_status == 1:
                logging.info('Start restoring serial_number:%s for VPD.',
                             serial_number)
                # update serial_number for VPD
                cmd = 'vpd -s serial_number=%s'
                host.run(cmd % serial_number, ignore_status=True)
                host.run('dump_vpd_log --force', ignore_status=True)
                # reading from VPD to see what we updated
                req = host.run('vpd -g serial_number', ignore_status=True)
        return req.stdout

    def _is_applicable(self, host):
        """Report whether this verifier should run for the host.

        @param host  Host object providing `is_satlab()`.

        @returns False for Satlab hosts, True otherwise.
        """
        if host.is_satlab():
            logging.info('Not critical for Satlab. Skipping')
            return False
        return True

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'The host should have valid HWID and Serial Number'
575*9c5db199SXin Li
576*9c5db199SXin Li
class EnrollmentStateVerifier(hosts.Verifier):
    """Verify that the device's enrollment state is clean.

    Two signals combine into three possible enrollment states.
    Signal 1 - Whether the install attributes file exists at
               /home/.shadow/install_attributes.pb
    Signal 2 - The "check_enrollment" value from VPD, readable from the
               cache file
               /mnt/stateful_partition/unencrypted/cache/vpd/full-v2.txt

    The states:
    State 1 - Enrolled device: signal 1 present, check_enrollment=1.
    State 2 - Consumer owned device: signal 1 present, check_enrollment=0.
    State 3 - Enrolled device that was powerwashed: signal 1 absent.
              With check_enrollment=1 the device performs a forced
              re-enrollment check and, depending on the server response,
              may be forced to enroll again. With check_enrollment=0 the
              device can be used like a new one.

    State 1 and the check_enrollment=1 flavor of state 3 are treated as
    unacceptable because they may interfere with normal tests. The check
    implemented here looks only at the check_enrollment value in the VPD
    cache.
    """

    VPD_CACHE = '/mnt/stateful_partition/unencrypted/cache/vpd/full-v2.txt'

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        """Fail (non-critically) when the VPD cache asks for enrollment.

        @param host     Host to inspect.

        @raises hosts.AutoservNonCriticalVerifyError when the VPD cache
                holds check_enrollment=1.
        """
        if not self._get_enrollment_state(host):
            return
        raise hosts.AutoservNonCriticalVerifyError(
                'The device is enrolled, it may interfere with some tests.')

    def _get_enrollment_state(self, host):
        """Read check_enrollment from the VPD cache file on the host.

        @param host     Host to run grep on.

        @returns True only when grep succeeds and its stripped output is
                 exactly "check_enrollment"="1"; False otherwise,
                 including when grep exits non-zero.
        """
        logging.debug('checking enrollment state from VPD cache...')
        grep_cmd = 'grep "check_enrollment" %s' % self.VPD_CACHE
        response = host.run(grep_cmd, ignore_status=True)
        if response.exit_status != 0:
            # Any grep failure is reported and treated as "not enrolled".
            logging.error(
                    'Unexpected error occured during verify enrollment state'
                    ' in VPD cache, skipping verify process.')
            return False

        cached_entry = response.stdout.strip()
        logging.info('Enrollment state in VPD cache: %s', cached_entry)
        return cached_entry == '"check_enrollment"="1"'

    def _is_applicable(self, host):
        """Run only for hosts whose OS type is cros, empty or missing.

        @param host     Host whose host-info is consulted.
        """
        info = host.host_info_store.get()
        # A host-info without an os attribute is assumed to be cros.
        os_type = getattr(info, 'os', 'cros')
        return os_type in ('', 'cros')

    @property
    def description(self):
        """One-line summary of what this verifier checks."""
        return 'The enrollment state is clean on the host'
636*9c5db199SXin Li
637*9c5db199SXin Li
class FirmwareTpmVerifier(hosts.Verifier):
    """Verifier that firmware tpm info is correct.

    For dev-signed firmware, tpm_fwver and tpm_kernver reported from
    crossystem should always be 0x10001. Firmware update on DUTs with
    incorrect tpm_fwver or tpm_kernver may fail due to firmware
    rollback protection.
    """
    # Pairs of (crossystem field, value the field is expected to report).
    CHECK_LIST = [
            ('tpm_fwver', '0x00010001'),
            ('tpm_kernver', '0x00010001'),
    ]

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        """Compare every CHECK_LIST field against crossystem output.

        @param host     Host to query with crossystem.

        @raises hosts.AutoservNonCriticalVerifyError when a field cannot
                be read or its value differs from the expected one.
        """
        for field, expected_value in self.CHECK_LIST:
            self._check_field(host, field, expected_value)

    def _check_field(self, host, field, expected_value):
        """Check a single crossystem field; raise on read error/mismatch.

        @param host             Host to query with crossystem.
        @param field            Name of the crossystem field.
        @param expected_value   Exact stdout expected from crossystem.
        """
        result = host.run('crossystem %s' % field, ignore_status=True)
        if result.exit_status != 0:
            raise hosts.AutoservNonCriticalVerifyError(
                    'Unable to get %s from crossystem.' % field)
        actual_value = result.stdout
        if actual_value == expected_value:
            return
        mismatch = ('Unexpected %s value: %s, expected: %s. This error'
                    ' may cause firmware provision fail due to the'
                    ' rollback protection.')
        raise hosts.AutoservNonCriticalVerifyError(
                mismatch % (field, actual_value, expected_value))

    def _is_applicable(self, host):
        """Defer to cros_firmware to decide whether the host qualifies."""
        return cros_firmware._is_firmware_testing_device(host)

    @property
    def description(self):
        """One-line summary of what this verifier checks."""
        return 'Firmware tpm info is correct in crossystem.'
674*9c5db199SXin Li
675*9c5db199SXin Li
class JetstreamTpmVerifier(hosts.Verifier):
    """Verify that Jetstream TPM is in a good state."""

    @retry.retry(error.AutoservError, timeout_min=2, delay_sec=10)
    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        """Check TpmStatus flags and the cryptohome TPM readiness report.

        @param host     Host whose TPM is inspected.

        @raises hosts.AutoservVerifyError when a status flag is falsy,
                when the TPM is not reported ready, or when a command
                fails with error.AutoservRunError.
        """
        # Flags are inspected in this order; the first falsy one wins.
        required_flags = (
                ('tpm_enabled', 'TPM is not enabled'),
                ('tpm_owned', 'TPM is not owned'),
                ('tpm_can_load_srk', 'TPM cannot load SRK'),
                ('tpm_can_load_srk_pubkey', 'TPM cannot load SRK pubkey'),
        )
        try:
            status = TpmStatus(host)
            for flag_name, complaint in required_flags:
                if not getattr(status, flag_name):
                    raise hosts.AutoservVerifyError(complaint)

            # Check that the TPM is fully initialized. The output of this
            # command is line-oriented property/value pairs.
            tpm_report = host.run('cryptohome --action=tpm_status').stdout
            if 'TPM Ready: true' not in tpm_report:
                raise hosts.AutoservVerifyError('TPM is not ready')
        except error.AutoservRunError:
            raise hosts.AutoservVerifyError(
                    'Could not determine TPM status')

    @property
    def description(self):
        """One-line summary of what this verifier checks."""
        return 'Jetstream TPM state check'
707*9c5db199SXin Li
708*9c5db199SXin Li
class JetstreamAttestationVerifier(hosts.Verifier):
    """Verify that Jetstream attestation client has a certificate."""

    @retry.retry(error.AutoservError, timeout_min=2, delay_sec=10)
    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        """Run two cryptohome queries and look for the expected markers.

        @param host     Host to query.

        @raises hosts.AutoservVerifyError when a marker is missing from
                the command output or a command fails with
                error.AutoservRunError.
        """
        # (command, text that must appear in stdout, failure message).
        # The first command's output is in text protobuf format.
        probes = (
                ('cryptohome --action=tpm_more_status',
                 'attestation_prepared: true',
                 'Attestation has not been prepared'),
                ('cryptohome --action=tpm_attestation_get_ek',
                 'EK Certificate',
                 'Endorsement certificate not found'),
        )
        try:
            for command, marker, complaint in probes:
                output = host.run(command).stdout
                if marker not in output:
                    raise hosts.AutoservVerifyError(complaint)
        except error.AutoservRunError:
            raise hosts.AutoservVerifyError(
                    'Unable to fetch endorsement certificate')

    @property
    def description(self):
        """One-line summary of what this verifier checks."""
        return 'Jetstream attestation endorsement check'
735*9c5db199SXin Li
736*9c5db199SXin Li
class JetstreamServicesVerifier(hosts.Verifier):
    """Verify that Jetstream services are running."""

    # Retry for b/62576902
    @retry.retry(error.AutoservError, timeout_min=1, delay_sec=10)
    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        """Look up the ap-controller process with pgrep.

        @param host     Host to query.

        @raises hosts.AutoservVerifyError when the pgrep command fails
                with error.AutoservRunError.
        """
        lookup_cmd = 'pgrep ap-controller'
        try:
            host.run(lookup_cmd)
        except error.AutoservRunError:
            raise hosts.AutoservVerifyError(
                    'ap-controller process is not running')

    @property
    def description(self):
        """One-line summary of what this verifier checks."""
        return 'Jetstream services must be running'
755*9c5db199SXin Li
756*9c5db199SXin Li
class StopStartUIVerifier(hosts.Verifier):
    """Verify that command 'stop ui' won't crash the DUT.

    'stop ui' is run in AU and provision. Some bad images broke this
    command, which in turn broke provisioning for every following test.
    This verifier makes sure the command works, so that a failure can
    trigger reimaging to a good version.
    """

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        """Restart the UI job and fail only if the command times out.

        The exit status of the command is ignored.

        @param host     Host to run the command on.

        @raises hosts.AutoservVerifyError on error.AutoservSSHTimeout.
        """
        restart_cmd = 'stop ui && start ui'
        try:
            host.run(restart_cmd, ignore_status=True, timeout=45)
        except error.AutoservSSHTimeout:
            raise hosts.AutoservVerifyError(
                    "Got timeout when stop ui/start ui. DUT might crash.")

    @property
    def description(self):
        """One-line summary of what this verifier checks."""
        return 'The DUT image works fine when stop ui/start ui.'
777*9c5db199SXin Li
778*9c5db199SXin Li
class GscToolPresentVerifier(hosts.Verifier):
    """Verify that GSC tool is functional.

    When the board/model is expected to have the GSC tool but it is not
    working, the host needs a re-image to recover it. The tool is
    expected on hosts whose host-info carries the 'cr50' label.
    """

    VERIFY_GSC_CMD = 'gsctool -a -f'

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        """Run VERIFY_GSC_CMD and require a zero exit status.

        @param host     Host to run the command on.

        @raises hosts.AutoservNonCriticalVerifyError when the command
                exits with a non-zero status.
        """
        outcome = host.run(self.VERIFY_GSC_CMD, ignore_status=True,
                           timeout=10)
        if outcome.exit_status == 0:
            logging.debug('GSC tool is functional.')
            return
        raise hosts.AutoservNonCriticalVerifyError(
                "GSC tool issue detected.")

    def _is_applicable(self, host):
        """Run only when host-info has a truthy 'cr50' label value.

        @param host     Host whose host-info is consulted.
        """
        cr50_label = host.host_info_store.get().get_label_value('cr50')
        if not cr50_label:
            logging.info('GSC is not on the host.')
            return False
        return True

    @property
    def description(self):
        """One-line summary of what this verifier checks."""
        return 'Verify GSC tool is functional.'
807*9c5db199SXin Li
808*9c5db199SXin Li
class ServoUSBDriveVerifier(hosts.Verifier):
    """Verify that USB drive on Servo is good to use.

    Checks that the USB drive is detected on servo and validated on the
    servohost, and looks at whether it is marked for replacement.
    """

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        """Probe the servo USB drive and record its state in host-info.

        @param host     Host whose servo USB drive is checked.

        @raises hosts.AutoservNonCriticalVerifyError when the probe
                yields no device (or fails with AutoservRepairError).
        """
        try:
            usb_dev = host._servo_host._probe_and_validate_usb_dev()
        except hosts.AutoservRepairError as e:
            # The USB drive was not detected by servod.
            logging.debug('(Not critical) %s', e)
            usb_dev = ''

        host_info = host.host_info_store.get()
        if not usb_dev:
            self._store_usb_state(host, host_info,
                                  audit_const.HW_STATE_NOT_DETECTED)
            raise hosts.AutoservNonCriticalVerifyError(
                    'USB-drive is not detected or bad')

        # Check if USB-drive marked for replacement.
        usb_state = host_info.get_label_value(
                audit_const.SERVO_USB_STATE_PREFIX)
        marked_for_replacement = (
                usb_state
                and usb_state == audit_const.HW_STATE_NEED_REPLACEMENT)
        if marked_for_replacement:
            # A USB-key marked for replacement is still allowed, to
            # collect metrics on whether the DUT can be recovered with it.
            # TODO(otabek): restore raising
            # AutoservNonCriticalVerifyError('USB-drive marked for
            # replacement') when crbug.com/1164408 is fixed.
            return

        # The USB-drive was detected and is not marked for replacement.
        # Set as normal for future audit.
        self._store_usb_state(host, host_info, audit_const.HW_STATE_NORMAL)

    def _store_usb_state(self, host, host_info, state):
        """Set the servo USB state label and commit host-info.

        @param host         Host owning the host_info_store.
        @param host_info    Host-info object to update.
        @param state        Value for the SERVO_USB_STATE_PREFIX label.
        """
        host_info.set_version_label(audit_const.SERVO_USB_STATE_PREFIX,
                                    state)
        host.host_info_store.commit(host_info)

    def _is_applicable(self, host):
        """Run only when the host has a truthy servo attribute."""
        return bool(host.servo)

    @property
    def description(self):
        """One-line summary of what this verifier checks."""
        return 'Ensure USB drive on Servo is in good state.'
858*9c5db199SXin Li
859*9c5db199SXin Li
class DUTStorageVerifier(hosts.Verifier):
    """Verify that main storage on DUT is good to use.

    Checks that the DUT drive reports SMART stats that show no issues.
    The verifier can mark the DUT for replacement when the stats show
    worn-out storage.
    """

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        """Run the storage audit and act on the resulting state.

        @param host     Host whose storage is audited.

        @raises hosts.AutoservNonCriticalVerifyError when no state is
                available (see the review note in the body).
        """
        storage_audit = audit_verify.VerifyDutStorage(host)
        storage_audit.verify(set_label=True, run_badblocks='NOT')
        state = storage_audit.get_state()
        if not state:
            state = audit_const.HW_STATE_UNKNOWN
        # NOTE(review): after the fallback above this branch can only
        # fire if HW_STATE_UNKNOWN itself is falsy - confirm whether the
        # fallback or this check is the intended behavior.
        if not state:
            raise hosts.AutoservNonCriticalVerifyError(
                    'DUT storage did not detected or state cannot extracted.')
        if state != audit_const.HW_STATE_NEED_REPLACEMENT:
            return
        logging.info('Detected issue with storage on the DUT.')
        host.set_device_needs_replacement()

    @property
    def description(self):
        """One-line summary of what this verifier checks."""
        return 'Ensure DUT storage SMART information is in good state.'
884*9c5db199SXin Li
885*9c5db199SXin Li
class AuditBattery(hosts.Verifier):
    """Verify that battery on DUT is good to use.

    Asks the battery validator for the battery state. The verifier marks
    the DUT for replacement when the validator reports that the battery
    needs replacement.
    """

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        """Validate the battery and act on the resulting state.

        @param host     Host whose battery is audited.

        @raises hosts.AutoservNonCriticalVerifyError when the validator
                fails or returns no state.
        """
        state = None
        try:
            state = self._get_validator(host).validate()
        except Exception as e:
            # Best effort: a validator failure must not stop the main
            # process; it ends up as the non-critical error below.
            logging.debug('(Not critical) %s', e)
        if not state:
            raise hosts.AutoservNonCriticalVerifyError(
                    'DUT battery did not detected or state cannot extracted.')
        if state == audit_const.HW_STATE_NEED_REPLACEMENT:
            # The message used to say "storage" (copy-paste from the
            # storage verifier), which mislabeled battery problems in logs.
            logging.info('Detected issue with battery on the DUT.')
            host.set_device_needs_replacement()

    def _is_applicable(self, host):
        """Run only when the validator says a battery is expected."""
        return self._get_validator(host).is_battery_expected()

    def _get_validator(self, host):
        """Return the BatteryValidator, creating it on first use.

        The instance is kept on self so _is_applicable() and verify()
        share one validator.

        @param host     Host passed to BatteryValidator on creation.
        """
        if not getattr(self, '_validator', None):
            self._validator = battery_validator.BatteryValidator(host)
        return self._validator

    @property
    def description(self):
        """One-line summary of what this verifier checks."""
        return 'Ensure DUT battery is in good state.'
921*9c5db199SXin Li
922*9c5db199SXin Li
class ServoKeyboardMapVerifier(hosts.Verifier):
    """Non-critical verifier that flashes the servo keyboard for the host.

    Checks whether the host supports the servo keyboard image and runs
    the flasher's update when it does.
    """

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        """Update the servo keyboard map when the image is supported.

        @param host     Host the keyboard map belongs to.

        @raises hosts.AutoservNonCriticalVerifyError on any failure.
        """
        try:
            keyboard_flasher = servo_keyboard_flasher.ServoKeyboardMapFlasher()
            supported = keyboard_flasher.is_image_supported(host)
            if supported:
                keyboard_flasher.update(host)
        except Exception as e:
            # Every failure is logged and downgraded to a non-critical one.
            logging.debug('(Not critical) %s', e)
            raise hosts.AutoservNonCriticalVerifyError(
                    'Fail to verify/update servo keyboard map on the host.')

    def _is_applicable(self, host):
        """Run only when the host has a truthy servo attribute."""
        return bool(host.servo)

    @property
    def description(self):
        """One-line summary of what this verifier does."""
        return 'Verify and update servo keyboard map.'
948*9c5db199SXin Li
949*9c5db199SXin Li
class ServoMacAddressVerifier(hosts.Verifier):
    """Non-critical verifier caching the NIC mac address of the host on servo.

    Servo_v4 is plugged into the DUT and provides its NIC. The mac
    address is cached on the servod side for better debugging.
    """

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        """Let MacAddressHelper refresh the cached address if needed.

        @param host     Host whose NIC mac address is cached.

        @raises hosts.AutoservNonCriticalVerifyError on any failure.
        """
        try:
            mac_address_helper.MacAddressHelper().update_if_needed(host)
        except Exception as e:
            # Every failure is logged and downgraded to a non-critical one.
            logging.debug('(Not critical) %s', e)
            raise hosts.AutoservNonCriticalVerifyError(
                    'Fail to verify/update servo NIC mac address for host.')

    def _is_applicable(self, host):
        """Run only when the host has a truthy servo attribute."""
        return bool(host.servo)

    @property
    def description(self):
        """One-line summary of what this verifier does."""
        return 'Verify and update cached NIC mac address.'
975*9c5db199SXin Li
976*9c5db199SXin Li
class _ResetRepairAction(hosts.RepairAction):
    """Common handling for repair actions that reset a DUT."""

    def _collect_logs(self, host):
        """Collect logs from a successfully repaired DUT.

        @param host     Host to collect crash info from.
        """
        # The returned directory is not used here; the call is kept
        # because it may have side effects - TODO confirm.
        crashcollect.get_crashinfo_dir(host, 'after_%s' % self.tag)
        # Collect crash info.
        crashcollect.get_crashinfo(host, None)

    def _check_reset_success(self, host):
        """Check whether reset succeeded, and gather logs if possible.

        @param host     Host that was just reset.

        @raises hosts.AutoservRepairError when the host does not come
                up within host.BOOT_TIMEOUT.
        """
        # Waiting to boot device after repair action.
        if not host.wait_up(host.BOOT_TIMEOUT):
            raise hosts.AutoservRepairError(
                    'Host %s is offline after %s.' % (host.hostname, self.tag),
                    'failed_to_boot_after_' + self.tag)

        if host.get_verifier_state('ssh') == hosts.VERIFY_SUCCESS:
            logging.debug(
                    'Skip collection logs due DUT was sshable before')
            return

        try:
            # Collect logs once we regain ssh access before
            # clobbering them.
            self._collect_logs(host)
        except Exception:
            # The DUT is up, so the repair counts as a success even if
            # log gathering fails; just log the failure and move on.
            logging.exception(
                    'Non-critical failure in log collection during %s.',
                    self.tag)
1010*9c5db199SXin Li
1011*9c5db199SXin Li
class ServoSysRqRepair(_ResetRepairAction):
    """
    Repair a Chrome device by sending a system request to the kernel.

    Sending the Alt+VolUp+x key combination (aka sysrq-x) 3 times asks
    the kernel to panic itself and reboot while conserving the kernel
    logs in console ramoops.
    """

    @timeout_util.TimeoutDecorator(cros_constants.REPAIR_TIMEOUT_SEC)
    def repair(self, host):
        """Press sysrq-x three times via servo, then wait for the DUT.

        @param host     Host to reset.

        @raises hosts.AutoservRepairError when a key press fails or the
                host stays offline afterwards.
        """
        repair_utils.require_servo(host, ignore_state=True)
        # DUT health is not checked between presses, as killing Chrome
        # is not really likely to fix the DUT SSH.
        presses_done = 0
        while presses_done < 3:
            self._press_sysrq_x(host)
            # less than 5 seconds between presses.
            time.sleep(2.0)
            presses_done += 1
        self._check_reset_success(host)

    def _press_sysrq_x(self, host):
        """Send one sysrq-x, turning TestFail into a repair error.

        @param host     Host whose servo sends the key combination.
        """
        try:
            host.servo.sysrq_x()
        except error.TestFail as ex:
            raise hosts.AutoservRepairError(
                    'cannot press sysrq-x: %s.' % str(ex),
                    'cannot_press_sysrq_x')

    @property
    def description(self):
        """One-line summary of what this repair action does."""
        return 'Reset the DUT via keyboard sysrq-x'
1043*9c5db199SXin Li
1044*9c5db199SXin Li
class ServoResetRepair(_ResetRepairAction):
    """Repair a Chrome device by resetting it with servo."""

    @timeout_util.TimeoutDecorator(cros_constants.REPAIR_TIMEOUT_SEC)
    def repair(self, host):
        """Reset the DUT through the servo power state controller.

        @param host     Host to reset.
        """
        repair_utils.require_servo(host, ignore_state=True)
        power_state = host.servo.get_power_state_controller()
        power_state.reset()
        self._check_reset_success(host)

    def _is_applicable(self, host):
        """Run only when the host has a truthy servo attribute."""
        return bool(host.servo)

    @property
    def description(self):
        """One-line summary of what this repair action does."""
        return 'Reset the DUT via servo'
1064*9c5db199SXin Li
1065*9c5db199SXin Li
class ServoCr50RebootRepair(_ResetRepairAction):
    """
    Repair a Chrome device by resetting cr50 by servo.

    Reset cr50 which is ec+ccd reset.
    """

    @timeout_util.TimeoutDecorator(cros_constants.REPAIR_TIMEOUT_SEC)
    def repair(self, host):
        """Reset cr50 via servo, then re-initialize servo when required.

        @param host     Host to reset.
        """
        try:
            power_state = host.servo.get_power_state_controller()
            power_state.cr50_reset()
            self._check_reset_success(host)
        finally:
            # A cr50 reset clears some init such as `ccd testlab open`,
            # so servo is re-initialized afterwards when the main device
            # uses cr50 console commands. This runs even on failure.
            needs_reinit = host.servo.main_device_uses_gsc_drv()
            if needs_reinit:
                host.servo.initialize_dut()

    def _is_applicable(self, host):
        """Run only when servo exists and has the 'cr50_reboot' control."""
        return bool(host.servo and host.servo.has_control('cr50_reboot'))

    @property
    def description(self):
        """One-line summary of what this repair action does."""
        return 'Reset(cr50) the DUT via servo'
1096*9c5db199SXin Li
1097*9c5db199SXin Li
class DevDefaultBootRepair(hosts.RepairAction):
    """Repair a CrOS target by setting dev_default_boot to 'disk'"""

    @timeout_util.TimeoutDecorator(cros_constants.SHORT_REPAIR_TIMEOUT_SEC)
    def repair(self, host):
        """Set dev_default_boot=disk via crossystem, ignoring exit status.

        @param host     Host to run crossystem on.
        """
        set_boot_cmd = 'crossystem dev_default_boot=disk'
        host.run(set_boot_cmd, ignore_status=True)

    @property
    def description(self):
        """One-line summary of what this repair action does."""
        return "Set dev_default_boot to 'disk'"
1110*9c5db199SXin Li
1111*9c5db199SXin Li
class CrosRebootRepair(repair_utils.RebootRepair):
    """Repair a CrOS target by clearing dev mode and rebooting it."""

    @timeout_util.TimeoutDecorator(cros_constants.REPAIR_TIMEOUT_SEC)
    def repair(self, host):
        """Reset GBB flags, request leaving dev mode, then reboot.

        @param host: host under repair.
        """
        # N.B. The reboot must happen whether or not clearing dev mode
        # works, so both commands ignore their exit status.
        clear_dev_mode_cmds = (
                '/usr/share/vboot/bin/set_gbb_flags.sh 0',
                'crossystem disable_dev_request=1',
        )
        for cmd in clear_dev_mode_cmds:
            host.run(cmd, ignore_status=True)
        super(CrosRebootRepair, self).repair(host)

    @property
    def description(self):
        """Short human-readable name of this repair action."""
        return 'Reset GBB flags and Reboot the host'
1130*9c5db199SXin Li
1131*9c5db199SXin Li
class ProvisioningLabelsRepair(hosts.RepairAction):
    """Repair issues with the provisioning labels of the host.

    The repair only cleans the labels up; per the original notes the next
    provisioning re-generates the required fields.
    """

    @timeout_util.TimeoutDecorator(cros_constants.SHORT_REPAIR_TIMEOUT_SEC)
    def repair(self, host):
        """Remove the provisioning labels of the host.

        @param host: host under repair.
        """
        afe_utils.clean_provision_labels(host)

    @property
    def description(self):
        """Short human-readable name of this repair action."""
        return 'Cleanup provisioning labels for the host'
1147*9c5db199SXin Li
1148*9c5db199SXin Li
class EnrollmentCleanupRepair(hosts.RepairAction):
    """Cleanup enrollment state on ChromeOS device"""

    @timeout_util.TimeoutDecorator(cros_constants.REPAIR_TIMEOUT_SEC)
    def repair(self, host):
        """Reset the VPD enrollment flag and request a TPM owner clear.

        @param host: host under repair.
        """
        # Step 1: reset VPD enrollment state.  A failure here is not
        # ignored (no ignore_status).
        reset_vpd_cmd = '/usr/sbin/update_rw_vpd check_enrollment 0'
        host.run(reset_vpd_cmd)

        # Step 2: clear TPM Owner state.
        # NOTE(review): the description mentions a reboot; presumably it is
        # performed inside ClearTPMOwnerRequest -- confirm.
        tpm_utils.ClearTPMOwnerRequest(host,
                                       wait_for_ready=True,
                                       timeout=host.BOOT_TIMEOUT)

    def _is_applicable(self, host):
        """Apply only to hosts whose OS type is 'cros' or unset.

        @param host: host under repair.
        @return True when the host info reports os '' or 'cros', or has no
                `os` attribute at all.
        """
        host_info = host.host_info_store.get()
        # A missing os type in host_info is assumed to mean cros.
        os_type = getattr(host_info, 'os', 'cros')
        return os_type in ('', 'cros')

    @property
    def description(self):
        """Short human-readable name of this repair action."""
        return 'Cleanup enrollment state and reboot the host'
1170*9c5db199SXin Li
1171*9c5db199SXin Li
class ProvisionRepair(hosts.RepairAction):
    """
    Repair by re-installing a test image using quick provision.

    Try to install the DUT's designated "stable test image" using the
    standard procedure for installing a new test image via quick provision.
    """

    @timeout_util.TimeoutDecorator(cros_constants.LONG_REPAIR_TIMEOUT_SEC)
    def repair(self, host):
        """Stage the repair image on a devserver and install it on the DUT.

        @param host: host under repair.
        """
        repair_image = host.get_cros_repair_image_name()
        logging.info('Staging build for provision: %s', repair_image)
        image_server = dev_server.ImageServer.resolve(repair_image,
                                                      host.hostname)
        # The download is triggered without waiting for it to finish.
        image_server.trigger_download(repair_image, synchronous=False)
        url_pattern = tools.image_url_pattern()
        update_url = url_pattern % (image_server.url(), repair_image)
        afe_utils.machine_install_and_update_labels(host, update_url)

    @property
    def description(self):
        """Short human-readable name of this repair action."""
        return 'Re-install the stable build on the host'
1195*9c5db199SXin Li
1196*9c5db199SXin Li
class PowerWashRepair(ProvisionRepair):
    """
    Powerwash the DUT, then re-install using quick provision.

    After the powerwash the stable test image is re-installed exactly as
    `ProvisionRepair` does.
    """

    @timeout_util.TimeoutDecorator(cros_constants.LONG_REPAIR_TIMEOUT_SEC)
    def repair(self, host):
        """Request a powerwash, reboot, then run the provision repair.

        @param host: host under repair.
        """
        # Write the reset request file on the stateful partition; the
        # reboot that follows uses the powerwash-specific boot timeout.
        request_powerwash_cmd = (
                'echo "fast safe" > '
                '/mnt/stateful_partition/factory_install_reset')
        host.run(request_powerwash_cmd)
        host.reboot(timeout=host.POWERWASH_BOOT_TIMEOUT, wait=True)
        super(PowerWashRepair, self).repair(host)

    @property
    def description(self):
        """Short human-readable name of this repair action."""
        return 'Powerwash and then re-install the stable build on the host'
1217*9c5db199SXin Li
1218*9c5db199SXin Li
class ServoInstallRepair(hosts.RepairAction):
    """
    Reinstall a test image from USB using servo.

    Use servo to re-install the DUT's designated "stable test image"
    from servo-attached USB storage.

    `repair()` records in `self.boot_in_recovery` whether the DUT managed
    to boot in recovery mode during the most recent attempt.
    """

    # Timeout value for this repair action is specially configured as we need
    # stage image to usb drive, install chromeos image.
    @timeout_util.TimeoutDecorator(60 * 60)
    def repair(self, host):
        """Stage the repair image on the USB key if needed and install it.

        @param host: host under repair; servo is required.

        @raises error.AutotestError: if writing the image to the servo USB
                key fails.  The USB storage is formatted and the DUT is
                powered back on before the error is raised.
        """
        # Reset the marker first so a failure before the recovery boot
        # leaves it False.
        self.boot_in_recovery = False
        repair_utils.require_servo(host, ignore_state=True)
        image_name = host.get_cros_repair_image_name()
        image_name_on_usb = host._servo_host.validate_image_usbkey()
        if image_name_on_usb == image_name:
            logging.info(
                    'Required image %s is already on usbkey,'
                    ' skipping download.', image_name)
            need_update_image = False
        else:
            logging.info('Required image is not on usbkey.')
            need_update_image = True

        # Verify if we want to force re-image the USB.
        if not need_update_image and host.health_profile:
            repair_failed_count = host.health_profile.get_repair_fail_count()
            # Re-image the USB key after the first failed repair and then
            # after every 10th failure.
            if (repair_failed_count > 0 and
                (repair_failed_count == 1 or repair_failed_count % 10 == 0)):
                logging.info(
                        'Required re-download image to usbkey as'
                        ' a previous repair failed. Fail count: %s',
                        repair_failed_count)
                need_update_image = True

        update_url = None
        if need_update_image:
            logging.info('Staging image: %s on caching server.', image_name)
            _, update_url = host.stage_image_for_servo()
        afe_utils.clean_provision_labels(host)
        # Start process to install new image from USB
        need_snk = host.require_snk_mode_in_recovery()

        host.servo.get_power_state_controller().power_off()
        if update_url:
            try:
                host.install_image_to_servo_usb(image_url=update_url)
            except Exception as e:
                # Grab the traceback before running any cleanup so that it
                # always refers to the download failure.
                download_tb = sys.exc_info()[2]
                # Format USB-storage, as an incorrectly downloaded image can
                # cause a false belief that the image was downloaded.
                self._format_usb_storage(host)
                # Power the DUT on, as leaving it off can cause issues
                # with detecting ccd_cr50 on the board.
                host.servo.get_power_state_controller().power_on()
                # six.reraise() needs an exception *instance* as the value:
                # on Python 3 it reads value.__traceback__, so passing a
                # plain string fails with AttributeError instead of
                # raising AutotestError.
                six.reraise(error.AutotestError,
                            error.AutotestError(str(e)),
                            download_tb)
        else:
            # Give the DUT some time to power_off if we skip
            # download image to usb. (crbug.com/982993)
            time.sleep(10)

        host.boot_in_recovery_mode(need_snk=need_snk)
        # Note that the device successfully booted from USB,
        # which means the RO firmware is good.
        self.boot_in_recovery = True
        host.run_install_image(install_timeout=host.ADMIN_INSTALL_TIMEOUT * 2,
                               need_snk=need_snk,
                               is_repair=True)
        afe_utils.add_provision_labels(host, host.VERSION_PREFIX, image_name)
        # Collect info which USB-key used for successful re-image.
        host_info = host.host_info_store.get()
        if host_info:
            usb_state = host_info.get_label_value(
                    audit_const.SERVO_USB_STATE_PREFIX)
            metrics_data = {'host': host.hostname, 'usb_state': usb_state}
            metrics.Counter('chromeos/autotest/usbkey_install_success'
                            ).increment(fields=metrics_data)

    def _format_usb_storage(self, host):
        """Format USB-storage connected to servo.

        Best effort: any failure is logged and swallowed.

        @param host: host under repair.
        """
        try:
            # Format USB-storage to prevent corrupted image to be
            # counted as good image.
            usb_path = host.servo.probe_host_usb_dev()
            logging.info('Formating %s', usb_path)
            cmd = 'mkfs.ext4 -F %s' % usb_path
            host._servo_host.run(cmd, ignore_status=True)
        except Exception as e:
            logging.info('(Not critical) fail to format USB-storage: %s', e)

    @property
    def description(self):
        """Short human-readable name of this repair action."""
        return 'Reinstall from USB using servo'
1315*9c5db199SXin Li
1316*9c5db199SXin Li
class ServoResetAfterUSBRepair(_ResetRepairAction):
    """Repair a host by resetting it with servo.

    Follow-up action for the case where the device failed to boot as part
    of a USB-install.  It applies only when the 'usb' repair strategy node
    recorded that the device booted in recovery mode.
    """

    @timeout_util.TimeoutDecorator(cros_constants.REPAIR_TIMEOUT_SEC)
    def repair(self, host):
        """Reset the DUT through servo and check that the reset succeeded.

        @param host: host under repair.
        """
        power_state = host.servo.get_power_state_controller()
        power_state.reset()
        self._check_reset_success(host)

    def _is_applicable(self, host):
        """Decide whether a reset after USB-install makes sense.

        @param host: host under repair.
        @return True only if servo is present, the device is not marked
                for replacement, and the 'usb' strategy node exists with a
                truthy `boot_in_recovery` attribute.
        """
        if not host.servo:
            return False
        if host.is_marked_for_replacement():
            logging.debug('The device marked for replacement.'
                          ' Skip the action.')
            return False
        usb_node = host.get_repair_strategy_node('usb')
        if not usb_node:
            logging.debug('Strategy node not found! Skip repair action.')
            return False
        booted_in_recovery = getattr(usb_node, 'boot_in_recovery', False)
        if not booted_in_recovery:
            logging.debug('Device did not boot in recovery mode.'
                          ' Skip repair action.')
            return False
        return True

    @property
    def description(self):
        """Short human-readable name of this repair action."""
        return 'Reset the DUT via servo after USB-install'
1352*9c5db199SXin Li
1353*9c5db199SXin Li
class RecoverFwAfterUSBRepair(_ResetRepairAction):
    """Recover FW on the host when host can boot in recovery mode.

    Follow-up action for the case where the device failed to boot as part
    of a USB-install but did boot in recovery mode.

    A host that boots in recovery mode but not in default mode probably
    has corrupted firmware, so this repair boots from the USB-key and
    runs the firmware updater in recovery mode.
    """

    # Command to update firmware located on host
    _FW_UPDATE_CMD = 'chromeos-firmwareupdate --mode=recovery'

    @timeout_util.TimeoutDecorator(cros_constants.LONG_REPAIR_TIMEOUT_SEC)
    def repair(self, host):
        """Boot from USB in recovery mode, update firmware, then reset.

        @param host: host under repair.
        """
        # Point the USB-key at the servo host first to wake it up:
        # sometimes it reports the DUT direction while the DUT does not
        # see it yet.
        host.servo.switch_usbkey('host')
        time.sleep(host.servo.USB_DETECTION_DELAY)
        # A powered-off DUT has a higher chance of booting in recovery
        # mode.
        host.servo.get_power_state_controller().power_off()
        # Give the DUT some time to actually power off.
        # (crbug.com/982993)
        time.sleep(10)

        # Recovery boot is known to work: another repair action verified
        # it already.
        need_snk = host.require_snk_mode_in_recovery()
        try:
            host.boot_in_recovery_mode(need_snk=need_snk)
            logging.debug('Host booted in recovery mode')

            # An updater failure is only logged; the flow continues.
            fw_update = host.run(self._FW_UPDATE_CMD, ignore_status=True)
            if fw_update.exit_status != 0:
                logging.error('chromeos-firmwareupdate failed: %s',
                              fw_update.stdout.strip())
            host.halt()
        finally:
            self._leave_usb_boot_state(host, need_snk)
        self._check_reset_success(host)

    def _leave_usb_boot_state(self, host, need_snk):
        """Take the DUT out of the boot-from-USB state via servo.

        Called whether or not the firmware update worked, because the DUT
        must not be left booting from USB.

        @param host: host under repair.
        @param need_snk: whether recovery boot needed servo_v4 'snk' mode;
                         if so the role is put back to 'src'.
        """
        logging.debug('Power cycling DUT through servo.')
        host.servo.get_power_state_controller().power_off()
        host.servo.switch_usbkey('off')
        if need_snk:
            # Attempt to restore servo_v4 role to 'src' mode.
            host.servo.set_servo_v4_role('src')
        # A cold reset is used rather than a plain power on to increase
        # the chance that the DUT boots.
        host.servo.get_power_state_controller().reset()

    def _is_applicable(self, host):
        """Decide whether firmware recovery should be attempted.

        @param host: host under repair.
        @return True only if servo is present, the device is not marked
                for replacement, the 'usb' strategy node recorded a
                recovery boot, a health profile is available, and this
                action has failed at most twice before.
        """
        if not host.servo:
            return False
        if host.is_marked_for_replacement():
            logging.debug('The device marked for replacement.'
                          ' Skip the action.')
            return False
        usb_node = host.get_repair_strategy_node('usb')
        if not usb_node:
            logging.debug('Strategy node not found! Skip repair action.')
            return False
        booted_in_recovery = getattr(usb_node, 'boot_in_recovery', False)
        if not booted_in_recovery:
            logging.debug('Device did not boot in recovery mode.'
                          ' Skip repair action.')
            return False
        health_profile = host.health_profile
        if not health_profile:
            logging.info('Device health profile is not available, cannot'
                         ' determine if firmware repair is needed.')
            return False
        failed_attempts = health_profile.get_failed_repair_action(self.tag)
        if failed_attempts > 2:
            logging.info('Firmware recovery has been attempted and failed 3'
                         ' times, no need to retry.')
            return False
        return True

    @property
    def description(self):
        """Short human-readable name of this repair action."""
        return 'Recover FW on the host after USB-install'
1445*9c5db199SXin Li
1446*9c5db199SXin Li
class RecoverACPowerRepair(_ResetRepairAction):
    """Recover AC detection if AC is not detected.

    The fix is based on toggling PD negotiation at the EC level of the DUT.
    Repair works only for a DUT which has an EC and a battery.
    """

    # PD dual-role settings applied, in order, to restart PD negotiation.
    _PD_DUALROLE_SEQUENCE = ('off', 'on', 'source', 'sink')

    @timeout_util.TimeoutDecorator(cros_constants.REPAIR_TIMEOUT_SEC)
    def repair(self, host):
        """Toggle PD dual-role on the EC until the battery charges again.

        @param host: host under repair; servo is required.

        @raises hosts.AutoservRepairError: if battery state cannot be read
                from the EC, or if the battery is still not charging after
                the PD toggle and EC reset.
        """
        repair_utils.require_servo(host, ignore_state=True)
        # Verify that EC is available and we can interact with that.
        # Do not put it in '_is_applicable' to avoid extra DUT reset.
        try:
            host.servo.get_ec_board()
        except Exception as e:
            logging.debug('(Not critical) %s', e)
            # If the EC is off it fails to execute any EC command.  To
            # wake it up we do a cold reboot, after which the EC
            # connection is active for ~30 seconds.
            host.servo.get_power_state_controller().reset()
        try:
            if host.servo.get('battery_is_charging'):
                # Device is charging; nothing to repair.
                return
        except Exception as e:
            logging.debug('(Not critical) %s', e)
            # Pass a metrics tag as the second argument, like the other
            # raise in this method does; a description-only call is
            # inconsistent with it.
            # NOTE(review): presumably the tag is a required constructor
            # argument -- confirm against hosts.AutoservRepairError.
            raise hosts.AutoservRepairError(
                    'Fail to read battery metrics from EC',
                    'failed_read_battery_metrics_from_ec')
        # A simple off-on toggle is not stable in all cases, and neither
        # is source-sink, so do both to recover PD negotiation.
        # Source/sink switches the CC lines to make the DUT supply or
        # consume power (between Rp and Rd).
        for role in self._PD_DUALROLE_SEQUENCE:
            self._set_pd_dualrole(host, role)
        # wait to reinitialize PD negotiation and charge a little bit
        time.sleep(120)
        # Recommended to reset EC after manipulation with PD
        host.servo.get_power_state_controller().reset()
        # Verify that the repair worked.
        if not host.servo.get('battery_is_charging'):
            raise hosts.AutoservRepairError(
                    'Fail recovery AC detection fo the DUT.',
                    'failed_recover_usb_pd_ac')
        self._check_reset_success(host)

    def _set_pd_dualrole(self, host, role):
        """Send `pd dualrole <role>` to the EC console through servo.

        @param host: host under repair.
        @param role: dual-role setting, e.g. 'off', 'on', 'source', 'sink'.
        """
        host.servo.set_nocheck('ec_uart_flush', 'off')
        host.servo.set_nocheck('ec_uart_cmd', 'pd dualrole %s' % role)
        host.servo.set_nocheck('ec_uart_flush', 'on')
        # Short pause after each command before the next one is sent.
        time.sleep(1)

    def _is_applicable(self, host):
        """Apply only to boards with EC support and a battery.

        @param host: host under repair.
        @return True if the servo host reports EC support and the host's
                'power' label value is 'battery'.
        """
        if not host._servo_host.is_ec_supported():
            logging.info('The board not support EC')
            return False
        host_info = host.host_info_store.get()
        if host_info.get_label_value('power') != 'battery':
            logging.info('The board does not have battery')
            return False
        return True

    @property
    def description(self):
        """Short human-readable name of this repair action."""
        return 'Recovery AC of DUT'
1516*9c5db199SXin Li
1517*9c5db199SXin Li
class JetstreamTpmRepair(hosts.RepairAction):
    """Repair by resetting TPM and rebooting."""

    @timeout_util.TimeoutDecorator(cros_constants.REPAIR_TIMEOUT_SEC)
    def repair(self, host):
        """Remove setup state files, request a TPM owner clear, reboot.

        Every command is best effort (exit status ignored).

        @param host: host under repair.
        """
        pre_reboot_cmds = (
                'rm -f /var/cache/ap/setup-network',
                'rm -f /home/chronos/.oobe_completed',
                'rm -f /home/.shadow/.can_attempt_ownership',
                'crossystem clear_tpm_owner_request=1',
        )
        for cmd in pre_reboot_cmds:
            host.run(cmd, ignore_status=True)
        host.reboot()

    @property
    def description(self):
        """Short human-readable name of this repair action."""
        return 'Reset TPM and reboot'
1535*9c5db199SXin Li
1536*9c5db199SXin Li
class JetstreamServiceRepair(hosts.RepairAction):
    """Repair by restarting Jetstream services."""

    @timeout_util.TimeoutDecorator(cros_constants.REPAIR_TIMEOUT_SEC)
    def repair(self, host):
        """Delegate to the host's `cleanup_services()`.

        @param host: host under repair.
        """
        host.cleanup_services()

    @property
    def description(self):
        """Short human-readable name of this repair action."""
        return 'Restart Jetstream services'
1549*9c5db199SXin Li
1550*9c5db199SXin Li
def _cros_verify_dag():
    """Return the verification DAG for a `CrosHost`.

    The result is the base DAG followed by the extended DAG.
    """
    base_dag = _cros_verify_base_dag()
    extended_dag = _cros_verify_extended_dag()
    return base_dag + extended_dag
1554*9c5db199SXin Li
1555*9c5db199SXin Li
def _cros_verify_base_dag():
    """Return the base verification DAG for a `CrosHost`.

    Each entry is a tuple of (verifier class, tag, tags of the verifiers
    it depends on).
    """
    ssh_only = ('ssh', )
    return (
            (repair_utils.PingVerifier, 'ping', ()),
            (repair_utils.SshVerifier, 'ssh', ('ping', )),
            (ServoUSBDriveVerifier, 'usb_drive', ()),
            (DevDefaultBootVerifier, 'dev_default_boot', ssh_only),
            (DevModeVerifier, 'devmode', ssh_only),
            (EnrollmentStateVerifier, 'enrollment_state', ssh_only),
            (HWIDVerifier, 'hwid', ssh_only),
            (ACPowerVerifier, 'power', ssh_only),
            (EXT4fsErrorVerifier, 'ext4', ssh_only),
            (WritableVerifier, 'writable', ssh_only),
            (TPMStatusVerifier, 'tpm', ssh_only),
            (UpdateSuccessVerifier, 'good_provision', ssh_only),
            (FirmwareTpmVerifier, 'faft_tpm', ssh_only),
            (cros_firmware.FirmwareStatusVerifier, 'fwstatus', ssh_only),
            (cros_firmware.FirmwareVersionVerifier, 'rwfw', ssh_only),
            (PythonVerifier, 'python', ssh_only),
            (repair_utils.LegacyHostVerifier, 'cros', ssh_only),
            (ProvisioningLabelsVerifier, 'provisioning_labels', ssh_only),
    )
1581*9c5db199SXin Li
1582*9c5db199SXin Li
def _cros_verify_extended_dag():
    """Return the extended verification DAG for a `CrosHost`.

    Each entry is a tuple of (verifier class, tag, tags of the verifiers
    it depends on).
    """
    ssh_only = ('ssh', )
    extended_dag = (
            (StopStartUIVerifier, 'stop_start_ui', ssh_only),
            (DUTStorageVerifier, 'storage', ssh_only),
            (AuditBattery, 'audit_battery', ()),
            (GscToolPresentVerifier, 'dut_gsctool', ssh_only),
            (ServoKeyboardMapVerifier, 'dut_servo_keyboard', ssh_only),
            (ServoMacAddressVerifier, 'dut_servo_macaddr', ssh_only),
    )
    return extended_dag
1593*9c5db199SXin Li
1594*9c5db199SXin Li
def _cros_basic_repair_actions(
    servo_reset_trigger=DEFAULT_SERVO_RESET_TRIGGER
):
    """Return the basic repair actions for a `CrosHost`

    Each entry is a tuple of (repair class, tag, dependency tags,
    trigger tags).

    @param servo_reset_trigger: sequence of verifiers that trigger servo reset
    and servo cr50 reboot repair.
    """
    no_deps = ()
    needs_ssh = ('ssh', )
    unreachable = ('ping', 'ssh')
    return (
            # RPM cycling must precede Servo reset:  if the DUT has a dead
            # battery, we need to reattach AC power before we reset via servo.
            (repair_utils.RPMCycleRepair, 'rpm', no_deps,
             ('ping', 'ssh', 'power')),
            (ServoResetRepair, 'servoreset', no_deps, servo_reset_trigger),
            (ServoCr50RebootRepair, 'cr50_reset', no_deps,
             servo_reset_trigger),
            (ServoSysRqRepair, 'sysrq', no_deps, unreachable),
            (ProvisioningLabelsRepair, 'provisioning_labels_repair',
             needs_ssh, ('provisioning_labels', )),

            # N.B. FaftFirmwareRepair can't fix a 'good_provision' failure
            # directly, because it doesn't remove the flag file that triggers
            # the failure.  It is still listed as a trigger because the last
            # update may have failed because of the firmware, and the repair
            # steps below need to be able to trust the firmware.
            (cros_firmware.FaftFirmwareRepair, 'faft_firmware_repair',
             no_deps, ('ping', 'ssh', 'fwstatus', 'good_provision')),
            (DevDefaultBootRepair, 'set_default_boot', needs_ssh,
             ('dev_default_boot', )),
            (CrosRebootRepair, 'reboot', needs_ssh, ('devmode', 'writable')),
            (EnrollmentCleanupRepair, 'cleanup_enrollment', needs_ssh,
             ('enrollment_state', )),
    )
1642*9c5db199SXin Li
1643*9c5db199SXin Li
def _cros_extended_repair_actions(provision_triggers=_CROS_PROVISION_TRIGGERS,
                                  powerwash_triggers=_CROS_POWERWASH_TRIGGERS,
                                  usb_triggers=_CROS_USB_TRIGGERS,
                                  usb_dependencies=_CROS_USB_DEPENDENCIES):
    """Build the table of extended repair actions for a `CrosHost`.

    The 'provision', 'powerwash' and 'usb' actions form a progression:
    each one can repair more verifiers than the previous one, so the
    triggers of an earlier action are added to the triggers of every later
    one, and the triggers of the later actions become dependencies of the
    earlier ones.

    @param provision_triggers: verifier names that trigger 'provision'.
    @param powerwash_triggers: verifier names that trigger 'powerwash'.
    @param usb_triggers: verifier names that trigger 'usb'.
    @param usb_dependencies: verifier names the 'usb' action depends on.
    @returns A tuple of (repair class, action name, dependencies, triggers)
             entries.
    """
    provision_dependencies = usb_triggers + powerwash_triggers
    powerwash_all_triggers = powerwash_triggers + provision_triggers

    # 'faft_tpm' has to trigger the usb repair without becoming a
    # dependency of the provision and powerwash repairs.  The current
    # structure cannot express that through _CROS_USB_TRIGGERS, so the
    # name is hardcoded here.  TODO(xianuowang@) refactor the logic to
    # create the action/verifier DAG per host type once infra is decoupled
    # from the test autotest repo.
    usb_all_triggers = (usb_triggers + powerwash_all_triggers +
                        ('faft_tpm', ))

    return (
            (ProvisionRepair, 'provision', provision_dependencies,
             provision_triggers),
            (PowerWashRepair, 'powerwash', usb_triggers,
             powerwash_all_triggers),
            (ServoInstallRepair, 'usb', usb_dependencies, usb_all_triggers),
    )
1674*9c5db199SXin Li
1675*9c5db199SXin Li
def _cros_repair_actions():
    """Return the repair actions for a `CrosHost`.

    The table is assembled, in order, from:
      1. the basic actions from `_cros_basic_repair_actions()`,
      2. the firmware and AC power recovery actions,
      3. the provision/powerwash/usb actions from
         `_cros_extended_repair_actions()`,
      4. the actions that run after the usb repair.

    Reusing the two helper tables instead of repeating their entries keeps
    this strategy from drifting away from the other strategies that are
    built on the same helpers.

    @returns A tuple of (repair class, action name, dependencies, triggers)
             entries.
    """
    usb_dependencies = _CROS_USB_DEPENDENCIES
    # CrOS adds two provision triggers on top of the shared defaults.
    provision_triggers = _CROS_PROVISION_TRIGGERS + (
            'stop_start_ui',
            'dut_gsctool',
    )
    unreachable = ('ping', 'ssh')

    basic_actions = _cros_basic_repair_actions(
            servo_reset_trigger=DEFAULT_SERVO_RESET_TRIGGER)

    firmware_and_power_actions = (
            (cros_firmware.GeneralFirmwareRepair, 'general_firmware',
             usb_dependencies, _CROS_FIRMWARE_TRIGGERS),
            (RecoverACPowerRepair, 'ac_recover', (), _CROS_AC_TRIGGERS),
    )

    extended_actions = _cros_extended_repair_actions(
            provision_triggers=provision_triggers,
            powerwash_triggers=_CROS_POWERWASH_TRIGGERS,
            usb_triggers=_CROS_USB_TRIGGERS,
            usb_dependencies=usb_dependencies)

    after_usb_actions = (
            (ServoResetAfterUSBRepair, 'servo_reset_after_usb',
             usb_dependencies, unreachable),
            (RecoverFwAfterUSBRepair, 'recover_fw_after_usb',
             usb_dependencies, unreachable),
    )

    return (basic_actions + firmware_and_power_actions + extended_actions +
            after_usb_actions)
1759*9c5db199SXin Li
1760*9c5db199SXin Li
def create_cros_repair_strategy():
    """Build the `RepairStrategy` used for a `CrosHost`.

    @returns A `hosts.RepairStrategy` constructed from the CrOS verify DAG
             and the CrOS repair actions, under the name 'cros'.
    """
    return hosts.RepairStrategy(_cros_verify_dag(),
                                _cros_repair_actions(),
                                'cros')
1766*9c5db199SXin Li
1767*9c5db199SXin Li
def _moblab_verify_dag():
    """Build the verification DAG for a `MoblabHost`.

    @returns A tuple of (verifier class, verifier name, dependencies)
             entries; every verifier other than 'ssh' depends on 'ssh'.
    """
    after_ssh = ('ssh',)
    return (
        (repair_utils.SshVerifier, 'ssh', ()),
        (ACPowerVerifier, 'power', after_ssh),
        (PythonVerifier, 'python', after_ssh),
        (repair_utils.LegacyHostVerifier, 'cros', after_ssh),
    )
1777*9c5db199SXin Li
1778*9c5db199SXin Li
def _moblab_repair_actions():
    """Build the table of repair actions for a `MoblabHost`.

    @returns A tuple of (repair class, action name, dependencies, triggers)
             entries.
    """
    rpm_action = (repair_utils.RPMCycleRepair, 'rpm', (), ('ssh', 'power'))
    provision_action = (ProvisionRepair, 'provision', ('ssh',),
                        ('power', 'python', 'cros'))
    return (rpm_action, provision_action)
1786*9c5db199SXin Li
1787*9c5db199SXin Li
def create_moblab_repair_strategy():
    """
    Build the `RepairStrategy` used for a `MoblabHost`.

    The Moblab strategy is a subset of the CrOS verify and repair.  Most
    of the omitted pieces are simply not expected to be meaningful; the
    following are left out for more specific reasons:

    'tpm':  The tests this verifier matters to are not run on Moblab
        DUTs.  TODO(jrbarnette)  This assertion is unproven.

    'good_provision':  The Moblab provision procedure doesn't properly
        delete the PROVISION_FAILED file, so this verifier can't pass.
        TODO(jrbarnette) We should refactor ChromiumOSProvisioner so
        that it can be different for Moblab.

    'firmware':  Moblab DUTs shouldn't be in FAFT pools, so this isn't
        attempted.

    'powerwash':  Skipped, because powerwash on Moblab causes trouble
        with deleting the DHCP leases file.
    """
    return hosts.RepairStrategy(_moblab_verify_dag(),
                                _moblab_repair_actions(),
                                'moblab')
1813*9c5db199SXin Li
1814*9c5db199SXin Li
def _jetstream_repair_actions():
    """Build the table of repair actions for a `JetstreamHost`.

    The result is the basic CrOS actions (with servo reset triggered by
    'ping' and 'ssh'), followed by the Jetstream TPM and service repairs,
    followed by the extended CrOS actions configured with the Jetstream
    triggers.

    @returns A tuple of (repair class, action name, dependencies, triggers)
             entries.
    """
    tpm_verifiers = ('jetstream_tpm', 'jetstream_attestation')
    service_verifiers = tpm_verifiers + ('jetstream_services',)
    # Both Jetstream repairs depend on the usb and powerwash trigger sets.
    shared_dependencies = _JETSTREAM_USB_TRIGGERS + _CROS_POWERWASH_TRIGGERS

    basic = _cros_basic_repair_actions(servo_reset_trigger=('ping', 'ssh'))
    jetstream_only = (
            (JetstreamTpmRepair, 'jetstream_tpm_repair',
             shared_dependencies,
             _CROS_PROVISION_TRIGGERS + tpm_verifiers),
            (JetstreamServiceRepair, 'jetstream_service_repair',
             shared_dependencies + tpm_verifiers,
             _CROS_PROVISION_TRIGGERS + service_verifiers),
    )
    extended = _cros_extended_repair_actions(
            provision_triggers=_CROS_PROVISION_TRIGGERS + service_verifiers,
            usb_triggers=_JETSTREAM_USB_TRIGGERS)
    return basic + jetstream_only + extended
1838*9c5db199SXin Li
1839*9c5db199SXin Li
def _jetstream_verify_dag():
    """Build the verification DAG for a `JetstreamHost`.

    @returns The base CrOS verify DAG extended with the three Jetstream
             verifiers, each of which depends on 'ssh'.
    """
    after_ssh = ('ssh',)
    jetstream_verifiers = (
        (JetstreamTpmVerifier, 'jetstream_tpm', after_ssh),
        (JetstreamAttestationVerifier, 'jetstream_attestation', after_ssh),
        (JetstreamServicesVerifier, 'jetstream_services', after_ssh),
    )
    return _cros_verify_base_dag() + jetstream_verifiers
1848*9c5db199SXin Li
1849*9c5db199SXin Li
def create_jetstream_repair_strategy():
    """
    Build the `RepairStrategy` used for a `JetstreamHost`.

    The strategy starts from the CrOS verify and repair, and adds the
    JetstreamServicesVerifier on top of it.
    """
    return hosts.RepairStrategy(_jetstream_verify_dag(),
                                _jetstream_repair_actions(),
                                'jetstream')
1860*9c5db199SXin Li
1861*9c5db199SXin Li
1862*9c5db199SXin Li# TODO(pprabhu) Move this to a better place. I have no idea what that place
1863*9c5db199SXin Li# would be.
1864*9c5db199SXin Lidef _is_virtual_machine(host):
1865*9c5db199SXin Li    """Determine whether the given |host| is a virtual machine.
1866*9c5db199SXin Li
1867*9c5db199SXin Li    @param host: a hosts.Host object.
1868*9c5db199SXin Li    @returns True if the host is a virtual machine, False otherwise.
1869*9c5db199SXin Li    """
1870*9c5db199SXin Li    output = host.run('cat /proc/cpuinfo | grep "model name"',
1871*9c5db199SXin Li                      ignore_status=True)
1872*9c5db199SXin Li    return (output.exit_status == 0 and output.stdout and
1873*9c5db199SXin Li            'qemu' in output.stdout.lower())
1874*9c5db199SXin Li
1875*9c5db199SXin Li
class TpmStatus(dict):
    """Dictionary of the TPM status fields read from a host.

    The dictionary is filled at construction time with the result of
    `_get_tpm_status()`; the properties expose a few of the fields as
    booleans.
    """

    def __init__(self, host):
        """Read the TPM status of |host| into this dictionary.

        @param host: a hosts.Host object.
        """
        super(TpmStatus, self).__init__()
        tpm_fields = _get_tpm_status(host)
        self.update(tpm_fields)

    @property
    def tpm_enabled(self):
        """Whether the 'is_enabled' field is present and equal to True."""
        enabled = self.get('is_enabled')
        return enabled == True

    @property
    def tpm_owned(self):
        """Whether the 'is_owned' field is present and equal to True."""
        owned = self.get('is_owned')
        return owned == True

    @property
    def tpm_can_load_srk(self):
        """Whether the TPM is owned and 'is_srk_default_auth' equals True."""
        if not self.tpm_owned:
            return False
        return self.get('is_srk_default_auth') == True

    @property
    def tpm_can_load_srk_pubkey(self):
        """Whether the TPM is owned and 'is_srk_default_auth' equals True.

        This evaluates the same condition as `tpm_can_load_srk`.
        """
        if not self.tpm_owned:
            return False
        return self.get('is_srk_default_auth') == True
1902*9c5db199SXin Li
1903*9c5db199SXin Li
1904*9c5db199SXin Lidef _get_tpm_status(host):
1905*9c5db199SXin Li    """Returns a dictionary containing the TPM status.
1906*9c5db199SXin Li
1907*9c5db199SXin Li    @param host: a hosts.Host object.
1908*9c5db199SXin Li    @returns A dictionary containing the TPM status.
1909*9c5db199SXin Li    @raises AutoservVerifyError: if the output could not be parsed or the TPM
1910*9c5db199SXin Li       status is missing.
1911*9c5db199SXin Li    @raises hosts.AutoservRunError: if the cryptohome command failed.
1912*9c5db199SXin Li    """
1913*9c5db199SXin Li    try:
1914*9c5db199SXin Li        output = host.run(
1915*9c5db199SXin Li                'tpm_manager_client status --nonsensitive').stdout.strip()
1916*9c5db199SXin Li        lines = output.split('\n')[1:-1]
1917*9c5db199SXin Li        status = {}
1918*9c5db199SXin Li        for item in lines:
1919*9c5db199SXin Li            item = item.split(':')
1920*9c5db199SXin Li            if not item[0]:
1921*9c5db199SXin Li                continue
1922*9c5db199SXin Li            if len(item) == 1:
1923*9c5db199SXin Li                item.append('')
1924*9c5db199SXin Li            item = [x.strip() for x in item]
1925*9c5db199SXin Li            item[1] = True if item[1] == 'true' else item[1]
1926*9c5db199SXin Li            item[1] = False if item[1] == 'false' else item[1]
1927*9c5db199SXin Li            status[item[0]] = item[1]
1928*9c5db199SXin Li        if status['status'] != 'STATUS_SUCCESS':
1929*9c5db199SXin Li            raise hosts.AutoservVerifyError('TPM status is missing')
1930*9c5db199SXin Li        return status
1931*9c5db199SXin Li    except ValueError:
1932*9c5db199SXin Li        raise hosts.AutoservVerifyError('Unable to parse cryptohome status')
1933