xref: /aosp_15_r20/external/autotest/server/hosts/labstation_repair.py (revision 9c5db1993ded3edbeafc8092d69fe5de2ee02df7)
1# Copyright 2019 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import common
6import logging
7from autotest_lib.client.common_lib import hosts
8from autotest_lib.server.hosts import cros_constants
9from autotest_lib.server.hosts import repair_utils
10from autotest_lib.client.common_lib import utils
11
12from autotest_lib.utils.frozen_chromite.lib import timeout_util
13
14try:
15    from autotest_lib.utils.frozen_chromite.lib import metrics
16except ImportError:
17    metrics = utils.metrics_mock
18
# Pools whose labstations must not receive automatic updates,
# e.g. labstations that are used for image qualification.
UPDATE_EXEMPTED_POOL = {
        'servo_verification',
        'labstation_tryjob',
        'labstation_canary',
}
24
25
class _LabstationUpdateVerifier(hosts.Verifier):
    """
    Verifier that triggers a labstation image update when one is needed.

    Per the original notes, the operation does not wait for the update
    to finish, and it counts as a success regardless of whether the
    device is already up-to-date.
    """

    @timeout_util.TimeoutDecorator(cros_constants.LONG_VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        """Request an update of the labstation to its stable version.

        The check only runs when the host is in the physical lab and the
        job itself reports `in_lab`; runs started by test_that are skipped
        because subnet restrictions can cause the update to fail.

        @param host: the labstation host to verify.

        @raises hosts.AutoservVerifyError: when the host info store holds
                no 'cros' stable version for this labstation.
        """
        # Guard: nothing to do outside the lab or without an in-lab job.
        if not (host.is_in_lab() and host.job and host.job.in_lab):
            return

        host.update_cros_version_label()
        host_info = host.host_info_store.get()

        # Any overlap with the exempted pools means the update is skipped.
        if UPDATE_EXEMPTED_POOL & host_info.pools:
            logging.info(
                    "Skip update because the labstation is in"
                    " one of following exempted pool: %s", host_info.pools)
            return

        target_version = host_info.stable_versions.get('cros')
        if not target_version:
            raise hosts.AutoservVerifyError(
                    'Failed to check/update labstation due to no'
                    ' stable_version found in host_info_store.')
        host.update_image(stable_version=target_version)

    @property
    def description(self):
        """Human-readable summary of what this verifier checks."""
        return 'Labstation image is updated to current stable-version'
61
62
class _LabstationRebootVerifier(hosts.Verifier):
    """Reboot the labstation when a reboot has been requested.

    The reboot is only attempted (via `host.try_reboot()`); per the
    description below it is meant to happen when the labstation is not
    in use by any tests.
    """

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        """Try to reboot `host` if a reboot request is pending.

        @param host: the labstation host to verify.
        """
        if not host.is_reboot_requested():
            return
        host.try_reboot()

    @property
    def description(self):
        """Human-readable summary of what this verifier checks."""
        return 'Reboot labstation if requested and the labstation is not in use'
76
77
class _LabstationLangidVerifier(hosts.Verifier):
    """Detect labstations that fail to read serial numbers from servos.

    TODO(b:162518926): remove once the bug is resolved.
    """

    # Command executed on the labstation: uses the python2 `usb` module to
    # read the serial-number string of every attached device with vendor
    # id 0x18d1 and one of the product ids listed below.
    _LANGID_PROBE_CMD = (
            "python2 -c 'import usb;"
            " print([[d.open().getString(d.iSerialNumber, 128)"
            " for d in bus.devices if d.idVendor == 0x18d1"
            " and (d.idProduct == 0x501b"  # servo_v4
            " or d.idProduct == 0x501a"  # servo_micro
            " or d.idProduct == 0x5014)"  # ccd_cr50
            " and d.iSerialNumber == 3]"  # 3 - slot for serial
            " for bus in usb.busses()])'")

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        """Probe the servo serial numbers and request a reboot on langid errors.

        A reboot request is created when the probe exits non-zero with
        'The device has no langid' in its stderr, or when an exception
        whose text contains 'Timeout encountered' is raised. Exceptions
        are never propagated; they are logged at debug level only.

        @param host: the labstation host to verify.
        """
        try:
            probe = host.run(self._LANGID_PROBE_CMD,
                             ignore_status=True,
                             timeout=30)
            langid_failure = (
                    probe.exit_status != 0
                    and 'The device has no langid' in probe.stderr.strip())
            if langid_failure:
                self._mark_host_for_reboot(host)
        except Exception as error:
            logging.debug('(Not critical) %s', error)
            # A timeout means the servo attributes could not be read in
            # time because one of the servos has the langid issue.
            if 'Timeout encountered' in str(error):
                self._mark_host_for_reboot(host)

    def _mark_host_for_reboot(self, host):
        """Record the langid issue and leave a reboot request on the host.

        Increments the langid metrics counter, then touches the
        `<TEMP_FILE_DIR>langid<REBOOT_FILE_POSTFIX>` file on the host.

        @param host: the labstation host showing the langid issue.
        """
        logging.info('Detected langid issue.')
        metric_fields = {
                'host': host.hostname,
                'board': host.get_board() or '',
        }
        langid_counter = metrics.Counter(
                'chromeos/autotest/labstation/langid_issue')
        langid_counter.increment(fields=metric_fields)
        # Rebooting the labstation fixes the issue, but it should not be
        # rebooted too often, so only a request for a later reboot is
        # created here.
        logging.info('Created request for reboot.')
        touch_cmd = 'touch %slangid%s' % (host.TEMP_FILE_DIR,
                                          host.REBOOT_FILE_POSTFIX)
        host.run(touch_cmd, ignore_status=True, timeout=30)

    @property
    def description(self):
        """Human-readable summary of what this verifier checks."""
        return 'Check if labsattion has langid issue'
125
126
def create_labstation_repair_strategy():
    """
    Build and return the `RepairStrategy` used for a `LabstationHost`.

    Every labstation verifier depends on the 'ssh' verifier. The single
    repair action, 'rpm', has no dependencies and is listed against the
    'ssh' and 'reboot' verifiers.
    """
    # Each entry: (verifier class, node name, names of dependencies).
    verifiers = [
            (repair_utils.SshVerifier, 'ssh', []),
            (_LabstationUpdateVerifier, 'update', ['ssh']),
            (_LabstationLangidVerifier, 'langid', ['ssh']),
            (_LabstationRebootVerifier, 'reboot', ['ssh']),
    ]
    # Each entry: (repair class, action name, dependencies, triggers).
    repairs = [
            (repair_utils.RPMCycleRepair, 'rpm', [], ['ssh', 'reboot']),
    ]
    return hosts.RepairStrategy(verifiers, repairs, 'labstation')
142