xref: /aosp_15_r20/external/autotest/server/hosts/labstation_repair.py (revision 9c5db1993ded3edbeafc8092d69fe5de2ee02df7)
1*9c5db199SXin Li# Copyright 2019 The Chromium OS Authors. All rights reserved.
2*9c5db199SXin Li# Use of this source code is governed by a BSD-style license that can be
3*9c5db199SXin Li# found in the LICENSE file.
4*9c5db199SXin Li
5*9c5db199SXin Liimport common
6*9c5db199SXin Liimport logging
7*9c5db199SXin Lifrom autotest_lib.client.common_lib import hosts
8*9c5db199SXin Lifrom autotest_lib.server.hosts import cros_constants
9*9c5db199SXin Lifrom autotest_lib.server.hosts import repair_utils
10*9c5db199SXin Lifrom autotest_lib.client.common_lib import utils
11*9c5db199SXin Li
12*9c5db199SXin Lifrom autotest_lib.utils.frozen_chromite.lib import timeout_util
13*9c5db199SXin Li
14*9c5db199SXin Litry:
15*9c5db199SXin Li    from autotest_lib.utils.frozen_chromite.lib import metrics
16*9c5db199SXin Liexcept ImportError:
17*9c5db199SXin Li    metrics = utils.metrics_mock
18*9c5db199SXin Li
# Some labstations must not receive auto-updates, e.g. labstations that
# are used for image qualification. A labstation that belongs to any of
# the pools listed here is skipped by the update verifier.
UPDATE_EXEMPTED_POOL = {
        'servo_verification',
        'labstation_tryjob',
        'labstation_canary',
}
24*9c5db199SXin Li
25*9c5db199SXin Li
class _LabstationUpdateVerifier(hosts.Verifier):
    """
    Verifier that kicks off a labstation image update when one applies.

    The verifier refreshes the host's cros version label, then asks the
    host to update to the 'cros' stable version recorded in its host
    info store. Labstations in an update-exempted pool are left alone.
    """

    @timeout_util.TimeoutDecorator(cros_constants.LONG_VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        """Trigger an update to the stable version, if applicable.

        Nothing is done unless the host is in the physical lab and the
        job reports that it runs in the lab; the latter excludes runs
        started by test_that, where subnet restrictions can cause the
        update to fail.

        @param host: the labstation host to check/update.

        @raises hosts.AutoservVerifyError: if the host info store has no
                'cros' stable version.
        """
        runs_in_lab = host.is_in_lab() and host.job and host.job.in_lab
        if not runs_in_lab:
            return

        host.update_cros_version_label()
        info = host.host_info_store.get()

        # Any overlap between the host's pools and the exempted pools
        # means this labstation must keep its current image.
        exempted = UPDATE_EXEMPTED_POOL & info.pools
        if exempted:
            logging.info(
                    "Skip update because the labstation is in one of"
                    " following exempted pool: %s", info.pools)
            return

        stable_version = info.stable_versions.get('cros')
        if not stable_version:
            raise hosts.AutoservVerifyError(
                    'Failed to check/update labstation due to no'
                    ' stable_version found in host_info_store.')
        host.update_image(stable_version=stable_version)

    @property
    def description(self):
        return 'Labstation image is updated to current stable-version'
61*9c5db199SXin Li
62*9c5db199SXin Li
class _LabstationRebootVerifier(hosts.Verifier):
    """Reboot the labstation when a reboot has been requested.

    The verifier asks the host whether a reboot is requested and, if so,
    delegates to `host.try_reboot()`; per the description below, the
    reboot is only meant to happen while the labstation is not in use.
    """

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        """Attempt a reboot if one is pending for the host.

        @param host: the labstation host to check.
        """
        if not host.is_reboot_requested():
            return
        host.try_reboot()

    @property
    def description(self):
        return 'Reboot labstation if requested and the labstation is not in use'
76*9c5db199SXin Li
77*9c5db199SXin Li
class _LabstationLangidVerifier(hosts.Verifier):
    """Check whether the labstation fails to read serials from servos.

    When the "no langid" problem is detected, a reboot request file is
    created on the host instead of rebooting right away.

    TODO(b:162518926): remove when bug will be resolved.
    """

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        """Probe the servo USB devices and flag the host on langid errors.

        This check is best-effort: every exception is logged at debug
        level and swallowed, so the verifier itself never fails.

        @param host: the labstation host to check.
        """
        # One-liner executed on the labstation: read the serial number
        # string of every Google (0x18d1) device that is a servo_v4
        # (0x501b), servo_micro (0x501a) or ccd_cr50 (0x5014) and keeps
        # its serial in string slot 3.
        read_serials_cmd = (
                "python2 -c 'import usb;"
                " print([[d.open().getString(d.iSerialNumber, 128)"
                " for d in bus.devices if d.idVendor == 0x18d1"
                " and (d.idProduct == 0x501b"
                " or d.idProduct == 0x501a"
                " or d.idProduct == 0x5014)"
                " and d.iSerialNumber == 3]"
                " for bus in usb.busses()])'")
        try:
            outcome = host.run(read_serials_cmd,
                               ignore_status=True,
                               timeout=30)
            # stderr is only inspected when the command failed.
            langid_failure = (
                    outcome.exit_status != 0 and
                    'The device has no langid' in outcome.stderr.strip())
            if langid_failure:
                self._mark_host_for_reboot(host)
        except Exception as err:
            logging.debug('(Not critical) %s', err)
            if 'Timeout encountered' in str(err):
                # A timeout means the servo attributes could not be read
                # in time, which is treated as a langid symptom as well.
                self._mark_host_for_reboot(host)

    def _mark_host_for_reboot(self, host):
        """Record the langid issue and leave a reboot request on the host.

        @param host: the labstation host on which the issue was detected.
        """
        logging.info('Detected langid issue.')
        fields = {'host': host.hostname, 'board': host.get_board() or ''}
        counter = metrics.Counter('chromeos/autotest/labstation/langid_issue')
        counter.increment(fields=fields)
        # A reboot fixes the issue, but the labstation should not be
        # rebooted too often; only file a request so that it is rebooted
        # at the next opportunity.
        logging.info('Created request for reboot.')
        request_file = '%slangid%s' % (host.TEMP_FILE_DIR,
                                       host.REBOOT_FILE_POSTFIX)
        host.run('touch ' + request_file, ignore_status=True, timeout=30)

    @property
    def description(self):
        # NOTE(review): 'labsattion' is a typo, kept verbatim because
        # the string is emitted at runtime.
        return 'Check if labsattion has langid issue'
125*9c5db199SXin Li
126*9c5db199SXin Li
def create_labstation_repair_strategy():
    """
    Build and return the `RepairStrategy` used for a `LabstationHost`.

    The verifier graph has the ssh check as its root; the update, langid
    and reboot verifiers each list 'ssh' as their only dependency. A
    single RPM power-cycle repair action is registered.
    """
    # Each entry looks like (verifier class, tag, list of tags it
    # depends on) -- confirm against hosts.RepairStrategy.
    verifiers = [(repair_utils.SshVerifier, 'ssh', [])]
    verifiers += [
            (verifier_class, tag, ['ssh'])
            for verifier_class, tag in (
                    (_LabstationUpdateVerifier, 'update'),
                    (_LabstationLangidVerifier, 'langid'),
                    (_LabstationRebootVerifier, 'reboot'),
            )
    ]

    # Presumably (repair class, tag, dependencies, verifiers that
    # trigger the repair) -- confirm against hosts.RepairStrategy.
    repairs = [
            (repair_utils.RPMCycleRepair, 'rpm', [], ['ssh', 'reboot']),
    ]
    return hosts.RepairStrategy(verifiers, repairs, 'labstation')
142