xref: /aosp_15_r20/external/autotest/site_utils/admin_audit/verifiers.py (revision 9c5db1993ded3edbeafc8092d69fe5de2ee02df7)
1#!/usr/bin/env python3
2# Copyright 2020 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6import logging
7
8
9import common
10from autotest_lib.client.common_lib import error
11from autotest_lib.client.common_lib import utils as client_utils
12from autotest_lib.server.cros.storage import storage_validate as storage
13from autotest_lib.server.cros.servo.keyboard import servo_keyboard_flasher
14from autotest_lib.server.cros.repair import mac_address_helper
15from autotest_lib.site_utils.admin_audit import base
16from autotest_lib.site_utils.admin_audit import constants
17from autotest_lib.site_utils.admin_audit import rpm_validator
18from autotest_lib.site_utils.admin_audit import servo_updater
19
20try:
21    from autotest_lib.utils.frozen_chromite.lib import metrics
22except ImportError:
23    metrics = client_utils.metrics_mock
24
# Common status values used for statistics.
STATUS_SUCCESS = 'success'
STATUS_FAIL = 'fail'
STATUS_SKIPPED = 'skipped'
29
30
class VerifyDutStorage(base._BaseDUTVerifier):
    """Audit the health of the storage device on the DUT.

    The verifier detects the storage type and reads usage and
    EOL (end-of-life) metrics through the storage validator, then maps
    the validator's answer to a hardware state.
    Supported storage types: MMS, NVME, SSD.
    Possible states are:
      UNKNOWN - no access to the DUT, the storage type could not be
                determined, or there is no information to compute
                the metrics.
      NORMAL - the storage is in good shape and the device will work
                stably. (supported for all types)
      ACCEPTABLE - the storage has used almost all of its resources;
                the device will work stably but it is better to be
                ready for a replacement. (supported by MMS, NVME)
      NEED_REPLACEMENT - the storage is broken or worn past its life
                limit; the device may be unstable and cause flakiness
                in tests. (supported by all types)
    """
    def __init__(self, dut_host):
        """Create the verifier; no state is known until _verify runs.

        @param dut_host: host object passed through to the base verifier.
        """
        super(VerifyDutStorage, self).__init__(dut_host)
        self._state = None

    def _verify(self, set_label=True, run_badblocks=None):
        """Read the storage state and optionally record it on the host.

        @param set_label:     when true and a state was detected, save
                              the state to the host info and, for
                              NEED_REPLACEMENT, flag the device as
                              needing replacement.
        @param run_badblocks: forwarded as-is to the validator's
                              get_state().

        @raises base.AuditError: if anything fails while the state is
                                 being read or recorded.
        """
        if not self.host_is_up():
            logging.info('Host is down; Skipping the verification')
            return
        try:
            checker = storage.StorageStateValidator(self.get_host())
            detected_type = checker.get_type()
            logging.debug('Detected storage type: %s', detected_type)
            raw_state = checker.get_state(run_badblocks=run_badblocks)
            logging.debug('Detected storage state: %s', raw_state)
            hw_state = self.convert_state(raw_state)
            should_label = hw_state and set_label
            if should_label:
                self._set_host_info_state(constants.DUT_STORAGE_STATE_PREFIX,
                                          hw_state)
            if (should_label
                        and hw_state == constants.HW_STATE_NEED_REPLACEMENT):
                self.get_host().set_device_needs_replacement(
                        resultdir=self.get_result_dir())
            # Remembered even when no label was written.
            self._state = hw_state
        except Exception as err:
            message = ('Exception during getting state of storage %s' %
                       str(err))
            raise base.AuditError(message)

    def convert_state(self, state):
        """Translate a validator state into the verifier's hardware state.

        @param state: state value as returned by the storage validator.

        @returns: the matching constants.HW_STATE_* value, or None when
                  the validator state is not one of the known values.
        """
        # Pairs are checked in order with '==', first match wins.
        known_states = (
                (storage.STORAGE_STATE_NORMAL,
                 constants.HW_STATE_NORMAL),
                (storage.STORAGE_STATE_WARNING,
                 constants.HW_STATE_ACCEPTABLE),
                (storage.STORAGE_STATE_CRITICAL,
                 constants.HW_STATE_NEED_REPLACEMENT),
        )
        for validator_state, hw_state in known_states:
            if state == validator_state:
                return hw_state
        return None

    def get_state(self):
        """Return the state stored by the last _verify run (None if unset)."""
        return self._state
88
89
class VerifyServoUsb(base._BaseServoVerifier):
    """Verify the state of the USB-drive on the Servo.

    The state is determined by running badblocks against the USB-drive
    and checking whether any bad sectors were reported.
    Possible states are:
      UNKNOWN - no access to the device or servo, or the required
                software is not available on the servo.
      NOT_DETECTED - the USB-drive could not be probed through the
                servo host.
      NORMAL - the device is available for testing and no bad sectors
                were found on it; the device will work stably.
      NEED_REPLACEMENT - the device is available for testing and
                some bad sectors were found on it. The device can
                work but cause flakiness in the tests or repair process.

    badblocks errors:
    No such device or address while trying to determine device size
    """
    def _verify(self):
        """Check the USB-drive and record its state on the host info.

        The check runs on the DUT when the DUT is up and sees the
        USB-drive, otherwise on the servo host. When the USB-drive was
        detected, an attempt to re-install the stable image on it is made
        afterwards regardless of the check result.
        """
        if not self.servo_is_up():
            logging.info('Servo not initialized; Skipping the verification')
            return
        try:
            usb = self.get_host()._probe_and_validate_usb_dev()
            logging.debug('USB path: %s', usb)
        except Exception as e:
            usb = ''
            logging.debug('(Not critical) %s', e)
        if not usb:
            self._set_state(constants.HW_STATE_NOT_DETECTED)
            return
        # basic readonly check

        # path to USB if DUT is sshable
        logging.info('Starting verification of USB drive...')
        dut_usb = None
        if self.host_is_up():
            dut_usb = self._usb_path_on_dut()
        state = None
        try:
            if dut_usb:
                logging.info('Try run check on DUT side.')
                state = self._run_check_on_host(self._dut_host, dut_usb)
            else:
                logging.info('Try run check on ServoHost side.')
                servo = self.get_host().get_servo()
                servo_usb = servo.probe_host_usb_dev()
                state = self._run_check_on_host(self.get_host(), servo_usb)
        except Exception as e:
            if 'Timeout encountered:' in str(e):
                # A timeout gives no verdict: state stays None, so no
                # label is written; only the counter is incremented.
                logging.info('Timeout during running action')
                metrics.Counter(
                    'chromeos/autotest/audit/servo/usb/timeout'
                    ).increment(fields={'host': self._dut_host.hostname})
            else:
                # badblocks generate errors when device not reachable or
                # cannot read system information to execute process
                state = constants.HW_STATE_NEED_REPLACEMENT
            logging.debug(str(e))

        self._set_state(state)
        logging.info('Finished verification of USB drive.')

        self._install_stable_image()

    def _usb_path_on_dut(self):
        """Return path to the USB detected on DUT side.

        @returns: device path (e.g. /dev/sda) of the first device that
                  is reported as USB and answers to fdisk, or None when
                  no such device is found.
        """
        servo = self.get_host().get_servo()
        servo.switch_usbkey('dut')
        # ignore_status: a non-zero exit of ls (e.g. the glob matched no
        # device node) must produce an empty list so the caller can fall
        # back to the servo-host check instead of aborting the verifier.
        result = self._dut_host.run('ls /dev/sd[a-z]', ignore_status=True)
        for path in result.stdout.splitlines():
            cmd = ('. /usr/share/misc/chromeos-common.sh; get_device_type %s' %
                   path)
            check_run = self._dut_host.run(cmd, timeout=30, ignore_status=True)
            if check_run.stdout.strip() != 'USB':
                continue
            if self._quick_check_if_device_responsive(self._dut_host, path):
                logging.info('USB drive detected on DUT side as %s', path)
                return path
        return None

    def _quick_check_if_device_responsive(self, host, usb_path):
        """Verify that the device answers to 'fdisk -l' on the host.

        @param host:      Host on which the fdisk command is executed.
        @param usb_path:  Path to USB drive. (e.g. /dev/sda)

        @returns: True if fdisk exited with status 0; False if it
                  failed, timed out or raised AutoservRunError.
        """
        validate_cmd = 'fdisk -l %s' % usb_path
        try:
            resp = host.run(validate_cmd, ignore_status=True, timeout=30)
            if resp.exit_status == 0:
                return True
            logging.error('USB %s is not detected by fdisk!', usb_path)
        except error.AutoservRunError as e:
            if 'Timeout encountered' in str(e):
                logging.warning('Timeout encountered during fdisk run.')
            else:
                logging.error('(Not critical) fdisk check fail for %s; %s',
                              usb_path, str(e))
        return False

    def _run_check_on_host(self, host, usb):
        """Run badblocks on the provided host.

        @param host:   Host where USB drive mounted
        @param usb:    Path to USB drive. (e.g. /dev/sda)

        @returns: constants.HW_STATE_NEED_REPLACEMENT if badblocks
                  printed anything to stdout, otherwise
                  constants.HW_STATE_NORMAL.
        """
        command = 'badblocks -w -e 5 -b 4096 -t random %s' % usb
        logging.info('Running command: %s', command)
        # The response is the list of bad block on USB.
        # Extended time for 2 hour to run USB verification.
        # TODO (otabek@) (b:153661014#comment2) bring F3 to run
        # check faster if badblocks cannot finish in 2 hours.
        result = host.run(command, timeout=7200).stdout.strip()
        logging.info("Check result: '%s'", result)
        if result:
            # So has result is Bad and empty is Good.
            return constants.HW_STATE_NEED_REPLACEMENT
        return constants.HW_STATE_NORMAL

    def _install_stable_image(self):
        """Install stable image to the USB drive (best effort)."""
        # install fresh image to the USB because badblocks formats it
        # https://crbug.com/1091406
        try:
            logging.debug('Started to install test image to USB-drive')
            _, image_path = self._dut_host.stage_image_for_servo()
            self.get_host().get_servo().image_to_servo_usb(image_path,
                                                           power_off_dut=False)
            logging.debug('Finished installing test image to USB-drive')
        except Exception as e:
            # Ignore any error which happened during image install; it is
            # not related to the main goal. Only Exception is caught so
            # that KeyboardInterrupt/SystemExit still stop the process.
            logging.info('Fail to install test image to USB-drive')
            logging.debug('(Not critical) %s', e)

    def _set_state(self, state):
        """Save the USB state to the host info; a falsy state is skipped."""
        if state:
            self._set_host_info_state(constants.SERVO_USB_STATE_PREFIX, state)
223
224
class VerifyServoFw(base._BaseServoVerifier):
    """Force a servo firmware update when the firmware is out of date.

    In rare cases the labstation did not update the servo firmware when
    servod started. This verifier makes sure that servo_v4 and
    servo_micro are up-to-date.
    """
    def _verify(self):
        """Request a forced firmware update if the servo host is up."""
        servo_host_reachable = self.servo_host_is_up()
        if servo_host_reachable:
            servo_updater.update_servo_firmware(self.get_host(),
                                                force_update=True)
            return
        logging.info('Servo host is down; Skipping the verification')
239
240
class VerifyRPMConfig(base._BaseDUTVerifier):
    """Check the RPM config of the setup.

    The check is run against the RPM config settings of the host.
    """

    def _verify(self):
        """Run the RPM validator against the host when the host is up."""
        if self.host_is_up():
            dut = self.get_host()
            rpm_validator.verify_unsafe(dut)
        else:
            logging.info('Host is down; Skipping the verification')
252
253
class FlashServoKeyboardMapVerifier(base._BaseDUTVerifier):
    """Flash the keyboard map on servo."""

    def _verify(self):
        """Flash the keyboard map if the flasher supports the host.

        @raises base.AuditError: if the host is down or the servo is
                                 not initialized.
        """
        if not self.host_is_up():
            raise base.AuditError('Host is down')
        elif not self.servo_is_up():
            raise base.AuditError('Servo not initialized')

        dut = self.get_host()
        keyboard_flasher = servo_keyboard_flasher.ServoKeyboardMapFlasher()
        if not keyboard_flasher.is_image_supported(dut):
            # Nothing to flash for this host.
            return
        keyboard_flasher.update(dut)
267
268
class VerifyDUTMacAddress(base._BaseDUTVerifier):
    """Verify and update the cached NIC mac address on servo.

    A servo_v4 plugged into the DUT provides the NIC for it. The mac
    address is cached on the servod side to make debugging easier.
    """

    def _verify(self):
        """Refresh the cached mac address when it needs an update.

        @raises base.AuditError: if the host or the servo is not up.
        """
        # Checked in order; the first failing precondition raises.
        # NOTE(review): the second message says "Servo host" while the
        # check performed is servo_is_up() - confirm which is intended.
        preconditions = (
                (self.host_is_up, 'Host is down.'),
                (self.servo_is_up, 'Servo host is down.'),
        )
        for is_ready, failure in preconditions:
            if not is_ready():
                raise base.AuditError(failure)

        mac_helper = mac_address_helper.MacAddressHelper()
        mac_helper.update_if_needed(self.get_host())
284