#!/usr/bin/env python3
# Copyright 2020 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import logging


import common
from autotest_lib.client.common_lib import error
from autotest_lib.client.common_lib import utils as client_utils
from autotest_lib.server.cros.storage import storage_validate as storage
from autotest_lib.server.cros.servo.keyboard import servo_keyboard_flasher
from autotest_lib.server.cros.repair import mac_address_helper
from autotest_lib.site_utils.admin_audit import base
from autotest_lib.site_utils.admin_audit import constants
from autotest_lib.site_utils.admin_audit import rpm_validator
from autotest_lib.site_utils.admin_audit import servo_updater

try:
    from autotest_lib.utils.frozen_chromite.lib import metrics
except ImportError:
    metrics = client_utils.metrics_mock

# Common status used for statistics.
STATUS_FAIL = 'fail'
STATUS_SUCCESS = 'success'
STATUS_SKIPPED = 'skipped'


class VerifyDutStorage(base._BaseDUTVerifier):
    """Verify the state of the storage on the DUT.

    The verifier determines the type of the storage and reads its usage
    and EOL (end-of-life) metrics to decide the state.
    Supported storage types: MMS, NVME, SSD.
    (NOTE(review): "MMS" presumably means MMC/eMMC - confirm against
    storage_validate.)
    Possible states are:
      UNKNOWN - no access to the DUT, the storage type cannot be
                determined, or there is no information to read metrics.
      NORMAL - the storage is in good shape and the device will work
                stable. (supported for all types)
      ACCEPTABLE - the storage has used almost all of its resources; the
                device will work stable but it is better to be ready
                for replacement. (supported by MMS, NVME)
      NEED_REPLACEMENT - the storage is broken or worn past its life
                limit; the device can work but not stable, and can cause
                flakiness in the tests. (supported by all types)
    """
    def __init__(self, dut_host):
        super(VerifyDutStorage, self).__init__(dut_host)
        # Last state computed by _verify(); None until a verification
        # completes or when the validator state has no mapping.
        self._state = None

    def _verify(self, set_label=True, run_badblocks=None):
        """Read the storage state from the DUT and record it.

        @param set_label: When true and a state was detected, the state
                          is saved to the host info.
        @param run_badblocks: Passed through to the storage validator's
                              get_state().

        @raises base.AuditError: if any step of the detection fails.
        """
        if not self.host_is_up():
            logging.info('Host is down; Skipping the verification')
            return
        try:
            validator = storage.StorageStateValidator(self.get_host())
            storage_type = validator.get_type()
            logging.debug('Detected storage type: %s', storage_type)
            storage_state = validator.get_state(run_badblocks=run_badblocks)
            logging.debug('Detected storage state: %s', storage_state)
            state = self.convert_state(storage_state)
            if state and set_label:
                self._set_host_info_state(constants.DUT_STORAGE_STATE_PREFIX,
                                          state)
                if state == constants.HW_STATE_NEED_REPLACEMENT:
                    self.get_host().set_device_needs_replacement(
                        resultdir=self.get_result_dir())
            self._state = state
        except Exception as e:
            raise base.AuditError('Exception during getting state of'
                                  ' storage %s' % str(e))

    def convert_state(self, state):
        """Map a state reported by the validator to a verifier state.

        @param state: One of the storage.STORAGE_STATE_* values.

        @returns: The matching constants.HW_STATE_* value, or None when
                  the validator state is not recognized.
        """
        if state == storage.STORAGE_STATE_NORMAL:
            return constants.HW_STATE_NORMAL
        if state == storage.STORAGE_STATE_WARNING:
            return constants.HW_STATE_ACCEPTABLE
        if state == storage.STORAGE_STATE_CRITICAL:
            return constants.HW_STATE_NEED_REPLACEMENT
        return None

    def get_state(self):
        """Return the state stored by the last _verify() run (or None)."""
        return self._state


class VerifyServoUsb(base._BaseServoVerifier):
    """Verify the state of the USB-drive on the Servo.

    The state is determined by checking the USB-drive for bad sectors.
    Possible states are:
      UNKNOWN - no access to the device or servo, or the required
                software is not available on the servo.
      NORMAL - the device is available for testing and no bad sectors
                were found on it; the device will work stable.
      NEED_REPLACEMENT - the device is available for testing and
                some bad sectors were found on it. The device can
                work but cause flakiness in the tests or repair process.

    badblocks errors:
      No such device or address while trying to determine device size
    """
    def _verify(self):
        """Run the bad-sector check for the servo USB-drive.

        The check runs on the DUT when the DUT is up and sees the drive,
        otherwise on the servo host. After the check a fresh test image
        is installed on the drive.
        """
        if not self.servo_is_up():
            logging.info('Servo not initialized; Skipping the verification')
            return
        try:
            usb = self.get_host()._probe_and_validate_usb_dev()
            logging.debug('USB path: %s', usb)
        except Exception as e:
            usb = ''
            logging.debug('(Not critical) %s', e)
        if not usb:
            self._set_state(constants.HW_STATE_NOT_DETECTED)
            return
        # basic readonly check

        # path to USB if DUT is sshable
        logging.info('Starting verification of USB drive...')
        dut_usb = None
        if self.host_is_up():
            dut_usb = self._usb_path_on_dut()
        state = None
        try:
            if dut_usb:
                logging.info('Try run check on DUT side.')
                state = self._run_check_on_host(self._dut_host, dut_usb)
            else:
                logging.info('Try run check on ServoHost side.')
                servo = self.get_host().get_servo()
                servo_usb = servo.probe_host_usb_dev()
                state = self._run_check_on_host(self.get_host(), servo_usb)
        except Exception as e:
            if 'Timeout encountered:' in str(e):
                # A timeout says nothing about the drive health, so the
                # state stays None and no label is written.
                logging.info('Timeout during running action')
                metrics.Counter(
                    'chromeos/autotest/audit/servo/usb/timeout'
                    ).increment(fields={'host': self._dut_host.hostname})
            else:
                # badblocks generate errors when device not reachable or
                # cannot read system information to execute process
                state = constants.HW_STATE_NEED_REPLACEMENT
            logging.debug(str(e))

        self._set_state(state)
        logging.info('Finished verification of USB drive.')

        self._install_stable_image()

    def _usb_path_on_dut(self):
        """Return path to the USB detected on DUT side.

        @returns: Path of the first /dev/sd[a-z] device which the DUT
                  reports as 'USB' and which responds to fdisk; None if
                  there is no such device.
        """
        servo = self.get_host().get_servo()
        servo.switch_usbkey('dut')
        # ignore_status: `ls` exits non-zero when the glob matches nothing
        # (no sd devices on the DUT). That is not an error here; it only
        # means the check has to run on the servo host instead.
        result = self._dut_host.run('ls /dev/sd[a-z]', ignore_status=True)
        for path in result.stdout.splitlines():
            cmd = ('. /usr/share/misc/chromeos-common.sh; get_device_type %s' %
                   path)
            check_run = self._dut_host.run(cmd, timeout=30, ignore_status=True)
            if check_run.stdout.strip() != 'USB':
                continue
            if self._quick_check_if_device_responsive(self._dut_host, path):
                logging.info('USB drive detected on DUT side as %s', path)
                return path
        return None

    def _quick_check_if_device_responsive(self, host, usb_path):
        """Verify that the device responds to a quick `fdisk -l` call.

        @param host: Host object used to run the command.
        @param usb_path: Path to the USB drive. (e.g. /dev/sda)

        @returns: True if fdisk exited with status 0; False if it failed,
                  timed out or raised AutoservRunError.
        """
        validate_cmd = 'fdisk -l %s' % usb_path
        try:
            resp = host.run(validate_cmd, ignore_status=True, timeout=30)
            if resp.exit_status == 0:
                return True
            logging.error('USB %s is not detected by fdisk!', usb_path)
        except error.AutoservRunError as e:
            if 'Timeout encountered' in str(e):
                logging.warning('Timeout encountered during fdisk run.')
            else:
                logging.error('(Not critical) fdisk check fail for %s; %s',
                              usb_path, str(e))
        return False

    def _run_check_on_host(self, host, usb):
        """Run badblocks on the provided host.

        @param host: Host where USB drive mounted
        @param usb: Path to USB drive. (e.g. /dev/sda)

        @returns: constants.HW_STATE_NEED_REPLACEMENT if badblocks printed
                  anything to stdout, constants.HW_STATE_NORMAL otherwise.
        """
        command = 'badblocks -w -e 5 -b 4096 -t random %s' % usb
        logging.info('Running command: %s', command)
        # The response is the list of bad block on USB.
        # Extended time for 2 hour to run USB verification.
        # TODO (otabek@) (b:153661014#comment2) bring F3 to run
        # check faster if badblocks cannot finish in 2 hours.
        result = host.run(command, timeout=7200).stdout.strip()
        logging.info("Check result: '%s'", result)
        if result:
            # Any output means bad blocks were reported; empty is good.
            return constants.HW_STATE_NEED_REPLACEMENT
        return constants.HW_STATE_NORMAL

    def _install_stable_image(self):
        """Install stable image to the USB drive (best effort)."""
        # install fresh image to the USB because badblocks formats it
        # https://crbug.com/1091406
        try:
            logging.debug('Started to install test image to USB-drive')
            _, image_path = self._dut_host.stage_image_for_servo()
            self.get_host().get_servo().image_to_servo_usb(image_path,
                                                     power_off_dut=False)
            logging.debug('Finished installing test image to USB-drive')
        except Exception as e:
            # Ignore any error which happened during image install; it is
            # not related to the main goal. Only Exception is caught so
            # KeyboardInterrupt/SystemExit still propagate.
            logging.info('Fail to install test image to USB-drive')
            logging.debug('(Not critical) %s', e)

    def _set_state(self, state):
        """Save the USB-drive state to host info; no-op for empty state."""
        if state:
            self._set_host_info_state(constants.SERVO_USB_STATE_PREFIX, state)


class VerifyServoFw(base._BaseServoVerifier):
    """Force update Servo firmware if it is not up-to-date.

    This covers the rare case when servo firmware was not updated by the
    labstation when servod started. This should ensure that the servo_v4
    and servo_micro are up-to-date.
    """
    def _verify(self):
        """Force the servo firmware update when the servo host is up."""
        if not self.servo_host_is_up():
            logging.info('Servo host is down; Skipping the verification')
            return
        servo_updater.update_servo_firmware(
            self.get_host(),
            force_update=True)


class VerifyRPMConfig(base._BaseDUTVerifier):
    """Check RPM config of the setup.

    This check runs against the RPM config settings.
    """

    def _verify(self):
        """Run the RPM validation when the host is up."""
        if not self.host_is_up():
            logging.info('Host is down; Skipping the verification')
            return
        rpm_validator.verify_unsafe(self.get_host())


class FlashServoKeyboardMapVerifier(base._BaseDUTVerifier):
    """Flash the keyboard map on servo."""

    def _verify(self):
        """Flash the keyboard map if the flasher supports the host.

        @raises base.AuditError: if the host is down or the servo is not
                                 initialized.
        """
        if not self.host_is_up():
            raise base.AuditError('Host is down')
        if not self.servo_is_up():
            raise base.AuditError('Servo not initialized')

        host = self.get_host()
        flasher = servo_keyboard_flasher.ServoKeyboardMapFlasher()
        if flasher.is_image_supported(host):
            flasher.update(host)


class VerifyDUTMacAddress(base._BaseDUTVerifier):
    """Verify and update cached NIC mac address on servo.

    Servo_v4 is plugged to the DUT and provides the NIC for it. The mac
    address is cached on the servod side for better debugging.
    """

    def _verify(self):
        """Update the cached mac address if needed.

        @raises base.AuditError: if the host is down or the servo is not
                                 up.
        """
        if not self.host_is_up():
            raise base.AuditError('Host is down.')
        if not self.servo_is_up():
            raise base.AuditError('Servo host is down.')

        helper = mac_address_helper.MacAddressHelper()
        helper.update_if_needed(self.get_host())