# Copyright (c) 2019 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""This file provides core logic for labstation verify/repair process."""

import logging

from autotest_lib.client.common_lib import error
from autotest_lib.server import afe_utils
from autotest_lib.server.hosts import base_label
from autotest_lib.server.hosts import cros_label
from autotest_lib.server.hosts import labstation_repair
from autotest_lib.server.cros import provision
from autotest_lib.server.hosts import base_servohost
from autotest_lib.server.cros.dynamic_suite import constants as ds_constants
from autotest_lib.server.cros.dynamic_suite import tools
from autotest_lib.client.common_lib.cros import dev_server
from autotest_lib.server import utils as server_utils
from autotest_lib.site_utils.rpm_control_system import rpm_client


class LabstationHost(base_servohost.BaseServoHost):
    """Labstation specific host class"""

    # Age, in minutes, beyond which an in_use lock file is considered stale
    # and is ignored.
    IN_USE_FILE_EXPIRE_MINS = 90

    # Minimum uptime, in hours, before a DUT-requested reboot is honored.
    # This prevents a broken DUT from keeping a labstation in a reboot loop.
    UP_TIME_THRESH_HOLD_HOURS = 6

    VERSION_PREFIX = provision.CROS_VERSION_PREFIX

    @staticmethod
    def check_host(host, timeout=10):
        """
        Check if the given host is a labstation host.

        Failures to run the probe command (AutoservRunError) and ssh
        timeouts (AutoservSSHTimeout) are caught here and reported as
        "not a labstation" rather than propagated to the caller.

        @param host: An ssh host representing a device.
        @param timeout: The timeout for the run command.

        @return: True if the host device is labstation, False if it is not
                 or if the probe command could not be run.

        """
        try:
            result = host.run(
                    'grep -q labstation /etc/lsb-release',
                    ignore_status=True, timeout=timeout)
        except (error.AutoservRunError, error.AutoservSSHTimeout):
            return False
        return result.exit_status == 0


    def _initialize(self, hostname, *args, **dargs):
        """Set up the repair strategy and label retriever for this host.

        @param hostname: Hostname of the labstation, forwarded to the
                         parent class together with *args and **dargs.
        """
        super(LabstationHost, self)._initialize(hostname=hostname,
                                                *args, **dargs)
        self._repair_strategy = (
                labstation_repair.create_labstation_repair_strategy())
        self.labels = base_label.LabelRetriever(cros_label.LABSTATION_LABELS)


    def is_reboot_requested(self):
        """Check if a reboot is requested for this labstation, the reboot can
        either be requested from labstation or DUTs. For request from DUTs we
        only process it if uptime longer than a threshold because we want
        to prevent a broken servo keep its labstation in reboot cycle.

        @returns True if a reboot is required, otherwise False
        """
        # A pending update always wins, regardless of uptime.
        if self._check_update_status() == self.UPDATE_STATE.PENDING_REBOOT:
            logging.info('Labstation reboot requested from labstation for'
                         ' update image')
            return True

        if not self._validate_uptime():
            logging.info('Ignoring DUTs reboot request because %s was'
                         ' rebooted in last %d hours.',
                         self.hostname, self.UP_TIME_THRESH_HOLD_HOURS)
            return False

        # Each matching file is one reboot request left in the temp dir.
        # find prints nothing to stdout when there is no match, so empty
        # output means no request.
        cmd = 'find %s*%s' % (self.TEMP_FILE_DIR, self.REBOOT_FILE_POSTFIX)
        output = self.run(cmd, ignore_status=True).stdout
        if not output:
            return False

        reboot_request_files = output.strip().split('\n')
        logging.info('%s DUT(s) are currently requesting to'
                     ' reboot labstation.', len(reboot_request_files))
        return True


    def try_reboot(self):
        """Try to reboot the labstation if it's safe to do(no servo in use,
        and not processing updates), and cleanup reboot control file.
        """
        if self._is_servo_in_use():
            logging.info('Aborting reboot action because some DUT(s) are'
                         ' currently using servo(s).')
            return

        update_state = self._check_update_status()
        if update_state == self.UPDATE_STATE.RUNNING:
            logging.info('Aborting reboot action because an update process'
                         ' is running.')
            return

        if update_state == self.UPDATE_STATE.PENDING_REBOOT:
            self._post_update_reboot()
        else:
            self._servo_host_reboot()

        # Refresh the version label after the reboot, then drop the
        # request files so the same requests are not processed again.
        self.update_cros_version_label()
        logging.info('Cleaning up reboot control files.')
        self._cleanup_post_reboot()


    def get_labels(self):
        """Return the detected labels on the host."""
        return self.labels.get_labels(self)


    def get_os_type(self):
        """Return the OS type of this host, always 'labstation'."""
        return 'labstation'


    def verify_job_repo_url(self, tag=''):
        """
        Make sure the autotest packages for this host's job_repo_url are
        staged.

        The job_repo_url is read from the host info store attributes.
        Eg: The job_repo_url "http://lmn.cd.ab.xyx:8080/static/\
        lumpy-release/R29-4279.0.0/autotest/packages" claims to have the
        autotest package for lumpy-release/R29-4279.0.0. The devserver and
        build name are parsed out of that url and the devserver is asked to
        stage the 'autotest_packages' artifact for the build. If the host
        has no job_repo_url, a warning is logged and nothing is staged.

        @param tag: The tag from the server job, in the format
                    <job_id>-<user>/<hostname>, or <hostless> for a server
                    job. Not used by this implementation; accepted so the
                    signature stays compatible with callers.
        """
        info = self.host_info_store.get()
        job_repo_url = info.attributes.get(ds_constants.JOB_REPO_URL, '')
        if not job_repo_url:
            logging.warning('No job repo url set on host %s', self.hostname)
            return

        logging.info('Verifying job repo url %s', job_repo_url)
        devserver_url, image_name = tools.get_devserver_build_from_package_url(
                job_repo_url)

        ds = dev_server.ImageServer(devserver_url)

        logging.info('Staging autotest artifacts for %s on devserver %s',
                     image_name, ds.url())

        ds.stage_artifacts(image_name, ['autotest_packages'])


    def host_version_prefix(self, image):
        """Return version label prefix.

        In case the CrOS provisioning version is something other than the
        standard CrOS version e.g. CrOS TH version, this function will
        find the prefix from provision.py.

        @param image: The image name to find its version prefix.
        @returns: A prefix string for the image type.
        """
        return provision.get_version_label_prefix(image)


    def stage_server_side_package(self, image=None):
        """Stage autotest server-side package on devserver.

        The build to stage is picked in this order: the given image, the
        host's job_repo_url attribute, then the build recorded in the host
        info store.

        @param image: Full path of an OS image to install or a build name.

        @return: A url to the autotest server-side package.

        @raise: error.AutoservError if fail to locate the build to test with, or
                fail to stage server-side package.
        """
        # If enable_drone_in_restricted_subnet is False, do not set hostname
        # in devserver.resolve call, so a devserver in non-restricted subnet
        # is picked to stage autotest server package for drone to download.
        hostname = self.hostname
        if not server_utils.ENABLE_DRONE_IN_RESTRICTED_SUBNET:
            hostname = None

        if image:
            image_name = tools.get_build_from_image(image)
            if not image_name:
                raise error.AutoservError(
                        'Failed to parse build name from %s' % image)
            ds = dev_server.ImageServer.resolve(image_name, hostname)
        else:
            info = self.host_info_store.get()
            job_repo_url = info.attributes.get(ds_constants.JOB_REPO_URL, '')
            if job_repo_url:
                devserver_url, image_name = (
                        tools.get_devserver_build_from_package_url(
                                job_repo_url))
                # If enable_drone_in_restricted_subnet is True, use the
                # existing devserver. Otherwise, resolve a new one in
                # non-restricted subnet.
                if server_utils.ENABLE_DRONE_IN_RESTRICTED_SUBNET:
                    ds = dev_server.ImageServer(devserver_url)
                else:
                    ds = dev_server.ImageServer.resolve(image_name)
            elif info.build is not None:
                ds = dev_server.ImageServer.resolve(info.build, hostname)
                image_name = info.build
            else:
                raise error.AutoservError(
                        'Failed to stage server-side package. The host has '
                        'no job_repo_url attribute or cros-version label.')

        ds.stage_artifacts(image_name, ['autotest_server_package'])
        return '%s/static/%s/%s' % (ds.url(), image_name,
                                    'autotest_server_package.tar.bz2')


    def repair(self):
        """Attempt to repair a labstation."""
        message = 'Beginning repair for host %s board %s model %s'
        info = self.host_info_store.get()
        message %= (self.hostname, info.board, info.model)
        self.record('INFO', None, None, message)
        self._repair_strategy.repair(self)


    def update_cros_version_label(self):
        """Update cros-version label on labstation"""
        image_name = self.get_full_release_path()
        if not image_name:
            # No release path available: drop any existing version labels
            # instead of leaving a possibly outdated one in place.
            logging.info('Could not get labstation version, it could be'
                         ' the labstation is running a customized image.')
            info = self.host_info_store.get()
            info.clear_version_labels(version_prefix=self.VERSION_PREFIX)
            self.host_info_store.commit(info)
            return
        afe_utils.add_provision_labels(self, self.VERSION_PREFIX, image_name)


    def _validate_uptime(self):
        """Check whether the labstation has been up long enough to reboot.

        @returns True if the uptime reported by check_uptime(), compared as
                 seconds, exceeds UP_TIME_THRESH_HOLD_HOURS, otherwise False.
        """
        return (float(self.check_uptime()) >
                self.UP_TIME_THRESH_HOLD_HOURS * 3600)


    def _is_servo_in_use(self):
        """Determine if there are any DUTs currently running task that uses
        servo, only files that has been touched within pre-set threshold of
        minutes counts.

        @returns True if any DUTs is using servos, otherwise False.
        """
        cmd = 'find %s*%s -mmin -%s' % (self.TEMP_FILE_DIR,
                                        self.LOCK_FILE_POSTFIX,
                                        self.IN_USE_FILE_EXPIRE_MINS)
        result = self.run(cmd, ignore_status=True)
        return bool(result.stdout)


    def _cleanup_post_reboot(self):
        """Clean up all xxxx_reboot file after reboot."""
        cmd = 'rm %s*%s' % (self.TEMP_FILE_DIR, self.REBOOT_FILE_POSTFIX)
        self.run(cmd, ignore_status=True)


    def rpm_power_on_and_wait(self, _rpm_client=None):
        """Power on a labstation through RPM and wait for it to come up"""
        return self.change_rpm_state_and_wait("ON", _rpm_client=_rpm_client)


    def rpm_power_off_and_wait(self, _rpm_client=None):
        """Power off a labstation through RPM and wait for it to shut down"""
        return self.change_rpm_state_and_wait("OFF", _rpm_client=_rpm_client)


    def change_rpm_state_and_wait(self, state, _rpm_client=None):
        """Change the state of a labstation

        @param state: "ON" or "OFF"
        @param _rpm_client: rpm_client module, to support testing

        @raises KeyError: If state is neither "ON" nor "OFF"; raised before
                          any power change is requested.
        @raises Exception: If the labstation did not reach the requested
                           state within the timeout.
        """
        _rpm_client = _rpm_client or rpm_client
        # One table keeps the wait function and its timeout paired per state.
        wait, timeout = {
            "ON": (self.wait_up, self.BOOT_TIMEOUT),
            "OFF": (self.wait_down, self.WAIT_DOWN_REBOOT_TIMEOUT),
        }[state]
        _rpm_client.set_power(self, state)
        if not wait(timeout=timeout):
            msg = "%s didn't enter %s state in %s seconds" % (
                getattr(self, 'hostname', None),
                state,
                timeout,
            )
            raise Exception(msg)