1# Lint as: python2, python3 2"""This class defines the Remote host class.""" 3 4from __future__ import absolute_import 5from __future__ import division 6from __future__ import print_function 7import os, logging, time 8import six 9from six.moves import urllib 10import re 11 12import common 13 14from autotest_lib.client.common_lib import error 15from autotest_lib.client.common_lib.global_config import global_config 16from autotest_lib.server import utils 17from autotest_lib.server.hosts import base_classes 18from autotest_lib.server.hosts.tls_client.connection import TLSConnection 19 20 21class RemoteHost(base_classes.Host): 22 """ 23 This class represents a remote machine on which you can run 24 programs. 25 26 It may be accessed through a network, a serial line, ... 27 It is not the machine autoserv is running on. 28 29 Implementation details: 30 This is an abstract class, leaf subclasses must implement the methods 31 listed here and in parent classes which have no implementation. They 32 may reimplement methods which already have an implementation. You 33 must not instantiate this class but should instantiate one of those 34 leaf subclasses. 35 """ 36 37 DEFAULT_REBOOT_TIMEOUT = base_classes.Host.DEFAULT_REBOOT_TIMEOUT 38 DEFAULT_HALT_TIMEOUT = 2 * 60 39 _LABEL_FUNCTIONS = [] 40 _DETECTABLE_LABELS = [] 41 42 VAR_LOG_MESSAGES_COPY_PATH = "/var/tmp/messages.autotest_start" 43 TMP_DIR_TEMPLATE = '/usr/local/tmp/autoserv-XXXXXX' 44 45 46 def _initialize(self, hostname, autodir=None, *args, **dargs): 47 super(RemoteHost, self)._initialize(*args, **dargs) 48 49 self.hostname = hostname 50 self.autodir = autodir 51 self.tmp_dirs = [] 52 53 get_value = global_config.get_config_value 54 55 self.tls_connection = None 56 try: 57 self.tls_connection = TLSConnection() 58 except Exception as e: 59 logging.warning("Could not establish TLS connection %s", e) 60 61 def __repr__(self): 62 return "<remote host: %s>" % self.hostname 63 64 65 def close(self): 66 # pylint: disable=missing-docstring 67 super(RemoteHost, self).close() 68 self.stop_loggers() 69 70 if hasattr(self, 'tmp_dirs'): 71 for dir in self.tmp_dirs: 72 try: 73 self.run('rm -rf "%s"' % (utils.sh_escape(dir))) 74 except error.AutoservRunError: 75 pass 76 if self.tls_connection: 77 self.tls_connection.close() 78 self.tls_connection = None 79 80 def job_start(self): 81 """ 82 Abstract method, called the first time a remote host object 83 is created for a specific host after a job starts. 84 85 This method depends on the create_host factory being used to 86 construct your host object. If you directly construct host objects 87 you will need to call this method yourself (and enforce the 88 single-call rule). 89 """ 90 try: 91 cmd = ('test ! -e /var/log/messages || cp -f /var/log/messages ' 92 '%s') % self.VAR_LOG_MESSAGES_COPY_PATH 93 self.run(cmd) 94 except Exception as e: 95 # Non-fatal error 96 logging.info('Failed to copy /var/log/messages at startup: %s', e) 97 98 99 def get_autodir(self): 100 return self.autodir 101 102 103 def set_autodir(self, autodir): 104 """ 105 This method is called to make the host object aware of the 106 where autotest is installed. Called in server/autotest.py 107 after a successful install 108 """ 109 self.autodir = autodir 110 111 112 def sysrq_reboot(self): 113 # pylint: disable=missing-docstring 114 self.run_background('echo b > /proc/sysrq-trigger') 115 116 117 def halt(self, timeout=DEFAULT_HALT_TIMEOUT, wait=True): 118 """ 119 Shut down the remote host. 120 121 N.B. This method makes no provision to bring the target back 122 up. The target will be offline indefinitely if there's no 123 independent hardware (servo, RPM, etc.) to force the target to 124 power on. 125 126 @param timeout Maximum time to wait for host down, in seconds. 127 @param wait Whether to wait for the host to go offline. 128 """ 129 self.run_background('sleep 1 ; halt') 130 if wait: 131 self.wait_down(timeout=timeout) 132 133 134 def reboot(self, timeout=DEFAULT_REBOOT_TIMEOUT, wait=True, 135 fastsync=False, reboot_cmd=None, **dargs): 136 """ 137 Reboot the remote host. 138 139 Args: 140 timeout - How long to wait for the reboot. 141 wait - Should we wait to see if the machine comes back up. 142 If this is set to True, ignores reboot_cmd's error 143 even if occurs. 144 fastsync - Don't wait for the sync to complete, just start one 145 and move on. This is for cases where rebooting prompty 146 is more important than data integrity and/or the 147 machine may have disks that cause sync to never return. 148 reboot_cmd - Reboot command to execute. 149 """ 150 self.reboot_setup(**dargs) 151 if not reboot_cmd: 152 reboot_cmd = ('sync & sleep 5; ' 153 'reboot & sleep 60; ' 154 'reboot -f & sleep 10; ' 155 'reboot -nf & sleep 10; ' 156 'telinit 6') 157 158 def reboot(): 159 # pylint: disable=missing-docstring 160 self.record("GOOD", None, "reboot.start") 161 current_boot_id = None 162 try: 163 current_boot_id = self.get_boot_id() 164 165 # sync before starting the reboot, so that a long sync during 166 # shutdown isn't timed out by wait_down's short timeout 167 if not fastsync: 168 self.run('sync; sync', timeout=timeout, ignore_status=True) 169 170 self.run_background(reboot_cmd) 171 except error.AutoservRunError: 172 # If wait is set, ignore the error here, and rely on the 173 # wait_for_restart() for stability, instead. 174 # reboot_cmd sometimes causes an error even if reboot is 175 # successfully in progress. This is difficult to be avoided, 176 # because we have no much control on remote machine after 177 # "reboot" starts. 178 if not wait or current_boot_id is None: 179 # TODO(b/37652392): Revisit no-wait case, later. 180 self.record("ABORT", None, "reboot.start", 181 "reboot command failed") 182 raise 183 if wait: 184 self.wait_for_restart(timeout, old_boot_id=current_boot_id, 185 **dargs) 186 187 # if this is a full reboot-and-wait, run the reboot inside a group 188 if wait: 189 self.log_op(self.OP_REBOOT, reboot) 190 else: 191 reboot() 192 193 def suspend(self, timeout, suspend_cmd, 194 allow_early_resume=False): 195 """ 196 Suspend the remote host. 197 198 Args: 199 timeout - How long to wait for the suspend in integer seconds. 200 suspend_cmd - suspend command to execute. 201 allow_early_resume - Boolean that indicate whether resume 202 before |timeout| is ok. 203 Raises: 204 error.AutoservSuspendError - If |allow_early_resume| is False 205 and if device resumes before 206 |timeout|. 207 """ 208 # define a function for the supend and run it in a group 209 def suspend(): 210 # pylint: disable=missing-docstring 211 self.record("GOOD", None, "suspend.start for %d seconds" % (timeout)) 212 try: 213 self.run_background(suspend_cmd) 214 except error.AutoservRunError: 215 self.record("ABORT", None, "suspend.start", 216 "suspend command failed") 217 raise error.AutoservSuspendError("suspend command failed") 218 219 # Wait for some time, to ensure the machine is going to sleep. 220 # Not too long to check if the machine really suspended. 221 time_slice = min(timeout / 2, 300) 222 time.sleep(time_slice) 223 time_counter = time_slice 224 while time_counter < timeout + 60: 225 # Check if the machine is back. We check regularely to 226 # ensure the machine was suspended long enough. 227 if utils.ping(self.hostname, tries=1, deadline=1) == 0: 228 return 229 else: 230 if time_counter > timeout - 10: 231 time_slice = 5 232 time.sleep(time_slice) 233 time_counter += time_slice 234 235 if utils.ping(self.hostname, tries=1, deadline=1) != 0: 236 raise error.AutoservSuspendError( 237 "DUT is not responding after %d seconds" % (time_counter)) 238 239 start_time = time.time() 240 self.log_op(self.OP_SUSPEND, suspend) 241 lasted = time.time() - start_time 242 logging.info("Device resumed after %d secs", lasted) 243 if (lasted < timeout and not allow_early_resume): 244 raise error.AutoservSuspendError( 245 "Suspend did not last long enough: %d instead of %d" % ( 246 lasted, timeout)) 247 248 def reboot_followup(self, *args, **dargs): 249 # pylint: disable=missing-docstring 250 super(RemoteHost, self).reboot_followup(*args, **dargs) 251 if self.job: 252 self.job.profilers.handle_reboot(self) 253 254 255 def wait_for_restart(self, timeout=DEFAULT_REBOOT_TIMEOUT, **dargs): 256 """ 257 Wait for the host to come back from a reboot. This wraps the 258 generic wait_for_restart implementation in a reboot group. 259 """ 260 def op_func(): 261 # pylint: disable=missing-docstring 262 super(RemoteHost, self).wait_for_restart(timeout=timeout, **dargs) 263 self.log_op(self.OP_REBOOT, op_func) 264 265 266 def cleanup(self): 267 # pylint: disable=missing-docstring 268 super(RemoteHost, self).cleanup() 269 self.reboot() 270 271 272 def get_tmp_dir(self, parent='/tmp'): 273 """ 274 Return the pathname of a directory on the host suitable 275 for temporary file storage. 276 277 The directory and its content will be deleted automatically 278 on the destruction of the Host object that was used to obtain 279 it. 280 """ 281 template = os.path.join(parent, self.TMP_DIR_TEMPLATE) 282 parent = os.path.dirname(template) 283 dir_name = self.run('mkdir -p %s && mktemp -d %s' % (parent, template)).stdout.rstrip() 284 self.tmp_dirs.append(dir_name) 285 return dir_name 286 287 288 def get_platform_label(self): 289 """ 290 Return the platform label, or None if platform label is not set. 291 """ 292 293 if self.job: 294 keyval_path = os.path.join(self.job.resultdir, 'host_keyvals', 295 self.hostname) 296 keyvals = utils.read_keyval(keyval_path) 297 return keyvals.get('platform', None) 298 else: 299 return None 300 301 302 def get_all_labels(self): 303 """ 304 Return all labels, or empty list if label is not set. 305 """ 306 if self.job: 307 keyval_path = os.path.join(self.job.resultdir, 'host_keyvals', 308 self.hostname) 309 keyvals = utils.read_keyval(keyval_path) 310 all_labels = keyvals.get('labels', '') 311 if all_labels: 312 all_labels = all_labels.split(',') 313 return [urllib.parse.unquote(label) for label in all_labels] 314 return [] 315 316 317 def delete_tmp_dir(self, tmpdir): 318 """ 319 Delete the given temporary directory on the remote machine. 320 321 @param tmpdir The directory to delete. 322 """ 323 self.run('rm -rf "%s"' % utils.sh_escape(tmpdir), ignore_status=True) 324 self.tmp_dirs.remove(tmpdir) 325 326 327 def delete_all_tmp_dirs(self, parent='/tmp'): 328 """ 329 Delete all directories in parent that were created by get_tmp_dir 330 331 Note that this may involve deleting directories created by calls to 332 get_tmp_dir on a different RemoteHost instance than the one running this 333 method. Only perform this operation when certain that this will not 334 cause unexpected behavior. 335 """ 336 # follow mktemp's behavior of only expanding 3 or more consecutive Xs 337 if isinstance(parent, (list, tuple)): 338 parents = parent 339 else: 340 parents = [parent] 341 rm_paths = [] 342 for parent in parents: 343 base_template = re.sub('XXXX*', '*', self.TMP_DIR_TEMPLATE) 344 # distinguish between non-wildcard asterisks in parent directory name 345 # and wildcards inserted from the template 346 base = '*'.join( 347 ['"%s"' % utils.sh_escape(x) for x in base_template.split('*')]) 348 path = '"%s' % os.path.join(utils.sh_escape(parent), base[1:]) 349 rm_paths.append(path) 350 # remove deleted directories from tmp_dirs 351 regex = os.path.join(parent, re.sub('(XXXX*)', 352 lambda match: '[a-zA-Z0-9]{%d}' % len(match.group(1)), 353 self.TMP_DIR_TEMPLATE)) 354 regex += '(/|$)' # remove if matches, or is within a dir that matches 355 self.tmp_dirs = [x for x in self.tmp_dirs if not re.match(regex, x)] 356 357 self.run('rm -rf {}'.format(" ".join(rm_paths)), ignore_status=True) 358 359 def check_uptime(self): 360 """ 361 Check that uptime is available and monotonically increasing. 362 """ 363 if not self.is_up(): 364 raise error.AutoservHostError('Client does not appear to be up') 365 result = self.run("/bin/cat /proc/uptime", 30) 366 return result.stdout.strip().split()[0] 367 368 369 def check_for_lkdtm(self): 370 """ 371 Check for kernel dump test module. return True if exist. 372 """ 373 cmd = 'ls /sys/kernel/debug/provoke-crash/DIRECT' 374 return self.run(cmd, ignore_status=True).exit_status == 0 375 376 377 def are_wait_up_processes_up(self): 378 """ 379 Checks if any HOSTS waitup processes are running yet on the 380 remote host. 381 382 Returns True if any the waitup processes are running, False 383 otherwise. 384 """ 385 processes = self.get_wait_up_processes() 386 if len(processes) == 0: 387 return True # wait up processes aren't being used 388 for procname in processes: 389 exit_status = self.run("{ ps -e || ps; } | grep '%s'" % procname, 390 ignore_status=True).exit_status 391 if exit_status == 0: 392 return True 393 return False 394 395 396 def get_labels(self): 397 """Return a list of labels for this given host. 398 399 This is the main way to retrieve all the automatic labels for a host 400 as it will run through all the currently implemented label functions. 401 """ 402 labels = [] 403 for label_function in self._LABEL_FUNCTIONS: 404 try: 405 label = label_function(self) 406 except Exception: 407 logging.exception('Label function %s failed; ignoring it.', 408 label_function.__name__) 409 label = None 410 if label: 411 if type(label) is str: 412 labels.append(label) 413 elif type(label) is list: 414 labels.extend(label) 415 return labels 416 417 def get_result_dir(self): 418 """Return the result directory path if passed or None if not. 419 420 @return string 421 """ 422 if self.job and hasattr(self.job, 'resultdir'): 423 return self.job.resultdir 424 return None 425