#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Copyright 2019 The ChromiumOS Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""This module controls locking and unlocking of test machines."""


import argparse
import enum
import getpass
import os
import sys

from cros_utils import command_executer
from cros_utils import logger
from cros_utils import machines
import file_lock_machine


class LockException(Exception):
    """Base class for exceptions in this module."""


class MachineNotPingable(LockException):
    """Raised when machine does not respond to ping."""


class LockingError(LockException):
    """Raised when server fails to lock/unlock machine as requested."""


class DontOwnLock(LockException):
    """Raised when user attempts to unlock machine locked by someone else."""

    # This should not be raised if the user specified '--force'


class MachineType(enum.Enum):
    """Enum class to hold machine type."""

    LOCAL = "local"
    CROSFLEET = "crosfleet"


class LockManager:
    """Class for locking/unlocking machines via different locking modes.

    This class contains methods for checking the locked status of machines,
    and for changing the locked status. It handles HW lab machines and local
    machines, using appropriate locking mechanisms for each: crosfleet
    lease/abandon for machines registered as crosfleet machines, and file
    locks for machines registered as local machines.

    Machines must be registered with AddMachineToLocal() or
    AddMachineToCrosfleet() before UpdateMachines() can act on them.
    """

    CROSFLEET_PATH = "crosfleet"

    # TODO(zhizhouy): lease time may need to be dynamically adjusted. For now
    # we set it long enough to cover the period to finish nightly rotation
    # tests.
    LEASE_MINS = 1439

    CROSFLEET_CREDENTIAL = (
        "/usr/local/google/home/mobiletc-prebuild"
        "/sheriff_utils/credentials/skylab"
        "/chromeos-swarming-credential.json"
    )
    SWARMING = "~/cipd_binaries/swarming"
    SUCCESS = 0

    def __init__(
        self, remotes, force_option, chromeos_root, locks_dir="", log=None
    ):
        """Initializes a LockManager object.

        Note that this appends chromeos_root to sys.path and reads the
        default remotes file (see GetAllToolchainLabMachines).

        Args:
            remotes: A list of machine names or ip addresses to be managed.
                Names and ip addresses should be represented as strings. If
                the list is empty, the lock manager will get all known
                machines.
            force_option: A Boolean indicating whether or not to force an
                unlock of a machine that was locked by someone else.
            chromeos_root: The ChromeOS chroot to use for the autotest
                scripts.
            locks_dir: A directory used for file locking local devices.
            log: If not None, this is the logger object to be used for
                writing out informational output messages. It is expected to
                be an instance of Logger class from cros_utils/logger.py.
        """
        self.chromeos_root = chromeos_root
        self.user = getpass.getuser()
        self.logger = log or logger.GetLogger()
        self.ce = command_executer.GetCommandExecuter(self.logger)

        sys.path.append(chromeos_root)

        self.locks_dir = locks_dir

        # De-duplicate while keeping the caller's order, so that processing
        # and printed output are deterministic (a set would shuffle them).
        self.machines = list(dict.fromkeys(remotes or ()))
        self.toolchain_lab_machines = self.GetAllToolchainLabMachines()

        if not self.machines:
            self.machines = self.toolchain_lab_machines
        self.force = force_option

        self.local_machines = []
        self.crosfleet_machines = []

    @staticmethod
    def _StripCrosSuffix(machine):
        """Returns machine with one trailing ".cros" removed, if present.

        str.rstrip(".cros") must not be used for this: it strips any trailing
        run of the characters '.', 'c', 'r', 'o' and 's', which mangles
        hostnames that happen to end in those letters.

        Args:
            machine: String containing the name or ip address of a machine.

        Returns:
            The machine name without the ".cros" suffix.
        """
        suffix = ".cros"
        if machine.endswith(suffix):
            return machine[: -len(suffix)]
        return machine

    def CheckMachine(self, machine, error_msg):
        """Verifies that machine is responding to ping.

        The machine is pinged under its given name first and, if that fails,
        under the name with ".cros" appended.

        Args:
            machine: String containing the name or ip address of machine to
                check.
            error_msg: Message carried by the exception if both pings fail.

        Raises:
            MachineNotPingable: If machine is not responding to 'ping'
        """
        if machines.MachineIsPingable(machine, logging_level="none"):
            return
        cros_machine = machine + ".cros"
        if machines.MachineIsPingable(cros_machine, logging_level="none"):
            return
        raise MachineNotPingable(error_msg)

    def GetAllToolchainLabMachines(self):
        """Gets a list of all the toolchain machines in the ChromeOS HW lab.

        Reads crosperf/default_remotes next to this file. Each non-blank line
        is expected to look like "<label>: <machine> <machine> ..."; the text
        before the first colon is ignored (presumably a board name - confirm
        against the data file).

        Returns:
            A list of names of the toolchain machines in the ChromeOS HW lab.

        Raises:
            ValueError: If a non-blank line contains no colon.
        """
        machines_file = os.path.join(
            os.path.dirname(__file__), "crosperf", "default_remotes"
        )
        machine_list = []
        with open(machines_file, "r", encoding="utf-8") as input_file:
            for line in input_file:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                if not line.strip():
                    continue
                # Split on the first colon only, so that a stray colon later
                # in the line does not break unpacking.
                _, remotes = line.split(":", 1)
                machine_list.extend(remotes.split())
        return machine_list

    def GetMachineType(self, m):
        """Get where the machine is located.

        Args:
            m: String containing the name or ip address of machine.

        Returns:
            Value of the type in MachineType Enum, or None if the machine has
            not been registered as either a local or a crosfleet machine.
        """
        if m in self.local_machines:
            return MachineType.LOCAL
        if m in self.crosfleet_machines:
            return MachineType.CROSFLEET
        return None

    def PrintStatusHeader(self):
        """Prints the status header lines for machines."""
        print("\nMachine (Board)\t\t\t\t\tStatus")
        print("---------------\t\t\t\t\t------")

    def PrintStatus(self, m, state, machine_type):
        """Prints status for a single machine.

        Args:
            m: String containing the name or ip address of machine.
            state: A dictionary of the current state of the machine. Reads the
                keys "locked" and "board", plus "locked_by" and "lock_time"
                when the machine is locked.
            machine_type: MachineType to determine where the machine is
                located. Local machines get extra tab padding.
        """
        padding = "\t\t" if machine_type == MachineType.LOCAL else ""
        if state["locked"]:
            print(
                "%s (%s)\t\t%slocked by %s since %s"
                % (
                    m,
                    state["board"],
                    padding,
                    state["locked_by"],
                    state["lock_time"],
                )
            )
        else:
            print("%s (%s)\t\t%sunlocked" % (m, state["board"], padding))

    def AddMachineToLocal(self, machine):
        """Adds a machine to local machine list.

        Args:
            machine: The machine to be added.
        """
        if machine not in self.local_machines:
            self.local_machines.append(machine)

    def AddMachineToCrosfleet(self, machine):
        """Adds a machine to crosfleet machine list.

        Args:
            machine: The machine to be added.
        """
        if machine not in self.crosfleet_machines:
            self.crosfleet_machines.append(machine)

    def ListMachineStates(self, machine_states):
        """Prints the current status for a list of machines.

        Prints a header followed by one status line for every machine in
        machine_states.

        Args:
            machine_states: A dictionary of the current state of every machine
                in the current LockManager's list of machines. Normally
                obtained by calling LockManager::GetMachineStates.
        """
        self.PrintStatusHeader()
        for m, state in machine_states.items():
            self.PrintStatus(m, state, self.GetMachineType(m))

    def UpdateLockInCrosfleet(self, should_lock_machine, machine):
        """Ask crosfleet to lease/release a machine.

        Args:
            should_lock_machine: Boolean indicating whether to lock the
                machine (True) or unlock the machine (False).
            machine: The machine to update.

        Returns:
            True if requested action succeeded, else False. Any exception
            raised by the underlying command is logged and reported as False.
        """
        try:
            if should_lock_machine:
                return self.LeaseCrosfleetMachine(machine)
            return self.ReleaseCrosfleetMachine(machine)
        except Exception as e:  # pylint: disable=broad-except
            # Best effort: report failure to the caller, but leave a trace of
            # why instead of swallowing the error silently.
            self.logger.LogWarning(
                "Failed to %s crosfleet machine %s: %r"
                % ("lease" if should_lock_machine else "release", machine, e)
            )
            return False

    def UpdateFileLock(self, should_lock_machine, machine):
        """Use file lock for local machines.

        Args:
            should_lock_machine: Boolean indicating whether to lock the
                machine (True) or unlock the machine (False).
            machine: The machine to update.

        Returns:
            True if requested action succeeded, else False. Any exception
            raised by file_lock_machine is logged and reported as False.
        """
        try:
            lock = file_lock_machine.Machine(machine, self.locks_dir)
            # NOTE(review): the leading True is presumably the "exclusive"
            # flag of file_lock_machine's Lock/Unlock - confirm there.
            if should_lock_machine:
                return lock.Lock(True, sys.argv[0])
            return lock.Unlock(True)
        except Exception as e:  # pylint: disable=broad-except
            # Best effort: report failure to the caller, but leave a trace of
            # why instead of swallowing the error silently.
            self.logger.LogWarning(
                "Failed to %s file lock for machine %s: %r"
                % ("take" if should_lock_machine else "release", machine, e)
            )
            return False

    def UpdateMachines(self, lock_machines):
        """Sets the locked state of the machines to the requested value.

        The machines updated are the ones in self.machines (specified when the
        class object was initialized). Machines that were never registered via
        AddMachineToLocal/AddMachineToCrosfleet are skipped with a warning.
        Afterwards self.machines is replaced by the list of machines that were
        successfully updated.

        Args:
            lock_machines: Boolean indicating whether to lock the machines
                (True) or unlock the machines (False).

        Returns:
            A list of the machines whose state was successfully updated.
        """
        updated_machines = []
        action = "Locking" if lock_machines else "Unlocking"
        for m in self.machines:
            # TODO(zhizhouy): Handling exceptions with more details when
            # locking doesn't succeed.
            machine_type = self.GetMachineType(m)
            if machine_type == MachineType.CROSFLEET:
                ret = self.UpdateLockInCrosfleet(lock_machines, m)
            elif machine_type == MachineType.LOCAL:
                ret = self.UpdateFileLock(lock_machines, m)
            else:
                # Without this branch `ret` would be unbound (or stale from
                # the previous iteration) and `machine_type.value` below
                # would fail on None.
                self.logger.LogWarning(
                    "%s machine skipped: %s is registered as neither a "
                    "local nor a crosfleet machine." % (action, m)
                )
                continue

            if ret:
                self.logger.LogOutput(
                    "%s %s machine succeeded: %s."
                    % (action, machine_type.value, m)
                )
                updated_machines.append(m)
            else:
                self.logger.LogOutput(
                    "%s %s machine failed: %s."
                    % (action, machine_type.value, m)
                )

        self.machines = updated_machines
        return updated_machines

    def _InternalRemoveMachine(self, machine):
        """Remove machine from internal list of machines.

        Both the given name and, for lab machines, the name with ".cros"
        appended are removed from self.machines.

        Args:
            machine: Name of machine to be removed from internal list.
        """
        # Check to see if machine is lab machine and if so, make sure it has
        # ".cros" on the end.
        cros_machine = machine
        if machine.find("rack") > 0 and machine.find("row") > 0:
            if machine.find(".cros") == -1:
                cros_machine = cros_machine + ".cros"

        self.machines = [
            m for m in self.machines if m not in (cros_machine, machine)
        ]

    def CheckMachineLocks(self, machine_states, cmd):
        """Check that every machine in requested list is in the proper state.

        If the cmd is 'unlock' verify that every machine is locked by
        requestor. If the cmd is 'lock' verify that every machine is currently
        unlocked. Machines that are already in the requested state are
        dropped from self.machines with a warning.

        Args:
            machine_states: A dictionary of the current state of every machine
                in the current LockManager's list of machines. Normally
                obtained by calling LockManager::GetMachineStates.
            cmd: The user-requested action for the machines: 'lock' or
                'unlock'.

        Raises:
            DontOwnLock: The lock on a requested machine is owned by someone
                else.
        """
        for k, state in machine_states.items():
            if cmd == "unlock":
                if not state["locked"]:
                    self.logger.LogWarning(
                        "Attempt to unlock already unlocked machine "
                        "(%s)." % k
                    )
                    self._InternalRemoveMachine(k)

                # TODO(zhizhouy): Crosfleet doesn't support host info such as
                # locked_by. Need to update this when crosfleet supports it.
                if (
                    state["locked"]
                    and state["locked_by"]
                    and state["locked_by"] != self.user
                ):
                    raise DontOwnLock(
                        "Attempt to unlock machine (%s) locked by someone "
                        "else (%s)." % (k, state["locked_by"])
                    )
            elif cmd == "lock":
                if state["locked"]:
                    self.logger.LogWarning(
                        "Attempt to lock already locked machine (%s)" % k
                    )
                    self._InternalRemoveMachine(k)

    def GetMachineStates(self, cmd=""):
        """Builds and prints the assumed state of all the requested machines.

        No server is queried: every machine in self.machines gets a
        placeholder state that is "unlocked" when cmd is 'lock' and "locked"
        for any other cmd, with an unknown board and empty owner/time. The
        resulting table is printed via ListMachineStates before returning.

        Args:
            cmd: The command for which we are getting the machine states.
                Only used to pick the placeholder "locked" value.

        Returns:
            A dictionary, keyed by machine name, of machine states for all the
            machines in the LockManager object.
        """
        machine_list = {}
        for m in self.machines:
            # For local or crosfleet machines, we simply set
            # {'locked': status} for them.
            # TODO(zhizhouy): This is a quick fix since crosfleet cannot
            # return host info as afe does. We need to get more info such as
            # locked_by when crosfleet supports that.
            machine_list[m] = {
                "locked": 0 if cmd == "lock" else 1,
                "board": "??",
                "locked_by": "",
                "lock_time": "",
            }

        self.ListMachineStates(machine_list)

        return machine_list

    def CheckMachineInCrosfleet(self, machine):
        """Run command to check if machine is in Crosfleet or not.

        Args:
            machine: String containing the name of the machine; a trailing
                ".cros" is removed before querying.

        Returns:
            True if machine in crosfleet, else False

        Raises:
            ValueError: If the swarming query exits with a non-zero code.
        """
        credential = ""
        if os.path.exists(self.CROSFLEET_CREDENTIAL):
            credential = (
                "--service-account-json %s" % self.CROSFLEET_CREDENTIAL
            )
        server = "--server https://chromeos-swarming.appspot.com"
        dimensions = "--dimension dut_name=%s" % self._StripCrosSuffix(
            machine
        )

        cmd = f"{self.SWARMING} bots {server} {credential} {dimensions}"
        exit_code, stdout, stderr = self.ce.RunCommandWOutput(cmd)
        if exit_code:
            raise ValueError(
                "Querying bots failed (2); stdout: %r; stderr: %r"
                % (stdout, stderr)
            )

        # An empty JSON list is treated as "no bot matched this dut_name".
        # Surrounding whitespace is stripped so that a trailing newline in
        # the command output does not defeat the comparison.
        # NOTE(review): an earlier comment here described a dict-shaped
        # output containing an 'items' key when the machine is known; confirm
        # the current swarming client output format.
        return stdout.strip() != "[]"

    def LeaseCrosfleetMachine(self, machine):
        """Run command to lease dut from crosfleet.

        Args:
            machine: String containing the name of the machine; a trailing
                ".cros" is removed before leasing.

        Returns:
            True if succeeded, False if failed.
        """
        credential = ""
        if os.path.exists(self.CROSFLEET_CREDENTIAL):
            credential = (
                "-service-account-json %s" % self.CROSFLEET_CREDENTIAL
            )
        cmd = "%s dut lease -minutes %s %s %s %s" % (
            self.CROSFLEET_PATH,
            self.LEASE_MINS,
            credential,
            "-host",
            self._StripCrosSuffix(machine),
        )
        # Wait 8 minutes for server to start the lease task, if not started,
        # we will treat it as unavailable.
        check_interval_time = 480
        retval = self.ce.RunCommand(cmd, command_timeout=check_interval_time)
        return retval == self.SUCCESS

    def ReleaseCrosfleetMachine(self, machine):
        """Run command to release dut from crosfleet.

        Args:
            machine: String containing the name of the machine; a trailing
                ".cros" is removed before releasing.

        Returns:
            True if succeeded, False if failed.
        """
        credential = ""
        if os.path.exists(self.CROSFLEET_CREDENTIAL):
            credential = (
                "-service-account-json %s" % self.CROSFLEET_CREDENTIAL
            )

        cmd = "%s dut abandon %s %s" % (
            self.CROSFLEET_PATH,
            credential,
            self._StripCrosSuffix(machine),
        )
        retval = self.ce.RunCommand(cmd)
        return retval == self.SUCCESS


def Main(argv):
    """Parse the options, initialize lock manager and dispatch proper method.

    Args:
        argv: The options with which this script was invoked.

    Returns:
        0 unless an exception is raised. Invalid options terminate the
        process through argparse's parser.error().
    """
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "--list",
        dest="cmd",
        action="store_const",
        const="status",
        help="List current status of all known machines.",
    )
    parser.add_argument(
        "--lock",
        dest="cmd",
        action="store_const",
        const="lock",
        help="Lock given machine(s).",
    )
    parser.add_argument(
        "--unlock",
        dest="cmd",
        action="store_const",
        const="unlock",
        help="Unlock given machine(s).",
    )
    parser.add_argument(
        "--status",
        dest="cmd",
        action="store_const",
        const="status",
        help="List current status of given machine(s).",
    )
    parser.add_argument(
        "--remote", dest="remote", help="machines on which to operate"
    )
    parser.add_argument(
        "--chromeos_root",
        dest="chromeos_root",
        required=True,
        help="ChromeOS root to use for autotest scripts.",
    )
    parser.add_argument(
        "--force",
        dest="force",
        action="store_true",
        default=False,
        help="Force lock/unlock of machines, even if not"
        " current lock owner.",
    )

    options = parser.parse_args(argv)

    # Check for a missing operation first; otherwise it would be reported
    # as the misleading "No machines specified" error below.
    if not options.cmd:
        parser.error(
            "No operation selected (--list, --status, --lock, --unlock)."
        )

    if not options.remote and options.cmd != "status":
        parser.error("No machines specified for operation.")

    if not os.path.isdir(options.chromeos_root):
        parser.error("Cannot find chromeos_root: %s." % options.chromeos_root)

    machine_list = []
    if options.remote:
        machine_list = options.remote.split()

    lock_manager = LockManager(
        machine_list, options.force, options.chromeos_root
    )

    cmd = options.cmd
    # GetMachineStates() prints the status table itself, so "status" needs no
    # further work; listing again would only print the same table twice.
    machine_states = lock_manager.GetMachineStates(cmd=cmd)

    if cmd in ("lock", "unlock"):
        if not lock_manager.force:
            lock_manager.CheckMachineLocks(machine_states, cmd)
        # NOTE(review): nothing here registers machines as local/crosfleet,
        # so UpdateMachines() will skip them - confirm intended CLI usage.
        lock_manager.UpdateMachines(cmd == "lock")

    return 0


if __name__ == "__main__":
    sys.exit(Main(sys.argv[1:]))