#!/usr/bin/env python3
# Copyright 2023 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Helpers to reliably reboot the device via serial and fastboot.

Note, this file will be executed in docker instance without vpython3, so we use
python3 instead. The docker instance runs this file as a symbolic link of dmc
via the "main" function.
"""

import json
import logging
import os
import shutil
import signal
import subprocess
import sys
import time

from typing import List, Optional
from boot_device import BootMode
from compatible_utils import running_unattended

# pylint: disable=too-many-return-statements, too-many-branches


def _env_ready() -> bool:
    """Checks if the required environment is ready to support the functions in
    this file.

    Returns:
        True if both the fastboot and serialio executables can be found via
        PATH; otherwise logs a warning and returns False.
    """
    if shutil.which('fastboot') is None:
        logging.warning('fastboot is not accessible')
        return False
    if shutil.which('serialio') is None:
        logging.warning('serialio is not accessible')
        return False
    return True


def boot_device(node_id: str,
                serial_num: str,
                mode: BootMode,
                must_boot: bool = False) -> bool:
    """Boots device into desired mode via serial and fastboot.
    This function waits for at most 10 minutes for the transition.

    Args:
        node_id: The fuchsia node id of the device.
        serial_num: The fastboot serial number of the device.
        mode: Desired boot mode.
        must_boot: Forces device to reboot regardless the current state.

    Returns:
        a boolean value to indicate if the operation succeeded; missing
        dependencies like serialio (for serial access) and fastboot, or the
        device cannot be found may also introduce the error.
    """
    #TODO(crbug.com/1490434): Remove the default values once the use in
    # flash_device has been migrated.
    if node_id is None:
        node_id = os.getenv('FUCHSIA_NODENAME')
    if serial_num is None:
        serial_num = os.getenv('FUCHSIA_FASTBOOT_SERNUM')
    assert node_id is not None
    assert serial_num is not None

    assert mode in [BootMode.REGULAR, BootMode.BOOTLOADER
                    ], 'Unsupported BootMode %s for serial_boot_device.' % mode
    assert _env_ready()

    if is_in_fuchsia(node_id):
        if not must_boot and mode == BootMode.REGULAR:
            return True
        # pylint: disable=subprocess-run-check
        if subprocess.run([
                'serialio', node_id, 'send', 'dm', 'reboot' +
            ('' if mode == BootMode.REGULAR else '-bootloader')
        ]).returncode != 0:
            logging.error('Failed to send dm reboot[-bootloader] via serialio')
            return False
    elif is_in_fastboot(serial_num):
        # fastboot is stateless and there isn't a reason to reboot the device
        # again to go to the fastboot.
        if mode == BootMode.BOOTLOADER:
            return True
        if not _run_fastboot(['reboot'], serial_num):
            # Shouldn't return None here, unless the device was rebooting. In
            # the case, it would be safer to return false.
            return False
    else:
        logging.error('Cannot find node id %s or fastboot serial number %s',
                      node_id, serial_num)
        return False

    # The state checks below block for a while on their own, so this loop
    # does not need an extra sleep between attempts.
    start_sec = time.time()
    while time.time() - start_sec < 600:
        if mode == BootMode.REGULAR and is_in_fuchsia(node_id):
            return True
        if mode == BootMode.BOOTLOADER and is_in_fastboot(serial_num):
            return True
    logging.error(
        'Failed to transite node id %s or fastboot serial number %s '
        'to expected state %s', node_id, serial_num, mode)
    return False


def _serialio_send_and_wait(node_id: str, command: List[str],
                            waitfor: str) -> bool:
    """Continuously sends the command to the device and waits for the waitfor
    string via serialio.
    This function asserts the existence of serialio and waits at most ~30
    seconds.

    Args:
        node_id: The fuchsia node id of the device.
        command: The command and its arguments to send through serial.
        waitfor: The string passed to `serialio wait`.

    Returns:
        True if the `serialio wait` process exits with 0 before the deadline;
        False if sending fails, the wait process exits with a non-zero code,
        or the deadline is reached.
    """
    assert shutil.which('serialio') is not None
    send_command = ['serialio', node_id, 'send', *command]
    start_sec = time.time()
    with subprocess.Popen(['serialio', node_id, 'wait', waitfor],
                          stdout=subprocess.DEVNULL,
                          stderr=subprocess.DEVNULL) as proc:
        try:
            while time.time() - start_sec < 28:
                # pylint: disable=subprocess-run-check
                if subprocess.run(send_command).returncode != 0:
                    logging.error('Failed to send %s via serialio to %s',
                                  command, node_id)
                    return False
                result = proc.poll()
                if result is not None:
                    if result == 0:
                        return True
                    logging.error(
                        'Failed to wait %s via serial to %s, '
                        'return code %s', waitfor, node_id, result)
                    return False
                time.sleep(2)
        finally:
            # Leaving the "with" block waits for the child process, so a
            # still-running waiter must be killed on every exit path;
            # otherwise an early return could block indefinitely.
            if proc.poll() is None:
                proc.kill()
    logging.error('Have not found %s via serialio to %s', waitfor, node_id)
    return False


def is_in_fuchsia(node_id: str) -> bool:
    """Checks if the device is running in fuchsia through serial.
    Note, this check goes through serial and does not guarantee the fuchsia os
    has a workable network or ssh connection.
    This function asserts the existence of serialio and waits at most ~60
    seconds.

    Args:
        node_id: The fuchsia node id of the device.

    Returns:
        True if the device echoes back the expected sha1sum through serial. A
        missing sshd in the ps output only triggers a warning.
    """
    if not _serialio_send_and_wait(
            node_id, ['echo', 'yes-i-am-healthy', '|', 'sha1sum'],
            '89d517b7db104aada669a83bc3c3a906e00671f7'):
        logging.error(
            'Device %s did not respond echo, '
            'it may not be running fuchsia', node_id)
        return False
    if not _serialio_send_and_wait(node_id, ['ps'], 'sshd'):
        logging.warning(
            'Cannot find sshd from ps on %s, the ssh '
            'connection may not be available.', node_id)
    return True


def is_in_fastboot(serial_num: str) -> bool:
    """Checks if the device is running in fastboot through fastboot command.
    Note, the fastboot may be impacted by the usb congestion and causes this
    function to return false.
    This function asserts the existence of fastboot and waits at most ~30
    seconds.

    Args:
        serial_num: The fastboot serial number of the device.

    Returns:
        True if `fastboot getvar product` succeeds before the deadline.
    """
    start_sec = time.time()
    while time.time() - start_sec < 28:
        result = _run_fastboot(['getvar', 'product'], serial_num)
        if result is None:
            # fastboot was still waiting for the device when it timed out,
            # i.e. the device is not in fastboot; retrying would not help.
            return False
        if result:
            return True
        time.sleep(2)
    logging.error('Failed to wait for fastboot state of %s', serial_num)
    return False


def _run_fastboot(args: List[str], serial_num: str) -> Optional[bool]:
    """Executes the fastboot command and kills the hanging process.
    The fastboot may be impacted by the usb congestion and causes the process to
    hang forever. So this command waits for 30 seconds before killing the
    process, and it's not good for flashing.
    Note, if this function detects the fastboot is waiting for the device, i.e.
    the device is not in the fastboot, it returns None instead, e.g. unknown.
    This function asserts the existence of fastboot.

    Args:
        args: The fastboot sub-command and its arguments. The list is not
            modified.
        serial_num: The fastboot serial number of the device.

    Returns:
        True if fastboot exits with 0, None if it timed out while its stderr
        mentions the serial number, False otherwise.
    """
    assert shutil.which('fastboot') is not None
    # Build a new list rather than mutating the caller's arguments.
    command = ['fastboot', *args, '-s', serial_num]
    try:
        # Capture output to ensure we can get '< waiting for serial-num >'
        # output.
        # pylint: disable=subprocess-run-check
        if subprocess.run(command, capture_output=True,
                          timeout=30).returncode == 0:
            return True
    except subprocess.TimeoutExpired as timeout:
        if timeout.stderr is not None and serial_num.lower(
        ) in timeout.stderr.decode(errors='replace').lower():
            logging.warning('fastboot is still waiting for %s', serial_num)
            return None
    logging.error('Failed to run %s against fastboot %s', command, serial_num)
    return False


def _shutdown_if_serial_is_unavailable(node_id: str) -> None:
    """Terminates pid 1 when running unattended and `serialio poll` fails.

    Does nothing when running_unattended() returns False.

    Args:
        node_id: The fuchsia node id of the device.
    """
    if not running_unattended():
        return
    # pylint: disable=subprocess-run-check
    if subprocess.run(['serialio', node_id, 'poll']).returncode != 0:
        logging.warning('shutting down the docker by killing the pid 1')
        # Before killing the process itself, force shutting down the logging to
        # flush everything.
        logging.shutdown()
        # In docker instance, killing root process will cause the instance to be
        # shut down and restarted by swarm_docker. So the updated tty can be
        # attached to the new docker instance.
        os.kill(1, signal.SIGTERM)


def main(action: str) -> int:
    """Main entry of serial_boot_device.

    The node id and fastboot serial number are read from the
    FUCHSIA_NODENAME and FUCHSIA_FASTBOOT_SERNUM environment variables.

    Args:
        action: The command to perform, e.g. 'health-check' or 'reboot'.

    Returns:
        0 on success, 1 if the action failed, 2 if the environment is not
        ready or the action is unknown.
    """
    node_id = os.getenv('FUCHSIA_NODENAME')
    serial_num = os.getenv('FUCHSIA_FASTBOOT_SERNUM')
    assert node_id is not None
    assert serial_num is not None

    handlers = [logging.StreamHandler()]
    if os.path.isdir('/home/swarming/'):
        handlers.append(
            logging.FileHandler('/home/swarming/dmc.%s.log' % node_id))
    logging.basicConfig(format='%(levelname)s %(asctime)s %(message)s',
                        handlers=handlers,
                        level=logging.INFO)
    logging.info('Running command %s against %s %s', sys.argv, node_id,
                 serial_num)

    # Checks the environment after initializing the logging.
    if not _env_ready():
        logging.error('Missing environment setup, unable to perform action.')
        return 2

    if action == 'health-check':
        _shutdown_if_serial_is_unavailable(node_id)
        if is_in_fuchsia(node_id) or is_in_fastboot(serial_num):
            # Print out the json result without using logging to avoid any
            # potential formatting issue.
            print(
                json.dumps([{
                    'nodename': node_id,
                    'state': 'healthy',
                    'status_message': '',
                    'dms_state': ''
                }]))
            return 0
        logging.error('Cannot find node id %s or fastboot serial number %s',
                      node_id, serial_num)
        return 1
    if action in ['reboot', 'after-task']:
        if action == 'after-task':
            _shutdown_if_serial_is_unavailable(node_id)
        if boot_device(node_id, serial_num, BootMode.REGULAR, must_boot=True):
            return 0
        logging.error(
            'Cannot reboot the device with node id %s and fastboot '
            'serial number %s', node_id, serial_num)
        return 1
    if action == 'reboot-fastboot':
        if boot_device(node_id,
                       serial_num,
                       BootMode.BOOTLOADER,
                       must_boot=True):
            return 0
        logging.error(
            'Cannot reboot the device with node id %s and fastboot '
            'serial number %s into fastboot', node_id, serial_num)
        return 1
    if action == 'is-in-fuchsia':
        if is_in_fuchsia(node_id):
            return 0
        logging.error('Cannot find node id %s', node_id)
        return 1
    if action == 'is-in-fastboot':
        if is_in_fastboot(serial_num):
            return 0
        logging.error('Cannot find fastboot serial number %s', serial_num)
        return 1
    if action == 'server-version':
        # TODO(crbug.com/1490434): Implement the server-version.
        print('chromium')
        return 0
    if action == 'before-task':
        # TODO(crbug.com/1490434): fuchsia.py requires IMAGE_MANIFEST_PATH and
        # BOOTSERVER_PATH to support before-task call. So the following
        # statement does not work as it should be.
        _shutdown_if_serial_is_unavailable(node_id)
        return 0
    if action == 'set-power-state':
        # Do nothing. The device is always restarted during after-task.
        return 0
    logging.error('Unknown command %s', action)
    return 2


if __name__ == '__main__':
    sys.exit(main(sys.argv[1]))