xref: /aosp_15_r20/external/cronet/build/fuchsia/test/serial_boot_device.py (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1#!/usr/bin/env python3
2# Copyright 2023 The Chromium Authors
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5"""Helpers to reliably reboot the device via serial and fastboot.
6
7Note, this file will be executed in docker instance without vpython3, so we use
8python3 instead. The docker instance runs this file as a symbolic link of dmc
9via the "main" function.
10"""
11
import json
import logging
import os
import shutil
import signal
import subprocess
import sys
import time
from typing import List, Optional

from boot_device import BootMode
from compatible_utils import running_unattended
24
25# pylint: disable=too-many-return-statements, too-many-branches
26
27
28def _env_ready() -> bool:
29    """Checks if the required environment is ready to support the functions in
30    this file."""
31    if shutil.which('fastboot') is None:
32        logging.warning('fastboot is not accessible')
33        return False
34    if shutil.which('serialio') is None:
35        logging.warning('serialio is not accessible')
36        return False
37    return True
38
39
def boot_device(node_id: str,
                serial_num: str,
                mode: BootMode,
                must_boot: bool = False) -> bool:
    """Moves the device into the requested boot mode via serial and fastboot.

    The current state is probed first (fuchsia over serial, then fastboot). A
    reboot command is issued when needed, and the device is then polled until
    it shows up in the requested mode or roughly 600 seconds have elapsed.

    Args:
        node_id: The fuchsia node id of the device. When None, the value of
            the FUCHSIA_NODENAME environment variable is used.
        serial_num: The fastboot serial number of the device. When None, the
            value of the FUCHSIA_FASTBOOT_SERNUM environment variable is used.
        mode: Desired boot mode; only BootMode.REGULAR and
            BootMode.BOOTLOADER are accepted.
        must_boot: Reboots a device that is already running fuchsia even when
            BootMode.REGULAR is requested. A device that is already in
            fastboot is never rebooted when BootMode.BOOTLOADER is requested.

    Returns:
        True if the device reached the requested mode. False if the reboot
        command could not be sent, the device was found neither in fuchsia
        nor in fastboot, or the transition did not finish in time.

    Raises:
        AssertionError: if the node id or serial number is unavailable, the
            mode is unsupported, or fastboot / serialio cannot be found.
    """
    # TODO(crbug.com/1490434): Remove the default values once the use in
    # flash_device has been migrated.
    if node_id is None:
        node_id = os.getenv('FUCHSIA_NODENAME')
    if serial_num is None:
        serial_num = os.getenv('FUCHSIA_FASTBOOT_SERNUM')
    assert node_id is not None
    assert serial_num is not None

    supported_modes = [BootMode.REGULAR, BootMode.BOOTLOADER]
    assert mode in supported_modes, (
        'Unsupported BootMode %s for serial_boot_device.' % mode)
    assert _env_ready()

    want_regular = mode == BootMode.REGULAR

    if is_in_fuchsia(node_id):
        # Already running fuchsia: nothing to do unless a reboot is forced or
        # the bootloader is wanted.
        if want_regular and not must_boot:
            return True
        suffix = '' if want_regular else '-bootloader'
        reboot_command = ['serialio', node_id, 'send', 'dm', 'reboot' + suffix]
        # pylint: disable=subprocess-run-check
        sent = subprocess.run(reboot_command)
        if sent.returncode != 0:
            logging.error('Failed to send dm reboot[-bootloader] via serialio')
            return False
    elif is_in_fastboot(serial_num):
        # fastboot is stateless and there isn't a reason to reboot the device
        # again to go to the fastboot.
        if not want_regular:
            return True
        rebooted = _run_fastboot(['reboot'], serial_num)
        if not rebooted:
            # Shouldn't get None here, unless the device was rebooting. In
            # the case, it would be safer to return false.
            return False
    else:
        logging.error('Cannot find node id %s or fastboot serial number %s',
                      node_id, serial_num)
        return False

    # Poll until the device shows up in the requested mode. The probes block
    # on their own timeouts, so no extra sleep is added here.
    deadline = time.time() + 600
    while time.time() < deadline:
        assert mode in supported_modes
        if want_regular:
            if is_in_fuchsia(node_id):
                return True
        elif is_in_fastboot(serial_num):
            return True
    logging.error(
        'Failed to transite node id %s or fastboot serial number %s '
        'to expected state %s', node_id, serial_num, mode)
    return False
106
107
108def _serialio_send_and_wait(node_id: str, command: List[str],
109                            waitfor: str) -> bool:
110    """Continously sends the command to the device and waits for the waitfor
111    string via serialio.
112    This function asserts the existence of serialio and waits at most ~30
113    seconds."""
114    assert shutil.which('serialio') is not None
115    start_sec = time.time()
116    with subprocess.Popen(['serialio', node_id, 'wait', waitfor],
117                          stdout=subprocess.DEVNULL,
118                          stderr=subprocess.DEVNULL) as proc:
119        while time.time() - start_sec < 28:
120            send_command = ['serialio', node_id, 'send']
121            send_command.extend(command)
122            # pylint: disable=subprocess-run-check
123            if subprocess.run(send_command).returncode != 0:
124                logging.error('Failed to send %s via serialio to %s', command,
125                              node_id)
126                return False
127            result = proc.poll()
128            if result is not None:
129                if result == 0:
130                    return True
131                logging.error(
132                    'Failed to wait %s via serial to %s, '
133                    'return code %s', waitfor, node_id, result)
134                return False
135            time.sleep(2)
136        proc.kill()
137    logging.error('Have not found %s via serialio to %s', waitfor, node_id)
138    return False
139
140
def is_in_fuchsia(node_id: str) -> bool:
    """Checks over serial whether the device is running fuchsia.

    The device is asked to pipe a fixed string through sha1sum and the
    expected digest is awaited on the serial output. A second probe looks for
    sshd in the output of ps; its failure is only logged as a warning and
    does not change the result. Consequently a True result does not guarantee
    a workable network or ssh connection.

    Each probe goes through _serialio_send_and_wait, which asserts the
    existence of serialio and gives up after roughly 30 seconds, so this
    function waits at most ~60 seconds.

    Args:
        node_id: The fuchsia node id of the device.

    Returns:
        True if the echo probe got the expected response, False otherwise.
    """
    echo_command = ['echo', 'yes-i-am-healthy', '|', 'sha1sum']
    # Presumably the sha1sum output for the echoed text above.
    expected_digest = '89d517b7db104aada669a83bc3c3a906e00671f7'
    responded = _serialio_send_and_wait(node_id, echo_command,
                                        expected_digest)
    if not responded:
        logging.error(
            'Device %s did not respond echo, '
            'it may not be running fuchsia', node_id)
        return False

    sshd_found = _serialio_send_and_wait(node_id, ['ps'], 'sshd')
    if not sshd_found:
        logging.warning(
            'Cannot find sshd from ps on %s, the ssh '
            'connection may not be available.', node_id)
    return True
159
160
def is_in_fastboot(serial_num: str) -> bool:
    """Checks whether the device answers fastboot commands.

    `fastboot getvar product` is retried every two seconds for about 28
    seconds. Fastboot may be impacted by usb congestion, which can cause this
    function to return False for a device that is actually in fastboot.

    Args:
        serial_num: The fastboot serial number of the device.

    Returns:
        True as soon as the fastboot command succeeds. False if
        _run_fastboot reports None (fastboot is still waiting for the
        device) or no attempt succeeded in time.
    """
    deadline = time.time() + 28
    while time.time() < deadline:
        outcome = _run_fastboot(['getvar', 'product'], serial_num)
        if outcome:
            return True
        if outcome is None:
            # fastboot is waiting for the device; retrying would not help.
            return False
        time.sleep(2)
    logging.error('Failed to wait for fastboot state of %s', serial_num)
    return False
177
178
179def _run_fastboot(args: List[str], serial_num: str) -> bool:
180    """Executes the fastboot command and kills the hanging process.
181    The fastboot may be impacted by the usb congestion and causes the process to
182    hang forever. So this command waits for 30 seconds before killing the
183    process, and it's not good for flashing.
184    Note, if this function detects the fastboot is waiting for the device, i.e.
185    the device is not in the fastboot, it returns None instead, e.g. unknown.
186    This function asserts the existence of fastboot."""
187    assert shutil.which('fastboot') is not None
188    args.insert(0, 'fastboot')
189    args.extend(('-s', serial_num))
190    try:
191        # Capture output to ensure we can get '< waiting for serial-num >'
192        # output.
193        # pylint: disable=subprocess-run-check
194        if subprocess.run(args, capture_output=True,
195                          timeout=30).returncode == 0:
196            return True
197    except subprocess.TimeoutExpired as timeout:
198        if timeout.stderr is not None and serial_num.lower(
199        ) in timeout.stderr.decode().lower():
200            logging.warning('fastboot is still waiting for %s', serial_num)
201            return None
202    logging.error('Failed to run %s against fastboot %s', args, serial_num)
203    return False
204
205
def _shutdown_if_serial_is_unavailable(node_id: str) -> None:
    """Terminates pid 1 when the serial connection cannot be polled.

    Nothing happens unless running_unattended() is true. In that case
    `serialio <node_id> poll` is executed, and a non-zero return code leads to
    SIGTERM being sent to pid 1.

    Args:
        node_id: The fuchsia node id of the device, passed to serialio.
    """
    if not running_unattended():
        return
    # pylint: disable=subprocess-run-check
    poll_status = subprocess.run(['serialio', node_id, 'poll']).returncode
    if poll_status == 0:
        return
    logging.warning('shutting down the docker by killing the pid 1')
    # Before killing the process itself, force shutting down the logging to
    # flush everything.
    logging.shutdown()
    # In docker instance, killing root process will cause the instance to be
    # shut down and restarted by swarm_docker. So the updated tty can be
    # attached to the new docker instance.
    os.kill(1, signal.SIGTERM)
219
220
def main(action: str) -> int:
    """Runs a single action against the device named by the environment.

    The device is identified by the FUCHSIA_NODENAME and
    FUCHSIA_FASTBOOT_SERNUM environment variables; both must be set. Logging
    goes to a stream handler and, when /home/swarming/ exists, also to
    /home/swarming/dmc.<node id>.log.

    Args:
        action: One of 'health-check', 'reboot', 'after-task',
            'reboot-fastboot', 'is-in-fuchsia', 'is-in-fastboot',
            'server-version', 'before-task' or 'set-power-state'.

    Returns:
        0 if the action succeeded, 1 if it failed, and 2 if the required
        tools are missing or the action is unknown.
    """
    node_id = os.getenv('FUCHSIA_NODENAME')
    serial_num = os.getenv('FUCHSIA_FASTBOOT_SERNUM')
    assert node_id is not None
    assert serial_num is not None

    log_handlers = [logging.StreamHandler()]
    if os.path.isdir('/home/swarming/'):
        log_file = '/home/swarming/dmc.%s.log' % node_id
        log_handlers.append(logging.FileHandler(log_file))
    logging.basicConfig(format='%(levelname)s %(asctime)s %(message)s',
                        handlers=log_handlers,
                        level=logging.INFO)
    logging.info('Running command %s against %s %s', sys.argv, node_id,
                 serial_num)

    # Checks the environment after initializing the logging.
    if not _env_ready():
        logging.error('Missing environment setup, unable to perform action.')
        return 2

    if action == 'health-check':
        _shutdown_if_serial_is_unavailable(node_id)
        if not (is_in_fuchsia(node_id) or is_in_fastboot(serial_num)):
            logging.error(
                'Cannot find node id %s or fastboot serial number %s',
                node_id, serial_num)
            return 1
        # Print out the json result without using logging to avoid any
        # potential formatting issue.
        report = [{
            'nodename': node_id,
            'state': 'healthy',
            'status_message': '',
            'dms_state': ''
        }]
        print(json.dumps(report))
        return 0

    if action in ['reboot', 'after-task']:
        if action == 'after-task':
            _shutdown_if_serial_is_unavailable(node_id)
        rebooted = boot_device(node_id,
                               serial_num,
                               BootMode.REGULAR,
                               must_boot=True)
        if not rebooted:
            logging.error(
                'Cannot reboot the device with node id %s and fastboot '
                'serial number %s', node_id, serial_num)
            return 1
        return 0

    if action == 'reboot-fastboot':
        in_bootloader = boot_device(node_id,
                                    serial_num,
                                    BootMode.BOOTLOADER,
                                    must_boot=True)
        if not in_bootloader:
            logging.error(
                'Cannot reboot the device with node id %s and fastboot '
                'serial number %s into fastboot', node_id, serial_num)
            return 1
        return 0

    if action == 'is-in-fuchsia':
        if not is_in_fuchsia(node_id):
            logging.error('Cannot find node id %s', node_id)
            return 1
        return 0

    if action == 'is-in-fastboot':
        if not is_in_fastboot(serial_num):
            logging.error('Cannot find fastboot serial number %s', serial_num)
            return 1
        return 0

    if action == 'server-version':
        # TODO(crbug.com/1490434): Implement the server-version.
        print('chromium')
        return 0

    if action == 'before-task':
        # TODO(crbug.com/1490434): fuchsia.py requires IMAGE_MANIFEST_PATH and
        # BOOTSERVER_PATH to support before-task call. So the following
        # statement does not work as it should be.
        _shutdown_if_serial_is_unavailable(node_id)
        return 0

    if action == 'set-power-state':
        # Do nothing. The device is always restarted during after-task.
        return 0

    logging.error('Unknown command %s', action)
    return 2
303
304
if __name__ == '__main__':
    # The first command line argument selects the action; the value returned
    # by main becomes the process exit status.
    exit_code = main(sys.argv[1])
    sys.exit(exit_code)
307