xref: /aosp_15_r20/external/crosvm/tools/impl/testvm.py (revision bb4ee6a4ae7042d18b07a98463b9c8b875e44b39)
1# Copyright 2021 The ChromiumOS Authors
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5from enum import Enum
6import json
7import os
8import socket
9import subprocess
10import sys
11import time
12import typing
13from contextlib import closing
14from pathlib import Path
15from random import randrange
16from typing import Dict, List, Literal, Optional, Tuple
17
18from .common import CACHE_DIR, download_file, cmd, rich, console
19
# True when the current user has write access to /dev/kvm.
KVM_SUPPORT = os.access("/dev/kvm", os.W_OK)

# Guest architectures this script knows how to run.
Arch = Literal["x86_64", "aarch64"]
# Runtime tuple of every value of `Arch`. The cast uses a variadic tuple type
# because get_args() returns one entry per literal, not a 1-tuple.
ARCH_OPTIONS = typing.cast(Tuple[Arch, ...], typing.get_args(Arch))
24
25
# Directory containing this script, with symlinks resolved.
SCRIPT_DIR = Path(__file__).parent.resolve()
# Static test VM files that live next to this script.
SRC_DIR = SCRIPT_DIR.joinpath("testvm")
# Identity file handed to ssh when connecting to the VM.
ID_RSA = SRC_DIR.joinpath("id_rsa")
# Version string embedded in the image file names. read_text() opens and
# closes the file itself, so no file handle is left dangling at import time.
BASE_IMG_VERSION = SRC_DIR.joinpath("version").read_text().strip()

# Base URL from which base images are downloaded.
IMAGE_DIR_URL = "https://storage.googleapis.com/crosvm/testvm"
32
33
def cargo_target_dir():
    """Return the cargo target directory as a Path.

    The CARGO_TARGET_DIR environment variable wins when it is set to a
    non-empty value; in that case cargo is never invoked, so the script keeps
    working on machines where cargo is unavailable but the directory is known.
    Otherwise the directory is taken from the output of `cargo metadata`.

    Raises:
        subprocess.CalledProcessError: if `cargo metadata` exits with an error.
    """
    override = os.environ.get("CARGO_TARGET_DIR")
    if override:
        return Path(override)

    cargo_metadata = subprocess.run(
        ["cargo", "metadata", "--no-deps", "--format-version=1"],
        check=True,
        capture_output=True,
        text=True,
    )
    parsed = json.loads(cargo_metadata.stdout)
    return Path(parsed["target_directory"])
49
50
def data_dir(arch: Arch):
    """Return the per-architecture directory under CACHE_DIR used for VM files."""
    return CACHE_DIR.joinpath("crosvm_tools", arch)
53
54
def pid_path(arch: Arch):
    """Return the path of the file holding the pid of the `arch` VM."""
    directory = data_dir(arch)
    return directory.joinpath("pid")
57
58
def ssh_port_path(arch: Arch):
    """Return the path of the file holding the SSH port of the `arch` VM."""
    directory = data_dir(arch)
    return directory.joinpath("ssh_port")
61
62
def log_path(arch: Arch):
    """Return the path of the log file of the `arch` VM."""
    directory = data_dir(arch)
    return directory.joinpath("vm_log")
65
66
def base_img_name(arch: Arch):
    """Return the file name of the base image for `arch` at BASE_IMG_VERSION."""
    stem = f"base-{arch}-{BASE_IMG_VERSION}"
    return f"{stem}.qcow2"
69
70
def base_img_url(arch: Arch):
    """Return the download URL of the base image for `arch`."""
    file_name = base_img_name(arch)
    return f"{IMAGE_DIR_URL}/{file_name}"
73
74
def base_img_path(arch: Arch):
    """Return the local path where the base image for `arch` is stored."""
    file_name = base_img_name(arch)
    return data_dir(arch).joinpath(file_name)
77
78
def rootfs_img_path(arch: Arch):
    """Return the local path of the rootfs image for `arch`."""
    file_name = f"rootfs-{arch}-{BASE_IMG_VERSION}.qcow2"
    return data_dir(arch).joinpath(file_name)
81
82
def ssh_port(arch: Arch) -> int:
    """Return the SSH port of the `arch` VM.

    The port is read from the ssh_port file when that file exists; otherwise
    the fixed per-architecture port from SSH_PORTS is returned.
    """
    port_file = ssh_port_path(arch)
    if port_file.exists():
        return int(port_file.read_text())

    # Default to fixed ports used by VMs started by previous versions of this script.
    # TODO(b/275717656): Remove after a while
    return SSH_PORTS[arch]
89
90
# Command wrappers for the external tools used by this module.
qemu_img = cmd("qemu-img")  # creates the rootfs overlay image
ssh = cmd("ssh")  # connects to and probes the running VM
93
# Fixed SSH port per architecture. Only used as a fallback when no ssh_port
# file has been written for the architecture.
# TODO(b/275717656): Remove after a while
SSH_PORTS: Dict[Arch, int] = dict(x86_64=9000, aarch64=9001)
100
# QEMU arguments that every architecture's command line includes.
SHARED_ARGS: List[str] = [
    # No graphical output.
    "-display none",
    # Network device attached to the "net0" netdev.
    "-device virtio-net-pci,netdev=net0",
    # Number of virtual CPUs.
    "-smp 8",
    # Guest memory size.
    "-m 4G",
]
108
# x86_64 guest: "-enable-kvm" is only passed when /dev/kvm is writable.
# NOTE(review): "-cpu host" is passed regardless of KVM_SUPPORT; confirm that
# QEMU accepts it when KVM is not enabled.
_QEMU_X86_64 = cmd(
    "qemu-system-x86_64",
    "-cpu host",
    "-enable-kvm" if KVM_SUPPORT else None,
    *SHARED_ARGS,
)

# aarch64 guest on the "virt" machine with a fixed CPU model and EFI firmware.
_QEMU_AARCH64 = cmd(
    "qemu-system-aarch64",
    "-M virt",
    "-machine virt,virtualization=true,gic-version=3",
    "-cpu cortex-a57",
    "-bios /usr/share/qemu-efi-aarch64/QEMU_EFI.fd",
    *SHARED_ARGS,
)

# QEMU command for each architecture
ARCH_TO_QEMU: Dict[Arch, cmd] = {
    "x86_64": _QEMU_X86_64,
    "aarch64": _QEMU_AARCH64,
}
126
127
def ssh_opts(arch: Arch) -> Dict[str, str]:
    """Return the ssh client options needed to reach the `arch` VM.

    The result maps ssh option names to their string values.
    """
    return dict(
        Port=str(ssh_port(arch)),
        User="crosvm",
        # Host key checks are switched off and no known-hosts file is kept.
        StrictHostKeyChecking="no",
        UserKnownHostsFile="/dev/null",
        LogLevel="ERROR",
        IdentityFile=str(ID_RSA),
    )
137
138
def ssh_cmd_args(arch: Arch):
    """Return the options from ssh_opts() as a list of `-oKey=Value` arguments."""
    arguments = []
    for option, value in ssh_opts(arch).items():
        arguments.append(f"-o{option}={value}")
    return arguments
141
142
def ssh_exec(arch: Arch, cmd: Optional[str] = None):
    """Run ssh against the `arch` VM in the foreground.

    Args:
        arch: Architecture of the VM to connect to.
        cmd: Command to run on the VM. When omitted or empty, ssh is started
            without a remote command.
    """
    # Restrict the permissions of the identity file before handing it to ssh.
    os.chmod(ID_RSA, 0o600)

    remote_command = ["-T", cmd] if cmd else []
    session = ssh.with_args(
        "localhost",
        *ssh_cmd_args(arch),
        *remote_command,
    )
    session.fg(check=False)
150
151
def ping_vm(arch: Arch):
    """Return whether an ssh `exit` on the `arch` VM succeeds.

    The connection attempt uses a one second ssh connect timeout.
    """
    # Restrict the permissions of the identity file before handing it to ssh.
    os.chmod(ID_RSA, 0o600)

    probe = ssh(
        "localhost",
        *ssh_cmd_args(arch),
        "-oConnectTimeout=1",
        "-T exit",
    )
    return probe.success()
160
161
def write_pid_file(arch: Arch, pid: int):
    """Store `pid` in the pid file of the `arch` VM, replacing its content."""
    pid_path(arch).write_text(str(pid))
165
166
def read_pid_file(arch: Arch):
    """Return the pid stored for the `arch` VM, or None if no pid file exists."""
    pid_file = pid_path(arch)
    if pid_file.exists():
        return int(pid_file.read_text())
    return None
173
174
def is_port_available(port: int):
    """Return True when a TCP connection to 127.0.0.1:`port` cannot be made.

    A failed connection attempt is taken to mean that the port is free.
    """
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
        # connect_ex() reports failure as a non-zero error code instead of raising.
        error_code = probe.connect_ex(("127.0.0.1", port))
    return error_code != 0
178
179
def pick_ssh_port():
    """Return a random port in [1024, 32768) that is_port_available() accepts.

    Raises:
        Exception: if none of five random candidates is available.
    """
    attempts_left = 5
    while attempts_left > 0:
        candidate = randrange(1024, 32768)
        if is_port_available(candidate):
            return candidate
        attempts_left -= 1
    raise Exception("Could not find a free port")
186
187
def run_qemu(
    arch: Arch,
    hda: Path,
    background: bool = False,
):
    """Boot a QEMU VM for `arch` from the disk image `hda`.

    A random free port on the host is forwarded to port 22 of the guest.

    Args:
        arch: Architecture of the VM; selects the QEMU command line.
        hda: Disk image to boot from.
        background: When False, QEMU runs in the foreground with its serial
            console on stdio. When True, QEMU is started in a new session with
            its serial console written to the VM log file, and the chosen port
            and the QEMU pid are saved so the VM can be managed later.

    If a background QEMU exits within roughly the first second, its output is
    printed and this process exits with QEMU's return code.
    """
    port = pick_ssh_port()

    qemu = ARCH_TO_QEMU[arch]
    # Use the shared log_path() helper so the log location is defined in one place.
    serial = f"file:{log_path(arch)}" if background else "stdio"

    console.print(f"Booting {arch} VM with disk", hda)
    command = qemu.with_args(
        f"-hda {hda}",
        f"-serial {serial}",
        f"-netdev user,id=net0,hostfwd=tcp::{port}-:22",
    )
    if not background:
        command.fg()
        return

    # Start qemu in a new session so it can outlive this process.
    process = command.popen(
        start_new_session=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
    )

    # Wait for 1s to see if the qemu is staying alive.
    assert process.stdout
    for _ in range(10):
        if process.poll() is not None:
            sys.stdout.write(process.stdout.read())
            print(f"'{command}' exited with code {process.returncode}")
            sys.exit(process.returncode)
        time.sleep(0.1)

    # Print any warnings qemu might produce.
    # NOTE(review): read(0) requests zero characters, so nothing is actually
    # printed here. An unbounded read() would presumably block while qemu keeps
    # the pipe open, so the call is left unchanged.
    sys.stdout.write(process.stdout.read(0))
    sys.stdout.flush()
    process.stdout.close()

    # Save port and pid so we can manage the process later.
    ssh_port_path(arch).write_text(str(port))
    write_pid_file(arch, process.pid)
232
233
def run_vm(arch: Arch, background: bool = False):
    """Boot the `arch` VM from its rootfs image, optionally in the background."""
    disk_image = rootfs_img_path(arch)
    run_qemu(arch, disk_image, background=background)
240
241
def is_running(arch: Arch):
    """Return whether the process recorded in the `arch` pid file can be signalled.

    Returns False when there is no pid file or when signalling the recorded
    pid raises OSError.
    """
    pid = read_pid_file(arch)
    alive = pid is not None
    if alive:
        try:
            # Signal 0 delivers nothing; it only checks that the pid can be signalled.
            os.kill(pid, 0)
        except OSError:
            alive = False
    return alive
253
254
def kill_vm(arch: Arch):
    """Kill the `arch` VM process and block until it is gone.

    Does nothing when no pid is recorded. Signal 9 is sent to the recorded
    pid, then the pid is probed with signal 0 until the probe raises OSError.
    An OSError from the initial kill (e.g. the process is already gone) is
    treated the same way and ends the function quietly.
    """
    pid = read_pid_file(arch)
    if not pid:
        return

    try:
        os.kill(pid, 9)
        # Ping with signal 0 until we get an OSError indicating the process has shutdown.
        while True:
            os.kill(pid, 0)
            # Sleep briefly between probes instead of spinning a CPU core.
            time.sleep(0.01)
    except OSError:
        return
265
266
def build_if_needed(arch: Arch, reset: bool = False):
    """Make sure the base image and rootfs overlay for `arch` exist.

    Args:
        arch: Architecture to prepare images for.
        reset: When True, a running VM is killed first and the rootfs overlay
            is recreated even if it already exists.
    """
    if reset and is_running(arch):
        print(f"Killing existing {arch} VM to perform reset...")
        kill_vm(arch)
        time.sleep(1)

    data_dir(arch).mkdir(parents=True, exist_ok=True)

    # Download the base image only when it is not present locally.
    base_img = base_img_path(arch)
    if not base_img.exists():
        url = base_img_url(arch)
        print(f"Downloading {arch} base image ({url})...")
        download_file(url, base_img)

    rootfs_img = rootfs_img_path(arch)
    if reset or not rootfs_img.exists():
        # The rootfs is backed by the base image generated above. So we can
        # easily reset to a clean VM by rebuilding an empty rootfs image.
        print(f"Creating {arch} rootfs overlay...")
        create_overlay = qemu_img.with_args(
            "create",
            "-f qcow2",
            "-F qcow2",
            f"-b {base_img}",
            rootfs_img,
            "8G",
        )
        create_overlay.fg(quiet=True)
293
294
def up(arch: Arch, reset: bool = False, wait: bool = False, timeout: int = 120):
    """Start the test VM if it's not already running.

    Args:
        arch: Architecture of the VM to start.
        reset: Forwarded to build_if_needed() when a VM has to be started.
        wait: When True, block until the VM is reachable. A VM that is already
            running but does not become reachable is killed and restarted.
        timeout: Timeout passed to wait_until_reachable() for each wait.

    Raises:
        Exception: if `wait` is set and the newly started VM does not become
            reachable.
    """
    if is_running(arch):
        # Without `wait`, a running VM is good enough. With `wait`, it also
        # has to become reachable before we report it and return.
        if not wait or wait_until_reachable(arch, timeout):
            console.print(f"{arch} VM is running on port {ssh_port(arch)}")
            return

        # The VM did not become reachable: kill it if it is still alive.
        if is_running(arch):
            print(f"{arch} VM is not reachable. Restarting it.")
            kill_vm(arch)
        else:
            print(f"{arch} VM stopped. Starting it again.")

    build_if_needed(arch, reset)
    disk_image = rootfs_img_path(arch)
    run_qemu(arch, disk_image, background=True)

    if not wait:
        return
    if not wait_until_reachable(arch, timeout):
        raise Exception(f"Waiting for {arch} VM timed out.")
    console.print(f"{arch} VM is running on port {ssh_port(arch)}")
325
326
def wait_until_reachable(arch: Arch, timeout: int = 120):
    """Block until the `arch` VM is ready to use.

    Args:
        arch: Architecture of the VM to wait for.
        timeout: Maximum number of seconds to keep probing the VM.

    Returns:
        True once ping_vm() succeeds. False if the VM is not running, stops
        running while waiting, or does not become reachable before the timeout.
    """
    if not is_running(arch):
        return False
    if ping_vm(arch):
        return True

    with rich.live.Live(
        rich.spinner.Spinner("point", f"Waiting for {arch} VM to become reachable...")
    ):
        # Use the monotonic clock so wall-clock adjustments cannot shorten or
        # lengthen the timeout.
        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            if not is_running(arch):
                return False
            if ping_vm(arch):
                return True
    return False
344
345
class VmState(Enum):
    """State of a test VM; each value is a human-readable description."""

    # The VM process is alive and answers over ssh.
    REACHABLE = "Reachable"
    # The VM process is alive but does not answer over ssh.
    RUNNING_NOT_REACHABLE = "Running, but not reachable"
    # No live VM process was found.
    STOPPED = "Stopped"
350
351
def state(arch: Arch):
    """Return the VmState of the `arch` VM."""
    if not is_running(arch):
        return VmState.STOPPED
    if ping_vm(arch):
        return VmState.REACHABLE
    return VmState.RUNNING_NOT_REACHABLE
360