testvm: Improved CLI usability

Makes the testvm CLI more intuitive, so we can start using the x86vm as a default for running crosvm integration tests. Modernizes the CLI output with colors and spinners for waiting. Adds status and logs commands to aid debugging. Improved `up` command to kill VMs if they do not become responsive. Also merges the wait command into `up --wait`. BUG=b:275717759 TEST=aarch64vm up && aarch64vm up --wait TEST=aarch64vm stop && aarch64vm up --wait TEST=testvm TEST=aarch64vm logs Change-Id: I82cf14cff19a0b01cda64718a24d170d564fdcd7 Reviewed-on: https://chromium-review.googlesource.com/c/crosvm/crosvm/+/4390731 Reviewed-by: Zihan Chen <zihanchen@google.com>
2024-11-28 01:16:50 +00:00 · 2023-03-31 16:00:39 -07:00 · 2023-03-31 16:00:39 -07:00 · 1b0c937ce4
commit 1b0c937ce4
parent 2f4504e176
5 changed files with 227 additions and 81 deletions
--- a/tools/impl/common.py
+++ b/tools/impl/common.py
@ -18,7 +18,6 @@ from . import preamble  # type: ignore
 import argparse
 import contextlib
 import csv
 import datetime
 import functools
 import getpass
@ -34,7 +33,6 @@ import urllib
 import urllib.request
 import urllib.error
 from copy import deepcopy
 from io import StringIO
 from math import ceil
 from multiprocessing.pool import ThreadPool
 from pathlib import Path
@ -111,6 +109,9 @@ assert 'name = "crosvm"' in CROSVM_TOML.read_text()
 # List of times recorded by `record_time` which will be printed if --timing-info is provided.
 global_time_records: List[Tuple[str, datetime.timedelta]] = []
 # Regex that matches ANSI escape sequences
 ANSI_ESCAPE = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])")
 def crosvm_target_dir():
    crosvm_target = os.environ.get("CROSVM_TARGET_DIR")
@ -1166,6 +1167,10 @@ def download_file(url: str, filename: Path, attempts: int = 3):
                console.print("Download failed:", e)
 def strip_ansi_escape_sequences(line: str) -> str:
    return ANSI_ESCAPE.sub("", line)
 console = rich.console.Console()
 if __name__ == "__main__":
--- a/tools/impl/presubmit.py
+++ b/tools/impl/presubmit.py
@ -4,29 +4,29 @@
 # found in the LICENSE file.
 import os
 import re
 import subprocess
 import sys
 import traceback
 from concurrent.futures import ThreadPoolExecutor
 from dataclasses import dataclass
 from datetime import datetime, timedelta
 from fnmatch import fnmatch
 from pathlib import Path
 import sys
 from time import sleep
-from typing import Callable, List, NamedTuple, Optional, Set, Union
+from typing import Callable, List, NamedTuple, Optional, Union
 from datetime import datetime, timedelta
-from impl.common import Command, all_tracked_files, cmd, console, verbose
+from impl.common import (
-
+    Command,
-import rich
+    all_tracked_files,
-import rich.console
+    cmd,
-import rich.live
+    console,
-import rich.spinner
+    rich,
-import rich.text
+    strip_ansi_escape_sequences,
    verbose,
 )
 git = cmd("git")
 ansi_escape = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])")
@dataclass
 class CheckContext(object):
@ -204,7 +204,7 @@ class Task(object):
            *(
                # Print last log lines without it's original colors
                rich.text.Text(
-                    "│ " + ansi_escape.sub("", log_line),
+                    "│ " + strip_ansi_escape_sequences(log_line),
                    style="light_slate_grey",
                    overflow="ellipsis",
                    no_wrap=True,
--- a/tools/impl/testvm.py
+++ b/tools/impl/testvm.py
@ -2,6 +2,7 @@
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
 from enum import Enum
 import json
 import os
 import socket
@ -11,14 +12,15 @@ import time
 import typing
 from contextlib import closing
 from pathlib import Path
-from typing import Dict, List, Literal, Optional
+from typing import Dict, List, Literal, Optional, Tuple
-from .common import CACHE_DIR, download_file, cmd
+from .common import CACHE_DIR, download_file, cmd, rich, console
 KVM_SUPPORT = os.access("/dev/kvm", os.W_OK)
 Arch = Literal["x86_64", "aarch64"]
-ARCH_OPTIONS = typing.get_args(Arch)
+ARCH_OPTIONS = typing.cast(Tuple[Arch], typing.get_args(Arch))
 SCRIPT_DIR = Path(__file__).parent.resolve()
 SRC_DIR = SCRIPT_DIR.joinpath("testvm")
@ -53,6 +55,10 @@ def pid_path(arch: Arch):
    return data_dir(arch).joinpath("pid")
 def log_path(arch: Arch):
    return data_dir(arch).joinpath("vm_log")
 def base_img_name(arch: Arch):
    return f"base-{arch}-{BASE_IMG_VERSION}.qcow2"
@ -170,10 +176,36 @@ def run_qemu(
    else:
        serial = "stdio"
-    process = qemu.with_args(f"-hda {hda}", f"-serial {serial}").popen(start_new_session=background)
+    console.print(f"Booting {arch} VM with disk", hda)
-    write_pid_file(arch, process.pid)
+    command = qemu.with_args(
-    if not background:
+        f"-hda {hda}",
-        process.wait()
+        f"-serial {serial}",
        f"-netdev user,id=net0,hostfwd=tcp::{SSH_PORTS[arch]}-:22",
    )
    if background:
        # Start qemu in a new session so it can outlive this process.
        process = command.popen(
            start_new_session=background, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
        )
        # Wait for 1s to see if the qemu is staying alive.
        assert process.stdout
        for _ in range(10):
            if process.poll() is not None:
                sys.stdout.write(process.stdout.read())
                print(f"'{command}' exited with code {process.returncode}")
                sys.exit(process.returncode)
            time.sleep(0.1)
        # Print any warnings qemu might produce.
        sys.stdout.write(process.stdout.read(0))
        sys.stdout.flush()
        process.stdout.close()
        # Save pid so we can manage the process later.
        write_pid_file(arch, process.pid)
    else:
        command.fg()
 def run_vm(arch: Arch, background: bool = False):
@ -200,12 +232,18 @@ def is_running(arch: Arch):
 def kill_vm(arch: Arch):
    pid = read_pid_file(arch)
    if pid:
-        os.kill(pid, 9)
+        try:
            os.kill(pid, 9)
            # Ping with signal 0 until we get an OSError indicating the process has shutdown.
            while True:
                os.kill(pid, 0)
        except OSError:
            return
 def build_if_needed(arch: Arch, reset: bool = False):
    if reset and is_running(arch):
-        print("Killing existing VM...")
+        print(f"Killing existing {arch} VM to perform reset...")
        kill_vm(arch)
        time.sleep(1)
@ -213,14 +251,14 @@ def build_if_needed(arch: Arch, reset: bool = False):
    base_img = base_img_path(arch)
    if not base_img.exists():
-        print(f"Downloading base image ({base_img_url(arch)})...")
+        print(f"Downloading {arch} base image ({base_img_url(arch)})...")
        download_file(base_img_url(arch), base_img_path(arch))
    rootfs_img = rootfs_img_path(arch)
    if not rootfs_img.exists() or reset:
        # The rootfs is backed by the base image generated above. So we can
        # easily reset to a clean VM by rebuilding an empty rootfs image.
-        print("Creating rootfs overlay...")
+        print(f"Creating {arch} rootfs overlay...")
        qemu_img.with_args(
            "create",
            "-f qcow2",
@ -236,33 +274,68 @@ def is_ssh_port_available(arch: Arch):
        return sock.connect_ex(("127.0.0.1", SSH_PORTS[arch])) != 0
-def up(arch: Arch, reset: bool = False):
+def up(arch: Arch, reset: bool = False, wait: bool = False, timeout: int = 120):
-    "Start the VM if it's not already running."
+    "Starts the test vm if it's not already running. Optionally wait for it to be reachable."
    # Try waiting for the running VM, if it does not become reachable, kill it.
    if is_running(arch):
-        return
+        if not wait:
            console.print(f"{arch} VM is running on port {SSH_PORTS[arch]}")
            return
        if not wait_until_reachable(arch, timeout):
            if is_running(arch):
                print(f"{arch} VM is not reachable. Restarting it.")
                kill_vm(arch)
            else:
                print(f"{arch} VM stopped. Starting it again.")
        else:
            console.print(f"{arch} VM is running on port {SSH_PORTS[arch]}")
            return
    build_if_needed(arch, reset)
    print("Booting VM...")
    run_qemu(
        arch,
        rootfs_img_path(arch),
        background=True,
    )
    if wait:
        if wait_until_reachable(arch, timeout):
            console.print(f"{arch} VM is running on port {SSH_PORTS[arch]}")
        else:
            raise Exception(f"Waiting for {arch} VM timed out.")
-def wait(arch: Arch, timeout: int = 120):
+
 def wait_until_reachable(arch: Arch, timeout: int = 120):
    "Blocks until the VM is ready to use."
-    up(arch)
+    if not is_running(arch):
        return False
    if ping_vm(arch):
-        return
+        return True
-    print("Waiting for VM")
+    with rich.live.Live(
-    start_time = time.time()
+        rich.spinner.Spinner("point", f"Waiting for {arch} VM to become reachable...")
-    while (time.time() - start_time) < timeout:
+    ):
-        time.sleep(1)
+        start_time = time.time()
-        sys.stdout.write(".")
+        while (time.time() - start_time) < timeout:
-        sys.stdout.flush()
+            if not is_running(arch):
                return False
            if ping_vm(arch):
                return True
    return False
 class VmState(Enum):
    REACHABLE = "Reachable"
    RUNNING_NOT_REACHABLE = "Running, but not reachable"
    STOPPED = "Stopped"
 def state(arch: Arch):
    if is_running(arch):
        if ping_vm(arch):
-            print()
+            return VmState.REACHABLE
-            return
+        else:
-    raise Exception("Timeout while waiting for VM")
+            return VmState.RUNNING_NOT_REACHABLE
    else:
        return VmState.STOPPED
--- a/tools/run_tests
+++ b/tools/run_tests
@ -287,6 +287,7 @@ def main(
    if dut == "host":
        check_host_prerequisites(run_root_tests)
    if dut == "vm":
        # Start VM ahead of time but don't wait for it to boot.
        testvm.up(get_vm_arch(triple))
    nextest_args = [
@ -353,7 +354,7 @@ def main(
        if dut == "host":
            target = HostTarget(package_dir)
        elif dut == "vm":
-            testvm.wait(get_vm_arch(triple))
+            testvm.up(get_vm_arch(triple), wait=True)
            remote = Remote("localhost", testvm.ssh_opts(get_vm_arch(triple)))
            target = SshTarget(package_archive, remote)
--- a/tools/testvm
+++ b/tools/testvm
@ -4,14 +4,16 @@
 # found in the LICENSE file.
 import shutil
 from typing import Iterable, Optional
-from impl.common import run_commands, argh
+from impl.common import run_commands, argh, console, strip_ansi_escape_sequences
 from impl import testvm
 from impl.testvm import Arch, VmState
 USAGE = """Manages VMs for testing crosvm.
 Can run an x86_64 and an aarch64 vm via `./tools/x86vm` and `./tools/aarch64vm`.
-The VM image will be downloaded and initialized on first use.
+Both are a wrapper around `./tools/testvm --arch=x86_64/aarch64`.
 The easiest way to use the VM is:
@ -20,21 +22,43 @@ The easiest way to use the VM is:
 Which will initialize and boot the VM, then wait for SSH to be available and
 opens an SSH session. The VM will stay alive between calls.
-Alternatively, you can set up an SSH config to connect to the VM. First ensure
+Available commands are:
-the VM ready:
+    - up: Start the VM if it is not already running.
    - stop: Gracefully stop the VM
    - kill: Send SIGKILL to the VM
    - clean: Stop the VM and delete all images
    - logs: Print logs of the VM console
-  $ ./tools/aarch64vm wait
+All of these can be called on `./tools/x86vm` or `./tools/aarch64vm`, but also on
 `tools/testvm` to apply to both VMs.
 """
-@argh.arg("--arch", required=True, choices=testvm.ARCH_OPTIONS)
+def cli_shorthand(arch: Arch):
-def up(arch: testvm.Arch = "x86_64", reset: bool = False):
+    if arch == "x86_64":
        return "tools/x86vm"
    elif arch == "aarch64":
        return "tools/aarch64vm"
    else:
        raise Exception(f"Unknown architecture: {arch}")
 def arch_or_all(arch: Optional[Arch]):
    return (arch,) if arch else testvm.ARCH_OPTIONS
 ARCHS = testvm.ARCH_OPTIONS
@argh.arg("--arch-list", "--arch", nargs="*", type=str, default=ARCHS, choices=ARCHS)
 def up(arch_list: Iterable[Arch] = [], reset: bool = False, wait: bool = False, timeout: int = 120):
    "Start the VM if it's not already running."
-    testvm.up(arch, reset)
+    for arch in arch_list:
        testvm.up(arch, reset, wait, timeout)
@argh.arg("--arch", required=True, choices=testvm.ARCH_OPTIONS)
-def run(arch: testvm.Arch = "x86_64", reset: bool = False):
+def run(arch: Arch = "x86_64", reset: bool = False):
    "Run the VM in foreground for debugging purposes."
    if testvm.is_running(arch):
        raise Exception("VM is already running")
@ -47,45 +71,88 @@ def run(arch: testvm.Arch = "x86_64", reset: bool = False):
@argh.arg("--arch", required=True, choices=testvm.ARCH_OPTIONS)
-def wait(arch: testvm.Arch = "x86_64", timeout: int = 120):
+def shell(arch: Arch = "x86_64", timeout: int = 120):
    "Blocks until the VM is ready to use."
    testvm.wait(arch, timeout)
@argh.arg("--arch", required=True, choices=testvm.ARCH_OPTIONS)
 def ssh(arch: testvm.Arch = "x86_64", timeout: int = 120):
    "Starts an interactive shell via SSH, will ensure the VM is running."
-    testvm.up(arch)
+    testvm.up(arch, wait=True, timeout=timeout)
    wait(arch, timeout)
    testvm.ssh_exec(arch)
-@argh.arg("--arch", required=True, choices=testvm.ARCH_OPTIONS)
+@argh.arg("--arch-list", "--arch", nargs="*", type=str, default=ARCHS, choices=ARCHS)
-def stop(arch: testvm.Arch = "x86_64"):
+def stop(arch_list: Iterable[Arch] = []):
    "Gracefully stops the running VM."
-    if not testvm.is_running(arch):
+    for arch in arch_list:
-        print("VM is not running.")
+        if not testvm.is_running(arch):
-        return
+            print(f"{arch} VM is not running")
-    testvm.ssh_exec(arch, "sudo poweroff")
+            break
        console.print(f"Stopping {arch} VM")
        testvm.ssh_exec(arch, "sudo poweroff")
-@argh.arg("--arch", required=True, choices=testvm.ARCH_OPTIONS)
+@argh.arg("--arch-list", "--arch", nargs="*", type=str, default=ARCHS, choices=ARCHS)
-def kill(arch: testvm.Arch = "x86_64"):
+def kill(arch_list: Iterable[Arch] = []):
    "Kills the running VM with SIGKILL."
-    if not testvm.is_running(arch):
+    for arch in arch_list:
-        print("VM is not running.")
+        if not testvm.is_running(arch):
-        return
+            console.print(f"{arch} VM is not running")
-    testvm.kill_vm(arch)
+            break
        console.print(f"Killing {arch} VM process")
        testvm.kill_vm(arch)
        print()
@argh.arg("--arch-list", "--arch", nargs="*", type=str, default=ARCHS, choices=ARCHS)
 def clean(arch_list: Iterable[Arch] = []):
    "Stops the VM or VMs and deletes all data."
    for arch in arch_list:
        if testvm.is_running(arch):
            kill(arch)
        if testvm.data_dir(arch).exists():
            console.print("Cleaning data directory", testvm.data_dir(arch))
            shutil.rmtree(testvm.data_dir(arch))
        print()
 def vm_status(arch: Arch):
    def cli_tip(*args: str):
        return f"[green][bold]{cli_shorthand(arch)} {' '.join(args)}[/bold][/green]"
    vm = f"{arch} VM"
    port = f"[blue]{testvm.SSH_PORTS[arch]}[/blue]"
    state = testvm.state(arch)
    if state == VmState.REACHABLE:
        console.print(f"{vm} is [green]reachable[/green] on port {port}")
        console.print(f"Start a shell with {cli_tip('shell')}")
    elif state == VmState.STOPPED:
        console.print(f"{vm} is [red]stopped[/red]")
        console.print(f"Start the VM with {cli_tip('up')}")
    else:
        console.print(f"{vm} is running but [red]not reachable[/red] on port {port}")
        console.print(f"Recent logs:")
        logs(arch, 10, style="light_slate_grey")
        console.print(f"See all logs with {cli_tip('logs')}")
@argh.arg("--arch-list", "--arch", nargs="*", type=str, default=ARCHS, choices=ARCHS)
 def status(arch_list: Iterable[Arch] = []):
    for arch in arch_list:
        vm_status(arch)
        print()
@argh.arg("--arch", required=True, choices=testvm.ARCH_OPTIONS)
-def clean(arch: testvm.Arch = "x86_64"):
+def logs(arch: Arch = "x86_64", n: int = 0, style: Optional[str] = None):
-    "Stops the VM and deletes all data."
+    log_lines = testvm.log_path(arch).read_text().splitlines()
-    if testvm.is_running(arch):
+    if n > 0 and len(log_lines) > n:
-        kill(arch)
+        log_lines = log_lines[-n:]
-    if testvm.data_dir(arch).exists():
+    for line in log_lines:
-        shutil.rmtree(testvm.data_dir(arch))
+        if style:
            console.print(
                strip_ansi_escape_sequences(line), style=style, markup=False, highlight=False
            )
        else:
            print(line)
 if __name__ == "__main__":
-    run_commands(up, run, wait, ssh, stop, kill, clean, usage=USAGE)
+    run_commands(up, run, shell, stop, kill, clean, status, logs, usage=USAGE, default_fn=status)