From 1b0c937ce474b7d34f226c6d47e73292134d0263 Mon Sep 17 00:00:00 2001 From: Dennis Kempin Date: Fri, 31 Mar 2023 16:00:39 -0700 Subject: [PATCH] testvm: Improved CLI usability Makes the testvm CLI more intuitive, so we can start using the x86vm as a default for running crosvm integration tests. Modernizes the CLI output with colors and spinners for waiting. Adds status and logs commands to aid debugging. Improved `up` command to kill VMs if they do not become responsive. Also merges the wait command into `up --wait`. BUG=b:275717759 TEST=aarch64vm up && aarch64vm up --wait TEST=aarch64vm stop && aarch64vm up --wait TEST=testvm TEST=aarch64vm logs Change-Id: I82cf14cff19a0b01cda64718a24d170d564fdcd7 Reviewed-on: https://chromium-review.googlesource.com/c/crosvm/crosvm/+/4390731 Reviewed-by: Zihan Chen --- tools/impl/common.py | 9 ++- tools/impl/presubmit.py | 28 ++++---- tools/impl/testvm.py | 127 ++++++++++++++++++++++++++++-------- tools/run_tests | 3 +- tools/testvm | 141 +++++++++++++++++++++++++++++----------- 5 files changed, 227 insertions(+), 81 deletions(-) diff --git a/tools/impl/common.py b/tools/impl/common.py index f6e4f5fc45..9e153657e8 100644 --- a/tools/impl/common.py +++ b/tools/impl/common.py @@ -18,7 +18,6 @@ from . import preamble # type: ignore import argparse import contextlib -import csv import datetime import functools import getpass @@ -34,7 +33,6 @@ import urllib import urllib.request import urllib.error from copy import deepcopy -from io import StringIO from math import ceil from multiprocessing.pool import ThreadPool from pathlib import Path @@ -111,6 +109,9 @@ assert 'name = "crosvm"' in CROSVM_TOML.read_text() # List of times recorded by `record_time` which will be printed if --timing-info is provided. global_time_records: List[Tuple[str, datetime.timedelta]] = [] +# Regex that matches ANSI escape sequences +ANSI_ESCAPE = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])") + def crosvm_target_dir(): crosvm_target = os.environ.get("CROSVM_TARGET_DIR") @@ -1166,6 +1167,10 @@ def download_file(url: str, filename: Path, attempts: int = 3): console.print("Download failed:", e) +def strip_ansi_escape_sequences(line: str) -> str: + return ANSI_ESCAPE.sub("", line) + + console = rich.console.Console() if __name__ == "__main__": diff --git a/tools/impl/presubmit.py b/tools/impl/presubmit.py index 3699d00683..72b331c8e3 100644 --- a/tools/impl/presubmit.py +++ b/tools/impl/presubmit.py @@ -4,29 +4,29 @@ # found in the LICENSE file. import os -import re import subprocess +import sys +import traceback from concurrent.futures import ThreadPoolExecutor from dataclasses import dataclass +from datetime import datetime, timedelta from fnmatch import fnmatch from pathlib import Path -import sys from time import sleep -from typing import Callable, List, NamedTuple, Optional, Set, Union -from datetime import datetime, timedelta +from typing import Callable, List, NamedTuple, Optional, Union -from impl.common import Command, all_tracked_files, cmd, console, verbose - -import rich -import rich.console -import rich.live -import rich.spinner -import rich.text +from impl.common import ( + Command, + all_tracked_files, + cmd, + console, + rich, + strip_ansi_escape_sequences, + verbose, +) git = cmd("git") -ansi_escape = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])") - @dataclass class CheckContext(object): @@ -204,7 +204,7 @@ class Task(object): *( # Print last log lines without it's original colors rich.text.Text( - "│ " + ansi_escape.sub("", log_line), + "│ " + strip_ansi_escape_sequences(log_line), style="light_slate_grey", overflow="ellipsis", no_wrap=True, diff --git a/tools/impl/testvm.py b/tools/impl/testvm.py index 3959bdc574..cf1a3edf34 100755 --- a/tools/impl/testvm.py +++ b/tools/impl/testvm.py @@ -2,6 +2,7 @@ # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. +from enum import Enum import json import os import socket @@ -11,14 +12,15 @@ import time import typing from contextlib import closing from pathlib import Path -from typing import Dict, List, Literal, Optional +from typing import Dict, List, Literal, Optional, Tuple -from .common import CACHE_DIR, download_file, cmd +from .common import CACHE_DIR, download_file, cmd, rich, console KVM_SUPPORT = os.access("/dev/kvm", os.W_OK) Arch = Literal["x86_64", "aarch64"] -ARCH_OPTIONS = typing.get_args(Arch) +ARCH_OPTIONS = typing.cast(Tuple[Arch], typing.get_args(Arch)) + SCRIPT_DIR = Path(__file__).parent.resolve() SRC_DIR = SCRIPT_DIR.joinpath("testvm") @@ -53,6 +55,10 @@ def pid_path(arch: Arch): return data_dir(arch).joinpath("pid") +def log_path(arch: Arch): + return data_dir(arch).joinpath("vm_log") + + def base_img_name(arch: Arch): return f"base-{arch}-{BASE_IMG_VERSION}.qcow2" @@ -170,10 +176,36 @@ def run_qemu( else: serial = "stdio" - process = qemu.with_args(f"-hda {hda}", f"-serial {serial}").popen(start_new_session=background) - write_pid_file(arch, process.pid) - if not background: - process.wait() + console.print(f"Booting {arch} VM with disk", hda) + command = qemu.with_args( + f"-hda {hda}", + f"-serial {serial}", + f"-netdev user,id=net0,hostfwd=tcp::{SSH_PORTS[arch]}-:22", + ) + if background: + # Start qemu in a new session so it can outlive this process. + process = command.popen( + start_new_session=background, stdout=subprocess.PIPE, stderr=subprocess.STDOUT + ) + + # Wait for 1s to see if the qemu is staying alive. + assert process.stdout + for _ in range(10): + if process.poll() is not None: + sys.stdout.write(process.stdout.read()) + print(f"'{command}' exited with code {process.returncode}") + sys.exit(process.returncode) + time.sleep(0.1) + + # Print any warnings qemu might produce. + sys.stdout.write(process.stdout.read(0)) + sys.stdout.flush() + process.stdout.close() + + # Save pid so we can manage the process later. + write_pid_file(arch, process.pid) + else: + command.fg() def run_vm(arch: Arch, background: bool = False): @@ -200,12 +232,18 @@ def is_running(arch: Arch): def kill_vm(arch: Arch): pid = read_pid_file(arch) if pid: - os.kill(pid, 9) + try: + os.kill(pid, 9) + # Ping with signal 0 until we get an OSError indicating the process has shutdown. + while True: + os.kill(pid, 0) + except OSError: + return def build_if_needed(arch: Arch, reset: bool = False): if reset and is_running(arch): - print("Killing existing VM...") + print(f"Killing existing {arch} VM to perform reset...") kill_vm(arch) time.sleep(1) @@ -213,14 +251,14 @@ def build_if_needed(arch: Arch, reset: bool = False): base_img = base_img_path(arch) if not base_img.exists(): - print(f"Downloading base image ({base_img_url(arch)})...") + print(f"Downloading {arch} base image ({base_img_url(arch)})...") download_file(base_img_url(arch), base_img_path(arch)) rootfs_img = rootfs_img_path(arch) if not rootfs_img.exists() or reset: # The rootfs is backed by the base image generated above. So we can # easily reset to a clean VM by rebuilding an empty rootfs image. - print("Creating rootfs overlay...") + print(f"Creating {arch} rootfs overlay...") qemu_img.with_args( "create", "-f qcow2", @@ -236,33 +274,68 @@ def is_ssh_port_available(arch: Arch): return sock.connect_ex(("127.0.0.1", SSH_PORTS[arch])) != 0 -def up(arch: Arch, reset: bool = False): - "Start the VM if it's not already running." +def up(arch: Arch, reset: bool = False, wait: bool = False, timeout: int = 120): + "Starts the test vm if it's not already running. Optionally wait for it to be reachable." + + # Try waiting for the running VM, if it does not become reachable, kill it. if is_running(arch): - return + if not wait: + console.print(f"{arch} VM is running on port {SSH_PORTS[arch]}") + return + if not wait_until_reachable(arch, timeout): + if is_running(arch): + print(f"{arch} VM is not reachable. Restarting it.") + kill_vm(arch) + else: + print(f"{arch} VM stopped. Starting it again.") + else: + console.print(f"{arch} VM is running on port {SSH_PORTS[arch]}") + return build_if_needed(arch, reset) - print("Booting VM...") run_qemu( arch, rootfs_img_path(arch), background=True, ) + if wait: + if wait_until_reachable(arch, timeout): + console.print(f"{arch} VM is running on port {SSH_PORTS[arch]}") + else: + raise Exception(f"Waiting for {arch} VM timed out.") -def wait(arch: Arch, timeout: int = 120): + +def wait_until_reachable(arch: Arch, timeout: int = 120): "Blocks until the VM is ready to use." - up(arch) + if not is_running(arch): + return False if ping_vm(arch): - return + return True - print("Waiting for VM") - start_time = time.time() - while (time.time() - start_time) < timeout: - time.sleep(1) - sys.stdout.write(".") - sys.stdout.flush() + with rich.live.Live( + rich.spinner.Spinner("point", f"Waiting for {arch} VM to become reachable...") + ): + start_time = time.time() + while (time.time() - start_time) < timeout: + if not is_running(arch): + return False + if ping_vm(arch): + return True + return False + + +class VmState(Enum): + REACHABLE = "Reachable" + RUNNING_NOT_REACHABLE = "Running, but not reachable" + STOPPED = "Stopped" + + +def state(arch: Arch): + if is_running(arch): if ping_vm(arch): - print() - return - raise Exception("Timeout while waiting for VM") + return VmState.REACHABLE + else: + return VmState.RUNNING_NOT_REACHABLE + else: + return VmState.STOPPED diff --git a/tools/run_tests b/tools/run_tests index c2fccb5d54..eba3fe9ab1 100755 --- a/tools/run_tests +++ b/tools/run_tests @@ -287,6 +287,7 @@ def main( if dut == "host": check_host_prerequisites(run_root_tests) if dut == "vm": + # Start VM ahead of time but don't wait for it to boot. testvm.up(get_vm_arch(triple)) nextest_args = [ @@ -353,7 +354,7 @@ def main( if dut == "host": target = HostTarget(package_dir) elif dut == "vm": - testvm.wait(get_vm_arch(triple)) + testvm.up(get_vm_arch(triple), wait=True) remote = Remote("localhost", testvm.ssh_opts(get_vm_arch(triple))) target = SshTarget(package_archive, remote) diff --git a/tools/testvm b/tools/testvm index 39a14dc4b2..b527e16c55 100755 --- a/tools/testvm +++ b/tools/testvm @@ -4,14 +4,16 @@ # found in the LICENSE file. import shutil +from typing import Iterable, Optional -from impl.common import run_commands, argh +from impl.common import run_commands, argh, console, strip_ansi_escape_sequences from impl import testvm +from impl.testvm import Arch, VmState USAGE = """Manages VMs for testing crosvm. Can run an x86_64 and an aarch64 vm via `./tools/x86vm` and `./tools/aarch64vm`. -The VM image will be downloaded and initialized on first use. +Both are a wrapper around `./tools/testvm --arch=x86_64/aarch64`. The easiest way to use the VM is: @@ -20,21 +22,43 @@ The easiest way to use the VM is: Which will initialize and boot the VM, then wait for SSH to be available and opens an SSH session. The VM will stay alive between calls. -Alternatively, you can set up an SSH config to connect to the VM. First ensure -the VM ready: +Available commands are: + - up: Start the VM if it is not already running. + - stop: Gracefully stop the VM + - kill: Send SIGKILL to the VM + - clean: Stop the VM and delete all images + - logs: Print logs of the VM console - $ ./tools/aarch64vm wait +All of these can be called on `./tools/x86vm` or `./tools/aarch64vm`, but also on +`tools/testvm` to apply to both VMs. """ -@argh.arg("--arch", required=True, choices=testvm.ARCH_OPTIONS) -def up(arch: testvm.Arch = "x86_64", reset: bool = False): +def cli_shorthand(arch: Arch): + if arch == "x86_64": + return "tools/x86vm" + elif arch == "aarch64": + return "tools/aarch64vm" + else: + raise Exception(f"Unknown architecture: {arch}") + + +def arch_or_all(arch: Optional[Arch]): + return (arch,) if arch else testvm.ARCH_OPTIONS + + +ARCHS = testvm.ARCH_OPTIONS + + +@argh.arg("--arch-list", "--arch", nargs="*", type=str, default=ARCHS, choices=ARCHS) +def up(arch_list: Iterable[Arch] = [], reset: bool = False, wait: bool = False, timeout: int = 120): "Start the VM if it's not already running." - testvm.up(arch, reset) + for arch in arch_list: + testvm.up(arch, reset, wait, timeout) @argh.arg("--arch", required=True, choices=testvm.ARCH_OPTIONS) -def run(arch: testvm.Arch = "x86_64", reset: bool = False): +def run(arch: Arch = "x86_64", reset: bool = False): "Run the VM in foreground for debugging purposes." if testvm.is_running(arch): raise Exception("VM is already running") @@ -47,45 +71,88 @@ def run(arch: testvm.Arch = "x86_64", reset: bool = False): @argh.arg("--arch", required=True, choices=testvm.ARCH_OPTIONS) -def wait(arch: testvm.Arch = "x86_64", timeout: int = 120): - "Blocks until the VM is ready to use." - testvm.wait(arch, timeout) - - -@argh.arg("--arch", required=True, choices=testvm.ARCH_OPTIONS) -def ssh(arch: testvm.Arch = "x86_64", timeout: int = 120): +def shell(arch: Arch = "x86_64", timeout: int = 120): "Starts an interactive shell via SSH, will ensure the VM is running." - testvm.up(arch) - wait(arch, timeout) + testvm.up(arch, wait=True, timeout=timeout) testvm.ssh_exec(arch) -@argh.arg("--arch", required=True, choices=testvm.ARCH_OPTIONS) -def stop(arch: testvm.Arch = "x86_64"): +@argh.arg("--arch-list", "--arch", nargs="*", type=str, default=ARCHS, choices=ARCHS) +def stop(arch_list: Iterable[Arch] = []): "Gracefully stops the running VM." - if not testvm.is_running(arch): - print("VM is not running.") - return - testvm.ssh_exec(arch, "sudo poweroff") + for arch in arch_list: + if not testvm.is_running(arch): + print(f"{arch} VM is not running") + break + console.print(f"Stopping {arch} VM") + testvm.ssh_exec(arch, "sudo poweroff") -@argh.arg("--arch", required=True, choices=testvm.ARCH_OPTIONS) -def kill(arch: testvm.Arch = "x86_64"): +@argh.arg("--arch-list", "--arch", nargs="*", type=str, default=ARCHS, choices=ARCHS) +def kill(arch_list: Iterable[Arch] = []): "Kills the running VM with SIGKILL." - if not testvm.is_running(arch): - print("VM is not running.") - return - testvm.kill_vm(arch) + for arch in arch_list: + if not testvm.is_running(arch): + console.print(f"{arch} VM is not running") + break + console.print(f"Killing {arch} VM process") + testvm.kill_vm(arch) + print() + + +@argh.arg("--arch-list", "--arch", nargs="*", type=str, default=ARCHS, choices=ARCHS) +def clean(arch_list: Iterable[Arch] = []): + "Stops the VM or VMs and deletes all data." + for arch in arch_list: + if testvm.is_running(arch): + kill(arch) + if testvm.data_dir(arch).exists(): + console.print("Cleaning data directory", testvm.data_dir(arch)) + shutil.rmtree(testvm.data_dir(arch)) + print() + + +def vm_status(arch: Arch): + def cli_tip(*args: str): + return f"[green][bold]{cli_shorthand(arch)} {' '.join(args)}[/bold][/green]" + + vm = f"{arch} VM" + port = f"[blue]{testvm.SSH_PORTS[arch]}[/blue]" + + state = testvm.state(arch) + if state == VmState.REACHABLE: + console.print(f"{vm} is [green]reachable[/green] on port {port}") + console.print(f"Start a shell with {cli_tip('shell')}") + elif state == VmState.STOPPED: + console.print(f"{vm} is [red]stopped[/red]") + console.print(f"Start the VM with {cli_tip('up')}") + else: + console.print(f"{vm} is running but [red]not reachable[/red] on port {port}") + console.print(f"Recent logs:") + logs(arch, 10, style="light_slate_grey") + console.print(f"See all logs with {cli_tip('logs')}") + + +@argh.arg("--arch-list", "--arch", nargs="*", type=str, default=ARCHS, choices=ARCHS) +def status(arch_list: Iterable[Arch] = []): + for arch in arch_list: + vm_status(arch) + print() @argh.arg("--arch", required=True, choices=testvm.ARCH_OPTIONS) -def clean(arch: testvm.Arch = "x86_64"): - "Stops the VM and deletes all data." - if testvm.is_running(arch): - kill(arch) - if testvm.data_dir(arch).exists(): - shutil.rmtree(testvm.data_dir(arch)) +def logs(arch: Arch = "x86_64", n: int = 0, style: Optional[str] = None): + log_lines = testvm.log_path(arch).read_text().splitlines() + if n > 0 and len(log_lines) > n: + log_lines = log_lines[-n:] + for line in log_lines: + if style: + console.print( + strip_ansi_escape_sequences(line), style=style, markup=False, highlight=False + ) + else: + print(line) if __name__ == "__main__": - run_commands(up, run, wait, ssh, stop, kill, clean, usage=USAGE) + run_commands(up, run, shell, stop, kill, clean, status, logs, usage=USAGE, default_fn=status)