testvm: Improved CLI usability

Makes the testvm CLI more intuitive, so we can start using the
x86vm as a default for running crosvm integration tests.

Modernizes the CLI output with colors and spinners for waiting.
Adds status and logs commands to aid debugging.
Improves the `up` command to kill VMs if they do not become responsive.

Also merges the wait command into `up --wait`.

BUG=b:275717759
TEST=aarch64vm up && aarch64vm up --wait
TEST=aarch64vm stop && aarch64vm up --wait
TEST=testvm
TEST=aarch64vm logs

Change-Id: I82cf14cff19a0b01cda64718a24d170d564fdcd7
Reviewed-on: https://chromium-review.googlesource.com/c/crosvm/crosvm/+/4390731
Reviewed-by: Zihan Chen <zihanchen@google.com>
This commit is contained in:
Dennis Kempin 2023-03-31 16:00:39 -07:00 committed by crosvm LUCI
parent 2f4504e176
commit 1b0c937ce4
5 changed files with 227 additions and 81 deletions

View file

@ -18,7 +18,6 @@ from . import preamble # type: ignore
import argparse import argparse
import contextlib import contextlib
import csv
import datetime import datetime
import functools import functools
import getpass import getpass
@ -34,7 +33,6 @@ import urllib
import urllib.request import urllib.request
import urllib.error import urllib.error
from copy import deepcopy from copy import deepcopy
from io import StringIO
from math import ceil from math import ceil
from multiprocessing.pool import ThreadPool from multiprocessing.pool import ThreadPool
from pathlib import Path from pathlib import Path
@ -111,6 +109,9 @@ assert 'name = "crosvm"' in CROSVM_TOML.read_text()
# List of times recorded by `record_time` which will be printed if --timing-info is provided. # List of times recorded by `record_time` which will be printed if --timing-info is provided.
global_time_records: List[Tuple[str, datetime.timedelta]] = [] global_time_records: List[Tuple[str, datetime.timedelta]] = []
# Regex that matches ANSI escape sequences
ANSI_ESCAPE = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])")
def crosvm_target_dir(): def crosvm_target_dir():
crosvm_target = os.environ.get("CROSVM_TARGET_DIR") crosvm_target = os.environ.get("CROSVM_TARGET_DIR")
@ -1166,6 +1167,10 @@ def download_file(url: str, filename: Path, attempts: int = 3):
console.print("Download failed:", e) console.print("Download failed:", e)
def strip_ansi_escape_sequences(line: str) -> str:
return ANSI_ESCAPE.sub("", line)
console = rich.console.Console() console = rich.console.Console()
if __name__ == "__main__": if __name__ == "__main__":

View file

@ -4,29 +4,29 @@
# found in the LICENSE file. # found in the LICENSE file.
import os import os
import re
import subprocess import subprocess
import sys
import traceback
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
from dataclasses import dataclass from dataclasses import dataclass
from datetime import datetime, timedelta
from fnmatch import fnmatch from fnmatch import fnmatch
from pathlib import Path from pathlib import Path
import sys
from time import sleep from time import sleep
from typing import Callable, List, NamedTuple, Optional, Set, Union from typing import Callable, List, NamedTuple, Optional, Union
from datetime import datetime, timedelta
from impl.common import Command, all_tracked_files, cmd, console, verbose from impl.common import (
Command,
import rich all_tracked_files,
import rich.console cmd,
import rich.live console,
import rich.spinner rich,
import rich.text strip_ansi_escape_sequences,
verbose,
)
git = cmd("git") git = cmd("git")
ansi_escape = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])")
@dataclass @dataclass
class CheckContext(object): class CheckContext(object):
@ -204,7 +204,7 @@ class Task(object):
*( *(
# Print last log lines without it's original colors # Print last log lines without it's original colors
rich.text.Text( rich.text.Text(
"" + ansi_escape.sub("", log_line), "" + strip_ansi_escape_sequences(log_line),
style="light_slate_grey", style="light_slate_grey",
overflow="ellipsis", overflow="ellipsis",
no_wrap=True, no_wrap=True,

View file

@ -2,6 +2,7 @@
# Use of this source code is governed by a BSD-style license that can be # Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file. # found in the LICENSE file.
from enum import Enum
import json import json
import os import os
import socket import socket
@ -11,14 +12,15 @@ import time
import typing import typing
from contextlib import closing from contextlib import closing
from pathlib import Path from pathlib import Path
from typing import Dict, List, Literal, Optional from typing import Dict, List, Literal, Optional, Tuple
from .common import CACHE_DIR, download_file, cmd from .common import CACHE_DIR, download_file, cmd, rich, console
KVM_SUPPORT = os.access("/dev/kvm", os.W_OK) KVM_SUPPORT = os.access("/dev/kvm", os.W_OK)
Arch = Literal["x86_64", "aarch64"] Arch = Literal["x86_64", "aarch64"]
ARCH_OPTIONS = typing.get_args(Arch) ARCH_OPTIONS = typing.cast(Tuple[Arch], typing.get_args(Arch))
SCRIPT_DIR = Path(__file__).parent.resolve() SCRIPT_DIR = Path(__file__).parent.resolve()
SRC_DIR = SCRIPT_DIR.joinpath("testvm") SRC_DIR = SCRIPT_DIR.joinpath("testvm")
@ -53,6 +55,10 @@ def pid_path(arch: Arch):
return data_dir(arch).joinpath("pid") return data_dir(arch).joinpath("pid")
def log_path(arch: Arch):
return data_dir(arch).joinpath("vm_log")
def base_img_name(arch: Arch): def base_img_name(arch: Arch):
return f"base-{arch}-{BASE_IMG_VERSION}.qcow2" return f"base-{arch}-{BASE_IMG_VERSION}.qcow2"
@ -170,10 +176,36 @@ def run_qemu(
else: else:
serial = "stdio" serial = "stdio"
process = qemu.with_args(f"-hda {hda}", f"-serial {serial}").popen(start_new_session=background) console.print(f"Booting {arch} VM with disk", hda)
write_pid_file(arch, process.pid) command = qemu.with_args(
if not background: f"-hda {hda}",
process.wait() f"-serial {serial}",
f"-netdev user,id=net0,hostfwd=tcp::{SSH_PORTS[arch]}-:22",
)
if background:
# Start qemu in a new session so it can outlive this process.
process = command.popen(
start_new_session=background, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
)
# Wait for 1s to see if the qemu is staying alive.
assert process.stdout
for _ in range(10):
if process.poll() is not None:
sys.stdout.write(process.stdout.read())
print(f"'{command}' exited with code {process.returncode}")
sys.exit(process.returncode)
time.sleep(0.1)
# Print any warnings qemu might produce.
sys.stdout.write(process.stdout.read(0))
sys.stdout.flush()
process.stdout.close()
# Save pid so we can manage the process later.
write_pid_file(arch, process.pid)
else:
command.fg()
def run_vm(arch: Arch, background: bool = False): def run_vm(arch: Arch, background: bool = False):
@ -200,12 +232,18 @@ def is_running(arch: Arch):
def kill_vm(arch: Arch): def kill_vm(arch: Arch):
pid = read_pid_file(arch) pid = read_pid_file(arch)
if pid: if pid:
os.kill(pid, 9) try:
os.kill(pid, 9)
# Ping with signal 0 until we get an OSError indicating the process has shutdown.
while True:
os.kill(pid, 0)
except OSError:
return
def build_if_needed(arch: Arch, reset: bool = False): def build_if_needed(arch: Arch, reset: bool = False):
if reset and is_running(arch): if reset and is_running(arch):
print("Killing existing VM...") print(f"Killing existing {arch} VM to perform reset...")
kill_vm(arch) kill_vm(arch)
time.sleep(1) time.sleep(1)
@ -213,14 +251,14 @@ def build_if_needed(arch: Arch, reset: bool = False):
base_img = base_img_path(arch) base_img = base_img_path(arch)
if not base_img.exists(): if not base_img.exists():
print(f"Downloading base image ({base_img_url(arch)})...") print(f"Downloading {arch} base image ({base_img_url(arch)})...")
download_file(base_img_url(arch), base_img_path(arch)) download_file(base_img_url(arch), base_img_path(arch))
rootfs_img = rootfs_img_path(arch) rootfs_img = rootfs_img_path(arch)
if not rootfs_img.exists() or reset: if not rootfs_img.exists() or reset:
# The rootfs is backed by the base image generated above. So we can # The rootfs is backed by the base image generated above. So we can
# easily reset to a clean VM by rebuilding an empty rootfs image. # easily reset to a clean VM by rebuilding an empty rootfs image.
print("Creating rootfs overlay...") print(f"Creating {arch} rootfs overlay...")
qemu_img.with_args( qemu_img.with_args(
"create", "create",
"-f qcow2", "-f qcow2",
@ -236,33 +274,68 @@ def is_ssh_port_available(arch: Arch):
return sock.connect_ex(("127.0.0.1", SSH_PORTS[arch])) != 0 return sock.connect_ex(("127.0.0.1", SSH_PORTS[arch])) != 0
def up(arch: Arch, reset: bool = False): def up(arch: Arch, reset: bool = False, wait: bool = False, timeout: int = 120):
"Start the VM if it's not already running." "Starts the test vm if it's not already running. Optionally wait for it to be reachable."
# Try waiting for the running VM, if it does not become reachable, kill it.
if is_running(arch): if is_running(arch):
return if not wait:
console.print(f"{arch} VM is running on port {SSH_PORTS[arch]}")
return
if not wait_until_reachable(arch, timeout):
if is_running(arch):
print(f"{arch} VM is not reachable. Restarting it.")
kill_vm(arch)
else:
print(f"{arch} VM stopped. Starting it again.")
else:
console.print(f"{arch} VM is running on port {SSH_PORTS[arch]}")
return
build_if_needed(arch, reset) build_if_needed(arch, reset)
print("Booting VM...")
run_qemu( run_qemu(
arch, arch,
rootfs_img_path(arch), rootfs_img_path(arch),
background=True, background=True,
) )
if wait:
if wait_until_reachable(arch, timeout):
console.print(f"{arch} VM is running on port {SSH_PORTS[arch]}")
else:
raise Exception(f"Waiting for {arch} VM timed out.")
def wait(arch: Arch, timeout: int = 120):
def wait_until_reachable(arch: Arch, timeout: int = 120):
"Blocks until the VM is ready to use." "Blocks until the VM is ready to use."
up(arch) if not is_running(arch):
return False
if ping_vm(arch): if ping_vm(arch):
return return True
print("Waiting for VM") with rich.live.Live(
start_time = time.time() rich.spinner.Spinner("point", f"Waiting for {arch} VM to become reachable...")
while (time.time() - start_time) < timeout: ):
time.sleep(1) start_time = time.time()
sys.stdout.write(".") while (time.time() - start_time) < timeout:
sys.stdout.flush() if not is_running(arch):
return False
if ping_vm(arch):
return True
return False
class VmState(Enum):
REACHABLE = "Reachable"
RUNNING_NOT_REACHABLE = "Running, but not reachable"
STOPPED = "Stopped"
def state(arch: Arch):
if is_running(arch):
if ping_vm(arch): if ping_vm(arch):
print() return VmState.REACHABLE
return else:
raise Exception("Timeout while waiting for VM") return VmState.RUNNING_NOT_REACHABLE
else:
return VmState.STOPPED

View file

@ -287,6 +287,7 @@ def main(
if dut == "host": if dut == "host":
check_host_prerequisites(run_root_tests) check_host_prerequisites(run_root_tests)
if dut == "vm": if dut == "vm":
# Start VM ahead of time but don't wait for it to boot.
testvm.up(get_vm_arch(triple)) testvm.up(get_vm_arch(triple))
nextest_args = [ nextest_args = [
@ -353,7 +354,7 @@ def main(
if dut == "host": if dut == "host":
target = HostTarget(package_dir) target = HostTarget(package_dir)
elif dut == "vm": elif dut == "vm":
testvm.wait(get_vm_arch(triple)) testvm.up(get_vm_arch(triple), wait=True)
remote = Remote("localhost", testvm.ssh_opts(get_vm_arch(triple))) remote = Remote("localhost", testvm.ssh_opts(get_vm_arch(triple)))
target = SshTarget(package_archive, remote) target = SshTarget(package_archive, remote)

View file

@ -4,14 +4,16 @@
# found in the LICENSE file. # found in the LICENSE file.
import shutil import shutil
from typing import Iterable, Optional
from impl.common import run_commands, argh from impl.common import run_commands, argh, console, strip_ansi_escape_sequences
from impl import testvm from impl import testvm
from impl.testvm import Arch, VmState
USAGE = """Manages VMs for testing crosvm. USAGE = """Manages VMs for testing crosvm.
Can run an x86_64 and an aarch64 vm via `./tools/x86vm` and `./tools/aarch64vm`. Can run an x86_64 and an aarch64 vm via `./tools/x86vm` and `./tools/aarch64vm`.
The VM image will be downloaded and initialized on first use. Both are a wrapper around `./tools/testvm --arch=x86_64/aarch64`.
The easiest way to use the VM is: The easiest way to use the VM is:
@ -20,21 +22,43 @@ The easiest way to use the VM is:
Which will initialize and boot the VM, then wait for SSH to be available and Which will initialize and boot the VM, then wait for SSH to be available and
opens an SSH session. The VM will stay alive between calls. opens an SSH session. The VM will stay alive between calls.
Alternatively, you can set up an SSH config to connect to the VM. First ensure Available commands are:
the VM ready: - up: Start the VM if it is not already running.
- stop: Gracefully stop the VM
- kill: Send SIGKILL to the VM
- clean: Stop the VM and delete all images
- logs: Print logs of the VM console
$ ./tools/aarch64vm wait All of these can be called on `./tools/x86vm` or `./tools/aarch64vm`, but also on
`tools/testvm` to apply to both VMs.
""" """
@argh.arg("--arch", required=True, choices=testvm.ARCH_OPTIONS) def cli_shorthand(arch: Arch):
def up(arch: testvm.Arch = "x86_64", reset: bool = False): if arch == "x86_64":
return "tools/x86vm"
elif arch == "aarch64":
return "tools/aarch64vm"
else:
raise Exception(f"Unknown architecture: {arch}")
def arch_or_all(arch: Optional[Arch]):
return (arch,) if arch else testvm.ARCH_OPTIONS
ARCHS = testvm.ARCH_OPTIONS
@argh.arg("--arch-list", "--arch", nargs="*", type=str, default=ARCHS, choices=ARCHS)
def up(arch_list: Iterable[Arch] = [], reset: bool = False, wait: bool = False, timeout: int = 120):
"Start the VM if it's not already running." "Start the VM if it's not already running."
testvm.up(arch, reset) for arch in arch_list:
testvm.up(arch, reset, wait, timeout)
@argh.arg("--arch", required=True, choices=testvm.ARCH_OPTIONS) @argh.arg("--arch", required=True, choices=testvm.ARCH_OPTIONS)
def run(arch: testvm.Arch = "x86_64", reset: bool = False): def run(arch: Arch = "x86_64", reset: bool = False):
"Run the VM in foreground for debugging purposes." "Run the VM in foreground for debugging purposes."
if testvm.is_running(arch): if testvm.is_running(arch):
raise Exception("VM is already running") raise Exception("VM is already running")
@ -47,45 +71,88 @@ def run(arch: testvm.Arch = "x86_64", reset: bool = False):
@argh.arg("--arch", required=True, choices=testvm.ARCH_OPTIONS) @argh.arg("--arch", required=True, choices=testvm.ARCH_OPTIONS)
def wait(arch: testvm.Arch = "x86_64", timeout: int = 120): def shell(arch: Arch = "x86_64", timeout: int = 120):
"Blocks until the VM is ready to use."
testvm.wait(arch, timeout)
@argh.arg("--arch", required=True, choices=testvm.ARCH_OPTIONS)
def ssh(arch: testvm.Arch = "x86_64", timeout: int = 120):
"Starts an interactive shell via SSH, will ensure the VM is running." "Starts an interactive shell via SSH, will ensure the VM is running."
testvm.up(arch) testvm.up(arch, wait=True, timeout=timeout)
wait(arch, timeout)
testvm.ssh_exec(arch) testvm.ssh_exec(arch)
@argh.arg("--arch", required=True, choices=testvm.ARCH_OPTIONS) @argh.arg("--arch-list", "--arch", nargs="*", type=str, default=ARCHS, choices=ARCHS)
def stop(arch: testvm.Arch = "x86_64"): def stop(arch_list: Iterable[Arch] = []):
"Gracefully stops the running VM." "Gracefully stops the running VM."
if not testvm.is_running(arch): for arch in arch_list:
print("VM is not running.") if not testvm.is_running(arch):
return print(f"{arch} VM is not running")
testvm.ssh_exec(arch, "sudo poweroff") break
console.print(f"Stopping {arch} VM")
testvm.ssh_exec(arch, "sudo poweroff")
@argh.arg("--arch", required=True, choices=testvm.ARCH_OPTIONS) @argh.arg("--arch-list", "--arch", nargs="*", type=str, default=ARCHS, choices=ARCHS)
def kill(arch: testvm.Arch = "x86_64"): def kill(arch_list: Iterable[Arch] = []):
"Kills the running VM with SIGKILL." "Kills the running VM with SIGKILL."
if not testvm.is_running(arch): for arch in arch_list:
print("VM is not running.") if not testvm.is_running(arch):
return console.print(f"{arch} VM is not running")
testvm.kill_vm(arch) break
console.print(f"Killing {arch} VM process")
testvm.kill_vm(arch)
print()
@argh.arg("--arch-list", "--arch", nargs="*", type=str, default=ARCHS, choices=ARCHS)
def clean(arch_list: Iterable[Arch] = []):
"Stops the VM or VMs and deletes all data."
for arch in arch_list:
if testvm.is_running(arch):
kill(arch)
if testvm.data_dir(arch).exists():
console.print("Cleaning data directory", testvm.data_dir(arch))
shutil.rmtree(testvm.data_dir(arch))
print()
def vm_status(arch: Arch):
def cli_tip(*args: str):
return f"[green][bold]{cli_shorthand(arch)} {' '.join(args)}[/bold][/green]"
vm = f"{arch} VM"
port = f"[blue]{testvm.SSH_PORTS[arch]}[/blue]"
state = testvm.state(arch)
if state == VmState.REACHABLE:
console.print(f"{vm} is [green]reachable[/green] on port {port}")
console.print(f"Start a shell with {cli_tip('shell')}")
elif state == VmState.STOPPED:
console.print(f"{vm} is [red]stopped[/red]")
console.print(f"Start the VM with {cli_tip('up')}")
else:
console.print(f"{vm} is running but [red]not reachable[/red] on port {port}")
console.print(f"Recent logs:")
logs(arch, 10, style="light_slate_grey")
console.print(f"See all logs with {cli_tip('logs')}")
@argh.arg("--arch-list", "--arch", nargs="*", type=str, default=ARCHS, choices=ARCHS)
def status(arch_list: Iterable[Arch] = []):
for arch in arch_list:
vm_status(arch)
print()
@argh.arg("--arch", required=True, choices=testvm.ARCH_OPTIONS) @argh.arg("--arch", required=True, choices=testvm.ARCH_OPTIONS)
def clean(arch: testvm.Arch = "x86_64"): def logs(arch: Arch = "x86_64", n: int = 0, style: Optional[str] = None):
"Stops the VM and deletes all data." log_lines = testvm.log_path(arch).read_text().splitlines()
if testvm.is_running(arch): if n > 0 and len(log_lines) > n:
kill(arch) log_lines = log_lines[-n:]
if testvm.data_dir(arch).exists(): for line in log_lines:
shutil.rmtree(testvm.data_dir(arch)) if style:
console.print(
strip_ansi_escape_sequences(line), style=style, markup=False, highlight=False
)
else:
print(line)
if __name__ == "__main__": if __name__ == "__main__":
run_commands(up, run, wait, ssh, stop, kill, clean, usage=USAGE) run_commands(up, run, shell, stop, kill, clean, status, logs, usage=USAGE, default_fn=status)