testvm: Improved CLI usability

Makes the testvm CLI more intuitive, so we can start using the
x86vm as a default for running crosvm integration tests.

Modernizes the CLI output with colors and spinners for waiting.
Adds status and logs commands to aid debugging.
Improved `up` command to kill VMs if they do not become responsive.

Also merges the wait command into `up --wait`.

BUG=b:275717759
TEST=aarch64vm up && aarch64vm up --wait
TEST=aarch64vm stop && aarch64vm up --wait
TEST=testvm
TEST=aarch64vm logs

Change-Id: I82cf14cff19a0b01cda64718a24d170d564fdcd7
Reviewed-on: https://chromium-review.googlesource.com/c/crosvm/crosvm/+/4390731
Reviewed-by: Zihan Chen <zihanchen@google.com>
This commit is contained in:
Dennis Kempin 2023-03-31 16:00:39 -07:00 committed by crosvm LUCI
parent 2f4504e176
commit 1b0c937ce4
5 changed files with 227 additions and 81 deletions

View file

@ -18,7 +18,6 @@ from . import preamble # type: ignore
import argparse
import contextlib
import csv
import datetime
import functools
import getpass
@ -34,7 +33,6 @@ import urllib
import urllib.request
import urllib.error
from copy import deepcopy
from io import StringIO
from math import ceil
from multiprocessing.pool import ThreadPool
from pathlib import Path
@ -111,6 +109,9 @@ assert 'name = "crosvm"' in CROSVM_TOML.read_text()
# List of times recorded by `record_time` which will be printed if --timing-info is provided.
global_time_records: List[Tuple[str, datetime.timedelta]] = []
# Regex that matches ANSI escape sequences
ANSI_ESCAPE = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])")
def crosvm_target_dir():
crosvm_target = os.environ.get("CROSVM_TARGET_DIR")
@ -1166,6 +1167,10 @@ def download_file(url: str, filename: Path, attempts: int = 3):
console.print("Download failed:", e)
def strip_ansi_escape_sequences(line: str) -> str:
return ANSI_ESCAPE.sub("", line)
console = rich.console.Console()
if __name__ == "__main__":

View file

@ -4,29 +4,29 @@
# found in the LICENSE file.
import os
import re
import subprocess
import sys
import traceback
from concurrent.futures import ThreadPoolExecutor
from dataclasses import dataclass
from datetime import datetime, timedelta
from fnmatch import fnmatch
from pathlib import Path
import sys
from time import sleep
from typing import Callable, List, NamedTuple, Optional, Set, Union
from datetime import datetime, timedelta
from typing import Callable, List, NamedTuple, Optional, Union
from impl.common import Command, all_tracked_files, cmd, console, verbose
import rich
import rich.console
import rich.live
import rich.spinner
import rich.text
from impl.common import (
Command,
all_tracked_files,
cmd,
console,
rich,
strip_ansi_escape_sequences,
verbose,
)
git = cmd("git")
ansi_escape = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])")
@dataclass
class CheckContext(object):
@ -204,7 +204,7 @@ class Task(object):
*(
# Print last log lines without it's original colors
rich.text.Text(
"" + ansi_escape.sub("", log_line),
"" + strip_ansi_escape_sequences(log_line),
style="light_slate_grey",
overflow="ellipsis",
no_wrap=True,

View file

@ -2,6 +2,7 @@
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
from enum import Enum
import json
import os
import socket
@ -11,14 +12,15 @@ import time
import typing
from contextlib import closing
from pathlib import Path
from typing import Dict, List, Literal, Optional
from typing import Dict, List, Literal, Optional, Tuple
from .common import CACHE_DIR, download_file, cmd
from .common import CACHE_DIR, download_file, cmd, rich, console
KVM_SUPPORT = os.access("/dev/kvm", os.W_OK)
Arch = Literal["x86_64", "aarch64"]
ARCH_OPTIONS = typing.get_args(Arch)
ARCH_OPTIONS = typing.cast(Tuple[Arch], typing.get_args(Arch))
SCRIPT_DIR = Path(__file__).parent.resolve()
SRC_DIR = SCRIPT_DIR.joinpath("testvm")
@ -53,6 +55,10 @@ def pid_path(arch: Arch):
return data_dir(arch).joinpath("pid")
def log_path(arch: Arch):
return data_dir(arch).joinpath("vm_log")
def base_img_name(arch: Arch):
return f"base-{arch}-{BASE_IMG_VERSION}.qcow2"
@ -170,10 +176,36 @@ def run_qemu(
else:
serial = "stdio"
process = qemu.with_args(f"-hda {hda}", f"-serial {serial}").popen(start_new_session=background)
write_pid_file(arch, process.pid)
if not background:
process.wait()
console.print(f"Booting {arch} VM with disk", hda)
command = qemu.with_args(
f"-hda {hda}",
f"-serial {serial}",
f"-netdev user,id=net0,hostfwd=tcp::{SSH_PORTS[arch]}-:22",
)
if background:
# Start qemu in a new session so it can outlive this process.
process = command.popen(
start_new_session=background, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
)
# Wait for 1s to see if the qemu is staying alive.
assert process.stdout
for _ in range(10):
if process.poll() is not None:
sys.stdout.write(process.stdout.read())
print(f"'{command}' exited with code {process.returncode}")
sys.exit(process.returncode)
time.sleep(0.1)
# Print any warnings qemu might produce.
sys.stdout.write(process.stdout.read(0))
sys.stdout.flush()
process.stdout.close()
# Save pid so we can manage the process later.
write_pid_file(arch, process.pid)
else:
command.fg()
def run_vm(arch: Arch, background: bool = False):
@ -200,12 +232,18 @@ def is_running(arch: Arch):
def kill_vm(arch: Arch):
pid = read_pid_file(arch)
if pid:
os.kill(pid, 9)
try:
os.kill(pid, 9)
# Ping with signal 0 until we get an OSError indicating the process has shutdown.
while True:
os.kill(pid, 0)
except OSError:
return
def build_if_needed(arch: Arch, reset: bool = False):
if reset and is_running(arch):
print("Killing existing VM...")
print(f"Killing existing {arch} VM to perform reset...")
kill_vm(arch)
time.sleep(1)
@ -213,14 +251,14 @@ def build_if_needed(arch: Arch, reset: bool = False):
base_img = base_img_path(arch)
if not base_img.exists():
print(f"Downloading base image ({base_img_url(arch)})...")
print(f"Downloading {arch} base image ({base_img_url(arch)})...")
download_file(base_img_url(arch), base_img_path(arch))
rootfs_img = rootfs_img_path(arch)
if not rootfs_img.exists() or reset:
# The rootfs is backed by the base image generated above. So we can
# easily reset to a clean VM by rebuilding an empty rootfs image.
print("Creating rootfs overlay...")
print(f"Creating {arch} rootfs overlay...")
qemu_img.with_args(
"create",
"-f qcow2",
@ -236,33 +274,68 @@ def is_ssh_port_available(arch: Arch):
return sock.connect_ex(("127.0.0.1", SSH_PORTS[arch])) != 0
def up(arch: Arch, reset: bool = False):
"Start the VM if it's not already running."
def up(arch: Arch, reset: bool = False, wait: bool = False, timeout: int = 120):
"Starts the test vm if it's not already running. Optionally wait for it to be reachable."
# Try waiting for the running VM, if it does not become reachable, kill it.
if is_running(arch):
return
if not wait:
console.print(f"{arch} VM is running on port {SSH_PORTS[arch]}")
return
if not wait_until_reachable(arch, timeout):
if is_running(arch):
print(f"{arch} VM is not reachable. Restarting it.")
kill_vm(arch)
else:
print(f"{arch} VM stopped. Starting it again.")
else:
console.print(f"{arch} VM is running on port {SSH_PORTS[arch]}")
return
build_if_needed(arch, reset)
print("Booting VM...")
run_qemu(
arch,
rootfs_img_path(arch),
background=True,
)
if wait:
if wait_until_reachable(arch, timeout):
console.print(f"{arch} VM is running on port {SSH_PORTS[arch]}")
else:
raise Exception(f"Waiting for {arch} VM timed out.")
def wait(arch: Arch, timeout: int = 120):
def wait_until_reachable(arch: Arch, timeout: int = 120):
"Blocks until the VM is ready to use."
up(arch)
if not is_running(arch):
return False
if ping_vm(arch):
return
return True
print("Waiting for VM")
start_time = time.time()
while (time.time() - start_time) < timeout:
time.sleep(1)
sys.stdout.write(".")
sys.stdout.flush()
with rich.live.Live(
rich.spinner.Spinner("point", f"Waiting for {arch} VM to become reachable...")
):
start_time = time.time()
while (time.time() - start_time) < timeout:
if not is_running(arch):
return False
if ping_vm(arch):
return True
return False
class VmState(Enum):
REACHABLE = "Reachable"
RUNNING_NOT_REACHABLE = "Running, but not reachable"
STOPPED = "Stopped"
def state(arch: Arch):
if is_running(arch):
if ping_vm(arch):
print()
return
raise Exception("Timeout while waiting for VM")
return VmState.REACHABLE
else:
return VmState.RUNNING_NOT_REACHABLE
else:
return VmState.STOPPED

View file

@ -287,6 +287,7 @@ def main(
if dut == "host":
check_host_prerequisites(run_root_tests)
if dut == "vm":
# Start VM ahead of time but don't wait for it to boot.
testvm.up(get_vm_arch(triple))
nextest_args = [
@ -353,7 +354,7 @@ def main(
if dut == "host":
target = HostTarget(package_dir)
elif dut == "vm":
testvm.wait(get_vm_arch(triple))
testvm.up(get_vm_arch(triple), wait=True)
remote = Remote("localhost", testvm.ssh_opts(get_vm_arch(triple)))
target = SshTarget(package_archive, remote)

View file

@ -4,14 +4,16 @@
# found in the LICENSE file.
import shutil
from typing import Iterable, Optional
from impl.common import run_commands, argh
from impl.common import run_commands, argh, console, strip_ansi_escape_sequences
from impl import testvm
from impl.testvm import Arch, VmState
USAGE = """Manages VMs for testing crosvm.
Can run an x86_64 and an aarch64 vm via `./tools/x86vm` and `./tools/aarch64vm`.
The VM image will be downloaded and initialized on first use.
Both are a wrapper around `./tools/testvm --arch=x86_64/aarch64`.
The easiest way to use the VM is:
@ -20,21 +22,43 @@ The easiest way to use the VM is:
Which will initialize and boot the VM, then wait for SSH to be available and
opens an SSH session. The VM will stay alive between calls.
Alternatively, you can set up an SSH config to connect to the VM. First ensure
the VM ready:
Available commands are:
- up: Start the VM if it is not already running.
- stop: Gracefully stop the VM
- kill: Send SIGKILL to the VM
- clean: Stop the VM and delete all images
- logs: Print logs of the VM console
$ ./tools/aarch64vm wait
All of these can be called on `./tools/x86vm` or `./tools/aarch64vm`, but also on
`tools/testvm` to apply to both VMs.
"""
@argh.arg("--arch", required=True, choices=testvm.ARCH_OPTIONS)
def up(arch: testvm.Arch = "x86_64", reset: bool = False):
def cli_shorthand(arch: Arch):
if arch == "x86_64":
return "tools/x86vm"
elif arch == "aarch64":
return "tools/aarch64vm"
else:
raise Exception(f"Unknown architecture: {arch}")
def arch_or_all(arch: Optional[Arch]):
return (arch,) if arch else testvm.ARCH_OPTIONS
ARCHS = testvm.ARCH_OPTIONS
@argh.arg("--arch-list", "--arch", nargs="*", type=str, default=ARCHS, choices=ARCHS)
def up(arch_list: Iterable[Arch] = [], reset: bool = False, wait: bool = False, timeout: int = 120):
"Start the VM if it's not already running."
testvm.up(arch, reset)
for arch in arch_list:
testvm.up(arch, reset, wait, timeout)
@argh.arg("--arch", required=True, choices=testvm.ARCH_OPTIONS)
def run(arch: testvm.Arch = "x86_64", reset: bool = False):
def run(arch: Arch = "x86_64", reset: bool = False):
"Run the VM in foreground for debugging purposes."
if testvm.is_running(arch):
raise Exception("VM is already running")
@ -47,45 +71,88 @@ def run(arch: testvm.Arch = "x86_64", reset: bool = False):
@argh.arg("--arch", required=True, choices=testvm.ARCH_OPTIONS)
def wait(arch: testvm.Arch = "x86_64", timeout: int = 120):
"Blocks until the VM is ready to use."
testvm.wait(arch, timeout)
@argh.arg("--arch", required=True, choices=testvm.ARCH_OPTIONS)
def ssh(arch: testvm.Arch = "x86_64", timeout: int = 120):
def shell(arch: Arch = "x86_64", timeout: int = 120):
"Starts an interactive shell via SSH, will ensure the VM is running."
testvm.up(arch)
wait(arch, timeout)
testvm.up(arch, wait=True, timeout=timeout)
testvm.ssh_exec(arch)
@argh.arg("--arch", required=True, choices=testvm.ARCH_OPTIONS)
def stop(arch: testvm.Arch = "x86_64"):
@argh.arg("--arch-list", "--arch", nargs="*", type=str, default=ARCHS, choices=ARCHS)
def stop(arch_list: Iterable[Arch] = []):
"Gracefully stops the running VM."
if not testvm.is_running(arch):
print("VM is not running.")
return
testvm.ssh_exec(arch, "sudo poweroff")
for arch in arch_list:
if not testvm.is_running(arch):
print(f"{arch} VM is not running")
break
console.print(f"Stopping {arch} VM")
testvm.ssh_exec(arch, "sudo poweroff")
@argh.arg("--arch", required=True, choices=testvm.ARCH_OPTIONS)
def kill(arch: testvm.Arch = "x86_64"):
@argh.arg("--arch-list", "--arch", nargs="*", type=str, default=ARCHS, choices=ARCHS)
def kill(arch_list: Iterable[Arch] = []):
"Kills the running VM with SIGKILL."
if not testvm.is_running(arch):
print("VM is not running.")
return
testvm.kill_vm(arch)
for arch in arch_list:
if not testvm.is_running(arch):
console.print(f"{arch} VM is not running")
break
console.print(f"Killing {arch} VM process")
testvm.kill_vm(arch)
print()
@argh.arg("--arch-list", "--arch", nargs="*", type=str, default=ARCHS, choices=ARCHS)
def clean(arch_list: Iterable[Arch] = []):
"Stops the VM or VMs and deletes all data."
for arch in arch_list:
if testvm.is_running(arch):
kill(arch)
if testvm.data_dir(arch).exists():
console.print("Cleaning data directory", testvm.data_dir(arch))
shutil.rmtree(testvm.data_dir(arch))
print()
def vm_status(arch: Arch):
def cli_tip(*args: str):
return f"[green][bold]{cli_shorthand(arch)} {' '.join(args)}[/bold][/green]"
vm = f"{arch} VM"
port = f"[blue]{testvm.SSH_PORTS[arch]}[/blue]"
state = testvm.state(arch)
if state == VmState.REACHABLE:
console.print(f"{vm} is [green]reachable[/green] on port {port}")
console.print(f"Start a shell with {cli_tip('shell')}")
elif state == VmState.STOPPED:
console.print(f"{vm} is [red]stopped[/red]")
console.print(f"Start the VM with {cli_tip('up')}")
else:
console.print(f"{vm} is running but [red]not reachable[/red] on port {port}")
console.print(f"Recent logs:")
logs(arch, 10, style="light_slate_grey")
console.print(f"See all logs with {cli_tip('logs')}")
@argh.arg("--arch-list", "--arch", nargs="*", type=str, default=ARCHS, choices=ARCHS)
def status(arch_list: Iterable[Arch] = []):
for arch in arch_list:
vm_status(arch)
print()
@argh.arg("--arch", required=True, choices=testvm.ARCH_OPTIONS)
def clean(arch: testvm.Arch = "x86_64"):
"Stops the VM and deletes all data."
if testvm.is_running(arch):
kill(arch)
if testvm.data_dir(arch).exists():
shutil.rmtree(testvm.data_dir(arch))
def logs(arch: Arch = "x86_64", n: int = 0, style: Optional[str] = None):
log_lines = testvm.log_path(arch).read_text().splitlines()
if n > 0 and len(log_lines) > n:
log_lines = log_lines[-n:]
for line in log_lines:
if style:
console.print(
strip_ansi_escape_sequences(line), style=style, markup=False, highlight=False
)
else:
print(line)
if __name__ == "__main__":
run_commands(up, run, wait, ssh, stop, kill, clean, usage=USAGE)
run_commands(up, run, shell, stop, kill, clean, status, logs, usage=USAGE, default_fn=status)