x86_64: support running protected VMs with pvmfw

Add support for running protected VMs on x86 in the proper way: as on
arm64, ask the pKVM hypervisor to load the pVM firmware (pvmfw) into
the VM memory at a dedicated address and to set the VM entry point to
the pvmfw address.

The KVM uAPI used for that (for setting pvmfw address in guest memory
and for querying pvmfw size) is almost exactly the same as on arm64.
But as explained in [1], this is a temporary uAPI anyway, so for now
don't try to generalize the crosvm code using this uAPI between both
architectures.

[1] https://android-review.git.corp.google.com/c/kernel/common/+/3372231
[2] https://android-review.git.corp.google.com/c/kernel/common/+/3372235

BUG=b:350694931
TEST=a VM started with "crosvm run --protected-vm" runs successfully,
assuming that the bootloader provides the pvmfw image to the host kernel,
or when using the pKVM hack described under "Test:" in [2] instead.

Change-Id: I64242bcd0a4c053b8c27c2f83d5c876a885d0442
Reviewed-on: https://chromium-review.googlesource.com/c/crosvm/crosvm/+/6043709
Reviewed-by: Daniel Verkamp <dverkamp@chromium.org>
Commit-Queue: Dmytro Maluka <dmaluka@chromium.org>
Reviewed-by: Pierre-Clément Tosi <ptosi@google.com>
This commit is contained in:
Dmytro Maluka 2024-11-20 15:15:06 +00:00 committed by crosvm LUCI
parent 2118fbb57c
commit f0c46addbb
11 changed files with 116 additions and 4 deletions

View file

@ -485,6 +485,15 @@ impl VmX86_64 for HaxmVm {
// No-op for HAXM: the address is ignored (`_addr` is unused) and the call always succeeds.
fn set_identity_map_addr(&self, _addr: GuestAddress) -> Result<()> {
Ok(())
}
// Unconditionally fails with ENXIO; neither `_fw_addr` nor `_fw_max_size` is used.
fn load_protected_vm_firmware(
&mut self,
_fw_addr: GuestAddress,
_fw_max_size: u64,
) -> Result<()> {
// Haxm does not support protected VMs
Err(Error::new(libc::ENXIO))
}
}
// TODO(b:241252288): Enable tests disabled with dummy feature flag - enable_haxm_tests.

View file

@ -122,6 +122,7 @@ pub enum KvmCap {
ImmediateExit = KVM_CAP_IMMEDIATE_EXIT,
ArmPmuV3 = KVM_CAP_ARM_PMU_V3,
ArmProtectedVm = KVM_CAP_ARM_PROTECTED_VM,
X86ProtectedVm = KVM_CAP_X86_PROTECTED_VM,
ArmMte = KVM_CAP_ARM_MTE,
#[cfg(target_arch = "x86_64")]
BusLockDetect = KVM_CAP_X86_BUS_LOCK_EXIT,

View file

@ -23,7 +23,9 @@ use data_model::FlexibleArrayWrapper;
use kvm_sys::*;
use libc::E2BIG;
use libc::EAGAIN;
use libc::EINVAL;
use libc::EIO;
use libc::ENOMEM;
use libc::ENXIO;
use serde::Deserialize;
use serde::Serialize;
@ -31,6 +33,7 @@ use vm_memory::GuestAddress;
use super::Config;
use super::Kvm;
use super::KvmCap;
use super::KvmVcpu;
use super::KvmVm;
use crate::host_phys_addr_bits;
@ -445,6 +448,48 @@ impl KvmVm {
Ok(())
}
}
/// Queries the pKVM hypervisor for protected VM details, such as the firmware size.
///
/// Fails with `Err` when not running under pKVM.
///
/// The query goes through `KVM_ENABLE_CAP`, but it acts purely as a getter and should not
/// cause any side effects in KVM.
fn get_protected_vm_info(&self) -> Result<KvmProtectedVmInfo> {
    let mut pvm_info = KvmProtectedVmInfo {
        firmware_size: 0,
        reserved: [0; 7],
    };
    // The address of `pvm_info` travels to the kernel as the first capability argument.
    let info_addr = &mut pvm_info as *mut KvmProtectedVmInfo as u64;
    let cap_args = [info_addr, 0, 0, 0];
    // SAFETY:
    // Safe because we allocated the struct and we know the kernel won't write beyond the end of
    // the struct or keep a pointer to it.
    unsafe {
        self.enable_raw_capability(
            KvmCap::X86ProtectedVm,
            KVM_CAP_X86_PROTECTED_VM_FLAGS_INFO,
            &cap_args,
        )?;
    }
    Ok(pvm_info)
}
fn set_protected_vm_firmware_gpa(&self, fw_addr: GuestAddress) -> Result<()> {
// SAFETY:
// Safe because none of the args are pointers.
unsafe {
self.enable_raw_capability(
KvmCap::X86ProtectedVm,
KVM_CAP_X86_PROTECTED_VM_FLAGS_SET_FW_GPA,
&[fw_addr.0, 0, 0, 0],
)
}
}
}
/// Protected VM information filled in by the kernel in response to the
/// `KVM_CAP_X86_PROTECTED_VM_FLAGS_INFO` query (see `get_protected_vm_info`).
///
/// NOTE(review): `#[repr(C)]` presumably mirrors the kernel's layout for this uAPI struct —
/// confirm against the kernel headers.
#[repr(C)]
struct KvmProtectedVmInfo {
/// Size of the pVM firmware as reported by the hypervisor. A value of 0 is rejected with
/// `EINVAL` by `load_protected_vm_firmware`.
firmware_size: u64,
/// Reserved words; zero-initialized before the query is issued.
reserved: [u64; 7],
}
impl VmX86_64 for KvmVm {
@ -452,6 +497,22 @@ impl VmX86_64 for KvmVm {
&self.kvm
}
fn load_protected_vm_firmware(
&mut self,
fw_addr: GuestAddress,
fw_max_size: u64,
) -> Result<()> {
let info = self.get_protected_vm_info()?;
if info.firmware_size == 0 {
Err(Error::new(EINVAL))
} else {
if info.firmware_size > fw_max_size {
return Err(Error::new(ENOMEM));
}
self.set_protected_vm_firmware_gpa(fw_addr)
}
}
fn create_vcpu(&self, id: usize) -> Result<Box<dyn VcpuX86_64>> {
// create_vcpu is declared separately in VmAArch64 and VmX86, so it can return VcpuAArch64
// or VcpuX86. But both use the same implementation in KvmVm::create_vcpu.

View file

@ -776,6 +776,15 @@ impl VmX86_64 for WhpxVm {
// No-op for WHPX: the address is ignored (`_addr` is unused) and the call always succeeds.
fn set_identity_map_addr(&self, _addr: GuestAddress) -> Result<()> {
Ok(())
}
// Unconditionally fails with ENXIO; neither `_fw_addr` nor `_fw_max_size` is used.
fn load_protected_vm_firmware(
&mut self,
_fw_addr: GuestAddress,
_fw_max_size: u64,
) -> Result<()> {
// WHPX does not support protected VMs
Err(Error::new(libc::ENXIO))
}
}
// NOTE: WHPX Tests need to be run serially as otherwise it barfs unless we map new regions of guest

View file

@ -65,6 +65,12 @@ pub trait VmX86_64: Vm {
/// Sets the address of a one-page region in the VM's address space.
fn set_identity_map_addr(&self, addr: GuestAddress) -> Result<()>;
/// Load pVM firmware for the VM, creating a memslot for it as needed.
///
/// Only works on protected VMs (i.e. those with vm_type == KVM_X86_PKVM_PROTECTED_VM).
///
/// # Arguments
///
/// * `fw_addr` - Guest address at which the firmware is to be placed.
/// * `fw_max_size` - Upper bound on the firmware size. The KVM implementation fails with
///   `ENOMEM` when the firmware size reported by the hypervisor exceeds this value, and with
///   `EINVAL` when the reported size is 0.
///
/// Hypervisors without protected VM support (HAXM, WHPX) always return `ENXIO`.
fn load_protected_vm_firmware(&mut self, fw_addr: GuestAddress, fw_max_size: u64)
-> Result<()>;
}
/// A wrapper around creating and using a VCPU on x86_64.

View file

@ -122,6 +122,7 @@ pub enum Cap {
ImmediateExit = KVM_CAP_IMMEDIATE_EXIT,
ArmPmuV3 = KVM_CAP_ARM_PMU_V3,
ArmProtectedVm = KVM_CAP_ARM_PROTECTED_VM,
X86ProtectedVm = KVM_CAP_X86_PROTECTED_VM,
ArmMte = KVM_CAP_ARM_MTE,
#[cfg(target_arch = "x86_64")]
BusLockDetect = KVM_CAP_X86_BUS_LOCK_EXIT,

View file

@ -25,6 +25,9 @@ pub const KVM_CAP_ARM_PROTECTED_VM_FLAGS_SET_FW_IPA: u32 = 0;
pub const KVM_CAP_ARM_PROTECTED_VM_FLAGS_INFO: u32 = 1;
pub const KVM_VM_TYPE_ARM_PROTECTED: u32 = 0x80000000;
pub const KVM_X86_PKVM_PROTECTED_VM: u32 = 28;
// Capability identifier for the (temporary) pKVM protected VM uAPI on x86.
pub const KVM_CAP_X86_PROTECTED_VM: u32 = 0xffbadab2;
// Flag values used with KVM_CAP_X86_PROTECTED_VM:
// SET_FW_GPA sets the pVM firmware address in guest memory,
// INFO queries protected VM information such as the pVM firmware size.
pub const KVM_CAP_X86_PROTECTED_VM_FLAGS_SET_FW_GPA: u32 = 0;
pub const KVM_CAP_X86_PROTECTED_VM_FLAGS_INFO: u32 = 1;
pub const KVM_DEV_VFIO_PVIOMMU: u32 = 2;
pub const KVM_DEV_VFIO_PVIOMMU_ATTACH: u32 = 1;
#[repr(C)]

View file

@ -23,6 +23,9 @@ pub const KVM_CAP_ARM_PROTECTED_VM_FLAGS_SET_FW_IPA: u32 = 0;
pub const KVM_CAP_ARM_PROTECTED_VM_FLAGS_INFO: u32 = 1;
pub const KVM_VM_TYPE_ARM_PROTECTED: u32 = 0x80000000;
pub const KVM_X86_PKVM_PROTECTED_VM: u32 = 28;
// Capability identifier for the (temporary) pKVM protected VM uAPI on x86.
pub const KVM_CAP_X86_PROTECTED_VM: u32 = 0xffbadab2;
// Flag values used with KVM_CAP_X86_PROTECTED_VM:
// SET_FW_GPA sets the pVM firmware address in guest memory,
// INFO queries protected VM information such as the pVM firmware size.
pub const KVM_CAP_X86_PROTECTED_VM_FLAGS_SET_FW_GPA: u32 = 0;
pub const KVM_CAP_X86_PROTECTED_VM_FLAGS_INFO: u32 = 1;
pub const KVM_DEV_VFIO_PVIOMMU: u32 = 2;
pub const KVM_DEV_VFIO_PVIOMMU_ATTACH: u32 = 1;
#[repr(C)]

View file

@ -23,6 +23,9 @@ pub const KVM_CAP_ARM_PROTECTED_VM_FLAGS_SET_FW_IPA: u32 = 0;
pub const KVM_CAP_ARM_PROTECTED_VM_FLAGS_INFO: u32 = 1;
pub const KVM_VM_TYPE_ARM_PROTECTED: u32 = 0x80000000;
pub const KVM_X86_PKVM_PROTECTED_VM: u32 = 28;
// Capability identifier for the (temporary) pKVM protected VM uAPI on x86.
pub const KVM_CAP_X86_PROTECTED_VM: u32 = 0xffbadab2;
// Flag values used with KVM_CAP_X86_PROTECTED_VM:
// SET_FW_GPA sets the pVM firmware address in guest memory,
// INFO queries protected VM information such as the pVM firmware size.
pub const KVM_CAP_X86_PROTECTED_VM_FLAGS_SET_FW_GPA: u32 = 0;
pub const KVM_CAP_X86_PROTECTED_VM_FLAGS_INFO: u32 = 1;
pub const KVM_DEV_VFIO_PVIOMMU: u32 = 2;
pub const KVM_DEV_VFIO_PVIOMMU_ATTACH: u32 = 1;
#[repr(C)]

View file

@ -23,6 +23,9 @@ pub const KVM_CAP_ARM_PROTECTED_VM_FLAGS_SET_FW_IPA: u32 = 0;
pub const KVM_CAP_ARM_PROTECTED_VM_FLAGS_INFO: u32 = 1;
pub const KVM_VM_TYPE_ARM_PROTECTED: u32 = 0x80000000;
pub const KVM_X86_PKVM_PROTECTED_VM: u32 = 28;
// Capability identifier for the (temporary) pKVM protected VM uAPI on x86.
pub const KVM_CAP_X86_PROTECTED_VM: u32 = 0xffbadab2;
// Flag values used with KVM_CAP_X86_PROTECTED_VM:
// SET_FW_GPA sets the pVM firmware address in guest memory,
// INFO queries protected VM information such as the pVM firmware size.
pub const KVM_CAP_X86_PROTECTED_VM_FLAGS_SET_FW_GPA: u32 = 0;
pub const KVM_CAP_X86_PROTECTED_VM_FLAGS_INFO: u32 = 1;
pub const KVM_DEV_VFIO_PVIOMMU: u32 = 2;
pub const KVM_DEV_VFIO_PVIOMMU_ATTACH: u32 = 1;
#[repr(C)]

View file

@ -241,6 +241,8 @@ pub enum Error {
LoadKernel(kernel_loader::Error),
#[error("error loading pflash: {0}")]
LoadPflash(io::Error),
#[error("error loading pVM firmware: {0}")]
LoadPvmFw(base::Error),
#[error("error translating address: Page not present")]
PageNotPresent,
#[error("pci mmio overlaps with pVM firmware memory")]
@ -1180,15 +1182,26 @@ impl arch::LinuxArch for X8664arch {
PROTECTED_VM_FW_MAX_SIZE,
)
.map_err(Error::LoadCustomPvmFw)?;
} else if protection_type.runs_firmware() {
// Tell the hypervisor to load the pVM firmware.
vm.load_protected_vm_firmware(
GuestAddress(PROTECTED_VM_FW_START),
PROTECTED_VM_FW_MAX_SIZE,
)
.map_err(Error::LoadPvmFw)?;
}
let entry_addr = if protection_type.runs_firmware() {
PROTECTED_VM_FW_START
let entry_addr = if protection_type.needs_firmware_loaded() {
Some(PROTECTED_VM_FW_START)
} else if protection_type.runs_firmware() {
None // Initial RIP value is set by the hypervisor
} else {
kernel_entry.offset()
Some(kernel_entry.offset())
};
vcpu_init[0].regs.rip = entry_addr;
if let Some(entry) = entry_addr {
vcpu_init[0].regs.rip = entry;
}
match kernel_type {
KernelType::BzImage | KernelType::Elf => {