From 4ea25d1e3356d5715fb016aced2c8b38797fd434 Mon Sep 17 00:00:00 2001 From: Noah Gold Date: Fri, 10 Jun 2022 13:46:21 -0700 Subject: [PATCH] x86_64: add TSC leaf synthesis. There have been two evolutions of providing the TSC cpuid leaf (aka 0x15) to the guest. a) For CrosVM on Windows, we have been providing the leaf unconditionally. Furthermore, we've not been using the exact host leaf; instead, we calibrate the TSC frequency and provide that value in the leaf. This was done because the actual cpuid leaf values are not as accurate as we needed them to be to drive a guest clocksource. b) In CrosVM mainline, 4080aaf9b33e0d16247a7a92e2c4a3b1eaeb8e08 introduced the flag enable_pnp / enable_pnp_data, and provides the exact host 0x15 leaf to the guest if the flag is enabled. This CL adds a new hypervisor capability (CalibratedTscLeafRequired) to control whether or not the calibrated TSC leaf should be used, in addition to a new CLI option to force it on hypervisors where it isn't enabled by default. The new option is `--force_calibrated_tsc_leaf`. BUG=b:213152505 TEST=builds upstream, battletested downstream on WHPX. Change-Id: I611422808a9e10578c0ddcbd211ae902f937685f Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/crosvm/+/3698993 Commit-Queue: Noah Gold Reviewed-by: Junichi Uekawa Tested-by: kokoro --- aarch64/src/lib.rs | 1 + arch/src/lib.rs | 2 ++ hypervisor/src/caps.rs | 12 ++++++++++++ hypervisor/src/kvm/mod.rs | 2 ++ hypervisor/src/whpx/vm.rs | 2 ++ src/crosvm/cmdline.rs | 18 ++++++++++++++++++ src/crosvm/config.rs | 2 ++ src/crosvm/linux/mod.rs | 1 + src/crosvm/linux/vcpu.rs | 4 ++++ x86_64/src/cpuid.rs | 31 +++++++++++++++++++++++++++++-- x86_64/src/lib.rs | 2 ++ x86_64/src/test_integration.rs | 5 ++++- 12 files changed, 79 insertions(+), 3 deletions(-) diff --git a/aarch64/src/lib.rs b/aarch64/src/lib.rs index 82c0f00896..9a3f075f39 100644 --- a/aarch64/src/lib.rs +++ b/aarch64/src/lib.rs @@ -561,6 +561,7 @@ impl arch::LinuxArch for AArch64 { _host_cpu_topology: bool, _enable_pnp_data: bool, _itmt: bool, + _force_calibrated_tsc_leaf: bool, ) -> std::result::Result<(), Self::Error> { // AArch64 doesn't configure vcpus on the vcpu thread, so nothing to do here. Ok(()) diff --git a/arch/src/lib.rs b/arch/src/lib.rs index 50d140ad6a..0c985e367e 100644 --- a/arch/src/lib.rs +++ b/arch/src/lib.rs @@ -238,6 +238,7 @@ pub trait LinuxArch { /// * `host_cpu_topology` - whether enabling host cpu topology. /// * `enable_pnp_data` - whether enabling PnP statistics data. /// * `itmt` - whether enabling ITMT scheduler + /// * `force_calibrated_tsc_leaf` - whether to force using a calibrated TSC leaf (0x15). fn configure_vcpu( vm: &V, hypervisor: &dyn HypervisorArch, @@ -251,6 +252,7 @@ pub trait LinuxArch { host_cpu_topology: bool, enable_pnp_data: bool, itmt: bool, + force_calibrated_tsc_leaf: bool, ) -> Result<(), Self::Error>; /// Configures and add a pci device into vm diff --git a/hypervisor/src/caps.rs b/hypervisor/src/caps.rs index 0bc1c066b4..636d8b3cd1 100644 --- a/hypervisor/src/caps.rs +++ b/hypervisor/src/caps.rs @@ -12,6 +12,18 @@ pub enum HypervisorCap { UserMemory, #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] Xcrs, + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + /// CPUID leaf 0x15 is available on some Intel chips and contains the TSC + /// frequency, which can be used to calibrate the guest's TSC clocksource; + /// however, it is not typically accurate enough (being off by 1-2% is a + /// big problem for a clocksource), and inside the guest, calibration by + /// other means is not always reliable. + /// + /// Hypervisors which do not provide the TSC frequency (e.g. via the kvm + /// pvclock) or have another suitable calibration source can declare this + /// capability, which causes CrosVM to substitute a calibrated value in leaf + /// 0x15 that will be accurate enough for use in a clocksource. + CalibratedTscLeafRequired, } /// A capability the `Vm` can possibly expose. diff --git a/hypervisor/src/kvm/mod.rs b/hypervisor/src/kvm/mod.rs index 2fe46a2ee9..d9873a2998 100644 --- a/hypervisor/src/kvm/mod.rs +++ b/hypervisor/src/kvm/mod.rs @@ -1152,6 +1152,8 @@ impl TryFrom for KvmCap { HypervisorCap::UserMemory => Ok(KvmCap::UserMemory), #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] HypervisorCap::Xcrs => Ok(KvmCap::Xcrs), + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + HypervisorCap::CalibratedTscLeafRequired => Err(Error::new(libc::EINVAL)), } } } diff --git a/hypervisor/src/whpx/vm.rs b/hypervisor/src/whpx/vm.rs index 47d90b25d0..ad5da2abe3 100644 --- a/hypervisor/src/whpx/vm.rs +++ b/hypervisor/src/whpx/vm.rs @@ -375,6 +375,8 @@ impl Vm for WhpxVm { VmCap::Protected => false, // whpx initializes cpuid early during VM creation. VmCap::EarlyInitCpuid => true, + // under whpx, guests rely on this leaf to calibrate their clocksource. + VmCap::CalibratedTscLeafRequired => true, } } diff --git a/src/crosvm/cmdline.rs b/src/crosvm/cmdline.rs index 443d6a4999..b25054805d 100644 --- a/src/crosvm/cmdline.rs +++ b/src/crosvm/cmdline.rs @@ -559,6 +559,11 @@ pub struct RunCommand { /// align - whether to adjust addr and size to page /// boundaries implicitly pub file_backed_mappings: Vec, + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + #[argh(switch)] + /// force use of a calibrated TSC cpuid leaf (0x15) even if the hypervisor + /// doesn't require one. + pub force_calibrated_tsc_leaf: bool, #[cfg(all(target_arch = "x86_64", feature = "gdb"))] #[argh(option, arg_name = "PORT")] /// (EXPERIMENTAL) gdb on the given port @@ -1594,8 +1599,21 @@ impl TryFrom for super::config::Config { cfg.itmt = cmd.itmt; + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if cmd.enable_pnp_data && cmd.force_calibrated_tsc_leaf { + return Err( + "Only one of [enable_pnp_data,force_calibrated_tsc_leaf] can be specified" + .to_string(), + ); + } + cfg.enable_pnp_data = cmd.enable_pnp_data; + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + cfg.force_calibrated_tsc_leaf = cmd.force_calibrated_tsc_leaf; + } + cfg.privileged_vm = cmd.privileged_vm; cfg.stub_pci_devices = cmd.stub_pci_devices; diff --git a/src/crosvm/config.rs b/src/crosvm/config.rs index 1dcc30d5cd..724693fe0c 100644 --- a/src/crosvm/config.rs +++ b/src/crosvm/config.rs @@ -1704,6 +1704,7 @@ pub struct Config { pub enable_pnp_data: bool, pub executable_path: Option, pub file_backed_mappings: Vec, + pub force_calibrated_tsc_leaf: bool, pub force_s2idle: bool, #[cfg(all(target_arch = "x86_64", feature = "gdb"))] pub gdb: Option, @@ -1835,6 +1836,7 @@ impl Default for Config { enable_pnp_data: false, executable_path: None, file_backed_mappings: Vec::new(), + force_calibrated_tsc_leaf: false, force_s2idle: false, #[cfg(all(target_arch = "x86_64", feature = "gdb"))] gdb: None, diff --git a/src/crosvm/linux/mod.rs b/src/crosvm/linux/mod.rs index b1d64a60fa..f8088b10f0 100644 --- a/src/crosvm/linux/mod.rs +++ b/src/crosvm/linux/mod.rs @@ -1986,6 +1986,7 @@ fn run_control( cfg.host_cpu_topology, cfg.enable_pnp_data, cfg.itmt, + cfg.force_calibrated_tsc_leaf, cfg.privileged_vm, match vcpu_cgroup_tasks_file { None => None, diff --git a/src/crosvm/linux/vcpu.rs b/src/crosvm/linux/vcpu.rs index a910fbf19e..08a8115e3b 100644 --- a/src/crosvm/linux/vcpu.rs +++ b/src/crosvm/linux/vcpu.rs @@ -143,6 +143,7 @@ pub fn runnable_vcpu( host_cpu_topology: bool, enable_pnp_data: bool, itmt: bool, + force_calibrated_tsc_leaf: bool, ) -> Result<(V, VcpuRunHandle)> where V: VcpuArch, @@ -180,6 +181,7 @@ where host_cpu_topology, enable_pnp_data, itmt, + force_calibrated_tsc_leaf, ) .context("failed to configure vcpu")?; @@ -575,6 +577,7 @@ pub fn run_vcpu( host_cpu_topology: bool, enable_pnp_data: bool, itmt: bool, + force_calibrated_tsc_leaf: bool, privileged_vm: bool, vcpu_cgroup_tasks_file: Option, userspace_msr: BTreeMap, @@ -616,6 +619,7 @@ where host_cpu_topology, enable_pnp_data, itmt, + force_calibrated_tsc_leaf, ); // Add MSR handlers after CPU affinity setting. diff --git a/x86_64/src/cpuid.rs b/x86_64/src/cpuid.rs index d33e547215..640e034d77 100644 --- a/x86_64/src/cpuid.rs +++ b/x86_64/src/cpuid.rs @@ -7,7 +7,7 @@ use std::cmp; use std::result; use devices::{Apic, IrqChipCap, IrqChipX86_64}; -use hypervisor::{CpuIdEntry, HypervisorX86_64, VcpuX86_64}; +use hypervisor::{CpuIdEntry, HypervisorCap, HypervisorX86_64, VcpuX86_64}; use crate::CpuManufacturer; use remain::sorted; @@ -63,6 +63,11 @@ pub struct CpuIdContext { apic_frequency: u32, /// The TSC frequency in Hz, if it could be determined. tsc_frequency: Option, + /// Whether to force the use of a calibrated TSC cpuid leaf (0x15) even if + /// the hypervisor doesn't require it. + force_calibrated_tsc_leaf: bool, + /// Whether the hypervisor requires a calibrated TSC cpuid leaf (0x15). + calibrated_tsc_leaf_required: bool, /// Whether or not VCPU IDs and APIC IDs should match host cpu IDs. host_cpu_topology: bool, enable_pnp_data: bool, @@ -83,6 +88,8 @@ impl CpuIdContext { irq_chip: Option<&dyn IrqChipX86_64>, enable_pnp_data: bool, itmt: bool, + force_calibrated_tsc_leaf: bool, + calibrated_tsc_leaf_required: bool, cpuid_count: unsafe fn(u32, u32) -> CpuidResult, cpuid: unsafe fn(u32) -> CpuidResult, ) -> CpuIdContext { @@ -96,6 +103,8 @@ impl CpuIdContext { }), apic_frequency: irq_chip.map_or(Apic::frequency(), |chip| chip.lapic_frequency()), tsc_frequency: devices::tsc::tsc_frequency().ok(), + force_calibrated_tsc_leaf, + calibrated_tsc_leaf_required, host_cpu_topology, enable_pnp_data, itmt, @@ -210,7 +219,18 @@ pub fn adjust_cpuid(entry: &mut CpuIdEntry, ctx: &CpuIdContext) { } } 0x15 => { - if ctx.enable_pnp_data { + if ctx.calibrated_tsc_leaf_required + || ctx.force_calibrated_tsc_leaf { + + let cpuid_15 = ctx + .tsc_frequency + .map(|tsc_freq| devices::tsc::fake_tsc_frequency_cpuid( + tsc_freq, ctx.apic_frequency)); + + if let Some(new_entry) = cpuid_15 { + entry.cpuid = new_entry.cpuid; + } + } else if ctx.enable_pnp_data { // Expose TSC frequency to guest // Safe because we pass 0x15 for this call and the host // supports the `cpuid` instruction @@ -321,6 +341,7 @@ pub fn setup_cpuid( host_cpu_topology: bool, enable_pnp_data: bool, itmt: bool, + force_calibrated_tsc_leaf: bool, ) -> Result<()> { let mut cpuid = hypervisor .get_supported_cpuid() @@ -336,6 +357,8 @@ pub fn setup_cpuid( Some(irq_chip), enable_pnp_data, itmt, + force_calibrated_tsc_leaf, + hypervisor.check_capability(HypervisorCap::CalibratedTscLeafRequired), __cpuid_count, __cpuid, ), @@ -421,6 +444,8 @@ mod tests { Some(&irq_chip), false, false, + false, + false, __cpuid_count, __cpuid, ), @@ -466,6 +491,8 @@ mod tests { host_cpu_topology: true, enable_pnp_data: false, itmt: false, + force_calibrated_tsc_leaf: false, + calibrated_tsc_leaf_required: false, cpuid_count: fake_cpuid_count, cpuid: fake_cpuid, }; diff --git a/x86_64/src/lib.rs b/x86_64/src/lib.rs index 4b27748ac6..73370847ad 100644 --- a/x86_64/src/lib.rs +++ b/x86_64/src/lib.rs @@ -770,6 +770,7 @@ impl arch::LinuxArch for X8664arch { host_cpu_topology: bool, enable_pnp_data: bool, itmt: bool, + force_calibrated_tsc_leaf: bool, ) -> Result<()> { if !vm.check_capability(VmCap::EarlyInitCpuid) { cpuid::setup_cpuid( @@ -782,6 +783,7 @@ impl arch::LinuxArch for X8664arch { host_cpu_topology, enable_pnp_data, itmt, + force_calibrated_tsc_leaf, ) .map_err(Error::SetupCpuid)?; } diff --git a/x86_64/src/test_integration.rs b/x86_64/src/test_integration.rs index 2085854550..8dfcbdfad3 100644 --- a/x86_64/src/test_integration.rs +++ b/x86_64/src/test_integration.rs @@ -240,7 +240,10 @@ where .expect("failed to add vcpu to irqchip"); if !vm.check_capability(VmCap::EarlyInitCpuid) { - setup_cpuid(&hyp, &irq_chip, &vcpu, 0, 1, false, false, false, false).unwrap(); + setup_cpuid( + &hyp, &irq_chip, &vcpu, 0, 1, false, false, false, false, false, + ) + .unwrap(); } setup_msrs(&vm, &vcpu, read_pci_mmio_before_32bit().start).unwrap();