Setup vcpu's MTRR

When VFIO pass-through is enabled, the guest is very slow. The root cause is that every gfn is marked uncacheable in EPT. According to the comments in the kernel's vmx_get_mt_mask(vcpu, gfn, is_mmio) function, for the EPT memory type with VT-d, when VT-d lacks the snooping control feature the result can't be guaranteed, so the kernel tries to trust the guest through kvm_mtrr_get_guest_memory_type(vcpu, gfn). But crosvm doesn't set the MTRRs, so the host kernel marks all gfns as uncacheable in EPT. This patch sets the default cache type to WB and the MMIO cache type to UC, so that guest RAM is WB. BUG=chromium:992270 TEST=crosvm --vfio /sys/devices/pci0000:00/0000:00:02.0, pass through the host IGD into a Linux guest; the guest runs smoothly and the guest desktop can be shown on the physical local display. Change-Id: I151aae7835910cfbc9e38464ee901e5da281de1e Signed-off-by: Xiong Zhang <xiong.y.zhang@intel.corp-partner.google.com> Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/crosvm/+/1813458 Tested-by: kokoro <noreply+kokoro@google.com> Reviewed-by: Daniel Verkamp <dverkamp@chromium.org>
2024-11-25 05:03:05 +00:00 · 2019-08-28 13:27:36 +08:00 · 2019-08-28 13:27:36 +08:00 · 3064a7164a
commit 3064a7164a
parent 3ea11281eb
3 changed files with 140 additions and 5 deletions
--- a/x86_64/src/cpuid.rs
+++ b/x86_64/src/cpuid.rs
@ -121,6 +121,41 @@ pub fn setup_cpuid(kvm: &kvm::Kvm, vcpu: &kvm::Vcpu, cpu_id: u64, nrcpus: u64) -
        .map_err(Error::SetSupportedCpusFailed)
 }

+/// Returns the host CPU's maximum physical address width in bits.
+pub fn phy_max_address_bits() -> u32 {
+    let mut eax: u32 = 0;
+    let mut ebx: u32 = 0;
+    let mut ecx: u32 = 0;
+    let mut edx: u32 = 0;
+    let mut phys_bits: u32 = 36;
+
+    unsafe {
+        host_cpuid(
+            0x80000000,
+            0,
+            &mut eax as *mut u32,
+            &mut ebx as *mut u32,
+            &mut ecx as *mut u32,
+            &mut edx as *mut u32,
+        );
+    }
+    if eax >= 0x80000008 {
+        unsafe {
+            host_cpuid(
+                0x80000008,
+                0,
+                &mut eax as *mut u32,
+                &mut ebx as *mut u32,
+                &mut ecx as *mut u32,
+                &mut edx as *mut u32,
+            );
+        }
+        phys_bits = eax & 0xff;
+    }
+
+    phys_bits
+}
+
 #[cfg(test)]
 mod tests {
    use super::*;
--- a/x86_64/src/lib.rs
+++ b/x86_64/src/lib.rs
@ -157,6 +157,7 @@ impl std::error::Error for Error {}
 pub struct X8664arch;

 const BOOT_STACK_POINTER: u64 = 0x8000;
+// Make sure it is aligned to 256MB for MTRR convenience
 const MEM_32BIT_GAP_SIZE: u64 = (768 << 20);
 const FIRST_ADDR_PAST_32BITS: u64 = (1 << 32);
 const END_ADDR_BEFORE_32BITS: u64 = FIRST_ADDR_PAST_32BITS - MEM_32BIT_GAP_SIZE;
@ -758,7 +759,7 @@ impl X8664arch {
    ) -> Result<()> {
        let kernel_load_addr = GuestAddress(KERNEL_START_OFFSET);
        cpuid::setup_cpuid(kvm, vcpu, cpu_id, num_cpus).map_err(Error::SetupCpuid)?;
-        regs::setup_msrs(vcpu).map_err(Error::SetupMsrs)?;
+        regs::setup_msrs(vcpu, END_ADDR_BEFORE_32BITS).map_err(Error::SetupMsrs)?;
        let kernel_end = guest_mem
            .checked_offset(kernel_load_addr, KERNEL_64BIT_ENTRY_OFFSET)
            .ok_or(Error::KernelOffsetPastEnd)?;
--- a/x86_64/src/regs.rs
+++ b/x86_64/src/regs.rs
@ -13,7 +13,7 @@ use kvm_sys::kvm_msr_entry;
 use kvm_sys::kvm_msrs;
 use kvm_sys::kvm_regs;
 use kvm_sys::kvm_sregs;
-use sys_util::{self, GuestAddress, GuestMemory, LayoutAllocation};
+use sys_util::{self, warn, GuestAddress, GuestMemory, LayoutAllocation};

 use crate::gdt;

@ -65,7 +65,102 @@ impl Display for Error {
    }
 }

-fn create_msr_entries() -> Vec<kvm_msr_entry> {
+const MTRR_MEMTYPE_UC: u8 = 0x0;
+const MTRR_MEMTYPE_WB: u8 = 0x6;
+const MTRR_VAR_VALID: u64 = 0x800;
+const MTRR_ENABLE: u64 = 0x800;
+const MTRR_PHYS_BASE_MSR: u32 = 0x200;
+const MTRR_PHYS_MASK_MSR: u32 = 0x201;
+const VAR_MTRR_NUM_MASK: u64 = 0xFF;
+
+// Returns the value of the highest bit in a 64-bit value. Equivalent to
+// 1 << HighBitSet(x)
+fn get_power_of_two(data: u64) -> u64 {
+    1 << (64 - data.leading_zeros() - 1)
+}
+
+// Returns the largest power-of-two length, no greater than `len`, that `base`
+// is aligned to, which makes it suitable for an MTRR setting
+fn get_max_len(base: u64, len: u64) -> u64 {
+    let mut ret = get_power_of_two(len);
+
+    while base % ret != 0 {
+        ret >>= 1;
+    }
+
+    ret
+}
+
+// For the specified (base, len) range, returns the (base, len) pairs which can be
+// set into MTRR registers. MTRRs require that the base-address alignment is not
+// less than the range length
+fn get_mtrr_pairs(base: u64, len: u64) -> Vec<(u64, u64)> {
+    let mut vecs = Vec::new();
+
+    let mut remains = len;
+    let mut new = base;
+    while remains != 0 {
+        let max = get_max_len(new, remains);
+        vecs.push((new, max));
+        remains -= max;
+        new += max;
+    }
+
+    vecs
+}
+
+fn create_mtrr_entries(vpu: &kvm::Vcpu, pci_start: u64) -> Vec<kvm_msr_entry> {
+    let mut entries = Vec::<kvm_msr_entry>::new();
+
+    // Get VAR MTRR num from MSR_MTRRcap
+    let mut msrs = vec![kvm_msr_entry {
+        index: crate::msr_index::MSR_MTRRcap,
+        ..Default::default()
+    }];
+    if vpu.get_msrs(&mut msrs).is_err() {
+        warn!("get msrs fail, guest with pass through device may be very slow");
+        return entries;
+    }
+    let var_num = msrs[0].data & VAR_MTRR_NUM_MASK;
+
+    // Set pci_start .. 4G as UC
+    // all others are set to default WB
+    let pci_len = (1 << 32) - pci_start;
+    let vecs = get_mtrr_pairs(pci_start, pci_len);
+    if vecs.len() as u64 > var_num {
+        warn!(
+            "mtrr fail for pci mmio, please check pci_start addr,
+              guest with pass through device may be very slow"
+        );
+        return entries;
+    }
+
+    let phys_mask: u64 = (1 << crate::cpuid::phy_max_address_bits()) - 1;
+    for (idx, (base, len)) in vecs.iter().enumerate() {
+        let reg_idx = idx as u32 * 2;
+        entries.push(kvm_msr_entry {
+            index: MTRR_PHYS_BASE_MSR + reg_idx,
+            data: base | MTRR_MEMTYPE_UC as u64,
+            ..Default::default()
+        });
+        let mask: u64 = len.wrapping_neg() & phys_mask | MTRR_VAR_VALID;
+        entries.push(kvm_msr_entry {
+            index: MTRR_PHYS_MASK_MSR + reg_idx,
+            data: mask,
+            ..Default::default()
+        });
+    }
+    // Disable fixed MTRRs and enable variable MTRRs, set default type as WB
+    entries.push(kvm_msr_entry {
+        index: crate::msr_index::MSR_MTRRdefType,
+        data: MTRR_ENABLE | MTRR_MEMTYPE_WB as u64,
+        ..Default::default()
+    });
+
+    entries
+}
+
+fn create_msr_entries(vcpu: &kvm::Vcpu, pci_start: u64) -> Vec<kvm_msr_entry> {
    let mut entries = Vec::<kvm_msr_entry>::new();

    entries.push(kvm_msr_entry {
@ -121,6 +216,10 @@ fn create_msr_entries() -> Vec<kvm_msr_entry> {
        ..Default::default()
    });

+    let mut mtrr_entries = create_mtrr_entries(vcpu, pci_start);
+
+    entries.append(&mut mtrr_entries);
+
    entries
 }

@ -129,14 +228,14 @@ fn create_msr_entries() -> Vec<kvm_msr_entry> {
 /// # Arguments
 ///
 /// * `vcpu` - Structure for the vcpu that holds the vcpu fd.
-pub fn setup_msrs(vcpu: &kvm::Vcpu) -> Result<()> {
+pub fn setup_msrs(vcpu: &kvm::Vcpu, pci_start: u64) -> Result<()> {
    const SIZE_OF_MSRS: usize = mem::size_of::<kvm_msrs>();
    const SIZE_OF_ENTRY: usize = mem::size_of::<kvm_msr_entry>();
    const ALIGN_OF_MSRS: usize = mem::align_of::<kvm_msrs>();
    const ALIGN_OF_ENTRY: usize = mem::align_of::<kvm_msr_entry>();
    const_assert!(ALIGN_OF_MSRS >= ALIGN_OF_ENTRY);

-    let entry_vec = create_msr_entries();
+    let entry_vec = create_msr_entries(vcpu, pci_start);
    let size = SIZE_OF_MSRS + entry_vec.len() * SIZE_OF_ENTRY;
    let layout = Layout::from_size_align(size, ALIGN_OF_MSRS).expect("impossible layout");
    let mut allocation = LayoutAllocation::zeroed(layout);