From d93be1b11dfe8b4bc4147893fd07389ec0f09529 Mon Sep 17 00:00:00 2001
From: Ryan Neph <ryanneph@google.com>
Date: Tue, 17 Sep 2024 12:47:24 -0700
Subject: [PATCH] virtio: optional prepare of pci bar mapping with explicit
 MemCacheType

Lazy-init of VirtioPciDevice shared memory region (to setup hypervisor
mapping) would previously use the MemCacheType seen on first mapping,
which could be anything -- this behavior is never desired, but grew from
a later addition of MemCacheType workaround to the existing Virtio "Fixed
Mapping" optimization.

On devices where the single hypervisor mapping's MemCacheType matters
(e.g. for devices that must configure it with
MemCacheType::CacheNonCoherent), if the first mapping attempted is for a
shmem with WB caching, all later mappings with WC or UC memtype would be
configured incorrectly.

Instead, query the VirtioDevice Trait implementator whether lazy-init of
a single hypervisor mapping should be used (the default), and with which
MemCacheType.

Attempting to later add a `CacheNonCoherent` mapping for a device that
explicitly declared `SingleMappingOnFirst(CacheCoherent)` is invalid,
and would lead to bugs, so we now treat this as an error and fail the
mapping altogether.

VirtioGpu device implementations will use this for devices with either
mandatory or optional-but-enabled non-coherent DMA (e.g. Intel devices
without coherent LLC shared between CPU/GPU, or that may opt to bypass
LLC coherency for optimal perf).

BUG=b:360295883, b:246334944
TEST=Builds

Change-Id: If41d238fd3c220e45c61d78da4a2505572709053
Reviewed-on: https://chromium-review.googlesource.com/c/crosvm/crosvm/+/5871721
Commit-Queue: Ryan Neph <ryanneph@google.com>
Reviewed-by: David Stevens <stevensd@chromium.org>
---
 devices/src/virtio/mod.rs               |  1 +
 devices/src/virtio/virtio_device.rs     | 17 ++++++
 devices/src/virtio/virtio_pci_device.rs | 71 ++++++++++++++++++-------
 hypervisor/src/lib.rs                   | 18 +++++++
 4 files changed, 88 insertions(+), 19 deletions(-)

diff --git a/devices/src/virtio/mod.rs b/devices/src/virtio/mod.rs
index 9b4439ca2b..e159e39103 100644
--- a/devices/src/virtio/mod.rs
+++ b/devices/src/virtio/mod.rs
@@ -98,6 +98,7 @@ pub use self::vhost_user_frontend::VhostUserFrontend;
 #[cfg(any(feature = "video-decoder", feature = "video-encoder"))]
 pub use self::video::VideoDevice;
 pub use self::virtio_device::SharedMemoryMapper;
+pub use self::virtio_device::SharedMemoryPrepareType;
 pub use self::virtio_device::SharedMemoryRegion;
 pub use self::virtio_device::VirtioDevice;
 pub use self::virtio_device::VirtioTransportType;
diff --git a/devices/src/virtio/virtio_device.rs b/devices/src/virtio/virtio_device.rs
index 8c86e22f9c..63d0af2237 100644
--- a/devices/src/virtio/virtio_device.rs
+++ b/devices/src/virtio/virtio_device.rs
@@ -28,6 +28,15 @@ pub enum VirtioTransportType {
     Mmio,
 }
 
+/// Type of Virtio device memory mapping to use.
+pub enum SharedMemoryPrepareType {
+    /// On first attempted mapping, the entire SharedMemoryRegion is configured with declared
+    /// MemCacheType.
+    SingleMappingOnFirst(MemCacheType),
+    /// No mapping preparation is performed. each mapping is handled individually
+    DynamicPerMapping,
+}
+
 #[derive(Clone)]
 pub struct SharedMemoryRegion {
     /// The id of the shared memory region. A device may have multiple regions, but each
@@ -196,6 +205,14 @@ pub trait VirtioDevice: Send {
     /// devices can remain backwards compatible with older drivers.
     fn set_shared_memory_region_base(&mut self, _addr: GuestAddress) {}
 
+    /// Queries the implementation whether a single prepared hypervisor memory mapping with explicit
+    /// caching type should be setup lazily on first mapping request, or whether to dynamically
+    /// setup a hypervisor mapping with every request's caching type.
+    fn get_shared_memory_prepare_type(&mut self) -> SharedMemoryPrepareType {
+        // default to lazy-prepare of a single memslot with explicit caching type
+        SharedMemoryPrepareType::SingleMappingOnFirst(MemCacheType::CacheCoherent)
+    }
+
     /// Pause all processing.
     ///
     /// Gives up the queues so that a higher layer can potentially snapshot them. The
diff --git a/devices/src/virtio/virtio_pci_device.rs b/devices/src/virtio/virtio_pci_device.rs
index 585fef6ec9..01f4ebd9a3 100644
--- a/devices/src/virtio/virtio_pci_device.rs
+++ b/devices/src/virtio/virtio_pci_device.rs
@@ -9,6 +9,7 @@ use std::sync::Arc;
 use acpi_tables::sdt::SDT;
 use anyhow::anyhow;
 use anyhow::Context;
+use base::debug;
 use base::error;
 use base::trace;
 use base::AsRawDescriptor;
@@ -1093,19 +1094,25 @@ where
             bar: config.bar_index() as u8,
         };
 
+        let vm_memory_client = virtio_pci_device
+            .shared_memory_vm_memory_client
+            .take()
+            .expect("missing shared_memory_tube");
+
+        // See comment VmMemoryRequest::execute
+        let can_prepare = !virtio_pci_device
+            .device
+            .expose_shmem_descriptors_with_viommu();
+        let prepare_type = if can_prepare {
+            virtio_pci_device.device.get_shared_memory_prepare_type()
+        } else {
+            SharedMemoryPrepareType::DynamicPerMapping
+        };
+
+        let vm_requester = Box::new(VmRequester::new(vm_memory_client, alloc, prepare_type));
         virtio_pci_device
             .device
-            .set_shared_memory_mapper(Box::new(VmRequester::new(
-                virtio_pci_device
-                    .shared_memory_vm_memory_client
-                    .take()
-                    .expect("missing shared_memory_tube"),
-                alloc,
-                // See comment VmMemoryRequest::execute
-                !virtio_pci_device
-                    .device
-                    .expose_shmem_descriptors_with_viommu(),
-            )));
+            .set_shared_memory_mapper(vm_requester);
 
         vec![config]
     };
@@ -1441,16 +1448,22 @@ struct VmRequester {
     vm_memory_client: VmMemoryClient,
     alloc: Alloc,
     mappings: BTreeMap<u64, VmMemoryRegionId>,
-    needs_prepare: bool,
+    prepare_type: SharedMemoryPrepareType,
+    prepared: bool,
 }
 
 impl VmRequester {
-    fn new(vm_memory_client: VmMemoryClient, alloc: Alloc, do_prepare: bool) -> Self {
+    fn new(
+        vm_memory_client: VmMemoryClient,
+        alloc: Alloc,
+        prepare_type: SharedMemoryPrepareType,
+    ) -> Self {
         Self {
             vm_memory_client,
             alloc,
             mappings: BTreeMap::new(),
-            needs_prepare: do_prepare,
+            prepare_type,
+            prepared: false,
         }
     }
 }
@@ -1463,11 +1476,31 @@ impl SharedMemoryMapper for VmRequester {
         prot: Protection,
         cache: MemCacheType,
     ) -> anyhow::Result<()> {
-        if self.needs_prepare {
-            self.vm_memory_client
-                .prepare_shared_memory_region(self.alloc, cache)
-                .context("prepare_shared_memory_region failed")?;
-            self.needs_prepare = false;
+        if !self.prepared {
+            if let SharedMemoryPrepareType::SingleMappingOnFirst(prepare_cache_type) =
+                self.prepare_type
+            {
+                debug!(
+                    "lazy prepare_shared_memory_region with {:?}",
+                    prepare_cache_type
+                );
+                self.vm_memory_client
+                    .prepare_shared_memory_region(self.alloc, prepare_cache_type)
+                    .context("lazy prepare_shared_memory_region failed")?;
+            }
+            self.prepared = true;
+        }
+
+        // devices must implement VirtioDevice::get_shared_memory_prepare_type(), returning
+        // SharedMemoryPrepareType::SingleMappingOnFirst(MemCacheType::CacheNonCoherent) in order to
+        // add any mapping that requests MemCacheType::CacheNonCoherent.
+        if cache == MemCacheType::CacheNonCoherent {
+            if let SharedMemoryPrepareType::SingleMappingOnFirst(MemCacheType::CacheCoherent) =
+                self.prepare_type
+            {
+                error!("invalid request to map with CacheNonCoherent for device with prepared CacheCoherent memory");
+                return Err(anyhow!("invalid MemCacheType"));
+            }
         }
 
         let id = self
diff --git a/hypervisor/src/lib.rs b/hypervisor/src/lib.rs
index 0fe061a200..ede57408da 100644
--- a/hypervisor/src/lib.rs
+++ b/hypervisor/src/lib.rs
@@ -56,9 +56,27 @@ pub struct MemRegion {
     pub size: u64,
 }
 
+/// Signal to the hypervisor on kernels that support the KVM_CAP_USER_CONFIGURE_NONCOHERENT_DMA (or
+/// equivalent) that during user memory region (memslot) configuration, a guest page's memtype
+/// should be considered in SLAT effective memtype determination rather than implicitly respecting
+/// only the host page's memtype.
+///
+/// This explicit control is needed for Virtio devices (e.g. gpu) that configure memslots for host
+/// WB page mappings with guest WC page mappings. See b/316337317, b/360295883 for more detail.
 #[derive(Copy, Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
 pub enum MemCacheType {
+    /// Don't provide any explicit instruction to the hypervisor on how it should determine a
+    /// memslot's effective memtype.
+    ///
+    /// On KVM-VMX (Intel), this means that the memslot is flagged with VMX_EPT_IPAT_BIT such that
+    /// only the host memtype is respected.
     CacheCoherent,
+    /// explicitly instruct the hypervisor to respect the guest page's memtype when determining the
+    /// memslot's effective memtype.
+    ///
+    /// On KVM-VMX (Intel), this means the memslot is NOT flagged with VMX_EPT_IPAT_BIT, and the
+    /// effective memtype will generally decay to the weaker amongst the host/guest memtypes and
+    /// the MTRR for the physical address.
     CacheNonCoherent,
 }