vIOMMU: Enable dynamic map/unmap of host dma-buf

Host dma-buf refers to buffers allocated in host memory and shared
with guest passthrough devices or vCPU through the dma-buf interface. To
let guest passthrough devices access a host dma-buf, the host needs to pin
those buffers in memory and set up the mappings in the VT-d table before
the buffers are exposed to the guest, and unpin them right after the
buffers are returned from the guest.

BUG=b:235508487
TEST=test with a crostini VM equipped with a passthrough GPU, e.g.
        --vfio /sys/bus/pci/devices/0000:04:00.0,guest-address="00:1f.0",iommu=viommu

Change-Id: Ie7b3db3a7f7aa8afff03425be16c952db4a5f67b
Reviewed-on: https://chromium-review.googlesource.com/c/crosvm/crosvm/+/3764931
Commit-Queue: David Stevens <stevensd@chromium.org>
Reviewed-by: David Stevens <stevensd@chromium.org>
Tested-by: David Stevens <stevensd@chromium.org>
This commit is contained in:
Tina Zhang 2022-07-13 16:37:10 +08:00 committed by crosvm LUCI
parent 9d25cf1617
commit abcab6f771
15 changed files with 234 additions and 33 deletions

View file

@ -1113,6 +1113,7 @@ impl CoIommuDev {
descriptor,
offset,
size: size as u64,
gpu_blob: false,
},
dest: VmMemoryDestination::GuestPhysicalAddress(gpa),
prot,

View file

@ -968,6 +968,7 @@ impl VfioPciDevice {
descriptor,
offset,
size: mmap_size,
gpu_blob: false,
},
dest: VmMemoryDestination::GuestPhysicalAddress(guest_map_start),
prot: Protection::read_write(),

View file

@ -204,6 +204,7 @@ impl VfioPlatformDevice {
descriptor,
offset,
size: mmap_size,
gpu_blob: false,
},
dest: VmMemoryDestination::GuestPhysicalAddress(guest_map_start),
prot: Protection::read_write(),

View file

@ -727,6 +727,7 @@ impl VirtioGpu {
descriptor: export.os_handle,
offset: 0,
size: resource.size,
gpu_blob: true,
},
}
} else {

View file

@ -31,6 +31,8 @@ use base::warn;
use base::AsRawDescriptor;
use base::Error as SysError;
use base::Event;
use base::MappedRegion;
use base::MemoryMapping;
use base::Protection;
use base::RawDescriptor;
use base::Result as SysResult;
@ -44,6 +46,7 @@ use data_model::DataInit;
use data_model::Le64;
use futures::select;
use futures::FutureExt;
use hypervisor::MemSlot;
use remain::sorted;
use sync::Mutex;
use thiserror::Error;
@ -168,6 +171,12 @@ pub enum IommuError {
// value: reference counter and MemoryMapperTrait
type DomainMap = BTreeMap<u32, (u32, Arc<Mutex<Box<dyn MemoryMapperTrait>>>)>;
// A host dma-buf that has been mmap'ed by the virtio-iommu device. Entries are stored in
// `State::dmabuf_mem`, keyed by the guest physical address of the region.
struct DmabufRegionEntry {
// Mapping of the dma-buf file; its address is used as the host address when a guest
// mapping request falls inside this region.
mmap: MemoryMapping,
// Memory slot the region was registered under; used to look the entry up on unmap.
mem_slot: MemSlot,
// Size of the region in bytes, used to bounds-check guest mapping requests.
len: u64,
}
// Shared state for the virtio-iommu device.
struct State {
mem: GuestMemory,
@ -186,6 +195,9 @@ struct State {
// key: endpoint PCI address
// value: MemoryMapperTrait
endpoints: BTreeMap<u32, Arc<Mutex<Box<dyn MemoryMapperTrait>>>>,
// Contains dmabuf regions
// key: guest physical address
dmabuf_mem: BTreeMap<u64, DmabufRegionEntry>,
}
impl State {
@ -393,15 +405,40 @@ impl State {
if let Some(mapper) = self.domain_map.get(&domain) {
let size = u64::from(req.virt_end) - u64::from(req.virt_start) + 1u64;
let vfio_map_result = mapper.1.lock().add_map(MappingInfo {
iova: req.virt_start.into(),
gpa: GuestAddress(req.phys_start.into()),
size,
prot: match write_en {
true => Protection::read_write(),
false => Protection::read(),
let dmabuf_map = self
.dmabuf_mem
.range(..=u64::from(req.phys_start))
.next_back()
.and_then(|(addr, region)| {
if u64::from(req.phys_start) + size <= addr + region.len {
Some(region.mmap.as_ptr() as u64 + (u64::from(req.phys_start) - addr))
} else {
None
}
});
let prot = match write_en {
true => Protection::read_write(),
false => Protection::read(),
};
let vfio_map_result = match dmabuf_map {
// Safe because [dmabuf_map, dmabuf_map + size) refers to an external mmap'ed region.
Some(dmabuf_map) => unsafe {
mapper.1.lock().vfio_dma_map(
req.virt_start.into(),
dmabuf_map as u64,
size,
prot,
)
},
});
None => mapper.1.lock().add_map(MappingInfo {
iova: req.virt_start.into(),
gpa: GuestAddress(req.phys_start.into()),
size,
prot,
}),
};
match vfio_map_result {
Ok(AddMapResult::Ok) => (),
@ -837,6 +874,7 @@ impl VirtioDevice for Iommu {
endpoint_map: BTreeMap::new(),
domain_map: BTreeMap::new(),
endpoints: eps,
dmabuf_mem: BTreeMap::new(),
};
let result = run(
state,

View file

@ -212,6 +212,20 @@ pub trait MemoryMapper: Send {
/// Maps the host memory range that starts at `hva` and spans `size` bytes at the I/O
/// virtual address `iova` with protection `prot`. The default implementation fails with
/// "not supported".
///
/// # Safety
///
/// The memory in the region specified by hva and size must be
/// memory external to rust.
unsafe fn vfio_dma_map(
&mut self,
_iova: u64,
_hva: u64,
_size: u64,
_prot: Protection,
) -> Result<AddMapResult> {
bail!("not supported");
}
/// Exports the specified IO region.
///
/// Multiple MemRegions should be returned when the gpa is discontiguous or perms are different.
fn export(&mut self, _iova: u64, _size: u64) -> Result<Vec<MemRegion>> {
bail!("not supported");

View file

@ -6,13 +6,16 @@ pub mod vfio_wrapper;
use std::cell::RefCell;
use std::collections::BTreeMap;
use std::fs::File;
use std::rc::Rc;
use std::sync::Arc;
use base::error;
use base::MemoryMappingBuilder;
use base::TubeError;
use cros_async::AsyncTube;
use cros_async::Executor;
use hypervisor::MemSlot;
use sync::Mutex;
use vm_control::VirtioIOMMURequest;
use vm_control::VirtioIOMMUResponse;
@ -22,11 +25,14 @@ use vm_control::VirtioIOMMUVfioResult;
use self::vfio_wrapper::VfioWrapper;
use crate::virtio::iommu::ipc_memory_mapper::IommuRequest;
use crate::virtio::iommu::ipc_memory_mapper::IommuResponse;
use crate::virtio::iommu::DmabufRegionEntry;
use crate::virtio::iommu::Result;
use crate::virtio::iommu::State;
use crate::virtio::IommuError;
use crate::VfioContainer;
const VIRTIO_IOMMU_PAGE_SHIFT: u32 = 12;
impl State {
pub(in crate::virtio::iommu) fn handle_add_vfio_device(
&mut self,
@ -65,6 +71,52 @@ impl State {
VirtioIOMMUVfioResult::Ok
}
pub(in crate::virtio::iommu) fn handle_map_dmabuf(
&mut self,
mem_slot: MemSlot,
gfn: u64,
size: u64,
dma_buf: File,
) -> VirtioIOMMUVfioResult {
let mmap = match MemoryMappingBuilder::new(size as usize)
.from_file(&dma_buf)
.build()
{
Ok(v) => v,
Err(_) => {
error!("failed to mmap dma_buf");
return VirtioIOMMUVfioResult::InvalidParam;
}
};
self.dmabuf_mem.insert(
gfn << VIRTIO_IOMMU_PAGE_SHIFT,
DmabufRegionEntry {
mmap,
mem_slot,
len: size,
},
);
VirtioIOMMUVfioResult::Ok
}
/// Forgets the dma-buf region that was registered under `mem_slot`.
///
/// Returns `VirtioIOMMUVfioResult::Ok` when an entry was found and removed from `dmabuf_mem`,
/// and `VirtioIOMMUVfioResult::NoSuchMappedDmabuf` when no entry uses that slot.
pub(in crate::virtio::iommu) fn handle_unmap_dmabuf(
    &mut self,
    mem_slot: MemSlot,
) -> VirtioIOMMUVfioResult {
    // The map is keyed by guest physical address, so scan for the first entry that owns the
    // slot and copy its key out before mutating the map.
    let key = self.dmabuf_mem.iter().find_map(|(gpa, entry)| {
        if entry.mem_slot == mem_slot {
            Some(*gpa)
        } else {
            None
        }
    });

    match key {
        Some(gpa) => {
            self.dmabuf_mem.remove(&gpa);
            VirtioIOMMUVfioResult::Ok
        }
        None => VirtioIOMMUVfioResult::NoSuchMappedDmabuf,
    }
}
pub(in crate::virtio::iommu) fn handle_vfio(
&mut self,
vfio_cmd: VirtioIOMMUVfioCommand,
@ -87,6 +139,13 @@ impl State {
}
},
VfioDeviceDel { endpoint_addr } => self.handle_del_vfio_device(endpoint_addr),
VfioDmabufMap {
mem_slot,
gfn,
size,
dma_buf,
} => self.handle_map_dmabuf(mem_slot, gfn, size, File::from(dma_buf)),
VfioDmabufUnmap(mem_slot) => self.handle_unmap_dmabuf(mem_slot),
};
VirtioIOMMUResponse::VfioResponse(vfio_result)
}

View file

@ -52,6 +52,22 @@ impl VfioWrapper {
pub fn clone_as_raw_descriptor(&self) -> Result<RawDescriptor, VfioError> {
self.container.lock().clone_as_raw_descriptor()
}
unsafe fn do_map(&self, map: MappingInfo) -> anyhow::Result<AddMapResult> {
let res = self.container.lock().vfio_dma_map(
map.iova,
map.size,
map.gpa.offset(),
map.prot.allows(&Protection::write()),
);
if let Err(VfioError::IommuDmaMap(err)) = res {
if err.errno() == libc::EEXIST {
// A mapping already exists in the requested range,
return Ok(AddMapResult::OverlapFailure);
}
}
res.context("vfio mapping error").map(|_| AddMapResult::Ok)
}
}
impl MemoryMapper for VfioWrapper {
@ -61,23 +77,25 @@ impl MemoryMapper for VfioWrapper {
.get_host_address_range(map.gpa, map.size as usize)
.context("failed to find host address")? as u64,
);
// Safe because both guest and host address are guaranteed by
// get_host_address_range() to be valid.
let res = unsafe {
self.container.lock().vfio_dma_map(
map.iova,
map.size,
map.gpa.offset(),
map.prot.allows(&Protection::write()),
)
};
if let Err(VfioError::IommuDmaMap(err)) = res {
if err.errno() == libc::EEXIST {
// A mapping already exists in the requested range,
return Ok(AddMapResult::OverlapFailure);
}
}
res.context("vfio mapping error").map(|_| AddMapResult::Ok)
unsafe { self.do_map(map) }
}
/// Maps `size` bytes of host memory starting at `hva` at the I/O virtual address `iova`
/// with protection `prot`, by delegating to `do_map`.
///
/// # Safety
///
/// Calls the unsafe `do_map` with the given range; the caller must uphold the safety
/// requirements of `do_map` for `hva` and `size`.
unsafe fn vfio_dma_map(
    &mut self,
    iova: u64,
    hva: u64,
    size: u64,
    prot: Protection,
) -> anyhow::Result<AddMapResult> {
    // `do_map` passes `gpa.offset()` to the container as the address to map, so the host
    // virtual address is carried in the `gpa` field here.
    let request = MappingInfo {
        iova,
        gpa: GuestAddress(hva),
        size,
        prot,
    };
    self.do_map(request)
}
fn remove_map(&mut self, iova_start: u64, size: u64) -> anyhow::Result<RemoveMapResult> {

View file

@ -768,6 +768,7 @@ impl SharedMemoryMapper for VhostShmemMapper {
descriptor,
offset,
size,
gpu_blob: false,
} => (descriptor, offset, size),
VmMemorySource::SharedMemory(shmem) => {
let size = shmem.size();

View file

@ -824,6 +824,7 @@ impl Worker {
descriptor: SafeDescriptor::from(file),
offset: region.mmap_offset,
size: region.memory_size,
gpu_blob: false,
};
let dest = VmMemoryDestination::ExistingAllocation {
allocation: self.shmem_pci_bar,

View file

@ -342,6 +342,7 @@ impl VhostUserMasterReqHandlerMut for BackendReqHandlerImpl {
.map_err(|_| std::io::Error::from_raw_os_error(libc::EIO))?,
offset: req.fd_offset,
size: req.len,
gpu_blob: false,
},
req.shm_offset,
Protection::from(req.flags.bits() as libc::c_int),

View file

@ -529,6 +529,7 @@ impl VmRequester {
descriptor,
offset: 0,
size,
gpu_blob: false,
};
let alloc = Alloc::Anon(state.next_alloc);
state.next_alloc += 1;

View file

@ -2376,6 +2376,7 @@ fn run_control<V: VmArch + 'static, Vcpu: VcpuArch + 'static>(
&mut sys_allocator,
Arc::clone(&map_request),
&mut gralloc,
&iommu_host_tube,
);
if let Err(e) = tube.send(&response) {
error!("failed to send VmMemoryControlResponse: {}", e);

View file

@ -1002,6 +1002,7 @@ fn run_control<V: VmArch + 'static, Vcpu: VcpuArch + 'static>(
&mut sys_allocator_mutex.lock(),
Arc::clone(&map_request),
&mut gralloc,
&None,
);
if let Err(e) = tube.send(&response) {
error!("failed to send VmMemoryControlResponse: {}", e);

View file

@ -268,6 +268,7 @@ pub enum VmMemorySource {
offset: u64,
/// Size of the mapping in bytes.
size: u64,
gpu_blob: bool,
},
/// Register memory mapped by Vulkano.
Vulkan {
@ -288,15 +289,21 @@ impl VmMemorySource {
map_request: Arc<Mutex<Option<ExternalMapping>>>,
gralloc: &mut RutabagaGralloc,
prot: Protection,
) -> Result<(Box<dyn MappedRegion>, u64)> {
Ok(match self {
) -> Result<(Box<dyn MappedRegion>, u64, Option<SafeDescriptor>)> {
let (mem_region, size, descriptor) = match self {
VmMemorySource::Descriptor {
descriptor,
offset,
size,
} => (map_descriptor(&descriptor, offset, size, prot)?, size),
gpu_blob,
} => (
map_descriptor(&descriptor, offset, size, prot)?,
size,
if gpu_blob { Some(descriptor) } else { None },
),
VmMemorySource::SharedMemory(shm) => {
(map_descriptor(&shm, 0, shm.size(), prot)?, shm.size())
(map_descriptor(&shm, 0, shm.size(), prot)?, shm.size(), None)
}
VmMemorySource::Vulkan {
descriptor,
@ -322,7 +329,7 @@ impl VmMemorySource {
return Err(SysError::new(EINVAL));
}
};
(mapped_region, size)
(mapped_region, size, None)
}
VmMemorySource::ExternalMapping { size } => {
let mem = map_request
@ -331,9 +338,10 @@ impl VmMemorySource {
.ok_or_else(|| VmMemoryResponse::Err(SysError::new(EINVAL)))
.unwrap();
let mapped_region: Box<dyn MappedRegion> = Box::new(mem);
(mapped_region, size)
(mapped_region, size, None)
}
})
};
Ok((mem_region, size, descriptor))
}
}
@ -400,14 +408,16 @@ impl VmMemoryRequest {
sys_allocator: &mut SystemAllocator,
map_request: Arc<Mutex<Option<ExternalMapping>>>,
gralloc: &mut RutabagaGralloc,
iommu_host_tube: &Option<Tube>,
) -> VmMemoryResponse {
use self::VmMemoryRequest::*;
match self {
RegisterMemory { source, dest, prot } => {
// Correct on Windows because callers of this IPC guarantee descriptor is a mapping
// handle.
let (mapped_region, size) = match source.map(map_request, gralloc, prot) {
Ok(res) => res,
let (mapped_region, size, descriptor) = match source.map(map_request, gralloc, prot)
{
Ok((region, size, descriptor)) => (region, size, descriptor),
Err(e) => return VmMemoryResponse::Err(e),
};
@ -426,11 +436,50 @@ impl VmMemoryRequest {
Err(e) => return VmMemoryResponse::Err(e),
};
if let (Some(descriptor), Some(iommu_tube)) = (descriptor, iommu_host_tube) {
let request =
VirtioIOMMURequest::VfioCommand(VirtioIOMMUVfioCommand::VfioDmabufMap {
mem_slot: slot,
gfn: guest_addr.0 >> 12,
size,
dma_buf: descriptor,
});
match virtio_iommu_request(iommu_tube, &request) {
Ok(VirtioIOMMUResponse::VfioResponse(VirtioIOMMUVfioResult::Ok)) => (),
resp => {
error!("Unexpected message response: {:?}", resp);
// Ignore the result because there is nothing we can do with a failure.
let _ = vm.remove_memory_region(slot);
return VmMemoryResponse::Err(SysError::new(EINVAL));
}
}
}
let pfn = guest_addr.0 >> 12;
VmMemoryResponse::RegisterMemory { pfn, slot }
}
UnregisterMemory(slot) => match vm.remove_memory_region(slot) {
Ok(_) => VmMemoryResponse::Ok,
Ok(_) => {
if let Some(iommu_tube) = iommu_host_tube {
let request = VirtioIOMMURequest::VfioCommand(
VirtioIOMMUVfioCommand::VfioDmabufUnmap(slot),
);
match virtio_iommu_request(iommu_tube, &request) {
Ok(VirtioIOMMUResponse::VfioResponse(VirtioIOMMUVfioResult::Ok))
| Ok(VirtioIOMMUResponse::VfioResponse(
VirtioIOMMUVfioResult::NoSuchMappedDmabuf,
)) => VmMemoryResponse::Ok,
resp => {
error!("Unexpected message response: {:?}", resp);
return VmMemoryResponse::Err(SysError::new(EINVAL));
}
}
} else {
VmMemoryResponse::Ok
}
}
Err(e) => VmMemoryResponse::Err(e),
},
DynamicallyFreeMemoryRange {
@ -1252,6 +1301,15 @@ pub enum VirtioIOMMUVfioCommand {
VfioDeviceDel {
endpoint_addr: u32,
},
// Map a dma-buf into vfio iommu table
VfioDmabufMap {
mem_slot: MemSlot,
gfn: u64,
size: u64,
dma_buf: SafeDescriptor,
},
// Unmap a dma-buf from vfio iommu table
VfioDmabufUnmap(MemSlot),
}
#[derive(Serialize, Deserialize, Debug)]
@ -1260,6 +1318,8 @@ pub enum VirtioIOMMUVfioResult {
NotInPCIRanges,
NoAvailableContainer,
NoSuchDevice,
NoSuchMappedDmabuf,
InvalidParam,
}
impl Display for VirtioIOMMUVfioResult {
@ -1271,6 +1331,8 @@ impl Display for VirtioIOMMUVfioResult {
NotInPCIRanges => write!(f, "not in the pci ranges of virtio-iommu"),
NoAvailableContainer => write!(f, "no available vfio container"),
NoSuchDevice => write!(f, "no such a vfio device"),
NoSuchMappedDmabuf => write!(f, "no such a mapped dmabuf"),
InvalidParam => write!(f, "invalid parameters"),
}
}
}