devices: Add read-only memslots for pci config space

Take advantage of the fact that the PCI spec does not define any
configuration register attributes where reads have side-effects to back
the PCI configuration space with read-only memslots. Support in crosvm
needs to be done at the PciDevice implementation level, to support
situations where device-internal events lead to modifications of the PCI
configuration space memory.

After applying other optimizations, this reduces the average time needed
to exit s2idle from 120ms->40ms/200ms->100ms on delbin/dood, and helps
to reduce variance as well.

BUG=b:301865576
TEST=boot ARCVM with --break-linux-pci-config-io on x86 and ARM
TEST=manually verify virtio-net hotplug w/--break-linux-pci-config-io

Change-Id: Idcbddbed0235bfbd44cca70a46c1d526928621e8
Reviewed-on: https://chromium-review.googlesource.com/c/crosvm/crosvm/+/4891756
Reviewed-by: Daniel Verkamp <dverkamp@chromium.org>
Reviewed-by: Noah Gold <nkgold@google.com>
Commit-Queue: David Stevens <stevensd@chromium.org>
This commit is contained in:
David Stevens 2023-09-20 18:04:02 +09:00 committed by crosvm LUCI
parent 0762e46e75
commit 7eb7a4ede2
20 changed files with 675 additions and 40 deletions

View file

@ -555,6 +555,8 @@ impl arch::LinuxArch for AArch64 {
pci_devices,
irq_chip.as_irq_chip_mut(),
mmio_bus.clone(),
GuestAddress(AARCH64_PCI_CFG_BASE),
8,
io_bus.clone(),
system_allocator,
&mut vm,

View file

@ -590,6 +590,9 @@ pub enum DeviceRegistrationError {
// Unable to create a pipe.
#[error("failed to create pipe: {0}")]
CreatePipe(base::Error),
// Unable to create a root.
#[error("failed to create pci root: {0}")]
CreateRoot(anyhow::Error),
// Unable to create serial device from serial parameters
#[error("failed to create serial device: {0}")]
CreateSerialDevice(devices::SerialError),
@ -951,6 +954,8 @@ pub fn generate_pci_root(
mut devices: Vec<(Box<dyn PciDevice>, Option<Minijail>)>,
irq_chip: &mut dyn IrqChip,
mmio_bus: Arc<Bus>,
mmio_base: GuestAddress,
mmio_register_bit_num: usize,
io_bus: Arc<Bus>,
resources: &mut SystemAllocator,
vm: &mut impl Vm,
@ -989,7 +994,15 @@ pub fn generate_pci_root(
&mut devices,
)?;
let mut root = PciRoot::new(Arc::downgrade(&mmio_bus), Arc::downgrade(&io_bus), root_bus);
let mut root = PciRoot::new(
vm,
Arc::downgrade(&mmio_bus),
mmio_base,
mmio_register_bit_num,
Arc::downgrade(&io_bus),
root_bus,
)
.map_err(DeviceRegistrationError::CreateRoot)?;
#[cfg_attr(windows, allow(unused_mut))]
let mut pid_labels = BTreeMap::new();
@ -1144,7 +1157,7 @@ pub fn generate_pci_root(
device.on_sandboxed();
Arc::new(Mutex::new(device))
};
root.add_device(address, arced_dev.clone())
root.add_device(address, arced_dev.clone(), vm)
.map_err(DeviceRegistrationError::PciRootAddDevice)?;
for range in &ranges {
mmio_bus

View file

@ -255,6 +255,44 @@ impl MemoryMapping {
pub fn msync(&self) -> Result<()> {
self.mapping.msync()
}
/// Flush memory which the guest may be accessing through an uncached mapping.
///
/// Reads via an uncached mapping can bypass the cache and directly access main
/// memory. This is outside the memory model of Rust, which means that even with
/// proper synchronization, guest reads via an uncached mapping might not see
/// updates from the host. As such, it is necessary to perform architectural
/// cache maintenance to flush the host writes to main memory.
///
/// A single cache-maintenance instruction is issued for the address at `offset`
/// bytes from the start of the mapping. If `offset` does not lie inside the
/// mapping, the call is a no-op.
///
/// Note that this does not support writable uncached guest mappings, as doing so
/// requires invalidating the cache, not flushing the cache.
///
/// Currently only supported on x86_64 and aarch64. Cannot be supported on 32-bit arm.
///
/// # Panics
///
/// Panics on 32-bit arm and on any architecture other than x86_64 / aarch64 when
/// called with an in-range `offset`.
pub fn flush_uncached_guest_mapping(&self, offset: usize) {
    // Valid byte offsets are `0..size()`. `offset == size()` is one past the end
    // of the mapping and must be rejected too, otherwise the flush instruction
    // below would be issued on an address outside of the mapping.
    if offset >= self.mapping.size() {
        return;
    }
    // SAFETY: We checked that offset is within the mapping, and flushing
    // the cache doesn't affect any rust safety properties.
    unsafe {
        #[allow(unused)]
        let target = self.mapping.as_ptr().add(offset);
        cfg_if::cfg_if! {
            if #[cfg(target_arch = "x86_64")] {
                // As per table 11-7 of the SDM, processors are not required to
                // snoop UC mappings, so flush the target to memory.
                core::arch::x86_64::_mm_clflush(target);
            } else if #[cfg(target_arch = "aarch64")] {
                // Data cache clean by VA to PoC.
                std::arch::asm!("DC CVAC, {x}", x = in(reg) target);
            } else if #[cfg(target_arch = "arm")] {
                panic!("Userspace cannot flush to PoC");
            } else {
                unimplemented!("Cache flush not implemented")
            }
        }
    }
}
}
pub struct MemoryMappingBuilder<'a> {

View file

@ -53,6 +53,13 @@ impl SharedMemory {
pub fn from_safe_descriptor(descriptor: SafeDescriptor, size: u64) -> Result<SharedMemory> {
<SharedMemory as PlatformSharedMemory>::from_safe_descriptor(descriptor, size)
}
/// Creates a second `SharedMemory` handle for the same underlying object.
///
/// The descriptor is duplicated via `SafeDescriptor::try_from`, and the new
/// handle reports the same size as `self`.
pub fn try_clone(&self) -> Result<SharedMemory> {
    let duplicated = SafeDescriptor::try_from(self as &dyn AsRawDescriptor)?;
    SharedMemory::from_safe_descriptor(duplicated, self.size())
}
}
/// USE THIS CAUTIOUSLY. On Windows, the returned handle is not a file handle and cannot be used as

View file

@ -20,6 +20,7 @@ use anyhow::anyhow;
use anyhow::Context;
use base::debug;
use base::error;
use base::SharedMemory;
use remain::sorted;
use serde::Deserialize;
use serde::Serialize;
@ -115,6 +116,22 @@ pub trait BusDevice: Send + Suspendable {
fn config_register_read(&self, reg_idx: usize) -> u32 {
0
}
/// Provides a memory region to back MMIO access to the configuration
/// space. If the device can keep the memory region up to date, then it
/// should return true, after which no more calls to config_register_read
/// will be made. Otherwise the device should return false.
///
/// The default implementation opts out by returning false without touching
/// `shmem`.
///
/// The device must set the header type register (0x0E) before returning
/// from this function, and must make no further modifications to it
/// after returning. This is to allow the caller to manage the multi-
/// function device bit without worrying about race conditions.
///
/// * `shmem` - The shared memory to use for the configuration space.
/// * `base` - The byte offset within `shmem` at which this device's
///   configuration space region starts.
/// * `len` - The length of the memory region, in bytes.
fn init_pci_config_mapping(&mut self, shmem: &SharedMemory, base: usize, len: usize) -> bool {
false
}
/// Sets a register in the virtual config space. Only used by PCI.
/// * `reg_idx` - The index of the config register to modify.
/// * `value` - The value to be written.

View file

@ -109,6 +109,7 @@ pub use self::pci::PciConfigMmio;
pub use self::pci::PciDevice;
pub use self::pci::PciDeviceError;
pub use self::pci::PciInterruptPin;
pub use self::pci::PciMmioMapper;
pub use self::pci::PciRoot;
pub use self::pci::PciRootCommand;
pub use self::pci::PciVirtualConfigMmio;

View file

@ -74,6 +74,7 @@ pub use self::pci_hotplug::NetResourceCarrier;
pub use self::pci_hotplug::ResourceCarrier;
pub use self::pci_root::PciConfigIo;
pub use self::pci_root::PciConfigMmio;
pub use self::pci_root::PciMmioMapper;
pub use self::pci_root::PciRoot;
pub use self::pci_root::PciRootCommand;
pub use self::pci_root::PciVirtualConfigMmio;

View file

@ -7,11 +7,15 @@ use std::convert::TryFrom;
use std::convert::TryInto;
use std::sync::Arc;
use anyhow::bail;
use anyhow::Context;
use base::custom_serde::deserialize_seq_to_arr;
use base::custom_serde::serialize_arr;
use base::error;
use base::warn;
use base::MemoryMapping;
use base::MemoryMappingBuilder;
use base::SharedMemory;
use downcast_rs::impl_downcast;
use downcast_rs::Downcast;
use remain::sorted;
@ -35,7 +39,8 @@ pub const STATUS_REG_CAPABILITIES_USED_MASK: u32 = 0x0010_0000;
#[cfg(any(target_os = "android", target_os = "linux"))]
pub const CLASS_REG: usize = 2;
pub const HEADER_TYPE_REG: usize = 3;
pub const HEADER_TYPE_MULTIFUNCTION_MASK: u32 = 0x0080_0000;
pub const HEADER_TYPE_REG_OFFSET: usize = 2;
pub const HEADER_TYPE_MULTIFUNCTION_MASK: u8 = 0x80;
pub const BAR0_REG: usize = 4;
const BAR_IO_ADDR_MASK: u32 = 0xffff_fffc;
const BAR_IO_MIN_SIZE: u64 = 4;
@ -286,6 +291,12 @@ pub trait PciCapConfig: Send {
data: &[u8],
) -> Option<Box<dyn PciCapConfigWriteResult>>;
/// Used to pass the mmio region for the capability to the implementation.
/// If any external events update the capability's registers, then
/// `PciCapMapping.set_reg` must be called to make the changes visible
/// to the guest.
///
/// The default implementation discards the mapping, which is only
/// appropriate for capabilities whose registers never change outside of
/// `write_reg`.
fn set_cap_mapping(&mut self, _mapping: PciCapMapping) {}
fn num_regs(&self) -> usize {
self.read_mask().len()
}
@ -302,6 +313,7 @@ pub struct PciConfiguration {
// Contains the byte offset and size of the last capability.
last_capability: Option<(usize, usize)>,
capability_configs: BTreeMap<usize, Box<dyn PciCapConfig>>,
mmio_mapping: Option<(Arc<Mutex<MemoryMapping>>, usize)>,
}
#[derive(Serialize, Deserialize)]
@ -452,6 +464,7 @@ impl PciConfiguration {
bar_configs: [None; NUM_BAR_REGS],
last_capability: None,
capability_configs: BTreeMap::new(),
mmio_mapping: None,
}
}
@ -489,7 +502,11 @@ impl PciConfiguration {
if let Some((idx, cfg)) = self.capability_configs.range_mut(..=reg_idx).last() {
if reg_idx < idx + cfg.num_regs() {
let cap_idx = reg_idx - idx;
return cfg.write_reg(cap_idx, offset, data);
let ret = cfg.write_reg(cap_idx, offset, data);
let new_val = cfg.read_reg(cap_idx);
let mask = cfg.read_mask()[cap_idx];
self.set_reg(reg_idx, new_val, mask);
return ret;
}
}
None
@ -502,8 +519,11 @@ impl PciConfiguration {
return;
}
let reg_idx = offset / 4;
if let Some(r) = self.registers.get_mut(reg_idx) {
*r = (*r & !self.writable_bits[reg_idx]) | (value & self.writable_bits[reg_idx]);
if reg_idx < NUM_CONFIGURATION_REGISTERS {
let old_value = self.registers[reg_idx];
let new_value =
(old_value & !self.writable_bits[reg_idx]) | (value & self.writable_bits[reg_idx]);
self.do_write(reg_idx, new_value)
} else {
warn!("bad PCI dword write {}", offset);
}
@ -521,11 +541,13 @@ impl PciConfiguration {
};
let reg_idx = offset / 4;
if let Some(r) = self.registers.get_mut(reg_idx) {
if reg_idx < NUM_CONFIGURATION_REGISTERS {
let old_value = self.registers[reg_idx];
let writable_mask = self.writable_bits[reg_idx];
let mask = (0xffffu32 << shift) & writable_mask;
let shifted_value = (u32::from(value) << shift) & writable_mask;
*r = *r & !mask | shifted_value;
let new_value = old_value & !mask | shifted_value;
self.do_write(reg_idx, new_value)
} else {
warn!("bad PCI config word write offset {}", offset);
}
@ -541,20 +563,36 @@ impl PciConfiguration {
let shift = (offset % 4) * 8;
let reg_idx = offset / 4;
if let Some(r) = self.registers.get_mut(reg_idx) {
if reg_idx < NUM_CONFIGURATION_REGISTERS {
let writable_mask = if apply_writable_mask {
self.writable_bits[reg_idx]
} else {
0xffff_ffff
};
let old_value = self.registers[reg_idx];
let mask = (0xffu32 << shift) & writable_mask;
let shifted_value = (u32::from(value) << shift) & writable_mask;
*r = *r & !mask | shifted_value;
let new_value = old_value & !mask | shifted_value;
self.do_write(reg_idx, new_value)
} else {
warn!("bad PCI config byte write offset {}", offset);
}
}
/// Overwrites selected bits of a configuration register on behalf of the device.
///
/// Intended for device-internal events that need to change the configuration
/// space, so the guest-facing writable-bits masks are deliberately not applied.
/// Out-of-range register indices are silently ignored.
///
/// * `reg_idx` - index into `PciConfiguration.registers`.
/// * `data` - the new bit values.
/// * `mask` - selects which bits of the register are replaced by `data`; all
///   other bits keep their current value.
pub fn set_reg(&mut self, reg_idx: usize, data: u32, mask: u32) {
    if reg_idx < NUM_CONFIGURATION_REGISTERS {
        let preserved_bits = self.registers[reg_idx] & !mask;
        let replaced_bits = data & mask;
        self.do_write(reg_idx, preserved_bits | replaced_bits);
    }
}
/// Adds a region specified by `config`. Configures the specified BAR(s) to
/// report this region and size to the guest kernel. Enforces a few constraints
/// (i.e, region size must be power of two, register not already used). Returns 'None' on
@ -617,7 +655,7 @@ impl PciConfiguration {
return Err(Error::BarInUse64(config.bar_idx));
}
self.registers[reg_idx + 1] = (config.addr >> 32) as u32;
self.do_write(reg_idx + 1, (config.addr >> 32) as u32);
self.writable_bits[reg_idx + 1] = !((config.size - 1) >> 32) as u32;
self.bar_used[config.bar_idx + 1] = true;
}
@ -637,7 +675,7 @@ impl PciConfiguration {
}
};
self.registers[reg_idx] = ((config.addr as u32) & mask) | lower_bits;
self.do_write(reg_idx, ((config.addr as u32) & mask) | lower_bits);
self.writable_bits[reg_idx] = !(config.size - 1) as u32;
if config.is_expansion_rom() {
self.writable_bits[reg_idx] |= 1; // Expansion ROM enable bit.
@ -711,10 +749,10 @@ impl PciConfiguration {
pub fn set_irq(&mut self, line: u8, pin: PciInterruptPin) {
// `pin` is 1-based in the pci config space.
let pin_idx = (pin as u32) + 1;
self.registers[INTERRUPT_LINE_PIN_REG] = (self.registers[INTERRUPT_LINE_PIN_REG]
& 0xffff_0000)
let new_val = (self.registers[INTERRUPT_LINE_PIN_REG] & 0xffff_0000)
| (pin_idx << 8)
| u32::from(line);
self.do_write(INTERRUPT_LINE_PIN_REG, new_val)
}
/// Adds the capability `cap_data` to the list of capabilities.
@ -741,7 +779,10 @@ impl PciConfiguration {
if end_offset > CAPABILITY_MAX_OFFSET {
return Err(Error::CapabilitySpaceFull(total_len));
}
self.registers[STATUS_REG] |= STATUS_REG_CAPABILITIES_USED_MASK;
self.do_write(
STATUS_REG,
self.registers[STATUS_REG] | STATUS_REG_CAPABILITIES_USED_MASK,
);
self.write_byte_internal(tail_offset, cap_offset as u8, false);
self.write_byte_internal(cap_offset, cap_data.id() as u8, false);
self.write_byte_internal(cap_offset + 1, 0, false); // Next pointer.
@ -753,7 +794,14 @@ impl PciConfiguration {
self.writable_bits[reg_idx + i] = *dword;
}
self.last_capability = Some((cap_offset, total_len));
if let Some(cap_config) = cap_config {
if let Some(mut cap_config) = cap_config {
if let Some((mapping, offset)) = &self.mmio_mapping {
cap_config.set_cap_mapping(PciCapMapping {
mapping: mapping.clone(),
offset: reg_idx * 4 + offset,
num_regs: total_len / 4,
});
}
self.capability_configs.insert(cap_offset / 4, cap_config);
}
Ok(())
@ -765,6 +813,30 @@ impl PciConfiguration {
(next + 3) & !3
}
/// Stores `value` into register `reg_idx` and, if an MMIO mapping has been set
/// up, mirrors the new value into the mapping and flushes it so that the guest
/// can observe it.
///
/// The header type byte of the mapping is never written here; see the comment
/// in the body.
///
/// Panics if `reg_idx` is out of range for `self.registers`, or if the write
/// into the mapping fails ("bad register offset").
fn do_write(&mut self, reg_idx: usize, value: u32) {
self.registers[reg_idx] = value;
if let Some((mmio_mapping, offset)) = self.mmio_mapping.as_ref() {
let mmio_mapping = mmio_mapping.lock();
// Byte offset of this register inside the mapping.
let reg_offset = offset + reg_idx * 4;
if reg_idx == HEADER_TYPE_REG {
// Skip writing the header type byte (reg_idx=3/offset=2, i.e.
// HEADER_TYPE_REG/HEADER_TYPE_REG_OFFSET) as per the requirements
// of PciDevice.setup_pci_config_mapping.
// Bytes 0-1 of the register.
mmio_mapping
.write_obj_volatile((value & 0xffff) as u16, reg_offset)
.expect("bad register offset");
// Skip HEADER_TYPE_REG_OFFSET (i.e. header+mfd byte)
// Byte 3 of the register.
mmio_mapping
.write_obj_volatile(((value >> 24) & 0xff) as u8, reg_offset + 3)
.expect("bad register offset");
} else {
mmio_mapping
.write_obj_volatile(value, reg_offset)
.expect("bad register offset");
}
mmio_mapping.flush_uncached_guest_mapping(reg_offset)
}
}
pub fn snapshot(&self) -> anyhow::Result<serde_json::Value> {
serde_json::to_value(PciConfigurationSerialized {
registers: self.registers,
@ -784,6 +856,50 @@ impl PciConfiguration {
self.bar_used = deser.bar_used;
self.bar_configs = deser.bar_configs;
self.last_capability = deser.last_capability;
// Restore everything via do_write to avoid writing to the header type register
// and clobbering the multi-function device bit, as that bit is managed by the
// PciRoot. Since restore doesn't change the types or layout of PCI devices, the
// header type bits in the register are already correct anyway.
for i in 0..NUM_CONFIGURATION_REGISTERS {
self.do_write(i, self.registers[i]);
}
Ok(())
}
/// Starts mirroring this configuration space into `shmem`.
///
/// Maps `shmem`, copies the current register values into the `len`-byte region
/// starting at byte offset `base`, hands every registered capability a
/// `PciCapMapping` for its own registers, and remembers the mapping so that
/// later register writes are mirrored as well.
///
/// Returns an error if a mapping was already set up or if `shmem` cannot be
/// mapped. Panics ("memcpy failed") if a write into the mapping fails.
pub fn setup_mapping(
    &mut self,
    shmem: &SharedMemory,
    base: usize,
    len: usize,
) -> anyhow::Result<()> {
    if self.mmio_mapping.is_some() {
        bail!("PCIe config mmio mapping already initialized");
    }
    let mapping = MemoryMappingBuilder::new(base::pagesize())
        .from_shared_memory(shmem)
        .build()
        .context("Failed to create mapping")?;

    // Seed the region dword by dword; slots past the end of the register array
    // are filled with all ones.
    for reg_idx in 0..(len / 4) {
        let reg_value = self.registers.get(reg_idx).copied().unwrap_or(0xffff_ffff);
        mapping
            .write_obj_volatile(reg_value, base + reg_idx * 4)
            .expect("memcpy failed");
    }

    let shared_mapping = Arc::new(Mutex::new(mapping));
    for (&cap_start, cap_config) in self.capability_configs.iter_mut() {
        let mut cap_mapping = PciCapMapping {
            mapping: Arc::clone(&shared_mapping),
            offset: base + cap_start * 4,
            num_regs: cap_config.num_regs(),
        };
        // Overlay the capability's own view of its registers before handing
        // the mapping over to it.
        for cap_reg in 0..cap_config.num_regs() {
            let reg_value = cap_config.read_reg(cap_reg);
            let reg_mask = cap_config.read_mask()[cap_reg];
            cap_mapping.set_reg(cap_reg, reg_value, reg_mask);
        }
        cap_config.set_cap_mapping(cap_mapping);
    }

    self.mmio_mapping = Some((shared_mapping, base));
    Ok(())
}
}
@ -872,6 +988,37 @@ impl<T: PciCapConfig + ?Sized> PciCapConfig for Arc<Mutex<T>> {
) -> Option<Box<dyn PciCapConfigWriteResult>> {
self.lock().write_reg(reg_idx, offset, data)
}
// Forward the mapping to the wrapped capability while holding its lock.
fn set_cap_mapping(&mut self, mapping: PciCapMapping) {
self.lock().set_cap_mapping(mapping)
}
}
/// Handle for updating a capability's registers in the MMIO mapping.
pub struct PciCapMapping {
    /// Mapping shared with the owning configuration space.
    mapping: Arc<Mutex<MemoryMapping>>,
    /// Byte offset of the capability's first register inside `mapping`.
    offset: usize,
    /// Number of 32-bit registers that may be written through this handle.
    num_regs: usize,
}

impl PciCapMapping {
    /// Replaces the bits selected by `mask` in register `reg_idx` with the
    /// corresponding bits of `data`, then flushes the register so the change
    /// is visible to the guest.
    ///
    /// An out-of-range `reg_idx` is logged and otherwise ignored.
    pub fn set_reg(&mut self, reg_idx: usize, data: u32, mask: u32) {
        if reg_idx < self.num_regs {
            let guard = self.mapping.lock();
            let byte_offset = self.offset + reg_idx * 4;
            let previous: u32 = guard.read_obj(byte_offset).expect("memcpy failed");
            let merged = (previous & !mask) | (data & mask);
            guard
                .write_obj_volatile(merged, byte_offset)
                .expect("memcpy failed");
            guard.flush_uncached_guest_mapping(byte_offset);
        } else {
            error!(
                "out of bounds register write {} vs {}",
                self.num_regs, reg_idx
            );
        }
    }
}
#[cfg(test)]

View file

@ -11,8 +11,10 @@ use acpi_tables::sdt::SDT;
use anyhow::bail;
use base::error;
use base::trace;
use base::warn;
use base::MemoryMapping;
use base::RawDescriptor;
use base::SharedMemory;
use remain::sorted;
use resources::Error as SystemAllocatorFaliure;
use resources::SystemAllocator;
@ -99,6 +101,9 @@ pub enum Error {
/// Registering an IO BAR failed.
#[error("failed to register an IO BAR, addr={0} err={1}")]
IoRegistrationFailed(u64, pci_configuration::Error),
/// Setting up MMIO mapping
#[error("failed to set up MMIO mapping: {0}")]
MmioSetup(anyhow::Error),
/// Out-of-space encountered
#[error("Out-of-space detected")]
OutOfSpace,
@ -406,6 +411,30 @@ pub trait PciDevice: Send + Suspendable {
/// * `data` - The data to write.
fn write_config_register(&mut self, reg_idx: usize, offset: u64, data: &[u8]);
/// Provides a memory region to back MMIO access to the configuration
/// space. If the device can keep the memory region up to date, then it
/// should return Ok(true), after which no more calls to read_config_register
/// will be made. If support isn't implemented, it should return Ok(false).
/// Otherwise, it should return an error (a failure here is not treated as
/// a fatal setup error).
///
/// The default implementation returns Ok(false), i.e. the device does not
/// support a memory-backed configuration space.
///
/// The device must set the header type register (0x0E) before returning
/// from this function, and must make no further modifications to it
/// after returning. This is to allow the caller to manage the multi-
/// function device bit without worrying about race conditions.
///
/// * `shmem` - The shared memory to use for the configuration space.
/// * `base` - The byte offset within `shmem` at which this device's
///   configuration space region starts.
/// * `len` - The length of the memory region, in bytes.
fn setup_pci_config_mapping(
&mut self,
_shmem: &SharedMemory,
_base: usize,
_len: usize,
) -> Result<bool> {
Ok(false)
}
/// Reads from a virtual config register.
/// * `reg_idx` - virtual config register index (in units of 4 bytes).
fn read_virtual_config_register(&self, _reg_idx: usize) -> u32 {
@ -675,6 +704,16 @@ impl<T: PciDevice> BusDevice for T {
self.read_config_register(reg_idx)
}
/// Adapts `PciDevice::setup_pci_config_mapping` to the `BusDevice` interface.
///
/// A setup error is logged as a warning and reported as `false`, so the
/// device falls back to not using the mapping.
fn init_pci_config_mapping(&mut self, shmem: &SharedMemory, base: usize, len: usize) -> bool {
    self.setup_pci_config_mapping(shmem, base, len)
        .unwrap_or_else(|err| {
            warn!("Failed to create PCI mapping: {:#}", err);
            false
        })
}
fn virtual_config_register_write(&mut self, reg_idx: usize, value: u32) {
self.write_virtual_config_register(reg_idx, value);
}
@ -765,6 +804,14 @@ impl<T: PciDevice + ?Sized> PciDevice for Box<T> {
fn write_config_register(&mut self, reg_idx: usize, offset: u64, data: &[u8]) {
(**self).write_config_register(reg_idx, offset, data)
}
// Forward to the boxed device so that its own implementation (rather than
// the trait default) decides whether the mapping is supported.
fn setup_pci_config_mapping(
&mut self,
shmem: &SharedMemory,
base: usize,
len: usize,
) -> Result<bool> {
(**self).setup_pci_config_mapping(shmem, base, len)
}
fn read_bar(&mut self, bar_index: PciBarIndex, offset: u64, data: &mut [u8]) {
(**self).read_bar(bar_index, offset, data)
}

View file

@ -2,21 +2,29 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
use std::collections::btree_map::Entry as BTreeMapEntry;
use std::collections::BTreeMap;
use std::convert::TryInto;
use std::ops::Bound::Included;
use std::ops::DerefMut;
use std::sync::Arc;
use std::sync::Weak;
use anyhow::Context;
use base::error;
use base::MemoryMapping;
use base::MemoryMappingBuilder;
use base::Protection;
use base::RawDescriptor;
use base::SendTube;
use base::SharedMemory;
use base::VmEventType;
use hypervisor::Vm;
use resources::SystemAllocator;
use serde::Deserialize;
use serde::Serialize;
use sync::Mutex;
use vm_memory::GuestAddress;
use crate::pci::pci_configuration::PciBarConfiguration;
use crate::pci::pci_configuration::PciBridgeSubclass;
@ -25,6 +33,7 @@ use crate::pci::pci_configuration::PciConfiguration;
use crate::pci::pci_configuration::PciHeaderType;
use crate::pci::pci_configuration::HEADER_TYPE_MULTIFUNCTION_MASK;
use crate::pci::pci_configuration::HEADER_TYPE_REG;
use crate::pci::pci_configuration::HEADER_TYPE_REG_OFFSET;
use crate::pci::pci_device::Error;
use crate::pci::pci_device::PciBus;
use crate::pci::pci_device::PciDevice;
@ -67,6 +76,18 @@ impl PciDevice for PciRootConfiguration {
self.config.write_reg(reg_idx, offset, data);
}
fn setup_pci_config_mapping(
&mut self,
shmem: &SharedMemory,
base: usize,
len: usize,
) -> Result<bool, Error> {
self.config
.setup_mapping(shmem, base, len)
.map(|_| true)
.map_err(Error::MmioSetup)
}
fn read_bar(&mut self, _bar_index: PciBarIndex, _offset: u64, _data: &mut [u8]) {}
fn write_bar(&mut self, _bar_index: PciBarIndex, _offset: u64, _data: &[u8]) {}
@ -107,6 +128,20 @@ pub enum PciRootCommand {
Kill,
}
/// Bookkeeping for the pages of PCI configuration space that are backed by
/// shared memory and mapped read-only into the guest.
///
/// Only `base` and `register_bit_num` are serialized; `mappings` is skipped.
#[derive(Serialize)]
struct PciRootMmioState {
/// Contains pages mapped read-only into the guest's MMIO space corresponding to
/// the PCI configuration space. Keys are the offset in number of pages from the
/// start of MMIO space. If a particular value is None, then at least one
/// attached device on that page does not support read-only mapped MMIO.
#[serde(skip_serializing)]
mappings: BTreeMap<u32, Option<(SharedMemory, MemoryMapping)>>,
/// Base address of the PCI configuration space's MMIO region.
base: GuestAddress,
/// Number of bits in the address space of a particular function's MMIO space.
register_bit_num: usize,
}
}
/// Emulates the PCI Root bridge.
#[allow(dead_code)] // TODO(b/174705596): remove once mmio_bus and io_bus are used
pub struct PciRoot {
@ -122,6 +157,7 @@ pub struct PciRoot {
devices: BTreeMap<PciAddress, Arc<Mutex<dyn BusDevice>>>,
/// pcie enhanced configuration access mmio base
pcie_cfg_mmio: Option<u64>,
pci_mmio_state: PciRootMmioState,
}
const PCI_DEVICE_ID_INTEL_82441: u16 = 0x1237;
@ -136,7 +172,36 @@ struct PciRootSerializable {
impl PciRoot {
/// Create an empty PCI root bus.
pub fn new(mmio_bus: Weak<Bus>, io_bus: Weak<Bus>, root_bus: Arc<Mutex<PciBus>>) -> Self {
/// Builds the root and sets up the memory-backed configuration space for the
/// root configuration at address 00:00.0.
///
/// * `vm` - used to map configuration space pages into the guest.
/// * `mmio_base` - base address of the PCI configuration space's MMIO region.
/// * `mmio_register_bit_num` - number of address bits of a single function's
///   configuration space.
///
/// Returns an error if setting up the root configuration mapping fails.
/// Panics if one function's configuration space (`1 << mmio_register_bit_num`
/// bytes) does not fit in a single page.
pub fn new(
vm: &mut impl Vm,
mmio_bus: Weak<Bus>,
mmio_base: GuestAddress,
mmio_register_bit_num: usize,
io_bus: Weak<Bus>,
root_bus: Arc<Mutex<PciBus>>,
) -> anyhow::Result<Self> {
// mmio_mappings's implementation assumes each device's mmio registers
// can fit on a single page. Always true given existing specs.
assert!(base::pagesize() >= (1 << mmio_register_bit_num));
let mut root =
Self::create_for_test(mmio_bus, mmio_base, mmio_register_bit_num, io_bus, root_bus);
// The root configuration is not added through add_device, so its mapping is
// set up explicitly here.
root.pci_mmio_state
.setup_mapping(
&PciAddress::new(0, 0, 0, 0).unwrap(),
&mut root.root_configuration,
vm,
)
.context("failed to set up root configuration mapping")?;
Ok(root)
}
fn create_for_test(
mmio_bus: Weak<Bus>,
mmio_base: GuestAddress,
mmio_register_bit_num: usize,
io_bus: Weak<Bus>,
root_bus: Arc<Mutex<PciBus>>,
) -> Self {
PciRoot {
mmio_bus,
io_bus,
@ -156,6 +221,11 @@ impl PciRoot {
},
devices: BTreeMap::new(),
pcie_cfg_mmio: None,
pci_mmio_state: PciRootMmioState {
mappings: BTreeMap::new(),
base: mmio_base,
register_bit_num: mmio_register_bit_num,
},
}
}
@ -191,22 +261,63 @@ impl PciRoot {
/// Enables PCIe enhanced configuration access and records its MMIO base.
///
/// The base is also published through the root configuration: the low dword
/// (with bit 0 set) goes into `PCIE_XBAR_BASE_ADDR` and the high dword into
/// the register that follows it.
pub fn enable_pcie_cfg_mmio(&mut self, pcie_cfg_mmio: u64) {
    self.pcie_cfg_mmio = Some(pcie_cfg_mmio);

    // Update the config space registers that depend on pcie_cfg_mmio.
    let low_dword = (pcie_cfg_mmio as u32) | 0x1;
    let high_dword = (pcie_cfg_mmio >> 32) as u32;
    let config = &mut self.root_configuration.config;
    config.set_reg(PCIE_XBAR_BASE_ADDR, low_dword, 0xffff_ffff);
    config.set_reg(PCIE_XBAR_BASE_ADDR + 1, high_dword, 0xffff_ffff);
}
/// Add a `device` to this root PCI bus.
pub fn add_device(
pub fn add_device<T>(
&mut self,
address: PciAddress,
device: Arc<Mutex<dyn BusDevice>>,
) -> Result<(), Error> {
mapper: &mut T,
) -> Result<(), Error>
where
T: PciMmioMapper,
{
// Ignore attempt to replace PCI Root host bridge.
if !address.is_root() {
self.pci_mmio_state
.setup_mapping(&address, device.lock().deref_mut(), mapper)
.map_err(Error::MmioSetup)?;
self.devices.insert(address, device);
self.sync_multifunction_bit_to_mmio_mappings(&address, true);
}
self.root_bus.lock().add_child_device(address)
}
/// Brings the multi-function bits stored in the MMIO mappings of `address`'s
/// slot in line with the devices currently attached to that slot.
///
/// * `address` - address of the function that was just added or removed.
/// * `on_add` - true after an addition, false after a removal.
fn sync_multifunction_bit_to_mmio_mappings(&mut self, address: &PciAddress, on_add: bool) {
    let num_mfd = self.num_multifunction_device(address);
    let is_mfd = num_mfd > 0;
    let target_funcs = match (on_add, num_mfd) {
        // If we added the first mfd or removed the last mfd, update all functions' bits
        (true, 1) | (false, 0) => 0..8,
        // If we added a new function, set its bit if necessary
        (true, n) if n > 0 => address.func..(address.func + 1),
        // Nothing changed with respect to the multi-function state.
        _ => return,
    };
    for func in target_funcs {
        let function_address = PciAddress { func, ..*address };
        self.pci_mmio_state.set_mfd_bit(&function_address, is_mfd);
    }
}
pub fn add_bridge(&mut self, bridge_bus: Arc<Mutex<PciBus>>) -> Result<(), Error> {
self.root_bus.lock().add_child_bus(bridge_bus)
}
@ -234,6 +345,7 @@ impl PciRoot {
d.lock().destroy_device();
let _ = self.root_bus.lock().remove_child_device(address);
}
self.sync_multifunction_bit_to_mmio_mappings(&address, false);
}
pub fn config_space_read(&self, address: PciAddress, register: usize) -> u32 {
@ -255,24 +367,8 @@ impl PciRoot {
if register == HEADER_TYPE_REG {
// Set multifunction bit in header type if there are devices at non-zero functions
// in this slot.
if self
.devices
.range((
Included(&PciAddress {
bus: address.bus,
dev: address.dev,
func: 1,
}),
Included(&PciAddress {
bus: address.bus,
dev: address.dev,
func: 7,
}),
))
.next()
.is_some()
{
data |= HEADER_TYPE_MULTIFUNCTION_MASK;
if self.num_multifunction_device(&address) != 0 {
data |= (HEADER_TYPE_MULTIFUNCTION_MASK as u32) << (HEADER_TYPE_REG_OFFSET * 8);
}
}
@ -364,6 +460,129 @@ impl PciRoot {
self.pcie_cfg_mmio = deser.pcie_cfg_mmio;
Ok(())
}
/// Counts the devices attached at functions 1 through 7 of the bus/device
/// slot that `address` belongs to. Function 0 is not counted.
fn num_multifunction_device(&self, address: &PciAddress) -> usize {
    let lowest = PciAddress {
        func: 1,
        ..*address
    };
    let highest = PciAddress {
        func: 7,
        ..*address
    };
    self.devices
        .range((Included(&lowest), Included(&highest)))
        .count()
}
}
impl PciRootMmioState {
/// Tries to back the configuration space of the function at `address` with
/// shared memory that is mapped into the guest.
///
/// One page-sized shared memory object is created lazily per page of
/// configuration space. The device is then asked (via
/// `init_pci_config_mapping`) to mirror its registers into its slice of
/// that page. When the page was newly created and the device accepted it,
/// the page is handed to `mapper`. When the device declines, the page is
/// marked as `None`.
///
/// Returns an error only if creating or mapping the shared memory fails.
fn setup_mapping<T>(
&mut self,
address: &PciAddress,
device: &mut dyn BusDevice,
mapper: &mut T,
) -> anyhow::Result<()>
where
T: PciMmioMapper,
{
// The PCI spec requires that config writes are non-posted. This requires
// uncached mappings in the guest. 32-bit ARM does not support flushing to
// PoC from userspace. The cache maintenance story for riscv is unclear, so
// that is also not implemented.
if cfg!(not(any(target_arch = "x86_64", target_arch = "aarch64"))) {
return Ok(());
}
let pagesize = base::pagesize();
// Offset of the function's configuration space from the start of the
// MMIO region, and the index of the page that offset falls in.
let offset = address.to_config_address(0, self.register_bit_num);
let mmio_mapping_num = offset / pagesize as u32;
let (shmem, new_entry) = match self.mappings.entry(mmio_mapping_num) {
BTreeMapEntry::Vacant(e) => {
let shmem = SharedMemory::new(
format!("{:04x}_pci_cfg_mapping", mmio_mapping_num),
pagesize as u64,
)
.context("failed to create shmem")?;
let mapping = MemoryMappingBuilder::new(pagesize)
.from_shared_memory(&shmem)
.protection(Protection::read_write())
.build()
.context("failed to map shmem")?;
let (shmem, _) = e.insert(Some((shmem, mapping))).as_ref().unwrap();
(shmem, true)
}
BTreeMapEntry::Occupied(e) => {
let Some((shmem, _)) = e.into_mut() else {
// Another device sharing the page didn't support mapped mmio. Oh
// well, we'll just have to fall back to vm-exit handling.
return Ok(());
};
(&*shmem, false)
}
};
if device.init_pci_config_mapping(
shmem,
offset as usize % pagesize,
1 << self.register_bit_num,
) {
if new_entry {
let mmio_address = self
.base
.unchecked_add(mmio_mapping_num as u64 * pagesize as u64);
match mapper.add_mapping(mmio_address, shmem) {
// We never unmap the mapping, so we don't need the id
Ok(_) => (),
// If this fails, mmio handling via vm-exit will work fine. Devices
// will be doing some pointless work keeping the unused mapping up
// to date, but addressing that isn't worth the implementation cost.
Err(e) => error!("Failed to map mmio page; {:?}", e),
}
}
} else {
// NOTE(review): this only replaces the host-side entry for the page. If an
// earlier device sharing the page already caused it to be registered
// through `mapper`, nothing here removes that registration -- confirm
// that guest reads of this page are still handled correctly in that case.
self.mappings.insert(mmio_mapping_num, None);
}
Ok(())
}
/// Sets or clears the multi-function bit in the header type byte of the
/// function at `address` inside its page mapping, then flushes that byte.
/// Does nothing if the page has no mapping.
fn set_mfd_bit(&mut self, address: &PciAddress, is_mfd: bool) {
let pagesize = base::pagesize();
let offset = address.to_config_address(0, self.register_bit_num);
let mapping_num = offset / pagesize as u32;
if let Some(Some((_, mapping))) = self.mappings.get_mut(&mapping_num) {
let mapping_base = offset as usize % pagesize;
// Byte offset of the header type byte within the page.
let reg_offset = mapping_base + (HEADER_TYPE_REG * 4) + HEADER_TYPE_REG_OFFSET;
let mut val = mapping.read_obj::<u8>(reg_offset).expect("memcpy failed");
val = if is_mfd {
val | HEADER_TYPE_MULTIFUNCTION_MASK
} else {
val & !HEADER_TYPE_MULTIFUNCTION_MASK
};
mapping
.write_obj_volatile(val, reg_offset)
.expect("memcpy failed");
mapping.flush_uncached_guest_mapping(reg_offset);
}
}
}
/// Abstraction over whatever maps a page of PCI configuration space into the
/// guest, so that `PciRoot` does not depend on a concrete VM type.
pub trait PciMmioMapper {
/// Maps `shmem` into the guest at `addr`. On success the returned value
/// identifies the created mapping.
fn add_mapping(&mut self, addr: GuestAddress, shmem: &SharedMemory) -> anyhow::Result<u32>;
}
impl<T: Vm> PciMmioMapper for T {
    /// Creates a page-sized, read-protected host mapping of `shmem` and
    /// registers it with the VM at `addr`.
    fn add_mapping(&mut self, addr: GuestAddress, shmem: &SharedMemory) -> anyhow::Result<u32> {
        let builder = MemoryMappingBuilder::new(base::pagesize())
            .from_shared_memory(shmem)
            .protection(Protection::read());
        let host_mapping = builder.build().context("failed to map shmem")?;
        // NOTE(review): the two flags are presumably `read_only = true` and
        // `log_dirty_pages = false` -- confirm against `Vm::add_memory_region`.
        self.add_memory_region(addr, Box::new(host_mapping), true, false)
            .context("failed to create vm mapping")
    }
}
/// Emulates PCI configuration access mechanism #1 (I/O ports 0xcf8 and 0xcfc).
@ -762,8 +981,10 @@ mod tests {
let mmio_bus = Arc::new(Bus::new(BusType::Mmio));
let root_bus = Arc::new(Mutex::new(PciBus::new(0, 0, false)));
Arc::new(Mutex::new(PciRoot::new(
Arc::new(Mutex::new(PciRoot::create_for_test(
Arc::downgrade(&mmio_bus),
GuestAddress(0),
0,
Arc::downgrade(&io_bus),
root_bus,
)))

View file

@ -14,6 +14,7 @@ use zerocopy::FromBytes;
use crate::pci::pci_configuration::PciCapConfig;
use crate::pci::pci_configuration::PciCapConfigWriteResult;
use crate::pci::pci_configuration::PciCapMapping;
use crate::pci::pci_configuration::PciCapability;
use crate::pci::pcie::pci_bridge::PciBridgeBusRange;
use crate::pci::pcie::pcie_device::PcieCap;
@ -392,6 +393,8 @@ pub struct PcieConfig {
hp_interrupt_pending: bool,
removed_downstream_valid: bool,
cap_mapping: Option<PciCapMapping>,
}
impl PcieConfig {
@ -415,6 +418,8 @@ impl PcieConfig {
hp_interrupt_pending: false,
removed_downstream_valid: false,
cap_mapping: None,
}
}
@ -568,6 +573,13 @@ impl PcieConfig {
fn set_slot_status(&mut self, flag: u16) {
    // Accumulate the new flag(s) into the slot status value.
    self.slot_status |= flag;

    // Mirror the updated status into the upper 16 bits of the register at
    // PCIE_SLTCTL_OFFSET, if a config space mapping has been provided.
    let status_bits = (self.slot_status as u32) << 16;
    if let Some(cap_mapping) = &mut self.cap_mapping {
        cap_mapping.set_reg(PCIE_SLTCTL_OFFSET / 4, status_bits, 0xffff0000);
    }
}
}
@ -599,6 +611,10 @@ impl PciCapConfig for PcieConfig {
self.write_pcie_cap(reg_idx * 4 + offset as usize, data);
None
}
/// Stores `mapping`, replacing any previously stored one, so that
/// `set_slot_status` can mirror slot status updates into it.
fn set_cap_mapping(&mut self, mapping: PciCapMapping) {
self.cap_mapping = Some(mapping);
}
}
/// Helper trait for implementing PcieDevice where most functions

View file

@ -10,6 +10,7 @@ use zerocopy::FromZeroes;
use crate::pci::pci_configuration::PciCapConfig;
use crate::pci::pci_configuration::PciCapConfigWriteResult;
use crate::pci::pci_configuration::PciCapMapping;
use crate::pci::PciCapability;
use crate::pci::PciCapabilityID;
@ -76,12 +77,14 @@ impl PciPmCap {
/// State backing a device's power management capability.
pub struct PmConfig {
// 16-bit power control/status value; starts at 0 (see `new`).
power_control_status: u16,
// Mapping handle installed through `set_cap_mapping`; `None` until then.
// When present, status changes are mirrored into it via `set_reg`.
cap_mapping: Option<PciCapMapping>,
}
impl PmConfig {
pub fn new() -> Self {
PmConfig {
power_control_status: 0,
cap_mapping: None,
}
}
@ -128,6 +131,13 @@ impl PmConfig {
&& self.power_control_status & PM_PME_ENABLE != 0
{
self.power_control_status |= PM_PME_STATUS;
if let Some(cap_mapping) = &mut self.cap_mapping {
cap_mapping.set_reg(
PM_CAP_CONTROL_STATE_OFFSET,
self.power_control_status as u32,
0xffff,
);
}
return true;
}
@ -182,4 +192,8 @@ impl PciCapConfig for PmConfig {
}
None
}
/// Stores `mapping`, replacing any previously stored one, so that later
/// changes to `power_control_status` can be mirrored into it.
fn set_cap_mapping(&mut self, mapping: PciCapMapping) {
self.cap_mapping = Some(mapping);
}
}

View file

@ -18,6 +18,7 @@ use anyhow::Context;
use base::error;
use base::RawDescriptor;
use base::SendTube;
use base::SharedMemory;
use base::VmEventType;
use resources::Alloc;
use resources::AllocOptions;
@ -184,6 +185,18 @@ impl PciDevice for PvPanicPciDevice {
self.config_regs.write_reg(reg_idx, offset, data);
}
/// Hands the shared memory region to `config_regs` so it can set up its
/// mapping.
///
/// Returns `Ok(true)` when `setup_mapping` succeeds; a failure is wrapped in
/// `PciDeviceError::MmioSetup`.
// NOTE(review): `base`/`len` presumably locate this device's config space
// inside `shmem` -- confirm against the `PciDevice` trait documentation.
fn setup_pci_config_mapping(
    &mut self,
    shmem: &SharedMemory,
    base: usize,
    len: usize,
) -> Result<bool> {
    match self.config_regs.setup_mapping(shmem, base, len) {
        Ok(_) => Ok(true),
        Err(e) => Err(PciDeviceError::MmioSetup(e)),
    }
}
fn read_bar(&mut self, bar_index: PciBarIndex, offset: u64, data: &mut [u8]) {
data[0] = if bar_index == PVPANIC_BAR_INDEX && offset == 0 && data.len() == 1 {
PVPANIC_CAPABILITIES

View file

@ -14,6 +14,7 @@ use base::AsRawDescriptor;
#[cfg(feature = "swap")]
use base::AsRawDescriptors;
use base::RawDescriptor;
use base::SharedMemory;
use base::Tube;
use base::TubeError;
use libc::pid_t;
@ -68,6 +69,11 @@ enum Command {
len: u32,
data: [u8; 4],
},
InitPciConfigMapping {
shmem: SharedMemory,
base: usize,
len: usize,
},
ReadVirtualConfig(u32),
WriteVirtualConfig {
reg_idx: u32,
@ -95,6 +101,7 @@ enum CommandResult {
io_add: Vec<BusRange>,
removed_pci_devices: Vec<PciAddress>,
},
InitPciConfigMappingResult(bool),
ReadVirtualConfigResult(u32),
GetRangesResult(Vec<(BusRange, BusType)>),
SnapshotResult(std::result::Result<serde_json::Value, String>),
@ -168,6 +175,10 @@ fn child_proc<D: BusDevice>(tube: Tube, mut device: D) {
removed_pci_devices: res.removed_pci_devices,
})
}
Command::InitPciConfigMapping { shmem, base, len } => {
let success = device.init_pci_config_mapping(&shmem, base, len);
tube.send(&CommandResult::InitPciConfigMappingResult(success))
}
Command::ReadVirtualConfig(idx) => {
let val = device.virtual_config_register_read(idx as usize);
tube.send(&CommandResult::ReadVirtualConfigResult(val))
@ -444,6 +455,15 @@ impl BusDevice for ProxyDevice {
}
}
fn init_pci_config_mapping(&mut self, shmem: &SharedMemory, base: usize, len: usize) -> bool {
let Ok(shmem) = shmem.try_clone() else {
error!("Failed to clone pci config mapping shmem");
return false;
};
let res = self.sync_send(&Command::InitPciConfigMapping { shmem, base, len });
matches!(res, Some(CommandResult::InitPciConfigMappingResult(true)))
}
fn virtual_config_register_write(&mut self, reg_idx: usize, value: u32) {
let reg_idx = reg_idx as u32;
self.sync_send(&Command::WriteVirtualConfig { reg_idx, value });

View file

@ -10,6 +10,7 @@ use std::sync::Arc;
use base::error;
use base::AsRawDescriptor;
use base::RawDescriptor;
use base::SharedMemory;
use resources::Alloc;
use resources::AllocOptions;
use resources::SystemAllocator;
@ -287,6 +288,18 @@ impl PciDevice for XhciController {
self.config_regs.write_reg(reg_idx, offset, data);
}
/// Hands the shared memory region to `config_regs` so it can set up its
/// mapping.
///
/// Returns `Ok(true)` when `setup_mapping` succeeds; a failure is wrapped in
/// `PciDeviceError::MmioSetup`.
// NOTE(review): `base`/`len` presumably locate this device's config space
// inside `shmem` -- confirm against the `PciDevice` trait documentation.
fn setup_pci_config_mapping(
    &mut self,
    shmem: &SharedMemory,
    base: usize,
    len: usize,
) -> Result<bool, PciDeviceError> {
    if let Err(e) = self.config_regs.setup_mapping(shmem, base, len) {
        return Err(PciDeviceError::MmioSetup(e));
    }
    Ok(true)
}
fn read_bar(&mut self, bar_index: usize, offset: u64, data: &mut [u8]) {
if bar_index != 0 {
return;

View file

@ -17,6 +17,7 @@ use base::Event;
use base::Protection;
use base::RawDescriptor;
use base::Result;
use base::SharedMemory;
use base::Tube;
use data_model::Le32;
use hypervisor::Datamatch;
@ -748,6 +749,18 @@ impl PciDevice for VirtioPciDevice {
}
}
/// Hands the shared memory region to `config_regs` so it can set up its
/// mapping.
///
/// Returns `Ok(true)` when `setup_mapping` succeeds; a failure is wrapped in
/// `PciDeviceError::MmioSetup`.
// NOTE(review): `base`/`len` presumably locate this device's config space
// inside `shmem` -- confirm against the `PciDevice` trait documentation.
fn setup_pci_config_mapping(
    &mut self,
    shmem: &SharedMemory,
    base: usize,
    len: usize,
) -> std::result::Result<bool, PciDeviceError> {
    let outcome = self.config_regs.setup_mapping(shmem, base, len);
    match outcome {
        Ok(_) => Ok(true),
        Err(cause) => Err(PciDeviceError::MmioSetup(cause)),
    }
}
fn read_bar(&mut self, bar_index: usize, offset: u64, data: &mut [u8]) {
if bar_index == self.settings_bar {
match offset {

View file

@ -234,6 +234,8 @@ impl arch::LinuxArch for Riscv64 {
pci_devices,
irq_chip.as_irq_chip_mut(),
Arc::clone(&mmio_bus),
GuestAddress(RISCV64_PCI_CFG_BASE),
8,
Arc::clone(&io_bus),
system_allocator,
&mut vm,

View file

@ -117,6 +117,8 @@ use devices::PciAddress;
use devices::PciBridge;
use devices::PciDevice;
#[cfg(target_arch = "x86_64")]
use devices::PciMmioMapper;
#[cfg(target_arch = "x86_64")]
use devices::PciRoot;
#[cfg(target_arch = "x86_64")]
use devices::PciRootCommand;
@ -1975,10 +1977,17 @@ where
}
}
let (hp_vm_mem_host_tube, hp_vm_mem_worker_tube) =
Tube::pair().context("failed to create tube")?;
vm_memory_control_tubes.push(VmMemoryTube {
tube: hp_vm_mem_host_tube,
expose_with_viommu: false,
});
let pci_root = linux.root_config.clone();
std::thread::Builder::new()
.name("pci_root".to_string())
.spawn(move || start_pci_root_worker(pci_root, hp_worker_tube))?
.spawn(move || start_pci_root_worker(pci_root, hp_worker_tube, hp_vm_mem_worker_tube))?
};
let gralloc = RutabagaGralloc::new().context("failed to create gralloc")?;
@ -2030,12 +2039,49 @@ where
fn start_pci_root_worker(
pci_root: Arc<Mutex<PciRoot>>,
hp_device_tube: mpsc::Receiver<PciRootCommand>,
vm_control_tube: Tube,
) {
// `PciMmioMapper` that registers memory by sending requests over a tube
// instead of calling into the VM directly.
struct PciMmioMapperTube {
// Tube on which `VmMemoryRequest::RegisterMemory` is sent and the
// matching `VmMemoryResponse` is received.
vm_control_tube: Tube,
// Region ids returned by successful registrations, keyed by the id handed
// back to the caller of `add_mapping`.
registered_regions: BTreeMap<u32, VmMemoryRegionId>,
// Id to hand out for the next successful registration.
next_id: u32,
}
impl PciMmioMapper for PciMmioMapperTube {
    // Asks the other end of `vm_control_tube` to register a clone of `shmem`
    // at guest physical address `addr` with read protection, then records the
    // returned region id under a freshly allocated local id.
    //
    // Returns that local id. Fails if the clone, the send, or the receive
    // fails, or if the reply is anything other than `RegisterMemory`.
    fn add_mapping(&mut self, addr: GuestAddress, shmem: &SharedMemory) -> anyhow::Result<u32> {
        let shmem = shmem
            .try_clone()
            .context("failed to create new SharedMemory")?;
        self.vm_control_tube
            .send(&VmMemoryRequest::RegisterMemory {
                source: VmMemorySource::SharedMemory(shmem),
                dest: VmMemoryDestination::GuestPhysicalAddress(addr.0),
                prot: Protection::read(),
            })
            .context("failed to send request")?;

        let region = match self.vm_control_tube.recv::<VmMemoryResponse>() {
            Ok(VmMemoryResponse::RegisterMemory(slot)) => slot,
            res => bail!("Bad response: {:?}", res),
        };

        // Ids are handed out sequentially, one per successful registration.
        let assigned_id = self.next_id;
        self.registered_regions.insert(assigned_id, region);
        self.next_id += 1;
        Ok(assigned_id)
    }
}
let mut mapper = PciMmioMapperTube {
vm_control_tube,
registered_regions: BTreeMap::new(),
next_id: 0,
};
loop {
match hp_device_tube.recv() {
Ok(cmd) => match cmd {
PciRootCommand::Add(addr, device) => {
if let Err(e) = pci_root.lock().add_device(addr, device) {
if let Err(e) = pci_root.lock().add_device(addr, device, &mut mapper) {
error!("failed to add hotplugged device to PCI root port: {}", e);
}
}

View file

@ -746,6 +746,8 @@ impl arch::LinuxArch for X8664arch {
pci_devices,
irq_chip.as_irq_chip_mut(),
mmio_bus.clone(),
GuestAddress(pcie_cfg_mmio_range.start),
12,
io_bus.clone(),
system_allocator,
&mut vm,

View file

@ -133,6 +133,8 @@ where
devices,
&mut irq_chip,
mmio_bus.clone(),
GuestAddress(0),
12,
io_bus.clone(),
&mut resources,
&mut vm,