From f30889ca70af1e7bdfc2b266cbef930ecc87d84a Mon Sep 17 00:00:00 2001 From: Dylan Reid Date: Tue, 2 Nov 2021 15:36:46 -0700 Subject: [PATCH] Add riscv arch crate Basic support for riscv. This, combined with follow on commits adds basic support for booting a linux kernel on a riscv machine. This has been tested with a qemu host as the riscv hypervisor extension is not yet widely available in hardware. Change-Id: I44f83f1acf1be2297b62d1f10311e3e47319e5f8 Reviewed-on: https://chromium-review.googlesource.com/c/crosvm/crosvm/+/4460936 Commit-Queue: Daniel Verkamp Reviewed-by: Daniel Verkamp --- riscv64/Cargo.toml | 25 +++ riscv64/src/fdt.rs | 355 +++++++++++++++++++++++++++++++ riscv64/src/lib.rs | 504 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 884 insertions(+) create mode 100644 riscv64/Cargo.toml create mode 100644 riscv64/src/fdt.rs create mode 100644 riscv64/src/lib.rs diff --git a/riscv64/Cargo.toml b/riscv64/Cargo.toml new file mode 100644 index 0000000000..009be4e1ef --- /dev/null +++ b/riscv64/Cargo.toml @@ -0,0 +1,25 @@ +[package] +name = "riscv64" +version = "0.1.0" +authors = ["Rivos Inc."] +edition = "2021" + +[dependencies] +arch = { path = "../arch" } +cros_fdt = { path = "../cros_fdt" } +data_model = { path = "../common/data_model" } +devices = { path = "../devices" } +hypervisor = { path = "../hypervisor" } +kernel_cmdline = { path = "../kernel_cmdline" } +kvm = { path = "../kvm" } +kvm_sys = { path = "../kvm_sys" } +libc = "*" +minijail = "*" +rand = "0.8" +remain = "*" +resources = { path = "../resources" } +sync = { path = "../common/sync" } +thiserror = "*" +base = { path = "../base" } +vm_control = { path = "../vm_control" } +vm_memory = { path = "../vm_memory" } diff --git a/riscv64/src/fdt.rs b/riscv64/src/fdt.rs new file mode 100644 index 0000000000..b8adf8a564 --- /dev/null +++ b/riscv64/src/fdt.rs @@ -0,0 +1,355 @@ +// Copyright 2023 The ChromiumOS Authors +// Use of this source code is governed by a BSD-style license 
that can be +// found in the LICENSE file. + +use cros_fdt::Error; +use cros_fdt::FdtWriter; +use cros_fdt::Result; +use devices::irqchip::aia_aplic_addr; +use devices::irqchip::aia_imsic_size; +use devices::irqchip::AIA_APLIC_SIZE; +use devices::irqchip::AIA_IMSIC_BASE; +use devices::PciAddress; +use devices::PciInterruptPin; +use rand::rngs::OsRng; +use rand::RngCore; +use vm_memory::GuestAddress; +use vm_memory::GuestMemory; + +// This is the start of DRAM in the physical address space. +use crate::RISCV64_PHYS_MEM_START; + +// CPUs are assigned phandles starting with this number. +const PHANDLE_CPU0: u32 = 0x100; + +const PHANDLE_AIA_APLIC: u32 = 2; +const PHANDLE_AIA_IMSIC: u32 = 3; +const PHANDLE_CPU_INTC_BASE: u32 = 4; + +fn create_memory_node(fdt: &mut FdtWriter, guest_mem: &GuestMemory) -> Result<()> { + let mut mem_reg_prop = Vec::new(); + let mut previous_memory_region_end = None; + let mut regions = guest_mem.guest_memory_regions(); + regions.sort(); + for region in regions { + // Merge with the previous region if possible. 
+ if let Some(previous_end) = previous_memory_region_end { + if region.0 == previous_end { + *mem_reg_prop.last_mut().unwrap() += region.1 as u64; + previous_memory_region_end = + Some(previous_end.checked_add(region.1 as u64).unwrap()); + continue; + } + assert!(region.0 > previous_end, "Memory regions overlap"); + } + + mem_reg_prop.push(region.0.offset()); + mem_reg_prop.push(region.1 as u64); + previous_memory_region_end = Some(region.0.checked_add(region.1 as u64).unwrap()); + } + + let memory_node = fdt.begin_node("memory")?; + fdt.property_string("device_type", "memory")?; + fdt.property_array_u64("reg", &mem_reg_prop)?; + fdt.end_node(memory_node)?; + + Ok(()) +} + +fn create_cpu_nodes(fdt: &mut FdtWriter, num_cpus: u32, timebase_frequency: u32) -> Result<()> { + let cpus_node = fdt.begin_node("cpus")?; + fdt.property_u32("#address-cells", 0x1)?; + fdt.property_u32("#size-cells", 0x0)?; + fdt.property_u32("timebase-frequency", timebase_frequency)?; + + for cpu_id in 0..num_cpus { + let cpu_name = format!("cpu@{:x}", cpu_id); + let cpu_node = fdt.begin_node(&cpu_name)?; + fdt.property_string("device_type", "cpu")?; + fdt.property_string("compatible", "riscv")?; + fdt.property_string("mmu-type", "sv48")?; + fdt.property_string("riscv,isa", "rv64iafdcsu_smaia_ssaia")?; + fdt.property_string("status", "okay")?; + fdt.property_u32("reg", cpu_id)?; + fdt.property_u32("phandle", PHANDLE_CPU0 + cpu_id)?; + + // Add interrupt controller node + let intc_node = fdt.begin_node("interrupt-controller")?; + fdt.property_string("compatible", "riscv,cpu-intc")?; + fdt.property_u32("#interrupt-cells", 1)?; + fdt.property_null("interrupt-controller")?; + fdt.property_u32("phandle", PHANDLE_CPU_INTC_BASE + cpu_id)?; + fdt.end_node(intc_node)?; + + fdt.end_node(cpu_node)?; + } + + fdt.end_node(cpus_node)?; + Ok(()) +} + +fn create_chosen_node( + fdt: &mut FdtWriter, + cmdline: &str, + initrd: Option<(GuestAddress, usize)>, +) -> Result<()> { + let chosen_node = 
fdt.begin_node("chosen")?; + fdt.property_u32("linux,pci-probe-only", 1)?; + fdt.property_string("bootargs", cmdline)?; + + let mut kaslr_seed_bytes = [0u8; 8]; + OsRng.fill_bytes(&mut kaslr_seed_bytes); + let kaslr_seed = u64::from_le_bytes(kaslr_seed_bytes); + fdt.property_u64("kaslr-seed", kaslr_seed)?; + + let mut rng_seed_bytes = [0u8; 256]; + OsRng.fill_bytes(&mut rng_seed_bytes); + fdt.property("rng-seed", &rng_seed_bytes)?; + + if let Some((initrd_addr, initrd_size)) = initrd { + let initrd_start = initrd_addr.offset() as u64; + let initrd_end = initrd_start + initrd_size as u64; + fdt.property_u64("linux,initrd-start", initrd_start)?; + fdt.property_u64("linux,initrd-end", initrd_end)?; + } + + fdt.end_node(chosen_node)?; + + Ok(()) +} + +// num_ids: number of imsic ids from the aia subsystem +// num_sources: number of aplic sources from the aia subsystem +fn create_aia_node( + fdt: &mut FdtWriter, + num_cpus: usize, + num_ids: usize, + num_sources: usize, +) -> Result<()> { + let name = format!("imsics@{:#08x}", AIA_IMSIC_BASE); + let imsic_node = fdt.begin_node(&name)?; + fdt.property_string("compatible", "riscv,imsics")?; + + let regs = [ + 0u32, + AIA_IMSIC_BASE as u32, + 0, + aia_imsic_size(num_cpus) as u32, + ]; + fdt.property_array_u32("reg", &regs)?; + fdt.property_u32("#interrupt-cells", 0)?; + fdt.property_null("interrupt-controller")?; + fdt.property_null("msi-controller")?; + fdt.property_u32("riscv,num-ids", num_ids as u32)?; + fdt.property_u32("phandle", PHANDLE_AIA_IMSIC)?; + + const S_MODE_EXT_IRQ: u32 = 9; + let mut cpu_intc_regs: Vec = Vec::with_capacity(num_cpus * 2); + for hart in 0..num_cpus { + cpu_intc_regs.push(PHANDLE_CPU_INTC_BASE + hart as u32); + cpu_intc_regs.push(S_MODE_EXT_IRQ); + } + fdt.property_array_u32("interrupts-extended", &cpu_intc_regs)?; + + fdt.end_node(imsic_node)?; + + /* Skip APLIC node if we have no interrupt sources */ + if num_sources > 0 { + let name = format!("aplic@{:#08x}", aia_aplic_addr(num_cpus)); + let 
aplic_node = fdt.begin_node(&name)?; + fdt.property_string("compatible", "riscv,aplic")?; + + let regs = [ + 0u32, + aia_aplic_addr(num_cpus) as u32, + 0, + AIA_APLIC_SIZE as u32, + ]; + fdt.property_array_u32("reg", &regs)?; + fdt.property_u32("#interrupt-cells", 2)?; + fdt.property_null("interrupt-controller")?; + fdt.property_u32("riscv,num-sources", num_sources as u32)?; + fdt.property_u32("phandle", PHANDLE_AIA_APLIC)?; + fdt.property_u32("msi-parent", PHANDLE_AIA_IMSIC)?; + fdt.end_node(aplic_node)?; + } + + Ok(()) +} + +/// PCI host controller address range. +/// +/// This represents a single entry in the "ranges" property for a PCI host controller. +/// +/// See [PCI Bus Binding to Open Firmware](https://www.openfirmware.info/data/docs/bus.pci.pdf) +/// and https://www.kernel.org/doc/Documentation/devicetree/bindings/pci/host-generic-pci.txt +/// for more information. +#[derive(Copy, Clone)] +pub struct PciRange { + pub space: PciAddressSpace, + pub bus_address: u64, + pub cpu_physical_address: u64, + pub size: u64, + pub prefetchable: bool, +} + +/// PCI address space. +#[derive(Copy, Clone)] +#[allow(dead_code)] +pub enum PciAddressSpace { + /// PCI configuration space + Configuration = 0b00, + /// I/O space + Io = 0b01, + /// 32-bit memory space + Memory = 0b10, + /// 64-bit memory space + Memory64 = 0b11, +} + +/// Location of memory-mapped PCI configuration space. +#[derive(Copy, Clone)] +pub struct PciConfigRegion { + /// Physical address of the base of the memory-mapped PCI configuration region. + pub base: u64, + /// Size of the PCI configuration region in bytes. + pub size: u64, +} + +fn create_pci_nodes( + fdt: &mut FdtWriter, + pci_irqs: Vec<(PciAddress, u32, PciInterruptPin)>, + cfg: PciConfigRegion, + ranges: &[PciRange], +) -> Result<()> { + // Add devicetree nodes describing a PCI generic host controller. + // See Documentation/devicetree/bindings/pci/host-generic-pci.txt in the kernel + // and "PCI Bus Binding to IEEE Std 1275-1994". 
+ let ranges: Vec = ranges + .iter() + .map(|r| { + let ss = r.space as u32; + let p = r.prefetchable as u32; + [ + // BUS_ADDRESS(3) encoded as defined in OF PCI Bus Binding + (ss << 24) | (p << 30), + (r.bus_address >> 32) as u32, + r.bus_address as u32, + // CPU_PHYSICAL(2) + (r.cpu_physical_address >> 32) as u32, + r.cpu_physical_address as u32, + // SIZE(2) + (r.size >> 32) as u32, + r.size as u32, + ] + }) + .flatten() + .collect(); + + let bus_range = [0, 0]; // Only bus 0 + let reg = [cfg.base, cfg.size]; + + const IRQ_TYPE_LEVEL_HIGH: u32 = 0x00000004; + let mut interrupts: Vec = Vec::new(); + let mut masks: Vec = Vec::new(); + + for (address, irq_num, irq_pin) in pci_irqs.iter() { + // PCI_DEVICE(3) + interrupts.push(address.to_config_address(0, 8)); + interrupts.push(0); + interrupts.push(0); + + // INT#(1) + interrupts.push(irq_pin.to_mask() + 1); + + // INTERRUPT INFO + interrupts.push(PHANDLE_AIA_APLIC); + interrupts.push(*irq_num); + interrupts.push(IRQ_TYPE_LEVEL_HIGH); + + // PCI_DEVICE(3) + masks.push(0xf800); // bits 11..15 (device) + masks.push(0); + masks.push(0); + + // INT#(1) + masks.push(0x7); // allow INTA#-INTD# (1 | 2 | 3 | 4) + } + + let pci_node = fdt.begin_node("pci")?; + fdt.property_string("compatible", "pci-host-cam-generic")?; + fdt.property_string("device_type", "pci")?; + fdt.property_array_u32("ranges", &ranges)?; + fdt.property_array_u32("bus-range", &bus_range)?; + fdt.property_u32("#address-cells", 3)?; + fdt.property_u32("#size-cells", 2)?; + fdt.property_array_u64("reg", &reg)?; + fdt.property_u32("#interrupt-cells", 1)?; + fdt.property_array_u32("interrupt-map", &interrupts)?; + fdt.property_array_u32("interrupt-map-mask", &masks)?; + fdt.property_u32("msi-parent", PHANDLE_AIA_IMSIC)?; + fdt.property_null("dma-coherent")?; + fdt.end_node(pci_node)?; + + Ok(()) +} + +/// Creates a flattened device tree containing all of the parameters for the +/// kernel and loads it into the guest memory at the specified offset. 
+/// +/// # Arguments +/// +/// * `fdt_max_size` - The amount of space reserved for the device tree +/// * `guest_mem` - The guest memory object +/// * `pci_irqs` - List of PCI device address to PCI interrupt number and pin mappings +/// * `pci_cfg` - Location of the memory-mapped PCI configuration space. +/// * `pci_ranges` - Memory ranges accessible via the PCI host controller. +/// * `num_cpus` - Number of virtual CPUs the guest will have +/// * `fdt_load_offset` - The offset into physical memory for the device tree +/// * `cmdline` - The kernel commandline +/// * `initrd` - An optional tuple of initrd guest physical address and size +/// * `timebase_frequency` - The time base frequency for the VM. +pub fn create_fdt( + fdt_max_size: usize, + guest_mem: &GuestMemory, + pci_irqs: Vec<(PciAddress, u32, PciInterruptPin)>, + pci_cfg: PciConfigRegion, + pci_ranges: &[PciRange], + num_cpus: u32, + fdt_load_offset: u64, + aia_num_ids: usize, + aia_num_sources: usize, + cmdline: &str, + initrd: Option<(GuestAddress, usize)>, + timebase_frequency: u32, +) -> Result<()> { + let mut fdt = FdtWriter::new(&[]); + + // The whole thing is put into one giant node with some top level properties + let root_node = fdt.begin_node("")?; + fdt.property_string("compatible", "linux,dummy-virt")?; + fdt.property_u32("#address-cells", 0x2)?; + fdt.property_u32("#size-cells", 0x2)?; + create_chosen_node(&mut fdt, cmdline, initrd)?; + create_memory_node(&mut fdt, guest_mem)?; + create_cpu_nodes(&mut fdt, num_cpus, timebase_frequency)?; + create_aia_node(&mut fdt, num_cpus as usize, aia_num_ids, aia_num_sources)?; + create_pci_nodes(&mut fdt, pci_irqs, pci_cfg, pci_ranges)?; + + // End giant node + fdt.end_node(root_node)?; + + let fdt_final = fdt.finish()?; + if fdt_final.len() > fdt_max_size { + return Err(Error::TotalSizeTooLarge); + } + + let fdt_address = GuestAddress(RISCV64_PHYS_MEM_START + fdt_load_offset); + let written = guest_mem + .write_at_addr(fdt_final.as_slice(), 
fdt_address) + .map_err(|_| Error::FdtGuestMemoryWriteError)?; + if written < fdt_final.len() { + return Err(Error::FdtGuestMemoryWriteError); + } + Ok(()) +} diff --git a/riscv64/src/lib.rs b/riscv64/src/lib.rs new file mode 100644 index 0000000000..d3fa2141f8 --- /dev/null +++ b/riscv64/src/lib.rs @@ -0,0 +1,504 @@ +// Copyright 2023 The ChromiumOS Authors +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +//! RISC-V 64-bit architecture support. + +#![cfg(target_arch = "riscv64")] + +use std::collections::BTreeMap; +use std::io::{self}; +use std::path::PathBuf; +use std::sync::mpsc; +use std::sync::Arc; + +use arch::get_serial_cmdline; +use arch::GetSerialCmdlineError; +use arch::MsrConfig; +use arch::MsrExitHandlerError; +use arch::RunnableLinuxVm; +use arch::VmComponents; +use arch::VmImage; +use base::Event; +use base::SendTube; +use devices::serial_device::SerialHardware; +use devices::serial_device::SerialParameters; +use devices::Bus; +use devices::BusDeviceObj; +use devices::BusError; +use devices::IrqChipRiscv64; +use devices::PciAddress; +use devices::PciConfigMmio; +use devices::PciDevice; +use devices::PciRootCommand; +use hypervisor::CoreRegister; +use hypervisor::CpuConfigRiscv64; +use hypervisor::Hypervisor; +use hypervisor::ProtectionType; +use hypervisor::TimerRegister; +use hypervisor::VcpuInitRiscv64; +use hypervisor::VcpuRegister; +use hypervisor::VcpuRiscv64; +use hypervisor::Vm; +use hypervisor::VmRiscv64; +#[cfg(windows)] +use jail::FakeMinijailStub as Minijail; +#[cfg(unix)] +use minijail::Minijail; +use remain::sorted; +use resources::AddressRange; +use resources::SystemAllocator; +use resources::SystemAllocatorConfig; +use sync::Mutex; +use thiserror::Error; +use vm_control::BatteryType; +use vm_memory::GuestAddress; +use vm_memory::MemoryRegionOptions; + +mod fdt; + +// We place the kernel at offset 2MB +const RISCV64_KERNEL_OFFSET: u64 = 0x20_0000; +const RISCV64_INITRD_ALIGN: 
u64 = 8; +const RISCV64_FDT_ALIGN: u64 = 0x40_0000; + +// This indicates the start of DRAM inside the physical address space. +const RISCV64_PHYS_MEM_START: u64 = 0x8000_0000; + +// PCI MMIO configuration region base address. +const RISCV64_PCI_CFG_BASE: u64 = 0x1_0000; +// PCI MMIO configuration region size. +const RISCV64_PCI_CFG_SIZE: u64 = 0x100_0000; +// This is the base address of MMIO devices. +const RISCV64_MMIO_BASE: u64 = 0x0300_0000; +// Size of the whole MMIO region. +const RISCV64_MMIO_SIZE: u64 = 0x10_0000; + +const RISCV64_FDT_MAX_SIZE: u64 = 0x1_0000; + +fn get_kernel_addr() -> GuestAddress { + GuestAddress(RISCV64_PHYS_MEM_START + RISCV64_KERNEL_OFFSET) +} + +const RISCV64_IRQ_BASE: u32 = 1; + +#[sorted] +#[derive(Error, Debug)] +pub enum Error { + #[error("unable to clone an Event: {0}")] + CloneEvent(base::Error), + #[error("failed to clone IRQ chip: {0}")] + CloneIrqChip(base::Error), + #[error("the given kernel command line was invalid: {0}")] + Cmdline(kernel_cmdline::Error), + #[error("unable to make an Event: {0}")] + CreateEvent(base::Error), + #[error("FDT could not be created: {0}")] + CreateFdt(cros_fdt::Error), + #[error("failed to create a PCI root hub: {0}")] + CreatePciRoot(arch::DeviceRegistrationError), + #[error("failed to create platform bus: {0}")] + CreatePlatformBus(arch::DeviceRegistrationError), + #[error("unable to create serial devices: {0}")] + CreateSerialDevices(arch::DeviceRegistrationError), + #[error("failed to create socket: {0}")] + CreateSocket(io::Error), + #[error("failed to create VCPU: {0}")] + CreateVcpu(base::Error), + #[error("vm created wrong kind of vcpu")] + DowncastVcpu, + #[error("failed to finalize devices: {0}")] + FinalizeDevices(base::Error), + #[error("failed to finalize IRQ chip: {0}")] + FinalizeIrqChip(base::Error), + #[error("failed to get serial cmdline: {0}")] + GetSerialCmdline(GetSerialCmdlineError), + #[error("Failed to get the timer base frequency: {0}")] + GetTimebase(base::Error), + 
#[error("Image type not supported on riscv")] + ImageTypeUnsupported, + #[error("initrd could not be loaded: {0}")] + InitrdLoadFailure(arch::LoadImageError), + #[error("kernel could not be loaded: {0}")] + KernelLoadFailure(arch::LoadImageError), + #[error("protected vms not supported on riscv(yet)")] + ProtectedVmUnsupported, + #[error("ramoops address is different from high_mmio_base: {0} vs {1}")] + RamoopsAddress(u64, u64), + #[error("failed to register irq fd: {0}")] + RegisterIrqfd(base::Error), + #[error("error registering PCI bus: {0}")] + RegisterPci(BusError), + #[error("error registering virtual socket device: {0}")] + RegisterVsock(arch::DeviceRegistrationError), + #[error("failed to set device attr: {0}")] + SetDeviceAttr(base::Error), + #[error("failed to set register: {0}")] + SetReg(base::Error), + #[error("Timebase frequency too large")] + TimebaseTooLarge, + #[error("this function isn't supported")] + Unsupported, + #[error("failed to initialize VCPU: {0}")] + VcpuInit(base::Error), +} + +pub type Result = std::result::Result; + +pub struct Riscv64; + +impl arch::LinuxArch for Riscv64 { + type Error = Error; + + /// Returns a Vec of the valid memory addresses. + /// These should be used to configure the GuestMemory structure for the platform. 
+ fn guest_memory_layout( + components: &VmComponents, + _hypervisor: &impl Hypervisor, + ) -> std::result::Result, Self::Error> { + Ok(vec![( + GuestAddress(RISCV64_PHYS_MEM_START), + components.memory_size, + Default::default(), + )]) + } + + fn get_system_allocator_config(vm: &V) -> SystemAllocatorConfig { + get_resource_allocator_config(vm.get_memory().memory_size(), vm.get_guest_phys_addr_bits()) + } + + fn build_vm( + mut components: VmComponents, + _vm_evt_wrtube: &SendTube, + system_allocator: &mut SystemAllocator, + serial_parameters: &BTreeMap<(SerialHardware, u8), SerialParameters>, + serial_jail: Option, + (_bat_type, _bat_jail): (Option, Option), + mut vm: V, + ramoops_region: Option, + devices: Vec<(Box, Option)>, + irq_chip: &mut dyn IrqChipRiscv64, + vcpu_ids: &mut Vec, + _dump_device_tree_blob: Option, + _debugcon_jail: Option, + #[cfg(feature = "swap")] swap_controller: Option<&swap::SwapController>, + ) -> std::result::Result, Self::Error> + where + V: VmRiscv64, + Vcpu: VcpuRiscv64, + { + if components.hv_cfg.protection_type == ProtectionType::Protected { + return Err(Error::ProtectedVmUnsupported); + } + + let mem = vm.get_memory().clone(); + + let mmio_bus = Arc::new(Bus::new()); + + // Riscv doesn't really use the io bus like x86, so just create an empty bus. 
+ let io_bus = Arc::new(Bus::new()); + + let com_evt_1_3 = Event::new().map_err(Error::CreateEvent)?; + let com_evt_2_4 = Event::new().map_err(Error::CreateEvent)?; + arch::add_serial_devices( + components.hv_cfg.protection_type, + &mmio_bus, + &com_evt_1_3, + &com_evt_2_4, + serial_parameters, + serial_jail, + #[cfg(feature = "swap")] + swap_controller, + ) + .map_err(Error::CreateSerialDevices)?; + + let (pci_devices, others): (Vec<_>, Vec<_>) = devices + .into_iter() + .partition(|(dev, _)| dev.as_pci_device().is_some()); + let pci_devices = pci_devices + .into_iter() + .map(|(dev, jail_orig)| (dev.into_pci_device().unwrap(), jail_orig)) + .collect(); + let (pci, pci_irqs, mut pid_debug_label_map, _amls) = arch::generate_pci_root( + pci_devices, + irq_chip.as_irq_chip_mut(), + Arc::clone(&mmio_bus), + Arc::clone(&io_bus), + system_allocator, + &mut vm, + devices::IMSIC_MAX_INT_IDS as usize, + None, + #[cfg(feature = "swap")] + swap_controller, + ) + .map_err(Error::CreatePciRoot)?; + + let pci_root = Arc::new(Mutex::new(pci)); + let pci_bus = Arc::new(Mutex::new(PciConfigMmio::new(pci_root.clone(), 8))); + let (platform_devices, _others): (Vec<_>, Vec<_>) = others + .into_iter() + .partition(|(dev, _)| dev.as_platform_device().is_some()); + + let platform_devices = platform_devices + .into_iter() + .map(|(dev, jail_orig)| (*(dev.into_platform_device().unwrap()), jail_orig)) + .collect(); + let (platform_devices, mut platform_pid_debug_label_map) = + arch::sys::unix::generate_platform_bus( + platform_devices, + irq_chip.as_irq_chip_mut(), + &mmio_bus, + system_allocator, + #[cfg(feature = "swap")] + swap_controller, + ) + .map_err(Error::CreatePlatformBus)?; + pid_debug_label_map.append(&mut platform_pid_debug_label_map); + + let mut cmdline = get_base_linux_cmdline(); + + if let Some(ramoops_region) = ramoops_region { + arch::pstore::add_ramoops_kernel_cmdline(&mut cmdline, &ramoops_region) + .map_err(Error::Cmdline)?; + } + + mmio_bus + .insert(pci_bus, 
RISCV64_PCI_CFG_BASE, RISCV64_PCI_CFG_SIZE) + .map_err(Error::RegisterPci)?; + + get_serial_cmdline(&mut cmdline, serial_parameters, "mmio") + .map_err(Error::GetSerialCmdline)?; + for param in components.extra_kernel_params { + cmdline.insert_str(&param).map_err(Error::Cmdline)?; + } + + // Event used by PMDevice to notify crosvm that guest OS is trying to suspend. + let suspend_evt = Event::new().map_err(Error::CreateEvent)?; + + // separate out image loading from other setup to get a specific error for + // image loading + let initrd; + let kernel_initrd_end = match components.vm_image { + VmImage::Bios(ref _bios) => { + return Err(Error::ImageTypeUnsupported); + } + VmImage::Kernel(ref mut kernel_image) => { + let kernel_size = + arch::load_image(&mem, kernel_image, get_kernel_addr(), u64::max_value()) + .map_err(Error::KernelLoadFailure)?; + let kernel_end = get_kernel_addr().offset() + kernel_size as u64; + initrd = match components.initrd_image { + Some(initrd_file) => { + let mut initrd_file = initrd_file; + let initrd_addr = + (kernel_end + (RISCV64_INITRD_ALIGN - 1)) & !(RISCV64_INITRD_ALIGN - 1); + let initrd_max_size = + components.memory_size - (initrd_addr - RISCV64_PHYS_MEM_START); + let initrd_addr = GuestAddress(initrd_addr); + let initrd_size = + arch::load_image(&mem, &mut initrd_file, initrd_addr, initrd_max_size) + .map_err(Error::InitrdLoadFailure)?; + Some((initrd_addr, initrd_size)) + } + None => None, + }; + if let Some((initrd_addr, initrd_size)) = initrd { + initrd_addr.offset() + initrd_size as u64 - RISCV64_PHYS_MEM_START + } else { + kernel_end - RISCV64_PHYS_MEM_START + } + } + }; + + // Creates vcpus early as the irqchip needs them created to attach interrupts. + let vcpu_count = components.vcpu_count; + let mut vcpus = Vec::with_capacity(vcpu_count); + for vcpu_id in 0..vcpu_count { + let vcpu: Vcpu = *vm + .create_vcpu(vcpu_id) + .map_err(Error::CreateVcpu)? 
+ .downcast::() + .map_err(|_| Error::DowncastVcpu)?; + vcpus.push(vcpu); + vcpu_ids.push(vcpu_id); + } + + irq_chip.finalize().map_err(Error::FinalizeIrqChip)?; + + irq_chip + .finalize_devices(system_allocator, &io_bus, &mmio_bus) + .map_err(Error::FinalizeDevices)?; + let (aia_num_ids, aia_num_sources) = irq_chip.get_num_ids_sources(); + + let pci_cfg = fdt::PciConfigRegion { + base: RISCV64_PCI_CFG_BASE, + size: RISCV64_PCI_CFG_SIZE, + }; + + let pci_ranges: Vec = system_allocator + .mmio_pools() + .iter() + .map(|range| fdt::PciRange { + space: fdt::PciAddressSpace::Memory64, + bus_address: range.start, + cpu_physical_address: range.start, + size: range.len().unwrap(), + prefetchable: false, + }) + .collect(); + + let fdt_offset = (kernel_initrd_end + (RISCV64_FDT_ALIGN - 1)) & !(RISCV64_FDT_ALIGN - 1); + + let timebase_freq: u32 = vcpus[0] + .get_one_reg(VcpuRegister::Timer(TimerRegister::TimebaseFrequency)) + .map_err(Error::GetTimebase)? + .try_into() + .map_err(|_| Error::TimebaseTooLarge)?; + + fdt::create_fdt( + RISCV64_FDT_MAX_SIZE as usize, + &mem, + pci_irqs, + pci_cfg, + &pci_ranges, + components.vcpu_count as u32, + fdt_offset, + aia_num_ids, + aia_num_sources, + cmdline.as_str(), + initrd, + timebase_freq, + ) + .map_err(Error::CreateFdt)?; + + let vcpu_init = vec![ + VcpuInitRiscv64::new(GuestAddress(fdt_offset + RISCV64_PHYS_MEM_START)); + vcpu_count + ]; + + Ok(RunnableLinuxVm { + vm, + vcpu_count: components.vcpu_count, + vcpus: Some(vcpus), + vcpu_init, + vcpu_affinity: components.vcpu_affinity, + no_smt: false, + irq_chip: irq_chip.try_box_clone().map_err(Error::CloneIrqChip)?, + has_bios: false, + io_bus, + mmio_bus, + pid_debug_label_map, + resume_notify_devices: Vec::new(), + root_config: pci_root, + platform_devices, + hotplug_bus: BTreeMap::new(), + rt_cpus: components.rt_cpus, + delay_rt: components.delay_rt, + suspend_evt, + bat_control: None, + pm: None, + devices_thread: None, + vm_request_tube: None, + }) + } + + fn configure_vcpu( 
+ _vm: &V, + _hypervisor: &dyn Hypervisor, + _irq_chip: &mut dyn IrqChipRiscv64, + vcpu: &mut dyn VcpuRiscv64, + _vcpu_init: VcpuInitRiscv64, + vcpu_id: usize, + _num_cpus: usize, + _has_bios: bool, + cpu_config: Option, + ) -> std::result::Result<(), Self::Error> { + vcpu.set_one_reg(VcpuRegister::Core(CoreRegister::Pc), get_kernel_addr().0) + .map_err(Self::Error::SetReg)?; + vcpu.set_one_reg(VcpuRegister::Core(CoreRegister::A0), vcpu_id as u64) + .map_err(Self::Error::SetReg)?; + vcpu.set_one_reg( + VcpuRegister::Core(CoreRegister::A1), + cpu_config.unwrap().fdt_address.0, + ) + .map_err(Self::Error::SetReg)?; + + Ok(()) + } + + fn register_pci_device( + _linux: &mut RunnableLinuxVm, + _device: Box, + _minijail: Option, + _resources: &mut SystemAllocator, + _tube: &mpsc::Sender, + #[cfg(feature = "swap")] _swap_controller: Option<&swap::SwapController>, + ) -> std::result::Result { + // hotplug function isn't verified on Riscv64, so set it unsupported here. + Err(Error::Unsupported) + } +} + +fn get_high_mmio_base_size(mem_size: u64, guest_phys_addr_bits: u8) -> (u64, u64) { + let guest_phys_end = 1u64 << guest_phys_addr_bits; + let high_mmio_base = RISCV64_PHYS_MEM_START + mem_size; + let size = guest_phys_end + .checked_sub(high_mmio_base) + .unwrap_or_else(|| { + panic!( + "guest_phys_end {:#x} < high_mmio_base {:#x}", + guest_phys_end, high_mmio_base, + ); + }); + (high_mmio_base, size) +} + +fn get_base_linux_cmdline() -> kernel_cmdline::Cmdline { + let mut cmdline = kernel_cmdline::Cmdline::new(base::pagesize()); + cmdline.insert_str("panic=-1").unwrap(); + cmdline +} + +/// Returns a system resource allocator configuration. +/// +/// # Arguments +/// +/// * `mem_size` - Size of guest memory (RAM) in bytes. +/// * `guest_phys_addr_bits` - Size of guest physical addresses (IPA) in bits. 
+fn get_resource_allocator_config(mem_size: u64, guest_phys_addr_bits: u8) -> SystemAllocatorConfig { + let (high_mmio_base, high_mmio_size) = get_high_mmio_base_size(mem_size, guest_phys_addr_bits); + SystemAllocatorConfig { + io: None, + low_mmio: AddressRange::from_start_and_size(RISCV64_MMIO_BASE, RISCV64_MMIO_SIZE) + .expect("invalid mmio region"), + high_mmio: AddressRange::from_start_and_size(high_mmio_base, high_mmio_size) + .expect("invalid high mmio region"), + platform_mmio: None, + first_irq: RISCV64_IRQ_BASE, + } +} + +pub struct MsrHandlers; + +impl MsrHandlers { + pub fn new() -> Self { + Self {} + } + + pub fn read(&self, _index: u32) -> Option { + None + } + + pub fn write(&self, _index: u32, _data: u64) -> Option<()> { + None + } + + pub fn add_handler( + &mut self, + _index: u32, + _msr_config: MsrConfig, + _cpu_id: usize, + ) -> std::result::Result<(), MsrExitHandlerError> { + Ok(()) + } +}