devices: virtio: gpu: snapshot support for 2d mode

General strategy:

  * We only snapshot the state relevant to the virtio-gpu 2d mode
    protocol (i.e. scanouts, resources, fences).
  * The GpuDisplay is recreated from scratch; we don't want to snapshot
    the state of a Wayland socket (for example).
  * No state about pending virtio requests needs to be snapshotted
    because the 2d backend completes them synchronously.

BUG=b:266514608

Change-Id: I9cfec93b1e4c9bcbb55dca09a19f6fc146db2b54
Reviewed-on: https://chromium-review.googlesource.com/c/crosvm/crosvm/+/4777342
Reviewed-by: Daniel Verkamp <dverkamp@chromium.org>
Reviewed-by: Gurchetan Singh <gurchetansingh@chromium.org>
This commit is contained in:
Frederick Mayle 2023-09-07 16:46:19 -07:00 committed by crosvm LUCI
parent 6c94b36864
commit 34846b558b
4 changed files with 352 additions and 28 deletions

View file

@ -73,6 +73,7 @@ use self::protocol::*;
use self::virtio_gpu::to_rutabaga_descriptor;
pub use self::virtio_gpu::ProcessDisplayResult;
use self::virtio_gpu::VirtioGpu;
use self::virtio_gpu::VirtioGpuSnapshot;
use super::copy_config;
use super::resource_bridge::ResourceRequest;
use super::resource_bridge::ResourceResponse;
@ -134,7 +135,7 @@ pub struct VirtioScanoutBlobData {
pub offsets: [u32; 4],
}
#[derive(PartialEq, Eq, PartialOrd, Ord)]
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
enum VirtioGpuRing {
Global,
ContextSpecific { ctx_id: u32, ring_idx: u8 },
@ -153,6 +154,25 @@ pub struct FenceState {
completed_fences: BTreeMap<VirtioGpuRing, u64>,
}
/// Serializable subset of `FenceState` that is carried across snapshot/restore.
///
/// Only the completed-fence map is stored: `FenceState::snapshot` asserts that there are no
/// pending fence descriptors, so there is nothing else to record.
#[derive(Serialize, Deserialize)]
struct FenceStateSnapshot {
// Copy of `FenceState::completed_fences`, keyed by ring.
// NOTE(review): the `u64` is presumably the last completed fence id for that ring -- confirm.
completed_fences: BTreeMap<VirtioGpuRing, u64>,
}
impl FenceState {
    /// Produces the serializable form of this fence state.
    ///
    /// # Panics
    ///
    /// Panics if any fence descriptors are still pending, because those are not part of the
    /// snapshot.
    fn snapshot(&self) -> FenceStateSnapshot {
        assert!(self.descs.is_empty(), "can't snapshot with pending fences");
        let completed_fences = self.completed_fences.clone();
        FenceStateSnapshot { completed_fences }
    }

    /// Replaces the completed-fence map with the one stored in `snapshot`.
    ///
    /// # Panics
    ///
    /// Panics if fence descriptors are already queued, i.e. the device has been in use.
    fn restore(&mut self, snapshot: FenceStateSnapshot) {
        assert!(self.descs.is_empty(), "can't restore activated device");
        let FenceStateSnapshot { completed_fences } = snapshot;
        self.completed_fences = completed_fences;
    }
}
pub trait QueueReader {
fn pop(&self) -> Option<DescriptorChain>;
fn add_used(&self, desc_chain: DescriptorChain, len: u32);
@ -849,12 +869,18 @@ struct Worker {
}
struct WorkerReturn {
// None if device not yet activated.
queues: Option<Vec<Queue>>,
#[cfg(unix)]
gpu_control_tube: Tube,
resource_bridges: ResourceBridges,
event_devices: Vec<EventDevice>,
// None if device not yet activated.
activated_state: Option<(Vec<Queue>, WorkerSnapshot)>,
}
/// Serializable state of the GPU worker.
///
/// Built by the worker thread when it is stopped with a sleep request pending, and applied
/// to a freshly created worker before it starts running again.
#[derive(Serialize, Deserialize)]
struct WorkerSnapshot {
// Completed-fence bookkeeping taken from the worker's `FenceState`.
fence_state_snapshot: FenceStateSnapshot,
// Scanout/resource/rutabaga state taken from the worker's `VirtioGpu`.
virtio_gpu_snapshot: VirtioGpuSnapshot,
}
impl Worker {
@ -1131,6 +1157,7 @@ struct GpuActivationResources {
interrupt: Interrupt,
ctrl_queue: SharedQueueReader,
cursor_queue: LocalQueueReader,
worker_snapshot: Option<WorkerSnapshot>,
}
pub struct Gpu {
@ -1172,6 +1199,10 @@ pub struct Gpu {
capset_mask: u64,
#[cfg(unix)]
gpu_cgroup_path: Option<PathBuf>,
/// Used to differentiate worker kill events that are for shutdown vs sleep. `virtio_sleep`
/// sets this to true while stopping the worker.
sleep_requested: Arc<AtomicBool>,
worker_snapshot: Option<WorkerSnapshot>,
}
impl Gpu {
@ -1274,6 +1305,8 @@ impl Gpu {
capset_mask: gpu_parameters.capset_mask,
#[cfg(unix)]
gpu_cgroup_path: gpu_cgroup_path.cloned(),
sleep_requested: Arc::new(AtomicBool::new(false)),
worker_snapshot: None,
}
}
@ -1378,6 +1411,7 @@ impl Gpu {
let (init_finished_tx, init_finished_rx) = mpsc::channel();
let (activate_tx, activate_rx) = mpsc::channel();
let sleep_requested = self.sleep_requested.clone();
let worker_thread = WorkerThread::start("v_gpu", move |kill_evt| {
#[cfg(unix)]
@ -1397,11 +1431,11 @@ impl Gpu {
Err(e) => {
error!("failed to build rutabaga {}", e);
return WorkerReturn {
queues: None,
#[cfg(unix)]
gpu_control_tube,
resource_bridges,
event_devices,
activated_state: None,
};
}
};
@ -1422,12 +1456,12 @@ impl Gpu {
Some(backend) => backend,
None => {
return WorkerReturn {
queues: None,
#[cfg(unix)]
gpu_control_tube,
resource_bridges,
event_devices,
}
activated_state: None,
};
}
};
@ -1446,11 +1480,11 @@ impl Gpu {
// Other half of channel was dropped.
Err(mpsc::RecvError) => {
return WorkerReturn {
queues: None,
#[cfg(unix)]
gpu_control_tube,
resource_bridges,
event_devices: virtio_gpu.display().borrow_mut().take_event_devices(),
activated_state: None,
};
}
};
@ -1478,27 +1512,61 @@ impl Gpu {
#[cfg(windows)]
gpu_display_wait_descriptor_ctrl_rd,
};
// If a snapshot was provided, restore from it.
if let Some(snapshot) = activation_resources.worker_snapshot {
worker
.state
.fence_state
.lock()
.restore(snapshot.fence_state_snapshot);
worker
.state
.virtio_gpu
.restore(snapshot.virtio_gpu_snapshot, &worker.mem)
.expect("failed to restore VirtioGpu");
}
worker.run();
let event_devices = worker
.state
.virtio_gpu
.display()
.borrow_mut()
.take_event_devices();
// Need to drop `Frontend` for the `Arc::try_unwrap` below to succeed.
std::mem::drop(worker.state);
// If we are stopping the worker because of a virtio_sleep request, then take a
// snapshot and reclaim the queues.
let activated_state = if sleep_requested.load(Ordering::SeqCst) {
let worker_snapshot = WorkerSnapshot {
fence_state_snapshot: worker.state.fence_state.lock().snapshot(),
virtio_gpu_snapshot: worker
.state
.virtio_gpu
.snapshot()
.expect("failed to snapshot VirtioGpu"),
};
// Need to drop `Frontend` for the `Arc::try_unwrap` below to succeed.
std::mem::drop(worker.state);
Some((
vec![
match Arc::try_unwrap(worker.ctrl_queue.queue) {
Ok(x) => x.into_inner(),
Err(_) => panic!("too many refs on ctrl_queue"),
},
worker.cursor_queue.queue.into_inner(),
],
worker_snapshot,
))
} else {
None
};
WorkerReturn {
queues: Some(vec![
match Arc::try_unwrap(worker.ctrl_queue.queue) {
Ok(x) => x.into_inner(),
Err(_) => panic!("too many refs on ctrl_queue"),
},
worker.cursor_queue.queue.into_inner(),
]),
#[cfg(unix)]
gpu_control_tube: worker.gpu_control_tube,
resource_bridges: worker.resource_bridges,
event_devices,
activated_state,
}
});
@ -1690,6 +1758,7 @@ impl VirtioDevice for Gpu {
interrupt,
ctrl_queue,
cursor_queue,
worker_snapshot: self.worker_snapshot.take(),
})
.expect("failed to send activation resources to worker thread");
@ -1711,16 +1780,28 @@ impl VirtioDevice for Gpu {
true
}
// Notes on sleep/wake/snapshot/restore functionality.
//
// * Only 2d mode is supported so far.
// * We only snapshot the state relevant to the virtio-gpu 2d mode protocol (i.e. scanouts,
// resources, fences).
// * The GpuDisplay is recreated from scratch; we don't want to snapshot the state of a
// Wayland socket (for example).
// * No state about pending virtio requests needs to be snapshotted because the 2d backend
// completes them synchronously.
fn virtio_sleep(&mut self) -> anyhow::Result<Option<BTreeMap<usize, Queue>>> {
if let Some((activate_tx, worker_thread)) = self.worker_thread.take() {
self.sleep_requested.store(true, Ordering::SeqCst);
drop(activate_tx);
let WorkerReturn {
queues,
#[cfg(unix)]
gpu_control_tube,
resource_bridges,
event_devices,
activated_state,
} = worker_thread.stop();
self.sleep_requested.store(false, Ordering::SeqCst);
self.resource_bridges = Some(resource_bridges);
#[cfg(unix)]
@ -1729,10 +1810,16 @@ impl VirtioDevice for Gpu {
}
self.event_devices = Some(event_devices);
match queues {
Some(queues) => return Ok(Some(queues.into_iter().enumerate().collect())),
match activated_state {
Some((queues, worker_snapshot)) => {
self.worker_snapshot = Some(worker_snapshot);
return Ok(Some(queues.into_iter().enumerate().collect()));
}
// Device not activated yet.
None => return Ok(None),
None => {
self.worker_snapshot = None;
return Ok(None);
}
}
}
Ok(None)
@ -1755,6 +1842,15 @@ impl VirtioDevice for Gpu {
}
}
}
fn virtio_snapshot(&self) -> anyhow::Result<serde_json::Value> {
Ok(serde_json::to_value(&self.worker_snapshot)?)
}
fn virtio_restore(&mut self, data: serde_json::Value) -> anyhow::Result<()> {
self.worker_snapshot = serde_json::from_value(data)?;
Ok(())
}
}
/// This struct takes the ownership of resource bridges and tracks which ones should be processed.

View file

@ -807,6 +807,8 @@ impl Display for GpuResponse {
}
}
// Marks `GpuResponse` as a standard error type using the trait's default methods; the
// `Display` impl above provides the message.
// NOTE(review): presumably needed so `?` can convert a `GpuResponse` error into
// `anyhow::Error` in the snapshot/restore paths -- confirm.
impl std::error::Error for GpuResponse {}
/// An error indicating something went wrong decoding a `GpuCommand`.
#[sorted]
#[derive(Error, Debug)]

View file

@ -13,6 +13,7 @@ use std::sync::atomic::AtomicBool;
use std::sync::atomic::Ordering;
use std::sync::Arc;
use anyhow::Context;
use base::error;
use base::FromRawDescriptor;
use base::IntoRawDescriptor;
@ -40,6 +41,8 @@ use rutabaga_gfx::RUTABAGA_MAP_ACCESS_WRITE;
use rutabaga_gfx::RUTABAGA_MAP_CACHE_MASK;
use rutabaga_gfx::RUTABAGA_MEM_HANDLE_TYPE_DMABUF;
use rutabaga_gfx::RUTABAGA_MEM_HANDLE_TYPE_OPAQUE_FD;
use serde::Deserialize;
use serde::Serialize;
use sync::Mutex;
use vm_control::gpu::DisplayParameters;
use vm_control::gpu::GpuControlCommand;
@ -86,6 +89,20 @@ struct VirtioGpuResource {
scanout_data: Option<VirtioScanoutBlobData>,
display_import: Option<u32>,
rutabaga_external_mapping: bool,
// Only saved for snapshotting, so that we can re-attach backing iovecs with the correct new
// host addresses.
backing_iovecs: Option<Vec<(GuestAddress, usize)>>,
}
/// Serializable description of a `VirtioGpuResource`.
///
/// Contains the arguments needed to rebuild the resource with `VirtioGpuResource::new` plus
/// the guest backing list, which is re-attached on restore.
#[derive(Serialize, Deserialize)]
struct VirtioGpuResourceSnapshot {
// Id the resource is stored under.
resource_id: u32,
// Resource dimensions as passed to `VirtioGpuResource::new`.
width: u32,
height: u32,
// Resource size as passed to `VirtioGpuResource::new`.
size: u64,
// Guest (address, length) pairs attached as backing, or `None` if nothing is attached.
backing_iovecs: Option<Vec<(GuestAddress, usize)>>,
}
impl VirtioGpuResource {
@ -101,15 +118,35 @@ impl VirtioGpuResource {
scanout_data: None,
display_import: None,
rutabaga_external_mapping: false,
backing_iovecs: None,
}
}
fn snapshot(&self) -> VirtioGpuResourceSnapshot {
// Only the 2D backend is support and it doesn't use these fields.
assert!(self.shmem_offset.is_none());
assert!(self.scanout_data.is_none());
assert!(self.display_import.is_none());
assert_eq!(self.rutabaga_external_mapping, false);
VirtioGpuResourceSnapshot {
resource_id: self.resource_id,
width: self.width,
height: self.height,
size: self.size,
backing_iovecs: self.backing_iovecs.clone(),
}
}
fn restore(s: VirtioGpuResourceSnapshot) -> Self {
let mut resource = VirtioGpuResource::new(s.resource_id, s.width, s.height, s.size);
resource.backing_iovecs = s.backing_iovecs;
resource
}
}
struct VirtioGpuScanout {
width: u32,
height: u32,
surface_id: Option<u32>,
resource_id: Option<NonZeroU32>,
scanout_type: SurfaceType,
// If this scanout is a primary scanout, the scanout id.
scanout_id: Option<u32>,
@ -117,6 +154,30 @@ struct VirtioGpuScanout {
display_params: Option<GpuDisplayParameters>,
// If this scanout is a cursor scanout, the scanout that this cursor is overlaid onto.
parent_surface_id: Option<u32>,
surface_id: Option<u32>,
parent_scanout_id: Option<u32>,
resource_id: Option<NonZeroU32>,
position: Option<(u32, u32)>,
}
/// Serializable description of a `VirtioGpuScanout`.
///
/// On restore, the size, type, id and display parameters are only compared against the
/// freshly created scanout; the resource association, surface presence and position are
/// re-applied.
#[derive(Serialize, Deserialize)]
struct VirtioGpuScanoutSnapshot {
// Scanout dimensions; checked for equality on restore.
width: u32,
height: u32,
// Primary scanout or cursor; checked for equality on restore.
scanout_type: SurfaceType,
// Scanout id (set for primary scanouts, `None` for the cursor scanout).
scanout_id: Option<u32>,
// Display parameters (set for primary scanouts, `None` for the cursor scanout).
display_params: Option<GpuDisplayParameters>,
// The surface IDs aren't guest visible. Instead of storing them and then having to fix up
// `gpu_display` internals, we'll allocate new ones on restore. So, we just need to store
// whether a surface was allocated and the parent's scanout ID.
has_surface: bool,
parent_scanout_id: Option<u32>,
// Resource currently associated with the scanout, if any.
resource_id: Option<NonZeroU32>,
// Last position successfully applied through `set_position`, if any.
position: Option<(u32, u32)>,
}
impl VirtioGpuScanout {
@ -128,9 +189,11 @@ impl VirtioGpuScanout {
scanout_type: SurfaceType::Scanout,
scanout_id: Some(scanout_id),
display_params: Some(params),
surface_id: None,
resource_id: None,
parent_surface_id: None,
surface_id: None,
parent_scanout_id: None,
resource_id: None,
position: None,
}
}
@ -143,12 +206,56 @@ impl VirtioGpuScanout {
scanout_type: SurfaceType::Cursor,
scanout_id: None,
display_params: None,
surface_id: None,
resource_id: None,
parent_surface_id: None,
surface_id: None,
parent_scanout_id: None,
resource_id: None,
position: None,
}
}
/// Captures this scanout's state for a snapshot.
///
/// The surface id itself is not recorded, only whether a surface currently exists.
fn snapshot(&self) -> VirtioGpuScanoutSnapshot {
    let has_surface = self.surface_id.is_some();
    let display_params = self.display_params.clone();
    VirtioGpuScanoutSnapshot {
        width: self.width,
        height: self.height,
        scanout_type: self.scanout_type,
        scanout_id: self.scanout_id,
        display_params,
        has_surface,
        parent_scanout_id: self.parent_scanout_id,
        resource_id: self.resource_id,
        position: self.position,
    }
}
fn restore(
&mut self,
snapshot: VirtioGpuScanoutSnapshot,
parent_surface_id: Option<u32>,
display: &Rc<RefCell<GpuDisplay>>,
) -> VirtioGpuResult {
// Scanouts are mainly controlled by the host, we just need to make sure it looks same,
// restore the resource_id association, and create a surface in the display.
assert_eq!(self.width, snapshot.width);
assert_eq!(self.height, snapshot.height);
assert_eq!(self.scanout_type, snapshot.scanout_type);
assert_eq!(self.scanout_id, snapshot.scanout_id);
assert_eq!(self.display_params, snapshot.display_params);
self.resource_id = snapshot.resource_id;
if snapshot.has_surface {
self.create_surface(display, parent_surface_id)?;
} else {
self.release_surface(display);
}
if let Some((x, y)) = snapshot.position {
self.set_position(display, x, y)?;
}
Ok(OkNoData)
}
fn create_surface(
&mut self,
display: &Rc<RefCell<GpuDisplay>>,
@ -197,9 +304,15 @@ impl VirtioGpuScanout {
self.surface_id = None;
}
fn set_position(&self, display: &Rc<RefCell<GpuDisplay>>, x: u32, y: u32) -> VirtioGpuResult {
/// Moves this scanout's surface to `(x, y)` and remembers the position for snapshotting.
///
/// Does nothing (and records nothing) when the scanout has no surface.
///
/// # Errors
///
/// Propagates the display's error if the position cannot be applied; the remembered
/// position is left unchanged in that case.
fn set_position(
    &mut self,
    display: &Rc<RefCell<GpuDisplay>>,
    x: u32,
    y: u32,
) -> VirtioGpuResult {
    let surface_id = match self.surface_id {
        Some(id) => id,
        None => return Ok(OkNoData),
    };
    display.borrow_mut().set_position(surface_id, x, y)?;
    self.position = Some((x, y));
    Ok(OkNoData)
}
@ -312,6 +425,39 @@ pub struct VirtioGpu {
udmabuf_driver: Option<UdmabufDriver>,
}
// Only the 2D mode is supported. Notes on `VirtioGpu` fields:
//
// * display: re-initialized from scratch using the scanout snapshots
// * scanouts: snapshot'd
// * scanouts_updated: snapshot'd
// * cursor_scanout: snapshot'd
// * mapper: not needed for 2d mode
// * rutabaga: re-initialized from scratch using the resource snapshots
// * resources: snapshot'd
// * external_blob: not needed for 2d mode
// * udmabuf_driver: not needed for 2d mode
/// Serializable state of a `VirtioGpu` (2D mode only).
#[derive(Serialize, Deserialize)]
pub struct VirtioGpuSnapshot {
// Per-scanout snapshots, keyed the same way as `VirtioGpu::scanouts`. On restore the key
// set must match the scanouts of the newly created device.
scanouts: Map<u32, VirtioGpuScanoutSnapshot>,
// Value of the `scanouts_updated` flag at snapshot time.
scanouts_updated: bool,
// Snapshot of the cursor scanout.
cursor_scanout: VirtioGpuScanoutSnapshot,
// Opaque bytes written by `rutabaga.snapshot` and fed back to `rutabaga.restore`.
rutabaga: Vec<u8>,
// Per-resource snapshots, keyed by resource id.
resources: Map<u32, VirtioGpuResourceSnapshot>,
}
/// Serializable record describing a single rutabaga resource.
///
/// NOTE(review): nothing in the visible part of this change constructs or reads this type;
/// confirm it is used elsewhere, otherwise it may be removable.
#[derive(Serialize, Deserialize)]
struct RutabagaResourceSnapshotSerializable {
resource_id: u32,
width: u32,
height: u32,
// NOTE(review): presumably the size of the host-side copy of the resource -- confirm.
host_mem_size: usize,
// Guest (address, length) pairs attached as backing, if any.
backing_iovecs: Option<Vec<(GuestAddress, usize)>>,
// NOTE(review): presumably a bitmask of rutabaga components owning the resource -- confirm.
component_mask: u8,
size: u64,
}
fn sglist_to_rutabaga_iovecs(
vecs: &[(GuestAddress, usize)],
mem: &GuestMemory,
@ -720,14 +866,26 @@ impl VirtioGpu {
mem: &GuestMemory,
vecs: Vec<(GuestAddress, usize)>,
) -> VirtioGpuResult {
let resource = self
.resources
.get_mut(&resource_id)
.ok_or(ErrInvalidResourceId)?;
let rutabaga_iovecs = sglist_to_rutabaga_iovecs(&vecs[..], mem).map_err(|_| ErrUnspec)?;
self.rutabaga.attach_backing(resource_id, rutabaga_iovecs)?;
resource.backing_iovecs = Some(vecs);
Ok(OkNoData)
}
/// Detaches any previously attached iovecs from the resource.
///
/// The resource is looked up first, so an unknown `resource_id` is rejected with
/// `ErrInvalidResourceId` before rutabaga is touched. The recorded backing list is cleared
/// only after rutabaga has detached successfully.
pub fn detach_backing(&mut self, resource_id: u32) -> VirtioGpuResult {
    let tracked = self.resources.get_mut(&resource_id);
    let tracked = tracked.ok_or(ErrInvalidResourceId)?;

    self.rutabaga.detach_backing(resource_id)?;

    // Keep the snapshot bookkeeping in sync with rutabaga.
    tracked.backing_iovecs = None;
    Ok(OkNoData)
}
@ -1057,4 +1215,70 @@ impl VirtioGpu {
Ok(OkNoData)
}
/// Captures the 2D-mode state of this device: scanouts, cursor scanout, the
/// `scanouts_updated` flag, rutabaga's own serialized state and the tracked resources.
///
/// # Errors
///
/// Returns an error if rutabaga fails to write its snapshot.
///
/// # Panics
///
/// Panics if a tracked resource uses fields that the snapshot format does not cover (see
/// the assertions in the resource's `snapshot`).
pub fn snapshot(&self) -> anyhow::Result<VirtioGpuSnapshot> {
    let scanouts = self
        .scanouts
        .iter()
        .map(|(id, scanout)| (*id, scanout.snapshot()))
        .collect();
    let scanouts_updated = self.scanouts_updated.load(Ordering::SeqCst);
    let cursor_scanout = self.cursor_scanout.snapshot();

    // Rutabaga serializes itself into an in-memory byte buffer.
    let mut rutabaga_bytes = std::io::Cursor::new(Vec::new());
    self.rutabaga
        .snapshot(&mut rutabaga_bytes)
        .context("failed to snapshot rutabaga")?;

    let resources = self
        .resources
        .iter()
        .map(|(id, resource)| (*id, resource.snapshot()))
        .collect();

    Ok(VirtioGpuSnapshot {
        scanouts,
        scanouts_updated,
        cursor_scanout,
        rutabaga: rutabaga_bytes.into_inner(),
        resources,
    })
}
/// Restores the 2D-mode state captured by `snapshot` into this freshly created device.
///
/// Order of operations:
/// 1. every primary scanout is restored (no parent surface),
/// 2. the `scanouts_updated` flag is restored,
/// 3. the cursor scanout is restored on top of its parent scanout's new surface,
/// 4. rutabaga is restored from its serialized bytes,
/// 5. every resource is re-created and its guest backing is re-attached using `mem`, so
///    the backing is translated against the current guest memory mapping.
///
/// # Errors
///
/// Returns an error if a scanout, rutabaga, or a backing attachment fails to restore. If
/// re-attaching a backing fails, that resource is left tracked without a backing list.
///
/// # Panics
///
/// Panics if the snapshot's scanout ids differ from this device's scanouts, or if the
/// cursor's recorded parent scanout id does not exist.
pub fn restore(
    &mut self,
    snapshot: VirtioGpuSnapshot,
    mem: &GuestMemory,
) -> anyhow::Result<()> {
    assert!(self.scanouts.keys().eq(snapshot.scanouts.keys()));
    for (i, s) in snapshot.scanouts {
        self.scanouts.get_mut(&i).unwrap().restore(
            s,
            // Only the cursor scanout can have a parent.
            None,
            &self.display,
        )?;
    }
    self.scanouts_updated
        .store(snapshot.scanouts_updated, Ordering::SeqCst);

    // Surface ids are re-allocated on restore, so translate the recorded parent scanout id
    // into that scanout's new surface id.
    let cursor_parent_surface_id = snapshot
        .cursor_scanout
        .parent_scanout_id
        .and_then(|i| self.scanouts.get(&i).unwrap().surface_id);
    self.cursor_scanout.restore(
        snapshot.cursor_scanout,
        cursor_parent_surface_id,
        &self.display,
    )?;

    self.rutabaga
        .restore(&mut &snapshot.rutabaga[..])
        .context("failed to restore rutabaga")?;

    for (id, mut s) in snapshot.resources {
        // Move the backing list out instead of cloning it: `attach_backing` records the
        // list on the resource itself once rutabaga has accepted it.
        let backing_iovecs = s.backing_iovecs.take();
        self.resources.insert(id, VirtioGpuResource::restore(s));
        if let Some(backing_iovecs) = backing_iovecs {
            self.attach_backing(id, mem, backing_iovecs)?;
        }
    }

    Ok(())
}
}

View file

@ -16,6 +16,8 @@ use base::EventType;
use base::WaitContext;
use data_model::VolatileSlice;
use remain::sorted;
use serde::Deserialize;
use serde::Serialize;
use thiserror::Error;
mod event_device;
@ -105,7 +107,7 @@ impl From<IoError> for GpuDisplayError {
}
/// A surface type
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum SurfaceType {
/// Scanout surface
Scanout,