From ef37e2fe15c3bf476932bee0a47a096c126a94c7 Mon Sep 17 00:00:00 2001 From: Daniel Verkamp Date: Wed, 31 Oct 2018 13:34:57 -0700 Subject: [PATCH] qcow: add support for rebuilding refcounts This adds the ability to regenerate the reference counts by walking all of the L1/L2 tables and headers to find all reachable clusters. This is necessary for the next patch, which will use the reference count tables to find unused clusters to reuse. BUG=chromium:899273 TEST=cargo test -p cqow Change-Id: I93dd00d381d8d33010fddfc10aa18ca32586e1f4 Signed-off-by: Daniel Verkamp Reviewed-on: https://chromium-review.googlesource.com/1327821 Reviewed-by: Dylan Reid --- qcow/src/qcow.rs | 275 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 274 insertions(+), 1 deletion(-) diff --git a/qcow/src/qcow.rs b/qcow/src/qcow.rs index 20ed941ce2..db138bf734 100644 --- a/qcow/src/qcow.rs +++ b/qcow/src/qcow.rs @@ -33,12 +33,14 @@ pub enum Error { GettingFileSize(io::Error), GettingRefcount(refcount::Error), EvictingCache(io::Error), + InvalidClusterIndex, InvalidClusterSize, InvalidIndex, InvalidL1TableOffset, InvalidMagic, InvalidOffset(u64), InvalidRefcountTableOffset, + InvalidRefcountTableSize, NoFreeClusters, NoRefcountClusters, OpeningFile(io::Error), @@ -47,6 +49,7 @@ pub enum Error { ReadingPointers(io::Error), ReadingRefCounts(io::Error), ReadingRefCountBlock(refcount::Error), + RebuildingRefCounts(io::Error), SeekingFile(io::Error), SettingFileSize(io::Error), SettingRefcountRefcount(io::Error), @@ -79,9 +82,10 @@ const L2_TABLE_OFFSET_MASK: u64 = 0x00ff_ffff_ffff_fe00; // Flags const COMPRESSED_FLAG: u64 = 1 << 62; const CLUSTER_USED_FLAG: u64 = 1 << 63; +const COMPATIBLE_FEATURES_LAZY_REFCOUNTS: u64 = 1 << 0; /// Contains the information from the header of a qcow file. -#[derive(Debug)] +#[derive(Copy, Clone, Debug)] pub struct QcowHeader { pub magic: u32, pub version: u32, @@ -327,8 +331,31 @@ impl QcowFile { offset_is_cluster_boundary(header.refcount_table_offset, header.cluster_bits)?; offset_is_cluster_boundary(header.snapshots_offset, header.cluster_bits)?; + // The first cluster should always have a non-zero refcount, so if it is 0, + // this is an old file with broken refcounts, which requires a rebuild. + let mut refcount_rebuild_required = true; + file.seek(SeekFrom::Start(header.refcount_table_offset)) + .map_err(Error::SeekingFile)?; + let first_refblock_addr = file.read_u64::().map_err(Error::ReadingHeader)?; + if first_refblock_addr != 0 { + file.seek(SeekFrom::Start(first_refblock_addr)) + .map_err(Error::SeekingFile)?; + let first_cluster_refcount = + file.read_u16::().map_err(Error::ReadingHeader)?; + if first_cluster_refcount != 0 { + refcount_rebuild_required = false; + } + } + + if (header.compatible_features & COMPATIBLE_FEATURES_LAZY_REFCOUNTS) != 0 { + refcount_rebuild_required = true; + } + let mut raw_file = QcowRawFile::from(file, cluster_size).ok_or(Error::InvalidClusterSize)?; + if refcount_rebuild_required { + QcowFile::rebuild_refcounts(&mut raw_file, header)?; + } let l2_size = cluster_size / size_of::() as u64; let num_clusters = div_round_up_u64(header.size, u64::from(cluster_size)); @@ -491,6 +518,240 @@ impl QcowFile { Ok(None) } + /// Rebuild the reference count tables. + fn rebuild_refcounts(raw_file: &mut QcowRawFile, header: QcowHeader) -> Result<()> { + fn add_ref(refcounts: &mut [u16], cluster_size: u64, cluster_address: u64) -> Result<()> { + let idx = (cluster_address / cluster_size) as usize; + if idx >= refcounts.len() { + return Err(Error::InvalidClusterIndex); + } + refcounts[idx] += 1; + Ok(()) + } + + // Add a reference to the first cluster (header plus extensions). + fn set_header_refcount(refcounts: &mut [u16], cluster_size: u64) -> Result<()> { + add_ref(refcounts, cluster_size, 0) + } + + // Add references to the L1 table clusters. + fn set_l1_refcounts( + refcounts: &mut [u16], + header: QcowHeader, + cluster_size: u64, + ) -> Result<()> { + let l1_clusters = div_round_up_u64(header.l1_size as u64, cluster_size); + let l1_table_offset = header.l1_table_offset; + for i in 0..l1_clusters { + add_ref(refcounts, cluster_size, l1_table_offset + i * cluster_size)?; + } + Ok(()) + } + + // Traverse the L1 and L2 tables to find all reachable data clusters. + fn set_data_refcounts( + refcounts: &mut [u16], + header: QcowHeader, + cluster_size: u64, + raw_file: &mut QcowRawFile, + ) -> Result<()> { + let l1_table = raw_file + .read_pointer_table( + header.l1_table_offset, + header.l1_size as u64, + Some(L1_TABLE_OFFSET_MASK), + ).map_err(Error::ReadingPointers)?; + for l1_index in 0..header.l1_size as usize { + let l2_addr_disk = *l1_table.get(l1_index).ok_or(Error::InvalidIndex)?; + if l2_addr_disk != 0 { + // Add a reference to the L2 table cluster itself. + add_ref(refcounts, cluster_size, l2_addr_disk)?; + + // Read the L2 table and find all referenced data clusters. + let l2_table = raw_file + .read_pointer_table( + l2_addr_disk, + cluster_size / size_of::() as u64, + Some(L2_TABLE_OFFSET_MASK), + ).map_err(Error::ReadingPointers)?; + for data_cluster_addr in l2_table { + if data_cluster_addr != 0 { + add_ref(refcounts, cluster_size, data_cluster_addr)?; + } + } + } + } + + Ok(()) + } + + // Add references to the top-level refcount table clusters. + fn set_refcount_table_refcounts( + refcounts: &mut [u16], + header: QcowHeader, + cluster_size: u64, + ) -> Result<()> { + let refcount_table_offset = header.refcount_table_offset; + for i in 0..header.refcount_table_clusters as u64 { + add_ref( + refcounts, + cluster_size, + refcount_table_offset + i * cluster_size, + )?; + } + Ok(()) + } + + // Allocate clusters for refblocks. + // This needs to be done last so that we have the correct refcounts for all other + // clusters. + fn alloc_refblocks( + refcounts: &mut [u16], + cluster_size: u64, + refblock_clusters: u64, + pointers_per_cluster: u64, + ) -> Result> { + let refcount_table_entries = div_round_up_u64(refblock_clusters, pointers_per_cluster); + let mut ref_table = vec![0; refcount_table_entries as usize]; + let mut first_free_cluster: u64 = 0; + for refblock_addr in ref_table.iter_mut() { + while refcounts[first_free_cluster as usize] != 0 { + first_free_cluster += 1; + if first_free_cluster >= refcounts.len() as u64 { + return Err(Error::InvalidRefcountTableSize); + } + } + + *refblock_addr = first_free_cluster * cluster_size; + add_ref(refcounts, cluster_size, *refblock_addr)?; + + first_free_cluster += 1; + } + + Ok(ref_table) + } + + // Write the updated reference count blocks and reftable. + fn write_refblocks( + refcounts: &[u16], + mut header: QcowHeader, + ref_table: &[u64], + raw_file: &mut QcowRawFile, + refcount_block_entries: u64, + ) -> Result<()> { + // Rewrite the header with lazy refcounts enabled while we are rebuilding the tables. + header.compatible_features |= COMPATIBLE_FEATURES_LAZY_REFCOUNTS; + raw_file + .file_mut() + .seek(SeekFrom::Start(0)) + .map_err(Error::SeekingFile)?; + header.write_to(raw_file.file_mut())?; + + for (i, refblock_addr) in ref_table.iter().enumerate() { + // Write a block of refcounts to the location indicated by refblock_addr. + let refblock_start = i * (refcount_block_entries as usize); + let refblock_end = min( + refcounts.len(), + refblock_start + refcount_block_entries as usize, + ); + let refblock = &refcounts[refblock_start..refblock_end]; + raw_file + .write_refcount_block(*refblock_addr, refblock) + .map_err(Error::WritingHeader)?; + + // If this is the last (partial) cluster, pad it out to a full refblock cluster. + if refblock.len() < refcount_block_entries as usize { + let refblock_padding = + vec![0u16; refcount_block_entries as usize - refblock.len()]; + raw_file + .write_refcount_block( + *refblock_addr + refblock.len() as u64 * 2, + &refblock_padding, + ).map_err(Error::WritingHeader)?; + } + } + + // Rewrite the top-level refcount table. + raw_file + .write_pointer_table(header.refcount_table_offset, &ref_table, 0) + .map_err(Error::WritingHeader)?; + + // Rewrite the header again, now with lazy refcounts disabled. + header.compatible_features &= !COMPATIBLE_FEATURES_LAZY_REFCOUNTS; + raw_file + .file_mut() + .seek(SeekFrom::Start(0)) + .map_err(Error::SeekingFile)?; + header.write_to(raw_file.file_mut())?; + + Ok(()) + } + + let cluster_size = raw_file.cluster_size(); + + let file_size = raw_file + .file_mut() + .metadata() + .map_err(Error::GettingFileSize)? + .len(); + + let refcount_bits = 1u64 << header.refcount_order; + let refcount_bytes = div_round_up_u64(refcount_bits, 8); + let refcount_block_entries = cluster_size / refcount_bytes; + let pointers_per_cluster = cluster_size / size_of::() as u64; + let data_clusters = div_round_up_u64(header.size, u64::from(cluster_size)); + let l2_clusters = div_round_up_u64(data_clusters, pointers_per_cluster); + let l1_clusters = div_round_up_u64(l2_clusters, cluster_size); + let header_clusters = div_round_up_u64(size_of::() as u64, cluster_size); + let max_clusters = data_clusters + l2_clusters + l1_clusters + header_clusters; + let mut max_valid_cluster_index = max_clusters; + let refblock_clusters = div_round_up_u64(max_valid_cluster_index, refcount_block_entries); + let reftable_clusters = div_round_up_u64(refblock_clusters, pointers_per_cluster); + // Account for refblocks and the ref table size needed to address them. + let refblocks_for_refs = div_round_up_u64( + refblock_clusters + reftable_clusters, + refcount_block_entries, + ); + let reftable_clusters_for_refs = + div_round_up_u64(refblocks_for_refs, refcount_block_entries); + max_valid_cluster_index += refblock_clusters + reftable_clusters; + max_valid_cluster_index += refblocks_for_refs + reftable_clusters_for_refs; + + if max_valid_cluster_index > usize::max_value() as u64 { + return Err(Error::InvalidRefcountTableSize); + } + + let max_valid_cluster_offset = max_valid_cluster_index * cluster_size; + if max_valid_cluster_offset < file_size - cluster_size { + return Err(Error::InvalidRefcountTableSize); + } + + let mut refcounts = vec![0; max_valid_cluster_index as usize]; + + // Find all references clusters and rebuild refcounts. + set_header_refcount(&mut refcounts, cluster_size)?; + set_l1_refcounts(&mut refcounts, header, cluster_size)?; + set_data_refcounts(&mut refcounts, header, cluster_size, raw_file)?; + set_refcount_table_refcounts(&mut refcounts, header, cluster_size)?; + + // Allocate clusters to store the new reference count blocks. + let ref_table = alloc_refblocks( + &mut refcounts, + cluster_size, + refblock_clusters, + pointers_per_cluster, + )?; + + // Write updated reference counts and point the reftable at them. + write_refblocks( + &refcounts, + header, + &ref_table, + raw_file, + refcount_block_entries, + ) + } + // Limits the range so that it doesn't exceed the virtual size of the file. fn limit_range_file(&self, address: u64, count: usize) -> usize { if address.checked_add(count as u64).is_none() || address > self.virtual_size() { @@ -1993,4 +2254,16 @@ mod tests { assert_eq!(seek_cur(&mut file), 0); }); } + + #[test] + fn rebuild_refcounts() { + with_basic_file(&valid_header(), |mut disk_file: File| { + let header = QcowHeader::new(&mut disk_file).expect("Failed to create Header."); + let cluster_size = 65536; + let mut raw_file = + QcowRawFile::from(disk_file, cluster_size).expect("Failed to create QcowRawFile."); + QcowFile::rebuild_refcounts(&mut raw_file, header) + .expect("Failed to rebuild recounts."); + }); + } }