1 // Copyright 2018 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 mod qcow_raw_file;
6 mod refcount;
7 mod vec_cache;
8 
9 use std::cmp::max;
10 use std::cmp::min;
11 use std::fs::File;
12 use std::io;
13 use std::io::Read;
14 use std::io::Seek;
15 use std::io::SeekFrom;
16 use std::io::Write;
17 use std::mem::size_of;
18 use std::path::PathBuf;
19 use std::str;
20 
21 use base::error;
22 use base::AsRawDescriptor;
23 use base::AsRawDescriptors;
24 use base::FileAllocate;
25 use base::FileReadWriteAtVolatile;
26 use base::FileSetLen;
27 use base::FileSync;
28 use base::PunchHole;
29 use base::RawDescriptor;
30 use base::VolatileMemory;
31 use base::VolatileSlice;
32 use base::WriteZeroesAt;
33 use cros_async::Executor;
34 use libc::EINVAL;
35 use libc::ENOSPC;
36 use libc::ENOTSUP;
37 use remain::sorted;
38 use sync::Mutex;
39 use thiserror::Error;
40 
41 use crate::asynchronous::DiskFlush;
42 use crate::open_disk_file;
43 use crate::qcow::qcow_raw_file::QcowRawFile;
44 use crate::qcow::refcount::RefCount;
45 use crate::qcow::vec_cache::CacheMap;
46 use crate::qcow::vec_cache::Cacheable;
47 use crate::qcow::vec_cache::VecCache;
48 use crate::AsyncDisk;
49 use crate::AsyncDiskFileWrapper;
50 use crate::DiskFile;
51 use crate::DiskFileParams;
52 use crate::DiskGetLen;
53 use crate::ToAsyncDisk;
54 
55 #[sorted]
56 #[derive(Error, Debug)]
57 pub enum Error {
58     #[error("backing file io error: {0}")]
59     BackingFileIo(io::Error),
60     #[error("backing file open error: {0}")]
61     BackingFileOpen(Box<crate::Error>),
62     #[error("backing file name is too long: {0} bytes over")]
63     BackingFileTooLong(usize),
64     #[error("compressed blocks not supported")]
65     CompressedBlocksNotSupported,
66     #[error("failed to evict cache: {0}")]
67     EvictingCache(io::Error),
68     #[error("file larger than max of {}: {0}", MAX_QCOW_FILE_SIZE)]
69     FileTooBig(u64),
70     #[error("failed to get file size: {0}")]
71     GettingFileSize(io::Error),
72     #[error("failed to get refcount: {0}")]
73     GettingRefcount(refcount::Error),
74     #[error("failed to parse filename: {0}")]
75     InvalidBackingFileName(str::Utf8Error),
76     #[error("invalid cluster index")]
77     InvalidClusterIndex,
78     #[error("invalid cluster size")]
79     InvalidClusterSize,
80     #[error("invalid index")]
81     InvalidIndex,
82     #[error("invalid L1 table offset")]
83     InvalidL1TableOffset,
84     #[error("invalid L1 table size {0}")]
85     InvalidL1TableSize(u32),
86     #[error("invalid magic")]
87     InvalidMagic,
88     #[error("invalid offset")]
89     InvalidOffset(u64),
90     #[error("invalid refcount table offset")]
91     InvalidRefcountTableOffset,
92     #[error("invalid refcount table size: {0}")]
93     InvalidRefcountTableSize(u64),
94     #[error("no free clusters")]
95     NoFreeClusters,
96     #[error("no refcount clusters")]
97     NoRefcountClusters,
98     #[error("not enough space for refcounts")]
99     NotEnoughSpaceForRefcounts,
100     #[error("failed to open file: {0}")]
101     OpeningFile(io::Error),
102     #[error("failed to read header: {0}")]
103     ReadingHeader(io::Error),
104     #[error("failed to read pointers: {0}")]
105     ReadingPointers(io::Error),
106     #[error("failed to read ref count block: {0}")]
107     ReadingRefCountBlock(refcount::Error),
108     #[error("failed to read ref counts: {0}")]
109     ReadingRefCounts(io::Error),
110     #[error("failed to rebuild ref counts: {0}")]
111     RebuildingRefCounts(io::Error),
112     #[error("refcount table offset past file end")]
113     RefcountTableOffEnd,
114     #[error("too many clusters specified for refcount table")]
115     RefcountTableTooLarge,
116     #[error("failed to seek file: {0}")]
117     SeekingFile(io::Error),
118     #[error("failed to set the refcount of a refcount cluster: {0}")]
119     SettingRefcountRefcount(io::Error),
120     #[error("size too small for number of clusters")]
121     SizeTooSmallForNumberOfClusters,
122     #[error("l1 entry table too large: {0}")]
123     TooManyL1Entries(u64),
124     #[error("ref count table too large: {0}")]
125     TooManyRefcounts(u64),
126     #[error("unsupported refcount order")]
127     UnsupportedRefcountOrder,
128     #[error("unsupported version: {0}")]
129     UnsupportedVersion(u32),
130     #[error("failed to write header: {0}")]
131     WritingHeader(io::Error),
132 }
133 
134 pub type Result<T> = std::result::Result<T, Error>;
135 
136 // Maximum data size supported.
137 const MAX_QCOW_FILE_SIZE: u64 = 0x01 << 44; // 16 TB.
138 
139 // QCOW magic constant that starts the header.
140 pub const QCOW_MAGIC: u32 = 0x5146_49fb;
141 // Default to a cluster size of 2^DEFAULT_CLUSTER_BITS
142 const DEFAULT_CLUSTER_BITS: u32 = 16;
143 // Limit clusters to reasonable sizes. Choose the same limits as qemu. Making the clusters smaller
144 // increases the amount of overhead for bookkeeping.
145 const MIN_CLUSTER_BITS: u32 = 9;
146 const MAX_CLUSTER_BITS: u32 = 21;
147 // The L1 and RefCount tables are kept in RAM; only handle files that require less than 35M entries.
148 // This easily covers 1 TB files. When support for bigger files is needed, the assumptions made to
149 // keep these tables in RAM need to be thrown out.
150 const MAX_RAM_POINTER_TABLE_SIZE: u64 = 35_000_000;
151 // Only support 2 byte refcounts, 2^refcount_order bits.
152 const DEFAULT_REFCOUNT_ORDER: u32 = 4;
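// With these defaults, clusters are 2^16 = 64 KiB and each refcount entry is 2^4 = 16 bits
// (2 bytes), so a single refcount block (one cluster) holds 32768 refcounts and covers 2 GiB of
// clusters.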
153 
154 const V3_BARE_HEADER_SIZE: u32 = 104;
155 
156 // bits 0-8 and 56-63 are reserved.
157 const L1_TABLE_OFFSET_MASK: u64 = 0x00ff_ffff_ffff_fe00;
158 const L2_TABLE_OFFSET_MASK: u64 = 0x00ff_ffff_ffff_fe00;
159 // Flags
160 const COMPRESSED_FLAG: u64 = 1 << 62;
161 const CLUSTER_USED_FLAG: u64 = 1 << 63;
162 const COMPATIBLE_FEATURES_LAZY_REFCOUNTS: u64 = 1 << 0;
163 
164 // The format supports a "header extension area", which crosvm does not use.
165 const QCOW_EMPTY_HEADER_EXTENSION_SIZE: u32 = 8;
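// (Those 8 bytes are the terminating entry crosvm writes: a 4-byte extension type of 0, meaning
// "end of header extension area", followed by a 4-byte data length of 0; see `QcowHeader::write_to`.)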
166 
167 // Defined by the specification
168 const MAX_BACKING_FILE_SIZE: u32 = 1023;
169 
170 /// Contains the information from the header of a qcow file.
171 #[derive(Clone, Debug)]
172 pub struct QcowHeader {
173     pub magic: u32,
174     pub version: u32,
175 
176     pub backing_file_offset: u64,
177     pub backing_file_size: u32,
178 
179     pub cluster_bits: u32,
180     pub size: u64,
181     pub crypt_method: u32,
182 
183     pub l1_size: u32,
184     pub l1_table_offset: u64,
185 
186     pub refcount_table_offset: u64,
187     pub refcount_table_clusters: u32,
188 
189     pub nb_snapshots: u32,
190     pub snapshots_offset: u64,
191 
192     // v3 entries
193     pub incompatible_features: u64,
194     pub compatible_features: u64,
195     pub autoclear_features: u64,
196     pub refcount_order: u32,
197     pub header_size: u32,
198 
199     // Post-header entries
200     pub backing_file_path: Option<String>,
201 }
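// On disk, the numeric fields above are stored big-endian, in the declared order, in the first
// V3_BARE_HEADER_SIZE (104) bytes of the file; see `QcowHeader::write_to` below for the exact
// layout. `backing_file_path` is stored separately, after the header extension area.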
202 
203 // Reads the next u16 from the file.
204 fn read_u16_from_file(mut f: &File) -> Result<u16> {
205     let mut value = [0u8; 2];
206     (&mut f)
207         .read_exact(&mut value)
208         .map_err(Error::ReadingHeader)?;
209     Ok(u16::from_be_bytes(value))
210 }
211 
212 // Reads the next u32 from the file.
213 fn read_u32_from_file(mut f: &File) -> Result<u32> {
214     let mut value = [0u8; 4];
215     (&mut f)
216         .read_exact(&mut value)
217         .map_err(Error::ReadingHeader)?;
218     Ok(u32::from_be_bytes(value))
219 }
220 
221 // Reads the next u64 from the file.
222 fn read_u64_from_file(mut f: &File) -> Result<u64> {
223     let mut value = [0u8; 8];
224     (&mut f)
225         .read_exact(&mut value)
226         .map_err(Error::ReadingHeader)?;
227     Ok(u64::from_be_bytes(value))
228 }
229 
230 impl QcowHeader {
231     /// Creates a QcowHeader from a reference to a file.
232     pub fn new(f: &mut File) -> Result<QcowHeader> {
233         f.seek(SeekFrom::Start(0)).map_err(Error::ReadingHeader)?;
234 
235         let magic = read_u32_from_file(f)?;
236         if magic != QCOW_MAGIC {
237             return Err(Error::InvalidMagic);
238         }
239 
240         let mut header = QcowHeader {
241             magic,
242             version: read_u32_from_file(f)?,
243             backing_file_offset: read_u64_from_file(f)?,
244             backing_file_size: read_u32_from_file(f)?,
245             cluster_bits: read_u32_from_file(f)?,
246             size: read_u64_from_file(f)?,
247             crypt_method: read_u32_from_file(f)?,
248             l1_size: read_u32_from_file(f)?,
249             l1_table_offset: read_u64_from_file(f)?,
250             refcount_table_offset: read_u64_from_file(f)?,
251             refcount_table_clusters: read_u32_from_file(f)?,
252             nb_snapshots: read_u32_from_file(f)?,
253             snapshots_offset: read_u64_from_file(f)?,
254             incompatible_features: read_u64_from_file(f)?,
255             compatible_features: read_u64_from_file(f)?,
256             autoclear_features: read_u64_from_file(f)?,
257             refcount_order: read_u32_from_file(f)?,
258             header_size: read_u32_from_file(f)?,
259             backing_file_path: None,
260         };
261         if header.backing_file_size > MAX_BACKING_FILE_SIZE {
262             return Err(Error::BackingFileTooLong(header.backing_file_size as usize));
263         }
264         if header.backing_file_offset != 0 {
265             f.seek(SeekFrom::Start(header.backing_file_offset))
266                 .map_err(Error::ReadingHeader)?;
267             let mut backing_file_name_bytes = vec![0u8; header.backing_file_size as usize];
268             f.read_exact(&mut backing_file_name_bytes)
269                 .map_err(Error::ReadingHeader)?;
270             header.backing_file_path = Some(
271                 String::from_utf8(backing_file_name_bytes)
272                     .map_err(|err| Error::InvalidBackingFileName(err.utf8_error()))?,
273             );
274         }
275         Ok(header)
276     }
277 
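    /// Builds a v3 header describing a new image of `size` bytes, optionally referencing
    /// `backing_file`. Illustrative usage sketch (added example, not part of the original source):
    ///
    /// ```ignore
    /// // An 8 GiB image with the default 64 KiB clusters has 131072 data clusters spread over
    /// // 16 L2 tables, so the L1 table gets 16 entries and lives in the second cluster.
    /// let header = QcowHeader::create_for_size_and_path(8u64 << 30, None)?;
    /// assert_eq!(header.l1_size, 16);
    /// assert_eq!(header.l1_table_offset, 0x1_0000);
    /// ```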
278     pub fn create_for_size_and_path(size: u64, backing_file: Option<&str>) -> Result<QcowHeader> {
279         let cluster_bits: u32 = DEFAULT_CLUSTER_BITS;
280         let cluster_size: u32 = 0x01 << cluster_bits;
281         let max_length: usize =
282             (cluster_size - V3_BARE_HEADER_SIZE - QCOW_EMPTY_HEADER_EXTENSION_SIZE) as usize;
283         if let Some(path) = backing_file {
284             if path.len() > max_length {
285                 return Err(Error::BackingFileTooLong(path.len() - max_length));
286             }
287         }
288         // L2 blocks are always one cluster long. They contain cluster_size/sizeof(u64) addresses.
289         let l2_size: u32 = cluster_size / size_of::<u64>() as u32;
290         let num_clusters: u32 = size.div_ceil(u64::from(cluster_size)) as u32;
291         let num_l2_clusters: u32 = num_clusters.div_ceil(l2_size);
292         let l1_clusters: u32 = num_l2_clusters.div_ceil(cluster_size);
293         let header_clusters = (size_of::<QcowHeader>() as u32).div_ceil(cluster_size);
294         Ok(QcowHeader {
295             magic: QCOW_MAGIC,
296             version: 3,
297             backing_file_offset: (if backing_file.is_none() {
298                 0
299             } else {
300                 V3_BARE_HEADER_SIZE + QCOW_EMPTY_HEADER_EXTENSION_SIZE
301             }) as u64,
302             backing_file_size: backing_file.map_or(0, |x| x.len()) as u32,
303             cluster_bits: DEFAULT_CLUSTER_BITS,
304             size,
305             crypt_method: 0,
306             l1_size: num_l2_clusters,
307             l1_table_offset: u64::from(cluster_size),
308             // The refcount table is after l1 + header.
309             refcount_table_offset: u64::from(cluster_size * (l1_clusters + 1)),
310             refcount_table_clusters: {
311                 // Pre-allocate enough clusters for the entire refcount table as it must be
312                 // contiguous in the file. Allocate enough space to refcount all clusters, including
313                 // the refcount clusters.
314                 let max_refcount_clusters = max_refcount_clusters(
315                     DEFAULT_REFCOUNT_ORDER,
316                     cluster_size,
317                     num_clusters + l1_clusters + num_l2_clusters + header_clusters,
318                 ) as u32;
319                 // The refcount table needs to store the offset of each refcount cluster.
320                 (max_refcount_clusters * size_of::<u64>() as u32).div_ceil(cluster_size)
321             },
322             nb_snapshots: 0,
323             snapshots_offset: 0,
324             incompatible_features: 0,
325             compatible_features: 0,
326             autoclear_features: 0,
327             refcount_order: DEFAULT_REFCOUNT_ORDER,
328             header_size: V3_BARE_HEADER_SIZE,
329             backing_file_path: backing_file.map(String::from),
330         })
331     }
332 
333     /// Write the header to `file`.
334     pub fn write_to<F: Write + Seek>(&self, file: &mut F) -> Result<()> {
335         // Writes the next u32 to the file.
336         fn write_u32_to_file<F: Write>(f: &mut F, value: u32) -> Result<()> {
337             f.write_all(&value.to_be_bytes())
338                 .map_err(Error::WritingHeader)
339         }
340 
341         // Writes the next u64 to the file.
342         fn write_u64_to_file<F: Write>(f: &mut F, value: u64) -> Result<()> {
343             f.write_all(&value.to_be_bytes())
344                 .map_err(Error::WritingHeader)
345         }
346 
347         write_u32_to_file(file, self.magic)?;
348         write_u32_to_file(file, self.version)?;
349         write_u64_to_file(file, self.backing_file_offset)?;
350         write_u32_to_file(file, self.backing_file_size)?;
351         write_u32_to_file(file, self.cluster_bits)?;
352         write_u64_to_file(file, self.size)?;
353         write_u32_to_file(file, self.crypt_method)?;
354         write_u32_to_file(file, self.l1_size)?;
355         write_u64_to_file(file, self.l1_table_offset)?;
356         write_u64_to_file(file, self.refcount_table_offset)?;
357         write_u32_to_file(file, self.refcount_table_clusters)?;
358         write_u32_to_file(file, self.nb_snapshots)?;
359         write_u64_to_file(file, self.snapshots_offset)?;
360         write_u64_to_file(file, self.incompatible_features)?;
361         write_u64_to_file(file, self.compatible_features)?;
362         write_u64_to_file(file, self.autoclear_features)?;
363         write_u32_to_file(file, self.refcount_order)?;
364         write_u32_to_file(file, self.header_size)?;
365         write_u32_to_file(file, 0)?; // header extension type: end of header extension area
366         write_u32_to_file(file, 0)?; // length of header extension data: 0
367         if let Some(backing_file_path) = self.backing_file_path.as_ref() {
368             write!(file, "{}", backing_file_path).map_err(Error::WritingHeader)?;
369         }
370 
371         // Set the file length by seeking and writing a zero to the last byte. This avoids
372         // requiring a real `File`; anything that implements `Write + Seek` works as the `file` argument.
373         // Zeros out the l1 and refcount table clusters.
374         let cluster_size = 0x01u64 << self.cluster_bits;
375         let refcount_blocks_size = u64::from(self.refcount_table_clusters) * cluster_size;
376         file.seek(SeekFrom::Start(
377             self.refcount_table_offset + refcount_blocks_size - 2,
378         ))
379         .map_err(Error::WritingHeader)?;
380         file.write(&[0u8]).map_err(Error::WritingHeader)?;
381 
382         Ok(())
383     }
384 }
385 
386 fn max_refcount_clusters(refcount_order: u32, cluster_size: u32, num_clusters: u32) -> u64 {
387     // Use u64 as the product of the u32 inputs can overflow.
388     let refcount_bytes = (0x01 << refcount_order as u64) / 8;
389     let for_data = (u64::from(num_clusters) * refcount_bytes).div_ceil(u64::from(cluster_size));
390     let for_refcounts = (for_data * refcount_bytes).div_ceil(u64::from(cluster_size));
391     for_data + for_refcounts
392 }
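// Illustrative sanity check of the refcount sizing arithmetic above (added example, not part of
// the original source).
#[cfg(test)]
mod max_refcount_clusters_example {
    use super::*;

    #[test]
    fn eight_gib_image_default_geometry() {
        // 8 GiB / 64 KiB = 131072 data clusters. With 2-byte refcounts, their refcounts fill
        // exactly 4 refblock clusters, and 1 more refblock cluster covers those refblocks.
        assert_eq!(max_refcount_clusters(DEFAULT_REFCOUNT_ORDER, 65536, 131_072), 5);
    }
}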
393 
394 /// Represents a qcow2 file. This is a sparse file format maintained by the qemu project.
395 /// Full documentation of the format can be found in the qemu repository.
396 ///
397 /// # Example
398 ///
399 /// ```
400 /// # use std::path::PathBuf;
401 /// # use base::FileReadWriteAtVolatile;
402 /// # use disk::QcowFile;
403 /// # use disk::DiskFileParams;
404 /// # use base::VolatileSlice;
405 /// # fn test(file: std::fs::File, path: PathBuf) -> std::io::Result<()> {
406 ///     let mut q = QcowFile::from(file, DiskFileParams {
407 ///         path,
408 ///         is_read_only: false,
409 ///         is_sparse_file: false,
410 ///         is_overlapped: false,
411 ///         is_direct: false,
412 ///         lock: true,
413 ///         depth: 0,
414 ///     }).expect("Can't open qcow file");
415 ///     let mut buf = [0u8; 12];
416 ///     let mut vslice = VolatileSlice::new(&mut buf);
417 ///     q.read_at_volatile(vslice, 10)?;
418 /// #   Ok(())
419 /// # }
420 /// ```
421 #[derive(Debug)]
422 pub struct QcowFile {
423     inner: Mutex<QcowFileInner>,
424     // Copy of `inner.header.size` outside the mutex.
425     virtual_size: u64,
426 }
427 
428 #[derive(Debug)]
429 struct QcowFileInner {
430     raw_file: QcowRawFile,
431     header: QcowHeader,
432     l1_table: VecCache<u64>,
433     l2_entries: u64,
434     l2_cache: CacheMap<VecCache<u64>>,
435     refcounts: RefCount,
436     current_offset: u64,
437     unref_clusters: Vec<u64>, // List of freshly unreferenced clusters.
438     // List of unreferenced clusters available to be used. Unref clusters become available once the
439     // removal of references to them has been synced to disk.
440     avail_clusters: Vec<u64>,
441     backing_file: Option<Box<dyn DiskFile>>,
442 }
443 
444 impl DiskFile for QcowFile {}
445 
446 impl DiskFlush for QcowFile {
447     fn flush(&self) -> io::Result<()> {
448         // Using fsync is overkill here, but the code for flushing state to the file is tangled up
449         // with the fsync, so it is the best we can do for now.
450         self.fsync()
451     }
452 }
453 
454 impl QcowFile {
455     /// Creates a QcowFile from `file`. File must be a valid qcow2 image.
456     pub fn from(mut file: File, params: DiskFileParams) -> Result<QcowFile> {
457         let header = QcowHeader::new(&mut file)?;
458 
459         // Only v3 files are supported.
460         if header.version != 3 {
461             return Err(Error::UnsupportedVersion(header.version));
462         }
463 
464         // Make sure that the L1 table fits in RAM.
465         if u64::from(header.l1_size) > MAX_RAM_POINTER_TABLE_SIZE {
466             return Err(Error::InvalidL1TableSize(header.l1_size));
467         }
468 
469         let cluster_bits: u32 = header.cluster_bits;
470         if !(MIN_CLUSTER_BITS..=MAX_CLUSTER_BITS).contains(&cluster_bits) {
471             return Err(Error::InvalidClusterSize);
472         }
473         let cluster_size = 0x01u64 << cluster_bits;
474 
475         // Limit the total size of the disk.
476         if header.size > MAX_QCOW_FILE_SIZE {
477             return Err(Error::FileTooBig(header.size));
478         }
479 
480         let backing_file = if let Some(backing_file_path) = header.backing_file_path.as_ref() {
481             let backing_file = open_disk_file(DiskFileParams {
482                 path: PathBuf::from(backing_file_path),
483                 // The backing file is only read from.
484                 is_read_only: true,
485                 // Sparse isn't meaningful for read only files.
486                 is_sparse_file: false,
487                 // TODO: Should pass `params.is_overlapped` through here. Needs testing.
488                 is_overlapped: false,
489                 is_direct: params.is_direct,
490                 lock: params.lock,
491                 depth: params.depth + 1,
492             })
493             .map_err(|e| Error::BackingFileOpen(Box::new(e)))?;
494             Some(backing_file)
495         } else {
496             None
497         };
498 
499         // Only support two byte refcounts.
500         let refcount_bits: u64 = 0x01u64
501             .checked_shl(header.refcount_order)
502             .ok_or(Error::UnsupportedRefcountOrder)?;
503         if refcount_bits != 16 {
504             return Err(Error::UnsupportedRefcountOrder);
505         }
506         let refcount_bytes = (refcount_bits + 7) / 8;
507 
508         // Need at least one refcount cluster
509         if header.refcount_table_clusters == 0 {
510             return Err(Error::NoRefcountClusters);
511         }
512         offset_is_cluster_boundary(header.l1_table_offset, header.cluster_bits)?;
513         offset_is_cluster_boundary(header.snapshots_offset, header.cluster_bits)?;
514         // The refcount table must start at a cluster boundary and be within the file's virtual or actual size.
515         offset_is_cluster_boundary(header.refcount_table_offset, header.cluster_bits)?;
516         let file_size = file.metadata().map_err(Error::GettingFileSize)?.len();
517         if header.refcount_table_offset > max(file_size, header.size) {
518             return Err(Error::RefcountTableOffEnd);
519         }
520 
521         // The first cluster should always have a non-zero refcount, so if it is 0,
522         // this is an old file with broken refcounts, which requires a rebuild.
523         let mut refcount_rebuild_required = true;
524         file.seek(SeekFrom::Start(header.refcount_table_offset))
525             .map_err(Error::SeekingFile)?;
526         let first_refblock_addr = read_u64_from_file(&file)?;
527         if first_refblock_addr != 0 {
528             file.seek(SeekFrom::Start(first_refblock_addr))
529                 .map_err(Error::SeekingFile)?;
530             let first_cluster_refcount = read_u16_from_file(&file)?;
531             if first_cluster_refcount != 0 {
532                 refcount_rebuild_required = false;
533             }
534         }
535 
536         if (header.compatible_features & COMPATIBLE_FEATURES_LAZY_REFCOUNTS) != 0 {
537             refcount_rebuild_required = true;
538         }
539 
540         let mut raw_file =
541             QcowRawFile::from(file, cluster_size).ok_or(Error::InvalidClusterSize)?;
542         if refcount_rebuild_required {
543             QcowFileInner::rebuild_refcounts(&mut raw_file, header.clone())?;
544         }
545 
546         let l2_size = cluster_size / size_of::<u64>() as u64;
547         let num_clusters = header.size.div_ceil(cluster_size);
548         let num_l2_clusters = num_clusters.div_ceil(l2_size);
549         let l1_clusters = num_l2_clusters.div_ceil(cluster_size);
550         let header_clusters = (size_of::<QcowHeader>() as u64).div_ceil(cluster_size);
551         if num_l2_clusters > MAX_RAM_POINTER_TABLE_SIZE {
552             return Err(Error::TooManyL1Entries(num_l2_clusters));
553         }
554         let l1_table = VecCache::from_vec(
555             raw_file
556                 .read_pointer_table(
557                     header.l1_table_offset,
558                     num_l2_clusters,
559                     Some(L1_TABLE_OFFSET_MASK),
560                 )
561                 .map_err(Error::ReadingHeader)?,
562         );
563 
564         let num_clusters = header.size.div_ceil(cluster_size);
565         let refcount_clusters = max_refcount_clusters(
566             header.refcount_order,
567             cluster_size as u32,
568             (num_clusters + l1_clusters + num_l2_clusters + header_clusters) as u32,
569         );
570         // Check that the given header doesn't have a suspiciously sized refcount table.
571         if u64::from(header.refcount_table_clusters) > 2 * refcount_clusters {
572             return Err(Error::RefcountTableTooLarge);
573         }
574         if l1_clusters + refcount_clusters > MAX_RAM_POINTER_TABLE_SIZE {
575             return Err(Error::TooManyRefcounts(refcount_clusters));
576         }
577         let refcount_block_entries = cluster_size / refcount_bytes;
578         let refcounts = RefCount::new(
579             &mut raw_file,
580             header.refcount_table_offset,
581             refcount_clusters,
582             refcount_block_entries,
583             cluster_size,
584         )
585         .map_err(Error::ReadingRefCounts)?;
586 
587         let l2_entries = cluster_size / size_of::<u64>() as u64;
588 
589         let mut inner = QcowFileInner {
590             raw_file,
591             header,
592             l1_table,
593             l2_entries,
594             l2_cache: CacheMap::new(100),
595             refcounts,
596             current_offset: 0,
597             unref_clusters: Vec::new(),
598             avail_clusters: Vec::new(),
599             backing_file,
600         };
601 
602         // Check that the L1 and refcount tables fit in a 64-bit address space.
603         inner
604             .header
605             .l1_table_offset
606             .checked_add(inner.l1_address_offset(inner.virtual_size()))
607             .ok_or(Error::InvalidL1TableOffset)?;
608         inner
609             .header
610             .refcount_table_offset
611             .checked_add(u64::from(inner.header.refcount_table_clusters) * cluster_size)
612             .ok_or(Error::InvalidRefcountTableOffset)?;
613 
614         inner.find_avail_clusters()?;
615 
616         let virtual_size = inner.virtual_size();
617         Ok(QcowFile {
618             inner: Mutex::new(inner),
619             virtual_size,
620         })
621     }
622 
623     /// Creates a new QcowFile at the given path.
624     pub fn new(file: File, params: DiskFileParams, virtual_size: u64) -> Result<QcowFile> {
625         let header = QcowHeader::create_for_size_and_path(virtual_size, None)?;
626         QcowFile::new_from_header(file, params, header)
627     }
628 
629     /// Creates a new QcowFile at the given path, backed by the file at `backing_file_name`.
630     pub fn new_from_backing(
631         file: File,
632         params: DiskFileParams,
633         backing_file_name: &str,
634     ) -> Result<QcowFile> {
635         // Open the backing file as a `DiskFile` to determine its size (which may not match the
636         // filesystem size).
637         let size = {
638             let backing_file = open_disk_file(DiskFileParams {
639                 path: PathBuf::from(backing_file_name),
640                 // The backing file is only read from.
641                 is_read_only: true,
642                 // Sparse isn't meaningful for read only files.
643                 is_sparse_file: false,
644                 // TODO: Should pass `params.is_overlapped` through here. Needs testing.
645                 is_overlapped: false,
646                 is_direct: params.is_direct,
647                 lock: params.lock,
648                 depth: params.depth + 1,
649             })
650             .map_err(|e| Error::BackingFileOpen(Box::new(e)))?;
651             backing_file.get_len().map_err(Error::BackingFileIo)?
652         };
653         let header = QcowHeader::create_for_size_and_path(size, Some(backing_file_name))?;
654         QcowFile::new_from_header(file, params, header)
655     }
656 
657     fn new_from_header(
658         mut file: File,
659         params: DiskFileParams,
660         header: QcowHeader,
661     ) -> Result<QcowFile> {
662         file.seek(SeekFrom::Start(0)).map_err(Error::SeekingFile)?;
663         header.write_to(&mut file)?;
664 
665         let mut qcow = Self::from(file, params)?;
666         let inner = qcow.inner.get_mut();
667 
668         // Set the refcount for each refcount table cluster.
669         let cluster_size = 0x01u64 << inner.header.cluster_bits;
670         let refcount_table_base = inner.header.refcount_table_offset;
671         let end_cluster_addr =
672             refcount_table_base + u64::from(inner.header.refcount_table_clusters) * cluster_size;
673 
674         let mut cluster_addr = 0;
675         while cluster_addr < end_cluster_addr {
676             let mut unref_clusters = inner
677                 .set_cluster_refcount(cluster_addr, 1)
678                 .map_err(Error::SettingRefcountRefcount)?;
679             inner.unref_clusters.append(&mut unref_clusters);
680             cluster_addr += cluster_size;
681         }
682 
683         Ok(qcow)
684     }
685 
686     pub fn set_backing_file(&mut self, backing: Option<Box<dyn DiskFile>>) {
687         self.inner.get_mut().backing_file = backing;
688     }
689 }
690 
691 impl QcowFileInner {
692     /// Returns the first cluster in the file with a 0 refcount. Used for testing.
693     #[cfg(test)]
694     fn first_zero_refcount(&mut self) -> Result<Option<u64>> {
695         let file_size = self
696             .raw_file
697             .file_mut()
698             .metadata()
699             .map_err(Error::GettingFileSize)?
700             .len();
701         let cluster_size = 0x01u64 << self.header.cluster_bits;
702 
703         let mut cluster_addr = 0;
704         while cluster_addr < file_size {
705             let cluster_refcount = self
706                 .refcounts
707                 .get_cluster_refcount(&mut self.raw_file, cluster_addr)
708                 .map_err(Error::GettingRefcount)?;
709             if cluster_refcount == 0 {
710                 return Ok(Some(cluster_addr));
711             }
712             cluster_addr += cluster_size;
713         }
714         Ok(None)
715     }
716 
717     fn find_avail_clusters(&mut self) -> Result<()> {
718         let cluster_size = self.raw_file.cluster_size();
719 
720         let file_size = self
721             .raw_file
722             .file_mut()
723             .metadata()
724             .map_err(Error::GettingFileSize)?
725             .len();
726 
727         for i in (0..file_size).step_by(cluster_size as usize) {
728             let refcount = self
729                 .refcounts
730                 .get_cluster_refcount(&mut self.raw_file, i)
731                 .map_err(Error::GettingRefcount)?;
732             if refcount == 0 {
733                 self.avail_clusters.push(i);
734             }
735         }
736 
737         Ok(())
738     }
739 
740     /// Rebuild the reference count tables.
741     fn rebuild_refcounts(raw_file: &mut QcowRawFile, header: QcowHeader) -> Result<()> {
742         fn add_ref(refcounts: &mut [u16], cluster_size: u64, cluster_address: u64) -> Result<()> {
743             let idx = (cluster_address / cluster_size) as usize;
744             if idx >= refcounts.len() {
745                 return Err(Error::InvalidClusterIndex);
746             }
747             refcounts[idx] += 1;
748             Ok(())
749         }
750 
751         // Add a reference to the first cluster (header plus extensions).
752         fn set_header_refcount(refcounts: &mut [u16], cluster_size: u64) -> Result<()> {
753             add_ref(refcounts, cluster_size, 0)
754         }
755 
756         // Add references to the L1 table clusters.
757         fn set_l1_refcounts(
758             refcounts: &mut [u16],
759             header: QcowHeader,
760             cluster_size: u64,
761         ) -> Result<()> {
762             let l1_clusters = u64::from(header.l1_size).div_ceil(cluster_size);
763             let l1_table_offset = header.l1_table_offset;
764             for i in 0..l1_clusters {
765                 add_ref(refcounts, cluster_size, l1_table_offset + i * cluster_size)?;
766             }
767             Ok(())
768         }
769 
770         // Traverse the L1 and L2 tables to find all reachable data clusters.
771         fn set_data_refcounts(
772             refcounts: &mut [u16],
773             header: QcowHeader,
774             cluster_size: u64,
775             raw_file: &mut QcowRawFile,
776         ) -> Result<()> {
777             let l1_table = raw_file
778                 .read_pointer_table(
779                     header.l1_table_offset,
780                     header.l1_size as u64,
781                     Some(L1_TABLE_OFFSET_MASK),
782                 )
783                 .map_err(Error::ReadingPointers)?;
784             for l1_index in 0..header.l1_size as usize {
785                 let l2_addr_disk = *l1_table.get(l1_index).ok_or(Error::InvalidIndex)?;
786                 if l2_addr_disk != 0 {
787                     // Add a reference to the L2 table cluster itself.
788                     add_ref(refcounts, cluster_size, l2_addr_disk)?;
789 
790                     // Read the L2 table and find all referenced data clusters.
791                     let l2_table = raw_file
792                         .read_pointer_table(
793                             l2_addr_disk,
794                             cluster_size / size_of::<u64>() as u64,
795                             Some(L2_TABLE_OFFSET_MASK),
796                         )
797                         .map_err(Error::ReadingPointers)?;
798                     for data_cluster_addr in l2_table {
799                         if data_cluster_addr != 0 {
800                             add_ref(refcounts, cluster_size, data_cluster_addr)?;
801                         }
802                     }
803                 }
804             }
805 
806             Ok(())
807         }
808 
809         // Add references to the top-level refcount table clusters.
810         fn set_refcount_table_refcounts(
811             refcounts: &mut [u16],
812             header: QcowHeader,
813             cluster_size: u64,
814         ) -> Result<()> {
815             let refcount_table_offset = header.refcount_table_offset;
816             for i in 0..header.refcount_table_clusters as u64 {
817                 add_ref(
818                     refcounts,
819                     cluster_size,
820                     refcount_table_offset + i * cluster_size,
821                 )?;
822             }
823             Ok(())
824         }
825 
826         // Allocate clusters for refblocks.
827         // This needs to be done last so that we have the correct refcounts for all other
828         // clusters.
829         fn alloc_refblocks(
830             refcounts: &mut [u16],
831             cluster_size: u64,
832             refblock_clusters: u64,
833             pointers_per_cluster: u64,
834         ) -> Result<Vec<u64>> {
835             let refcount_table_entries = refblock_clusters.div_ceil(pointers_per_cluster);
836             let mut ref_table = vec![0; refcount_table_entries as usize];
837             let mut first_free_cluster: u64 = 0;
838             for refblock_addr in &mut ref_table {
839                 loop {
840                     if first_free_cluster >= refcounts.len() as u64 {
841                         return Err(Error::NotEnoughSpaceForRefcounts);
842                     }
843                     if refcounts[first_free_cluster as usize] == 0 {
844                         break;
845                     }
846                     first_free_cluster += 1;
847                 }
848 
849                 *refblock_addr = first_free_cluster * cluster_size;
850                 add_ref(refcounts, cluster_size, *refblock_addr)?;
851 
852                 first_free_cluster += 1;
853             }
854 
855             Ok(ref_table)
856         }
857 
858         // Write the updated reference count blocks and reftable.
859         fn write_refblocks(
860             refcounts: &[u16],
861             mut header: QcowHeader,
862             ref_table: &[u64],
863             raw_file: &mut QcowRawFile,
864             refcount_block_entries: u64,
865         ) -> Result<()> {
866             // Rewrite the header with lazy refcounts enabled while we are rebuilding the tables.
867             header.compatible_features |= COMPATIBLE_FEATURES_LAZY_REFCOUNTS;
868             raw_file
869                 .file_mut()
870                 .seek(SeekFrom::Start(0))
871                 .map_err(Error::SeekingFile)?;
872             header.write_to(raw_file.file_mut())?;
873 
874             for (i, refblock_addr) in ref_table.iter().enumerate() {
875                 // Write a block of refcounts to the location indicated by refblock_addr.
876                 let refblock_start = i * (refcount_block_entries as usize);
877                 let refblock_end = min(
878                     refcounts.len(),
879                     refblock_start + refcount_block_entries as usize,
880                 );
881                 let refblock = &refcounts[refblock_start..refblock_end];
882                 raw_file
883                     .write_refcount_block(*refblock_addr, refblock)
884                     .map_err(Error::WritingHeader)?;
885 
886                 // If this is the last (partial) cluster, pad it out to a full refblock cluster.
887                 if refblock.len() < refcount_block_entries as usize {
888                     let refblock_padding =
889                         vec![0u16; refcount_block_entries as usize - refblock.len()];
890                     raw_file
891                         .write_refcount_block(
892                             *refblock_addr + refblock.len() as u64 * 2,
893                             &refblock_padding,
894                         )
895                         .map_err(Error::WritingHeader)?;
896                 }
897             }
898 
899             // Rewrite the top-level refcount table.
900             raw_file
901                 .write_pointer_table(header.refcount_table_offset, ref_table, 0)
902                 .map_err(Error::WritingHeader)?;
903 
904             // Rewrite the header again, now with lazy refcounts disabled.
905             header.compatible_features &= !COMPATIBLE_FEATURES_LAZY_REFCOUNTS;
906             raw_file
907                 .file_mut()
908                 .seek(SeekFrom::Start(0))
909                 .map_err(Error::SeekingFile)?;
910             header.write_to(raw_file.file_mut())?;
911 
912             Ok(())
913         }
914 
915         let cluster_size = raw_file.cluster_size();
916 
917         let file_size = raw_file
918             .file_mut()
919             .metadata()
920             .map_err(Error::GettingFileSize)?
921             .len();
922 
923         let refcount_bits = 1u64 << header.refcount_order;
924         let refcount_bytes = refcount_bits.div_ceil(8);
925         let refcount_block_entries = cluster_size / refcount_bytes;
926         let pointers_per_cluster = cluster_size / size_of::<u64>() as u64;
927         let data_clusters = header.size.div_ceil(cluster_size);
928         let l2_clusters = data_clusters.div_ceil(pointers_per_cluster);
929         let l1_clusters = l2_clusters.div_ceil(cluster_size);
930         let header_clusters = (size_of::<QcowHeader>() as u64).div_ceil(cluster_size);
931         let max_clusters = data_clusters + l2_clusters + l1_clusters + header_clusters;
932         let mut max_valid_cluster_index = max_clusters;
933         let refblock_clusters = max_valid_cluster_index.div_ceil(refcount_block_entries);
934         let reftable_clusters = refblock_clusters.div_ceil(pointers_per_cluster);
935         // Account for refblocks and the ref table size needed to address them.
936         let refblocks_for_refs =
937             (refblock_clusters + reftable_clusters).div_ceil(refcount_block_entries);
938         let reftable_clusters_for_refs = refblocks_for_refs.div_ceil(refcount_block_entries);
939         max_valid_cluster_index += refblock_clusters + reftable_clusters;
940         max_valid_cluster_index += refblocks_for_refs + reftable_clusters_for_refs;
941 
942         if max_valid_cluster_index > MAX_RAM_POINTER_TABLE_SIZE {
943             return Err(Error::InvalidRefcountTableSize(max_valid_cluster_index));
944         }
945 
946         let max_valid_cluster_offset = max_valid_cluster_index * cluster_size;
947         if max_valid_cluster_offset < file_size - cluster_size {
948             return Err(Error::InvalidRefcountTableSize(max_valid_cluster_offset));
949         }
950 
951         let mut refcounts = vec![0; max_valid_cluster_index as usize];
952 
953         // Find all referenced clusters and rebuild refcounts.
954         set_header_refcount(&mut refcounts, cluster_size)?;
955         set_l1_refcounts(&mut refcounts, header.clone(), cluster_size)?;
956         set_data_refcounts(&mut refcounts, header.clone(), cluster_size, raw_file)?;
957         set_refcount_table_refcounts(&mut refcounts, header.clone(), cluster_size)?;
958 
959         // Allocate clusters to store the new reference count blocks.
960         let ref_table = alloc_refblocks(
961             &mut refcounts,
962             cluster_size,
963             refblock_clusters,
964             pointers_per_cluster,
965         )?;
966 
967         // Write updated reference counts and point the reftable at them.
968         write_refblocks(
969             &refcounts,
970             header,
971             &ref_table,
972             raw_file,
973             refcount_block_entries,
974         )
975     }
976 
977     // Limits the range so that it doesn't exceed the virtual size of the file.
978     fn limit_range_file(&self, address: u64, count: usize) -> usize {
979         if address.checked_add(count as u64).is_none() || address > self.virtual_size() {
980             return 0;
981         }
982         min(count as u64, self.virtual_size() - address) as usize
983     }
984 
985     // Limits the range so that it doesn't overflow the end of a cluster.
986     fn limit_range_cluster(&self, address: u64, count: usize) -> usize {
987         let offset: u64 = self.raw_file.cluster_offset(address);
988         let limit = self.raw_file.cluster_size() - offset;
989         min(count as u64, limit) as usize
990     }
991 
992     // Gets the maximum virtual size of this image.
993     fn virtual_size(&self) -> u64 {
994         self.header.size
995     }
996 
997     // Gets the offset of `address` in the L1 table.
998     fn l1_address_offset(&self, address: u64) -> u64 {
999         let l1_index = self.l1_table_index(address);
1000         l1_index * size_of::<u64>() as u64
1001     }
1002 
1003     // Gets the index of `address` in the L1 table.
1004     fn l1_table_index(&self, address: u64) -> u64 {
1005         (address / self.raw_file.cluster_size()) / self.l2_entries
1006     }
1007 
1008     // Gets the index of `address` in the L2 table.
1009     fn l2_table_index(&self, address: u64) -> u64 {
1010         (address / self.raw_file.cluster_size()) % self.l2_entries
1011     }
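    // For example, with the default 64 KiB clusters there are 8192 L2 entries per table, so a
    // guest address decomposes as:
    //   l1_index                  = address >> 29          ((address / 65536) / 8192)
    //   l2_index                  = (address >> 16) & 0x1fff
    //   offset within the cluster = address & 0xffff        (see `QcowRawFile::cluster_offset`)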
1012 
1013     // Gets the offset of the given guest address in the host file. If L1, L2, or data clusters have
1014     // yet to be allocated, return None.
1015     fn file_offset_read(&mut self, address: u64) -> std::io::Result<Option<u64>> {
1016         if address >= self.virtual_size() {
1017             return Err(std::io::Error::from_raw_os_error(EINVAL));
1018         }
1019 
1020         let l1_index = self.l1_table_index(address) as usize;
1021         let l2_addr_disk = *self
1022             .l1_table
1023             .get(l1_index)
1024             .ok_or_else(|| std::io::Error::from_raw_os_error(EINVAL))?;
1025 
1026         if l2_addr_disk == 0 {
1027             // Reading from an unallocated cluster will return zeros.
1028             return Ok(None);
1029         }
1030 
1031         let l2_index = self.l2_table_index(address) as usize;
1032 
1033         if !self.l2_cache.contains_key(&l1_index) {
1034             // Not in the cache.
1035             let table =
1036                 VecCache::from_vec(Self::read_l2_cluster(&mut self.raw_file, l2_addr_disk)?);
1037 
1038             let l1_table = &self.l1_table;
1039             let raw_file = &mut self.raw_file;
1040             self.l2_cache.insert(l1_index, table, |index, evicted| {
1041                 raw_file.write_pointer_table(
1042                     l1_table[index],
1043                     evicted.get_values(),
1044                     CLUSTER_USED_FLAG,
1045                 )
1046             })?;
1047         };
1048 
1049         let cluster_addr = self.l2_cache.get(&l1_index).unwrap()[l2_index];
1050         if cluster_addr == 0 {
1051             return Ok(None);
1052         }
1053         Ok(Some(cluster_addr + self.raw_file.cluster_offset(address)))
1054     }
1055 
1056     // Gets the offset of the given guest address in the host file. If L1, L2, or data clusters need
1057     // to be allocated, they will be.
1058     fn file_offset_write(&mut self, address: u64) -> std::io::Result<u64> {
1059         if address >= self.virtual_size() {
1060             return Err(std::io::Error::from_raw_os_error(EINVAL));
1061         }
1062 
1063         let l1_index = self.l1_table_index(address) as usize;
1064         let l2_addr_disk = *self
1065             .l1_table
1066             .get(l1_index)
1067             .ok_or_else(|| std::io::Error::from_raw_os_error(EINVAL))?;
1068         let l2_index = self.l2_table_index(address) as usize;
1069 
1070         let mut set_refcounts = Vec::new();
1071 
1072         if !self.l2_cache.contains_key(&l1_index) {
1073             // Not in the cache.
1074             let l2_table = if l2_addr_disk == 0 {
1075                 // Allocate a new cluster to store the L2 table and update the L1 table to point
1076                 // to the new table.
1077                 let new_addr: u64 = self.get_new_cluster(None)?;
1078                 // The cluster refcount starts at one meaning it is used but doesn't need COW.
1079                 set_refcounts.push((new_addr, 1));
1080                 self.l1_table[l1_index] = new_addr;
1081                 VecCache::new(self.l2_entries as usize)
1082             } else {
1083                 VecCache::from_vec(Self::read_l2_cluster(&mut self.raw_file, l2_addr_disk)?)
1084             };
1085             let l1_table = &self.l1_table;
1086             let raw_file = &mut self.raw_file;
1087             self.l2_cache.insert(l1_index, l2_table, |index, evicted| {
1088                 raw_file.write_pointer_table(
1089                     l1_table[index],
1090                     evicted.get_values(),
1091                     CLUSTER_USED_FLAG,
1092                 )
1093             })?;
1094         }
1095 
1096         let cluster_addr = match self.l2_cache.get(&l1_index).unwrap()[l2_index] {
1097             0 => {
1098                 let initial_data = if let Some(backing) = self.backing_file.as_mut() {
1099                     let cluster_size = self.raw_file.cluster_size();
1100                     let cluster_begin = address - (address % cluster_size);
1101                     let mut cluster_data = vec![0u8; cluster_size as usize];
1102                     let volatile_slice = VolatileSlice::new(&mut cluster_data);
1103                     backing.read_exact_at_volatile(volatile_slice, cluster_begin)?;
1104                     Some(cluster_data)
1105                 } else {
1106                     None
1107                 };
1108                 // Need to allocate a data cluster
1109                 let cluster_addr = self.append_data_cluster(initial_data)?;
1110                 self.update_cluster_addr(l1_index, l2_index, cluster_addr, &mut set_refcounts)?;
1111                 cluster_addr
1112             }
1113             a => a,
1114         };
1115 
1116         for (addr, count) in set_refcounts {
1117             let mut newly_unref = self.set_cluster_refcount(addr, count)?;
1118             self.unref_clusters.append(&mut newly_unref);
1119         }
1120 
1121         Ok(cluster_addr + self.raw_file.cluster_offset(address))
1122     }
1123 
1124     // Updates the l1 and l2 tables to point to the new `cluster_addr`.
1125     fn update_cluster_addr(
1126         &mut self,
1127         l1_index: usize,
1128         l2_index: usize,
1129         cluster_addr: u64,
1130         set_refcounts: &mut Vec<(u64, u16)>,
1131     ) -> io::Result<()> {
1132         if !self.l2_cache.get(&l1_index).unwrap().dirty() {
1133             // Free the previously used cluster if one exists. Modified tables are always
1134         // written to new clusters so the L1 table can be committed to disk after they
1135         // are, and L1 never points at an invalid table.
1136         // The index must be valid from when it was inserted.
1137             let addr = self.l1_table[l1_index];
1138             if addr != 0 {
1139                 self.unref_clusters.push(addr);
1140                 set_refcounts.push((addr, 0));
1141             }
1142 
1143             // Allocate a new cluster to store the L2 table and update the L1 table to point
1144             // to the new table. The cluster will be written when the cache is flushed, no
1145             // need to copy the data now.
1146             let new_addr: u64 = self.get_new_cluster(None)?;
1147             // The cluster refcount starts at one indicating it is used but doesn't need
1148             // COW.
1149             set_refcounts.push((new_addr, 1));
1150             self.l1_table[l1_index] = new_addr;
1151         }
1152         // 'unwrap' is OK because it was just added.
1153         self.l2_cache.get_mut(&l1_index).unwrap()[l2_index] = cluster_addr;
1154         Ok(())
1155     }
1156 
1157     // Allocate a new cluster and return its offset within the raw file.
1158     fn get_new_cluster(&mut self, initial_data: Option<Vec<u8>>) -> std::io::Result<u64> {
1159         // First use a pre-allocated cluster if one is available.
1160         if let Some(free_cluster) = self.avail_clusters.pop() {
1161             if let Some(initial_data) = initial_data {
1162                 self.raw_file.write_cluster(free_cluster, initial_data)?;
1163             } else {
1164                 self.raw_file.zero_cluster(free_cluster)?;
1165             }
1166             return Ok(free_cluster);
1167         }
1168 
1169         let max_valid_cluster_offset = self.refcounts.max_valid_cluster_offset();
1170         if let Some(new_cluster) = self.raw_file.add_cluster_end(max_valid_cluster_offset)? {
1171             if let Some(initial_data) = initial_data {
1172                 self.raw_file.write_cluster(new_cluster, initial_data)?;
1173             }
1174             Ok(new_cluster)
1175         } else {
1176             error!("No free clusters in get_new_cluster()");
1177             Err(std::io::Error::from_raw_os_error(ENOSPC))
1178         }
1179     }
1180 
1181     // Allocate and initialize a new data cluster. Returns the offset of the
1182     // cluster into the file on success.
1183     fn append_data_cluster(&mut self, initial_data: Option<Vec<u8>>) -> std::io::Result<u64> {
1184         let new_addr: u64 = self.get_new_cluster(initial_data)?;
1185         // The cluster refcount starts at one indicating it is used but doesn't need COW.
1186         let mut newly_unref = self.set_cluster_refcount(new_addr, 1)?;
1187         self.unref_clusters.append(&mut newly_unref);
1188         Ok(new_addr)
1189     }
1190 
1191     // Deallocate the storage for the cluster starting at `address`.
1192     // Any future reads of this cluster will return all zeroes (or the backing file, if in use).
1193     fn deallocate_cluster(&mut self, address: u64) -> std::io::Result<()> {
1194         if address >= self.virtual_size() {
1195             return Err(std::io::Error::from_raw_os_error(EINVAL));
1196         }
1197 
1198         let l1_index = self.l1_table_index(address) as usize;
1199         let l2_addr_disk = *self
1200             .l1_table
1201             .get(l1_index)
1202             .ok_or_else(|| std::io::Error::from_raw_os_error(EINVAL))?;
1203         let l2_index = self.l2_table_index(address) as usize;
1204 
1205         if l2_addr_disk == 0 {
1206             // The whole L2 table for this address is not allocated yet,
1207             // so the cluster must also be unallocated.
1208             return Ok(());
1209         }
1210 
1211         if !self.l2_cache.contains_key(&l1_index) {
1212             // Not in the cache.
1213             let table =
1214                 VecCache::from_vec(Self::read_l2_cluster(&mut self.raw_file, l2_addr_disk)?);
1215             let l1_table = &self.l1_table;
1216             let raw_file = &mut self.raw_file;
1217             self.l2_cache.insert(l1_index, table, |index, evicted| {
1218                 raw_file.write_pointer_table(
1219                     l1_table[index],
1220                     evicted.get_values(),
1221                     CLUSTER_USED_FLAG,
1222                 )
1223             })?;
1224         }
1225 
1226         let cluster_addr = self.l2_cache.get(&l1_index).unwrap()[l2_index];
1227         if cluster_addr == 0 {
1228             // This cluster is already unallocated; nothing to do.
1229             return Ok(());
1230         }
1231 
1232         // Decrement the refcount.
1233         let refcount = self
1234             .refcounts
1235             .get_cluster_refcount(&mut self.raw_file, cluster_addr)
1236             .map_err(|_| std::io::Error::from_raw_os_error(EINVAL))?;
1237         if refcount == 0 {
1238             return Err(std::io::Error::from_raw_os_error(EINVAL));
1239         }
1240 
1241         let new_refcount = refcount - 1;
1242         let mut newly_unref = self.set_cluster_refcount(cluster_addr, new_refcount)?;
1243         self.unref_clusters.append(&mut newly_unref);
1244 
1245         // Rewrite the L2 entry to remove the cluster mapping.
1246         // unwrap is safe as we just checked/inserted this entry.
1247         self.l2_cache.get_mut(&l1_index).unwrap()[l2_index] = 0;
1248 
1249         if new_refcount == 0 {
1250             let cluster_size = self.raw_file.cluster_size();
1251             // This cluster is no longer in use; deallocate the storage.
1252             // The underlying FS may not support FALLOC_FL_PUNCH_HOLE,
1253             // so don't treat an error as fatal.  Future reads will return zeros anyways.
1254             let _ = self.raw_file.file().punch_hole(cluster_addr, cluster_size);
1255             self.unref_clusters.push(cluster_addr);
1256         }
1257         Ok(())
1258     }
1259 
1260     // Fill a range of `length` bytes starting at `address` with zeroes.
1261     // Any future reads of this range will return all zeroes.
1262     // If there is no backing file, this will deallocate cluster storage when possible.
1263     fn zero_bytes(&mut self, address: u64, length: usize) -> std::io::Result<()> {
1264         let write_count: usize = self.limit_range_file(address, length);
1265 
1266         let mut nwritten: usize = 0;
1267         while nwritten < write_count {
1268             let curr_addr = address + nwritten as u64;
1269             let count = self.limit_range_cluster(curr_addr, write_count - nwritten);
1270 
1271             if self.backing_file.is_none() && count == self.raw_file.cluster_size() as usize {
1272                 // Full cluster and no backing file in use - deallocate the storage.
1273                 self.deallocate_cluster(curr_addr)?;
1274             } else {
1275                 // Partial cluster - zero out the relevant bytes.
1276                 let offset = if self.backing_file.is_some() {
1277                     // There is a backing file, so we need to allocate a cluster in order to
1278                     // zero out the hole-punched bytes such that the backing file contents do not
1279                     // show through.
1280                     Some(self.file_offset_write(curr_addr)?)
1281                 } else {
1282                     // Any space in unallocated clusters can be left alone, since
1283                     // unallocated clusters already read back as zeroes.
1284                     self.file_offset_read(curr_addr)?
1285                 };
1286                 if let Some(offset) = offset {
1287                     // Partial cluster - zero it out.
1288                     self.raw_file.file().write_zeroes_all_at(offset, count)?;
1289                 }
1290             }
1291 
1292             nwritten += count;
1293         }
1294         Ok(())
1295     }
1296 
1297     // Reads an L2 cluster from the disk, returning an error if the file can't be read or if any
1298     // cluster is compressed.
1299     fn read_l2_cluster(raw_file: &mut QcowRawFile, cluster_addr: u64) -> std::io::Result<Vec<u64>> {
1300         let file_values = raw_file.read_pointer_cluster(cluster_addr, None)?;
1301         if file_values.iter().any(|entry| entry & COMPRESSED_FLAG != 0) {
1302             return Err(std::io::Error::from_raw_os_error(ENOTSUP));
1303         }
1304         Ok(file_values
1305             .iter()
1306             .map(|entry| *entry & L2_TABLE_OFFSET_MASK)
1307             .collect())
1308     }
1309 
1310     // Set the refcount for a cluster with the given address.
1311     // Returns a list of any refblocks that can be reused; this happens when a refblock is moved,
1312     // since the old location can then be reused.
1313     fn set_cluster_refcount(&mut self, address: u64, refcount: u16) -> std::io::Result<Vec<u64>> {
1314         let mut added_clusters = Vec::new();
1315         let mut unref_clusters = Vec::new();
1316         let mut refcount_set = false;
1317         let mut new_cluster = None;
1318 
1319         while !refcount_set {
1320             match self.refcounts.set_cluster_refcount(
1321                 &mut self.raw_file,
1322                 address,
1323                 refcount,
1324                 new_cluster.take(),
1325             ) {
1326                 Ok(None) => {
1327                     refcount_set = true;
1328                 }
1329                 Ok(Some(freed_cluster)) => {
1330                     unref_clusters.push(freed_cluster);
1331                     refcount_set = true;
1332                 }
1333                 Err(refcount::Error::EvictingRefCounts(e)) => {
1334                     return Err(e);
1335                 }
1336                 Err(refcount::Error::InvalidIndex) => {
1337                     return Err(std::io::Error::from_raw_os_error(EINVAL));
1338                 }
1339                 Err(refcount::Error::NeedCluster(addr)) => {
1340                     // Read the address and call set_cluster_refcount again.
1341                     new_cluster = Some((
1342                         addr,
1343                         VecCache::from_vec(self.raw_file.read_refcount_block(addr)?),
1344                     ));
1345                 }
1346                 Err(refcount::Error::NeedNewCluster) => {
1347                     // Allocate the cluster and call set_cluster_refcount again.
1348                     let addr = self.get_new_cluster(None)?;
1349                     added_clusters.push(addr);
1350                     new_cluster = Some((
1351                         addr,
1352                         VecCache::new(self.refcounts.refcounts_per_block() as usize),
1353                     ));
1354                 }
1355                 Err(refcount::Error::ReadingRefCounts(e)) => {
1356                     return Err(e);
1357                 }
1358             }
1359         }
1360 
1361         for addr in added_clusters {
1362             self.set_cluster_refcount(addr, 1)?;
1363         }
1364         Ok(unref_clusters)
1365     }
1366 
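         // Flushes all dirty metadata to the raw file: dirty L2 tables and refcount blocks are
         // written and synced first, then the L1 table and refcount table, so the top-level tables
         // never point at clusters that have not yet reached the disk.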
1367     fn sync_caches(&mut self) -> std::io::Result<()> {
1368         // Write out all dirty L2 tables.
1369         for (l1_index, l2_table) in self.l2_cache.iter_mut().filter(|(_k, v)| v.dirty()) {
1370             // The index must be valid from when we inserted it.
1371             let addr = self.l1_table[*l1_index];
1372             if addr != 0 {
1373                 self.raw_file.write_pointer_table(
1374                     addr,
1375                     l2_table.get_values(),
1376                     CLUSTER_USED_FLAG,
1377                 )?;
1378             } else {
1379                 return Err(std::io::Error::from_raw_os_error(EINVAL));
1380             }
1381             l2_table.mark_clean();
1382         }
1383         // Write the modified refcount blocks.
1384         self.refcounts.flush_blocks(&mut self.raw_file)?;
1385         // Make sure metadata (file length) and all data clusters are written.
1386         self.raw_file.file_mut().sync_all()?;
1387 
1388         // Push L1 table and refcount table last as all the clusters they point to are now
1389         // guaranteed to be valid.
1390         let mut sync_required = false;
1391         if self.l1_table.dirty() {
1392             self.raw_file.write_pointer_table(
1393                 self.header.l1_table_offset,
1394                 self.l1_table.get_values(),
1395                 0,
1396             )?;
1397             self.l1_table.mark_clean();
1398             sync_required = true;
1399         }
1400         sync_required |= self.refcounts.flush_table(&mut self.raw_file)?;
1401         if sync_required {
1402             self.raw_file.file_mut().sync_data()?;
1403         }
1404         Ok(())
1405     }
1406 
1407     // Reads `count` bytes starting at `address`, calling `cb` repeatedly with the data source,
1408     // number of bytes read so far, offset to read from, and number of bytes to read from the file
1409     // in that invocation. If `None` is passed to `cb` as the data source (unallocated cluster and
1410     // no backing file), `cb` should treat the corresponding bytes as zeros.
1411     fn read_cb<F>(&mut self, address: u64, count: usize, mut cb: F) -> std::io::Result<usize>
1412     where
1413         F: FnMut(Option<&mut dyn DiskFile>, usize, u64, usize) -> std::io::Result<()>,
1414     {
1415         let read_count: usize = self.limit_range_file(address, count);
1416 
1417         let mut nread: usize = 0;
1418         while nread < read_count {
1419             let curr_addr = address + nread as u64;
1420             let file_offset = self.file_offset_read(curr_addr)?;
1421             let count = self.limit_range_cluster(curr_addr, read_count - nread);
1422 
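                 // Dispatch to the raw file if the cluster is allocated, otherwise to the backing
                 // file if one exists, otherwise report a hole (None) to the callback.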
1423             if let Some(offset) = file_offset {
1424                 cb(Some(self.raw_file.file_mut()), nread, offset, count)?;
1425             } else if let Some(backing) = self.backing_file.as_mut() {
1426                 cb(Some(backing.as_mut()), nread, curr_addr, count)?;
1427             } else {
1428                 cb(None, nread, 0, count)?;
1429             }
1430 
1431             nread += count;
1432         }
1433         Ok(read_count)
1434     }
1435 
1436     // Writes `count` bytes starting at `address`, calling `cb` repeatedly with the raw file,
1437     // number of bytes written so far, raw file offset, and number of bytes to write to the file in
1438     // that invocation.
1439     fn write_cb<F>(&mut self, address: u64, count: usize, mut cb: F) -> std::io::Result<usize>
1440     where
1441         F: FnMut(&mut File, usize, u64, usize) -> std::io::Result<()>,
1442     {
1443         let write_count: usize = self.limit_range_file(address, count);
1444 
1445         let mut nwritten: usize = 0;
1446         while nwritten < write_count {
1447             let curr_addr = address + nwritten as u64;
1448             let offset = self.file_offset_write(curr_addr)?;
1449             let count = self.limit_range_cluster(curr_addr, write_count - nwritten);
1450 
1451             cb(self.raw_file.file_mut(), nwritten, offset, count)?;
1452 
1453             nwritten += count;
1454         }
1455         Ok(write_count)
1456     }
1457 }
1458 
1459 impl Drop for QcowFile {
1460     fn drop(&mut self) {
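             // Flush any dirty metadata before the file is closed; errors are ignored because
             // drop() has no way to report them.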
1461         let _ = self.inner.get_mut().sync_caches();
1462     }
1463 }
1464 
1465 impl AsRawDescriptors for QcowFile {
1466     fn as_raw_descriptors(&self) -> Vec<RawDescriptor> {
1467         // Taking a lock here feels wrong, but this method is generally only used during
1468         // sandboxing, so it should be OK.
1469         let inner = self.inner.lock();
1470         let mut descriptors = vec![inner.raw_file.file().as_raw_descriptor()];
1471         if let Some(backing) = &inner.backing_file {
1472             descriptors.append(&mut backing.as_raw_descriptors());
1473         }
1474         descriptors
1475     }
1476 }
1477 
1478 impl Read for QcowFile {
1479     fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
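             // Reads from the current offset, filling holes (unallocated clusters with no backing
             // file) with zeros, and advances the offset by the number of bytes read.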
1480         let inner = self.inner.get_mut();
1481         let len = buf.len();
1482         let slice = VolatileSlice::new(buf);
1483         let read_count = inner.read_cb(
1484             inner.current_offset,
1485             len,
1486             |file, already_read, offset, count| {
1487                 let sub_slice = slice.get_slice(already_read, count).unwrap();
1488                 match file {
1489                     Some(f) => f.read_exact_at_volatile(sub_slice, offset),
1490                     None => {
1491                         sub_slice.write_bytes(0);
1492                         Ok(())
1493                     }
1494                 }
1495             },
1496         )?;
1497         inner.current_offset += read_count as u64;
1498         Ok(read_count)
1499     }
1500 }
1501 
1502 impl Seek for QcowFile {
1503     fn seek(&mut self, pos: SeekFrom) -> std::io::Result<u64> {
1504         let inner = self.inner.get_mut();
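             // Compute the new offset with checked arithmetic so that overflow, or seeking before
             // the start of the file, yields None and is rejected below with EINVAL.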
1505         let new_offset: Option<u64> = match pos {
1506             SeekFrom::Start(off) => Some(off),
1507             SeekFrom::End(off) => {
1508                 if off < 0 {
1509                     0i64.checked_sub(off)
1510                         .and_then(|increment| inner.virtual_size().checked_sub(increment as u64))
1511                 } else {
1512                     inner.virtual_size().checked_add(off as u64)
1513                 }
1514             }
1515             SeekFrom::Current(off) => {
1516                 if off < 0 {
1517                     0i64.checked_sub(off)
1518                         .and_then(|increment| inner.current_offset.checked_sub(increment as u64))
1519                 } else {
1520                     inner.current_offset.checked_add(off as u64)
1521                 }
1522             }
1523         };
1524 
1525         if let Some(o) = new_offset {
1526             if o <= inner.virtual_size() {
1527                 inner.current_offset = o;
1528                 return Ok(o);
1529             }
1530         }
1531         Err(std::io::Error::from_raw_os_error(EINVAL))
1532     }
1533 }
1534 
1535 impl Write for QcowFile {
1536     fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
1537         let inner = self.inner.get_mut();
1538         let write_count = inner.write_cb(
1539             inner.current_offset,
1540             buf.len(),
1541             |file, offset, raw_offset, count| {
1542                 file.seek(SeekFrom::Start(raw_offset))?;
1543                 file.write_all(&buf[offset..(offset + count)])
1544             },
1545         )?;
1546         inner.current_offset += write_count as u64;
1547         Ok(write_count)
1548     }
1549 
1550     fn flush(&mut self) -> std::io::Result<()> {
1551         self.fsync()
1552     }
1553 }
1554 
1555 impl FileReadWriteAtVolatile for QcowFile {
1556     fn read_at_volatile(&self, slice: VolatileSlice, offset: u64) -> io::Result<usize> {
1557         let mut inner = self.inner.lock();
1558         inner.read_cb(offset, slice.size(), |file, read, offset, count| {
1559             let sub_slice = slice.get_slice(read, count).unwrap();
1560             match file {
1561                 Some(f) => f.read_exact_at_volatile(sub_slice, offset),
1562                 None => {
1563                     sub_slice.write_bytes(0);
1564                     Ok(())
1565                 }
1566             }
1567         })
1568     }
1569 
1570     fn write_at_volatile(&self, slice: VolatileSlice, offset: u64) -> io::Result<usize> {
1571         let mut inner = self.inner.lock();
1572         inner.write_cb(offset, slice.size(), |file, offset, raw_offset, count| {
1573             let sub_slice = slice.get_slice(offset, count).unwrap();
1574             file.write_all_at_volatile(sub_slice, raw_offset)
1575         })
1576     }
1577 }
1578 
1579 impl FileSync for QcowFile {
1580     fn fsync(&self) -> std::io::Result<()> {
1581         let mut inner = self.inner.lock();
1582         inner.sync_caches()?;
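             // Now that the metadata referencing them has been written out, clusters freed since
             // the last flush can safely be reused.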
1583         let unref_clusters = std::mem::take(&mut inner.unref_clusters);
1584         inner.avail_clusters.extend(unref_clusters);
1585         Ok(())
1586     }
1587 
1588     fn fdatasync(&self) -> io::Result<()> {
1589         // QcowFile does not implement fdatasync. Just fall back to fsync.
1590         self.fsync()
1591     }
1592 }
1593 
1594 impl FileSetLen for QcowFile {
1595     fn set_len(&self, _len: u64) -> std::io::Result<()> {
1596         Err(std::io::Error::new(
1597             std::io::ErrorKind::Other,
1598             "set_len() not supported for QcowFile",
1599         ))
1600     }
1601 }
1602 
1603 impl DiskGetLen for QcowFile {
1604     fn get_len(&self) -> io::Result<u64> {
1605         Ok(self.virtual_size)
1606     }
1607 }
1608 
1609 impl FileAllocate for QcowFile {
1610     fn allocate(&self, offset: u64, len: u64) -> io::Result<()> {
1611         let mut inner = self.inner.lock();
1612         // Call write_cb with a do-nothing callback, which will have the effect
1613         // of allocating all clusters in the specified range.
1614         inner.write_cb(
1615             offset,
1616             len as usize,
1617             |_file, _offset, _raw_offset, _count| Ok(()),
1618         )?;
1619         Ok(())
1620     }
1621 }
1622 
1623 impl PunchHole for QcowFile {
1624     fn punch_hole(&self, offset: u64, length: u64) -> std::io::Result<()> {
1625         let mut inner = self.inner.lock();
1626         let mut remaining = length;
1627         let mut offset = offset;
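             // zero_bytes() takes a usize length, so process the u64 range in usize-sized chunks.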
1628         while remaining > 0 {
1629             let chunk_length = min(remaining, usize::MAX as u64) as usize;
1630             inner.zero_bytes(offset, chunk_length)?;
1631             remaining -= chunk_length as u64;
1632             offset += chunk_length as u64;
1633         }
1634         Ok(())
1635     }
1636 }
1637 
1638 impl WriteZeroesAt for QcowFile {
1639     fn write_zeroes_at(&self, offset: u64, length: usize) -> io::Result<usize> {
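             // For a qcow file, punching a hole makes the range read back as zeros (see
             // zero_bytes), so it satisfies write_zeroes_at directly.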
1640         self.punch_hole(offset, length as u64)?;
1641         Ok(length)
1642     }
1643 }
1644 
1645 impl ToAsyncDisk for QcowFile {
1646     fn to_async_disk(self: Box<Self>, ex: &Executor) -> crate::Result<Box<dyn AsyncDisk>> {
1647         Ok(Box::new(AsyncDiskFileWrapper::new(*self, ex)))
1648     }
1649 }
1650 
1651 // Returns an Error if the given offset doesn't align to a cluster boundary.
1652 fn offset_is_cluster_boundary(offset: u64, cluster_bits: u32) -> Result<()> {
1653     if offset & ((0x01 << cluster_bits) - 1) != 0 {
1654         return Err(Error::InvalidOffset(offset));
1655     }
1656     Ok(())
1657 }
1658 
1659 #[cfg(test)]
1660 mod tests {
1661     use std::fs::OpenOptions;
1662     use std::io::Read;
1663     use std::io::Seek;
1664     use std::io::SeekFrom;
1665     use std::io::Write;
1666 
1667     use tempfile::tempfile;
1668     use tempfile::TempDir;
1669 
1670     use super::*;
1671 
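         // Hand-built big-endian qcow2 header describing a 0x20_0000_0000-byte (128 GiB) disk with
         // cluster_bits = 16 (64 KiB clusters).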
1672     fn valid_header() -> Vec<u8> {
1673         vec![
1674             0x51u8, 0x46, 0x49, 0xfb, // magic
1675             0x00, 0x00, 0x00, 0x03, // version
1676             0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // backing file offset
1677             0x00, 0x00, 0x00, 0x00, // backing file size
1678             0x00, 0x00, 0x00, 0x10, // cluster_bits
1679             0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, // size
1680             0x00, 0x00, 0x00, 0x00, // crypt method
1681             0x00, 0x00, 0x01, 0x00, // L1 size
1682             0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, // L1 table offset
1683             0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, // refcount table offset
1684             0x00, 0x00, 0x00, 0x03, // refcount table clusters
1685             0x00, 0x00, 0x00, 0x00, // nb snapshots
1686             0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, // snapshots offset
1687             0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // incompatible_features
1688             0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // compatible_features
1689             0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // autoclear_features
1690             0x00, 0x00, 0x00, 0x04, // refcount_order
1691             0x00, 0x00, 0x00, 0x68, // header_length
1692         ]
1693     }
1694 
1695     // Test case found by clusterfuzz that caused excessive memory allocation.
1696     fn test_huge_header() -> Vec<u8> {
1697         vec![
1698             0x51, 0x46, 0x49, 0xfb, // magic
1699             0x00, 0x00, 0x00, 0x03, // version
1700             0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // backing file offset
1701             0x00, 0x00, 0x00, 0x00, // backing file size
1702             0x00, 0x00, 0x00, 0x09, // cluster_bits
1703             0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, // size
1704             0x00, 0x00, 0x00, 0x00, // crypt method
1705             0x00, 0x00, 0x01, 0x00, // L1 size
1706             0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, // L1 table offset
1707             0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, // refcount table offset
1708             0x00, 0x00, 0x00, 0x03, // refcount table clusters
1709             0x00, 0x00, 0x00, 0x00, // nb snapshots
1710             0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, // snapshots offset
1711             0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // incompatible_features
1712             0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // compatible_features
1713             0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // autoclear_features
1714             0x00, 0x00, 0x00, 0x04, // refcount_order
1715             0x00, 0x00, 0x00, 0x68, // header_length
1716         ]
1717     }
1718 
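         // Default DiskFileParams used by these tests when opening qcow images.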
1719     fn test_params() -> DiskFileParams {
1720         DiskFileParams {
1721             path: PathBuf::from("/foo"),
1722             is_read_only: false,
1723             is_sparse_file: false,
1724             is_overlapped: false,
1725             is_direct: false,
1726             lock: true,
1727             depth: 0,
1728         }
1729     }
1730 
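         // Creates a temp file containing `header` and extends it to 0x8000_0000 bytes so it can
         // be opened as a qcow disk image.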
1731     fn basic_file(header: &[u8]) -> File {
1732         let mut disk_file = tempfile().expect("failed to create temp file");
1733         disk_file.write_all(header).unwrap();
1734         disk_file.set_len(0x8000_0000).unwrap();
1735         disk_file.seek(SeekFrom::Start(0)).unwrap();
1736         disk_file
1737     }
1738 
1739     fn with_basic_file<F>(header: &[u8], mut testfn: F)
1740     where
1741         F: FnMut(File),
1742     {
1743         testfn(basic_file(header)); // File closed when the function exits.
1744     }
1745 
1746     fn with_default_file<F>(file_size: u64, mut testfn: F)
1747     where
1748         F: FnMut(QcowFile),
1749     {
1750         let file = tempfile().expect("failed to create temp file");
1751         let qcow_file = QcowFile::new(file, test_params(), file_size).unwrap();
1752 
1753         testfn(qcow_file); // File closed when the function exits.
1754     }
1755 
1756     // Test helper function to convert a normal slice to a VolatileSlice and write it.
1757     fn write_all_at(qcow: &mut QcowFile, data: &[u8], offset: u64) -> std::io::Result<()> {
1758         let mut mem = data.to_owned();
1759         let vslice = VolatileSlice::new(&mut mem);
1760         qcow.write_all_at_volatile(vslice, offset)
1761     }
1762 
1763     // Test helper function to read to a VolatileSlice and copy it to a normal slice.
1764     fn read_exact_at(qcow: &mut QcowFile, data: &mut [u8], offset: u64) -> std::io::Result<()> {
1765         let mut mem = data.to_owned();
1766         let vslice = VolatileSlice::new(&mut mem);
1767         qcow.read_exact_at_volatile(vslice, offset)?;
1768         vslice.copy_to(data);
1769         Ok(())
1770     }
1771 
1772     #[test]
1773     fn default_header() {
1774         let header = QcowHeader::create_for_size_and_path(0x10_0000, None);
1775         let mut disk_file = tempfile().expect("failed to create temp file");
1776         header
1777             .expect("Failed to create header.")
1778             .write_to(&mut disk_file)
1779             .expect("Failed to write header to shm.");
1780         disk_file.seek(SeekFrom::Start(0)).unwrap();
1781         QcowFile::from(disk_file, test_params())
1782             .expect("Failed to create Qcow from default Header");
1783     }
1784 
1785     #[test]
1786     fn header_read() {
1787         with_basic_file(&valid_header(), |mut disk_file: File| {
1788             QcowHeader::new(&mut disk_file).expect("Failed to create Header.");
1789         });
1790     }
1791 
1792     #[test]
1793     fn header_with_backing() {
1794         let header = QcowHeader::create_for_size_and_path(0x10_0000, Some("/my/path/to/a/file"))
1795             .expect("Failed to create header.");
1796         let mut disk_file = tempfile().expect("failed to create temp file");
1797         header
1798             .write_to(&mut disk_file)
1799             .expect("Failed to write header to shm.");
1800         disk_file.seek(SeekFrom::Start(0)).unwrap();
1801         let read_header = QcowHeader::new(&mut disk_file).expect("Failed to create header.");
1802         assert_eq!(
1803             header.backing_file_path,
1804             Some(String::from("/my/path/to/a/file"))
1805         );
1806         assert_eq!(read_header.backing_file_path, header.backing_file_path);
1807     }
1808 
1809     #[test]
1810     fn invalid_magic() {
1811         let invalid_header = vec![0x51u8, 0x46, 0x4a, 0xfb];
1812         with_basic_file(&invalid_header, |mut disk_file: File| {
1813             QcowHeader::new(&mut disk_file).expect_err("Invalid header worked.");
1814         });
1815     }
1816 
1817     #[test]
1818     fn invalid_refcount_order() {
1819         let mut header = valid_header();
1820         header[99] = 2;
1821         with_basic_file(&header, |disk_file: File| {
1822             QcowFile::from(disk_file, test_params()).expect_err("Invalid refcount order worked.");
1823         });
1824     }
1825 
1826     #[test]
1827     fn invalid_cluster_bits() {
1828         let mut header = valid_header();
1829         header[23] = 3;
1830         with_basic_file(&header, |disk_file: File| {
1831             QcowFile::from(disk_file, test_params()).expect_err("Failed to create file.");
1832         });
1833     }
1834 
1835     #[test]
1836     fn test_header_huge_file() {
1837         let header = test_huge_header();
1838         with_basic_file(&header, |disk_file: File| {
1839             QcowFile::from(disk_file, test_params()).expect_err("Failed to create file.");
1840         });
1841     }
1842 
1843     #[test]
1844     fn test_header_excessive_file_size_rejected() {
1845         let mut header = valid_header();
1846         header[24..32].copy_from_slice(&[0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x1e]);
1847         with_basic_file(&header, |disk_file: File| {
1848             QcowFile::from(disk_file, test_params()).expect_err("Failed to create file.");
1849         });
1850     }
1851 
1852     #[test]
1853     fn test_huge_l1_table() {
1854         let mut header = valid_header();
1855         header[36] = 0x12;
1856         with_basic_file(&header, |disk_file: File| {
1857             QcowFile::from(disk_file, test_params()).expect_err("Failed to create file.");
1858         });
1859     }
1860 
1861     #[test]
1862     fn test_header_1_tb_file_min_cluster() {
1863         let mut header = test_huge_header();
1864         header[24] = 0;
1865         header[26] = 1;
1866         header[31] = 0;
1867         // 1 TB with the min cluster size makes the arrays too big, it should fail.
1868         with_basic_file(&header, |disk_file: File| {
1869             QcowFile::from(disk_file, test_params()).expect_err("Failed to create file.");
1870         });
1871     }
1872 
1873     #[cfg_attr(windows, ignore = "TODO(b/257958782): Enable large test on windows")]
1874     #[test]
1875     fn test_header_1_tb_file() {
1876         let mut header = test_huge_header();
1877         // reset to 1 TB size.
1878         header[24] = 0;
1879         header[26] = 1;
1880         header[31] = 0;
1881         // set cluster_bits
1882         header[23] = 16;
1883         with_basic_file(&header, |disk_file: File| {
1884             let mut qcow =
1885                 QcowFile::from(disk_file, test_params()).expect("Failed to create file.");
1886             let value = 0x0000_0040_3f00_ffffu64;
1887             write_all_at(&mut qcow, &value.to_le_bytes(), 0x100_0000_0000 - 8)
1888                 .expect("failed to write data");
1889         });
1890     }
1891 
1892     #[test]
1893     fn test_header_huge_num_refcounts() {
1894         let mut header = valid_header();
1895         header[56..60].copy_from_slice(&[0x02, 0x00, 0xe8, 0xff]);
1896         with_basic_file(&header, |disk_file: File| {
1897             QcowFile::from(disk_file, test_params())
1898                 .expect_err("Created disk with excessive refcount clusters");
1899         });
1900     }
1901 
1902     #[test]
1903     fn test_header_huge_refcount_offset() {
1904         let mut header = valid_header();
1905         header[48..56].copy_from_slice(&[0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x02, 0x00]);
1906         with_basic_file(&header, |disk_file: File| {
1907             QcowFile::from(disk_file, test_params())
1908                 .expect_err("Created disk with excessive refcount offset");
1909         });
1910     }
1911 
1912     #[cfg_attr(windows, ignore = "TODO(b/257958782): Enable large test on windows")]
1913     #[test]
1914     fn write_read_start() {
1915         with_basic_file(&valid_header(), |disk_file: File| {
1916             let mut q = QcowFile::from(disk_file, test_params()).unwrap();
1917             write_all_at(&mut q, b"test first bytes", 0).expect("Failed to write test string.");
1918             let mut buf = [0u8; 4];
1919             read_exact_at(&mut q, &mut buf, 0).expect("Failed to read.");
1920             assert_eq!(&buf, b"test");
1921         });
1922     }
1923 
1924     #[cfg_attr(windows, ignore = "TODO(b/257958782): Enable large test on windows")]
1925     #[test]
1926     fn write_read_start_backing() {
1927         let disk_file = basic_file(&valid_header());
1928         let mut backing = QcowFile::from(disk_file, test_params()).unwrap();
1929         write_all_at(&mut backing, b"test first bytes", 0).expect("Failed to write test string.");
1930         let mut buf = [0u8; 4];
1931         let wrapping_disk_file = basic_file(&valid_header());
1932         let mut wrapping = QcowFile::from(wrapping_disk_file, test_params()).unwrap();
1933         wrapping.set_backing_file(Some(Box::new(backing)));
1934         read_exact_at(&mut wrapping, &mut buf, 0).expect("Failed to read.");
1935         assert_eq!(&buf, b"test");
1936     }
1937 
1938     #[cfg_attr(windows, ignore = "TODO(b/257958782): Enable large test on windows")]
1939     #[test]
1940     fn write_read_start_backing_overlap() {
1941         let disk_file = basic_file(&valid_header());
1942         let mut backing = QcowFile::from(disk_file, test_params()).unwrap();
1943         write_all_at(&mut backing, b"test first bytes", 0).expect("Failed to write test string.");
1944         let wrapping_disk_file = basic_file(&valid_header());
1945         let mut wrapping = QcowFile::from(wrapping_disk_file, test_params()).unwrap();
1946         wrapping.set_backing_file(Some(Box::new(backing)));
1947         write_all_at(&mut wrapping, b"TEST", 0).expect("Failed to write second test string.");
1948         let mut buf = [0u8; 10];
1949         read_exact_at(&mut wrapping, &mut buf, 0).expect("Failed to read.");
1950         assert_eq!(&buf, b"TEST first");
1951     }
1952 
1953     #[cfg_attr(windows, ignore = "TODO(b/257958782): Enable large test on windows")]
1954     #[test]
1955     fn offset_write_read() {
1956         with_basic_file(&valid_header(), |disk_file: File| {
1957             let mut q = QcowFile::from(disk_file, test_params()).unwrap();
1958             let b = [0x55u8; 0x1000];
1959             write_all_at(&mut q, &b, 0xfff2000).expect("Failed to write test string.");
1960             let mut buf = [0u8; 4];
1961             read_exact_at(&mut q, &mut buf, 0xfff2000).expect("Failed to read.");
1962             assert_eq!(buf[0], 0x55);
1963         });
1964     }
1965 
1966     #[cfg_attr(windows, ignore = "TODO(b/257958782): Enable large test on windows")]
1967     #[test]
1968     fn write_zeroes_read() {
1969         with_basic_file(&valid_header(), |disk_file: File| {
1970             let mut q = QcowFile::from(disk_file, test_params()).unwrap();
1971             // Write some test data.
1972             let b = [0x55u8; 0x1000];
1973             write_all_at(&mut q, &b, 0xfff2000).expect("Failed to write test string.");
1974             // Overwrite the test data with zeroes.
1975             q.write_zeroes_all_at(0xfff2000, 0x200)
1976                 .expect("Failed to write zeroes.");
1977             // Verify that the correct part of the data was zeroed out.
1978             let mut buf = [0u8; 0x1000];
1979             read_exact_at(&mut q, &mut buf, 0xfff2000).expect("Failed to read.");
1980             assert_eq!(buf[0], 0);
1981             assert_eq!(buf[0x1FF], 0);
1982             assert_eq!(buf[0x200], 0x55);
1983             assert_eq!(buf[0xFFF], 0x55);
1984         });
1985     }
1986 
1987     #[cfg_attr(windows, ignore = "TODO(b/257958782): Enable large test on windows")]
1988     #[test]
1989     fn write_zeroes_full_cluster() {
1990         // Choose a size that is larger than a cluster.
1991         // valid_header uses cluster_bits = 16, which corresponds to a cluster size of 65536.
1992         const CHUNK_SIZE: usize = 65536 * 2 + 512;
1993         with_basic_file(&valid_header(), |disk_file: File| {
1994             let mut q = QcowFile::from(disk_file, test_params()).unwrap();
1995             // Write some test data.
1996             let b = [0x55u8; CHUNK_SIZE];
1997             write_all_at(&mut q, &b, 0).expect("Failed to write test string.");
1998             // Overwrite the full cluster with zeroes.
1999             q.write_zeroes_all_at(0, CHUNK_SIZE)
2000                 .expect("Failed to write zeroes.");
2001             // Verify that the data was zeroed out.
2002             let mut buf = [0u8; CHUNK_SIZE];
2003             read_exact_at(&mut q, &mut buf, 0).expect("Failed to read.");
2004             assert_eq!(buf[0], 0);
2005             assert_eq!(buf[CHUNK_SIZE - 1], 0);
2006         });
2007     }
2008 
2009     #[cfg_attr(windows, ignore = "TODO(b/257958782): Enable large test on windows")]
2010     #[test]
2011     fn write_zeroes_backing() {
2012         let disk_file = basic_file(&valid_header());
2013         let mut backing = QcowFile::from(disk_file, test_params()).unwrap();
2014         // Write some test data.
2015         let b = [0x55u8; 0x1000];
2016         write_all_at(&mut backing, &b, 0xfff2000).expect("Failed to write test string.");
2017         let wrapping_disk_file = basic_file(&valid_header());
2018         let mut wrapping = QcowFile::from(wrapping_disk_file, test_params()).unwrap();
2019         wrapping.set_backing_file(Some(Box::new(backing)));
2020         // Overwrite the test data with zeroes.
2021         // This should allocate new clusters in the wrapping file so that they can be zeroed.
2022         wrapping
2023             .write_zeroes_all_at(0xfff2000, 0x200)
2024             .expect("Failed to write zeroes.");
2025         // Verify that the correct part of the data was zeroed out.
2026         let mut buf = [0u8; 0x1000];
2027         read_exact_at(&mut wrapping, &mut buf, 0xfff2000).expect("Failed to read.");
2028         assert_eq!(buf[0], 0);
2029         assert_eq!(buf[0x1FF], 0);
2030         assert_eq!(buf[0x200], 0x55);
2031         assert_eq!(buf[0xFFF], 0x55);
2032     }
2033     #[test]
2034     fn test_header() {
2035         with_basic_file(&valid_header(), |disk_file: File| {
2036             let mut q = QcowFile::from(disk_file, test_params()).unwrap();
2037             assert_eq!(q.inner.get_mut().virtual_size(), 0x20_0000_0000);
2038         });
2039     }
2040 
2041     #[test]
2042     fn read_small_buffer() {
2043         with_basic_file(&valid_header(), |disk_file: File| {
2044             let mut q = QcowFile::from(disk_file, test_params()).unwrap();
2045             let mut b = [5u8; 16];
2046             read_exact_at(&mut q, &mut b, 1000).expect("Failed to read.");
2047             assert_eq!(0, b[0]);
2048             assert_eq!(0, b[15]);
2049         });
2050     }
2051 
2052     #[cfg_attr(windows, ignore = "TODO(b/257958782): Enable large test on windows")]
2053     #[test]
2054     fn replay_ext4() {
2055         with_basic_file(&valid_header(), |disk_file: File| {
2056             let mut q = QcowFile::from(disk_file, test_params()).unwrap();
2057             const BUF_SIZE: usize = 0x1000;
2058             let mut b = [0u8; BUF_SIZE];
2059 
2060             struct Transfer {
2061                 pub write: bool,
2062                 pub addr: u64,
2063             }
2064 
2065             // Write transactions from mkfs.ext4.
2066             let xfers: Vec<Transfer> = vec![
2067                 Transfer {
2068                     write: false,
2069                     addr: 0xfff0000,
2070                 },
2071                 Transfer {
2072                     write: false,
2073                     addr: 0xfffe000,
2074                 },
2075                 Transfer {
2076                     write: false,
2077                     addr: 0x0,
2078                 },
2079                 Transfer {
2080                     write: false,
2081                     addr: 0x1000,
2082                 },
2083                 Transfer {
2084                     write: false,
2085                     addr: 0xffff000,
2086                 },
2087                 Transfer {
2088                     write: false,
2089                     addr: 0xffdf000,
2090                 },
2091                 Transfer {
2092                     write: false,
2093                     addr: 0xfff8000,
2094                 },
2095                 Transfer {
2096                     write: false,
2097                     addr: 0xffe0000,
2098                 },
2099                 Transfer {
2100                     write: false,
2101                     addr: 0xffce000,
2102                 },
2103                 Transfer {
2104                     write: false,
2105                     addr: 0xffb6000,
2106                 },
2107                 Transfer {
2108                     write: false,
2109                     addr: 0xffab000,
2110                 },
2111                 Transfer {
2112                     write: false,
2113                     addr: 0xffa4000,
2114                 },
2115                 Transfer {
2116                     write: false,
2117                     addr: 0xff8e000,
2118                 },
2119                 Transfer {
2120                     write: false,
2121                     addr: 0xff86000,
2122                 },
2123                 Transfer {
2124                     write: false,
2125                     addr: 0xff84000,
2126                 },
2127                 Transfer {
2128                     write: false,
2129                     addr: 0xff89000,
2130                 },
2131                 Transfer {
2132                     write: false,
2133                     addr: 0xfe7e000,
2134                 },
2135                 Transfer {
2136                     write: false,
2137                     addr: 0x100000,
2138                 },
2139                 Transfer {
2140                     write: false,
2141                     addr: 0x3000,
2142                 },
2143                 Transfer {
2144                     write: false,
2145                     addr: 0x7000,
2146                 },
2147                 Transfer {
2148                     write: false,
2149                     addr: 0xf000,
2150                 },
2151                 Transfer {
2152                     write: false,
2153                     addr: 0x2000,
2154                 },
2155                 Transfer {
2156                     write: false,
2157                     addr: 0x4000,
2158                 },
2159                 Transfer {
2160                     write: false,
2161                     addr: 0x5000,
2162                 },
2163                 Transfer {
2164                     write: false,
2165                     addr: 0x6000,
2166                 },
2167                 Transfer {
2168                     write: false,
2169                     addr: 0x8000,
2170                 },
2171                 Transfer {
2172                     write: false,
2173                     addr: 0x9000,
2174                 },
2175                 Transfer {
2176                     write: false,
2177                     addr: 0xa000,
2178                 },
2179                 Transfer {
2180                     write: false,
2181                     addr: 0xb000,
2182                 },
2183                 Transfer {
2184                     write: false,
2185                     addr: 0xc000,
2186                 },
2187                 Transfer {
2188                     write: false,
2189                     addr: 0xd000,
2190                 },
2191                 Transfer {
2192                     write: false,
2193                     addr: 0xe000,
2194                 },
2195                 Transfer {
2196                     write: false,
2197                     addr: 0x10000,
2198                 },
2199                 Transfer {
2200                     write: false,
2201                     addr: 0x11000,
2202                 },
2203                 Transfer {
2204                     write: false,
2205                     addr: 0x12000,
2206                 },
2207                 Transfer {
2208                     write: false,
2209                     addr: 0x13000,
2210                 },
2211                 Transfer {
2212                     write: false,
2213                     addr: 0x14000,
2214                 },
2215                 Transfer {
2216                     write: false,
2217                     addr: 0x15000,
2218                 },
2219                 Transfer {
2220                     write: false,
2221                     addr: 0x16000,
2222                 },
2223                 Transfer {
2224                     write: false,
2225                     addr: 0x17000,
2226                 },
2227                 Transfer {
2228                     write: false,
2229                     addr: 0x18000,
2230                 },
2231                 Transfer {
2232                     write: false,
2233                     addr: 0x19000,
2234                 },
2235                 Transfer {
2236                     write: false,
2237                     addr: 0x1a000,
2238                 },
2239                 Transfer {
2240                     write: false,
2241                     addr: 0x1b000,
2242                 },
2243                 Transfer {
2244                     write: false,
2245                     addr: 0x1c000,
2246                 },
2247                 Transfer {
2248                     write: false,
2249                     addr: 0x1d000,
2250                 },
2251                 Transfer {
2252                     write: false,
2253                     addr: 0x1e000,
2254                 },
2255                 Transfer {
2256                     write: false,
2257                     addr: 0x1f000,
2258                 },
2259                 Transfer {
2260                     write: false,
2261                     addr: 0x21000,
2262                 },
2263                 Transfer {
2264                     write: false,
2265                     addr: 0x22000,
2266                 },
2267                 Transfer {
2268                     write: false,
2269                     addr: 0x24000,
2270                 },
2271                 Transfer {
2272                     write: false,
2273                     addr: 0x40000,
2274                 },
2275                 Transfer {
2276                     write: false,
2277                     addr: 0x0,
2278                 },
2279                 Transfer {
2280                     write: false,
2281                     addr: 0x3000,
2282                 },
2283                 Transfer {
2284                     write: false,
2285                     addr: 0x7000,
2286                 },
2287                 Transfer {
2288                     write: false,
2289                     addr: 0x0,
2290                 },
2291                 Transfer {
2292                     write: false,
2293                     addr: 0x1000,
2294                 },
2295                 Transfer {
2296                     write: false,
2297                     addr: 0x2000,
2298                 },
2299                 Transfer {
2300                     write: false,
2301                     addr: 0x3000,
2302                 },
2303                 Transfer {
2304                     write: false,
2305                     addr: 0x0,
2306                 },
2307                 Transfer {
2308                     write: false,
2309                     addr: 0x449000,
2310                 },
2311                 Transfer {
2312                     write: false,
2313                     addr: 0x48000,
2314                 },
2315                 Transfer {
2316                     write: false,
2317                     addr: 0x48000,
2318                 },
2319                 Transfer {
2320                     write: false,
2321                     addr: 0x448000,
2322                 },
2323                 Transfer {
2324                     write: false,
2325                     addr: 0x44a000,
2326                 },
2327                 Transfer {
2328                     write: false,
2329                     addr: 0x48000,
2330                 },
2331                 Transfer {
2332                     write: false,
2333                     addr: 0x48000,
2334                 },
2335                 Transfer {
2336                     write: true,
2337                     addr: 0x0,
2338                 },
2339                 Transfer {
2340                     write: true,
2341                     addr: 0x448000,
2342                 },
2343                 Transfer {
2344                     write: true,
2345                     addr: 0x449000,
2346                 },
2347                 Transfer {
2348                     write: true,
2349                     addr: 0x44a000,
2350                 },
2351                 Transfer {
2352                     write: true,
2353                     addr: 0xfff0000,
2354                 },
2355                 Transfer {
2356                     write: true,
2357                     addr: 0xfff1000,
2358                 },
2359                 Transfer {
2360                     write: true,
2361                     addr: 0xfff2000,
2362                 },
2363                 Transfer {
2364                     write: true,
2365                     addr: 0xfff3000,
2366                 },
2367                 Transfer {
2368                     write: true,
2369                     addr: 0xfff4000,
2370                 },
2371                 Transfer {
2372                     write: true,
2373                     addr: 0xfff5000,
2374                 },
2375                 Transfer {
2376                     write: true,
2377                     addr: 0xfff6000,
2378                 },
2379                 Transfer {
2380                     write: true,
2381                     addr: 0xfff7000,
2382                 },
2383                 Transfer {
2384                     write: true,
2385                     addr: 0xfff8000,
2386                 },
2387                 Transfer {
2388                     write: true,
2389                     addr: 0xfff9000,
2390                 },
2391                 Transfer {
2392                     write: true,
2393                     addr: 0xfffa000,
2394                 },
2395                 Transfer {
2396                     write: true,
2397                     addr: 0xfffb000,
2398                 },
2399                 Transfer {
2400                     write: true,
2401                     addr: 0xfffc000,
2402                 },
2403                 Transfer {
2404                     write: true,
2405                     addr: 0xfffd000,
2406                 },
2407                 Transfer {
2408                     write: true,
2409                     addr: 0xfffe000,
2410                 },
2411                 Transfer {
2412                     write: true,
2413                     addr: 0xffff000,
2414                 },
2415             ];
2416 
2417             for xfer in &xfers {
2418                 if xfer.write {
2419                     write_all_at(&mut q, &b, xfer.addr).expect("Failed to write.");
2420                 } else {
2421                     read_exact_at(&mut q, &mut b, xfer.addr).expect("Failed to read.");
2422                 }
2423             }
2424         });
2425     }
2426 
2427     #[cfg_attr(windows, ignore = "TODO(b/257958782): Enable large test on windows")]
2428     #[test]
2429     fn combo_write_read() {
2430         with_default_file(1024 * 1024 * 1024 * 256, |mut qcow_file| {
2431             const NUM_BLOCKS: usize = 55;
2432             const BLOCK_SIZE: usize = 0x1_0000;
2433             const OFFSET: u64 = 0x1_0000_0020;
2434             let data = [0x55u8; BLOCK_SIZE];
2435             let mut readback = [0u8; BLOCK_SIZE];
2436             for i in 0..NUM_BLOCKS {
2437                 let seek_offset = OFFSET + (i as u64) * (BLOCK_SIZE as u64);
2438                 write_all_at(&mut qcow_file, &data, seek_offset)
2439                     .expect("Failed to write test data.");
2440                 // Read back the data to check it was written correctly.
2441                 read_exact_at(&mut qcow_file, &mut readback, seek_offset).expect("Failed to read.");
2442                 for (orig, read) in data.iter().zip(readback.iter()) {
2443                     assert_eq!(orig, read);
2444                 }
2445             }
2446             // Check that address 0 is still zeros.
2447             read_exact_at(&mut qcow_file, &mut readback, 0).expect("Failed to read.");
2448             for read in readback.iter() {
2449                 assert_eq!(*read, 0);
2450             }
2451             // Check the data again after the writes have happened.
2452             for i in 0..NUM_BLOCKS {
2453                 let seek_offset = OFFSET + (i as u64) * (BLOCK_SIZE as u64);
2454                 read_exact_at(&mut qcow_file, &mut readback, seek_offset).expect("Failed to read.");
2455                 for (orig, read) in data.iter().zip(readback.iter()) {
2456                     assert_eq!(orig, read);
2457                 }
2458             }
2459 
2460             assert_eq!(
2461                 qcow_file.inner.get_mut().first_zero_refcount().unwrap(),
2462                 None
2463             );
2464         });
2465     }
2466 
2467     #[test]
2468     fn rebuild_refcounts() {
2469         with_basic_file(&valid_header(), |mut disk_file: File| {
2470             let header = QcowHeader::new(&mut disk_file).expect("Failed to create Header.");
2471             let cluster_size = 65536;
2472             let mut raw_file =
2473                 QcowRawFile::from(disk_file, cluster_size).expect("Failed to create QcowRawFile.");
2474             QcowFileInner::rebuild_refcounts(&mut raw_file, header)
2475                 .expect("Failed to rebuild refcounts.");
2476         });
2477     }
2478 
2479     #[cfg_attr(windows, ignore = "TODO(b/257958782): Enable large test on windows")]
2480     #[test]
2481     fn nested_qcow() {
2482         let tmp_dir = TempDir::new().unwrap();
2483 
2484         // A file `backing` is backing a qcow file `qcow.l1`, which in turn is backing another
2485         // qcow file.
2486         let backing_file_path = tmp_dir.path().join("backing");
2487         let _backing_file = OpenOptions::new()
2488             .read(true)
2489             .write(true)
2490             .create_new(true)
2491             .open(&backing_file_path)
2492             .unwrap();
2493 
2494         let level1_qcow_file_path = tmp_dir.path().join("qcow.l1");
2495         let level1_qcow_file = OpenOptions::new()
2496             .read(true)
2497             .write(true)
2498             .create_new(true)
2499             .open(&level1_qcow_file_path)
2500             .unwrap();
2501         let _level1_qcow_file = QcowFile::new_from_backing(
2502             level1_qcow_file,
2503             test_params(),
2504             backing_file_path.to_str().unwrap(),
2505         )
2506         .unwrap();
2507 
2508         let level2_qcow_file = tempfile().unwrap();
2509         let _level2_qcow_file = QcowFile::new_from_backing(
2510             level2_qcow_file,
2511             test_params(),
2512             level1_qcow_file_path.to_str().unwrap(),
2513         )
2514         .expect("failed to create level2 qcow file");
2515     }
2516 
2517     #[test]
2518     fn io_seek() {
2519         with_default_file(1024 * 1024 * 10, |mut qcow_file| {
2520             // Cursor should start at 0.
2521             assert_eq!(qcow_file.stream_position().unwrap(), 0);
2522 
2523             // Seek 1 MB from start.
2524             assert_eq!(
2525                 qcow_file.seek(SeekFrom::Start(1024 * 1024)).unwrap(),
2526                 1024 * 1024
2527             );
2528 
2529             // Rewind 1 MB + 1 byte (past beginning) - seeking to a negative offset is an error and
2530             // should not move the cursor.
2531             qcow_file
2532                 .seek(SeekFrom::Current(-(1024 * 1024 + 1)))
2533                 .expect_err("negative offset seek should fail");
2534             assert_eq!(qcow_file.stream_position().unwrap(), 1024 * 1024);
2535 
2536             // Seek to last byte.
2537             assert_eq!(
2538                 qcow_file.seek(SeekFrom::End(-1)).unwrap(),
2539                 1024 * 1024 * 10 - 1
2540             );
2541 
2542             // Seek to EOF.
2543             assert_eq!(qcow_file.seek(SeekFrom::End(0)).unwrap(), 1024 * 1024 * 10);
2544 
2545             // Seek past EOF is not allowed.
2546             qcow_file
2547                 .seek(SeekFrom::End(1))
2548                 .expect_err("seek past EOF should fail");
2549         });
2550     }
2551 
2552     #[test]
2553     fn io_write_read() {
2554         with_default_file(1024 * 1024 * 10, |mut qcow_file| {
2555             const BLOCK_SIZE: usize = 0x1_0000;
2556             let data_55 = [0x55u8; BLOCK_SIZE];
2557             let data_aa = [0xaau8; BLOCK_SIZE];
2558             let mut readback = [0u8; BLOCK_SIZE];
2559 
2560             qcow_file.write_all(&data_55).unwrap();
2561             assert_eq!(qcow_file.stream_position().unwrap(), BLOCK_SIZE as u64);
2562 
2563             qcow_file.write_all(&data_aa).unwrap();
2564             assert_eq!(qcow_file.stream_position().unwrap(), BLOCK_SIZE as u64 * 2);
2565 
2566             // Read BLOCK_SIZE of just 0xaa.
2567             assert_eq!(
2568                 qcow_file
2569                     .seek(SeekFrom::Current(-(BLOCK_SIZE as i64)))
2570                     .unwrap(),
2571                 BLOCK_SIZE as u64
2572             );
2573             qcow_file.read_exact(&mut readback).unwrap();
2574             assert_eq!(qcow_file.stream_position().unwrap(), BLOCK_SIZE as u64 * 2);
2575             for (orig, read) in data_aa.iter().zip(readback.iter()) {
2576                 assert_eq!(orig, read);
2577             }
2578 
2579             // Read BLOCK_SIZE of just 0x55.
2580             qcow_file.rewind().unwrap();
2581             qcow_file.read_exact(&mut readback).unwrap();
2582             for (orig, read) in data_55.iter().zip(readback.iter()) {
2583                 assert_eq!(orig, read);
2584             }
2585 
2586             // Read BLOCK_SIZE crossing between the block of 0x55 and 0xaa.
2587             qcow_file
2588                 .seek(SeekFrom::Start(BLOCK_SIZE as u64 / 2))
2589                 .unwrap();
2590             qcow_file.read_exact(&mut readback).unwrap();
2591             for (orig, read) in data_55[BLOCK_SIZE / 2..]
2592                 .iter()
2593                 .chain(data_aa[..BLOCK_SIZE / 2].iter())
2594                 .zip(readback.iter())
2595             {
2596                 assert_eq!(orig, read);
2597             }
2598         });
2599     }
2600 }
2601