xref: /aosp_15_r20/external/crosvm/disk/src/disk.rs (revision bb4ee6a4ae7042d18b07a98463b9c8b875e44b39)
1 // Copyright 2019 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 //! VM disk image file format I/O.
6 
7 use std::cmp::min;
8 use std::fmt::Debug;
9 use std::fs::File;
10 use std::io;
11 use std::io::Seek;
12 use std::io::SeekFrom;
13 use std::path::PathBuf;
14 use std::sync::Arc;
15 
16 use async_trait::async_trait;
17 use base::info;
18 use base::AsRawDescriptors;
19 use base::FileAllocate;
20 use base::FileReadWriteAtVolatile;
21 use base::FileSetLen;
22 use cros_async::BackingMemory;
23 use cros_async::Executor;
24 use cros_async::IoSource;
25 use cros_async::MemRegionIter;
26 use thiserror::Error as ThisError;
27 
28 mod asynchronous;
29 #[allow(unused)]
30 pub(crate) use asynchronous::AsyncDiskFileWrapper;
31 #[cfg(feature = "qcow")]
32 mod qcow;
33 #[cfg(feature = "qcow")]
34 pub use qcow::QcowFile;
35 #[cfg(feature = "qcow")]
36 pub use qcow::QCOW_MAGIC;
37 mod sys;
38 
39 #[cfg(feature = "composite-disk")]
40 mod composite;
41 #[cfg(feature = "composite-disk")]
42 use composite::CompositeDiskFile;
43 #[cfg(feature = "composite-disk")]
44 use composite::CDISK_MAGIC;
45 #[cfg(feature = "composite-disk")]
46 mod gpt;
47 #[cfg(feature = "composite-disk")]
48 pub use composite::create_composite_disk;
49 #[cfg(feature = "composite-disk")]
50 pub use composite::create_zero_filler;
51 #[cfg(feature = "composite-disk")]
52 pub use composite::Error as CompositeError;
53 #[cfg(feature = "composite-disk")]
54 pub use composite::ImagePartitionType;
55 #[cfg(feature = "composite-disk")]
56 pub use composite::PartitionInfo;
57 #[cfg(feature = "composite-disk")]
58 pub use gpt::Error as GptError;
59 
60 #[cfg(feature = "android-sparse")]
61 mod android_sparse;
62 #[cfg(feature = "android-sparse")]
63 use android_sparse::AndroidSparse;
64 #[cfg(feature = "android-sparse")]
65 use android_sparse::SPARSE_HEADER_MAGIC;
66 use sys::read_from_disk;
67 
68 #[cfg(feature = "zstd")]
69 mod zstd;
70 #[cfg(feature = "zstd")]
71 use zstd::ZstdDisk;
72 #[cfg(feature = "zstd")]
73 use zstd::ZSTD_FRAME_MAGIC;
74 #[cfg(feature = "zstd")]
75 use zstd::ZSTD_SKIPPABLE_MAGIC_HIGH;
76 #[cfg(feature = "zstd")]
77 use zstd::ZSTD_SKIPPABLE_MAGIC_LOW;
78 
79 /// Nesting depth limit for disk formats that can open other disk files.
80 const MAX_NESTING_DEPTH: u32 = 10;
81 
82 #[derive(ThisError, Debug)]
83 pub enum Error {
84     #[error("failed to create block device: {0}")]
85     BlockDeviceNew(base::Error),
86     #[error("requested file conversion not supported")]
87     ConversionNotSupported,
88     #[cfg(feature = "android-sparse")]
89     #[error("failure in android sparse disk: {0}")]
90     CreateAndroidSparseDisk(android_sparse::Error),
91     #[cfg(feature = "composite-disk")]
92     #[error("failure in composite disk: {0}")]
93     CreateCompositeDisk(composite::Error),
94     #[cfg(feature = "zstd")]
95     #[error("failure in zstd disk: {0}")]
96     CreateZstdDisk(anyhow::Error),
97     #[error("failure creating single file disk: {0}")]
98     CreateSingleFileDisk(cros_async::AsyncError),
99     #[error("failed to set O_DIRECT on disk image: {0}")]
100     DirectFailed(base::Error),
101     #[error("failure with fdatasync: {0}")]
102     Fdatasync(cros_async::AsyncError),
103     #[error("failure with fsync: {0}")]
104     Fsync(cros_async::AsyncError),
105     #[error("failed to lock file: {0}")]
106     LockFileFailure(base::Error),
107     #[error("failure with fdatasync: {0}")]
108     IoFdatasync(io::Error),
109     #[error("failure with flush: {0}")]
110     IoFlush(io::Error),
111     #[error("failure with fsync: {0}")]
112     IoFsync(io::Error),
113     #[error("failure to punch hole: {0}")]
114     IoPunchHole(io::Error),
115     #[error("checking host fs type: {0}")]
116     HostFsType(base::Error),
117     #[error("maximum disk nesting depth exceeded")]
118     MaxNestingDepthExceeded,
119     #[error("failed to open disk file \"{0}\": {1}")]
120     OpenFile(String, base::Error),
121     #[error("failure to punch hole: {0}")]
122     PunchHole(cros_async::AsyncError),
123     #[error("failure to punch hole for block device file: {0}")]
124     PunchHoleBlockDeviceFile(base::Error),
125     #[cfg(feature = "qcow")]
126     #[error("failure in qcow: {0}")]
127     QcowError(qcow::Error),
128     #[error("failed to read data: {0}")]
129     ReadingData(io::Error),
130     #[error("failed to read header: {0}")]
131     ReadingHeader(io::Error),
132     #[error("failed to read to memory: {0}")]
133     ReadToMem(cros_async::AsyncError),
134     #[error("failed to seek file: {0}")]
135     SeekingFile(io::Error),
136     #[error("failed to set file size: {0}")]
137     SettingFileSize(io::Error),
138     #[error("unknown disk type")]
139     UnknownType,
140     #[error("failed to write from memory: {0}")]
141     WriteFromMem(cros_async::AsyncError),
142     #[error("failed to write from vec: {0}")]
143     WriteFromVec(cros_async::AsyncError),
144     #[error("failed to write zeroes: {0}")]
145     WriteZeroes(io::Error),
146     #[error("failed to write data: {0}")]
147     WritingData(io::Error),
148     #[error("failed to convert to async: {0}")]
149     ToAsync(cros_async::AsyncError),
150     #[cfg(windows)]
151     #[error("failed to set disk file sparse: {0}")]
152     SetSparseFailure(io::Error),
153     #[error("failure with guest memory access: {0}")]
154     GuestMemory(cros_async::mem::Error),
155     #[error("unsupported operation")]
156     UnsupportedOperation,
157 }
158 
159 pub type Result<T> = std::result::Result<T, Error>;
160 
/// Length query for a disk image or raw block device.
pub trait DiskGetLen {
    /// Returns the current length of the disk, in bytes.
    fn get_len(&self) -> io::Result<u64>;
}
166 
167 impl DiskGetLen for File {
get_len(&self) -> io::Result<u64>168     fn get_len(&self) -> io::Result<u64> {
169         let mut s = self;
170         let orig_seek = s.stream_position()?;
171         let end = s.seek(SeekFrom::End(0))?;
172         s.seek(SeekFrom::Start(orig_seek))?;
173         Ok(end)
174     }
175 }
176 
177 /// The prerequisites necessary to support a block device.
178 pub trait DiskFile:
179     FileSetLen + DiskGetLen + FileReadWriteAtVolatile + ToAsyncDisk + Send + AsRawDescriptors + Debug
180 {
181     /// Creates a new DiskFile instance that shares the same underlying disk file image. IO
182     /// operations to a DiskFile should affect all DiskFile instances with the same underlying disk
183     /// file image.
184     ///
185     /// `try_clone()` returns [`io::ErrorKind::Unsupported`] Error if a DiskFile does not support
186     /// creating an instance with the same underlying disk file image.
try_clone(&self) -> io::Result<Box<dyn DiskFile>>187     fn try_clone(&self) -> io::Result<Box<dyn DiskFile>> {
188         Err(io::Error::new(
189             io::ErrorKind::Unsupported,
190             "unsupported operation",
191         ))
192     }
193 }
194 
195 /// A `DiskFile` that can be converted for asychronous access.
196 pub trait ToAsyncDisk: AsRawDescriptors + DiskGetLen + Send {
197     /// Convert a boxed self in to a box-wrapped implementaiton of AsyncDisk.
198     /// Used to convert a standard disk image to an async disk image. This conversion and the
199     /// inverse are needed so that the `Send` DiskImage can be given to the block thread where it is
200     /// converted to a non-`Send` AsyncDisk. The AsyncDisk can then be converted back and returned
201     /// to the main device thread if the block device is destroyed or reset.
to_async_disk(self: Box<Self>, ex: &Executor) -> Result<Box<dyn AsyncDisk>>202     fn to_async_disk(self: Box<Self>, ex: &Executor) -> Result<Box<dyn AsyncDisk>>;
203 }
204 
205 impl ToAsyncDisk for File {
to_async_disk(self: Box<Self>, ex: &Executor) -> Result<Box<dyn AsyncDisk>>206     fn to_async_disk(self: Box<Self>, ex: &Executor) -> Result<Box<dyn AsyncDisk>> {
207         Ok(Box::new(SingleFileDisk::new(*self, ex)?))
208     }
209 }
210 
/// The kinds of host image file that can be used as a virtual disk.
#[derive(Debug, PartialEq, Eq)]
pub enum ImageType {
    /// No recognized format header; the file is used as-is.
    Raw,
    /// QCOW2 image.
    Qcow2,
    /// Composite disk image.
    CompositeDisk,
    /// Android sparse image.
    AndroidSparse,
    /// Zstd-compressed image.
    Zstd,
}
220 
221 /// Detect the type of an image file by checking for a valid header of the supported formats.
detect_image_type(file: &File, overlapped_mode: bool) -> Result<ImageType>222 pub fn detect_image_type(file: &File, overlapped_mode: bool) -> Result<ImageType> {
223     let mut f = file;
224     let disk_size = f.get_len().map_err(Error::SeekingFile)?;
225     let orig_seek = f.stream_position().map_err(Error::SeekingFile)?;
226 
227     info!("disk size {}", disk_size);
228 
229     // Try to read the disk in a nicely-aligned block size unless the whole file is smaller.
230     const MAGIC_BLOCK_SIZE: usize = 4096;
231     #[repr(align(4096))]
232     struct BlockAlignedBuffer {
233         data: [u8; MAGIC_BLOCK_SIZE],
234     }
235     let mut magic = BlockAlignedBuffer {
236         data: [0u8; MAGIC_BLOCK_SIZE],
237     };
238     let magic_read_len = if disk_size > MAGIC_BLOCK_SIZE as u64 {
239         MAGIC_BLOCK_SIZE
240     } else {
241         // This cast is safe since we know disk_size is less than MAGIC_BLOCK_SIZE (4096) and
242         // therefore is representable in usize.
243         disk_size as usize
244     };
245 
246     read_from_disk(f, 0, &mut magic.data[0..magic_read_len], overlapped_mode)?;
247     f.seek(SeekFrom::Start(orig_seek))
248         .map_err(Error::SeekingFile)?;
249 
250     #[cfg(feature = "composite-disk")]
251     if let Some(cdisk_magic) = magic.data.get(0..CDISK_MAGIC.len()) {
252         if cdisk_magic == CDISK_MAGIC.as_bytes() {
253             return Ok(ImageType::CompositeDisk);
254         }
255     }
256 
257     #[allow(unused_variables)] // magic4 is only used with the qcow/android-sparse/zstd features.
258     if let Some(magic4) = magic
259         .data
260         .get(0..4)
261         .and_then(|v| <&[u8] as std::convert::TryInto<[u8; 4]>>::try_into(v).ok())
262     {
263         #[cfg(feature = "qcow")]
264         if magic4 == QCOW_MAGIC.to_be_bytes() {
265             return Ok(ImageType::Qcow2);
266         }
267         #[cfg(feature = "android-sparse")]
268         if magic4 == SPARSE_HEADER_MAGIC.to_le_bytes() {
269             return Ok(ImageType::AndroidSparse);
270         }
271         #[cfg(feature = "zstd")]
272         if u32::from_le_bytes(magic4) == ZSTD_FRAME_MAGIC
273             || (u32::from_le_bytes(magic4) >= ZSTD_SKIPPABLE_MAGIC_LOW
274                 && u32::from_le_bytes(magic4) <= ZSTD_SKIPPABLE_MAGIC_HIGH)
275         {
276             return Ok(ImageType::Zstd);
277         }
278     }
279 
280     Ok(ImageType::Raw)
281 }
282 
283 impl DiskFile for File {
try_clone(&self) -> io::Result<Box<dyn DiskFile>>284     fn try_clone(&self) -> io::Result<Box<dyn DiskFile>> {
285         Ok(Box::new(self.try_clone()?))
286     }
287 }
288 
/// Options describing how a disk file should be opened by [`open_disk_file`].
pub struct DiskFileParams {
    /// Path of the disk file.
    pub path: PathBuf,
    /// Whether the disk is read only.
    pub is_read_only: bool,
    /// Whether to call `base::set_sparse_file` on the file. Currently only affects Windows and
    /// is irrelevant for read only files.
    pub is_sparse_file: bool,
    /// Whether to open the file in overlapped mode. Only affects Windows.
    pub is_overlapped: bool,
    /// Whether to disable OS page caches / buffering.
    pub is_direct: bool,
    /// Whether to lock the file.
    pub lock: bool,
    /// The nesting depth of the file. Used to avoid infinite recursion. Users outside the disk
    /// crate should set this to zero.
    pub depth: u32,
}
305 
306 /// Inspect the image file type and create an appropriate disk file to match it.
open_disk_file(params: DiskFileParams) -> Result<Box<dyn DiskFile>>307 pub fn open_disk_file(params: DiskFileParams) -> Result<Box<dyn DiskFile>> {
308     if params.depth > MAX_NESTING_DEPTH {
309         return Err(Error::MaxNestingDepthExceeded);
310     }
311 
312     let raw_image = sys::open_raw_disk_image(&params)?;
313     let image_type = detect_image_type(&raw_image, params.is_overlapped)?;
314     Ok(match image_type {
315         ImageType::Raw => {
316             sys::apply_raw_disk_file_options(&raw_image, params.is_sparse_file)?;
317             Box::new(raw_image) as Box<dyn DiskFile>
318         }
319         #[cfg(feature = "qcow")]
320         ImageType::Qcow2 => Box::new(QcowFile::from(raw_image, params).map_err(Error::QcowError)?)
321             as Box<dyn DiskFile>,
322         #[cfg(feature = "composite-disk")]
323         ImageType::CompositeDisk => {
324             // Valid composite disk header present
325             Box::new(
326                 CompositeDiskFile::from_file(raw_image, params)
327                     .map_err(Error::CreateCompositeDisk)?,
328             ) as Box<dyn DiskFile>
329         }
330         #[cfg(feature = "android-sparse")]
331         ImageType::AndroidSparse => {
332             Box::new(AndroidSparse::from_file(raw_image).map_err(Error::CreateAndroidSparseDisk)?)
333                 as Box<dyn DiskFile>
334         }
335         #[cfg(feature = "zstd")]
336         ImageType::Zstd => Box::new(ZstdDisk::from_file(raw_image).map_err(Error::CreateZstdDisk)?)
337             as Box<dyn DiskFile>,
338         #[allow(unreachable_patterns)]
339         _ => return Err(Error::UnknownType),
340     })
341 }
342 
343 /// An asynchronously accessible disk.
344 #[async_trait(?Send)]
345 pub trait AsyncDisk: DiskGetLen + FileSetLen + FileAllocate {
346     /// Flush intermediary buffers and/or dirty state to file. fsync not required.
flush(&self) -> Result<()>347     async fn flush(&self) -> Result<()>;
348 
349     /// Asynchronously fsyncs any completed operations to the disk.
fsync(&self) -> Result<()>350     async fn fsync(&self) -> Result<()>;
351 
352     /// Asynchronously fdatasyncs any completed operations to the disk.
353     /// Note that an implementation may simply call fsync for fdatasync.
fdatasync(&self) -> Result<()>354     async fn fdatasync(&self) -> Result<()>;
355 
356     /// Reads from the file at 'file_offset' into memory `mem` at `mem_offsets`.
357     /// `mem_offsets` is similar to an iovec except relative to the start of `mem`.
read_to_mem<'a>( &'a self, file_offset: u64, mem: Arc<dyn BackingMemory + Send + Sync>, mem_offsets: cros_async::MemRegionIter<'a>, ) -> Result<usize>358     async fn read_to_mem<'a>(
359         &'a self,
360         file_offset: u64,
361         mem: Arc<dyn BackingMemory + Send + Sync>,
362         mem_offsets: cros_async::MemRegionIter<'a>,
363     ) -> Result<usize>;
364 
365     /// Writes to the file at 'file_offset' from memory `mem` at `mem_offsets`.
write_from_mem<'a>( &'a self, file_offset: u64, mem: Arc<dyn BackingMemory + Send + Sync>, mem_offsets: cros_async::MemRegionIter<'a>, ) -> Result<usize>366     async fn write_from_mem<'a>(
367         &'a self,
368         file_offset: u64,
369         mem: Arc<dyn BackingMemory + Send + Sync>,
370         mem_offsets: cros_async::MemRegionIter<'a>,
371     ) -> Result<usize>;
372 
373     /// Replaces a range of bytes with a hole.
punch_hole(&self, file_offset: u64, length: u64) -> Result<()>374     async fn punch_hole(&self, file_offset: u64, length: u64) -> Result<()>;
375 
376     /// Writes up to `length` bytes of zeroes to the stream, returning how many bytes were written.
write_zeroes_at(&self, file_offset: u64, length: u64) -> Result<()>377     async fn write_zeroes_at(&self, file_offset: u64, length: u64) -> Result<()>;
378 
379     /// Reads from the file at 'file_offset' into `buf`.
380     ///
381     /// Less efficient than `read_to_mem` because of extra copies and allocations.
read_double_buffered(&self, file_offset: u64, buf: &mut [u8]) -> Result<usize>382     async fn read_double_buffered(&self, file_offset: u64, buf: &mut [u8]) -> Result<usize> {
383         let backing_mem = Arc::new(cros_async::VecIoWrapper::from(vec![0u8; buf.len()]));
384         let region = cros_async::MemRegion {
385             offset: 0,
386             len: buf.len(),
387         };
388         let n = self
389             .read_to_mem(
390                 file_offset,
391                 backing_mem.clone(),
392                 MemRegionIter::new(&[region]),
393             )
394             .await?;
395         backing_mem
396             .get_volatile_slice(region)
397             .expect("BUG: the VecIoWrapper shrank?")
398             .sub_slice(0, n)
399             .expect("BUG: read_to_mem return value too large?")
400             .copy_to(buf);
401         Ok(n)
402     }
403 
404     /// Writes to the file at 'file_offset' from `buf`.
405     ///
406     /// Less efficient than `write_from_mem` because of extra copies and allocations.
write_double_buffered(&self, file_offset: u64, buf: &[u8]) -> Result<usize>407     async fn write_double_buffered(&self, file_offset: u64, buf: &[u8]) -> Result<usize> {
408         let backing_mem = Arc::new(cros_async::VecIoWrapper::from(buf.to_vec()));
409         let region = cros_async::MemRegion {
410             offset: 0,
411             len: buf.len(),
412         };
413         self.write_from_mem(
414             file_offset,
415             backing_mem,
416             cros_async::MemRegionIter::new(&[region]),
417         )
418         .await
419     }
420 }
421 
422 /// A disk backed by a single file that implements `AsyncDisk` for access.
423 pub struct SingleFileDisk {
424     inner: IoSource<File>,
425     // Whether the backed file is a block device since the punch-hole needs different operation.
426     #[cfg(any(target_os = "android", target_os = "linux"))]
427     is_block_device_file: bool,
428 }
429 
430 impl DiskGetLen for SingleFileDisk {
get_len(&self) -> io::Result<u64>431     fn get_len(&self) -> io::Result<u64> {
432         self.inner.as_source().get_len()
433     }
434 }
435 
436 impl FileSetLen for SingleFileDisk {
set_len(&self, len: u64) -> io::Result<()>437     fn set_len(&self, len: u64) -> io::Result<()> {
438         self.inner.as_source().set_len(len)
439     }
440 }
441 
442 impl FileAllocate for SingleFileDisk {
allocate(&self, offset: u64, len: u64) -> io::Result<()>443     fn allocate(&self, offset: u64, len: u64) -> io::Result<()> {
444         self.inner.as_source().allocate(offset, len)
445     }
446 }
447 
448 #[async_trait(?Send)]
449 impl AsyncDisk for SingleFileDisk {
flush(&self) -> Result<()>450     async fn flush(&self) -> Result<()> {
451         // Nothing to flush, all file mutations are immediately sent to the OS.
452         Ok(())
453     }
454 
fsync(&self) -> Result<()>455     async fn fsync(&self) -> Result<()> {
456         self.inner.fsync().await.map_err(Error::Fsync)
457     }
458 
fdatasync(&self) -> Result<()>459     async fn fdatasync(&self) -> Result<()> {
460         self.inner.fdatasync().await.map_err(Error::Fdatasync)
461     }
462 
read_to_mem<'a>( &'a self, file_offset: u64, mem: Arc<dyn BackingMemory + Send + Sync>, mem_offsets: cros_async::MemRegionIter<'a>, ) -> Result<usize>463     async fn read_to_mem<'a>(
464         &'a self,
465         file_offset: u64,
466         mem: Arc<dyn BackingMemory + Send + Sync>,
467         mem_offsets: cros_async::MemRegionIter<'a>,
468     ) -> Result<usize> {
469         self.inner
470             .read_to_mem(Some(file_offset), mem, mem_offsets)
471             .await
472             .map_err(Error::ReadToMem)
473     }
474 
write_from_mem<'a>( &'a self, file_offset: u64, mem: Arc<dyn BackingMemory + Send + Sync>, mem_offsets: cros_async::MemRegionIter<'a>, ) -> Result<usize>475     async fn write_from_mem<'a>(
476         &'a self,
477         file_offset: u64,
478         mem: Arc<dyn BackingMemory + Send + Sync>,
479         mem_offsets: cros_async::MemRegionIter<'a>,
480     ) -> Result<usize> {
481         self.inner
482             .write_from_mem(Some(file_offset), mem, mem_offsets)
483             .await
484             .map_err(Error::WriteFromMem)
485     }
486 
punch_hole(&self, file_offset: u64, length: u64) -> Result<()>487     async fn punch_hole(&self, file_offset: u64, length: u64) -> Result<()> {
488         #[cfg(any(target_os = "android", target_os = "linux"))]
489         if self.is_block_device_file {
490             return base::linux::discard_block(self.inner.as_source(), file_offset, length)
491                 .map_err(Error::PunchHoleBlockDeviceFile);
492         }
493         self.inner
494             .punch_hole(file_offset, length)
495             .await
496             .map_err(Error::PunchHole)
497     }
498 
write_zeroes_at(&self, file_offset: u64, length: u64) -> Result<()>499     async fn write_zeroes_at(&self, file_offset: u64, length: u64) -> Result<()> {
500         if self
501             .inner
502             .write_zeroes_at(file_offset, length)
503             .await
504             .is_ok()
505         {
506             return Ok(());
507         }
508 
509         // Fall back to filling zeros if more efficient write_zeroes_at doesn't work.
510         let buf_size = min(length, 0x10000);
511         let mut nwritten = 0;
512         while nwritten < length {
513             let remaining = length - nwritten;
514             let write_size = min(remaining, buf_size) as usize;
515             let buf = vec![0u8; write_size];
516             nwritten += self
517                 .inner
518                 .write_from_vec(Some(file_offset + nwritten), buf)
519                 .await
520                 .map(|(n, _)| n as u64)
521                 .map_err(Error::WriteFromVec)?;
522         }
523         Ok(())
524     }
525 }
526