1 // Copyright 2019 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 //! VM disk image file format I/O.
6
7 use std::cmp::min;
8 use std::fmt::Debug;
9 use std::fs::File;
10 use std::io;
11 use std::io::Seek;
12 use std::io::SeekFrom;
13 use std::path::PathBuf;
14 use std::sync::Arc;
15
16 use async_trait::async_trait;
17 use base::info;
18 use base::AsRawDescriptors;
19 use base::FileAllocate;
20 use base::FileReadWriteAtVolatile;
21 use base::FileSetLen;
22 use cros_async::BackingMemory;
23 use cros_async::Executor;
24 use cros_async::IoSource;
25 use cros_async::MemRegionIter;
26 use thiserror::Error as ThisError;
27
28 mod asynchronous;
29 #[allow(unused)]
30 pub(crate) use asynchronous::AsyncDiskFileWrapper;
31 #[cfg(feature = "qcow")]
32 mod qcow;
33 #[cfg(feature = "qcow")]
34 pub use qcow::QcowFile;
35 #[cfg(feature = "qcow")]
36 pub use qcow::QCOW_MAGIC;
37 mod sys;
38
39 #[cfg(feature = "composite-disk")]
40 mod composite;
41 #[cfg(feature = "composite-disk")]
42 use composite::CompositeDiskFile;
43 #[cfg(feature = "composite-disk")]
44 use composite::CDISK_MAGIC;
45 #[cfg(feature = "composite-disk")]
46 mod gpt;
47 #[cfg(feature = "composite-disk")]
48 pub use composite::create_composite_disk;
49 #[cfg(feature = "composite-disk")]
50 pub use composite::create_zero_filler;
51 #[cfg(feature = "composite-disk")]
52 pub use composite::Error as CompositeError;
53 #[cfg(feature = "composite-disk")]
54 pub use composite::ImagePartitionType;
55 #[cfg(feature = "composite-disk")]
56 pub use composite::PartitionInfo;
57 #[cfg(feature = "composite-disk")]
58 pub use gpt::Error as GptError;
59
60 #[cfg(feature = "android-sparse")]
61 mod android_sparse;
62 #[cfg(feature = "android-sparse")]
63 use android_sparse::AndroidSparse;
64 #[cfg(feature = "android-sparse")]
65 use android_sparse::SPARSE_HEADER_MAGIC;
66 use sys::read_from_disk;
67
68 #[cfg(feature = "zstd")]
69 mod zstd;
70 #[cfg(feature = "zstd")]
71 use zstd::ZstdDisk;
72 #[cfg(feature = "zstd")]
73 use zstd::ZSTD_FRAME_MAGIC;
74 #[cfg(feature = "zstd")]
75 use zstd::ZSTD_SKIPPABLE_MAGIC_HIGH;
76 #[cfg(feature = "zstd")]
77 use zstd::ZSTD_SKIPPABLE_MAGIC_LOW;
78
/// Nesting depth limit for disk formats that can open other disk files.
///
/// [`open_disk_file`] fails with [`Error::MaxNestingDepthExceeded`] when
/// `DiskFileParams::depth` is strictly greater than this value, so a depth equal to the limit is
/// still accepted.
const MAX_NESTING_DEPTH: u32 = 10;
81
/// Errors reported while opening, probing, or performing I/O on a disk image.
///
/// The `Display` text of each variant comes from its `#[error(...)]` attribute. Several variants
/// share the same message and differ only in the type of the wrapped error (for example
/// `Fdatasync`/`IoFdatasync`, `Fsync`/`IoFsync`, and `PunchHole`/`IoPunchHole`, which wrap
/// `cros_async::AsyncError` and `io::Error` respectively).
///
/// Variants guarded by `#[cfg(...)]` only exist when the corresponding feature or target is
/// enabled.
#[derive(ThisError, Debug)]
pub enum Error {
    #[error("failed to create block device: {0}")]
    BlockDeviceNew(base::Error),
    #[error("requested file conversion not supported")]
    ConversionNotSupported,
    #[cfg(feature = "android-sparse")]
    #[error("failure in android sparse disk: {0}")]
    CreateAndroidSparseDisk(android_sparse::Error),
    #[cfg(feature = "composite-disk")]
    #[error("failure in composite disk: {0}")]
    CreateCompositeDisk(composite::Error),
    #[cfg(feature = "zstd")]
    #[error("failure in zstd disk: {0}")]
    CreateZstdDisk(anyhow::Error),
    #[error("failure creating single file disk: {0}")]
    CreateSingleFileDisk(cros_async::AsyncError),
    #[error("failed to set O_DIRECT on disk image: {0}")]
    DirectFailed(base::Error),
    #[error("failure with fdatasync: {0}")]
    Fdatasync(cros_async::AsyncError),
    #[error("failure with fsync: {0}")]
    Fsync(cros_async::AsyncError),
    #[error("failed to lock file: {0}")]
    LockFileFailure(base::Error),
    #[error("failure with fdatasync: {0}")]
    IoFdatasync(io::Error),
    #[error("failure with flush: {0}")]
    IoFlush(io::Error),
    #[error("failure with fsync: {0}")]
    IoFsync(io::Error),
    #[error("failure to punch hole: {0}")]
    IoPunchHole(io::Error),
    #[error("checking host fs type: {0}")]
    HostFsType(base::Error),
    // Returned by `open_disk_file` when `DiskFileParams::depth` exceeds `MAX_NESTING_DEPTH`.
    #[error("maximum disk nesting depth exceeded")]
    MaxNestingDepthExceeded,
    #[error("failed to open disk file \"{0}\": {1}")]
    OpenFile(String, base::Error),
    #[error("failure to punch hole: {0}")]
    PunchHole(cros_async::AsyncError),
    #[error("failure to punch hole for block device file: {0}")]
    PunchHoleBlockDeviceFile(base::Error),
    #[cfg(feature = "qcow")]
    #[error("failure in qcow: {0}")]
    QcowError(qcow::Error),
    #[error("failed to read data: {0}")]
    ReadingData(io::Error),
    #[error("failed to read header: {0}")]
    ReadingHeader(io::Error),
    #[error("failed to read to memory: {0}")]
    ReadToMem(cros_async::AsyncError),
    #[error("failed to seek file: {0}")]
    SeekingFile(io::Error),
    #[error("failed to set file size: {0}")]
    SettingFileSize(io::Error),
    // Returned by `open_disk_file` when the detected image type has no handler compiled in.
    #[error("unknown disk type")]
    UnknownType,
    #[error("failed to write from memory: {0}")]
    WriteFromMem(cros_async::AsyncError),
    #[error("failed to write from vec: {0}")]
    WriteFromVec(cros_async::AsyncError),
    #[error("failed to write zeroes: {0}")]
    WriteZeroes(io::Error),
    #[error("failed to write data: {0}")]
    WritingData(io::Error),
    #[error("failed to convert to async: {0}")]
    ToAsync(cros_async::AsyncError),
    #[cfg(windows)]
    #[error("failed to set disk file sparse: {0}")]
    SetSparseFailure(io::Error),
    #[error("failure with guest memory access: {0}")]
    GuestMemory(cros_async::mem::Error),
    #[error("unsupported operation")]
    UnsupportedOperation,
}
158
/// Convenience alias for results whose error type is this module's [`Error`].
pub type Result<T> = std::result::Result<T, Error>;
160
/// A trait for getting the length of a disk image or raw block device.
pub trait DiskGetLen {
    /// Get the current length of the disk in bytes.
    ///
    /// Takes `&self`, so implementations must be able to determine the length through a shared
    /// reference. Failures are reported as an [`io::Error`].
    fn get_len(&self) -> io::Result<u64>;
}
166
167 impl DiskGetLen for File {
get_len(&self) -> io::Result<u64>168 fn get_len(&self) -> io::Result<u64> {
169 let mut s = self;
170 let orig_seek = s.stream_position()?;
171 let end = s.seek(SeekFrom::End(0))?;
172 s.seek(SeekFrom::Start(orig_seek))?;
173 Ok(end)
174 }
175 }
176
177 /// The prerequisites necessary to support a block device.
178 pub trait DiskFile:
179 FileSetLen + DiskGetLen + FileReadWriteAtVolatile + ToAsyncDisk + Send + AsRawDescriptors + Debug
180 {
181 /// Creates a new DiskFile instance that shares the same underlying disk file image. IO
182 /// operations to a DiskFile should affect all DiskFile instances with the same underlying disk
183 /// file image.
184 ///
185 /// `try_clone()` returns [`io::ErrorKind::Unsupported`] Error if a DiskFile does not support
186 /// creating an instance with the same underlying disk file image.
try_clone(&self) -> io::Result<Box<dyn DiskFile>>187 fn try_clone(&self) -> io::Result<Box<dyn DiskFile>> {
188 Err(io::Error::new(
189 io::ErrorKind::Unsupported,
190 "unsupported operation",
191 ))
192 }
193 }
194
/// A `DiskFile` that can be converted for asynchronous access.
pub trait ToAsyncDisk: AsRawDescriptors + DiskGetLen + Send {
    /// Convert a boxed self into a box-wrapped implementation of AsyncDisk.
    /// Used to convert a standard disk image to an async disk image. This conversion and the
    /// inverse are needed so that the `Send` DiskImage can be given to the block thread where it is
    /// converted to a non-`Send` AsyncDisk. The AsyncDisk can then be converted back and returned
    /// to the main device thread if the block device is destroyed or reset.
    ///
    /// `ex` is the executor handed to the resulting async disk.
    fn to_async_disk(self: Box<Self>, ex: &Executor) -> Result<Box<dyn AsyncDisk>>;
}
204
205 impl ToAsyncDisk for File {
to_async_disk(self: Box<Self>, ex: &Executor) -> Result<Box<dyn AsyncDisk>>206 fn to_async_disk(self: Box<Self>, ex: &Executor) -> Result<Box<dyn AsyncDisk>> {
207 Ok(Box::new(SingleFileDisk::new(*self, ex)?))
208 }
209 }
210
/// The variants of image files on the host that can be used as virtual disks.
///
/// [`detect_image_type`] only reports a non-`Raw` variant when the matching cargo feature is
/// compiled in.
#[derive(Debug, PartialEq, Eq)]
pub enum ImageType {
    /// No recognized magic number was found at the start of the file.
    Raw,
    /// The file starts with `QCOW_MAGIC` in big-endian byte order.
    Qcow2,
    /// The file starts with the bytes of the `CDISK_MAGIC` string.
    CompositeDisk,
    /// The file starts with `SPARSE_HEADER_MAGIC` in little-endian byte order.
    AndroidSparse,
    /// The first four bytes, read as a little-endian `u32`, equal `ZSTD_FRAME_MAGIC` or lie in
    /// the inclusive range `ZSTD_SKIPPABLE_MAGIC_LOW..=ZSTD_SKIPPABLE_MAGIC_HIGH`.
    Zstd,
}
220
221 /// Detect the type of an image file by checking for a valid header of the supported formats.
detect_image_type(file: &File, overlapped_mode: bool) -> Result<ImageType>222 pub fn detect_image_type(file: &File, overlapped_mode: bool) -> Result<ImageType> {
223 let mut f = file;
224 let disk_size = f.get_len().map_err(Error::SeekingFile)?;
225 let orig_seek = f.stream_position().map_err(Error::SeekingFile)?;
226
227 info!("disk size {}", disk_size);
228
229 // Try to read the disk in a nicely-aligned block size unless the whole file is smaller.
230 const MAGIC_BLOCK_SIZE: usize = 4096;
231 #[repr(align(4096))]
232 struct BlockAlignedBuffer {
233 data: [u8; MAGIC_BLOCK_SIZE],
234 }
235 let mut magic = BlockAlignedBuffer {
236 data: [0u8; MAGIC_BLOCK_SIZE],
237 };
238 let magic_read_len = if disk_size > MAGIC_BLOCK_SIZE as u64 {
239 MAGIC_BLOCK_SIZE
240 } else {
241 // This cast is safe since we know disk_size is less than MAGIC_BLOCK_SIZE (4096) and
242 // therefore is representable in usize.
243 disk_size as usize
244 };
245
246 read_from_disk(f, 0, &mut magic.data[0..magic_read_len], overlapped_mode)?;
247 f.seek(SeekFrom::Start(orig_seek))
248 .map_err(Error::SeekingFile)?;
249
250 #[cfg(feature = "composite-disk")]
251 if let Some(cdisk_magic) = magic.data.get(0..CDISK_MAGIC.len()) {
252 if cdisk_magic == CDISK_MAGIC.as_bytes() {
253 return Ok(ImageType::CompositeDisk);
254 }
255 }
256
257 #[allow(unused_variables)] // magic4 is only used with the qcow/android-sparse/zstd features.
258 if let Some(magic4) = magic
259 .data
260 .get(0..4)
261 .and_then(|v| <&[u8] as std::convert::TryInto<[u8; 4]>>::try_into(v).ok())
262 {
263 #[cfg(feature = "qcow")]
264 if magic4 == QCOW_MAGIC.to_be_bytes() {
265 return Ok(ImageType::Qcow2);
266 }
267 #[cfg(feature = "android-sparse")]
268 if magic4 == SPARSE_HEADER_MAGIC.to_le_bytes() {
269 return Ok(ImageType::AndroidSparse);
270 }
271 #[cfg(feature = "zstd")]
272 if u32::from_le_bytes(magic4) == ZSTD_FRAME_MAGIC
273 || (u32::from_le_bytes(magic4) >= ZSTD_SKIPPABLE_MAGIC_LOW
274 && u32::from_le_bytes(magic4) <= ZSTD_SKIPPABLE_MAGIC_HIGH)
275 {
276 return Ok(ImageType::Zstd);
277 }
278 }
279
280 Ok(ImageType::Raw)
281 }
282
283 impl DiskFile for File {
try_clone(&self) -> io::Result<Box<dyn DiskFile>>284 fn try_clone(&self) -> io::Result<Box<dyn DiskFile>> {
285 Ok(Box::new(self.try_clone()?))
286 }
287 }
288
/// Options passed to [`open_disk_file`] describing which disk image to open and how.
///
/// Most fields are consumed by the platform-specific `sys` code; that code is not part of this
/// file, so the notes below only restate what is documented here.
pub struct DiskFileParams {
    /// Path of the disk image.
    pub path: PathBuf,
    /// Presumably requests that the image be opened without write access; the handling lives
    /// in the `sys` module.
    pub is_read_only: bool,
    /// Whether to call `base::set_sparse_file` on the file. Currently only affects Windows and is
    /// irrelevant for read only files.
    pub is_sparse_file: bool,
    /// Whether to open the file in overlapped mode. Only affects Windows.
    pub is_overlapped: bool,
    /// Whether to disable OS page caches / buffering.
    pub is_direct: bool,
    /// Whether to lock the file.
    pub lock: bool,
    /// The nesting depth of the file. Used to avoid infinite recursion. Users outside the disk
    /// crate should set this to zero.
    pub depth: u32,
}
305
306 /// Inspect the image file type and create an appropriate disk file to match it.
open_disk_file(params: DiskFileParams) -> Result<Box<dyn DiskFile>>307 pub fn open_disk_file(params: DiskFileParams) -> Result<Box<dyn DiskFile>> {
308 if params.depth > MAX_NESTING_DEPTH {
309 return Err(Error::MaxNestingDepthExceeded);
310 }
311
312 let raw_image = sys::open_raw_disk_image(¶ms)?;
313 let image_type = detect_image_type(&raw_image, params.is_overlapped)?;
314 Ok(match image_type {
315 ImageType::Raw => {
316 sys::apply_raw_disk_file_options(&raw_image, params.is_sparse_file)?;
317 Box::new(raw_image) as Box<dyn DiskFile>
318 }
319 #[cfg(feature = "qcow")]
320 ImageType::Qcow2 => Box::new(QcowFile::from(raw_image, params).map_err(Error::QcowError)?)
321 as Box<dyn DiskFile>,
322 #[cfg(feature = "composite-disk")]
323 ImageType::CompositeDisk => {
324 // Valid composite disk header present
325 Box::new(
326 CompositeDiskFile::from_file(raw_image, params)
327 .map_err(Error::CreateCompositeDisk)?,
328 ) as Box<dyn DiskFile>
329 }
330 #[cfg(feature = "android-sparse")]
331 ImageType::AndroidSparse => {
332 Box::new(AndroidSparse::from_file(raw_image).map_err(Error::CreateAndroidSparseDisk)?)
333 as Box<dyn DiskFile>
334 }
335 #[cfg(feature = "zstd")]
336 ImageType::Zstd => Box::new(ZstdDisk::from_file(raw_image).map_err(Error::CreateZstdDisk)?)
337 as Box<dyn DiskFile>,
338 #[allow(unreachable_patterns)]
339 _ => return Err(Error::UnknownType),
340 })
341 }
342
343 /// An asynchronously accessible disk.
344 #[async_trait(?Send)]
345 pub trait AsyncDisk: DiskGetLen + FileSetLen + FileAllocate {
346 /// Flush intermediary buffers and/or dirty state to file. fsync not required.
flush(&self) -> Result<()>347 async fn flush(&self) -> Result<()>;
348
349 /// Asynchronously fsyncs any completed operations to the disk.
fsync(&self) -> Result<()>350 async fn fsync(&self) -> Result<()>;
351
352 /// Asynchronously fdatasyncs any completed operations to the disk.
353 /// Note that an implementation may simply call fsync for fdatasync.
fdatasync(&self) -> Result<()>354 async fn fdatasync(&self) -> Result<()>;
355
356 /// Reads from the file at 'file_offset' into memory `mem` at `mem_offsets`.
357 /// `mem_offsets` is similar to an iovec except relative to the start of `mem`.
read_to_mem<'a>( &'a self, file_offset: u64, mem: Arc<dyn BackingMemory + Send + Sync>, mem_offsets: cros_async::MemRegionIter<'a>, ) -> Result<usize>358 async fn read_to_mem<'a>(
359 &'a self,
360 file_offset: u64,
361 mem: Arc<dyn BackingMemory + Send + Sync>,
362 mem_offsets: cros_async::MemRegionIter<'a>,
363 ) -> Result<usize>;
364
365 /// Writes to the file at 'file_offset' from memory `mem` at `mem_offsets`.
write_from_mem<'a>( &'a self, file_offset: u64, mem: Arc<dyn BackingMemory + Send + Sync>, mem_offsets: cros_async::MemRegionIter<'a>, ) -> Result<usize>366 async fn write_from_mem<'a>(
367 &'a self,
368 file_offset: u64,
369 mem: Arc<dyn BackingMemory + Send + Sync>,
370 mem_offsets: cros_async::MemRegionIter<'a>,
371 ) -> Result<usize>;
372
373 /// Replaces a range of bytes with a hole.
punch_hole(&self, file_offset: u64, length: u64) -> Result<()>374 async fn punch_hole(&self, file_offset: u64, length: u64) -> Result<()>;
375
376 /// Writes up to `length` bytes of zeroes to the stream, returning how many bytes were written.
write_zeroes_at(&self, file_offset: u64, length: u64) -> Result<()>377 async fn write_zeroes_at(&self, file_offset: u64, length: u64) -> Result<()>;
378
379 /// Reads from the file at 'file_offset' into `buf`.
380 ///
381 /// Less efficient than `read_to_mem` because of extra copies and allocations.
read_double_buffered(&self, file_offset: u64, buf: &mut [u8]) -> Result<usize>382 async fn read_double_buffered(&self, file_offset: u64, buf: &mut [u8]) -> Result<usize> {
383 let backing_mem = Arc::new(cros_async::VecIoWrapper::from(vec![0u8; buf.len()]));
384 let region = cros_async::MemRegion {
385 offset: 0,
386 len: buf.len(),
387 };
388 let n = self
389 .read_to_mem(
390 file_offset,
391 backing_mem.clone(),
392 MemRegionIter::new(&[region]),
393 )
394 .await?;
395 backing_mem
396 .get_volatile_slice(region)
397 .expect("BUG: the VecIoWrapper shrank?")
398 .sub_slice(0, n)
399 .expect("BUG: read_to_mem return value too large?")
400 .copy_to(buf);
401 Ok(n)
402 }
403
404 /// Writes to the file at 'file_offset' from `buf`.
405 ///
406 /// Less efficient than `write_from_mem` because of extra copies and allocations.
write_double_buffered(&self, file_offset: u64, buf: &[u8]) -> Result<usize>407 async fn write_double_buffered(&self, file_offset: u64, buf: &[u8]) -> Result<usize> {
408 let backing_mem = Arc::new(cros_async::VecIoWrapper::from(buf.to_vec()));
409 let region = cros_async::MemRegion {
410 offset: 0,
411 len: buf.len(),
412 };
413 self.write_from_mem(
414 file_offset,
415 backing_mem,
416 cros_async::MemRegionIter::new(&[region]),
417 )
418 .await
419 }
420 }
421
/// A disk backed by a single file that implements `AsyncDisk` for access.
pub struct SingleFileDisk {
    /// Async I/O source wrapping the backing file; every operation on the disk goes through it.
    inner: IoSource<File>,
    /// Whether the backing file is a block device. Punching a hole in a block device needs a
    /// different operation (`punch_hole` issues a block discard instead).
    #[cfg(any(target_os = "android", target_os = "linux"))]
    is_block_device_file: bool,
}
429
430 impl DiskGetLen for SingleFileDisk {
get_len(&self) -> io::Result<u64>431 fn get_len(&self) -> io::Result<u64> {
432 self.inner.as_source().get_len()
433 }
434 }
435
436 impl FileSetLen for SingleFileDisk {
set_len(&self, len: u64) -> io::Result<()>437 fn set_len(&self, len: u64) -> io::Result<()> {
438 self.inner.as_source().set_len(len)
439 }
440 }
441
442 impl FileAllocate for SingleFileDisk {
allocate(&self, offset: u64, len: u64) -> io::Result<()>443 fn allocate(&self, offset: u64, len: u64) -> io::Result<()> {
444 self.inner.as_source().allocate(offset, len)
445 }
446 }
447
448 #[async_trait(?Send)]
449 impl AsyncDisk for SingleFileDisk {
flush(&self) -> Result<()>450 async fn flush(&self) -> Result<()> {
451 // Nothing to flush, all file mutations are immediately sent to the OS.
452 Ok(())
453 }
454
fsync(&self) -> Result<()>455 async fn fsync(&self) -> Result<()> {
456 self.inner.fsync().await.map_err(Error::Fsync)
457 }
458
fdatasync(&self) -> Result<()>459 async fn fdatasync(&self) -> Result<()> {
460 self.inner.fdatasync().await.map_err(Error::Fdatasync)
461 }
462
read_to_mem<'a>( &'a self, file_offset: u64, mem: Arc<dyn BackingMemory + Send + Sync>, mem_offsets: cros_async::MemRegionIter<'a>, ) -> Result<usize>463 async fn read_to_mem<'a>(
464 &'a self,
465 file_offset: u64,
466 mem: Arc<dyn BackingMemory + Send + Sync>,
467 mem_offsets: cros_async::MemRegionIter<'a>,
468 ) -> Result<usize> {
469 self.inner
470 .read_to_mem(Some(file_offset), mem, mem_offsets)
471 .await
472 .map_err(Error::ReadToMem)
473 }
474
write_from_mem<'a>( &'a self, file_offset: u64, mem: Arc<dyn BackingMemory + Send + Sync>, mem_offsets: cros_async::MemRegionIter<'a>, ) -> Result<usize>475 async fn write_from_mem<'a>(
476 &'a self,
477 file_offset: u64,
478 mem: Arc<dyn BackingMemory + Send + Sync>,
479 mem_offsets: cros_async::MemRegionIter<'a>,
480 ) -> Result<usize> {
481 self.inner
482 .write_from_mem(Some(file_offset), mem, mem_offsets)
483 .await
484 .map_err(Error::WriteFromMem)
485 }
486
punch_hole(&self, file_offset: u64, length: u64) -> Result<()>487 async fn punch_hole(&self, file_offset: u64, length: u64) -> Result<()> {
488 #[cfg(any(target_os = "android", target_os = "linux"))]
489 if self.is_block_device_file {
490 return base::linux::discard_block(self.inner.as_source(), file_offset, length)
491 .map_err(Error::PunchHoleBlockDeviceFile);
492 }
493 self.inner
494 .punch_hole(file_offset, length)
495 .await
496 .map_err(Error::PunchHole)
497 }
498
write_zeroes_at(&self, file_offset: u64, length: u64) -> Result<()>499 async fn write_zeroes_at(&self, file_offset: u64, length: u64) -> Result<()> {
500 if self
501 .inner
502 .write_zeroes_at(file_offset, length)
503 .await
504 .is_ok()
505 {
506 return Ok(());
507 }
508
509 // Fall back to filling zeros if more efficient write_zeroes_at doesn't work.
510 let buf_size = min(length, 0x10000);
511 let mut nwritten = 0;
512 while nwritten < length {
513 let remaining = length - nwritten;
514 let write_size = min(remaining, buf_size) as usize;
515 let buf = vec![0u8; write_size];
516 nwritten += self
517 .inner
518 .write_from_vec(Some(file_offset + nwritten), buf)
519 .await
520 .map(|(n, _)| n as u64)
521 .map_err(Error::WriteFromVec)?;
522 }
523 Ok(())
524 }
525 }
526