xref: /aosp_15_r20/external/crosvm/vm_control/src/lib.rs (revision bb4ee6a4ae7042d18b07a98463b9c8b875e44b39)
1 // Copyright 2017 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 //! Handles IPC for controlling the main VM process.
6 //!
7 //! The VM Control IPC protocol is synchronous, meaning that each `VmRequest` sent over a connection
8 //! will receive a `VmResponse` for that request next time data is received over that connection.
9 //!
10 //! The wire message format is a little-endian C-struct of fixed size, along with a file descriptor
11 //! if the request type expects one.
12 
13 pub mod api;
14 #[cfg(feature = "gdb")]
15 pub mod gdb;
16 #[cfg(feature = "gpu")]
17 pub mod gpu;
18 
19 #[cfg(any(target_os = "android", target_os = "linux"))]
20 use base::linux::MemoryMappingBuilderUnix;
21 #[cfg(any(target_os = "android", target_os = "linux"))]
22 use base::sys::call_with_extended_max_files;
23 #[cfg(any(target_os = "android", target_os = "linux"))]
24 use base::MemoryMappingArena;
25 #[cfg(windows)]
26 use base::MemoryMappingBuilderWindows;
27 use hypervisor::BalloonEvent;
28 use hypervisor::MemCacheType;
29 use hypervisor::MemRegion;
30 
31 #[cfg(feature = "balloon")]
32 mod balloon_tube;
33 pub mod client;
34 mod snapshot_format;
35 pub mod sys;
36 
37 #[cfg(target_arch = "x86_64")]
38 use std::arch::x86_64::_rdtsc;
39 use std::collections::BTreeMap;
40 use std::collections::BTreeSet;
41 use std::collections::HashMap;
42 use std::convert::TryInto;
43 use std::fmt;
44 use std::fmt::Display;
45 use std::fs::File;
46 use std::path::Path;
47 use std::path::PathBuf;
48 use std::result::Result as StdResult;
49 use std::str::FromStr;
50 use std::sync::mpsc;
51 use std::sync::Arc;
52 
53 use anyhow::bail;
54 use anyhow::Context;
55 use base::error;
56 use base::info;
57 use base::warn;
58 use base::with_as_descriptor;
59 use base::AsRawDescriptor;
60 use base::Descriptor;
61 use base::Error as SysError;
62 use base::Event;
63 use base::ExternalMapping;
64 use base::IntoRawDescriptor;
65 use base::MappedRegion;
66 use base::MemoryMappingBuilder;
67 use base::MmapError;
68 use base::Protection;
69 use base::Result;
70 use base::SafeDescriptor;
71 use base::SharedMemory;
72 use base::Tube;
73 use hypervisor::Datamatch;
74 use hypervisor::IoEventAddress;
75 use hypervisor::IrqRoute;
76 use hypervisor::IrqSource;
77 pub use hypervisor::MemSlot;
78 use hypervisor::Vm;
79 use hypervisor::VmCap;
80 use libc::EINVAL;
81 use libc::EIO;
82 use libc::ENODEV;
83 use libc::ENOTSUP;
84 use libc::ERANGE;
85 #[cfg(feature = "registered_events")]
86 use protos::registered_events;
87 use remain::sorted;
88 use resources::Alloc;
89 use resources::SystemAllocator;
90 use rutabaga_gfx::DeviceId;
91 use rutabaga_gfx::RutabagaDescriptor;
92 use rutabaga_gfx::RutabagaFromRawDescriptor;
93 use rutabaga_gfx::RutabagaGralloc;
94 use rutabaga_gfx::RutabagaHandle;
95 use rutabaga_gfx::RutabagaMappedRegion;
96 use rutabaga_gfx::VulkanInfo;
97 use serde::Deserialize;
98 use serde::Serialize;
99 pub use snapshot_format::*;
100 use swap::SwapStatus;
101 use sync::Mutex;
102 #[cfg(any(target_os = "android", target_os = "linux"))]
103 pub use sys::FsMappingRequest;
104 #[cfg(windows)]
105 pub use sys::InitialAudioSessionState;
106 #[cfg(any(target_os = "android", target_os = "linux"))]
107 pub use sys::VmMemoryMappingRequest;
108 #[cfg(any(target_os = "android", target_os = "linux"))]
109 pub use sys::VmMemoryMappingResponse;
110 use thiserror::Error;
111 pub use vm_control_product::GpuSendToMain;
112 pub use vm_control_product::GpuSendToService;
113 pub use vm_control_product::ServiceSendToGpu;
114 use vm_memory::GuestAddress;
115 
116 #[cfg(feature = "balloon")]
117 pub use crate::balloon_tube::BalloonControlCommand;
118 #[cfg(feature = "balloon")]
119 pub use crate::balloon_tube::BalloonTube;
120 #[cfg(feature = "gdb")]
121 pub use crate::gdb::VcpuDebug;
122 #[cfg(feature = "gdb")]
123 pub use crate::gdb::VcpuDebugStatus;
124 #[cfg(feature = "gdb")]
125 pub use crate::gdb::VcpuDebugStatusMessage;
126 #[cfg(feature = "gpu")]
127 use crate::gpu::GpuControlCommand;
128 #[cfg(feature = "gpu")]
129 use crate::gpu::GpuControlResult;
130 
/// Control the state of a particular VM CPU.
///
/// Each variant is one command addressed to a vCPU. The code that consumes these commands is
/// outside this file section.
#[derive(Clone, Debug)]
pub enum VcpuControl {
    /// Debugger command for the vCPU. Only available with the `gdb` feature.
    #[cfg(feature = "gdb")]
    Debug(VcpuDebug),
    /// Carries a [`VmRunMode`] for the vCPU.
    /// NOTE(review): presumably the mode the vCPU should switch to; confirm against the handler.
    RunState(VmRunMode),
    /// NOTE(review): the name suggests the vCPU thread is made real-time; confirm against the
    /// handler.
    MakeRT,
    /// Request the current state of the vCPU. The result is sent back over the included channel.
    GetStates(mpsc::Sender<VmRunMode>),
    /// Request the vcpu write a snapshot of itself to the writer, then send a `Result` back over
    /// the channel after completion/failure.
    Snapshot(SnapshotWriter, mpsc::Sender<anyhow::Result<()>>),
    /// Restore the vCPU as described by the [`VcpuRestoreRequest`].
    Restore(VcpuRestoreRequest),
    /// Only available on Android and Linux.
    /// NOTE(review): the meaning and unit of the `u32` are not visible here; confirm.
    #[cfg(any(target_os = "android", target_os = "linux"))]
    Throttle(u32),
}
147 
/// Request to restore a Vcpu from a given snapshot, and report the results
/// back via the provided channel.
#[derive(Clone, Debug)]
pub struct VcpuRestoreRequest {
    /// Channel on which the outcome of the restore is reported.
    pub result_sender: mpsc::Sender<anyhow::Result<()>>,
    /// Reader for the snapshot to restore from.
    pub snapshot_reader: SnapshotReader,
    /// Only present on x86_64.
    /// NOTE(review): presumably a host TSC reading taken as the reference point for the restore;
    /// confirm against the vCPU restore code.
    #[cfg(target_arch = "x86_64")]
    pub host_tsc_reference_moment: u64,
}
157 
/// Mode of execution for the VM.
#[derive(Debug, Copy, Clone, Default, PartialEq, Eq)]
pub enum VmRunMode {
    /// The default run mode indicating the VCPUs are running.
    #[default]
    Running,
    /// Indicates that the VCPUs are suspending execution until the `Running` mode is set.
    Suspending,
    /// Indicates that the VM is exiting all processes.
    Exiting,
    /// Indicates that the VM is in a breakpoint waiting for the debugger to do continue.
    Breakpoint,
}

impl Display for VmRunMode {
    /// Writes the lowercase name of the run mode (`running`, `suspending`, `exiting` or
    /// `breakpoint`).
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let name = match *self {
            VmRunMode::Running => "running",
            VmRunMode::Suspending => "suspending",
            VmRunMode::Exiting => "exiting",
            VmRunMode::Breakpoint => "breakpoint",
        };
        f.write_str(name)
    }
}
184 
/// Trait for devices that get notification on specific GPE trigger.
///
/// A device of this type can be registered through `PmResource::register_gpe_notify_dev`.
pub trait GpeNotify: Send {
    /// Notification hook. The default implementation does nothing.
    fn notify(&mut self) {}
}
189 
/// Trait for devices that get notification on specific PCI PME.
///
/// A device of this type can be registered through `PmResource::register_pme_notify_dev`.
pub trait PmeNotify: Send {
    /// Notification hook carrying the requester id. The default implementation does nothing.
    fn notify(&mut self, _requester_id: u16) {}
}
194 
/// Hooks for power-management events and for registering notification devices.
///
/// Every method has a default implementation that does nothing, so implementors only override
/// what they support.
///
/// NOTE(review): the per-method descriptions below are inferred from the method names and
/// signatures; the real implementations are outside this view.
pub trait PmResource {
    /// Power button event hook (no-op by default).
    fn pwrbtn_evt(&mut self) {}
    /// Sleep button event hook (no-op by default).
    fn slpbtn_evt(&mut self) {}
    /// RTC event hook; receives an `Event` named `_clear_evt` (no-op by default).
    fn rtc_evt(&mut self, _clear_evt: Event) {}
    /// GPE event hook for GPE number `_gpe`, with an optional `Event` named `_clear_evt`
    /// (no-op by default).
    fn gpe_evt(&mut self, _gpe: u32, _clear_evt: Option<Event>) {}
    /// PCI PME event hook for the given requester id (no-op by default).
    fn pme_evt(&mut self, _requester_id: u16) {}
    /// Registers a [`GpeNotify`] device for GPE number `_gpe` (no-op by default).
    fn register_gpe_notify_dev(&mut self, _gpe: u32, _notify_dev: Arc<Mutex<dyn GpeNotify>>) {}
    /// Registers a [`PmeNotify`] device for PCI bus `_bus` (no-op by default).
    fn register_pme_notify_dev(&mut self, _bus: u8, _notify_dev: Arc<Mutex<dyn PmeNotify>>) {}
}
204 
/// The maximum number of devices that can be listed in one `UsbControlCommand`.
///
/// It is the length of the `ports` array in `UsbControlCommand::ListDevice` and of the device
/// array in `UsbControlResult::Devices`.
///
/// This value was set to be equal to `xhci_regs::MAX_PORTS` for convenience, but it is not
/// necessary for correctness. Importing that value directly would be overkill because it would
/// require adding a big dependency for a single const.
pub const USB_CONTROL_MAX_PORTS: usize = 16;
211 
212 #[derive(Serialize, Deserialize, Debug)]
213 pub enum DiskControlCommand {
214     /// Resize a disk to `new_size` in bytes.
215     Resize { new_size: u64 },
216 }
217 
218 impl Display for DiskControlCommand {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result219     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
220         use self::DiskControlCommand::*;
221 
222         match self {
223             Resize { new_size } => write!(f, "disk_resize {}", new_size),
224         }
225     }
226 }
227 
/// Outcome of a [`DiskControlCommand`].
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
pub enum DiskControlResult {
    /// The command succeeded.
    Ok,
    /// The command failed with the given system error.
    Err(SysError),
}
233 
/// Net control commands for adding and removing tap devices.
///
/// Only available with the `pci-hotplug` feature.
#[cfg(feature = "pci-hotplug")]
#[derive(Serialize, Deserialize, Debug)]
pub enum NetControlCommand {
    /// Add a tap device.
    /// NOTE(review): the `String` is presumably the tap interface name; confirm against the
    /// handler.
    AddTap(String),
    /// Remove a tap device.
    /// NOTE(review): the `u8` is presumably the PCI bus reported by `PciControlResult::AddOk`;
    /// confirm against the handler.
    RemoveTap(u8),
}
241 
/// Commands for the USB controller.
///
/// `file` fields are serialized as descriptors (via `with_as_descriptor`).
#[derive(Serialize, Deserialize, Debug)]
pub enum UsbControlCommand {
    /// Attach the device backed by `file`.
    AttachDevice {
        #[serde(with = "with_as_descriptor")]
        file: File,
    },
    /// Attach the security key backed by `file`.
    AttachSecurityKey {
        #[serde(with = "with_as_descriptor")]
        file: File,
    },
    /// Detach the device on `port`.
    DetachDevice {
        port: u8,
    },
    /// List devices for up to `USB_CONTROL_MAX_PORTS` ports.
    /// NOTE(review): unused `ports` entries are presumably left as 0; confirm against the sender.
    ListDevice {
        ports: [u8; USB_CONTROL_MAX_PORTS],
    },
}
259 
260 #[derive(Serialize, Deserialize, Copy, Clone, Debug, Default)]
261 pub struct UsbControlAttachedDevice {
262     pub port: u8,
263     pub vendor_id: u16,
264     pub product_id: u16,
265 }
266 
267 impl UsbControlAttachedDevice {
valid(self) -> bool268     pub fn valid(self) -> bool {
269         self.port != 0
270     }
271 }
272 
/// Result for hotplug and removal of PCI device.
#[cfg(feature = "pci-hotplug")]
#[derive(Serialize, Deserialize, Debug, Clone)]
#[must_use]
pub enum PciControlResult {
    AddOk { bus: u8 },
    ErrString(String),
    RemoveOk,
}

#[cfg(feature = "pci-hotplug")]
impl Display for PciControlResult {
    /// Formats the result as `add_ok <bus>`, `error: <message>` or `remove_ok`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Self::AddOk { bus } => write!(f, "add_ok {bus}"),
            Self::ErrString(message) => write!(f, "error: {message}"),
            Self::RemoveOk => f.write_str("remove_ok"),
        }
    }
}
295 
296 #[derive(Serialize, Deserialize, Debug, Clone)]
297 pub enum UsbControlResult {
298     Ok { port: u8 },
299     NoAvailablePort,
300     NoSuchDevice,
301     NoSuchPort,
302     FailedToOpenDevice,
303     Devices([UsbControlAttachedDevice; USB_CONTROL_MAX_PORTS]),
304     FailedToInitHostDevice,
305 }
306 
307 impl Display for UsbControlResult {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result308     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
309         use self::UsbControlResult::*;
310 
311         match self {
312             UsbControlResult::Ok { port } => write!(f, "ok {}", port),
313             NoAvailablePort => write!(f, "no_available_port"),
314             NoSuchDevice => write!(f, "no_such_device"),
315             NoSuchPort => write!(f, "no_such_port"),
316             FailedToOpenDevice => write!(f, "failed_to_open_device"),
317             Devices(devices) => {
318                 write!(f, "devices")?;
319                 for d in devices.iter().filter(|d| d.valid()) {
320                     write!(f, " {} {:04x} {:04x}", d.port, d.vendor_id, d.product_id)?;
321                 }
322                 std::result::Result::Ok(())
323             }
324             FailedToInitHostDevice => write!(f, "failed_to_init_host_device"),
325         }
326     }
327 }
328 
/// Commands for snapshot feature
#[derive(Serialize, Deserialize, Debug)]
pub enum SnapshotCommand {
    /// Take a snapshot.
    ///
    /// NOTE(review): field meanings below are inferred from their names; confirm against the
    /// snapshot implementation.
    Take {
        /// Path the snapshot is associated with (presumably where it is written).
        snapshot_path: PathBuf,
        /// Whether memory should be compressed.
        compress_memory: bool,
        /// Whether the snapshot should be encrypted.
        encrypt: bool,
    },
}
338 
/// Commands for actions on devices and the devices control thread.
///
/// NOTE(review): the handling of these commands is outside this view; the variant descriptions
/// are inferred from names and payload types.
#[derive(Serialize, Deserialize, Debug)]
pub enum DeviceControlCommand {
    /// Put devices to sleep.
    SleepDevices,
    /// Wake devices up.
    WakeDevices,
    /// Snapshot devices using the given writer.
    SnapshotDevices {
        snapshot_writer: SnapshotWriter,
        compress_memory: bool,
    },
    /// Restore devices using the given reader.
    RestoreDevices {
        snapshot_reader: SnapshotReader,
    },
    /// Query the state of the devices.
    GetDevicesState,
    /// Exit (presumably stops the devices control thread).
    Exit,
}
354 
/// Commands to control the IRQ handler thread.
///
/// Responses, where there is one, are [`IrqHandlerResponse`] values.
#[derive(Serialize, Deserialize)]
pub enum IrqHandlerRequest {
    /// No response is sent for this command.
    AddIrqControlTubes(Vec<Tube>),
    /// Refreshes the set of event tokens (Events) from the Irqchip that the IRQ
    /// handler waits on to forward IRQs to their final destination (e.g. via
    /// Irqchip::service_irq_event).
    ///
    /// If the set of tokens exposed by the Irqchip changes while the VM is
    /// running (such as for snapshot restore), this command must be sent
    /// otherwise the VM will not receive IRQs as expected.
    RefreshIrqEventTokens,
    /// NOTE(review): presumably wakes the handler and makes it report the iteration via
    /// `IrqHandlerResponse::HandlerIterationComplete`; confirm against the handler loop.
    WakeAndNotifyIteration,
    /// No response is sent for this command.
    Exit,
}
372 
// NOTE(review): presumably the number of IRQ handler iterations a flush is expected to need at
// most; the code using this constant is outside this view, so confirm before relying on it.
const EXPECTED_MAX_IRQ_FLUSH_ITERATIONS: usize = 100;
374 
/// Response for [IrqHandlerRequest].
///
/// Not every request gets a response; `AddIrqControlTubes` and `Exit` are documented as sending
/// none.
#[derive(Serialize, Deserialize, Debug)]
pub enum IrqHandlerResponse {
    /// Sent when the IRQ event tokens have been refreshed.
    IrqEventTokenRefreshComplete,
    /// Specifies the number of tokens serviced in the requested iteration
    /// (less the token for the `WakeAndNotifyIteration` request).
    HandlerIterationComplete(usize),
}
384 
/// Source of a `VmMemoryRequest::RegisterMemory` mapping.
///
/// `VmMemorySource::map` turns a source into a mapped region.
#[derive(Serialize, Deserialize)]
pub enum VmMemorySource {
    /// Register shared memory represented by the given descriptor.
    /// On Windows, descriptor MUST be a mapping handle.
    SharedMemory(SharedMemory),
    /// Register a file mapping from the given descriptor.
    Descriptor {
        /// File descriptor to map.
        descriptor: SafeDescriptor,
        /// Offset within the file in bytes.
        offset: u64,
        /// Size of the mapping in bytes.
        size: u64,
    },
    /// Register memory mapped by Vulkano.
    ///
    /// The memory is imported and mapped through `RutabagaGralloc::import_and_map`.
    Vulkan {
        /// OS handle of the memory to import.
        descriptor: SafeDescriptor,
        /// Handle type passed along with the OS handle in the `RutabagaHandle`.
        handle_type: u32,
        /// Memory index placed in the `VulkanInfo` used for the import.
        memory_idx: u32,
        /// Device UUID placed in the `DeviceId` used for the import.
        device_uuid: [u8; 16],
        /// Driver UUID placed in the `DeviceId` used for the import.
        driver_uuid: [u8; 16],
        /// Size of the mapping in bytes.
        size: u64,
    },
    /// Register the current rutabaga external mapping.
    ///
    /// `ptr` and `size` are wrapped as-is into an `ExternalMapping`.
    ExternalMapping { ptr: u64, size: u64 },
}
412 
413 // The following are wrappers to avoid base dependencies in the rutabaga crate
to_rutabaga_desciptor(s: SafeDescriptor) -> RutabagaDescriptor414 fn to_rutabaga_desciptor(s: SafeDescriptor) -> RutabagaDescriptor {
415     // SAFETY:
416     // Safe because we own the SafeDescriptor at this point.
417     unsafe { RutabagaDescriptor::from_raw_descriptor(s.into_raw_descriptor()) }
418 }
419 
420 struct RutabagaMemoryRegion {
421     region: Box<dyn RutabagaMappedRegion>,
422 }
423 
424 impl RutabagaMemoryRegion {
new(region: Box<dyn RutabagaMappedRegion>) -> RutabagaMemoryRegion425     pub fn new(region: Box<dyn RutabagaMappedRegion>) -> RutabagaMemoryRegion {
426         RutabagaMemoryRegion { region }
427     }
428 }
429 
430 // SAFETY:
431 //
432 // Self guarantees `ptr`..`ptr+size` is an mmaped region owned by this object that
433 // can't be unmapped during the `MappedRegion`'s lifetime.
434 unsafe impl MappedRegion for RutabagaMemoryRegion {
as_ptr(&self) -> *mut u8435     fn as_ptr(&self) -> *mut u8 {
436         self.region.as_ptr()
437     }
438 
size(&self) -> usize439     fn size(&self) -> usize {
440         self.region.size()
441     }
442 }
443 
444 impl Display for VmMemorySource {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result445     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
446         use self::VmMemorySource::*;
447 
448         match self {
449             SharedMemory(..) => write!(f, "VmMemorySource::SharedMemory"),
450             Descriptor { .. } => write!(f, "VmMemorySource::Descriptor"),
451             Vulkan { .. } => write!(f, "VmMemorySource::Vulkan"),
452             ExternalMapping { .. } => write!(f, "VmMemorySource::ExternalMapping"),
453         }
454     }
455 }
456 
457 impl VmMemorySource {
458     /// Map the resource and return its mapping and size in bytes.
map( self, gralloc: &mut RutabagaGralloc, prot: Protection, ) -> Result<(Box<dyn MappedRegion>, u64, Option<SafeDescriptor>)>459     pub fn map(
460         self,
461         gralloc: &mut RutabagaGralloc,
462         prot: Protection,
463     ) -> Result<(Box<dyn MappedRegion>, u64, Option<SafeDescriptor>)> {
464         let (mem_region, size, descriptor) = match self {
465             VmMemorySource::Descriptor {
466                 descriptor,
467                 offset,
468                 size,
469             } => (
470                 map_descriptor(&descriptor, offset, size, prot)?,
471                 size,
472                 Some(descriptor),
473             ),
474 
475             VmMemorySource::SharedMemory(shm) => {
476                 (map_descriptor(&shm, 0, shm.size(), prot)?, shm.size(), None)
477             }
478             VmMemorySource::Vulkan {
479                 descriptor,
480                 handle_type,
481                 memory_idx,
482                 device_uuid,
483                 driver_uuid,
484                 size,
485             } => {
486                 let device_id = DeviceId {
487                     device_uuid,
488                     driver_uuid,
489                 };
490                 let mapped_region = match gralloc.import_and_map(
491                     RutabagaHandle {
492                         os_handle: to_rutabaga_desciptor(descriptor),
493                         handle_type,
494                     },
495                     VulkanInfo {
496                         memory_idx,
497                         device_id,
498                     },
499                     size,
500                 ) {
501                     Ok(mapped_region) => {
502                         let mapped_region: Box<dyn MappedRegion> =
503                             Box::new(RutabagaMemoryRegion::new(mapped_region));
504                         mapped_region
505                     }
506                     Err(e) => {
507                         error!("gralloc failed to import and map: {}", e);
508                         return Err(SysError::new(EINVAL));
509                     }
510                 };
511                 (mapped_region, size, None)
512             }
513             VmMemorySource::ExternalMapping { ptr, size } => {
514                 let mapped_region: Box<dyn MappedRegion> = Box::new(ExternalMapping {
515                     ptr,
516                     size: size as usize,
517                 });
518                 (mapped_region, size, None)
519             }
520         };
521         Ok((mem_region, size, descriptor))
522     }
523 }
524 
525 /// Destination of a `VmMemoryRequest::RegisterMemory` mapping in guest address space.
526 #[derive(Serialize, Deserialize)]
527 pub enum VmMemoryDestination {
528     /// Map at an offset within an existing PCI BAR allocation.
529     ExistingAllocation { allocation: Alloc, offset: u64 },
530     /// Map at the specified guest physical address.
531     GuestPhysicalAddress(u64),
532 }
533 
534 impl VmMemoryDestination {
535     /// Allocate and return the guest address of a memory mapping destination.
allocate(self, allocator: &mut SystemAllocator, size: u64) -> Result<GuestAddress>536     pub fn allocate(self, allocator: &mut SystemAllocator, size: u64) -> Result<GuestAddress> {
537         let addr = match self {
538             VmMemoryDestination::ExistingAllocation { allocation, offset } => allocator
539                 .mmio_allocator_any()
540                 .address_from_pci_offset(allocation, offset, size)
541                 .map_err(|_e| SysError::new(EINVAL))?,
542             VmMemoryDestination::GuestPhysicalAddress(gpa) => gpa,
543         };
544         Ok(GuestAddress(addr))
545     }
546 }
547 
/// Request to register or unregister an ioevent.
///
/// Carried by `VmMemoryRequest::IoEventRaw`.
#[derive(Serialize, Deserialize)]
pub struct IoEventUpdateRequest {
    /// The event to register or unregister.
    pub event: Event,
    /// Raw guest memory address of the ioevent.
    pub addr: u64,
    /// Data match condition for the ioevent.
    pub datamatch: Datamatch,
    /// `true` to register, `false` to unregister.
    pub register: bool,
}
556 
/// Request to mmap a file to a shared memory.
/// This request is supposed to follow a `VmMemoryRequest::MmapAndRegisterMemory` request that
/// contains `SharedMemory` that `file` is mmaped to.
#[cfg(any(target_os = "android", target_os = "linux"))]
#[derive(Serialize, Deserialize)]
pub struct VmMemoryFileMapping {
    /// File to map; serialized as a descriptor.
    #[serde(with = "with_as_descriptor")]
    pub file: File,
    /// Length of the mapping (presumably in bytes).
    pub length: usize,
    /// Offset within the shared memory (presumably in bytes) at which the file is mapped.
    pub mem_offset: usize,
    /// Offset within `file` (presumably in bytes) at which the mapping starts.
    pub file_offset: u64,
}
569 
/// Memory-related requests; each one is carried out by `VmMemoryRequest::execute`.
#[derive(Serialize, Deserialize)]
pub enum VmMemoryRequest {
    /// Prepare a shared memory region to make later operations more efficient. This
    /// may be a no-op depending on underlying platform support.
    PrepareSharedMemoryRegion { alloc: Alloc, cache: MemCacheType },
    /// Register a memory to be mapped to the guest.
    RegisterMemory {
        /// Source of the memory to register (mapped file descriptor, shared memory region, etc.)
        source: VmMemorySource,
        /// Where to map the memory in the guest.
        dest: VmMemoryDestination,
        /// Protection to map the memory with. The region is registered as read-only when this
        /// equals `Protection::read()`.
        prot: Protection,
        /// Cache attribute for guest memory setting
        cache: MemCacheType,
    },
    #[cfg(any(target_os = "android", target_os = "linux"))]
    /// Call mmap to `shm` and register the memory region as a read-only guest memory.
    /// This request is followed by an array of `VmMemoryFileMapping` with length
    /// `num_file_mappings`
    MmapAndRegisterMemory {
        /// Shared memory that the files of the following `VmMemoryFileMapping`s are mmapped to.
        shm: SharedMemory,
        /// Where to map the memory in the guest.
        dest: VmMemoryDestination,
        /// Length of the array of `VmMemoryFileMapping` that follows.
        num_file_mappings: usize,
    },
    /// Call hypervisor to free the given memory range.
    DynamicallyFreeMemoryRange {
        guest_address: GuestAddress,
        size: u64,
    },
    /// Call hypervisor to reclaim a previously freed memory range.
    DynamicallyReclaimMemoryRange {
        guest_address: GuestAddress,
        size: u64,
    },
    /// Balloon allocation/deallocation target reached.
    BalloonTargetReached { size: u64 },
    /// Unregister the given memory slot that was previously registered with `RegisterMemory`.
    UnregisterMemory(VmMemoryRegionId),
    /// Register an ioeventfd by looking up using Alloc info.
    IoEventWithAlloc {
        /// The event to register or unregister.
        evt: Event,
        /// Allocation used to look up the address.
        allocation: Alloc,
        /// Offset within `allocation`.
        offset: u64,
        /// Data match condition for the ioevent.
        datamatch: Datamatch,
        /// `true` to register, `false` to unregister.
        register: bool,
    },
    /// Register an eventfd with raw guest memory address.
    IoEventRaw(IoEventUpdateRequest),
}
623 
624 /// Struct for managing `VmMemoryRequest`s IOMMU related state.
625 pub struct VmMemoryRequestIommuClient {
626     tube: Arc<Mutex<Tube>>,
627     registered_memory: BTreeSet<VmMemoryRegionId>,
628 }
629 
630 impl VmMemoryRequestIommuClient {
631     /// Constructs `VmMemoryRequestIommuClient` from a tube for communication with the viommu.
new(tube: Arc<Mutex<Tube>>) -> Self632     pub fn new(tube: Arc<Mutex<Tube>>) -> Self {
633         Self {
634             tube,
635             registered_memory: BTreeSet::new(),
636         }
637     }
638 }
639 
/// Bookkeeping entry for a memory region registered through `VmMemoryRequest::RegisterMemory`.
enum RegisteredMemory {
    /// The region was mapped at a fixed offset inside an already prepared memory slot
    /// (recorded by `try_map_to_prepared_region`).
    FixedMapping {
        /// Slot of the prepared region.
        slot: MemSlot,
        /// Offset of the mapping within the slot.
        offset: usize,
        /// Size of the mapping.
        size: usize,
    },
    /// The region was registered as its own memory slot.
    DynamicMapping {
        /// Slot returned when the memory region was added to the VM.
        slot: MemSlot,
    },
}
650 
/// A prepared shared memory region, as stored in `VmMemoryRegionState` after a successful
/// `PrepareSharedMemoryRegion` request.
pub struct VmMappedMemoryRegion {
    /// Guest address of the start of the region.
    guest_address: GuestAddress,
    /// Memory slot backing the region.
    slot: MemSlot,
}
655 
/// State that `VmMemoryRequest::execute` keeps between requests.
#[derive(Default)]
pub struct VmMemoryRegionState {
    /// Prepared regions, keyed by the allocation they were prepared for.
    mapped_regions: HashMap<Alloc, VmMappedMemoryRegion>,
    /// Currently registered memory, keyed by region id.
    registered_memory: BTreeMap<VmMemoryRegionId, RegisteredMemory>,
}
661 
try_map_to_prepared_region( vm: &mut impl Vm, region_state: &mut VmMemoryRegionState, source: &VmMemorySource, dest: &VmMemoryDestination, prot: &Protection, ) -> Option<VmMemoryResponse>662 fn try_map_to_prepared_region(
663     vm: &mut impl Vm,
664     region_state: &mut VmMemoryRegionState,
665     source: &VmMemorySource,
666     dest: &VmMemoryDestination,
667     prot: &Protection,
668 ) -> Option<VmMemoryResponse> {
669     let VmMemoryDestination::ExistingAllocation {
670         allocation,
671         offset: dest_offset,
672     } = dest
673     else {
674         return None;
675     };
676 
677     let VmMappedMemoryRegion {
678         guest_address,
679         slot,
680     } = region_state.mapped_regions.get(allocation)?;
681 
682     let (descriptor, file_offset, size) = match source {
683         VmMemorySource::Descriptor {
684             descriptor,
685             offset,
686             size,
687         } => (
688             Descriptor(descriptor.as_raw_descriptor()),
689             *offset,
690             *size as usize,
691         ),
692         VmMemorySource::SharedMemory(shm) => {
693             let size = shm.size() as usize;
694             (Descriptor(shm.as_raw_descriptor()), 0, size)
695         }
696         _ => {
697             error!(
698                 "source {} is not compatible with fixed mapping into prepared memory region",
699                 source
700             );
701             return Some(VmMemoryResponse::Err(SysError::new(EINVAL)));
702         }
703     };
704     if let Err(err) = vm.add_fd_mapping(
705         *slot,
706         *dest_offset as usize,
707         size,
708         &descriptor,
709         file_offset,
710         *prot,
711     ) {
712         return Some(VmMemoryResponse::Err(err));
713     }
714 
715     let guest_address = GuestAddress(guest_address.0 + dest_offset);
716     let region_id = VmMemoryRegionId(guest_address);
717     region_state.registered_memory.insert(
718         region_id,
719         RegisteredMemory::FixedMapping {
720             slot: *slot,
721             offset: *dest_offset as usize,
722             size,
723         },
724     );
725 
726     Some(VmMemoryResponse::RegisterMemory {
727         region_id,
728         slot: *slot,
729     })
730 }
731 
732 impl VmMemoryRequest {
733     /// Executes this request on the given Vm.
734     ///
    /// # Arguments
    /// * `vm` - The `Vm` to perform the request on.
    /// * `sys_allocator` - Used to allocate addresses.
    ///
    /// This does not return a result, instead encapsulating the success or failure in a
    /// `VmMemoryResponse` with the intended purpose of sending the response back over the socket
    /// that received this `VmMemoryRequest`.
execute( self, #[cfg(any(target_os = "android", target_os = "linux"))] tube: &Tube, vm: &mut impl Vm, sys_allocator: &mut SystemAllocator, gralloc: &mut RutabagaGralloc, iommu_client: Option<&mut VmMemoryRequestIommuClient>, region_state: &mut VmMemoryRegionState, ) -> VmMemoryResponse742     pub fn execute(
743         self,
744         #[cfg(any(target_os = "android", target_os = "linux"))] tube: &Tube,
745         vm: &mut impl Vm,
746         sys_allocator: &mut SystemAllocator,
747         gralloc: &mut RutabagaGralloc,
748         iommu_client: Option<&mut VmMemoryRequestIommuClient>,
749         region_state: &mut VmMemoryRegionState,
750     ) -> VmMemoryResponse {
751         use self::VmMemoryRequest::*;
752         match self {
753             PrepareSharedMemoryRegion { alloc, cache } => {
754                 // Currently the iommu_client is only used by virtio-gpu when used alongside GPU
755                 // pci-passthrough.
756                 //
757                 // TODO(b/323368701): Make compatible with iommu_client by ensuring that
758                 // VirtioIOMMUVfioCommand::VfioDmabufMap is submitted for both dynamic mappings and
759                 // fixed mappings (i.e. whether or not try_map_to_prepared_region succeeds in
760                 // RegisterMemory case below).
761                 assert!(iommu_client.is_none());
762 
763                 if !sys::should_prepare_memory_region() {
764                     return VmMemoryResponse::Ok;
765                 }
766 
767                 match sys::prepare_shared_memory_region(vm, sys_allocator, alloc, cache) {
768                     Ok(region) => {
769                         region_state.mapped_regions.insert(alloc, region);
770                         VmMemoryResponse::Ok
771                     }
772                     Err(e) => VmMemoryResponse::Err(e),
773                 }
774             }
775             RegisterMemory {
776                 source,
777                 dest,
778                 prot,
779                 cache,
780             } => {
781                 if let Some(resp) =
782                     try_map_to_prepared_region(vm, region_state, &source, &dest, &prot)
783                 {
784                     return resp;
785                 }
786 
787                 // Correct on Windows because callers of this IPC guarantee descriptor is a mapping
788                 // handle.
789                 let (mapped_region, size, descriptor) = match source.map(gralloc, prot) {
790                     Ok((region, size, descriptor)) => (region, size, descriptor),
791                     Err(e) => return VmMemoryResponse::Err(e),
792                 };
793 
794                 let guest_addr = match dest.allocate(sys_allocator, size) {
795                     Ok(addr) => addr,
796                     Err(e) => return VmMemoryResponse::Err(e),
797                 };
798 
799                 let slot = match vm.add_memory_region(
800                     guest_addr,
801                     mapped_region,
802                     prot == Protection::read(),
803                     false,
804                     cache,
805                 ) {
806                     Ok(slot) => slot,
807                     Err(e) => return VmMemoryResponse::Err(e),
808                 };
809 
810                 let region_id = VmMemoryRegionId(guest_addr);
811                 if let (Some(descriptor), Some(iommu_client)) = (descriptor, iommu_client) {
812                     let request =
813                         VirtioIOMMURequest::VfioCommand(VirtioIOMMUVfioCommand::VfioDmabufMap {
814                             region_id,
815                             gpa: guest_addr.0,
816                             size,
817                             dma_buf: descriptor,
818                         });
819 
820                     match virtio_iommu_request(&iommu_client.tube.lock(), &request) {
821                         Ok(VirtioIOMMUResponse::VfioResponse(VirtioIOMMUVfioResult::Ok)) => (),
822                         resp => {
823                             error!("Unexpected message response: {:?}", resp);
824                             // Ignore the result because there is nothing we can do with a failure.
825                             let _ = vm.remove_memory_region(slot);
826                             return VmMemoryResponse::Err(SysError::new(EINVAL));
827                         }
828                     };
829 
830                     iommu_client.registered_memory.insert(region_id);
831                 }
832 
833                 region_state
834                     .registered_memory
835                     .insert(region_id, RegisteredMemory::DynamicMapping { slot });
836                 VmMemoryResponse::RegisterMemory { region_id, slot }
837             }
838             #[cfg(any(target_os = "android", target_os = "linux"))]
839             MmapAndRegisterMemory {
840                 shm,
841                 dest,
842                 num_file_mappings,
843             } => {
844                 // Define a callback to be executed with extended limit of file counts.
845                 // It recieves `num_file_mappings` FDs and call `add_fd_mapping` for each.
846                 let callback = || {
847                     let mem = match MemoryMappingBuilder::new(shm.size() as usize)
848                         .from_shared_memory(&shm)
849                         .build()
850                     {
851                         Ok(mem) => mem,
852                         Err(e) => {
853                             error!("Failed to build MemoryMapping from shared memory: {:#}", e);
854                             return Err(VmMemoryResponse::Err(SysError::new(EINVAL)));
855                         }
856                     };
857                     let mut mmap_arena = MemoryMappingArena::from(mem);
858 
859                     // If `num_file_mappings` exceeds `SCM_MAX_FD`, `file_mappings` are sent in
860                     // chunks of length `SCM_MAX_FD`.
861                     let mut file_mappings = Vec::with_capacity(num_file_mappings);
862                     let mut read = 0;
863                     while read < num_file_mappings {
864                         let len = std::cmp::min(num_file_mappings - read, base::unix::SCM_MAX_FD);
865                         let mps: Vec<VmMemoryFileMapping> = match tube.recv_with_max_fds(len) {
866                             Ok(m) => m,
867                             Err(e) => {
868                                 error!(
869                                     "Failed to get {num_file_mappings} FDs to be mapped: {:#}",
870                                     e
871                                 );
872                                 return Err(VmMemoryResponse::Err(SysError::new(EINVAL)));
873                             }
874                         };
875                         file_mappings.extend(mps.into_iter());
876                         read += len;
877                     }
878 
879                     for VmMemoryFileMapping {
880                         mem_offset,
881                         length,
882                         file,
883                         file_offset,
884                     } in file_mappings
885                     {
886                         if let Err(e) = mmap_arena.add_fd_mapping(
887                             mem_offset,
888                             length,
889                             &file,
890                             file_offset,
891                             Protection::read(),
892                         ) {
893                             error!("Failed to add fd mapping: {:#}", e);
894                             return Err(VmMemoryResponse::Err(SysError::new(EINVAL)));
895                         }
896                     }
897                     Ok(mmap_arena)
898                 };
899                 let mmap_arena = match call_with_extended_max_files(callback) {
900                     Ok(Ok(m)) => m,
901                     Ok(Err(e)) => {
902                         return e;
903                     }
904                     Err(e) => {
905                         error!("Failed to set max count of file descriptors: {e}");
906                         return VmMemoryResponse::Err(e);
907                     }
908                 };
909 
910                 let size = shm.size();
911                 let guest_addr = match dest.allocate(sys_allocator, size) {
912                     Ok(addr) => addr,
913                     Err(e) => return VmMemoryResponse::Err(e),
914                 };
915 
916                 let slot = match vm.add_memory_region(
917                     guest_addr,
918                     Box::new(mmap_arena),
919                     true,
920                     false,
921                     MemCacheType::CacheCoherent,
922                 ) {
923                     Ok(slot) => slot,
924                     Err(e) => return VmMemoryResponse::Err(e),
925                 };
926 
927                 let region_id = VmMemoryRegionId(guest_addr);
928 
929                 region_state
930                     .registered_memory
931                     .insert(region_id, RegisteredMemory::DynamicMapping { slot });
932 
933                 VmMemoryResponse::RegisterMemory { region_id, slot }
934             }
935             UnregisterMemory(id) => match region_state.registered_memory.remove(&id) {
936                 Some(RegisteredMemory::DynamicMapping { slot }) => match vm
937                     .remove_memory_region(slot)
938                 {
939                     Ok(_) => {
940                         if let Some(iommu_client) = iommu_client {
941                             if iommu_client.registered_memory.remove(&id) {
942                                 let request = VirtioIOMMURequest::VfioCommand(
943                                     VirtioIOMMUVfioCommand::VfioDmabufUnmap(id),
944                                 );
945 
946                                 match virtio_iommu_request(&iommu_client.tube.lock(), &request) {
947                                     Ok(VirtioIOMMUResponse::VfioResponse(
948                                         VirtioIOMMUVfioResult::Ok,
949                                     )) => VmMemoryResponse::Ok,
950                                     resp => {
951                                         error!("Unexpected message response: {:?}", resp);
952                                         VmMemoryResponse::Err(SysError::new(EINVAL))
953                                     }
954                                 }
955                             } else {
956                                 VmMemoryResponse::Ok
957                             }
958                         } else {
959                             VmMemoryResponse::Ok
960                         }
961                     }
962                     Err(e) => VmMemoryResponse::Err(e),
963                 },
964                 Some(RegisteredMemory::FixedMapping { slot, offset, size }) => {
965                     match vm.remove_mapping(slot, offset, size) {
966                         Ok(()) => VmMemoryResponse::Ok,
967                         Err(e) => VmMemoryResponse::Err(e),
968                     }
969                 }
970                 None => VmMemoryResponse::Err(SysError::new(EINVAL)),
971             },
972             DynamicallyFreeMemoryRange {
973                 guest_address,
974                 size,
975             } => match vm.handle_balloon_event(BalloonEvent::Inflate(MemRegion {
976                 guest_address,
977                 size,
978             })) {
979                 Ok(_) => VmMemoryResponse::Ok,
980                 Err(e) => VmMemoryResponse::Err(e),
981             },
982             DynamicallyReclaimMemoryRange {
983                 guest_address,
984                 size,
985             } => match vm.handle_balloon_event(BalloonEvent::Deflate(MemRegion {
986                 guest_address,
987                 size,
988             })) {
989                 Ok(_) => VmMemoryResponse::Ok,
990                 Err(e) => VmMemoryResponse::Err(e),
991             },
992             BalloonTargetReached { size } => {
993                 match vm.handle_balloon_event(BalloonEvent::BalloonTargetReached(size)) {
994                     Ok(_) => VmMemoryResponse::Ok,
995                     Err(e) => VmMemoryResponse::Err(e),
996                 }
997             }
998             IoEventWithAlloc {
999                 evt,
1000                 allocation,
1001                 offset,
1002                 datamatch,
1003                 register,
1004             } => {
1005                 let len = match datamatch {
1006                     Datamatch::AnyLength => 1,
1007                     Datamatch::U8(_) => 1,
1008                     Datamatch::U16(_) => 2,
1009                     Datamatch::U32(_) => 4,
1010                     Datamatch::U64(_) => 8,
1011                 };
1012                 let addr = match sys_allocator
1013                     .mmio_allocator_any()
1014                     .address_from_pci_offset(allocation, offset, len)
1015                 {
1016                     Ok(addr) => addr,
1017                     Err(e) => {
1018                         error!("error getting target address: {:#}", e);
1019                         return VmMemoryResponse::Err(SysError::new(EINVAL));
1020                     }
1021                 };
1022                 let res = if register {
1023                     vm.register_ioevent(&evt, IoEventAddress::Mmio(addr), datamatch)
1024                 } else {
1025                     vm.unregister_ioevent(&evt, IoEventAddress::Mmio(addr), datamatch)
1026                 };
1027                 match res {
1028                     Ok(_) => VmMemoryResponse::Ok,
1029                     Err(e) => VmMemoryResponse::Err(e),
1030                 }
1031             }
1032             IoEventRaw(request) => {
1033                 let res = if request.register {
1034                     vm.register_ioevent(
1035                         &request.event,
1036                         IoEventAddress::Mmio(request.addr),
1037                         request.datamatch,
1038                     )
1039                 } else {
1040                     vm.unregister_ioevent(
1041                         &request.event,
1042                         IoEventAddress::Mmio(request.addr),
1043                         request.datamatch,
1044                     )
1045                 };
1046                 match res {
1047                     Ok(_) => VmMemoryResponse::Ok,
1048                     Err(e) => VmMemoryResponse::Err(e),
1049                 }
1050             }
1051         }
1052     }
1053 }
1054 
#[derive(Serialize, Deserialize, Debug, PartialOrd, PartialEq, Eq, Ord, Clone, Copy)]
/// Identifier for registered memory regions. Globally unique.
///
/// The id is handed out in `VmMemoryResponse::RegisterMemory` and is the key used to look the
/// region up again when it is unregistered.
// The current implementation uses guest physical address as the unique identifier.
pub struct VmMemoryRegionId(GuestAddress);
1059 
/// Result of executing a VM memory request.
#[derive(Serialize, Deserialize, Debug)]
pub enum VmMemoryResponse {
    /// The request to register memory into guest address space was successful.
    RegisterMemory {
        /// Identifier of the newly registered region; pass it back to unregister the region.
        region_id: VmMemoryRegionId,
        /// Memory slot returned by the hypervisor when the region was added.
        slot: u32,
    },
    /// The request succeeded and has no data to return.
    Ok,
    /// The request failed with the given system error.
    Err(SysError),
}
1070 
/// A request to allocate, route, or release an interrupt. Carried out by
/// `VmIrqRequest::execute`.
#[derive(Serialize, Deserialize, Debug)]
pub enum VmIrqRequest {
    /// Allocate one gsi, and associate gsi to irqfd with register_irqfd()
    ///
    /// On success the response is `VmIrqResponse::AllocateOneMsi` carrying the allocated gsi.
    /// `device_id`, `queue_id` and `device_name` are forwarded unchanged in `IrqSetup::Event`.
    AllocateOneMsi {
        irqfd: Event,
        device_id: u32,
        queue_id: usize,
        device_name: String,
    },
    /// Allocate a specific gsi to irqfd with register_irqfd(). This must only
    /// be used when it is known that the gsi is free. Only the snapshot
    /// subsystem can make this guarantee, and use of this request by any other
    /// caller is strongly discouraged.
    ///
    /// On success the response is `VmIrqResponse::Ok`.
    AllocateOneMsiAtGsi {
        irqfd: Event,
        gsi: u32,
        device_id: u32,
        queue_id: usize,
        device_name: String,
    },
    /// Add one msi route entry into the IRQ chip.
    AddMsiRoute {
        gsi: u32,
        msi_address: u64,
        msi_data: u32,
    },
    /// Unregister `irqfd` from `gsi` and release the gsi back to the allocator.
    ///
    /// The unregister step is best effort: its error is ignored and the response is always
    /// `VmIrqResponse::Ok`.
    ReleaseOneIrq {
        gsi: u32,
        irqfd: Event,
    },
}
1103 
/// Data to set up an IRQ event or IRQ route on the IRQ chip.
/// VmIrqRequest::execute can't take an `IrqChip` argument, because of a dependency cycle between
/// devices and vm_control, so it takes a Fn that processes an `IrqSetup`.
pub enum IrqSetup<'a> {
    /// Associate an irqfd with a gsi. Fields, in order: gsi, irqfd, device id, queue id and
    /// device name (as passed by `VmIrqRequest::execute`).
    Event(u32, &'a Event, u32, usize, String),
    /// Add the given route to the IRQ chip.
    Route(IrqRoute),
    /// Unregister the irqfd from the gsi. Fields, in order: gsi, irqfd.
    UnRegister(u32, &'a Event),
}
1112 
1113 impl VmIrqRequest {
1114     /// Executes this request on the given Vm.
1115     ///
1116     /// # Arguments
1117     /// * `set_up_irq` - A function that applies an `IrqSetup` to an IRQ chip.
1118     ///
1119     /// This does not return a result, instead encapsulating the success or failure in a
1120     /// `VmIrqResponse` with the intended purpose of sending the response back over the socket
1121     /// that received this `VmIrqResponse`.
execute<F>(&self, set_up_irq: F, sys_allocator: &mut SystemAllocator) -> VmIrqResponse where F: FnOnce(IrqSetup) -> Result<()>,1122     pub fn execute<F>(&self, set_up_irq: F, sys_allocator: &mut SystemAllocator) -> VmIrqResponse
1123     where
1124         F: FnOnce(IrqSetup) -> Result<()>,
1125     {
1126         use self::VmIrqRequest::*;
1127         match *self {
1128             AllocateOneMsi {
1129                 ref irqfd,
1130                 device_id,
1131                 queue_id,
1132                 ref device_name,
1133             } => {
1134                 if let Some(irq_num) = sys_allocator.allocate_irq() {
1135                     match set_up_irq(IrqSetup::Event(
1136                         irq_num,
1137                         irqfd,
1138                         device_id,
1139                         queue_id,
1140                         device_name.clone(),
1141                     )) {
1142                         Ok(_) => VmIrqResponse::AllocateOneMsi { gsi: irq_num },
1143                         Err(e) => VmIrqResponse::Err(e),
1144                     }
1145                 } else {
1146                     VmIrqResponse::Err(SysError::new(EINVAL))
1147                 }
1148             }
1149             AllocateOneMsiAtGsi {
1150                 ref irqfd,
1151                 gsi,
1152                 device_id,
1153                 queue_id,
1154                 ref device_name,
1155             } => {
1156                 match set_up_irq(IrqSetup::Event(
1157                     gsi,
1158                     irqfd,
1159                     device_id,
1160                     queue_id,
1161                     device_name.clone(),
1162                 )) {
1163                     Ok(_) => VmIrqResponse::Ok,
1164                     Err(e) => VmIrqResponse::Err(e),
1165                 }
1166             }
1167             AddMsiRoute {
1168                 gsi,
1169                 msi_address,
1170                 msi_data,
1171             } => {
1172                 let route = IrqRoute {
1173                     gsi,
1174                     source: IrqSource::Msi {
1175                         address: msi_address,
1176                         data: msi_data,
1177                     },
1178                 };
1179                 match set_up_irq(IrqSetup::Route(route)) {
1180                     Ok(_) => VmIrqResponse::Ok,
1181                     Err(e) => VmIrqResponse::Err(e),
1182                 }
1183             }
1184             ReleaseOneIrq { gsi, ref irqfd } => {
1185                 let _ = set_up_irq(IrqSetup::UnRegister(gsi, irqfd));
1186                 sys_allocator.release_irq(gsi);
1187                 VmIrqResponse::Ok
1188             }
1189         }
1190     }
1191 }
1192 
/// Result of executing a `VmIrqRequest`.
#[derive(Serialize, Deserialize, Debug)]
pub enum VmIrqResponse {
    /// Reply to a successful `VmIrqRequest::AllocateOneMsi`; `gsi` is the allocated interrupt.
    AllocateOneMsi { gsi: u32 },
    /// The request succeeded and has no data to return.
    Ok,
    /// The request failed with the given system error.
    Err(SysError),
}
1199 
/// Target state for devices.
// NOTE(review): presumably used to ask devices to go to sleep or to wake up again; confirm
// against the users of this enum.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub enum DevicesState {
    Sleep,
    Wake,
}
1205 
/// Outcome of a battery control operation. Also used as the error type when parsing battery
/// types, properties, statuses and health values from strings.
///
/// The `Display` impl provides the user-facing message for each variant.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub enum BatControlResult {
    /// The battery property was set successfully.
    Ok,
    /// No battery device has been created.
    NoBatDevice,
    /// The requested health value is not a known `BatHealth` name.
    NoSuchHealth,
    /// The requested property is not a known `BatProperty` name.
    NoSuchProperty,
    /// The requested status value is not a known `BatStatus` name.
    NoSuchStatus,
    /// The requested battery type is not a known `BatteryType` name.
    NoSuchBatType,
    /// The property target could not be parsed as an integer.
    StringParseIntErr,
    /// The property target could not be parsed as a boolean.
    StringParseBoolErr,
}
1217 
1218 impl Display for BatControlResult {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result1219     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1220         use self::BatControlResult::*;
1221 
1222         match self {
1223             Ok => write!(f, "Setting battery property successfully"),
1224             NoBatDevice => write!(f, "No battery device created"),
1225             NoSuchHealth => write!(f, "Invalid Battery health setting. Only support: unknown/good/overheat/dead/overvoltage/unexpectedfailure/cold/watchdogtimerexpire/safetytimerexpire/overcurrent"),
1226             NoSuchProperty => write!(f, "Battery doesn't have such property. Only support: status/health/present/capacity/aconline"),
1227             NoSuchStatus => write!(f, "Invalid Battery status setting. Only support: unknown/charging/discharging/notcharging/full"),
1228             NoSuchBatType => write!(f, "Invalid Battery type setting. Only support: goldfish"),
1229             StringParseIntErr => write!(f, "Battery property target ParseInt error"),
1230             StringParseBoolErr => write!(f, "Battery property target ParseBool error"),
1231         }
1232     }
1233 }
1234 
/// Kind of battery device. Serialized with kebab-case variant names.
#[derive(Serialize, Deserialize, Copy, Clone, Debug, Default, PartialEq, Eq)]
#[serde(rename_all = "kebab-case")]
pub enum BatteryType {
    /// Goldfish battery; the default and currently the only type. Parsed from `"goldfish"`.
    #[default]
    Goldfish,
}
1241 
1242 impl FromStr for BatteryType {
1243     type Err = BatControlResult;
1244 
from_str(s: &str) -> StdResult<Self, Self::Err>1245     fn from_str(s: &str) -> StdResult<Self, Self::Err> {
1246         match s {
1247             "goldfish" => Ok(BatteryType::Goldfish),
1248             _ => Err(BatControlResult::NoSuchBatType),
1249         }
1250     }
1251 }
1252 
/// Battery property that a battery control command can target.
///
/// The string form of each variant (see the `FromStr` and `Display` impls) is noted below.
#[derive(Serialize, Deserialize, Debug)]
pub enum BatProperty {
    /// `"status"`
    Status,
    /// `"health"`
    Health,
    /// `"present"`
    Present,
    /// `"capacity"`
    Capacity,
    /// `"aconline"`
    ACOnline,
    /// `"set_fake_bat_config"`
    SetFakeBatConfig,
    /// `"cancel_fake_bat_config"`
    CancelFakeBatConfig,
}
1263 
1264 impl FromStr for BatProperty {
1265     type Err = BatControlResult;
1266 
from_str(s: &str) -> StdResult<Self, Self::Err>1267     fn from_str(s: &str) -> StdResult<Self, Self::Err> {
1268         match s {
1269             "status" => Ok(BatProperty::Status),
1270             "health" => Ok(BatProperty::Health),
1271             "present" => Ok(BatProperty::Present),
1272             "capacity" => Ok(BatProperty::Capacity),
1273             "aconline" => Ok(BatProperty::ACOnline),
1274             "set_fake_bat_config" => Ok(BatProperty::SetFakeBatConfig),
1275             "cancel_fake_bat_config" => Ok(BatProperty::CancelFakeBatConfig),
1276             _ => Err(BatControlResult::NoSuchProperty),
1277         }
1278     }
1279 }
1280 
1281 impl Display for BatProperty {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result1282     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1283         match *self {
1284             BatProperty::Status => write!(f, "status"),
1285             BatProperty::Health => write!(f, "health"),
1286             BatProperty::Present => write!(f, "present"),
1287             BatProperty::Capacity => write!(f, "capacity"),
1288             BatProperty::ACOnline => write!(f, "aconline"),
1289             BatProperty::SetFakeBatConfig => write!(f, "set_fake_bat_config"),
1290             BatProperty::CancelFakeBatConfig => write!(f, "cancel_fake_bat_config"),
1291         }
1292     }
1293 }
1294 
/// Battery charging status.
///
/// Converting to `u32` yields the variant's position in declaration order, starting at zero,
/// so reordering the variants changes those numeric values.
#[derive(Serialize, Deserialize, Debug)]
pub enum BatStatus {
    /// Parsed from `"unknown"`.
    Unknown,
    /// Parsed from `"charging"`.
    Charging,
    /// Parsed from `"discharging"`.
    DisCharging,
    /// Parsed from `"notcharging"`.
    NotCharging,
    /// Parsed from `"full"`.
    Full,
}
1303 
1304 impl BatStatus {
new(status: String) -> std::result::Result<Self, BatControlResult>1305     pub fn new(status: String) -> std::result::Result<Self, BatControlResult> {
1306         match status.as_str() {
1307             "unknown" => Ok(BatStatus::Unknown),
1308             "charging" => Ok(BatStatus::Charging),
1309             "discharging" => Ok(BatStatus::DisCharging),
1310             "notcharging" => Ok(BatStatus::NotCharging),
1311             "full" => Ok(BatStatus::Full),
1312             _ => Err(BatControlResult::NoSuchStatus),
1313         }
1314     }
1315 }
1316 
1317 impl FromStr for BatStatus {
1318     type Err = BatControlResult;
1319 
from_str(s: &str) -> StdResult<Self, Self::Err>1320     fn from_str(s: &str) -> StdResult<Self, Self::Err> {
1321         match s {
1322             "unknown" => Ok(BatStatus::Unknown),
1323             "charging" => Ok(BatStatus::Charging),
1324             "discharging" => Ok(BatStatus::DisCharging),
1325             "notcharging" => Ok(BatStatus::NotCharging),
1326             "full" => Ok(BatStatus::Full),
1327             _ => Err(BatControlResult::NoSuchStatus),
1328         }
1329     }
1330 }
1331 
1332 impl From<BatStatus> for u32 {
from(status: BatStatus) -> Self1333     fn from(status: BatStatus) -> Self {
1334         status as u32
1335     }
1336 }
1337 
/// Battery health.
///
/// Converting to `u32` yields the variant's position in declaration order, starting at zero,
/// so reordering the variants changes those numeric values.
#[derive(Serialize, Deserialize, Debug)]
pub enum BatHealth {
    /// Parsed from `"unknown"`.
    Unknown,
    /// Parsed from `"good"`.
    Good,
    /// Parsed from `"overheat"`.
    Overheat,
    /// Parsed from `"dead"`.
    Dead,
    /// Parsed from `"overvoltage"`.
    OverVoltage,
    /// Parsed from `"unexpectedfailure"`.
    UnexpectedFailure,
    /// Parsed from `"cold"`.
    Cold,
    /// Parsed from `"watchdogtimerexpire"`.
    WatchdogTimerExpire,
    /// Parsed from `"safetytimerexpire"`.
    SafetyTimerExpire,
    /// Parsed from `"overcurrent"`.
    OverCurrent,
}
1351 
1352 impl FromStr for BatHealth {
1353     type Err = BatControlResult;
1354 
from_str(s: &str) -> StdResult<Self, Self::Err>1355     fn from_str(s: &str) -> StdResult<Self, Self::Err> {
1356         match s {
1357             "unknown" => Ok(BatHealth::Unknown),
1358             "good" => Ok(BatHealth::Good),
1359             "overheat" => Ok(BatHealth::Overheat),
1360             "dead" => Ok(BatHealth::Dead),
1361             "overvoltage" => Ok(BatHealth::OverVoltage),
1362             "unexpectedfailure" => Ok(BatHealth::UnexpectedFailure),
1363             "cold" => Ok(BatHealth::Cold),
1364             "watchdogtimerexpire" => Ok(BatHealth::WatchdogTimerExpire),
1365             "safetytimerexpire" => Ok(BatHealth::SafetyTimerExpire),
1366             "overcurrent" => Ok(BatHealth::OverCurrent),
1367             _ => Err(BatControlResult::NoSuchHealth),
1368         }
1369     }
1370 }
1371 
1372 impl From<BatHealth> for u32 {
from(status: BatHealth) -> Self1373     fn from(status: BatHealth) -> Self {
1374         status as u32
1375     }
1376 }
1377 
/// Configuration of fake battery status information.
#[derive(Serialize, Deserialize, Debug, Default)]
pub enum BatConfig {
    /// Propagates host's battery status. This is the default.
    #[default]
    Real,
    /// Fake on battery status. Simulates a disconnected AC adapter.
    /// This forces ac_online to false and sets the battery status
    /// to DISCHARGING
    Fake {
        /// Sets the maximum battery capacity reported to the guest
        max_capacity: u32,
    },
}
1392 
/// Command for changing a battery property. Carried by `VmRequest::BatCommand` and built from
/// strings by `BatControlCommand::new`.
#[derive(Serialize, Deserialize, Debug)]
pub enum BatControlCommand {
    /// Set the battery status (property `status`).
    SetStatus(BatStatus),
    /// Set the battery health (property `health`).
    SetHealth(BatHealth),
    /// Set the `present` property to the given integer.
    SetPresent(u32),
    /// Set the `capacity` property to the given integer.
    SetCapacity(u32),
    /// Set the `aconline` property to the given integer.
    SetACOnline(u32),
    /// Enable the fake battery configuration (property `set_fake_bat_config`).
    // NOTE(review): the integer is presumably the `max_capacity` of `BatConfig::Fake`; confirm
    // against the battery device.
    SetFakeBatConfig(u32),
    /// Cancel the fake battery configuration (property `cancel_fake_bat_config`).
    CancelFakeConfig,
}
1403 
1404 impl BatControlCommand {
new(property: String, target: String) -> std::result::Result<Self, BatControlResult>1405     pub fn new(property: String, target: String) -> std::result::Result<Self, BatControlResult> {
1406         let cmd = property.parse::<BatProperty>()?;
1407         match cmd {
1408             BatProperty::Status => Ok(BatControlCommand::SetStatus(target.parse::<BatStatus>()?)),
1409             BatProperty::Health => Ok(BatControlCommand::SetHealth(target.parse::<BatHealth>()?)),
1410             BatProperty::Present => Ok(BatControlCommand::SetPresent(
1411                 target
1412                     .parse::<u32>()
1413                     .map_err(|_| BatControlResult::StringParseIntErr)?,
1414             )),
1415             BatProperty::Capacity => Ok(BatControlCommand::SetCapacity(
1416                 target
1417                     .parse::<u32>()
1418                     .map_err(|_| BatControlResult::StringParseIntErr)?,
1419             )),
1420             BatProperty::ACOnline => Ok(BatControlCommand::SetACOnline(
1421                 target
1422                     .parse::<u32>()
1423                     .map_err(|_| BatControlResult::StringParseIntErr)?,
1424             )),
1425             BatProperty::SetFakeBatConfig => Ok(BatControlCommand::SetFakeBatConfig(
1426                 target
1427                     .parse::<u32>()
1428                     .map_err(|_| BatControlResult::StringParseIntErr)?,
1429             )),
1430             BatProperty::CancelFakeBatConfig => Ok(BatControlCommand::CancelFakeConfig),
1431         }
1432     }
1433 }
1434 
/// Used for VM to control battery properties.
pub struct BatControl {
    /// Type of the battery device being controlled.
    pub type_: BatteryType,
    /// Tube connected to the battery device.
    // NOTE(review): presumably carries `BatControlCommand` messages and their results; confirm
    // against the battery device implementation.
    pub control_tube: Tube,
}
1440 
/// Used to mark a hotplug PCI device's device type.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub enum HotPlugDeviceType {
    UpstreamPort,
    DownstreamPort,
    EndPoint,
}
1448 
/// Used for VM to hotplug PCI devices. Carried by `VmRequest::HotPlugVfioCommand`.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct HotPlugDeviceInfo {
    /// Kind of PCI device being hotplugged.
    pub device_type: HotPlugDeviceType,
    /// Path identifying the device.
    // NOTE(review): presumably the host sysfs path of the vfio device; confirm against callers.
    pub path: PathBuf,
    // NOTE(review): presumably selects whether a hotplug interrupt is raised for the guest;
    // confirm against the hotplug handler.
    pub hp_interrupt: bool,
}
1456 
/// Message for communicating a suspend or resume to the virtio-pvclock device.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub enum PvClockCommand {
    /// Tell the device that the VM is being suspended.
    Suspend,
    /// Tell the device that the VM is being resumed.
    Resume,
}
1463 
/// Message used by virtio-pvclock to communicate command results.
#[derive(Serialize, Deserialize, Debug)]
pub enum PvClockCommandResponse {
    /// The command succeeded and has no data to return.
    Ok,
    // NOTE(review): presumably the reply to `PvClockCommand::Resume`, reporting the total
    // number of ticks spent suspended; confirm against the pvclock device.
    Resumed { total_suspended_ticks: u64 },
    // NOTE(review): presumably returned when the device has not been activated by the guest;
    // confirm against the pvclock device.
    DeviceInactive,
    /// The command failed with the given system error.
    Err(SysError),
}
1472 
/// Commands for vmm-swap feature. Carried by `VmRequest::Swap`.
#[derive(Serialize, Deserialize, Debug)]
pub enum SwapCommand {
    Enable,
    Trim,
    SwapOut,
    // NOTE(review): the meaning of `slow_file_cleanup` is not visible here; confirm against the
    // swap controller before documenting it.
    Disable { slow_file_cleanup: bool },
    Status,
}
1482 
/// A request to the main process to perform some operation on the VM.
///
/// Unless otherwise noted, each request should expect a `VmResponse::Ok` to be received on success.
#[derive(Serialize, Deserialize, Debug)]
pub enum VmRequest {
    /// Break the VM's run loop and exit.
    Exit,
    /// Trigger a power button event in the guest.
    Powerbtn,
    /// Trigger a sleep button event in the guest.
    Sleepbtn,
    /// Trigger an RTC interrupt in the guest. When the irq associated with the RTC is
    /// resampled, it will be re-asserted as long as `clear_evt` is not signaled.
    Rtc { clear_evt: Event },
    /// Suspend the VM's VCPUs until resume.
    SuspendVcpus,
    /// Swap the memory content into files on a disk.
    Swap(SwapCommand),
    /// Resume the VM's VCPUs that were previously suspended.
    ResumeVcpus,
    /// Inject a general-purpose event. If `clear_evt` is provided, when the irq associated
    /// with the GPE is resampled, it will be re-asserted as long as `clear_evt` is not
    /// signaled.
    Gpe { gpe: u32, clear_evt: Option<Event> },
    /// Inject a PCI PME.
    PciPme(u16),
    /// Make the VM's RT VCPU real-time.
    MakeRT,
    /// Command for balloon driver.
    #[cfg(feature = "balloon")]
    BalloonCommand(BalloonControlCommand),
    /// Send a command to a disk chosen by `disk_index`.
    /// `disk_index` is a 0-based count of `--disk`, `--rwdisk`, and `-r` command-line options.
    DiskCommand {
        disk_index: usize,
        command: DiskControlCommand,
    },
    /// Command for the USB controller.
    UsbCommand(UsbControlCommand),
    /// Command to modify the gpu.
    #[cfg(feature = "gpu")]
    GpuCommand(GpuControlCommand),
    /// Command to set a battery property on the battery of the given type.
    BatCommand(BatteryType, BatControlCommand),
    /// Command to add/remove multiple vfio-pci devices. `add` selects between adding (`true`)
    /// and removing (`false`) `device`.
    HotPlugVfioCommand {
        device: HotPlugDeviceInfo,
        add: bool,
    },
    /// Command to add/remove network tap device as virtio-pci device.
    #[cfg(feature = "pci-hotplug")]
    HotPlugNetCommand(NetControlCommand),
    /// Command to snapshot devices.
    Snapshot(SnapshotCommand),
    /// Register for event notification.
    RegisterListener {
        socket_addr: String,
        event: RegisteredEvent,
    },
    /// Unregister for notifications for event.
    UnregisterListener {
        socket_addr: String,
        event: RegisteredEvent,
    },
    /// Unregister for all event notifications.
    Unregister { socket_addr: String },
    /// Suspend VM VCPUs and Devices until resume.
    SuspendVm,
    /// Resume VM VCPUs and Devices.
    ResumeVm,
    /// Returns Vcpus PID/TID.
    VcpuPidTid,
    /// Throttles the requested vCPU for microseconds.
    // NOTE(review): presumably the fields are (vCPU index, duration in microseconds); confirm
    // against the handler.
    Throttle(usize, u32),
}
1559 
/// Kind of event a listener can register for (see `VmRequest::RegisterListener`).
///
/// NOTE: when making any changes to this enum please also update
/// RegisteredEventFfi in crosvm_control/src/lib.rs
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum RegisteredEvent {
    VirtioBalloonWsReport,
    VirtioBalloonResize,
    VirtioBalloonOOMDeflation,
}
1568 
/// A `RegisteredEvent` together with the data that accompanies it.
///
/// `into_event` maps each variant back to the plain `RegisteredEvent` of the same name.
#[derive(Serialize, Deserialize, Debug)]
pub enum RegisteredEventWithData {
    /// Working-set report from the balloon.
    VirtioBalloonWsReport {
        /// Working-set buckets included in the report.
        ws_buckets: Vec<balloon_control::WSBucket>,
        // NOTE(review): presumably the balloon's actual size at report time; confirm units
        // against the balloon device.
        balloon_actual: u64,
    },
    VirtioBalloonResize,
    VirtioBalloonOOMDeflation,
}
1578 
1579 impl RegisteredEventWithData {
into_event(&self) -> RegisteredEvent1580     pub fn into_event(&self) -> RegisteredEvent {
1581         match self {
1582             Self::VirtioBalloonWsReport { .. } => RegisteredEvent::VirtioBalloonWsReport,
1583             Self::VirtioBalloonResize => RegisteredEvent::VirtioBalloonResize,
1584             Self::VirtioBalloonOOMDeflation => RegisteredEvent::VirtioBalloonOOMDeflation,
1585         }
1586     }
1587 
1588     #[cfg(feature = "registered_events")]
into_proto(&self) -> registered_events::RegisteredEvent1589     pub fn into_proto(&self) -> registered_events::RegisteredEvent {
1590         match self {
1591             Self::VirtioBalloonWsReport {
1592                 ws_buckets,
1593                 balloon_actual,
1594             } => {
1595                 let mut report = registered_events::VirtioBalloonWsReport {
1596                     balloon_actual: *balloon_actual,
1597                     ..registered_events::VirtioBalloonWsReport::new()
1598                 };
1599                 for ws in ws_buckets {
1600                     report.ws_buckets.push(registered_events::VirtioWsBucket {
1601                         age: ws.age,
1602                         file_bytes: ws.bytes[0],
1603                         anon_bytes: ws.bytes[1],
1604                         ..registered_events::VirtioWsBucket::new()
1605                     });
1606                 }
1607                 let mut event = registered_events::RegisteredEvent::new();
1608                 event.set_ws_report(report);
1609                 event
1610             }
1611             Self::VirtioBalloonResize => {
1612                 let mut event = registered_events::RegisteredEvent::new();
1613                 event.set_resize(registered_events::VirtioBalloonResize::new());
1614                 event
1615             }
1616             Self::VirtioBalloonOOMDeflation => {
1617                 let mut event = registered_events::RegisteredEvent::new();
1618                 event.set_oom_deflation(registered_events::VirtioBalloonOOMDeflation::new());
1619                 event
1620             }
1621         }
1622     }
1623 
from_ws(ws: &balloon_control::BalloonWS, balloon_actual: u64) -> Self1624     pub fn from_ws(ws: &balloon_control::BalloonWS, balloon_actual: u64) -> Self {
1625         RegisteredEventWithData::VirtioBalloonWsReport {
1626             ws_buckets: ws.ws.clone(),
1627             balloon_actual,
1628         }
1629     }
1630 }
1631 
handle_disk_command(command: &DiskControlCommand, disk_host_tube: &Tube) -> VmResponse1632 pub fn handle_disk_command(command: &DiskControlCommand, disk_host_tube: &Tube) -> VmResponse {
1633     // Forward the request to the block device process via its control socket.
1634     if let Err(e) = disk_host_tube.send(command) {
1635         error!("disk socket send failed: {}", e);
1636         return VmResponse::Err(SysError::new(EINVAL));
1637     }
1638 
1639     // Wait for the disk control command to be processed
1640     match disk_host_tube.recv() {
1641         Ok(DiskControlResult::Ok) => VmResponse::Ok,
1642         Ok(DiskControlResult::Err(e)) => VmResponse::Err(e),
1643         Err(e) => {
1644             error!("disk socket recv failed: {}", e);
1645             VmResponse::Err(SysError::new(EINVAL))
1646         }
1647     }
1648 }
1649 
1650 /// WARNING: descriptor must be a mapping handle on Windows.
map_descriptor( descriptor: &dyn AsRawDescriptor, offset: u64, size: u64, prot: Protection, ) -> Result<Box<dyn MappedRegion>>1651 fn map_descriptor(
1652     descriptor: &dyn AsRawDescriptor,
1653     offset: u64,
1654     size: u64,
1655     prot: Protection,
1656 ) -> Result<Box<dyn MappedRegion>> {
1657     let size: usize = size.try_into().map_err(|_e| SysError::new(ERANGE))?;
1658     match MemoryMappingBuilder::new(size)
1659         .from_descriptor(descriptor)
1660         .offset(offset)
1661         .protection(prot)
1662         .build()
1663     {
1664         Ok(mmap) => Ok(Box::new(mmap)),
1665         Err(MmapError::SystemCallFailed(e)) => Err(e),
1666         _ => Err(SysError::new(EINVAL)),
1667     }
1668 }
1669 
1670 // Get vCPU state. vCPUs are expected to all hold the same state.
1671 // In this function, there may be a time where vCPUs are not holding the same state
1672 // as they transition from one state to the other. This is expected, and the final result
1673 // should be all vCPUs holding the same state.
get_vcpu_state(kick_vcpus: impl Fn(VcpuControl), vcpu_num: usize) -> anyhow::Result<VmRunMode>1674 fn get_vcpu_state(kick_vcpus: impl Fn(VcpuControl), vcpu_num: usize) -> anyhow::Result<VmRunMode> {
1675     let (send_chan, recv_chan) = mpsc::channel();
1676     kick_vcpus(VcpuControl::GetStates(send_chan));
1677     if vcpu_num == 0 {
1678         bail!("vcpu_num is zero");
1679     }
1680     let mut current_mode_vec: Vec<VmRunMode> = Vec::new();
1681     for _ in 0..vcpu_num {
1682         match recv_chan.recv() {
1683             Ok(state) => current_mode_vec.push(state),
1684             Err(e) => {
1685                 bail!("Failed to get vCPU state: {}", e);
1686             }
1687         };
1688     }
1689     let first_state = current_mode_vec[0];
1690     if first_state == VmRunMode::Exiting {
1691         panic!("Attempt to snapshot while exiting.");
1692     }
1693     if current_mode_vec.iter().any(|x| *x != first_state) {
1694         // We do not panic here. It could be that vCPUs are transitioning from one mode to another.
1695         bail!("Unknown VM state: vCPUs hold different states.");
1696     }
1697     Ok(first_state)
1698 }
1699 
/// A guard to guarantee that all the vCPUs are suspended during the scope.
///
/// When this guard is dropped, it rolls back the state of CPUs.
pub struct VcpuSuspendGuard<'a> {
    /// Run mode the vCPUs reported when the guard was created. It is restored on drop unless
    /// the vCPUs were already `Suspending`.
    saved_run_mode: VmRunMode,
    /// Callback used to send a [VcpuControl] message to all the vCPUs.
    kick_vcpus: &'a dyn Fn(VcpuControl),
}
1707 
1708 impl<'a> VcpuSuspendGuard<'a> {
1709     /// Check the all vCPU state and suspend the vCPUs if they are running.
1710     ///
1711     /// This returns [VcpuSuspendGuard] to rollback the vcpu state.
1712     ///
1713     /// # Arguments
1714     ///
1715     /// * `kick_vcpus` - A funtion to send [VcpuControl] message to all the vCPUs and interrupt
1716     ///   them.
1717     /// * `vcpu_num` - The number of vCPUs.
new(kick_vcpus: &'a impl Fn(VcpuControl), vcpu_num: usize) -> anyhow::Result<Self>1718     pub fn new(kick_vcpus: &'a impl Fn(VcpuControl), vcpu_num: usize) -> anyhow::Result<Self> {
1719         // get initial vcpu state
1720         let saved_run_mode = get_vcpu_state(kick_vcpus, vcpu_num)?;
1721         match saved_run_mode {
1722             VmRunMode::Running => {
1723                 kick_vcpus(VcpuControl::RunState(VmRunMode::Suspending));
1724                 // Blocking call, waiting for response to ensure vCPU state was updated.
1725                 // In case of failure, where a vCPU still has the state running, start up vcpus and
1726                 // abort operation.
1727                 let current_mode = get_vcpu_state(kick_vcpus, vcpu_num)?;
1728                 if current_mode != VmRunMode::Suspending {
1729                     kick_vcpus(VcpuControl::RunState(saved_run_mode));
1730                     bail!("vCPUs failed to all suspend. Kicking back all vCPUs to their previous state: {saved_run_mode}");
1731                 }
1732             }
1733             VmRunMode::Suspending => {
1734                 // do nothing. keep the state suspending.
1735             }
1736             other => {
1737                 bail!("vcpus are not in running/suspending state, but {}", other);
1738             }
1739         };
1740         Ok(Self {
1741             saved_run_mode,
1742             kick_vcpus,
1743         })
1744     }
1745 }
1746 
1747 impl Drop for VcpuSuspendGuard<'_> {
drop(&mut self)1748     fn drop(&mut self) {
1749         if self.saved_run_mode != VmRunMode::Suspending {
1750             (self.kick_vcpus)(VcpuControl::RunState(self.saved_run_mode));
1751         }
1752     }
1753 }
1754 
/// A guard to guarantee that all devices are sleeping during its scope.
///
/// When this guard is dropped, it wakes the devices.
pub struct DeviceSleepGuard<'a> {
    /// Tube used to send [DeviceControlCommand]s and to receive their replies.
    device_control_tube: &'a Tube,
    /// Devices state reported when the guard was created. The devices are woken on drop only
    /// if this is `DevicesState::Wake`.
    devices_state: DevicesState,
}
1762 
1763 impl<'a> DeviceSleepGuard<'a> {
new(device_control_tube: &'a Tube) -> anyhow::Result<Self>1764     fn new(device_control_tube: &'a Tube) -> anyhow::Result<Self> {
1765         device_control_tube
1766             .send(&DeviceControlCommand::GetDevicesState)
1767             .context("send command to devices control socket")?;
1768         let devices_state = match device_control_tube
1769             .recv()
1770             .context("receive from devices control socket")?
1771         {
1772             VmResponse::DevicesState(state) => state,
1773             resp => bail!("failed to get devices state. Unexpected behavior: {}", resp),
1774         };
1775         if let DevicesState::Wake = devices_state {
1776             device_control_tube
1777                 .send(&DeviceControlCommand::SleepDevices)
1778                 .context("send command to devices control socket")?;
1779             match device_control_tube
1780                 .recv()
1781                 .context("receive from devices control socket")?
1782             {
1783                 VmResponse::Ok => (),
1784                 resp => bail!("device sleep failed: {}", resp),
1785             }
1786         }
1787         Ok(Self {
1788             device_control_tube,
1789             devices_state,
1790         })
1791     }
1792 }
1793 
1794 impl Drop for DeviceSleepGuard<'_> {
drop(&mut self)1795     fn drop(&mut self) {
1796         if let DevicesState::Wake = self.devices_state {
1797             if let Err(e) = self
1798                 .device_control_tube
1799                 .send(&DeviceControlCommand::WakeDevices)
1800             {
1801                 panic!("failed to request device wake after snapshot: {}", e);
1802             }
1803             match self.device_control_tube.recv() {
1804                 Ok(VmResponse::Ok) => (),
1805                 Ok(resp) => panic!("unexpected response to device wake request: {}", resp),
1806                 Err(e) => panic!("failed to get reply for device wake request: {}", e),
1807             }
1808         }
1809     }
1810 }
1811 
1812 impl VmRequest {
1813     /// Executes this request on the given Vm and other mutable state.
1814     ///
1815     /// This does not return a result, instead encapsulating the success or failure in a
1816     /// `VmResponse` with the intended purpose of sending the response back over the  socket that
1817     /// received this `VmRequest`.
1818     ///
1819     /// `suspended_pvclock_state`: If the hypervisor has its own pvclock (not the same as
1820     /// virtio-pvclock) and the VM is suspended (not just the vCPUs, but the full VM), then
1821     /// `suspended_pvclock_state` will be used to store the ClockState saved just after the vCPUs
1822     /// were suspended. It is important that we save the value right after the vCPUs are suspended
1823     /// and restore it right before the vCPUs are resumed (instead of, more naturally, during the
1824     /// snapshot/restore steps) because the pvclock continues to tick even when the vCPUs are
1825     /// suspended.
1826     #[allow(unused_variables)]
execute( &self, vm: &impl Vm, disk_host_tubes: &[Tube], pm: &mut Option<Arc<Mutex<dyn PmResource + Send>>>, gpu_control_tube: Option<&Tube>, usb_control_tube: Option<&Tube>, bat_control: &mut Option<BatControl>, kick_vcpus: impl Fn(VcpuControl), #[cfg(any(target_os = "android", target_os = "linux"))] kick_vcpu: impl Fn(usize, VcpuControl), force_s2idle: bool, #[cfg(feature = "swap")] swap_controller: Option<&swap::SwapController>, device_control_tube: &Tube, vcpu_size: usize, irq_handler_control: &Tube, snapshot_irqchip: impl Fn() -> anyhow::Result<serde_json::Value>, suspended_pvclock_state: &mut Option<hypervisor::ClockState>, ) -> VmResponse1827     pub fn execute(
1828         &self,
1829         vm: &impl Vm,
1830         disk_host_tubes: &[Tube],
1831         pm: &mut Option<Arc<Mutex<dyn PmResource + Send>>>,
1832         gpu_control_tube: Option<&Tube>,
1833         usb_control_tube: Option<&Tube>,
1834         bat_control: &mut Option<BatControl>,
1835         kick_vcpus: impl Fn(VcpuControl),
1836         #[cfg(any(target_os = "android", target_os = "linux"))] kick_vcpu: impl Fn(usize, VcpuControl),
1837         force_s2idle: bool,
1838         #[cfg(feature = "swap")] swap_controller: Option<&swap::SwapController>,
1839         device_control_tube: &Tube,
1840         vcpu_size: usize,
1841         irq_handler_control: &Tube,
1842         snapshot_irqchip: impl Fn() -> anyhow::Result<serde_json::Value>,
1843         suspended_pvclock_state: &mut Option<hypervisor::ClockState>,
1844     ) -> VmResponse {
1845         match self {
1846             VmRequest::Exit => {
1847                 panic!("VmRequest::Exit should be handled by the platform run loop");
1848             }
1849             VmRequest::Powerbtn => {
1850                 if let Some(pm) = pm {
1851                     pm.lock().pwrbtn_evt();
1852                     VmResponse::Ok
1853                 } else {
1854                     error!("{:#?} not supported", *self);
1855                     VmResponse::Err(SysError::new(ENOTSUP))
1856                 }
1857             }
1858             VmRequest::Sleepbtn => {
1859                 if let Some(pm) = pm {
1860                     pm.lock().slpbtn_evt();
1861                     VmResponse::Ok
1862                 } else {
1863                     error!("{:#?} not supported", *self);
1864                     VmResponse::Err(SysError::new(ENOTSUP))
1865                 }
1866             }
1867             VmRequest::Rtc { clear_evt } => {
1868                 if let Some(pm) = pm.as_ref() {
1869                     match clear_evt.try_clone() {
1870                         Ok(clear_evt) => {
1871                             // RTC event will asynchronously trigger wakeup.
1872                             pm.lock().rtc_evt(clear_evt);
1873                             VmResponse::Ok
1874                         }
1875                         Err(err) => {
1876                             error!("Error cloning clear_evt: {:?}", err);
1877                             VmResponse::Err(SysError::new(EIO))
1878                         }
1879                     }
1880                 } else {
1881                     error!("{:#?} not supported", *self);
1882                     VmResponse::Err(SysError::new(ENOTSUP))
1883                 }
1884             }
1885             VmRequest::SuspendVcpus => {
1886                 if !force_s2idle {
1887                     kick_vcpus(VcpuControl::RunState(VmRunMode::Suspending));
1888                     let current_mode = match get_vcpu_state(kick_vcpus, vcpu_size) {
1889                         Ok(state) => state,
1890                         Err(e) => {
1891                             error!("failed to get vcpu state: {e}");
1892                             return VmResponse::Err(SysError::new(EIO));
1893                         }
1894                     };
1895                     if current_mode != VmRunMode::Suspending {
1896                         error!("vCPUs failed to all suspend.");
1897                         return VmResponse::Err(SysError::new(EIO));
1898                     }
1899                 }
1900                 VmResponse::Ok
1901             }
1902             VmRequest::ResumeVcpus => {
1903                 if let Err(e) = device_control_tube.send(&DeviceControlCommand::GetDevicesState) {
1904                     error!("failed to send GetDevicesState: {}", e);
1905                     return VmResponse::Err(SysError::new(EIO));
1906                 }
1907                 let devices_state = match device_control_tube.recv() {
1908                     Ok(VmResponse::DevicesState(state)) => state,
1909                     Ok(resp) => {
1910                         error!("failed to get devices state. Unexpected behavior: {}", resp);
1911                         return VmResponse::Err(SysError::new(EINVAL));
1912                     }
1913                     Err(e) => {
1914                         error!("failed to get devices state. Unexpected behavior: {}", e);
1915                         return VmResponse::Err(SysError::new(EINVAL));
1916                     }
1917                 };
1918                 if let DevicesState::Sleep = devices_state {
1919                     error!("Trying to wake Vcpus while Devices are asleep. Did you mean to use `crosvm resume --full`?");
1920                     return VmResponse::Err(SysError::new(EINVAL));
1921                 }
1922 
1923                 if force_s2idle {
1924                     // During resume also emulate powerbtn event which will allow to wakeup fully
1925                     // suspended guest.
1926                     if let Some(pm) = pm {
1927                         pm.lock().pwrbtn_evt();
1928                     } else {
1929                         error!("triggering power btn during resume not supported");
1930                         return VmResponse::Err(SysError::new(ENOTSUP));
1931                     }
1932                 }
1933 
1934                 kick_vcpus(VcpuControl::RunState(VmRunMode::Running));
1935                 VmResponse::Ok
1936             }
1937             VmRequest::Swap(SwapCommand::Enable) => {
1938                 #[cfg(feature = "swap")]
1939                 if let Some(swap_controller) = swap_controller {
1940                     // Suspend all vcpus and devices while vmm-swap is enabling (move the guest
1941                     // memory contents to the staging memory) to guarantee no processes other than
1942                     // the swap monitor process access the guest memory.
1943                     let _vcpu_guard = match VcpuSuspendGuard::new(&kick_vcpus, vcpu_size) {
1944                         Ok(guard) => guard,
1945                         Err(e) => {
1946                             error!("failed to suspend vcpus: {:?}", e);
1947                             return VmResponse::Err(SysError::new(EINVAL));
1948                         }
1949                     };
1950                     // TODO(b/253386409): Use `devices::Suspendable::sleep()` instead of sending
1951                     // `SIGSTOP` signal.
1952                     let _devices_guard = match swap_controller.suspend_devices() {
1953                         Ok(guard) => guard,
1954                         Err(e) => {
1955                             error!("failed to suspend devices: {:?}", e);
1956                             return VmResponse::Err(SysError::new(EINVAL));
1957                         }
1958                     };
1959 
1960                     return match swap_controller.enable() {
1961                         Ok(()) => VmResponse::Ok,
1962                         Err(e) => {
1963                             error!("swap enable failed: {}", e);
1964                             VmResponse::Err(SysError::new(EINVAL))
1965                         }
1966                     };
1967                 }
1968                 VmResponse::Err(SysError::new(ENOTSUP))
1969             }
1970             VmRequest::Swap(SwapCommand::Trim) => {
1971                 #[cfg(feature = "swap")]
1972                 if let Some(swap_controller) = swap_controller {
1973                     return match swap_controller.trim() {
1974                         Ok(()) => VmResponse::Ok,
1975                         Err(e) => {
1976                             error!("swap trim failed: {}", e);
1977                             VmResponse::Err(SysError::new(EINVAL))
1978                         }
1979                     };
1980                 }
1981                 VmResponse::Err(SysError::new(ENOTSUP))
1982             }
1983             VmRequest::Swap(SwapCommand::SwapOut) => {
1984                 #[cfg(feature = "swap")]
1985                 if let Some(swap_controller) = swap_controller {
1986                     return match swap_controller.swap_out() {
1987                         Ok(()) => VmResponse::Ok,
1988                         Err(e) => {
1989                             error!("swap out failed: {}", e);
1990                             VmResponse::Err(SysError::new(EINVAL))
1991                         }
1992                     };
1993                 }
1994                 VmResponse::Err(SysError::new(ENOTSUP))
1995             }
1996             VmRequest::Swap(SwapCommand::Disable {
1997                 #[cfg(feature = "swap")]
1998                 slow_file_cleanup,
1999                 ..
2000             }) => {
2001                 #[cfg(feature = "swap")]
2002                 if let Some(swap_controller) = swap_controller {
2003                     return match swap_controller.disable(*slow_file_cleanup) {
2004                         Ok(()) => VmResponse::Ok,
2005                         Err(e) => {
2006                             error!("swap disable failed: {}", e);
2007                             VmResponse::Err(SysError::new(EINVAL))
2008                         }
2009                     };
2010                 }
2011                 VmResponse::Err(SysError::new(ENOTSUP))
2012             }
2013             VmRequest::Swap(SwapCommand::Status) => {
2014                 #[cfg(feature = "swap")]
2015                 if let Some(swap_controller) = swap_controller {
2016                     return match swap_controller.status() {
2017                         Ok(status) => VmResponse::SwapStatus(status),
2018                         Err(e) => {
2019                             error!("swap status failed: {}", e);
2020                             VmResponse::Err(SysError::new(EINVAL))
2021                         }
2022                     };
2023                 }
2024                 VmResponse::Err(SysError::new(ENOTSUP))
2025             }
2026             VmRequest::SuspendVm => {
2027                 info!("Starting crosvm suspend");
2028                 kick_vcpus(VcpuControl::RunState(VmRunMode::Suspending));
2029                 let current_mode = match get_vcpu_state(kick_vcpus, vcpu_size) {
2030                     Ok(state) => state,
2031                     Err(e) => {
2032                         error!("failed to get vcpu state: {e}");
2033                         return VmResponse::Err(SysError::new(EIO));
2034                     }
2035                 };
2036                 if current_mode != VmRunMode::Suspending {
2037                     error!("vCPUs failed to all suspend.");
2038                     return VmResponse::Err(SysError::new(EIO));
2039                 }
2040                 // Snapshot the pvclock ASAP after stopping vCPUs.
2041                 if vm.check_capability(VmCap::PvClock) {
2042                     if suspended_pvclock_state.is_none() {
2043                         *suspended_pvclock_state = Some(match vm.get_pvclock() {
2044                             Ok(x) => x,
2045                             Err(e) => {
2046                                 error!("suspend_pvclock failed: {e:?}");
2047                                 return VmResponse::Err(SysError::new(EIO));
2048                             }
2049                         });
2050                     }
2051                 }
2052                 if let Err(e) = device_control_tube
2053                     .send(&DeviceControlCommand::SleepDevices)
2054                     .context("send command to devices control socket")
2055                 {
2056                     error!("{:?}", e);
2057                     return VmResponse::Err(SysError::new(EIO));
2058                 };
2059                 match device_control_tube
2060                     .recv()
2061                     .context("receive from devices control socket")
2062                 {
2063                     Ok(VmResponse::Ok) => {
2064                         info!("Finished crosvm suspend successfully");
2065                         VmResponse::Ok
2066                     }
2067                     Ok(resp) => {
2068                         error!("device sleep failed: {}", resp);
2069                         VmResponse::Err(SysError::new(EIO))
2070                     }
2071                     Err(e) => {
2072                         error!("receive from devices control socket: {:?}", e);
2073                         VmResponse::Err(SysError::new(EIO))
2074                     }
2075                 }
2076             }
2077             VmRequest::ResumeVm => {
2078                 info!("Starting crosvm resume");
2079                 if let Err(e) = device_control_tube
2080                     .send(&DeviceControlCommand::WakeDevices)
2081                     .context("send command to devices control socket")
2082                 {
2083                     error!("{:?}", e);
2084                     return VmResponse::Err(SysError::new(EIO));
2085                 };
2086                 match device_control_tube
2087                     .recv()
2088                     .context("receive from devices control socket")
2089                 {
2090                     Ok(VmResponse::Ok) => {
2091                         info!("Finished crosvm resume successfully");
2092                     }
2093                     Ok(resp) => {
2094                         error!("device wake failed: {}", resp);
2095                         return VmResponse::Err(SysError::new(EIO));
2096                     }
2097                     Err(e) => {
2098                         error!("receive from devices control socket: {:?}", e);
2099                         return VmResponse::Err(SysError::new(EIO));
2100                     }
2101                 }
2102                 // Resume the pvclock as late as possible before starting vCPUs.
2103                 if vm.check_capability(VmCap::PvClock) {
2104                     // If None, then we aren't suspended, which is a valid case.
2105                     if let Some(x) = suspended_pvclock_state {
2106                         if let Err(e) = vm.set_pvclock(x) {
2107                             error!("resume_pvclock failed: {e:?}");
2108                             return VmResponse::Err(SysError::new(EIO));
2109                         }
2110                     }
2111                 }
2112                 kick_vcpus(VcpuControl::RunState(VmRunMode::Running));
2113                 VmResponse::Ok
2114             }
2115             VmRequest::Gpe { gpe, clear_evt } => {
2116                 if let Some(pm) = pm.as_ref() {
2117                     match clear_evt.as_ref().map(|e| e.try_clone()).transpose() {
2118                         Ok(clear_evt) => {
2119                             pm.lock().gpe_evt(*gpe, clear_evt);
2120                             VmResponse::Ok
2121                         }
2122                         Err(err) => {
2123                             error!("Error cloning clear_evt: {:?}", err);
2124                             VmResponse::Err(SysError::new(EIO))
2125                         }
2126                     }
2127                 } else {
2128                     error!("{:#?} not supported", *self);
2129                     VmResponse::Err(SysError::new(ENOTSUP))
2130                 }
2131             }
2132             VmRequest::PciPme(requester_id) => {
2133                 if let Some(pm) = pm.as_ref() {
2134                     pm.lock().pme_evt(*requester_id);
2135                     VmResponse::Ok
2136                 } else {
2137                     error!("{:#?} not supported", *self);
2138                     VmResponse::Err(SysError::new(ENOTSUP))
2139                 }
2140             }
2141             VmRequest::MakeRT => {
2142                 kick_vcpus(VcpuControl::MakeRT);
2143                 VmResponse::Ok
2144             }
2145             #[cfg(feature = "balloon")]
2146             VmRequest::BalloonCommand(_) => unreachable!("Should be handled with BalloonTube"),
2147             VmRequest::DiskCommand {
2148                 disk_index,
2149                 ref command,
2150             } => match &disk_host_tubes.get(*disk_index) {
2151                 Some(tube) => handle_disk_command(command, tube),
2152                 None => VmResponse::Err(SysError::new(ENODEV)),
2153             },
2154             #[cfg(feature = "gpu")]
2155             VmRequest::GpuCommand(ref cmd) => match gpu_control_tube {
2156                 Some(gpu_control) => {
2157                     let res = gpu_control.send(cmd);
2158                     if let Err(e) = res {
2159                         error!("fail to send command to gpu control socket: {}", e);
2160                         return VmResponse::Err(SysError::new(EIO));
2161                     }
2162                     match gpu_control.recv() {
2163                         Ok(response) => VmResponse::GpuResponse(response),
2164                         Err(e) => {
2165                             error!("fail to recv command from gpu control socket: {}", e);
2166                             VmResponse::Err(SysError::new(EIO))
2167                         }
2168                     }
2169                 }
2170                 None => {
2171                     error!("gpu control is not enabled in crosvm");
2172                     VmResponse::Err(SysError::new(EIO))
2173                 }
2174             },
2175             VmRequest::UsbCommand(ref cmd) => {
2176                 let usb_control_tube = match usb_control_tube {
2177                     Some(t) => t,
2178                     None => {
2179                         error!("attempted to execute USB request without control tube");
2180                         return VmResponse::Err(SysError::new(ENODEV));
2181                     }
2182                 };
2183                 let res = usb_control_tube.send(cmd);
2184                 if let Err(e) = res {
2185                     error!("fail to send command to usb control socket: {}", e);
2186                     return VmResponse::Err(SysError::new(EIO));
2187                 }
2188                 match usb_control_tube.recv() {
2189                     Ok(response) => VmResponse::UsbResponse(response),
2190                     Err(e) => {
2191                         error!("fail to recv command from usb control socket: {}", e);
2192                         VmResponse::Err(SysError::new(EIO))
2193                     }
2194                 }
2195             }
2196             VmRequest::BatCommand(type_, ref cmd) => {
2197                 match bat_control {
2198                     Some(battery) => {
2199                         if battery.type_ != *type_ {
2200                             error!("ignored battery command due to battery type: expected {:?}, got {:?}", battery.type_, type_);
2201                             return VmResponse::Err(SysError::new(EINVAL));
2202                         }
2203 
2204                         let res = battery.control_tube.send(cmd);
2205                         if let Err(e) = res {
2206                             error!("fail to send command to bat control socket: {}", e);
2207                             return VmResponse::Err(SysError::new(EIO));
2208                         }
2209 
2210                         match battery.control_tube.recv() {
2211                             Ok(response) => VmResponse::BatResponse(response),
2212                             Err(e) => {
2213                                 error!("fail to recv command from bat control socket: {}", e);
2214                                 VmResponse::Err(SysError::new(EIO))
2215                             }
2216                         }
2217                     }
2218                     None => VmResponse::BatResponse(BatControlResult::NoBatDevice),
2219                 }
2220             }
2221             VmRequest::HotPlugVfioCommand { device: _, add: _ } => VmResponse::Ok,
2222             #[cfg(feature = "pci-hotplug")]
2223             VmRequest::HotPlugNetCommand(ref _net_cmd) => {
2224                 VmResponse::ErrString("hot plug not supported".to_owned())
2225             }
2226             VmRequest::Snapshot(SnapshotCommand::Take {
2227                 ref snapshot_path,
2228                 compress_memory,
2229                 encrypt,
2230             }) => {
2231                 info!("Starting crosvm snapshot");
2232                 match do_snapshot(
2233                     snapshot_path.to_path_buf(),
2234                     kick_vcpus,
2235                     irq_handler_control,
2236                     device_control_tube,
2237                     vcpu_size,
2238                     snapshot_irqchip,
2239                     *compress_memory,
2240                     *encrypt,
2241                     suspended_pvclock_state,
2242                 ) {
2243                     Ok(()) => {
2244                         info!("Finished crosvm snapshot successfully");
2245                         VmResponse::Ok
2246                     }
2247                     Err(e) => {
2248                         error!("failed to handle snapshot: {:?}", e);
2249                         VmResponse::Err(SysError::new(EIO))
2250                     }
2251                 }
2252             }
2253             VmRequest::RegisterListener {
2254                 socket_addr: _,
2255                 event: _,
2256             } => VmResponse::Ok,
2257             VmRequest::UnregisterListener {
2258                 socket_addr: _,
2259                 event: _,
2260             } => VmResponse::Ok,
2261             VmRequest::Unregister { socket_addr: _ } => VmResponse::Ok,
2262             VmRequest::VcpuPidTid => unreachable!(),
2263             VmRequest::Throttle(_, _) => unreachable!(),
2264         }
2265     }
2266 }
2267 
2268 /// Snapshot the VM to file at `snapshot_path`
do_snapshot( snapshot_path: PathBuf, kick_vcpus: impl Fn(VcpuControl), irq_handler_control: &Tube, device_control_tube: &Tube, vcpu_size: usize, snapshot_irqchip: impl Fn() -> anyhow::Result<serde_json::Value>, compress_memory: bool, encrypt: bool, suspended_pvclock_state: &mut Option<hypervisor::ClockState>, ) -> anyhow::Result<()>2269 fn do_snapshot(
2270     snapshot_path: PathBuf,
2271     kick_vcpus: impl Fn(VcpuControl),
2272     irq_handler_control: &Tube,
2273     device_control_tube: &Tube,
2274     vcpu_size: usize,
2275     snapshot_irqchip: impl Fn() -> anyhow::Result<serde_json::Value>,
2276     compress_memory: bool,
2277     encrypt: bool,
2278     suspended_pvclock_state: &mut Option<hypervisor::ClockState>,
2279 ) -> anyhow::Result<()> {
2280     let _vcpu_guard = VcpuSuspendGuard::new(&kick_vcpus, vcpu_size)?;
2281     let _device_guard = DeviceSleepGuard::new(device_control_tube)?;
2282 
2283     // We want to flush all pending IRQs to the LAPICs. There are two cases:
2284     //
2285     // MSIs: these are directly delivered to the LAPIC. We must verify the handler
2286     // thread cycles once to deliver these interrupts.
2287     //
2288     // Legacy interrupts: in the case of a split IRQ chip, these interrupts may
2289     // flow through the userspace IOAPIC. If the hypervisor does not support
2290     // irqfds (e.g. WHPX), a single iteration will only flush the IRQ to the
2291     // IOAPIC. The underlying MSI will be asserted at this point, but if the
2292     // IRQ handler doesn't run another iteration, it won't be delivered to the
2293     // LAPIC. This is why we cycle the handler thread twice (doing so ensures we
2294     // process the underlying MSI).
2295     //
2296     // We can handle both of these cases by iterating until there are no tokens
2297     // serviced on the requested iteration. Note that in the legacy case, this
2298     // ensures at least two iterations.
2299     //
2300     // Note: within CrosVM, *all* interrupts are eventually converted into the
2301     // same mechanicism that MSIs use. This is why we say "underlying" MSI for
2302     // a legacy IRQ.
2303     let mut flush_attempts = 0;
2304     loop {
2305         irq_handler_control
2306             .send(&IrqHandlerRequest::WakeAndNotifyIteration)
2307             .context("failed to send flush command to IRQ handler thread")?;
2308         let resp = irq_handler_control
2309             .recv()
2310             .context("failed to recv flush response from IRQ handler thread")?;
2311         match resp {
2312             IrqHandlerResponse::HandlerIterationComplete(tokens_serviced) => {
2313                 if tokens_serviced == 0 {
2314                     break;
2315                 }
2316             }
2317             _ => bail!("received unexpected reply from IRQ handler: {:?}", resp),
2318         }
2319         flush_attempts += 1;
2320         if flush_attempts > EXPECTED_MAX_IRQ_FLUSH_ITERATIONS {
2321             warn!("flushing IRQs for snapshot may be stalled after iteration {}, expected <= {} iterations", flush_attempts, EXPECTED_MAX_IRQ_FLUSH_ITERATIONS);
2322         }
2323     }
2324     info!("flushed IRQs in {} iterations", flush_attempts);
2325 
2326     let snapshot_writer = SnapshotWriter::new(snapshot_path, encrypt)?;
2327 
2328     // Snapshot hypervisor's paravirtualized clock.
2329     snapshot_writer.write_fragment("pvclock", &serde_json::to_value(suspended_pvclock_state)?)?;
2330 
2331     // Snapshot Vcpus
2332     info!("VCPUs snapshotting...");
2333     let (send_chan, recv_chan) = mpsc::channel();
2334     kick_vcpus(VcpuControl::Snapshot(
2335         snapshot_writer.add_namespace("vcpu")?,
2336         send_chan,
2337     ));
2338     // Validate all Vcpus snapshot successfully
2339     for _ in 0..vcpu_size {
2340         recv_chan
2341             .recv()
2342             .context("Failed to recv Vcpu snapshot response")?
2343             .context("Failed to snapshot Vcpu")?;
2344     }
2345     info!("VCPUs snapshotted.");
2346 
2347     // Snapshot irqchip
2348     info!("Snapshotting irqchip...");
2349     let irqchip_snap = snapshot_irqchip()?;
2350     snapshot_writer
2351         .write_fragment("irqchip", &irqchip_snap)
2352         .context("Failed to write irqchip state")?;
2353     info!("Snapshotted irqchip.");
2354 
2355     // Snapshot devices
2356     info!("Devices snapshotting...");
2357     device_control_tube
2358         .send(&DeviceControlCommand::SnapshotDevices {
2359             snapshot_writer,
2360             compress_memory,
2361         })
2362         .context("send command to devices control socket")?;
2363     let resp: VmResponse = device_control_tube
2364         .recv()
2365         .context("receive from devices control socket")?;
2366     if !matches!(resp, VmResponse::Ok) {
2367         bail!("unexpected SnapshotDevices response: {resp}");
2368     }
2369     info!("Devices snapshotted.");
2370     Ok(())
2371 }
2372 
2373 /// Restore the VM to the snapshot at `restore_path`.
2374 ///
2375 /// Same as `VmRequest::execute` with a `VmRequest::Restore`. Exposed as a separate function
2376 /// because not all the `VmRequest::execute` arguments are available in the "cold restore" flow.
do_restore( restore_path: &Path, kick_vcpus: impl Fn(VcpuControl), kick_vcpu: impl Fn(VcpuControl, usize), irq_handler_control: &Tube, device_control_tube: &Tube, vcpu_size: usize, mut restore_irqchip: impl FnMut(serde_json::Value) -> anyhow::Result<()>, require_encrypted: bool, suspended_pvclock_state: &mut Option<hypervisor::ClockState>, ) -> anyhow::Result<()>2377 pub fn do_restore(
2378     restore_path: &Path,
2379     kick_vcpus: impl Fn(VcpuControl),
2380     kick_vcpu: impl Fn(VcpuControl, usize),
2381     irq_handler_control: &Tube,
2382     device_control_tube: &Tube,
2383     vcpu_size: usize,
2384     mut restore_irqchip: impl FnMut(serde_json::Value) -> anyhow::Result<()>,
2385     require_encrypted: bool,
2386     suspended_pvclock_state: &mut Option<hypervisor::ClockState>,
2387 ) -> anyhow::Result<()> {
2388     let _guard = VcpuSuspendGuard::new(&kick_vcpus, vcpu_size);
2389     let _devices_guard = DeviceSleepGuard::new(device_control_tube)?;
2390 
2391     let snapshot_reader = SnapshotReader::new(restore_path, require_encrypted)?;
2392 
2393     // Restore hypervisor's paravirtualized clock.
2394     *suspended_pvclock_state = snapshot_reader.read_fragment("pvclock")?;
2395 
2396     // Restore IrqChip
2397     let irq_snapshot: serde_json::Value = snapshot_reader.read_fragment("irqchip")?;
2398     restore_irqchip(irq_snapshot)?;
2399 
2400     // Restore Vcpu(s)
2401     let vcpu_snapshot_reader = snapshot_reader.namespace("vcpu")?;
2402     let vcpu_snapshot_count = vcpu_snapshot_reader.list_fragments()?.len();
2403     if vcpu_snapshot_count != vcpu_size {
2404         bail!(
2405             "bad cpu count in snapshot: expected={} got={}",
2406             vcpu_size,
2407             vcpu_snapshot_count,
2408         );
2409     }
2410     #[cfg(target_arch = "x86_64")]
2411     let host_tsc_reference_moment = {
2412         // SAFETY: rdtsc takes no arguments.
2413         unsafe { _rdtsc() }
2414     };
2415     let (send_chan, recv_chan) = mpsc::channel();
2416     for vcpu_id in 0..vcpu_size {
2417         kick_vcpu(
2418             VcpuControl::Restore(VcpuRestoreRequest {
2419                 result_sender: send_chan.clone(),
2420                 snapshot_reader: vcpu_snapshot_reader.clone(),
2421                 #[cfg(target_arch = "x86_64")]
2422                 host_tsc_reference_moment,
2423             }),
2424             vcpu_id,
2425         );
2426     }
2427     for _ in 0..vcpu_size {
2428         recv_chan
2429             .recv()
2430             .context("Failed to recv restore response")?
2431             .context("Failed to restore vcpu")?;
2432     }
2433 
2434     // Restore devices
2435     device_control_tube
2436         .send(&DeviceControlCommand::RestoreDevices { snapshot_reader })
2437         .context("send command to devices control socket")?;
2438     let resp: VmResponse = device_control_tube
2439         .recv()
2440         .context("receive from devices control socket")?;
2441     if !matches!(resp, VmResponse::Ok) {
2442         bail!("unexpected RestoreDevices response: {resp}");
2443     }
2444 
2445     irq_handler_control
2446         .send(&IrqHandlerRequest::RefreshIrqEventTokens)
2447         .context("failed to send refresh irq event token command to IRQ handler thread")?;
2448     let resp: IrqHandlerResponse = irq_handler_control
2449         .recv()
2450         .context("failed to recv refresh response from IRQ handler thread")?;
2451     if !matches!(resp, IrqHandlerResponse::IrqEventTokenRefreshComplete) {
2452         bail!(
2453             "received unexpected reply from IRQ handler thread: {:?}",
2454             resp
2455         );
2456     }
2457     Ok(())
2458 }
2459 
/// Indication of success or failure of a `VmRequest`.
///
/// Success is usually indicated by `VmResponse::Ok` unless there is data associated with the
/// response.
#[derive(Serialize, Deserialize, Debug, Clone)]
#[must_use]
pub enum VmResponse {
    /// Indicates the request was executed successfully.
    Ok,
    /// Indicates the request encountered some error during execution, reported as a `SysError`.
    Err(SysError),
    /// Indicates the request encountered some error during execution, reported as a message
    /// string.
    ErrString(String),
    /// The memory was registered into guest address space in memory slot number `slot`.
    RegisterMemory { slot: u32 },
    /// Results of balloon control commands.
    #[cfg(feature = "balloon")]
    BalloonStats {
        stats: balloon_control::BalloonStats,
        balloon_actual: u64,
    },
    /// Results of balloon WS-R command
    #[cfg(feature = "balloon")]
    BalloonWS {
        ws: balloon_control::BalloonWS,
        balloon_actual: u64,
    },
    /// Results of PCI hot plug
    #[cfg(feature = "pci-hotplug")]
    PciHotPlugResponse { bus: u8 },
    /// Results of usb control commands.
    UsbResponse(UsbControlResult),
    #[cfg(feature = "gpu")]
    /// Results of gpu control commands.
    GpuResponse(GpuControlResult),
    /// Results of battery control commands.
    BatResponse(BatControlResult),
    /// Results of swap status command.
    SwapStatus(SwapStatus),
    /// Gets the state of Devices (sleep/wake)
    DevicesState(DevicesState),
    /// Map of the Vcpu PID/TIDs
    // NOTE(review): presumably keyed by vCPU index with `(pid, tid)` values; confirm against the
    // code that builds this map.
    VcpuPidTidResponse {
        pid_tid_map: BTreeMap<usize, (u32, u32)>,
    },
}
2505 
2506 impl Display for VmResponse {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result2507     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
2508         use self::VmResponse::*;
2509 
2510         match self {
2511             Ok => write!(f, "ok"),
2512             Err(e) => write!(f, "error: {}", e),
2513             ErrString(e) => write!(f, "error: {}", e),
2514             RegisterMemory { slot } => write!(f, "memory registered in slot {}", slot),
2515             #[cfg(feature = "balloon")]
2516             VmResponse::BalloonStats {
2517                 stats,
2518                 balloon_actual,
2519             } => {
2520                 write!(
2521                     f,
2522                     "stats: {}\nballoon_actual: {}",
2523                     serde_json::to_string_pretty(&stats)
2524                         .unwrap_or_else(|_| "invalid_response".to_string()),
2525                     balloon_actual
2526                 )
2527             }
2528             #[cfg(feature = "balloon")]
2529             VmResponse::BalloonWS { ws, balloon_actual } => {
2530                 write!(
2531                     f,
2532                     "ws: {}, balloon_actual: {}",
2533                     serde_json::to_string_pretty(&ws)
2534                         .unwrap_or_else(|_| "invalid_response".to_string()),
2535                     balloon_actual,
2536                 )
2537             }
2538             UsbResponse(result) => write!(f, "usb control request get result {:?}", result),
2539             #[cfg(feature = "pci-hotplug")]
2540             PciHotPlugResponse { bus } => write!(f, "pci hotplug bus {:?}", bus),
2541             #[cfg(feature = "gpu")]
2542             GpuResponse(result) => write!(f, "gpu control request result {:?}", result),
2543             BatResponse(result) => write!(f, "{}", result),
2544             SwapStatus(status) => {
2545                 write!(
2546                     f,
2547                     "{}",
2548                     serde_json::to_string(&status)
2549                         .unwrap_or_else(|_| "invalid_response".to_string()),
2550                 )
2551             }
2552             DevicesState(status) => write!(f, "devices status: {:?}", status),
2553             VcpuPidTidResponse { pid_tid_map } => write!(f, "vcpu pid tid map: {:?}", pid_tid_map),
2554         }
2555     }
2556 }
2557 
/// Enum that allows remote control of a wait context (used between the Windows GpuDisplay & the
/// GPU worker).
#[derive(Serialize, Deserialize)]
pub enum ModifyWaitContext {
    /// Carries the `Descriptor` the request refers to; it is (de)serialized through
    /// `with_as_descriptor`.
    // NOTE(review): presumably asks the receiver to add this descriptor to its wait context;
    // confirm against the consumer.
    Add(#[serde(with = "with_as_descriptor")] Descriptor),
}
2564 
/// Errors for virtio-iommu VFIO control operations.
///
/// The display text of each variant is given by its `#[error(...)]` attribute.
#[sorted]
#[derive(Error, Debug)]
pub enum VirtioIOMMUVfioError {
    /// The socket failed.
    #[error("socket failed")]
    SocketFailed,
    /// A response other than the expected one was received; carries that response.
    #[error("unexpected response: {0}")]
    UnexpectedResponse(VirtioIOMMUResponse),
    /// The command was not recognized; carries the command text.
    #[error("unknown command: `{0}`")]
    UnknownCommand(String),
    /// A VFIO control result reported as an error; displayed using the result's own message.
    #[error("{0}")]
    VfioControl(VirtioIOMMUVfioResult),
}
2577 
/// VFIO-related commands carried by `VirtioIOMMURequest::VfioCommand`.
#[derive(Serialize, Deserialize, Debug)]
pub enum VirtioIOMMUVfioCommand {
    /// Add the vfio device attached to virtio-iommu.
    ///
    /// `container` is a `File` that is (de)serialized through `with_as_descriptor`.
    VfioDeviceAdd {
        endpoint_addr: u32,
        wrapper_id: u32,
        #[serde(with = "with_as_descriptor")]
        container: File,
    },
    /// Delete the vfio device attached to virtio-iommu.
    VfioDeviceDel {
        endpoint_addr: u32,
    },
    /// Map a dma-buf into vfio iommu table
    VfioDmabufMap {
        region_id: VmMemoryRegionId,
        gpa: u64,
        size: u64,
        dma_buf: SafeDescriptor,
    },
    /// Unmap a dma-buf from vfio iommu table
    VfioDmabufUnmap(VmMemoryRegionId),
}
2601 
/// Outcome of a virtio-iommu VFIO command.
///
/// The `Display` implementation provides the user-facing text for each variant.
#[derive(Serialize, Deserialize, Debug)]
pub enum VirtioIOMMUVfioResult {
    /// The command completed successfully.
    Ok,
    /// Not in the PCI ranges of virtio-iommu.
    NotInPCIRanges,
    /// No vfio container is available.
    NoAvailableContainer,
    /// No such vfio device.
    NoSuchDevice,
    /// No such mapped dma-buf.
    NoSuchMappedDmabuf,
    /// The parameters were invalid.
    InvalidParam,
}
2611 
2612 impl Display for VirtioIOMMUVfioResult {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result2613     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
2614         use self::VirtioIOMMUVfioResult::*;
2615 
2616         match self {
2617             Ok => write!(f, "successfully"),
2618             NotInPCIRanges => write!(f, "not in the pci ranges of virtio-iommu"),
2619             NoAvailableContainer => write!(f, "no available vfio container"),
2620             NoSuchDevice => write!(f, "no such a vfio device"),
2621             NoSuchMappedDmabuf => write!(f, "no such a mapped dmabuf"),
2622             InvalidParam => write!(f, "invalid parameters"),
2623         }
2624     }
2625 }
2626 
/// A request to the virtio-iommu process to perform some operations.
///
/// Unless otherwise noted, each request should expect a `VirtioIOMMUResponse::Ok` to be received on
/// success.
#[derive(Serialize, Deserialize, Debug)]
pub enum VirtioIOMMURequest {
    /// Command for vfio related operations; see `VirtioIOMMUVfioCommand` for the available
    /// commands.
    VfioCommand(VirtioIOMMUVfioCommand),
}
2636 
/// Indication of success or failure of a `VirtioIOMMURequest`.
///
/// Success is usually indicated by `VirtioIOMMUResponse::Ok` unless there is data associated with
/// the response.
#[derive(Serialize, Deserialize, Debug)]
pub enum VirtioIOMMUResponse {
    /// Indicates the request was executed successfully.
    Ok,
    /// Indicates the request encountered some error during execution.
    Err(SysError),
    /// Results for Vfio commands.
    VfioResponse(VirtioIOMMUVfioResult),
}
2650 
2651 impl Display for VirtioIOMMUResponse {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result2652     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
2653         use self::VirtioIOMMUResponse::*;
2654         match self {
2655             Ok => write!(f, "ok"),
2656             Err(e) => write!(f, "error: {}", e),
2657             VfioResponse(result) => write!(
2658                 f,
2659                 "The vfio-related virtio-iommu request got result: {:?}",
2660                 result
2661             ),
2662         }
2663     }
2664 }
2665 
2666 /// Send VirtioIOMMURequest without waiting for the response
virtio_iommu_request_async( iommu_control_tube: &Tube, req: &VirtioIOMMURequest, ) -> VirtioIOMMUResponse2667 pub fn virtio_iommu_request_async(
2668     iommu_control_tube: &Tube,
2669     req: &VirtioIOMMURequest,
2670 ) -> VirtioIOMMUResponse {
2671     match iommu_control_tube.send(&req) {
2672         Ok(_) => VirtioIOMMUResponse::Ok,
2673         Err(e) => {
2674             error!("virtio-iommu socket send failed: {:?}", e);
2675             VirtioIOMMUResponse::Err(SysError::last())
2676         }
2677     }
2678 }
2679 
/// Result type returned by `virtio_iommu_request`; the error type carries no information.
pub type VirtioIOMMURequestResult = std::result::Result<VirtioIOMMUResponse, ()>;
2681 
2682 /// Send VirtioIOMMURequest and wait to get the response
virtio_iommu_request( iommu_control_tube: &Tube, req: &VirtioIOMMURequest, ) -> VirtioIOMMURequestResult2683 pub fn virtio_iommu_request(
2684     iommu_control_tube: &Tube,
2685     req: &VirtioIOMMURequest,
2686 ) -> VirtioIOMMURequestResult {
2687     let response = match virtio_iommu_request_async(iommu_control_tube, req) {
2688         VirtioIOMMUResponse::Ok => match iommu_control_tube.recv() {
2689             Ok(response) => response,
2690             Err(e) => {
2691                 error!("virtio-iommu socket recv failed: {:?}", e);
2692                 VirtioIOMMUResponse::Err(SysError::last())
2693             }
2694         },
2695         resp => resp,
2696     };
2697     Ok(response)
2698 }
2699