xref: /aosp_15_r20/external/crosvm/kvm/src/lib.rs (revision bb4ee6a4ae7042d18b07a98463b9c8b875e44b39)
1 // Copyright 2017 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 //! A safe wrapper around the kernel's KVM interface.
6 //!
7 //! New code should use the `hypervisor` crate instead.
8 
9 #![cfg(any(target_os = "android", target_os = "linux"))]
10 
11 mod cap;
12 
13 use std::cell::RefCell;
14 use std::cmp::min;
15 use std::cmp::Ordering;
16 use std::collections::BTreeMap;
17 use std::collections::BinaryHeap;
18 use std::ffi::CString;
19 use std::fs::File;
20 use std::mem::size_of;
21 use std::ops::Deref;
22 use std::ops::DerefMut;
23 use std::os::raw::*;
24 use std::os::unix::prelude::OsStrExt;
25 use std::path::Path;
26 use std::ptr::copy_nonoverlapping;
27 use std::sync::Arc;
28 
29 #[allow(unused_imports)]
30 use base::ioctl;
31 #[allow(unused_imports)]
32 use base::ioctl_with_mut_ptr;
33 #[allow(unused_imports)]
34 use base::ioctl_with_mut_ref;
35 #[allow(unused_imports)]
36 use base::ioctl_with_ptr;
37 #[allow(unused_imports)]
38 use base::ioctl_with_ref;
39 #[allow(unused_imports)]
40 use base::ioctl_with_val;
41 #[allow(unused_imports)]
42 use base::pagesize;
43 #[allow(unused_imports)]
44 use base::signal;
45 use base::sys::BlockedSignal;
46 #[allow(unused_imports)]
47 use base::unblock_signal;
48 #[allow(unused_imports)]
49 use base::warn;
50 use base::AsRawDescriptor;
51 #[allow(unused_imports)]
52 use base::Error;
53 #[allow(unused_imports)]
54 use base::Event;
55 use base::FromRawDescriptor;
56 #[allow(unused_imports)]
57 use base::IoctlNr;
58 #[allow(unused_imports)]
59 use base::MappedRegion;
60 #[allow(unused_imports)]
61 use base::MemoryMapping;
62 #[allow(unused_imports)]
63 use base::MemoryMappingBuilder;
64 #[allow(unused_imports)]
65 use base::MmapError;
66 use base::RawDescriptor;
67 #[allow(unused_imports)]
68 use base::Result;
69 #[allow(unused_imports)]
70 use base::SIGRTMIN;
71 use data_model::vec_with_array_field;
72 #[cfg(target_arch = "x86_64")]
73 use data_model::FlexibleArrayWrapper;
74 use kvm_sys::*;
75 use libc::open64;
76 use libc::sigset_t;
77 use libc::EBUSY;
78 use libc::EINVAL;
79 use libc::ENOENT;
80 use libc::ENOSPC;
81 use libc::EOVERFLOW;
82 use libc::O_CLOEXEC;
83 use libc::O_RDWR;
84 use sync::Mutex;
85 use vm_memory::GuestAddress;
86 use vm_memory::GuestMemory;
87 
88 pub use crate::cap::*;
89 
errno_result<T>() -> Result<T>90 fn errno_result<T>() -> Result<T> {
91     Err(Error::last())
92 }
93 
/// Issues the `KVM_SET_USER_MEMORY_REGION` ioctl on `fd` to map `memory_size` bytes of host
/// memory at `userspace_addr` into the guest physical address space at `guest_addr`.
///
/// Passing `memory_size == 0` deletes the slot (see `Vm::remove_memory_region`).
///
/// # Safety
///
/// The caller must ensure `userspace_addr` points to at least `memory_size` bytes of host
/// memory that remains mapped for as long as the slot is in use, and that the region does not
/// overlap any other active slot.
unsafe fn set_user_memory_region<F: AsRawDescriptor>(
    fd: &F,
    slot: u32,
    read_only: bool,
    log_dirty_pages: bool,
    guest_addr: u64,
    memory_size: u64,
    userspace_addr: *mut u8,
) -> Result<()> {
    let mut flags = if read_only { KVM_MEM_READONLY } else { 0 };
    if log_dirty_pages {
        flags |= KVM_MEM_LOG_DIRTY_PAGES;
    }
    let region = kvm_userspace_memory_region {
        slot,
        flags,
        guest_phys_addr: guest_addr,
        memory_size,
        userspace_addr: userspace_addr as u64,
    };

    let ret = ioctl_with_ref(fd, KVM_SET_USER_MEMORY_REGION, &region);
    if ret == 0 {
        Ok(())
    } else {
        errno_result()
    }
}
122 
123 /// Helper function to determine the size in bytes of a dirty log bitmap for the given memory region
124 /// size.
125 ///
126 /// # Arguments
127 ///
128 /// * `size` - Number of bytes in the memory region being queried.
dirty_log_bitmap_size(size: usize) -> usize129 pub fn dirty_log_bitmap_size(size: usize) -> usize {
130     let page_size = pagesize();
131     (((size + page_size - 1) / page_size) + 7) / 8
132 }
133 
/// A wrapper around opening and using `/dev/kvm`.
///
/// Useful for querying extensions and basic values from the KVM backend. A `Kvm` is required to
/// create a `Vm` object.
pub struct Kvm {
    // Owned fd for the KVM device; closed when this `Kvm` is dropped.
    kvm: File,
}
141 
impl Kvm {
    /// Opens `/dev/kvm` and returns a Kvm object on success.
    pub fn new() -> Result<Kvm> {
        Kvm::new_with_path(Path::new("/dev/kvm"))
    }

    /// Opens a KVM device at `device_path` and returns a Kvm object on success.
    ///
    /// Panics if `device_path` contains an interior nul byte, which cannot happen for a valid
    /// device path.
    pub fn new_with_path(device_path: &Path) -> Result<Kvm> {
        let c_path = CString::new(device_path.as_os_str().as_bytes()).unwrap();
        // SAFETY:
        // Open calls are safe because we give a nul-terminated string and verify the result.
        let ret = unsafe { open64(c_path.as_ptr(), O_RDWR | O_CLOEXEC) };
        if ret < 0 {
            return errno_result();
        }
        Ok(Kvm {
            kvm: {
                // SAFETY:
                // Safe because we verify that ret is valid and we own the fd.
                unsafe { File::from_raw_descriptor(ret) }
            },
        })
    }

    // Returns the raw result of KVM_CHECK_EXTENSION: negative on error, 0 when the capability
    // is unsupported, and a positive value when it is supported.
    fn check_extension_int(&self, c: Cap) -> i32 {
        // SAFETY:
        // Safe because we know that our file is a KVM fd and that the extension is one of the ones
        // defined by kernel.
        unsafe { ioctl_with_val(self, KVM_CHECK_EXTENSION, c as c_ulong) }
    }

    /// Checks if a particular `Cap` is available.
    pub fn check_extension(&self, c: Cap) -> bool {
        self.check_extension_int(c) == 1
    }

    /// Gets the size of the mmap required to use vcpu's `kvm_run` structure.
    pub fn get_vcpu_mmap_size(&self) -> Result<usize> {
        // SAFETY:
        // Safe because we know that our file is a KVM fd and we verify the return result.
        let res = unsafe { ioctl(self, KVM_GET_VCPU_MMAP_SIZE) };
        if res > 0 {
            Ok(res as usize)
        } else {
            errno_result()
        }
    }

    // Issues the given CPUID-listing ioctl (KVM_GET_SUPPORTED_CPUID or KVM_GET_EMULATED_CPUID)
    // and returns the entries the kernel filled in.
    #[cfg(target_arch = "x86_64")]
    fn get_cpuid(&self, kind: IoctlNr) -> Result<CpuId> {
        const MAX_KVM_CPUID_ENTRIES: usize = 256;
        let mut cpuid = CpuId::new(MAX_KVM_CPUID_ENTRIES);

        // SAFETY:
        // ioctl is unsafe. The kernel is trusted not to write beyond the bounds of the memory
        // allocated for the struct. The limit is read from nent, which is set to the allocated
        // size(MAX_KVM_CPUID_ENTRIES) above.
        let ret = unsafe { ioctl_with_mut_ptr(self, kind, cpuid.as_mut_ptr()) };
        if ret < 0 {
            return errno_result();
        }

        Ok(cpuid)
    }

    /// X86 specific call to get the system supported CPUID values
    #[cfg(target_arch = "x86_64")]
    pub fn get_supported_cpuid(&self) -> Result<CpuId> {
        self.get_cpuid(KVM_GET_SUPPORTED_CPUID)
    }

    /// X86 specific call to get the system emulated CPUID values
    #[cfg(target_arch = "x86_64")]
    pub fn get_emulated_cpuid(&self) -> Result<CpuId> {
        self.get_cpuid(KVM_GET_EMULATED_CPUID)
    }

    /// X86 specific call to get list of supported MSRS
    ///
    /// See the documentation for KVM_GET_MSR_INDEX_LIST.
    #[cfg(target_arch = "x86_64")]
    pub fn get_msr_index_list(&self) -> Result<Vec<u32>> {
        const MAX_KVM_MSR_ENTRIES: usize = 256;

        let mut msr_list = vec_with_array_field::<kvm_msr_list, u32>(MAX_KVM_MSR_ENTRIES);
        msr_list[0].nmsrs = MAX_KVM_MSR_ENTRIES as u32;

        // SAFETY:
        // ioctl is unsafe. The kernel is trusted not to write beyond the bounds of the memory
        // allocated for the struct. The limit is read from nmsrs, which is set to the allocated
        // size (MAX_KVM_MSR_ENTRIES) above.
        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_MSR_INDEX_LIST, &mut msr_list[0]) };
        if ret < 0 {
            return errno_result();
        }

        let mut nmsrs = msr_list[0].nmsrs;

        // SAFETY:
        // Mapping the unsized array to a slice is unsafe because the length isn't known.  Using
        // the length we originally allocated with eliminates the possibility of overflow.
        let indices: &[u32] = unsafe {
            // Defensively clamp the kernel-reported count to the size we actually allocated.
            if nmsrs > MAX_KVM_MSR_ENTRIES as u32 {
                nmsrs = MAX_KVM_MSR_ENTRIES as u32;
            }
            msr_list[0].indices.as_slice(nmsrs as usize)
        };

        Ok(indices.to_vec())
    }

    #[cfg(any(target_arch = "x86_64", target_arch = "riscv64"))]
    // The x86 and riscv machine type is always 0
    pub fn get_vm_type(&self) -> c_ulong {
        0
    }

    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    // Compute the machine type, which should be the IPA range for the VM
    // Ideally, this would take a description of the memory map and return
    // the closest machine type for this VM. Here, we just return the maximum
    // the kernel support.
    #[allow(clippy::useless_conversion)]
    pub fn get_vm_type(&self) -> c_ulong {
        // SAFETY:
        // Safe because we know self is a real kvm fd
        match unsafe { ioctl_with_val(self, KVM_CHECK_EXTENSION, KVM_CAP_ARM_VM_IPA_SIZE.into()) } {
            // Not supported? Use 0 as the machine type, which implies 40bit IPA
            ret if ret < 0 => 0,
            // Use the lower 8 bits representing the IPA space as the machine type
            ipa => (ipa & 0xff) as c_ulong,
        }
    }
}
276 
277 impl AsRawDescriptor for Kvm {
as_raw_descriptor(&self) -> RawDescriptor278     fn as_raw_descriptor(&self) -> RawDescriptor {
279         self.kvm.as_raw_descriptor()
280     }
281 }
282 
/// An address either in programmable I/O space or in memory mapped I/O space.
#[derive(Copy, Clone, Debug)]
pub enum IoeventAddress {
    /// A port in programmable I/O space.
    Pio(u64),
    /// A guest physical address in memory-mapped I/O space.
    Mmio(u64),
}
289 
/// Used in `Vm::register_ioevent` to indicate a size and optionally value to match.
pub enum Datamatch {
    /// Match a write of any length and any value.
    AnyLength,
    /// Match 1-byte writes; `Some(v)` additionally requires the written value to equal `v`.
    U8(Option<u8>),
    /// Match 2-byte writes; `Some(v)` additionally requires the written value to equal `v`.
    U16(Option<u16>),
    /// Match 4-byte writes; `Some(v)` additionally requires the written value to equal `v`.
    U32(Option<u32>),
    /// Match 8-byte writes; `Some(v)` additionally requires the written value to equal `v`.
    U64(Option<u64>),
}
298 
/// A source of IRQs in an `IrqRoute`.
pub enum IrqSource {
    /// A pin on one of the in-kernel interrupt controller chips.
    Irqchip { chip: u32, pin: u32 },
    /// A message-signaled interrupt, described by its target address and data payload.
    Msi { address: u64, data: u32 },
}
304 
/// A single route for an IRQ.
pub struct IrqRoute {
    /// Global system interrupt number being routed.
    pub gsi: u32,
    /// Where interrupts delivered on `gsi` originate.
    pub source: IrqSource,
}
310 
/// Interrupt controller IDs
///
/// The discriminants double as the `chip_id` values passed to KVM_GET_IRQCHIP/KVM_SET_IRQCHIP.
pub enum PicId {
    /// The primary (master) PIC.
    Primary = 0,
    /// The secondary (slave) PIC.
    Secondary = 1,
}
316 
/// Number of pins on the IOAPIC.
pub const NUM_IOAPIC_PINS: usize = 24;

// Used to invert the order when stored in a max-heap.
//
// Wraps a KVM memory slot number; the `Ord` impl below reverses the numeric order so that a
// `BinaryHeap<MemSlot>` (a max-heap) pops the lowest free slot number first.
#[derive(Copy, Clone, Eq, PartialEq)]
struct MemSlot(u32);
323 
324 impl Ord for MemSlot {
cmp(&self, other: &MemSlot) -> Ordering325     fn cmp(&self, other: &MemSlot) -> Ordering {
326         // Notice the order is inverted so the lowest magnitude slot has the highest priority in a
327         // max-heap.
328         other.0.cmp(&self.0)
329     }
330 }
331 
332 impl PartialOrd for MemSlot {
partial_cmp(&self, other: &MemSlot) -> Option<Ordering>333     fn partial_cmp(&self, other: &MemSlot) -> Option<Ordering> {
334         Some(self.cmp(other))
335     }
336 }
337 
/// A wrapper around creating and using a VM.
pub struct Vm {
    // Owned VM fd returned by KVM_CREATE_VM.
    vm: File,
    // Guest RAM registered at construction time; does not include regions added later.
    guest_mem: GuestMemory,
    // Regions added via `add_memory_region`, keyed by their KVM slot number.
    mem_regions: Arc<Mutex<BTreeMap<u32, Box<dyn MappedRegion>>>>,
    // Slot numbers freed by `remove_memory_region`, reused before allocating new ones.
    mem_slot_gaps: Arc<Mutex<BinaryHeap<MemSlot>>>,
}
345 
346 impl Vm {
    /// Constructs a new `Vm` using the given `Kvm` instance.
    ///
    /// Issues KVM_CREATE_VM and then registers every region of `guest_mem` with the new VM fd,
    /// using each region's index as its KVM slot number.
    pub fn new(kvm: &Kvm, guest_mem: GuestMemory) -> Result<Vm> {
        // SAFETY:
        // Safe because we know kvm is a real kvm fd as this module is the only one that can make
        // Kvm objects.
        let ret = unsafe { ioctl_with_val(kvm, KVM_CREATE_VM, kvm.get_vm_type()) };
        if ret >= 0 {
            // SAFETY:
            // Safe because we verify the value of ret and we are the owners of the fd.
            let vm_file = unsafe { File::from_raw_descriptor(ret) };
            for region in guest_mem.regions() {
                // SAFETY:
                // Safe because the guest regions are guaranteed not to overlap.
                unsafe {
                    set_user_memory_region(
                        &vm_file,
                        region.index as u32,
                        false,
                        false,
                        region.guest_addr.offset(),
                        region.size as u64,
                        region.host_addr as *mut u8,
                    )
                }?;
            }

            Ok(Vm {
                vm: vm_file,
                guest_mem,
                mem_regions: Arc::new(Mutex::new(BTreeMap::new())),
                mem_slot_gaps: Arc::new(Mutex::new(BinaryHeap::new())),
            })
        } else {
            errno_result()
        }
    }
383 
384     /// Checks if a particular `Cap` is available.
385     ///
386     /// This is distinct from the `Kvm` version of this method because the some extensions depend on
387     /// the particular `Vm` existence. This method is encouraged by the kernel because it more
388     /// accurately reflects the usable capabilities.
check_extension(&self, c: Cap) -> bool389     pub fn check_extension(&self, c: Cap) -> bool {
390         // SAFETY:
391         // Safe because we know that our file is a KVM fd and that the extension is one of the ones
392         // defined by kernel.
393         unsafe { ioctl_with_val(self, KVM_CHECK_EXTENSION, c as c_ulong) == 1 }
394     }
395 
    /// Inserts the given `mem` into the VM's address space at `guest_addr`.
    ///
    /// The slot that was assigned the kvm memory mapping is returned on success. The slot can be
    /// given to `Vm::remove_memory_region` to remove the memory from the VM's address space and
    /// take back ownership of `mem`.
    ///
    /// Note that memory inserted into the VM's address space must not overlap with any other memory
    /// slot's region.
    ///
    /// If `read_only` is true, the guest will be able to read the memory as normal, but attempts to
    /// write will trigger a mmio VM exit, leaving the memory untouched.
    ///
    /// If `log_dirty_pages` is true, the slot number can be used to retrieve the pages written to
    /// by the guest with `get_dirty_log`.
    pub fn add_memory_region(
        &mut self,
        guest_addr: GuestAddress,
        mem: Box<dyn MappedRegion>,
        read_only: bool,
        log_dirty_pages: bool,
    ) -> Result<u32> {
        let size = mem.size() as u64;
        // Reject regions whose end would wrap past the top of the guest address space.
        let end_addr = guest_addr
            .checked_add(size)
            .ok_or_else(|| Error::new(EOVERFLOW))?;
        // New regions may not overlap the guest RAM registered at construction time.
        if self.guest_mem.range_overlap(guest_addr, end_addr) {
            return Err(Error::new(ENOSPC));
        }
        let mut regions = self.mem_regions.lock();
        let mut gaps = self.mem_slot_gaps.lock();
        // Reuse the lowest previously-freed slot if one exists; otherwise allocate the next slot
        // past the construction-time guest memory slots and any regions added since.
        let slot = match gaps.pop() {
            Some(gap) => gap.0,
            None => (regions.len() + self.guest_mem.num_regions() as usize) as u32,
        };

        // SAFETY:
        // Safe because we check that the given guest address is valid and has no overlaps. We also
        // know that the pointer and size are correct because the MemoryMapping interface ensures
        // this. We take ownership of the memory mapping so that it won't be unmapped until the slot
        // is removed.
        let res = unsafe {
            set_user_memory_region(
                &self.vm,
                slot,
                read_only,
                log_dirty_pages,
                guest_addr.offset(),
                size,
                mem.as_ptr(),
            )
        };

        if let Err(e) = res {
            // Return the unused slot to the free list before propagating the error.
            gaps.push(MemSlot(slot));
            return Err(e);
        }
        regions.insert(slot, mem);
        Ok(slot)
    }
455 
    /// Removes memory that was previously added at the given slot.
    ///
    /// Ownership of the host memory mapping associated with the given slot is returned on success.
    pub fn remove_memory_region(&mut self, slot: u32) -> Result<Box<dyn MappedRegion>> {
        let mut regions = self.mem_regions.lock();
        if !regions.contains_key(&slot) {
            return Err(Error::new(ENOENT));
        }
        // SAFETY:
        // Safe because the slot is checked against the list of memory slots.
        unsafe {
            // A zero-sized region with a null host pointer tells KVM to delete the slot.
            set_user_memory_region(&self.vm, slot, false, false, 0, 0, std::ptr::null_mut())?;
        }
        // Record the freed slot so a later `add_memory_region` can reuse it.
        self.mem_slot_gaps.lock().push(MemSlot(slot));
        // This remove will always succeed because of the contains_key check above.
        Ok(regions.remove(&slot).unwrap())
    }
473 
    /// Gets the bitmap of dirty pages since the last call to `get_dirty_log` for the memory at
    /// `slot`.
    ///
    /// The size of `dirty_log` must be at least as many bits as there are pages in the memory
    /// region `slot` represents. For example, if the size of `slot` is 16 pages, `dirty_log` must
    /// be 2 bytes or greater.
    ///
    /// Only slots added through `add_memory_region` can be queried; otherwise ENOENT is returned.
    pub fn get_dirty_log(&self, slot: u32, dirty_log: &mut [u8]) -> Result<()> {
        match self.mem_regions.lock().get(&slot) {
            Some(mem) => {
                // Ensures that there are as many bytes in dirty_log as there are pages in the mmap.
                if dirty_log_bitmap_size(mem.size()) > dirty_log.len() {
                    return Err(Error::new(EINVAL));
                }
                let mut dirty_log_kvm = kvm_dirty_log {
                    slot,
                    ..Default::default()
                };
                // The bitmap pointer lives in a bindgen-generated union, hence `__bindgen_anon_1`.
                dirty_log_kvm.__bindgen_anon_1.dirty_bitmap = dirty_log.as_ptr() as *mut c_void;
                // SAFETY:
                // Safe because the `dirty_bitmap` pointer assigned above is guaranteed to be valid
                // (because it's from a slice) and we checked that it will be large enough to hold
                // the entire log.
                let ret = unsafe { ioctl_with_ref(self, KVM_GET_DIRTY_LOG, &dirty_log_kvm) };
                if ret == 0 {
                    Ok(())
                } else {
                    errno_result()
                }
            }
            _ => Err(Error::new(ENOENT)),
        }
    }
506 
507     /// Gets a reference to the guest memory owned by this VM.
508     ///
509     /// Note that `GuestMemory` does not include any mmio memory that may have been added after
510     /// this VM was constructed.
get_memory(&self) -> &GuestMemory511     pub fn get_memory(&self) -> &GuestMemory {
512         &self.guest_mem
513     }
514 
    /// Sets the address of a one-page region in the VM's address space.
    ///
    /// See the documentation on the KVM_SET_IDENTITY_MAP_ADDR ioctl.
    #[cfg(target_arch = "x86_64")]
    pub fn set_identity_map_addr(&self, addr: GuestAddress) -> Result<()> {
        // SAFETY:
        // Safe because we know that our file is a VM fd and we verify the return result.
        // The ioctl reads a single u64 guest physical address through the pointer.
        let ret = unsafe { ioctl_with_ref(self, KVM_SET_IDENTITY_MAP_ADDR, &addr.offset()) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }
529 
    /// Retrieves the current timestamp of kvmclock as seen by the current guest.
    ///
    /// See the documentation on the KVM_GET_CLOCK ioctl.
    #[cfg(target_arch = "x86_64")]
    pub fn get_clock(&self) -> Result<kvm_clock_data> {
        // SAFETY: trivially safe; `kvm_clock_data` is plain data for which all-zero bytes are a
        // valid value.
        let mut clock_data = unsafe { std::mem::zeroed() };
        // SAFETY:
        // Safe because we know that our file is a VM fd, we know the kernel will only write
        // correct amount of memory to our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_CLOCK, &mut clock_data) };
        if ret == 0 {
            Ok(clock_data)
        } else {
            errno_result()
        }
    }
547 
548     /// Sets the current timestamp of kvmclock to the specified value.
549     ///
550     /// See the documentation on the KVM_SET_CLOCK ioctl.
551     #[cfg(target_arch = "x86_64")]
set_clock(&self, clock_data: &kvm_clock_data) -> Result<()>552     pub fn set_clock(&self, clock_data: &kvm_clock_data) -> Result<()> {
553         // SAFETY:
554         // Safe because we know that our file is a VM fd, we know the kernel will only read
555         // correct amount of memory from our pointer, and we verify the return result.
556         let ret = unsafe { ioctl_with_ref(self, KVM_SET_CLOCK, clock_data) };
557         if ret == 0 {
558             Ok(())
559         } else {
560             errno_result()
561         }
562     }
563 
564     /// Crates an in kernel interrupt controller.
565     ///
566     /// See the documentation on the KVM_CREATE_IRQCHIP ioctl.
567     #[cfg(any(target_arch = "x86_64", target_arch = "arm", target_arch = "aarch64"))]
create_irq_chip(&self) -> Result<()>568     pub fn create_irq_chip(&self) -> Result<()> {
569         // SAFETY:
570         // Safe because we know that our file is a VM fd and we verify the return result.
571         let ret = unsafe { ioctl(self, KVM_CREATE_IRQCHIP) };
572         if ret == 0 {
573             Ok(())
574         } else {
575             errno_result()
576         }
577     }
578 
    /// Retrieves the state of given interrupt controller by issuing KVM_GET_IRQCHIP ioctl.
    ///
    /// Note that this call can only succeed after a call to `Vm::create_irq_chip`.
    #[cfg(target_arch = "x86_64")]
    pub fn get_pic_state(&self, id: PicId) -> Result<kvm_pic_state> {
        // `chip_id` selects which PIC (0 = primary, 1 = secondary) the kernel reports on.
        let mut irqchip_state = kvm_irqchip {
            chip_id: id as u32,
            ..Default::default()
        };
        // SAFETY:
        // Safe because we know our file is a VM fd, we know the kernel will only write
        // correct amount of memory to our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_IRQCHIP, &mut irqchip_state) };
        if ret == 0 {
            Ok(
                // SAFETY:
                // Safe as we know that we are retrieving data related to the
                // PIC (primary or secondary) and not IOAPIC.
                unsafe { irqchip_state.chip.pic },
            )
        } else {
            errno_result()
        }
    }
603 
    /// Sets the state of given interrupt controller by issuing KVM_SET_IRQCHIP ioctl.
    ///
    /// Note that this call can only succeed after a call to `Vm::create_irq_chip`.
    #[cfg(target_arch = "x86_64")]
    pub fn set_pic_state(&self, id: PicId, state: &kvm_pic_state) -> Result<()> {
        // `chip_id` selects which PIC (0 = primary, 1 = secondary) the kernel should update.
        let mut irqchip_state = kvm_irqchip {
            chip_id: id as u32,
            ..Default::default()
        };
        // Write the caller's state into the `pic` arm of the bindgen-generated `chip` union.
        irqchip_state.chip.pic = *state;
        // SAFETY:
        // Safe because we know that our file is a VM fd, we know the kernel will only read
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_SET_IRQCHIP, &irqchip_state) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }
624 
    /// Retrieves the state of IOAPIC by issuing KVM_GET_IRQCHIP ioctl.
    ///
    /// Note that this call can only succeed after a call to `Vm::create_irq_chip`.
    #[cfg(target_arch = "x86_64")]
    pub fn get_ioapic_state(&self) -> Result<kvm_ioapic_state> {
        // `chip_id` 2 selects the IOAPIC (0 and 1 select the primary/secondary PICs).
        let mut irqchip_state = kvm_irqchip {
            chip_id: 2,
            ..Default::default()
        };
        let ret =
            // SAFETY:
            // Safe because we know our file is a VM fd, we know the kernel will only write
            // correct amount of memory to our pointer, and we verify the return result.
            unsafe {
                ioctl_with_mut_ref(self, KVM_GET_IRQCHIP, &mut irqchip_state)
        };
        if ret == 0 {
            Ok(
                // SAFETY:
                // Safe as we know that we are retrieving data related to the
                // IOAPIC and not PIC.
                unsafe { irqchip_state.chip.ioapic },
            )
        } else {
            errno_result()
        }
    }
652 
    /// Sets the state of IOAPIC by issuing KVM_SET_IRQCHIP ioctl.
    ///
    /// Note that this call can only succeed after a call to `Vm::create_irq_chip`.
    #[cfg(target_arch = "x86_64")]
    pub fn set_ioapic_state(&self, state: &kvm_ioapic_state) -> Result<()> {
        // `chip_id` 2 selects the IOAPIC (0 and 1 select the primary/secondary PICs).
        let mut irqchip_state = kvm_irqchip {
            chip_id: 2,
            ..Default::default()
        };
        // Write the caller's state into the `ioapic` arm of the bindgen-generated `chip` union.
        irqchip_state.chip.ioapic = *state;
        // SAFETY:
        // Safe because we know that our file is a VM fd, we know the kernel will only read
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_SET_IRQCHIP, &irqchip_state) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }
673 
    /// Sets the level on the given irq to 1 if `active` is true, and 0 otherwise.
    #[cfg(any(target_arch = "x86_64", target_arch = "arm", target_arch = "aarch64"))]
    pub fn set_irq_line(&self, irq: u32, active: bool) -> Result<()> {
        let mut irq_level = kvm_irq_level::default();
        // The irq number lives in a bindgen-generated union, hence `__bindgen_anon_1`.
        irq_level.__bindgen_anon_1.irq = irq;
        irq_level.level = active.into();

        // SAFETY:
        // Safe because we know that our file is a VM fd, we know the kernel will only read the
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_IRQ_LINE, &irq_level) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }
691 
692     /// Creates a PIT as per the KVM_CREATE_PIT2 ioctl.
693     ///
694     /// Note that this call can only succeed after a call to `Vm::create_irq_chip`.
695     #[cfg(target_arch = "x86_64")]
create_pit(&self) -> Result<()>696     pub fn create_pit(&self) -> Result<()> {
697         let pit_config = kvm_pit_config::default();
698         // SAFETY:
699         // Safe because we know that our file is a VM fd, we know the kernel will only read the
700         // correct amount of memory from our pointer, and we verify the return result.
701         let ret = unsafe { ioctl_with_ref(self, KVM_CREATE_PIT2, &pit_config) };
702         if ret == 0 {
703             Ok(())
704         } else {
705             errno_result()
706         }
707     }
708 
    /// Retrieves the state of PIT by issuing KVM_GET_PIT2 ioctl.
    ///
    /// Note that this call can only succeed after a call to `Vm::create_pit`.
    #[cfg(target_arch = "x86_64")]
    pub fn get_pit_state(&self) -> Result<kvm_pit_state2> {
        // SAFETY: trivially safe; `kvm_pit_state2` is plain data for which all-zero bytes are a
        // valid value.
        let mut pit_state = unsafe { std::mem::zeroed() };
        // SAFETY:
        // Safe because we know that our file is a VM fd, we know the kernel will only write
        // correct amount of memory to our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_PIT2, &mut pit_state) };
        if ret == 0 {
            Ok(pit_state)
        } else {
            errno_result()
        }
    }
726 
    /// Sets the state of PIT by issuing KVM_SET_PIT2 ioctl.
    ///
    /// Note that this call can only succeed after a call to `Vm::create_pit`.
    #[cfg(target_arch = "x86_64")]
    pub fn set_pit_state(&self, pit_state: &kvm_pit_state2) -> Result<()> {
        // SAFETY:
        // Safe because we know that our file is a VM fd, we know the kernel will only read
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_SET_PIT2, pit_state) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }
742 
    /// Registers an event to be signaled whenever a certain address is written to.
    ///
    /// The `datamatch` parameter can be used to limit signaling `evt` to only the cases where the
    /// value being written is equal to `datamatch`. Note that the size of `datamatch` is important
    /// and must match the expected size of the guest's write.
    ///
    /// In all cases where `evt` is signaled, the ordinary vmexit to userspace that would be
    /// triggered is prevented.
    pub fn register_ioevent(
        &self,
        evt: &Event,
        addr: IoeventAddress,
        datamatch: Datamatch,
    ) -> Result<()> {
        // Registration and unregistration share `ioeventfd`; `deassign == false` assigns.
        self.ioeventfd(evt, addr, datamatch, false)
    }
759 
    /// Unregisters an event previously registered with `register_ioevent`.
    ///
    /// The `evt`, `addr`, and `datamatch` set must be the same as the ones passed into
    /// `register_ioevent`.
    pub fn unregister_ioevent(
        &self,
        evt: &Event,
        addr: IoeventAddress,
        datamatch: Datamatch,
    ) -> Result<()> {
        // Registration and unregistration share `ioeventfd`; `deassign == true` deassigns.
        self.ioeventfd(evt, addr, datamatch, true)
    }
772 
ioeventfd( &self, evt: &Event, addr: IoeventAddress, datamatch: Datamatch, deassign: bool, ) -> Result<()>773     fn ioeventfd(
774         &self,
775         evt: &Event,
776         addr: IoeventAddress,
777         datamatch: Datamatch,
778         deassign: bool,
779     ) -> Result<()> {
780         let (do_datamatch, datamatch_value, datamatch_len) = match datamatch {
781             Datamatch::AnyLength => (false, 0, 0),
782             Datamatch::U8(v) => match v {
783                 Some(u) => (true, u as u64, 1),
784                 None => (false, 0, 1),
785             },
786             Datamatch::U16(v) => match v {
787                 Some(u) => (true, u as u64, 2),
788                 None => (false, 0, 2),
789             },
790             Datamatch::U32(v) => match v {
791                 Some(u) => (true, u as u64, 4),
792                 None => (false, 0, 4),
793             },
794             Datamatch::U64(v) => match v {
795                 Some(u) => (true, u, 8),
796                 None => (false, 0, 8),
797             },
798         };
799         let mut flags = 0;
800         if deassign {
801             flags |= 1 << kvm_ioeventfd_flag_nr_deassign;
802         }
803         if do_datamatch {
804             flags |= 1 << kvm_ioeventfd_flag_nr_datamatch
805         }
806         if let IoeventAddress::Pio(_) = addr {
807             flags |= 1 << kvm_ioeventfd_flag_nr_pio;
808         }
809         let ioeventfd = kvm_ioeventfd {
810             datamatch: datamatch_value,
811             len: datamatch_len,
812             addr: match addr {
813                 IoeventAddress::Pio(p) => p,
814                 IoeventAddress::Mmio(m) => m,
815             },
816             fd: evt.as_raw_descriptor(),
817             flags,
818             ..Default::default()
819         };
820         // SAFETY:
821         // Safe because we know that our file is a VM fd, we know the kernel will only read the
822         // correct amount of memory from our pointer, and we verify the return result.
823         let ret = unsafe { ioctl_with_ref(self, KVM_IOEVENTFD, &ioeventfd) };
824         if ret == 0 {
825             Ok(())
826         } else {
827             errno_result()
828         }
829     }
830 
831     /// Registers an event that will, when signalled, trigger the `gsi` irq, and `resample_evt` will
832     /// get triggered when the irqchip is resampled.
833     #[cfg(any(target_arch = "x86_64", target_arch = "arm", target_arch = "aarch64"))]
register_irqfd_resample( &self, evt: &Event, resample_evt: &Event, gsi: u32, ) -> Result<()>834     pub fn register_irqfd_resample(
835         &self,
836         evt: &Event,
837         resample_evt: &Event,
838         gsi: u32,
839     ) -> Result<()> {
840         let irqfd = kvm_irqfd {
841             flags: KVM_IRQFD_FLAG_RESAMPLE,
842             fd: evt.as_raw_descriptor() as u32,
843             resamplefd: resample_evt.as_raw_descriptor() as u32,
844             gsi,
845             ..Default::default()
846         };
847         // SAFETY:
848         // Safe because we know that our file is a VM fd, we know the kernel will only read the
849         // correct amount of memory from our pointer, and we verify the return result.
850         let ret = unsafe { ioctl_with_ref(self, KVM_IRQFD, &irqfd) };
851         if ret == 0 {
852             Ok(())
853         } else {
854             errno_result()
855         }
856     }
857 
858     /// Unregisters an event that was previously registered with
859     /// `register_irqfd`/`register_irqfd_resample`.
860     ///
861     /// The `evt` and `gsi` pair must be the same as the ones passed into
862     /// `register_irqfd`/`register_irqfd_resample`.
863     #[cfg(any(target_arch = "x86_64", target_arch = "arm", target_arch = "aarch64"))]
unregister_irqfd(&self, evt: &Event, gsi: u32) -> Result<()>864     pub fn unregister_irqfd(&self, evt: &Event, gsi: u32) -> Result<()> {
865         let irqfd = kvm_irqfd {
866             fd: evt.as_raw_descriptor() as u32,
867             gsi,
868             flags: KVM_IRQFD_FLAG_DEASSIGN,
869             ..Default::default()
870         };
871         // SAFETY:
872         // Safe because we know that our file is a VM fd, we know the kernel will only read the
873         // correct amount of memory from our pointer, and we verify the return result.
874         let ret = unsafe { ioctl_with_ref(self, KVM_IRQFD, &irqfd) };
875         if ret == 0 {
876             Ok(())
877         } else {
878             errno_result()
879         }
880     }
881 
882     /// Sets the GSI routing table, replacing any table set with previous calls to
883     /// `set_gsi_routing`.
884     #[cfg(target_arch = "x86_64")]
set_gsi_routing(&self, routes: &[IrqRoute]) -> Result<()>885     pub fn set_gsi_routing(&self, routes: &[IrqRoute]) -> Result<()> {
886         let mut irq_routing =
887             vec_with_array_field::<kvm_irq_routing, kvm_irq_routing_entry>(routes.len());
888         irq_routing[0].nr = routes.len() as u32;
889 
890         // SAFETY:
891         // Safe because we ensured there is enough space in irq_routing to hold the number of
892         // route entries.
893         let irq_routes = unsafe { irq_routing[0].entries.as_mut_slice(routes.len()) };
894         for (route, irq_route) in routes.iter().zip(irq_routes.iter_mut()) {
895             irq_route.gsi = route.gsi;
896             match route.source {
897                 IrqSource::Irqchip { chip, pin } => {
898                     irq_route.type_ = KVM_IRQ_ROUTING_IRQCHIP;
899                     irq_route.u.irqchip = kvm_irq_routing_irqchip { irqchip: chip, pin }
900                 }
901                 IrqSource::Msi { address, data } => {
902                     irq_route.type_ = KVM_IRQ_ROUTING_MSI;
903                     irq_route.u.msi = kvm_irq_routing_msi {
904                         address_lo: address as u32,
905                         address_hi: (address >> 32) as u32,
906                         data,
907                         ..Default::default()
908                     }
909                 }
910             }
911         }
912 
913         // TODO(b/315998194): Add safety comment
914         #[allow(clippy::undocumented_unsafe_blocks)]
915         let ret = unsafe { ioctl_with_ref(self, KVM_SET_GSI_ROUTING, &irq_routing[0]) };
916         if ret == 0 {
917             Ok(())
918         } else {
919             errno_result()
920         }
921     }
922 
    /// Enable the specified capability.
    /// See documentation for KVM_ENABLE_CAP.
    /// # Safety
    /// This function is marked as unsafe because `cap` may contain values which are interpreted as
    /// pointers by the kernel.
    pub unsafe fn kvm_enable_cap(&self, cap: &kvm_enable_cap) -> Result<()> {
        // SAFETY:
        // Safe because this is a VM fd and the kernel reads exactly the size of the struct; the
        // validity of any pointer-valued args inside `cap` is the caller's obligation per the
        // `# Safety` contract above.
        let ret = ioctl_with_ref(self, KVM_ENABLE_CAP, cap);
        if ret < 0 {
            errno_result()
        } else {
            Ok(())
        }
    }
938 }
939 
impl AsRawDescriptor for Vm {
    // Forwards to the underlying VM file descriptor.
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.vm.as_raw_descriptor()
    }
}
945 
/// A reason why a VCPU exited. One of these returns every time `Vcpu::run` is called.
#[derive(Debug)]
pub enum VcpuExit {
    /// An out port instruction was run on the given port with the given data.
    IoOut {
        port: u16,
        size: usize,
        data: [u8; 8],
    },
    /// An in port instruction was run on the given port.
    ///
    /// The data that the instruction receives should be set with `set_data` before `Vcpu::run` is
    /// called again.
    IoIn {
        port: u16,
        size: usize,
    },
    /// A read instruction was run against the given MMIO address.
    ///
    /// The data that the instruction receives should be set with `set_data` before `Vcpu::run` is
    /// called again.
    MmioRead {
        address: u64,
        size: usize,
    },
    /// A write instruction was run against the given MMIO address with the given data.
    MmioWrite {
        address: u64,
        size: usize,
        data: [u8; 8],
    },
    /// The guest acknowledged an interrupt on the given ioapic vector (EOI).
    IoapicEoi {
        vector: u8,
    },
    /// A Hyper-V synthetic interrupt controller (SynIC) MSR write.
    HypervSynic {
        msr: u32,
        control: u64,
        evt_page: u64,
        msg_page: u64,
    },
    /// A Hyper-V hypercall; the result should be set with `set_data` before running again.
    HypervHcall {
        input: u64,
        params: [u64; 2],
    },
    Unknown,
    Exception,
    Hypercall,
    Debug,
    Hlt,
    IrqWindowOpen,
    Shutdown,
    FailEntry {
        hardware_entry_failure_reason: u64,
    },
    Intr,
    SetTpr,
    TprAccess,
    S390Sieic,
    S390Reset,
    Dcr,
    Nmi,
    InternalError,
    Osi,
    PaprHcall,
    S390Ucontrol,
    Watchdog,
    S390Tsch,
    Epr,
    /// The cpu triggered a system level event which is specified by the type field.
    /// The first field is the event type and the second field is flags.
    /// The possible event types are shutdown, reset, or crash.  So far there
    /// are not any flags defined.
    SystemEvent(u32 /* event_type */, u64 /* flags */),
}
1020 
/// A wrapper around creating and using a VCPU.
/// `Vcpu` provides all functionality except for running. To run, `to_runnable` must be called to
/// lock the vcpu to a thread. Then the returned `RunnableVcpu` can be used for running.
pub struct Vcpu {
    // VCPU fd obtained via KVM_CREATE_VCPU.
    vcpu: File,
    // Mapping of this vcpu's shared `kvm_run` structure, sized by KVM_GET_VCPU_MMAP_SIZE.
    run_mmap: MemoryMapping,
}
1028 
/// Per-thread record of the vcpu currently bound to that thread (see `Vcpu::to_runnable`).
pub struct VcpuThread {
    // Raw pointer into the vcpu's `run_mmap`; written by `Vcpu::set_local_immediate_exit`.
    run: *mut kvm_run,
    // Signal number used to kick the vcpu, if any.
    signal_num: Option<c_int>,
}
1033 
// Thread-local slot for the vcpu bound to the current thread; populated by `Vcpu::to_runnable`
// and consulted by `Vcpu::set_local_immediate_exit` (e.g. from signal handlers).
thread_local!(static VCPU_THREAD: RefCell<Option<VcpuThread>> = const { RefCell::new(None) });
1035 
1036 impl Vcpu {
    /// Constructs a new VCPU for `vm`.
    ///
    /// The `id` argument is the CPU number between [0, max vcpus).
    pub fn new(id: c_ulong, kvm: &Kvm, vm: &Vm) -> Result<Vcpu> {
        // The kernel dictates how large the per-vcpu kvm_run mapping must be.
        let run_mmap_size = kvm.get_vcpu_mmap_size()?;

        // SAFETY:
        // Safe because we know that vm is a VM fd and we verify the return result.
        let vcpu_fd = unsafe { ioctl_with_val(vm, KVM_CREATE_VCPU, id) };
        if vcpu_fd < 0 {
            return errno_result();
        }

        // SAFETY:
        // Wrap the vcpu now in case the following ? returns early. This is safe because we verified
        // the value of the fd and we own the fd.
        let vcpu = unsafe { File::from_raw_descriptor(vcpu_fd) };

        // Map the shared kvm_run structure used to communicate with the kernel on each run.
        // NOTE(review): the underlying mmap error is discarded and reported as ENOSPC.
        let run_mmap = MemoryMappingBuilder::new(run_mmap_size)
            .from_file(&vcpu)
            .build()
            .map_err(|_| Error::new(ENOSPC))?;

        Ok(Vcpu { vcpu, run_mmap })
    }
1062 
    /// Consumes `self` and returns a `RunnableVcpu`. A `RunnableVcpu` is required to run the
    /// guest.
    /// Assigns a vcpu to the current thread and stores it in a hash map that can be used by signal
    /// handlers to call set_local_immediate_exit(). An optional signal number will be temporarily
    /// blocked while assigning the vcpu to the thread and later blocked when `RunnableVcpu` is
    /// destroyed.
    ///
    /// Returns an error, `EBUSY`, if the current thread already contains a Vcpu.
    #[allow(clippy::cast_ptr_alignment)]
    pub fn to_runnable(self, signal_num: Option<c_int>) -> Result<RunnableVcpu> {
        // Block signal while we add -- if a signal fires (very unlikely,
        // as this means something is trying to pause the vcpu before it has
        // even started) it'll try to grab the read lock while this write
        // lock is grabbed and cause a deadlock.
        // Assuming that a failure to block means it's already blocked.
        // `_blocked_signal` is held until the end of this function so the signal stays blocked
        // while VCPU_THREAD is mutated below.
        let _blocked_signal = signal_num.map(BlockedSignal::new);

        // Publish this vcpu's kvm_run pointer in thread-local storage so signal handlers can
        // request an immediate exit; refuse (EBUSY) if this thread already hosts a vcpu.
        VCPU_THREAD.with(|v| {
            if v.borrow().is_none() {
                *v.borrow_mut() = Some(VcpuThread {
                    run: self.run_mmap.as_ptr() as *mut kvm_run,
                    signal_num,
                });
                Ok(())
            } else {
                Err(Error::new(EBUSY))
            }
        })?;

        Ok(RunnableVcpu {
            vcpu: self,
            phantom: Default::default(),
        })
    }
1097 
    /// Sets the data received by a mmio read, ioport in, or hypercall instruction.
    ///
    /// This function should be called after `Vcpu::run` returns an `VcpuExit::IoIn`,
    /// `VcpuExit::MmioRead`, or `VcpuExit::HypervHcall`.
    ///
    /// Returns EINVAL if the current exit reason does not accept data, or if `data`'s length
    /// does not match the size the kernel expects for that exit.
    #[allow(clippy::cast_ptr_alignment)]
    pub fn set_data(&self, data: &[u8]) -> Result<()> {
        // SAFETY:
        // Safe because we know we mapped enough memory to hold the kvm_run struct because the
        // kernel told us how large it was. The pointer is page aligned so casting to a different
        // type is well defined, hence the clippy allow attribute.
        let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };
        match run.exit_reason {
            KVM_EXIT_IO => {
                let run_start = run as *mut kvm_run as *mut u8;
                // SAFETY:
                // Safe because the exit_reason (which comes from the kernel) told us which
                // union field to use.
                let io = unsafe { run.__bindgen_anon_1.io };
                // Only "in" port accesses receive data from userspace.
                if io.direction as u32 != KVM_EXIT_IO_IN {
                    return Err(Error::new(EINVAL));
                }
                // The caller must supply exactly count * size bytes.
                let data_size = (io.count as usize) * (io.size as usize);
                if data_size != data.len() {
                    return Err(Error::new(EINVAL));
                }
                // SAFETY:
                // The data_offset is defined by the kernel to be some number of bytes into the
                // kvm_run structure, which we have fully mmap'd.
                unsafe {
                    let data_ptr = run_start.offset(io.data_offset as isize);
                    copy_nonoverlapping(data.as_ptr(), data_ptr, data_size);
                }
                Ok(())
            }
            KVM_EXIT_MMIO => {
                // SAFETY:
                // Safe because the exit_reason (which comes from the kernel) told us which
                // union field to use.
                let mmio = unsafe { &mut run.__bindgen_anon_1.mmio };
                // Only MMIO reads receive data from userspace.
                if mmio.is_write != 0 {
                    return Err(Error::new(EINVAL));
                }
                let len = mmio.len as usize;
                if len != data.len() {
                    return Err(Error::new(EINVAL));
                }
                mmio.data[..len].copy_from_slice(data);
                Ok(())
            }
            KVM_EXIT_HYPERV => {
                // SAFETY:
                // Safe because the exit_reason (which comes from the kernel) told us which
                // union field to use.
                let hyperv = unsafe { &mut run.__bindgen_anon_1.hyperv };
                if hyperv.type_ != KVM_EXIT_HYPERV_HCALL {
                    return Err(Error::new(EINVAL));
                }
                // SAFETY:
                // Safe because hyperv.type_ (checked above, and supplied by the kernel) tells us
                // the hcall member of the union is the active field.
                #[allow(clippy::undocumented_unsafe_blocks)]
                let hcall = unsafe { &mut hyperv.u.hcall };
                // The hypercall result must be exactly 8 bytes.
                match data.try_into() {
                    Ok(data) => {
                        hcall.result = u64::from_ne_bytes(data);
                    }
                    _ => return Err(Error::new(EINVAL)),
                }
                Ok(())
            }
            _ => Err(Error::new(EINVAL)),
        }
    }
1169 
    /// Sets the bit that requests an immediate exit.
    ///
    /// When set, the next `Vcpu::run` returns to userspace immediately instead of entering the
    /// guest (see the kernel's `kvm_run.immediate_exit` documentation).
    #[allow(clippy::cast_ptr_alignment)]
    pub fn set_immediate_exit(&self, exit: bool) {
        // SAFETY:
        // Safe because we know we mapped enough memory to hold the kvm_run struct because the
        // kernel told us how large it was. The pointer is page aligned so casting to a different
        // type is well defined, hence the clippy allow attribute.
        let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };
        run.immediate_exit = exit.into();
    }
1180 
    /// Sets/clears the bit for immediate exit for the vcpu on the current thread.
    ///
    /// No-op if no vcpu is bound to the current thread (VCPU_THREAD is empty).
    pub fn set_local_immediate_exit(exit: bool) {
        VCPU_THREAD.with(|v| {
            if let Some(state) = &(*v.borrow()) {
                // SAFETY:
                // `state.run` points into the bound vcpu's run_mmap, which was valid when
                // `to_runnable` stored it. NOTE(review): soundness relies on the VCPU_THREAD
                // entry being cleared before that mapping is dropped — confirm against
                // `RunnableVcpu`'s Drop impl (outside this view).
                #[allow(clippy::undocumented_unsafe_blocks)]
                unsafe {
                    (*state.run).immediate_exit = exit.into();
                };
            }
        });
    }
1193 
1194     /// Gets the VCPU registers.
1195     #[cfg(not(any(target_arch = "arm", target_arch = "aarch64")))]
get_regs(&self) -> Result<kvm_regs>1196     pub fn get_regs(&self) -> Result<kvm_regs> {
1197         // SAFETY: trivially safe
1198         let mut regs = unsafe { std::mem::zeroed() };
1199         // SAFETY:
1200         // Safe because we know that our file is a VCPU fd, we know the kernel will only read the
1201         // correct amount of memory from our pointer, and we verify the return result.
1202         let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_REGS, &mut regs) };
1203         if ret != 0 {
1204             return errno_result();
1205         }
1206         Ok(regs)
1207     }
1208 
1209     /// Sets the VCPU registers.
1210     #[cfg(not(any(target_arch = "arm", target_arch = "aarch64")))]
set_regs(&self, regs: &kvm_regs) -> Result<()>1211     pub fn set_regs(&self, regs: &kvm_regs) -> Result<()> {
1212         // SAFETY:
1213         // Safe because we know that our file is a VCPU fd, we know the kernel will only read the
1214         // correct amount of memory from our pointer, and we verify the return result.
1215         let ret = unsafe { ioctl_with_ref(self, KVM_SET_REGS, regs) };
1216         if ret != 0 {
1217             return errno_result();
1218         }
1219         Ok(())
1220     }
1221 
1222     /// Gets the VCPU special registers.
1223     #[cfg(target_arch = "x86_64")]
get_sregs(&self) -> Result<kvm_sregs>1224     pub fn get_sregs(&self) -> Result<kvm_sregs> {
1225         // SAFETY: trivially safe
1226         let mut regs = unsafe { std::mem::zeroed() };
1227         // SAFETY:
1228         // Safe because we know that our file is a VCPU fd, we know the kernel will only write the
1229         // correct amount of memory to our pointer, and we verify the return result.
1230         let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_SREGS, &mut regs) };
1231         if ret != 0 {
1232             return errno_result();
1233         }
1234         Ok(regs)
1235     }
1236 
1237     /// Sets the VCPU special registers.
1238     #[cfg(target_arch = "x86_64")]
set_sregs(&self, sregs: &kvm_sregs) -> Result<()>1239     pub fn set_sregs(&self, sregs: &kvm_sregs) -> Result<()> {
1240         // SAFETY:
1241         // Safe because we know that our file is a VCPU fd, we know the kernel will only read the
1242         // correct amount of memory from our pointer, and we verify the return result.
1243         let ret = unsafe { ioctl_with_ref(self, KVM_SET_SREGS, sregs) };
1244         if ret != 0 {
1245             return errno_result();
1246         }
1247         Ok(())
1248     }
1249 
    /// Gets the VCPU FPU registers.
    #[cfg(target_arch = "x86_64")]
    pub fn get_fpu(&self) -> Result<kvm_fpu> {
        // SAFETY: trivially safe
        let mut regs = unsafe { std::mem::zeroed() };
        // SAFETY:
        // Safe because we know that our file is a VCPU fd, we know the kernel will only write the
        // correct amount of memory to our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_FPU, &mut regs) };
        if ret != 0 {
            return errno_result();
        }
        Ok(regs)
    }
1264 
1265     /// X86 specific call to setup the FPU
1266     ///
1267     /// See the documentation for KVM_SET_FPU.
1268     #[cfg(target_arch = "x86_64")]
set_fpu(&self, fpu: &kvm_fpu) -> Result<()>1269     pub fn set_fpu(&self, fpu: &kvm_fpu) -> Result<()> {
1270         let ret = {
1271             // SAFETY:
1272             // Here we trust the kernel not to read past the end of the kvm_fpu struct.
1273             unsafe { ioctl_with_ref(self, KVM_SET_FPU, fpu) }
1274         };
1275         if ret < 0 {
1276             return errno_result();
1277         }
1278         Ok(())
1279     }
1280 
1281     /// Gets the VCPU debug registers.
1282     #[cfg(target_arch = "x86_64")]
get_debugregs(&self) -> Result<kvm_debugregs>1283     pub fn get_debugregs(&self) -> Result<kvm_debugregs> {
1284         // SAFETY: trivially safe
1285         let mut regs = unsafe { std::mem::zeroed() };
1286         // SAFETY:
1287         // Safe because we know that our file is a VCPU fd, we know the kernel will only write the
1288         // correct amount of memory to our pointer, and we verify the return result.
1289         let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_DEBUGREGS, &mut regs) };
1290         if ret != 0 {
1291             return errno_result();
1292         }
1293         Ok(regs)
1294     }
1295 
    /// Sets the VCPU debug registers
    #[cfg(target_arch = "x86_64")]
    pub fn set_debugregs(&self, dregs: &kvm_debugregs) -> Result<()> {
        let ret = {
            // SAFETY:
            // Here we trust the kernel not to read past the end of the kvm_debugregs struct.
            unsafe { ioctl_with_ref(self, KVM_SET_DEBUGREGS, dregs) }
        };
        if ret < 0 {
            return errno_result();
        }
        Ok(())
    }
1309 
1310     /// Gets the VCPU extended control registers
1311     #[cfg(target_arch = "x86_64")]
get_xcrs(&self) -> Result<kvm_xcrs>1312     pub fn get_xcrs(&self) -> Result<kvm_xcrs> {
1313         // SAFETY: trivially safe
1314         let mut regs = unsafe { std::mem::zeroed() };
1315         // SAFETY:
1316         // Safe because we know that our file is a VCPU fd, we know the kernel will only write the
1317         // correct amount of memory to our pointer, and we verify the return result.
1318         let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_XCRS, &mut regs) };
1319         if ret != 0 {
1320             return errno_result();
1321         }
1322         Ok(regs)
1323     }
1324 
1325     /// Sets the VCPU extended control registers
1326     #[cfg(target_arch = "x86_64")]
set_xcrs(&self, xcrs: &kvm_xcrs) -> Result<()>1327     pub fn set_xcrs(&self, xcrs: &kvm_xcrs) -> Result<()> {
1328         let ret = {
1329             // SAFETY:
1330             // Here we trust the kernel not to read past the end of the kvm_xcrs struct.
1331             unsafe { ioctl_with_ref(self, KVM_SET_XCRS, xcrs) }
1332         };
1333         if ret < 0 {
1334             return errno_result();
1335         }
1336         Ok(())
1337     }
1338 
1339     /// X86 specific call to get the MSRS
1340     ///
1341     /// See the documentation for KVM_SET_MSRS.
1342     #[cfg(target_arch = "x86_64")]
get_msrs(&self, msr_entries: &mut Vec<kvm_msr_entry>) -> Result<()>1343     pub fn get_msrs(&self, msr_entries: &mut Vec<kvm_msr_entry>) -> Result<()> {
1344         let mut msrs = vec_with_array_field::<kvm_msrs, kvm_msr_entry>(msr_entries.len());
1345         {
1346             // SAFETY:
1347             // Mapping the unsized array to a slice is unsafe because the length isn't known.
1348             // Providing the length used to create the struct guarantees the entire slice is valid.
1349             unsafe {
1350                 let entries: &mut [kvm_msr_entry] = msrs[0].entries.as_mut_slice(msr_entries.len());
1351                 entries.copy_from_slice(msr_entries);
1352             }
1353         }
1354         msrs[0].nmsrs = msr_entries.len() as u32;
1355         let ret = {
1356             // SAFETY:
1357             // Here we trust the kernel not to read or write past the end of the kvm_msrs struct.
1358             unsafe { ioctl_with_mut_ref(self, KVM_GET_MSRS, &mut msrs[0]) }
1359         };
1360         if ret < 0 {
1361             // KVM_SET_MSRS actually returns the number of msr entries written.
1362             return errno_result();
1363         }
1364         // TODO(b/315998194): Add safety comment
1365         #[allow(clippy::undocumented_unsafe_blocks)]
1366         unsafe {
1367             let count = ret as usize;
1368             assert!(count <= msr_entries.len());
1369             let entries: &mut [kvm_msr_entry] = msrs[0].entries.as_mut_slice(count);
1370             msr_entries.truncate(count);
1371             msr_entries.copy_from_slice(entries);
1372         }
1373         Ok(())
1374     }
1375 
1376     /// X86 specific call to setup the MSRS
1377     ///
1378     /// See the documentation for KVM_SET_MSRS.
1379     #[cfg(target_arch = "x86_64")]
set_msrs(&self, msrs: &kvm_msrs) -> Result<()>1380     pub fn set_msrs(&self, msrs: &kvm_msrs) -> Result<()> {
1381         let ret = {
1382             // SAFETY:
1383             // Here we trust the kernel not to read past the end of the kvm_msrs struct.
1384             unsafe { ioctl_with_ref(self, KVM_SET_MSRS, msrs) }
1385         };
1386         if ret < 0 {
1387             // KVM_SET_MSRS actually returns the number of msr entries written.
1388             return errno_result();
1389         }
1390         Ok(())
1391     }
1392 
    /// X86 specific call to setup the CPUID registers
    ///
    /// See the documentation for KVM_SET_CPUID2.
    #[cfg(target_arch = "x86_64")]
    pub fn set_cpuid2(&self, cpuid: &CpuId) -> Result<()> {
        let ret = {
            // SAFETY:
            // Here we trust the kernel not to read past the end of the kvm_cpuid2 struct that
            // `cpuid.as_ptr()` points to.
            unsafe { ioctl_with_ptr(self, KVM_SET_CPUID2, cpuid.as_ptr()) }
        };
        if ret < 0 {
            return errno_result();
        }
        Ok(())
    }
1408 
    /// X86 specific call to get the system emulated hyper-v CPUID values
    ///
    /// See the documentation for KVM_GET_SUPPORTED_HV_CPUID.
    #[cfg(target_arch = "x86_64")]
    pub fn get_hyperv_cpuid(&self) -> Result<CpuId> {
        // Upper bound on how many entries the kernel may fill in.
        const MAX_KVM_CPUID_ENTRIES: usize = 256;
        let mut cpuid = CpuId::new(MAX_KVM_CPUID_ENTRIES);

        let ret = {
            // SAFETY:
            // ioctl is unsafe. The kernel is trusted not to write beyond the bounds of the memory
            // allocated for the struct. The limit is read from nent, which is set to the allocated
            // size(MAX_KVM_CPUID_ENTRIES) above.
            unsafe { ioctl_with_mut_ptr(self, KVM_GET_SUPPORTED_HV_CPUID, cpuid.as_mut_ptr()) }
        };
        if ret < 0 {
            return errno_result();
        }
        Ok(cpuid)
    }
1427 
1428     /// X86 specific call to get the state of the "Local Advanced Programmable Interrupt
1429     /// Controller".
1430     ///
1431     /// See the documentation for KVM_GET_LAPIC.
1432     #[cfg(target_arch = "x86_64")]
get_lapic(&self) -> Result<kvm_lapic_state>1433     pub fn get_lapic(&self) -> Result<kvm_lapic_state> {
1434         let mut klapic: kvm_lapic_state = Default::default();
1435 
1436         let ret = {
1437             // SAFETY:
1438             // The ioctl is unsafe unless you trust the kernel not to write past the end of the
1439             // local_apic struct.
1440             unsafe { ioctl_with_mut_ref(self, KVM_GET_LAPIC, &mut klapic) }
1441         };
1442         if ret < 0 {
1443             return errno_result();
1444         }
1445         Ok(klapic)
1446     }
1447 
1448     /// X86 specific call to set the state of the "Local Advanced Programmable Interrupt
1449     /// Controller".
1450     ///
1451     /// See the documentation for KVM_SET_LAPIC.
1452     #[cfg(target_arch = "x86_64")]
set_lapic(&self, klapic: &kvm_lapic_state) -> Result<()>1453     pub fn set_lapic(&self, klapic: &kvm_lapic_state) -> Result<()> {
1454         let ret = {
1455             // SAFETY:
1456             // The ioctl is safe because the kernel will only read from the klapic struct.
1457             unsafe { ioctl_with_ref(self, KVM_SET_LAPIC, klapic) }
1458         };
1459         if ret < 0 {
1460             return errno_result();
1461         }
1462         Ok(())
1463     }
1464 
    /// Gets the vcpu's current "multiprocessing state".
    ///
    /// See the documentation for KVM_GET_MP_STATE. This call can only succeed after
    /// a call to `Vm::create_irq_chip`.
    ///
    /// Note that KVM defines the call for both x86 and s390 but we do not expect anyone
    /// to run crosvm on s390.
    #[cfg(target_arch = "x86_64")]
    pub fn get_mp_state(&self) -> Result<kvm_mp_state> {
        // SAFETY: trivially safe (all-zero bytes are a valid kvm_mp_state).
        let mut state: kvm_mp_state = unsafe { std::mem::zeroed() };
        // SAFETY:
        // Safe because we know that our file is a VCPU fd, we know the kernel will only
        // write correct amount of memory to our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_MP_STATE, &mut state) };
        if ret < 0 {
            return errno_result();
        }
        Ok(state)
    }
1485 
    /// Sets the vcpu's current "multiprocessing state".
    ///
    /// See the documentation for KVM_SET_MP_STATE. This call can only succeed after
    /// a call to `Vm::create_irq_chip`.
    ///
    /// Note that KVM defines the call for both x86 and s390 but we do not expect anyone
    /// to run crosvm on s390.
    #[cfg(target_arch = "x86_64")]
    pub fn set_mp_state(&self, state: &kvm_mp_state) -> Result<()> {
        let ret = {
            // SAFETY:
            // The ioctl is safe because the kernel will only read from the kvm_mp_state struct.
            unsafe { ioctl_with_ref(self, KVM_SET_MP_STATE, state) }
        };
        if ret < 0 {
            return errno_result();
        }
        Ok(())
    }
1505 
1506     /// Gets the vcpu's currently pending exceptions, interrupts, NMIs, etc
1507     ///
1508     /// See the documentation for KVM_GET_VCPU_EVENTS.
1509     #[cfg(target_arch = "x86_64")]
get_vcpu_events(&self) -> Result<kvm_vcpu_events>1510     pub fn get_vcpu_events(&self) -> Result<kvm_vcpu_events> {
1511         // SAFETY: trivially safe
1512         let mut events: kvm_vcpu_events = unsafe { std::mem::zeroed() };
1513         // SAFETY:
1514         // Safe because we know that our file is a VCPU fd, we know the kernel
1515         // will only write correct amount of memory to our pointer, and we
1516         // verify the return result.
1517         let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_VCPU_EVENTS, &mut events) };
1518         if ret < 0 {
1519             return errno_result();
1520         }
1521         Ok(events)
1522     }
1523 
1524     /// Sets the vcpu's currently pending exceptions, interrupts, NMIs, etc
1525     ///
1526     /// See the documentation for KVM_SET_VCPU_EVENTS.
1527     #[cfg(target_arch = "x86_64")]
set_vcpu_events(&self, events: &kvm_vcpu_events) -> Result<()>1528     pub fn set_vcpu_events(&self, events: &kvm_vcpu_events) -> Result<()> {
1529         let ret = {
1530             // SAFETY:
1531             // The ioctl is safe because the kernel will only read from the
1532             // kvm_vcpu_events.
1533             unsafe { ioctl_with_ref(self, KVM_SET_VCPU_EVENTS, events) }
1534         };
1535         if ret < 0 {
1536             return errno_result();
1537         }
1538         Ok(())
1539     }
1540 
1541     /// Enable the specified capability.
1542     /// See documentation for KVM_ENABLE_CAP.
1543     /// # Safety
1544     /// This function is marked as unsafe because `cap` may contain values which are interpreted as
1545     /// pointers by the kernel.
kvm_enable_cap(&self, cap: &kvm_enable_cap) -> Result<()>1546     pub unsafe fn kvm_enable_cap(&self, cap: &kvm_enable_cap) -> Result<()> {
1547         // SAFETY:
1548         // Safe because we allocated the struct and we know the kernel will read exactly the size of
1549         // the struct.
1550         let ret = ioctl_with_ref(self, KVM_ENABLE_CAP, cap);
1551         if ret < 0 {
1552             return errno_result();
1553         }
1554         Ok(())
1555     }
1556 
1557     /// Specifies set of signals that are blocked during execution of KVM_RUN.
1558     /// Signals that are not blocked will cause KVM_RUN to return with -EINTR.
1559     ///
1560     /// See the documentation for KVM_SET_SIGNAL_MASK
set_signal_mask(&self, signals: &[c_int]) -> Result<()>1561     pub fn set_signal_mask(&self, signals: &[c_int]) -> Result<()> {
1562         let sigset = signal::create_sigset(signals)?;
1563 
1564         let mut kvm_sigmask = vec_with_array_field::<kvm_signal_mask, sigset_t>(1);
1565         // Rust definition of sigset_t takes 128 bytes, but the kernel only
1566         // expects 8-bytes structure, so we can't write
1567         // kvm_sigmask.len  = size_of::<sigset_t>() as u32;
1568         kvm_sigmask[0].len = 8;
1569         // Ensure the length is not too big.
1570         const _ASSERT: usize = size_of::<sigset_t>() - 8usize;
1571 
1572         // SAFETY:
1573         // Safe as we allocated exactly the needed space
1574         unsafe {
1575             copy_nonoverlapping(
1576                 &sigset as *const sigset_t as *const u8,
1577                 kvm_sigmask[0].sigset.as_mut_ptr(),
1578                 8,
1579             );
1580         }
1581 
1582         let ret = {
1583             // SAFETY:
1584             // The ioctl is safe because the kernel will only read from the
1585             // kvm_signal_mask structure.
1586             unsafe { ioctl_with_ref(self, KVM_SET_SIGNAL_MASK, &kvm_sigmask[0]) }
1587         };
1588         if ret < 0 {
1589             return errno_result();
1590         }
1591         Ok(())
1592     }
1593 
1594     /// Sets the value of one register on this VCPU.  The id of the register is
1595     /// encoded as specified in the kernel documentation for KVM_SET_ONE_REG.
1596     #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
set_one_reg(&self, reg_id: u64, data: u64) -> Result<()>1597     pub fn set_one_reg(&self, reg_id: u64, data: u64) -> Result<()> {
1598         let data_ref = &data as *const u64;
1599         let onereg = kvm_one_reg {
1600             id: reg_id,
1601             addr: data_ref as u64,
1602         };
1603         // SAFETY:
1604         // safe because we allocated the struct and we know the kernel will read
1605         // exactly the size of the struct
1606         let ret = unsafe { ioctl_with_ref(self, KVM_SET_ONE_REG, &onereg) };
1607         if ret < 0 {
1608             return errno_result();
1609         }
1610         Ok(())
1611     }
1612 }
1613 
1614 impl AsRawDescriptor for Vcpu {
as_raw_descriptor(&self) -> RawDescriptor1615     fn as_raw_descriptor(&self) -> RawDescriptor {
1616         self.vcpu.as_raw_descriptor()
1617     }
1618 }
1619 
/// A Vcpu that has a thread and can be run. Created by calling `to_runnable` on a `Vcpu`.
/// Implements `Deref` to a `Vcpu` so all `Vcpu` methods are usable, with the addition of the `run`
/// function to execute the guest.
pub struct RunnableVcpu {
    // The underlying VCPU; exposed through `Deref`/`DerefMut`.
    vcpu: Vcpu,
    // vcpus must stay on the same thread once they start.
    // Add the PhantomData pointer to ensure RunnableVcpu is not `Send`
    // (`*mut u8` is neither `Send` nor `Sync`, so neither is the struct).
    phantom: std::marker::PhantomData<*mut u8>,
}
1629 
impl RunnableVcpu {
    /// Runs the VCPU until it exits, returning the reason for the exit.
    ///
    /// Note that the state of the VCPU and associated VM must be setup first for this to do
    /// anything useful.
    #[allow(clippy::cast_ptr_alignment)]
    // The pointer is page aligned so casting to a different type is well defined, hence the clippy
    // allow attribute.
    pub fn run(&self) -> Result<VcpuExit> {
        // SAFETY:
        // Safe because we know that our file is a VCPU fd and we verify the return result.
        let ret = unsafe { ioctl(self, KVM_RUN) };
        if ret == 0 {
            // KVM_RUN returned normally; decode the exit reason from the shared
            // kvm_run structure that was mmap'd from the VCPU fd.
            // SAFETY:
            // Safe because we know we mapped enough memory to hold the kvm_run struct because the
            // kernel told us how large it was.
            let run = unsafe { &*(self.run_mmap.as_ptr() as *const kvm_run) };
            match run.exit_reason {
                KVM_EXIT_IO => {
                    // SAFETY:
                    // Safe because the exit_reason (which comes from the kernel) told us which
                    // union field to use.
                    let io = unsafe { run.__bindgen_anon_1.io };
                    let port = io.port;
                    // Total bytes for the (possibly repeated) port I/O instruction.
                    let size = (io.count as usize) * (io.size as usize);
                    match io.direction as u32 {
                        KVM_EXIT_IO_IN => Ok(VcpuExit::IoIn { port, size }),
                        KVM_EXIT_IO_OUT => {
                            // Capture at most 8 bytes of the outgoing data in the exit value.
                            let mut data = [0; 8];
                            let run_start = run as *const kvm_run as *const u8;
                            // SAFETY:
                            // The data_offset is defined by the kernel to be some number of bytes
                            // into the kvm_run structure, which we have fully mmap'd.
                            unsafe {
                                let data_ptr = run_start.offset(io.data_offset as isize);
                                copy_nonoverlapping(
                                    data_ptr,
                                    data.as_mut_ptr(),
                                    min(size, data.len()),
                                );
                            }
                            Ok(VcpuExit::IoOut { port, size, data })
                        }
                        // Any other direction value from the kernel is unexpected.
                        _ => Err(Error::new(EINVAL)),
                    }
                }
                KVM_EXIT_MMIO => {
                    // SAFETY:
                    // Safe because the exit_reason (which comes from the kernel) told us which
                    // union field to use.
                    let mmio = unsafe { &run.__bindgen_anon_1.mmio };
                    let address = mmio.phys_addr;
                    // Clamp to the embedded data buffer's length.
                    let size = min(mmio.len as usize, mmio.data.len());
                    if mmio.is_write != 0 {
                        Ok(VcpuExit::MmioWrite {
                            address,
                            size,
                            data: mmio.data,
                        })
                    } else {
                        Ok(VcpuExit::MmioRead { address, size })
                    }
                }
                KVM_EXIT_IOAPIC_EOI => {
                    // SAFETY:
                    // Safe because the exit_reason (which comes from the kernel) told us which
                    // union field to use.
                    let vector = unsafe { run.__bindgen_anon_1.eoi.vector };
                    Ok(VcpuExit::IoapicEoi { vector })
                }
                KVM_EXIT_HYPERV => {
                    // SAFETY:
                    // Safe because the exit_reason (which comes from the kernel) told us which
                    // union field to use.
                    let hyperv = unsafe { &run.__bindgen_anon_1.hyperv };
                    // The Hyper-V exit carries its own nested discriminant.
                    match hyperv.type_ {
                        KVM_EXIT_HYPERV_SYNIC => {
                            // SAFETY: `hyperv.type_` (set by the kernel) selects the `synic`
                            // union field, mirroring the exit_reason pattern above.
                            #[allow(clippy::undocumented_unsafe_blocks)]
                            let synic = unsafe { &hyperv.u.synic };
                            Ok(VcpuExit::HypervSynic {
                                msr: synic.msr,
                                control: synic.control,
                                evt_page: synic.evt_page,
                                msg_page: synic.msg_page,
                            })
                        }
                        KVM_EXIT_HYPERV_HCALL => {
                            // SAFETY: `hyperv.type_` (set by the kernel) selects the `hcall`
                            // union field, mirroring the exit_reason pattern above.
                            #[allow(clippy::undocumented_unsafe_blocks)]
                            let hcall = unsafe { &hyperv.u.hcall };
                            Ok(VcpuExit::HypervHcall {
                                input: hcall.input,
                                params: hcall.params,
                            })
                        }
                        _ => Err(Error::new(EINVAL)),
                    }
                }
                // Exits with no payload map directly to their VcpuExit variants.
                KVM_EXIT_UNKNOWN => Ok(VcpuExit::Unknown),
                KVM_EXIT_EXCEPTION => Ok(VcpuExit::Exception),
                KVM_EXIT_HYPERCALL => Ok(VcpuExit::Hypercall),
                KVM_EXIT_DEBUG => Ok(VcpuExit::Debug),
                KVM_EXIT_HLT => Ok(VcpuExit::Hlt),
                KVM_EXIT_IRQ_WINDOW_OPEN => Ok(VcpuExit::IrqWindowOpen),
                KVM_EXIT_SHUTDOWN => Ok(VcpuExit::Shutdown),
                KVM_EXIT_FAIL_ENTRY => {
                    // SAFETY:
                    // Safe because the exit_reason (which comes from the kernel) told us which
                    // union field to use.
                    let hardware_entry_failure_reason = unsafe {
                        run.__bindgen_anon_1
                            .fail_entry
                            .hardware_entry_failure_reason
                    };
                    Ok(VcpuExit::FailEntry {
                        hardware_entry_failure_reason,
                    })
                }
                KVM_EXIT_INTR => Ok(VcpuExit::Intr),
                KVM_EXIT_SET_TPR => Ok(VcpuExit::SetTpr),
                KVM_EXIT_TPR_ACCESS => Ok(VcpuExit::TprAccess),
                KVM_EXIT_S390_SIEIC => Ok(VcpuExit::S390Sieic),
                KVM_EXIT_S390_RESET => Ok(VcpuExit::S390Reset),
                KVM_EXIT_DCR => Ok(VcpuExit::Dcr),
                KVM_EXIT_NMI => Ok(VcpuExit::Nmi),
                KVM_EXIT_INTERNAL_ERROR => Ok(VcpuExit::InternalError),
                KVM_EXIT_OSI => Ok(VcpuExit::Osi),
                KVM_EXIT_PAPR_HCALL => Ok(VcpuExit::PaprHcall),
                KVM_EXIT_S390_UCONTROL => Ok(VcpuExit::S390Ucontrol),
                KVM_EXIT_WATCHDOG => Ok(VcpuExit::Watchdog),
                KVM_EXIT_S390_TSCH => Ok(VcpuExit::S390Tsch),
                KVM_EXIT_EPR => Ok(VcpuExit::Epr),
                KVM_EXIT_SYSTEM_EVENT => {
                    let event_type = {
                        // SAFETY:
                        // Safe because we know the exit reason told us this union
                        // field is valid
                        unsafe { run.__bindgen_anon_1.system_event.type_ }
                    };
                    // SAFETY: same justification as `event_type` above — the exit reason
                    // guarantees the `system_event` union field is the active one.
                    #[allow(clippy::undocumented_unsafe_blocks)]
                    let event_flags =
                        unsafe { run.__bindgen_anon_1.system_event.__bindgen_anon_1.flags };
                    Ok(VcpuExit::SystemEvent(event_type, event_flags))
                }
                // An exit reason this wrapper does not know about indicates a
                // kernel/bindings version mismatch; there is no safe way to continue.
                r => panic!("unknown kvm exit reason: {}", r),
            }
        } else {
            errno_result()
        }
    }
}
1783 
// Allow all `Vcpu` methods to be called directly on a `RunnableVcpu`.
impl Deref for RunnableVcpu {
    type Target = Vcpu;
    fn deref(&self) -> &Self::Target {
        &self.vcpu
    }
}
1790 
// Mutable counterpart of the `Deref` impl above `Vcpu` methods taking `&mut self`
// are also usable on a `RunnableVcpu`.
impl DerefMut for RunnableVcpu {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.vcpu
    }
}
1796 
1797 impl AsRawDescriptor for RunnableVcpu {
as_raw_descriptor(&self) -> RawDescriptor1798     fn as_raw_descriptor(&self) -> RawDescriptor {
1799         self.vcpu.as_raw_descriptor()
1800     }
1801 }
1802 
impl Drop for RunnableVcpu {
    fn drop(&mut self) {
        // Clear the thread-local VCPU state with the VCPU signal blocked, so a
        // signal arriving mid-teardown cannot observe a half-cleared state.
        VCPU_THREAD.with(|v| {
            // This assumes that a failure in `BlockedSignal::new` means the signal is already
            // blocked and therefore it should not be unblocked on exit.
            let _blocked_signal = &(*v.borrow())
                .as_ref()
                .and_then(|state| state.signal_num)
                .map(BlockedSignal::new);

            *v.borrow_mut() = None;
        });
    }
}
1817 
/// Wrapper for kvm_cpuid2 which has a zero length array at the end.
/// Hides the zero length array behind a bounds check.
///
/// Only compiled on x86_64, the architecture where KVM exposes CPUID.
#[cfg(target_arch = "x86_64")]
pub type CpuId = FlexibleArrayWrapper<kvm_cpuid2, kvm_cpuid_entry2>;
1822