// Copyright 2017 The ChromiumOS Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

//! A safe wrapper around the kernel's KVM interface.
//!
//! New code should use the `hypervisor` crate instead.

#![cfg(any(target_os = "android", target_os = "linux"))]

mod cap;

use std::cell::RefCell;
use std::cmp::min;
use std::cmp::Ordering;
use std::collections::BTreeMap;
use std::collections::BinaryHeap;
use std::ffi::CString;
use std::fs::File;
use std::mem::size_of;
use std::ops::Deref;
use std::ops::DerefMut;
use std::os::raw::*;
use std::os::unix::prelude::OsStrExt;
use std::path::Path;
use std::ptr::copy_nonoverlapping;
use std::sync::Arc;

#[allow(unused_imports)]
use base::ioctl;
#[allow(unused_imports)]
use base::ioctl_with_mut_ptr;
#[allow(unused_imports)]
use base::ioctl_with_mut_ref;
#[allow(unused_imports)]
use base::ioctl_with_ptr;
#[allow(unused_imports)]
use base::ioctl_with_ref;
#[allow(unused_imports)]
use base::ioctl_with_val;
#[allow(unused_imports)]
use base::pagesize;
#[allow(unused_imports)]
use base::signal;
use base::sys::BlockedSignal;
#[allow(unused_imports)]
use base::unblock_signal;
#[allow(unused_imports)]
use base::warn;
use base::AsRawDescriptor;
#[allow(unused_imports)]
use base::Error;
#[allow(unused_imports)]
use base::Event;
use base::FromRawDescriptor;
#[allow(unused_imports)]
use base::IoctlNr;
#[allow(unused_imports)]
use base::MappedRegion;
#[allow(unused_imports)]
use base::MemoryMapping;
#[allow(unused_imports)]
use base::MemoryMappingBuilder;
#[allow(unused_imports)]
use base::MmapError;
use base::RawDescriptor;
#[allow(unused_imports)]
use base::Result;
#[allow(unused_imports)]
use base::SIGRTMIN;
use data_model::vec_with_array_field;
#[cfg(target_arch = "x86_64")]
use data_model::FlexibleArrayWrapper;
use kvm_sys::*;
use libc::open64;
use libc::sigset_t;
use libc::EBUSY;
use libc::EINVAL;
use libc::ENOENT;
use libc::ENOSPC;
use libc::EOVERFLOW;
use libc::O_CLOEXEC;
use libc::O_RDWR;
use sync::Mutex;
use vm_memory::GuestAddress;
use vm_memory::GuestMemory;

pub use crate::cap::*;

fn errno_result<T>() -> Result<T> {
    Err(Error::last())
}

unsafe fn set_user_memory_region<F: AsRawDescriptor>(
    fd: &F,
    slot: u32,
    read_only: bool,
    log_dirty_pages: bool,
    guest_addr: u64,
    memory_size: u64,
    userspace_addr: *mut u8,
) -> Result<()> {
    let mut flags = if read_only { KVM_MEM_READONLY } else { 0 };
    if log_dirty_pages {
        flags |= KVM_MEM_LOG_DIRTY_PAGES;
    }
    let region = kvm_userspace_memory_region {
        slot,
        flags,
        guest_phys_addr: guest_addr,
        memory_size,
        userspace_addr: userspace_addr as u64,
    };

    let ret = ioctl_with_ref(fd, KVM_SET_USER_MEMORY_REGION, &region);
    if ret == 0 {
        Ok(())
    } else {
        errno_result()
    }
}

/// Helper function to determine the size in bytes of a dirty log bitmap for the given memory region
/// size.
///
/// # Arguments
///
/// * `size` - Number of bytes in the memory region being queried.
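///
/// # Example
///
/// A minimal sketch (not run as a doctest, since the result depends on the host page size);
/// the arithmetic shown assumes a 4 KiB page size:
///
/// ```ignore
/// // 16 pages of 4 KiB need 16 bits of dirty log, rounded up to 2 bytes.
/// assert_eq!(dirty_log_bitmap_size(16 * 4096), 2);
/// ```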
pub fn dirty_log_bitmap_size(size: usize) -> usize {
    let page_size = pagesize();
    (((size + page_size - 1) / page_size) + 7) / 8
}

/// A wrapper around opening and using `/dev/kvm`.
///
/// Useful for querying extensions and basic values from the KVM backend. A `Kvm` is required to
/// create a `Vm` object.
pub struct Kvm {
    kvm: File,
}

impl Kvm {
    /// Opens `/dev/kvm` and returns a Kvm object on success.
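    ///
    /// # Example
    ///
    /// A minimal sketch (not run as a doctest); assumes `/dev/kvm` is accessible and that
    /// `Cap::Irqchip` is one of the capabilities re-exported from the `cap` module:
    ///
    /// ```ignore
    /// let kvm = Kvm::new().expect("failed to open /dev/kvm");
    /// if kvm.check_extension(Cap::Irqchip) {
    ///     // An in-kernel irqchip can be created for VMs made from this Kvm.
    /// }
    /// ```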
    pub fn new() -> Result<Kvm> {
        Kvm::new_with_path(Path::new("/dev/kvm"))
    }

    /// Opens a KVM device at `device_path` and returns a Kvm object on success.
    pub fn new_with_path(device_path: &Path) -> Result<Kvm> {
        let c_path = CString::new(device_path.as_os_str().as_bytes()).unwrap();
        // SAFETY:
        // Open calls are safe because we give a nul-terminated string and verify the result.
        let ret = unsafe { open64(c_path.as_ptr(), O_RDWR | O_CLOEXEC) };
        if ret < 0 {
            return errno_result();
        }
        Ok(Kvm {
            kvm: {
                // SAFETY:
                // Safe because we verify that ret is valid and we own the fd.
                unsafe { File::from_raw_descriptor(ret) }
            },
        })
    }

    fn check_extension_int(&self, c: Cap) -> i32 {
        // SAFETY:
        // Safe because we know that our file is a KVM fd and that the extension is one of the ones
        // defined by the kernel.
        unsafe { ioctl_with_val(self, KVM_CHECK_EXTENSION, c as c_ulong) }
    }

    /// Checks if a particular `Cap` is available.
    pub fn check_extension(&self, c: Cap) -> bool {
        self.check_extension_int(c) == 1
    }

    /// Gets the size of the mmap required to use a vcpu's `kvm_run` structure.
    pub fn get_vcpu_mmap_size(&self) -> Result<usize> {
        // SAFETY:
        // Safe because we know that our file is a KVM fd and we verify the return result.
        let res = unsafe { ioctl(self, KVM_GET_VCPU_MMAP_SIZE) };
        if res > 0 {
            Ok(res as usize)
        } else {
            errno_result()
        }
    }

    #[cfg(target_arch = "x86_64")]
    fn get_cpuid(&self, kind: IoctlNr) -> Result<CpuId> {
        const MAX_KVM_CPUID_ENTRIES: usize = 256;
        let mut cpuid = CpuId::new(MAX_KVM_CPUID_ENTRIES);

        // SAFETY:
        // ioctl is unsafe. The kernel is trusted not to write beyond the bounds of the memory
        // allocated for the struct. The limit is read from nent, which is set to the allocated
        // size (MAX_KVM_CPUID_ENTRIES) above.
        let ret = unsafe { ioctl_with_mut_ptr(self, kind, cpuid.as_mut_ptr()) };
        if ret < 0 {
            return errno_result();
        }

        Ok(cpuid)
    }

    /// X86 specific call to get the system supported CPUID values.
    #[cfg(target_arch = "x86_64")]
    pub fn get_supported_cpuid(&self) -> Result<CpuId> {
        self.get_cpuid(KVM_GET_SUPPORTED_CPUID)
    }

    /// X86 specific call to get the system emulated CPUID values.
    #[cfg(target_arch = "x86_64")]
    pub fn get_emulated_cpuid(&self) -> Result<CpuId> {
        self.get_cpuid(KVM_GET_EMULATED_CPUID)
    }

    /// X86 specific call to get the list of supported MSRs.
    ///
    /// See the documentation for KVM_GET_MSR_INDEX_LIST.
    #[cfg(target_arch = "x86_64")]
    pub fn get_msr_index_list(&self) -> Result<Vec<u32>> {
        const MAX_KVM_MSR_ENTRIES: usize = 256;

        let mut msr_list = vec_with_array_field::<kvm_msr_list, u32>(MAX_KVM_MSR_ENTRIES);
        msr_list[0].nmsrs = MAX_KVM_MSR_ENTRIES as u32;

        // SAFETY:
        // ioctl is unsafe. The kernel is trusted not to write beyond the bounds of the memory
        // allocated for the struct. The limit is read from nmsrs, which is set to the allocated
        // size (MAX_KVM_MSR_ENTRIES) above.
        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_MSR_INDEX_LIST, &mut msr_list[0]) };
        if ret < 0 {
            return errno_result();
        }

        let mut nmsrs = msr_list[0].nmsrs;

        // SAFETY:
        // Mapping the unsized array to a slice is unsafe because the length isn't known. Using
        // the length we originally allocated with eliminates the possibility of overflow.
        let indices: &[u32] = unsafe {
            if nmsrs > MAX_KVM_MSR_ENTRIES as u32 {
                nmsrs = MAX_KVM_MSR_ENTRIES as u32;
            }
            msr_list[0].indices.as_slice(nmsrs as usize)
        };

        Ok(indices.to_vec())
    }

    #[cfg(any(target_arch = "x86_64", target_arch = "riscv64"))]
    // The x86 and riscv machine type is always 0.
    pub fn get_vm_type(&self) -> c_ulong {
        0
    }

    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    // Compute the machine type, which should be the IPA range for the VM.
    // Ideally, this would take a description of the memory map and return
    // the closest machine type for this VM. Here, we just return the maximum
    // the kernel supports.
    #[allow(clippy::useless_conversion)]
    pub fn get_vm_type(&self) -> c_ulong {
        // SAFETY:
        // Safe because we know self is a real kvm fd.
        match unsafe { ioctl_with_val(self, KVM_CHECK_EXTENSION, KVM_CAP_ARM_VM_IPA_SIZE.into()) } {
            // Not supported? Use 0 as the machine type, which implies a 40-bit IPA.
            ret if ret < 0 => 0,
            // Use the lower 8 bits representing the IPA space as the machine type.
            ipa => (ipa & 0xff) as c_ulong,
        }
    }
}

impl AsRawDescriptor for Kvm {
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.kvm.as_raw_descriptor()
    }
}

/// An address either in programmable I/O space or in memory mapped I/O space.
#[derive(Copy, Clone, Debug)]
pub enum IoeventAddress {
    Pio(u64),
    Mmio(u64),
}

/// Used in `Vm::register_ioevent` to indicate a size and optionally value to match.
pub enum Datamatch {
    AnyLength,
    U8(Option<u8>),
    U16(Option<u16>),
    U32(Option<u32>),
    U64(Option<u64>),
}

/// A source of IRQs in an `IrqRoute`.
pub enum IrqSource {
    Irqchip { chip: u32, pin: u32 },
    Msi { address: u64, data: u32 },
}

/// A single route for an IRQ.
pub struct IrqRoute {
    pub gsi: u32,
    pub source: IrqSource,
}

/// Interrupt controller IDs
pub enum PicId {
    Primary = 0,
    Secondary = 1,
}

/// Number of pins on the IOAPIC.
pub const NUM_IOAPIC_PINS: usize = 24;

// Used to invert the order when stored in a max-heap.
#[derive(Copy, Clone, Eq, PartialEq)]
struct MemSlot(u32);

impl Ord for MemSlot {
    fn cmp(&self, other: &MemSlot) -> Ordering {
        // Notice the order is inverted so the lowest magnitude slot has the highest priority in a
        // max-heap.
        other.0.cmp(&self.0)
    }
}

impl PartialOrd for MemSlot {
    fn partial_cmp(&self, other: &MemSlot) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

/// A wrapper around creating and using a VM.
pub struct Vm {
    vm: File,
    guest_mem: GuestMemory,
    mem_regions: Arc<Mutex<BTreeMap<u32, Box<dyn MappedRegion>>>>,
    mem_slot_gaps: Arc<Mutex<BinaryHeap<MemSlot>>>,
}

impl Vm {
    /// Constructs a new `Vm` using the given `Kvm` instance.
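    ///
    /// # Example
    ///
    /// A minimal sketch (not run as a doctest); assumes a `GuestMemory` constructor taking
    /// `(GuestAddress, size)` ranges, as in crosvm's `vm_memory` crate:
    ///
    /// ```ignore
    /// let kvm = Kvm::new()?;
    /// // 256 MiB of guest RAM starting at guest physical address 0.
    /// let gm = GuestMemory::new(&[(GuestAddress(0), 256 << 20)])?;
    /// let vm = Vm::new(&kvm, gm)?;
    /// ```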
    pub fn new(kvm: &Kvm, guest_mem: GuestMemory) -> Result<Vm> {
        // SAFETY:
        // Safe because we know kvm is a real kvm fd as this module is the only one that can make
        // Kvm objects.
        let ret = unsafe { ioctl_with_val(kvm, KVM_CREATE_VM, kvm.get_vm_type()) };
        if ret >= 0 {
            // SAFETY:
            // Safe because we verify the value of ret and we are the owners of the fd.
            let vm_file = unsafe { File::from_raw_descriptor(ret) };
            for region in guest_mem.regions() {
                // SAFETY:
                // Safe because the guest regions are guaranteed not to overlap.
                unsafe {
                    set_user_memory_region(
                        &vm_file,
                        region.index as u32,
                        false,
                        false,
                        region.guest_addr.offset(),
                        region.size as u64,
                        region.host_addr as *mut u8,
                    )
                }?;
            }

            Ok(Vm {
                vm: vm_file,
                guest_mem,
                mem_regions: Arc::new(Mutex::new(BTreeMap::new())),
                mem_slot_gaps: Arc::new(Mutex::new(BinaryHeap::new())),
            })
        } else {
            errno_result()
        }
    }

    /// Checks if a particular `Cap` is available.
    ///
    /// This is distinct from the `Kvm` version of this method because some extensions depend on
    /// the existence of the particular `Vm`. This method is encouraged by the kernel because it
    /// more accurately reflects the usable capabilities.
    pub fn check_extension(&self, c: Cap) -> bool {
        // SAFETY:
        // Safe because we know that our file is a KVM fd and that the extension is one of the ones
        // defined by the kernel.
        unsafe { ioctl_with_val(self, KVM_CHECK_EXTENSION, c as c_ulong) == 1 }
    }

    /// Inserts the given `mem` into the VM's address space at `guest_addr`.
    ///
    /// The slot that was assigned the kvm memory mapping is returned on success. The slot can be
    /// given to `Vm::remove_memory_region` to remove the memory from the VM's address space and
    /// take back ownership of `mem`.
    ///
    /// Note that memory inserted into the VM's address space must not overlap with any other memory
    /// slot's region.
    ///
    /// If `read_only` is true, the guest will be able to read the memory as normal, but attempts to
    /// write will trigger a mmio VM exit, leaving the memory untouched.
    ///
    /// If `log_dirty_pages` is true, the slot number can be used to retrieve the pages written to
    /// by the guest with `get_dirty_log`.
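    ///
    /// # Example
    ///
    /// A minimal sketch (not run as a doctest); the 4 GiB guest address is an arbitrary
    /// illustrative value assumed not to overlap any existing region:
    ///
    /// ```ignore
    /// let mmap = MemoryMappingBuilder::new(pagesize()).build()?;
    /// let slot = vm.add_memory_region(
    ///     GuestAddress(0x1_0000_0000),
    ///     Box::new(mmap),
    ///     false, // read_only
    ///     false, // log_dirty_pages
    /// )?;
    /// ```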
    pub fn add_memory_region(
        &mut self,
        guest_addr: GuestAddress,
        mem: Box<dyn MappedRegion>,
        read_only: bool,
        log_dirty_pages: bool,
    ) -> Result<u32> {
        let size = mem.size() as u64;
        let end_addr = guest_addr
            .checked_add(size)
            .ok_or_else(|| Error::new(EOVERFLOW))?;
        if self.guest_mem.range_overlap(guest_addr, end_addr) {
            return Err(Error::new(ENOSPC));
        }
        let mut regions = self.mem_regions.lock();
        let mut gaps = self.mem_slot_gaps.lock();
        let slot = match gaps.pop() {
            Some(gap) => gap.0,
            None => (regions.len() + self.guest_mem.num_regions() as usize) as u32,
        };

        // SAFETY:
        // Safe because we check that the given guest address is valid and has no overlaps. We also
        // know that the pointer and size are correct because the MemoryMapping interface ensures
        // this. We take ownership of the memory mapping so that it won't be unmapped until the slot
        // is removed.
        let res = unsafe {
            set_user_memory_region(
                &self.vm,
                slot,
                read_only,
                log_dirty_pages,
                guest_addr.offset(),
                size,
                mem.as_ptr(),
            )
        };

        if let Err(e) = res {
            gaps.push(MemSlot(slot));
            return Err(e);
        }
        regions.insert(slot, mem);
        Ok(slot)
    }

    /// Removes memory that was previously added at the given slot.
    ///
    /// Ownership of the host memory mapping associated with the given slot is returned on success.
    pub fn remove_memory_region(&mut self, slot: u32) -> Result<Box<dyn MappedRegion>> {
        let mut regions = self.mem_regions.lock();
        if !regions.contains_key(&slot) {
            return Err(Error::new(ENOENT));
        }
        // SAFETY:
        // Safe because the slot is checked against the list of memory slots.
        unsafe {
            set_user_memory_region(&self.vm, slot, false, false, 0, 0, std::ptr::null_mut())?;
        }
        self.mem_slot_gaps.lock().push(MemSlot(slot));
        // This remove will always succeed because of the contains_key check above.
        Ok(regions.remove(&slot).unwrap())
    }

    /// Gets the bitmap of dirty pages since the last call to `get_dirty_log` for the memory at
    /// `slot`.
    ///
    /// The size of `dirty_log` must be at least as many bits as there are pages in the memory
    /// region `slot` represents. For example, if the size of `slot` is 16 pages, `dirty_log` must
    /// be 2 bytes or greater.
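    ///
    /// # Example
    ///
    /// A minimal sketch (not run as a doctest); assumes `slot` came from `add_memory_region`
    /// with `log_dirty_pages` set and that `region_size` is the size of that region in bytes:
    ///
    /// ```ignore
    /// let mut dirty_log = vec![0u8; dirty_log_bitmap_size(region_size)];
    /// vm.get_dirty_log(slot, &mut dirty_log)?;
    /// ```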
    pub fn get_dirty_log(&self, slot: u32, dirty_log: &mut [u8]) -> Result<()> {
        match self.mem_regions.lock().get(&slot) {
            Some(mem) => {
                // Ensure that dirty_log has at least one bit for every page in the mmap.
                if dirty_log_bitmap_size(mem.size()) > dirty_log.len() {
                    return Err(Error::new(EINVAL));
                }
                let mut dirty_log_kvm = kvm_dirty_log {
                    slot,
                    ..Default::default()
                };
                dirty_log_kvm.__bindgen_anon_1.dirty_bitmap = dirty_log.as_ptr() as *mut c_void;
                // SAFETY:
                // Safe because the `dirty_bitmap` pointer assigned above is guaranteed to be valid
                // (because it's from a slice) and we checked that it will be large enough to hold
                // the entire log.
                let ret = unsafe { ioctl_with_ref(self, KVM_GET_DIRTY_LOG, &dirty_log_kvm) };
                if ret == 0 {
                    Ok(())
                } else {
                    errno_result()
                }
            }
            _ => Err(Error::new(ENOENT)),
        }
    }

    /// Gets a reference to the guest memory owned by this VM.
    ///
    /// Note that `GuestMemory` does not include any mmio memory that may have been added after
    /// this VM was constructed.
    pub fn get_memory(&self) -> &GuestMemory {
        &self.guest_mem
    }

    /// Sets the address of a one-page region in the VM's address space.
    ///
    /// See the documentation on the KVM_SET_IDENTITY_MAP_ADDR ioctl.
    #[cfg(target_arch = "x86_64")]
    pub fn set_identity_map_addr(&self, addr: GuestAddress) -> Result<()> {
        // SAFETY:
        // Safe because we know that our file is a VM fd and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_SET_IDENTITY_MAP_ADDR, &addr.offset()) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Retrieves the current timestamp of kvmclock as seen by the current guest.
    ///
    /// See the documentation on the KVM_GET_CLOCK ioctl.
    #[cfg(target_arch = "x86_64")]
    pub fn get_clock(&self) -> Result<kvm_clock_data> {
        // SAFETY: trivially safe
        let mut clock_data = unsafe { std::mem::zeroed() };
        // SAFETY:
        // Safe because we know that our file is a VM fd, we know the kernel will only write the
        // correct amount of memory to our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_CLOCK, &mut clock_data) };
        if ret == 0 {
            Ok(clock_data)
        } else {
            errno_result()
        }
    }

    /// Sets the current timestamp of kvmclock to the specified value.
    ///
    /// See the documentation on the KVM_SET_CLOCK ioctl.
    #[cfg(target_arch = "x86_64")]
    pub fn set_clock(&self, clock_data: &kvm_clock_data) -> Result<()> {
        // SAFETY:
        // Safe because we know that our file is a VM fd, we know the kernel will only read the
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_SET_CLOCK, clock_data) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Creates an in-kernel interrupt controller.
    ///
    /// See the documentation on the KVM_CREATE_IRQCHIP ioctl.
    #[cfg(any(target_arch = "x86_64", target_arch = "arm", target_arch = "aarch64"))]
    pub fn create_irq_chip(&self) -> Result<()> {
        // SAFETY:
        // Safe because we know that our file is a VM fd and we verify the return result.
        let ret = unsafe { ioctl(self, KVM_CREATE_IRQCHIP) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Retrieves the state of the given interrupt controller by issuing the KVM_GET_IRQCHIP ioctl.
    ///
    /// Note that this call can only succeed after a call to `Vm::create_irq_chip`.
    #[cfg(target_arch = "x86_64")]
    pub fn get_pic_state(&self, id: PicId) -> Result<kvm_pic_state> {
        let mut irqchip_state = kvm_irqchip {
            chip_id: id as u32,
            ..Default::default()
        };
        // SAFETY:
        // Safe because we know our file is a VM fd, we know the kernel will only write the
        // correct amount of memory to our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_IRQCHIP, &mut irqchip_state) };
        if ret == 0 {
            Ok(
                // SAFETY:
                // Safe as we know that we are retrieving data related to the
                // PIC (primary or secondary) and not the IOAPIC.
                unsafe { irqchip_state.chip.pic },
            )
        } else {
            errno_result()
        }
    }

    /// Sets the state of the given interrupt controller by issuing the KVM_SET_IRQCHIP ioctl.
    ///
    /// Note that this call can only succeed after a call to `Vm::create_irq_chip`.
    #[cfg(target_arch = "x86_64")]
    pub fn set_pic_state(&self, id: PicId, state: &kvm_pic_state) -> Result<()> {
        let mut irqchip_state = kvm_irqchip {
            chip_id: id as u32,
            ..Default::default()
        };
        irqchip_state.chip.pic = *state;
        // SAFETY:
        // Safe because we know that our file is a VM fd, we know the kernel will only read the
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_SET_IRQCHIP, &irqchip_state) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Retrieves the state of the IOAPIC by issuing the KVM_GET_IRQCHIP ioctl.
    ///
    /// Note that this call can only succeed after a call to `Vm::create_irq_chip`.
    #[cfg(target_arch = "x86_64")]
    pub fn get_ioapic_state(&self) -> Result<kvm_ioapic_state> {
        let mut irqchip_state = kvm_irqchip {
            chip_id: 2,
            ..Default::default()
        };
        let ret =
            // SAFETY:
            // Safe because we know our file is a VM fd, we know the kernel will only write the
            // correct amount of memory to our pointer, and we verify the return result.
            unsafe {
                ioctl_with_mut_ref(self, KVM_GET_IRQCHIP, &mut irqchip_state)
            };
        if ret == 0 {
            Ok(
                // SAFETY:
                // Safe as we know that we are retrieving data related to the
                // IOAPIC and not the PIC.
                unsafe { irqchip_state.chip.ioapic },
            )
        } else {
            errno_result()
        }
    }

    /// Sets the state of the IOAPIC by issuing the KVM_SET_IRQCHIP ioctl.
    ///
    /// Note that this call can only succeed after a call to `Vm::create_irq_chip`.
    #[cfg(target_arch = "x86_64")]
    pub fn set_ioapic_state(&self, state: &kvm_ioapic_state) -> Result<()> {
        let mut irqchip_state = kvm_irqchip {
            chip_id: 2,
            ..Default::default()
        };
        irqchip_state.chip.ioapic = *state;
        // SAFETY:
        // Safe because we know that our file is a VM fd, we know the kernel will only read the
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_SET_IRQCHIP, &irqchip_state) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Sets the level on the given irq to 1 if `active` is true, and 0 otherwise.
    #[cfg(any(target_arch = "x86_64", target_arch = "arm", target_arch = "aarch64"))]
    pub fn set_irq_line(&self, irq: u32, active: bool) -> Result<()> {
        let mut irq_level = kvm_irq_level::default();
        irq_level.__bindgen_anon_1.irq = irq;
        irq_level.level = active.into();

        // SAFETY:
        // Safe because we know that our file is a VM fd, we know the kernel will only read the
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_IRQ_LINE, &irq_level) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Creates a PIT as per the KVM_CREATE_PIT2 ioctl.
    ///
    /// Note that this call can only succeed after a call to `Vm::create_irq_chip`.
    #[cfg(target_arch = "x86_64")]
    pub fn create_pit(&self) -> Result<()> {
        let pit_config = kvm_pit_config::default();
        // SAFETY:
        // Safe because we know that our file is a VM fd, we know the kernel will only read the
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_CREATE_PIT2, &pit_config) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Retrieves the state of the PIT by issuing the KVM_GET_PIT2 ioctl.
    ///
    /// Note that this call can only succeed after a call to `Vm::create_pit`.
    #[cfg(target_arch = "x86_64")]
    pub fn get_pit_state(&self) -> Result<kvm_pit_state2> {
        // SAFETY: trivially safe
        let mut pit_state = unsafe { std::mem::zeroed() };
        // SAFETY:
        // Safe because we know that our file is a VM fd, we know the kernel will only write the
        // correct amount of memory to our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_PIT2, &mut pit_state) };
        if ret == 0 {
            Ok(pit_state)
        } else {
            errno_result()
        }
    }

    /// Sets the state of the PIT by issuing the KVM_SET_PIT2 ioctl.
    ///
    /// Note that this call can only succeed after a call to `Vm::create_pit`.
    #[cfg(target_arch = "x86_64")]
    pub fn set_pit_state(&self, pit_state: &kvm_pit_state2) -> Result<()> {
        // SAFETY:
        // Safe because we know that our file is a VM fd, we know the kernel will only read the
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_SET_PIT2, pit_state) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Registers an event to be signaled whenever a certain address is written to.
    ///
    /// The `datamatch` parameter can be used to limit signaling `evt` to only the cases where the
    /// value being written is equal to `datamatch`. Note that the size of `datamatch` is important
    /// and must match the expected size of the guest's write.
    ///
    /// In all cases where `evt` is signaled, the ordinary vmexit to userspace that would be
    /// triggered is prevented.
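    ///
    /// # Example
    ///
    /// A minimal sketch (not run as a doctest); the port and byte value are arbitrary
    /// illustrative choices:
    ///
    /// ```ignore
    /// let evt = Event::new()?;
    /// // Signal `evt` whenever the guest writes the byte 0x42 to I/O port 0x3f8.
    /// vm.register_ioevent(&evt, IoeventAddress::Pio(0x3f8), Datamatch::U8(Some(0x42)))?;
    /// ```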
    pub fn register_ioevent(
        &self,
        evt: &Event,
        addr: IoeventAddress,
        datamatch: Datamatch,
    ) -> Result<()> {
        self.ioeventfd(evt, addr, datamatch, false)
    }

    /// Unregisters an event previously registered with `register_ioevent`.
    ///
    /// The `evt`, `addr`, and `datamatch` set must be the same as the ones passed into
    /// `register_ioevent`.
    pub fn unregister_ioevent(
        &self,
        evt: &Event,
        addr: IoeventAddress,
        datamatch: Datamatch,
    ) -> Result<()> {
        self.ioeventfd(evt, addr, datamatch, true)
    }

    fn ioeventfd(
        &self,
        evt: &Event,
        addr: IoeventAddress,
        datamatch: Datamatch,
        deassign: bool,
    ) -> Result<()> {
        let (do_datamatch, datamatch_value, datamatch_len) = match datamatch {
            Datamatch::AnyLength => (false, 0, 0),
            Datamatch::U8(v) => match v {
                Some(u) => (true, u as u64, 1),
                None => (false, 0, 1),
            },
            Datamatch::U16(v) => match v {
                Some(u) => (true, u as u64, 2),
                None => (false, 0, 2),
            },
            Datamatch::U32(v) => match v {
                Some(u) => (true, u as u64, 4),
                None => (false, 0, 4),
            },
            Datamatch::U64(v) => match v {
                Some(u) => (true, u, 8),
                None => (false, 0, 8),
            },
        };
        let mut flags = 0;
        if deassign {
            flags |= 1 << kvm_ioeventfd_flag_nr_deassign;
        }
        if do_datamatch {
            flags |= 1 << kvm_ioeventfd_flag_nr_datamatch;
        }
        if let IoeventAddress::Pio(_) = addr {
            flags |= 1 << kvm_ioeventfd_flag_nr_pio;
        }
        let ioeventfd = kvm_ioeventfd {
            datamatch: datamatch_value,
            len: datamatch_len,
            addr: match addr {
                IoeventAddress::Pio(p) => p,
                IoeventAddress::Mmio(m) => m,
            },
            fd: evt.as_raw_descriptor(),
            flags,
            ..Default::default()
        };
        // SAFETY:
        // Safe because we know that our file is a VM fd, we know the kernel will only read the
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_IOEVENTFD, &ioeventfd) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Registers an event that will, when signalled, trigger the `gsi` irq, and `resample_evt`
    /// will get triggered when the irqchip is resampled.
    #[cfg(any(target_arch = "x86_64", target_arch = "arm", target_arch = "aarch64"))]
    pub fn register_irqfd_resample(
        &self,
        evt: &Event,
        resample_evt: &Event,
        gsi: u32,
    ) -> Result<()> {
        let irqfd = kvm_irqfd {
            flags: KVM_IRQFD_FLAG_RESAMPLE,
            fd: evt.as_raw_descriptor() as u32,
            resamplefd: resample_evt.as_raw_descriptor() as u32,
            gsi,
            ..Default::default()
        };
        // SAFETY:
        // Safe because we know that our file is a VM fd, we know the kernel will only read the
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_IRQFD, &irqfd) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Unregisters an event that was previously registered with
    /// `register_irqfd`/`register_irqfd_resample`.
    ///
    /// The `evt` and `gsi` pair must be the same as the ones passed into
    /// `register_irqfd`/`register_irqfd_resample`.
    #[cfg(any(target_arch = "x86_64", target_arch = "arm", target_arch = "aarch64"))]
    pub fn unregister_irqfd(&self, evt: &Event, gsi: u32) -> Result<()> {
        let irqfd = kvm_irqfd {
            fd: evt.as_raw_descriptor() as u32,
            gsi,
            flags: KVM_IRQFD_FLAG_DEASSIGN,
            ..Default::default()
        };
        // SAFETY:
        // Safe because we know that our file is a VM fd, we know the kernel will only read the
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_IRQFD, &irqfd) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Sets the GSI routing table, replacing any table set with previous calls to
    /// `set_gsi_routing`.
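    ///
    /// # Example
    ///
    /// A minimal sketch (not run as a doctest); chip 0 is the primary PIC and the GSI/pin
    /// values are arbitrary illustrative choices:
    ///
    /// ```ignore
    /// vm.set_gsi_routing(&[IrqRoute {
    ///     gsi: 5,
    ///     source: IrqSource::Irqchip { chip: 0, pin: 5 },
    /// }])?;
    /// ```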
    #[cfg(target_arch = "x86_64")]
    pub fn set_gsi_routing(&self, routes: &[IrqRoute]) -> Result<()> {
        let mut irq_routing =
            vec_with_array_field::<kvm_irq_routing, kvm_irq_routing_entry>(routes.len());
        irq_routing[0].nr = routes.len() as u32;

        // SAFETY:
        // Safe because we ensured there is enough space in irq_routing to hold the number of
        // route entries.
        let irq_routes = unsafe { irq_routing[0].entries.as_mut_slice(routes.len()) };
        for (route, irq_route) in routes.iter().zip(irq_routes.iter_mut()) {
            irq_route.gsi = route.gsi;
            match route.source {
                IrqSource::Irqchip { chip, pin } => {
                    irq_route.type_ = KVM_IRQ_ROUTING_IRQCHIP;
                    irq_route.u.irqchip = kvm_irq_routing_irqchip { irqchip: chip, pin }
                }
                IrqSource::Msi { address, data } => {
                    irq_route.type_ = KVM_IRQ_ROUTING_MSI;
                    irq_route.u.msi = kvm_irq_routing_msi {
                        address_lo: address as u32,
                        address_hi: (address >> 32) as u32,
                        data,
                        ..Default::default()
                    }
                }
            }
        }

        // TODO(b/315998194): Add safety comment
        #[allow(clippy::undocumented_unsafe_blocks)]
        let ret = unsafe { ioctl_with_ref(self, KVM_SET_GSI_ROUTING, &irq_routing[0]) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Enables the specified capability.
    ///
    /// See the documentation for KVM_ENABLE_CAP.
    ///
    /// # Safety
    /// This function is marked as unsafe because `cap` may contain values which are interpreted as
    /// pointers by the kernel.
    pub unsafe fn kvm_enable_cap(&self, cap: &kvm_enable_cap) -> Result<()> {
        // Safe because we allocated the struct and we know the kernel will read exactly the size
        // of the struct.
        let ret = ioctl_with_ref(self, KVM_ENABLE_CAP, cap);
        if ret < 0 {
            errno_result()
        } else {
            Ok(())
        }
    }
}

impl AsRawDescriptor for Vm {
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.vm.as_raw_descriptor()
    }
}

/// A reason why a VCPU exited. One of these returns every time `Vcpu::run` is called.
#[derive(Debug)]
pub enum VcpuExit {
    /// An out port instruction was run on the given port with the given data.
    IoOut {
        port: u16,
        size: usize,
        data: [u8; 8],
    },
    /// An in port instruction was run on the given port.
    ///
    /// The data that the instruction receives should be set with `set_data` before `Vcpu::run` is
    /// called again.
    IoIn {
        port: u16,
        size: usize,
    },
    /// A read instruction was run against the given MMIO address.
    ///
    /// The data that the instruction receives should be set with `set_data` before `Vcpu::run` is
    /// called again.
    MmioRead {
        address: u64,
        size: usize,
    },
    /// A write instruction was run against the given MMIO address with the given data.
    MmioWrite {
        address: u64,
        size: usize,
        data: [u8; 8],
    },
    IoapicEoi {
        vector: u8,
    },
    HypervSynic {
        msr: u32,
        control: u64,
        evt_page: u64,
        msg_page: u64,
    },
    HypervHcall {
        input: u64,
        params: [u64; 2],
    },
    Unknown,
    Exception,
    Hypercall,
    Debug,
    Hlt,
    IrqWindowOpen,
    Shutdown,
    FailEntry {
        hardware_entry_failure_reason: u64,
    },
    Intr,
    SetTpr,
    TprAccess,
    S390Sieic,
    S390Reset,
    Dcr,
    Nmi,
    InternalError,
    Osi,
    PaprHcall,
    S390Ucontrol,
    Watchdog,
    S390Tsch,
    Epr,
    /// The cpu triggered a system level event which is specified by the type field.
    /// The first field is the event type and the second field is flags.
    /// The possible event types are shutdown, reset, or crash. So far there
    /// are not any flags defined.
    SystemEvent(u32 /* event_type */, u64 /* flags */),
}

/// A wrapper around creating and using a VCPU.
/// `Vcpu` provides all functionality except for running. To run, `to_runnable` must be called to
/// lock the vcpu to a thread. Then the returned `RunnableVcpu` can be used for running.
pub struct Vcpu {
    vcpu: File,
    run_mmap: MemoryMapping,
}

pub struct VcpuThread {
    run: *mut kvm_run,
    signal_num: Option<c_int>,
}

thread_local!(static VCPU_THREAD: RefCell<Option<VcpuThread>> = const { RefCell::new(None) });

impl Vcpu {
    /// Constructs a new VCPU for `vm`.
    ///
    /// The `id` argument is the CPU number between [0, max vcpus).
    pub fn new(id: c_ulong, kvm: &Kvm, vm: &Vm) -> Result<Vcpu> {
        let run_mmap_size = kvm.get_vcpu_mmap_size()?;

        // SAFETY:
        // Safe because we know that vm is a VM fd and we verify the return result.
        let vcpu_fd = unsafe { ioctl_with_val(vm, KVM_CREATE_VCPU, id) };
        if vcpu_fd < 0 {
            return errno_result();
        }

        // SAFETY:
        // Wrap the vcpu now in case the following ? returns early. This is safe because we
        // verified the value of the fd and we own the fd.
        let vcpu = unsafe { File::from_raw_descriptor(vcpu_fd) };

        let run_mmap = MemoryMappingBuilder::new(run_mmap_size)
            .from_file(&vcpu)
            .build()
            .map_err(|_| Error::new(ENOSPC))?;

        Ok(Vcpu { vcpu, run_mmap })
    }

    /// Consumes `self` and returns a `RunnableVcpu`. A `RunnableVcpu` is required to run the
    /// guest.
    ///
    /// Assigns a vcpu to the current thread and stores it in a hash map that can be used by signal
    /// handlers to call set_local_immediate_exit(). An optional signal number will be temporarily
    /// blocked while assigning the vcpu to the thread and later blocked when `RunnableVcpu` is
    /// destroyed.
    ///
    /// Returns an error, `EBUSY`, if the current thread already contains a Vcpu.
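    ///
    /// # Example
    ///
    /// A minimal sketch of a run loop (not run as a doctest); assumes `vcpu` is a fully
    /// configured `Vcpu` and ignores every exit other than `Hlt`:
    ///
    /// ```ignore
    /// let runnable = vcpu.to_runnable(None)?;
    /// loop {
    ///     match runnable.run()? {
    ///         VcpuExit::Hlt => break,
    ///         _ => {}
    ///     }
    /// }
    /// ```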
    #[allow(clippy::cast_ptr_alignment)]
    pub fn to_runnable(self, signal_num: Option<c_int>) -> Result<RunnableVcpu> {
        // Block the signal while we add -- if a signal fires (very unlikely,
        // as this means something is trying to pause the vcpu before it has
        // even started) it'll try to grab the read lock while this write
        // lock is grabbed and cause a deadlock.
        // Assuming that a failure to block means it's already blocked.
        let _blocked_signal = signal_num.map(BlockedSignal::new);

        VCPU_THREAD.with(|v| {
            if v.borrow().is_none() {
                *v.borrow_mut() = Some(VcpuThread {
                    run: self.run_mmap.as_ptr() as *mut kvm_run,
                    signal_num,
                });
                Ok(())
            } else {
                Err(Error::new(EBUSY))
            }
        })?;

        Ok(RunnableVcpu {
            vcpu: self,
            phantom: Default::default(),
        })
    }

    /// Sets the data received by a mmio read, ioport in, or hypercall instruction.
    ///
    /// This function should be called after `Vcpu::run` returns a `VcpuExit::IoIn`,
    /// `VcpuExit::MmioRead`, or `VcpuExit::HypervHcall`.
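    ///
    /// # Example
    ///
    /// A minimal sketch (not run as a doctest); assumes the previous `run` returned
    /// `VcpuExit::MmioRead { size: 4, .. }` and satisfies the read with zeroes:
    ///
    /// ```ignore
    /// vcpu.set_data(&0u32.to_ne_bytes())?;
    /// ```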
    #[allow(clippy::cast_ptr_alignment)]
    pub fn set_data(&self, data: &[u8]) -> Result<()> {
        // SAFETY:
        // Safe because we know we mapped enough memory to hold the kvm_run struct because the
        // kernel told us how large it was. The pointer is page aligned so casting to a different
        // type is well defined, hence the clippy allow attribute.
        let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };
        match run.exit_reason {
            KVM_EXIT_IO => {
                let run_start = run as *mut kvm_run as *mut u8;
                // SAFETY:
                // Safe because the exit_reason (which comes from the kernel) told us which
                // union field to use.
                let io = unsafe { run.__bindgen_anon_1.io };
                if io.direction as u32 != KVM_EXIT_IO_IN {
                    return Err(Error::new(EINVAL));
                }
                let data_size = (io.count as usize) * (io.size as usize);
                if data_size != data.len() {
                    return Err(Error::new(EINVAL));
                }
                // SAFETY:
                // The data_offset is defined by the kernel to be some number of bytes into the
                // kvm_run structure, which we have fully mmap'd.
                unsafe {
                    let data_ptr = run_start.offset(io.data_offset as isize);
                    copy_nonoverlapping(data.as_ptr(), data_ptr, data_size);
                }
                Ok(())
            }
            KVM_EXIT_MMIO => {
                // SAFETY:
                // Safe because the exit_reason (which comes from the kernel) told us which
                // union field to use.
                let mmio = unsafe { &mut run.__bindgen_anon_1.mmio };
                if mmio.is_write != 0 {
                    return Err(Error::new(EINVAL));
                }
                let len = mmio.len as usize;
                if len != data.len() {
                    return Err(Error::new(EINVAL));
                }
                mmio.data[..len].copy_from_slice(data);
                Ok(())
            }
            KVM_EXIT_HYPERV => {
                // SAFETY:
                // Safe because the exit_reason (which comes from the kernel) told us which
                // union field to use.
                let hyperv = unsafe { &mut run.__bindgen_anon_1.hyperv };
                if hyperv.type_ != KVM_EXIT_HYPERV_HCALL {
                    return Err(Error::new(EINVAL));
                }
                // TODO(b/315998194): Add safety comment
                #[allow(clippy::undocumented_unsafe_blocks)]
                let hcall = unsafe { &mut hyperv.u.hcall };
                match data.try_into() {
                    Ok(data) => {
                        hcall.result = u64::from_ne_bytes(data);
                    }
                    _ => return Err(Error::new(EINVAL)),
                }
                Ok(())
            }
            _ => Err(Error::new(EINVAL)),
        }
    }

    /// Sets the bit that requests an immediate exit.
    #[allow(clippy::cast_ptr_alignment)]
    pub fn set_immediate_exit(&self, exit: bool) {
        // SAFETY:
        // Safe because we know we mapped enough memory to hold the kvm_run struct because the
        // kernel told us how large it was. The pointer is page aligned so casting to a different
        // type is well defined, hence the clippy allow attribute.
        let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut kvm_run) };
        run.immediate_exit = exit.into();
    }

    /// Sets/clears the bit for immediate exit for the vcpu on the current thread.
    pub fn set_local_immediate_exit(exit: bool) {
        VCPU_THREAD.with(|v| {
            if let Some(state) = &(*v.borrow()) {
                // TODO(b/315998194): Add safety comment
                #[allow(clippy::undocumented_unsafe_blocks)]
                unsafe {
                    (*state.run).immediate_exit = exit.into();
                };
            }
        });
    }

    /// Gets the VCPU registers.
    #[cfg(not(any(target_arch = "arm", target_arch = "aarch64")))]
    pub fn get_regs(&self) -> Result<kvm_regs> {
        // SAFETY: trivially safe
        let mut regs = unsafe { std::mem::zeroed() };
        // SAFETY:
        // Safe because we know that our file is a VCPU fd, we know the kernel will only write the
        // correct amount of memory to our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_REGS, &mut regs) };
        if ret != 0 {
            return errno_result();
        }
        Ok(regs)
    }

    /// Sets the VCPU registers.
    #[cfg(not(any(target_arch = "arm", target_arch = "aarch64")))]
    pub fn set_regs(&self, regs: &kvm_regs) -> Result<()> {
        // SAFETY:
        // Safe because we know that our file is a VCPU fd, we know the kernel will only read the
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_SET_REGS, regs) };
        if ret != 0 {
            return errno_result();
        }
        Ok(())
    }

    /// Gets the VCPU special registers.
    #[cfg(target_arch = "x86_64")]
    pub fn get_sregs(&self) -> Result<kvm_sregs> {
        // SAFETY: trivially safe
        let mut regs = unsafe { std::mem::zeroed() };
        // SAFETY:
        // Safe because we know that our file is a VCPU fd, we know the kernel will only write the
        // correct amount of memory to our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_SREGS, &mut regs) };
        if ret != 0 {
            return errno_result();
        }
        Ok(regs)
    }

    /// Sets the VCPU special registers.
    #[cfg(target_arch = "x86_64")]
    pub fn set_sregs(&self, sregs: &kvm_sregs) -> Result<()> {
        // SAFETY:
        // Safe because we know that our file is a VCPU fd, we know the kernel will only read the
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, KVM_SET_SREGS, sregs) };
        if ret != 0 {
            return errno_result();
        }
        Ok(())
    }

    /// Gets the VCPU FPU registers.
    #[cfg(target_arch = "x86_64")]
    pub fn get_fpu(&self) -> Result<kvm_fpu> {
        // SAFETY: trivially safe
        let mut regs = unsafe { std::mem::zeroed() };
        // SAFETY:
        // Safe because we know that our file is a VCPU fd, we know the kernel will only write the
        // correct amount of memory to our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_FPU, &mut regs) };
        if ret != 0 {
            return errno_result();
        }
        Ok(regs)
    }

    /// X86 specific call to setup the FPU.
    ///
    /// See the documentation for KVM_SET_FPU.
    #[cfg(target_arch = "x86_64")]
    pub fn set_fpu(&self, fpu: &kvm_fpu) -> Result<()> {
        let ret = {
            // SAFETY:
            // Here we trust the kernel not to read past the end of the kvm_fpu struct.
            unsafe { ioctl_with_ref(self, KVM_SET_FPU, fpu) }
        };
        if ret < 0 {
            return errno_result();
        }
        Ok(())
    }

    /// Gets the VCPU debug registers.
    #[cfg(target_arch = "x86_64")]
    pub fn get_debugregs(&self) -> Result<kvm_debugregs> {
        // SAFETY: trivially safe
        let mut regs = unsafe { std::mem::zeroed() };
        // SAFETY:
        // Safe because we know that our file is a VCPU fd, we know the kernel will only write the
        // correct amount of memory to our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_DEBUGREGS, &mut regs) };
        if ret != 0 {
            return errno_result();
        }
        Ok(regs)
    }

    /// Sets the VCPU debug registers.
    #[cfg(target_arch = "x86_64")]
    pub fn set_debugregs(&self, dregs: &kvm_debugregs) -> Result<()> {
        let ret = {
            // SAFETY:
            // Here we trust the kernel not to read past the end of the kvm_debugregs struct.
            unsafe { ioctl_with_ref(self, KVM_SET_DEBUGREGS, dregs) }
        };
        if ret < 0 {
            return errno_result();
        }
        Ok(())
    }

    /// Gets the VCPU extended control registers.
    #[cfg(target_arch = "x86_64")]
    pub fn get_xcrs(&self) -> Result<kvm_xcrs> {
        // SAFETY: trivially safe
        let mut regs = unsafe { std::mem::zeroed() };
        // SAFETY:
        // Safe because we know that our file is a VCPU fd, we know the kernel will only write the
        // correct amount of memory to our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_XCRS, &mut regs) };
        if ret != 0 {
            return errno_result();
        }
        Ok(regs)
    }

    /// Sets the VCPU extended control registers.
    #[cfg(target_arch = "x86_64")]
    pub fn set_xcrs(&self, xcrs: &kvm_xcrs) -> Result<()> {
        let ret = {
            // SAFETY:
            // Here we trust the kernel not to read past the end of the kvm_xcrs struct.
            unsafe { ioctl_with_ref(self, KVM_SET_XCRS, xcrs) }
        };
        if ret < 0 {
            return errno_result();
        }
        Ok(())
    }

    /// X86 specific call to get the MSRs.
    ///
    /// See the documentation for KVM_GET_MSRS.
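    ///
    /// # Example
    ///
    /// A minimal sketch (not run as a doctest); reads the TSC MSR (index 0x10) into a
    /// one-entry buffer:
    ///
    /// ```ignore
    /// let mut entries = vec![kvm_msr_entry {
    ///     index: 0x10, // MSR_IA32_TSC
    ///     ..Default::default()
    /// }];
    /// vcpu.get_msrs(&mut entries)?;
    /// let tsc = entries[0].data;
    /// ```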
1342 #[cfg(target_arch = "x86_64")]
get_msrs(&self, msr_entries: &mut Vec<kvm_msr_entry>) -> Result<()>1343 pub fn get_msrs(&self, msr_entries: &mut Vec<kvm_msr_entry>) -> Result<()> {
1344 let mut msrs = vec_with_array_field::<kvm_msrs, kvm_msr_entry>(msr_entries.len());
1345 {
1346 // SAFETY:
1347 // Mapping the unsized array to a slice is unsafe because the length isn't known.
1348 // Providing the length used to create the struct guarantees the entire slice is valid.
1349 unsafe {
1350 let entries: &mut [kvm_msr_entry] = msrs[0].entries.as_mut_slice(msr_entries.len());
1351 entries.copy_from_slice(msr_entries);
1352 }
1353 }
1354 msrs[0].nmsrs = msr_entries.len() as u32;
1355 let ret = {
1356 // SAFETY:
1357 // Here we trust the kernel not to read or write past the end of the kvm_msrs struct.
1358 unsafe { ioctl_with_mut_ref(self, KVM_GET_MSRS, &mut msrs[0]) }
1359 };
1360 if ret < 0 {
1361 // KVM_SET_MSRS actually returns the number of msr entries written.
1362 return errno_result();
1363 }
1364 // TODO(b/315998194): Add safety comment
1365 #[allow(clippy::undocumented_unsafe_blocks)]
1366 unsafe {
1367 let count = ret as usize;
1368 assert!(count <= msr_entries.len());
1369 let entries: &mut [kvm_msr_entry] = msrs[0].entries.as_mut_slice(count);
1370 msr_entries.truncate(count);
1371 msr_entries.copy_from_slice(entries);
1372 }
1373 Ok(())
1374 }
1375
1376 /// X86 specific call to setup the MSRS
1377 ///
1378 /// See the documentation for KVM_SET_MSRS.
1379 #[cfg(target_arch = "x86_64")]
set_msrs(&self, msrs: &kvm_msrs) -> Result<()>1380 pub fn set_msrs(&self, msrs: &kvm_msrs) -> Result<()> {
1381 let ret = {
1382 // SAFETY:
1383 // Here we trust the kernel not to read past the end of the kvm_msrs struct.
1384 unsafe { ioctl_with_ref(self, KVM_SET_MSRS, msrs) }
1385 };
1386 if ret < 0 {
1387 // KVM_SET_MSRS actually returns the number of msr entries written.
1388 return errno_result();
1389 }
1390 Ok(())
1391 }
1392
1393 /// X86 specific call to setup the CPUID registers
1394 ///
1395 /// See the documentation for KVM_SET_CPUID2.
1396 #[cfg(target_arch = "x86_64")]
set_cpuid2(&self, cpuid: &CpuId) -> Result<()>1397 pub fn set_cpuid2(&self, cpuid: &CpuId) -> Result<()> {
1398 let ret = {
1399 // SAFETY:
1400 // Here we trust the kernel not to read past the end of the kvm_msrs struct.
1401 unsafe { ioctl_with_ptr(self, KVM_SET_CPUID2, cpuid.as_ptr()) }
1402 };
1403 if ret < 0 {
1404 return errno_result();
1405 }
1406 Ok(())
1407 }
1408
1409 /// X86 specific call to get the system emulated hyper-v CPUID values
1410 #[cfg(target_arch = "x86_64")]
get_hyperv_cpuid(&self) -> Result<CpuId>1411 pub fn get_hyperv_cpuid(&self) -> Result<CpuId> {
1412 const MAX_KVM_CPUID_ENTRIES: usize = 256;
1413 let mut cpuid = CpuId::new(MAX_KVM_CPUID_ENTRIES);
1414
1415 let ret = {
1416 // SAFETY:
1417 // ioctl is unsafe. The kernel is trusted not to write beyond the bounds of the memory
1418 // allocated for the struct. The limit is read from nent, which is set to the allocated
1419 // size(MAX_KVM_CPUID_ENTRIES) above.
1420 unsafe { ioctl_with_mut_ptr(self, KVM_GET_SUPPORTED_HV_CPUID, cpuid.as_mut_ptr()) }
1421 };
1422 if ret < 0 {
1423 return errno_result();
1424 }
1425 Ok(cpuid)
1426 }
1427
1428 /// X86 specific call to get the state of the "Local Advanced Programmable Interrupt
1429 /// Controller".
1430 ///
1431 /// See the documentation for KVM_GET_LAPIC.
1432 #[cfg(target_arch = "x86_64")]
get_lapic(&self) -> Result<kvm_lapic_state>1433 pub fn get_lapic(&self) -> Result<kvm_lapic_state> {
1434 let mut klapic: kvm_lapic_state = Default::default();
1435
1436 let ret = {
1437 // SAFETY:
1438 // The ioctl is unsafe unless you trust the kernel not to write past the end of the
1439 // local_apic struct.
1440 unsafe { ioctl_with_mut_ref(self, KVM_GET_LAPIC, &mut klapic) }
1441 };
1442 if ret < 0 {
1443 return errno_result();
1444 }
1445 Ok(klapic)
1446 }
1447
1448 /// X86 specific call to set the state of the "Local Advanced Programmable Interrupt
1449 /// Controller".
1450 ///
1451 /// See the documentation for KVM_SET_LAPIC.
1452 #[cfg(target_arch = "x86_64")]
set_lapic(&self, klapic: &kvm_lapic_state) -> Result<()>1453 pub fn set_lapic(&self, klapic: &kvm_lapic_state) -> Result<()> {
1454 let ret = {
1455 // SAFETY:
1456 // The ioctl is safe because the kernel will only read from the klapic struct.
1457 unsafe { ioctl_with_ref(self, KVM_SET_LAPIC, klapic) }
1458 };
1459 if ret < 0 {
1460 return errno_result();
1461 }
1462 Ok(())
1463 }
1464
1465 /// Gets the vcpu's current "multiprocessing state".
1466 ///
1467 /// See the documentation for KVM_GET_MP_STATE. This call can only succeed after
1468 /// a call to `Vm::create_irq_chip`.
1469 ///
1470 /// Note that KVM defines the call for both x86 and s390 but we do not expect anyone
1471 /// to run crosvm on s390.
1472 #[cfg(target_arch = "x86_64")]
get_mp_state(&self) -> Result<kvm_mp_state>1473 pub fn get_mp_state(&self) -> Result<kvm_mp_state> {
1474 // SAFETY: trivially safe
1475 let mut state: kvm_mp_state = unsafe { std::mem::zeroed() };
1476 // SAFETY:
1477 // Safe because we know that our file is a VCPU fd, we know the kernel will only
1478 // write correct amount of memory to our pointer, and we verify the return result.
1479 let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_MP_STATE, &mut state) };
1480 if ret < 0 {
1481 return errno_result();
1482 }
1483 Ok(state)
1484 }
1485
1486 /// Sets the vcpu's current "multiprocessing state".
1487 ///
1488 /// See the documentation for KVM_SET_MP_STATE. This call can only succeed after
1489 /// a call to `Vm::create_irq_chip`.
1490 ///
1491 /// Note that KVM defines the call for both x86 and s390 but we do not expect anyone
1492 /// to run crosvm on s390.
1493 #[cfg(target_arch = "x86_64")]
set_mp_state(&self, state: &kvm_mp_state) -> Result<()>1494 pub fn set_mp_state(&self, state: &kvm_mp_state) -> Result<()> {
1495 let ret = {
1496 // SAFETY:
1497 // The ioctl is safe because the kernel will only read from the kvm_mp_state struct.
1498 unsafe { ioctl_with_ref(self, KVM_SET_MP_STATE, state) }
1499 };
1500 if ret < 0 {
1501 return errno_result();
1502 }
1503 Ok(())
1504 }
1505
1506 /// Gets the vcpu's currently pending exceptions, interrupts, NMIs, etc
1507 ///
1508 /// See the documentation for KVM_GET_VCPU_EVENTS.
1509 #[cfg(target_arch = "x86_64")]
get_vcpu_events(&self) -> Result<kvm_vcpu_events>1510 pub fn get_vcpu_events(&self) -> Result<kvm_vcpu_events> {
1511 // SAFETY: trivially safe
1512 let mut events: kvm_vcpu_events = unsafe { std::mem::zeroed() };
1513 // SAFETY:
1514 // Safe because we know that our file is a VCPU fd, we know the kernel
1515 // will only write correct amount of memory to our pointer, and we
1516 // verify the return result.
1517 let ret = unsafe { ioctl_with_mut_ref(self, KVM_GET_VCPU_EVENTS, &mut events) };
1518 if ret < 0 {
1519 return errno_result();
1520 }
1521 Ok(events)
1522 }
1523
1524 /// Sets the vcpu's currently pending exceptions, interrupts, NMIs, etc
1525 ///
1526 /// See the documentation for KVM_SET_VCPU_EVENTS.
1527 #[cfg(target_arch = "x86_64")]
set_vcpu_events(&self, events: &kvm_vcpu_events) -> Result<()>1528 pub fn set_vcpu_events(&self, events: &kvm_vcpu_events) -> Result<()> {
1529 let ret = {
1530 // SAFETY:
1531 // The ioctl is safe because the kernel will only read from the
1532 // kvm_vcpu_events.
1533 unsafe { ioctl_with_ref(self, KVM_SET_VCPU_EVENTS, events) }
1534 };
1535 if ret < 0 {
1536 return errno_result();
1537 }
1538 Ok(())
1539 }
1540
1541 /// Enable the specified capability.
1542 /// See documentation for KVM_ENABLE_CAP.
1543 /// # Safety
1544 /// This function is marked as unsafe because `cap` may contain values which are interpreted as
1545 /// pointers by the kernel.
kvm_enable_cap(&self, cap: &kvm_enable_cap) -> Result<()>1546 pub unsafe fn kvm_enable_cap(&self, cap: &kvm_enable_cap) -> Result<()> {
1547 // SAFETY:
1548 // Safe because we allocated the struct and we know the kernel will read exactly the size of
1549 // the struct.
1550 let ret = ioctl_with_ref(self, KVM_ENABLE_CAP, cap);
1551 if ret < 0 {
1552 return errno_result();
1553 }
1554 Ok(())
1555 }
1556
1557 /// Specifies set of signals that are blocked during execution of KVM_RUN.
1558 /// Signals that are not blocked will cause KVM_RUN to return with -EINTR.
1559 ///
1560 /// See the documentation for KVM_SET_SIGNAL_MASK
    pub fn set_signal_mask(&self, signals: &[c_int]) -> Result<()> {
        let sigset = signal::create_sigset(signals)?;

        let mut kvm_sigmask = vec_with_array_field::<kvm_signal_mask, sigset_t>(1);
        // The Rust definition of sigset_t is 128 bytes, but the kernel only
        // expects an 8-byte structure, so we can't write
        // `kvm_sigmask.len = size_of::<sigset_t>() as u32;` here.
        kvm_sigmask[0].len = 8;
        // Ensure the length is not too big.
        const _ASSERT: usize = size_of::<sigset_t>() - 8usize;

        // SAFETY:
        // Safe because we allocated exactly the needed space.
        unsafe {
            copy_nonoverlapping(
                &sigset as *const sigset_t as *const u8,
                kvm_sigmask[0].sigset.as_mut_ptr(),
                8,
            );
        }

        let ret = {
            // SAFETY:
            // The ioctl is safe because the kernel will only read from the
            // kvm_signal_mask structure.
            unsafe { ioctl_with_ref(self, KVM_SET_SIGNAL_MASK, &kvm_sigmask[0]) }
        };
        if ret < 0 {
            return errno_result();
        }
        Ok(())
    }

    /// Sets the value of one register on this VCPU. The id of the register is
    /// encoded as specified in the kernel documentation for KVM_SET_ONE_REG.
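    ///
    /// # Example
    ///
    /// A hypothetical sketch (not from the original source); `PC_REG_ID` stands in
    /// for the KVM_SET_ONE_REG encoding of the aarch64 program counter, and
    /// `entry_addr` for the guest address where execution should begin:
    ///
    /// ```ignore
    /// vcpu.set_one_reg(PC_REG_ID, entry_addr)?;
    /// ```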
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    pub fn set_one_reg(&self, reg_id: u64, data: u64) -> Result<()> {
        let data_ref = &data as *const u64;
        let onereg = kvm_one_reg {
            id: reg_id,
            addr: data_ref as u64,
        };
        // SAFETY:
        // Safe because we allocated the struct and we know the kernel will read
        // exactly the size of the struct.
        let ret = unsafe { ioctl_with_ref(self, KVM_SET_ONE_REG, &onereg) };
        if ret < 0 {
            return errno_result();
        }
        Ok(())
    }
}

impl AsRawDescriptor for Vcpu {
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.vcpu.as_raw_descriptor()
    }
}

/// A Vcpu that has a thread and can be run. Created by calling `to_runnable` on a `Vcpu`.
/// Implements `Deref` to a `Vcpu` so all `Vcpu` methods are usable, with the addition of the `run`
/// function to execute the guest.
pub struct RunnableVcpu {
    vcpu: Vcpu,
    // Vcpus must stay on the same thread once they start.
    // Add the PhantomData pointer to ensure RunnableVcpu is not `Send`.
    phantom: std::marker::PhantomData<*mut u8>,
}

impl RunnableVcpu {
    /// Runs the VCPU until it exits, returning the reason for the exit.
    ///
    /// Note that the state of the VCPU and associated VM must be set up first for this to do
    /// anything useful.
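    ///
    /// # Example
    ///
    /// A simplified dispatch loop (a sketch, not from the original source),
    /// assuming `runnable` was obtained by calling `to_runnable` on a fully
    /// configured `Vcpu`:
    ///
    /// ```ignore
    /// loop {
    ///     match runnable.run()? {
    ///         VcpuExit::Hlt => break,
    ///         VcpuExit::IoOut { port, size, data } => {
    ///             // Forward the first `size` bytes of `data` to the device on `port`.
    ///         }
    ///         _ => {}
    ///     }
    /// }
    /// ```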
    #[allow(clippy::cast_ptr_alignment)]
    // The pointer is page aligned so casting to a different type is well defined, hence the clippy
    // allow attribute.
    pub fn run(&self) -> Result<VcpuExit> {
        // SAFETY:
        // Safe because we know that our file is a VCPU fd and we verify the return result.
        let ret = unsafe { ioctl(self, KVM_RUN) };
        if ret == 0 {
            // SAFETY:
            // Safe because we know we mapped enough memory to hold the kvm_run struct because the
            // kernel told us how large it was.
            let run = unsafe { &*(self.run_mmap.as_ptr() as *const kvm_run) };
            match run.exit_reason {
                KVM_EXIT_IO => {
                    // SAFETY:
                    // Safe because the exit_reason (which comes from the kernel) told us which
                    // union field to use.
                    let io = unsafe { run.__bindgen_anon_1.io };
                    let port = io.port;
                    let size = (io.count as usize) * (io.size as usize);
                    match io.direction as u32 {
                        KVM_EXIT_IO_IN => Ok(VcpuExit::IoIn { port, size }),
                        KVM_EXIT_IO_OUT => {
                            let mut data = [0; 8];
                            let run_start = run as *const kvm_run as *const u8;
                            // SAFETY:
                            // The data_offset is defined by the kernel to be some number of bytes
                            // into the kvm_run structure, which we have fully mmap'd.
                            unsafe {
                                let data_ptr = run_start.offset(io.data_offset as isize);
                                copy_nonoverlapping(
                                    data_ptr,
                                    data.as_mut_ptr(),
                                    min(size, data.len()),
                                );
                            }
                            Ok(VcpuExit::IoOut { port, size, data })
                        }
                        _ => Err(Error::new(EINVAL)),
                    }
                }
                KVM_EXIT_MMIO => {
                    // SAFETY:
                    // Safe because the exit_reason (which comes from the kernel) told us which
                    // union field to use.
                    let mmio = unsafe { &run.__bindgen_anon_1.mmio };
                    let address = mmio.phys_addr;
                    let size = min(mmio.len as usize, mmio.data.len());
                    if mmio.is_write != 0 {
                        Ok(VcpuExit::MmioWrite {
                            address,
                            size,
                            data: mmio.data,
                        })
                    } else {
                        Ok(VcpuExit::MmioRead { address, size })
                    }
                }
                KVM_EXIT_IOAPIC_EOI => {
                    // SAFETY:
                    // Safe because the exit_reason (which comes from the kernel) told us which
                    // union field to use.
                    let vector = unsafe { run.__bindgen_anon_1.eoi.vector };
                    Ok(VcpuExit::IoapicEoi { vector })
                }
                KVM_EXIT_HYPERV => {
                    // SAFETY:
                    // Safe because the exit_reason (which comes from the kernel) told us which
                    // union field to use.
                    let hyperv = unsafe { &run.__bindgen_anon_1.hyperv };
                    match hyperv.type_ {
                        KVM_EXIT_HYPERV_SYNIC => {
                            // SAFETY:
                            // Safe because the Hyper-V exit type (which comes from the kernel)
                            // told us which union field to use.
                            let synic = unsafe { &hyperv.u.synic };
                            Ok(VcpuExit::HypervSynic {
                                msr: synic.msr,
                                control: synic.control,
                                evt_page: synic.evt_page,
                                msg_page: synic.msg_page,
                            })
                        }
                        KVM_EXIT_HYPERV_HCALL => {
                            // SAFETY:
                            // Safe because the Hyper-V exit type (which comes from the kernel)
                            // told us which union field to use.
                            let hcall = unsafe { &hyperv.u.hcall };
                            Ok(VcpuExit::HypervHcall {
                                input: hcall.input,
                                params: hcall.params,
                            })
                        }
                        _ => Err(Error::new(EINVAL)),
                    }
                }
                KVM_EXIT_UNKNOWN => Ok(VcpuExit::Unknown),
                KVM_EXIT_EXCEPTION => Ok(VcpuExit::Exception),
                KVM_EXIT_HYPERCALL => Ok(VcpuExit::Hypercall),
                KVM_EXIT_DEBUG => Ok(VcpuExit::Debug),
                KVM_EXIT_HLT => Ok(VcpuExit::Hlt),
                KVM_EXIT_IRQ_WINDOW_OPEN => Ok(VcpuExit::IrqWindowOpen),
                KVM_EXIT_SHUTDOWN => Ok(VcpuExit::Shutdown),
                KVM_EXIT_FAIL_ENTRY => {
                    // SAFETY:
                    // Safe because the exit_reason (which comes from the kernel) told us which
                    // union field to use.
                    let hardware_entry_failure_reason = unsafe {
                        run.__bindgen_anon_1
                            .fail_entry
                            .hardware_entry_failure_reason
                    };
                    Ok(VcpuExit::FailEntry {
                        hardware_entry_failure_reason,
                    })
                }
                KVM_EXIT_INTR => Ok(VcpuExit::Intr),
                KVM_EXIT_SET_TPR => Ok(VcpuExit::SetTpr),
                KVM_EXIT_TPR_ACCESS => Ok(VcpuExit::TprAccess),
                KVM_EXIT_S390_SIEIC => Ok(VcpuExit::S390Sieic),
                KVM_EXIT_S390_RESET => Ok(VcpuExit::S390Reset),
                KVM_EXIT_DCR => Ok(VcpuExit::Dcr),
                KVM_EXIT_NMI => Ok(VcpuExit::Nmi),
                KVM_EXIT_INTERNAL_ERROR => Ok(VcpuExit::InternalError),
                KVM_EXIT_OSI => Ok(VcpuExit::Osi),
                KVM_EXIT_PAPR_HCALL => Ok(VcpuExit::PaprHcall),
                KVM_EXIT_S390_UCONTROL => Ok(VcpuExit::S390Ucontrol),
                KVM_EXIT_WATCHDOG => Ok(VcpuExit::Watchdog),
                KVM_EXIT_S390_TSCH => Ok(VcpuExit::S390Tsch),
                KVM_EXIT_EPR => Ok(VcpuExit::Epr),
                KVM_EXIT_SYSTEM_EVENT => {
                    let event_type = {
                        // SAFETY:
                        // Safe because we know the exit reason told us this union
                        // field is valid.
                        unsafe { run.__bindgen_anon_1.system_event.type_ }
                    };
                    // SAFETY:
                    // Safe because we know the exit reason told us this union
                    // field is valid.
                    let event_flags =
                        unsafe { run.__bindgen_anon_1.system_event.__bindgen_anon_1.flags };
                    Ok(VcpuExit::SystemEvent(event_type, event_flags))
                }
                r => panic!("unknown kvm exit reason: {}", r),
            }
        } else {
            errno_result()
        }
    }
}

impl Deref for RunnableVcpu {
    type Target = Vcpu;
    fn deref(&self) -> &Self::Target {
        &self.vcpu
    }
}

impl DerefMut for RunnableVcpu {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.vcpu
    }
}

impl AsRawDescriptor for RunnableVcpu {
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.vcpu.as_raw_descriptor()
    }
}
impl Drop for RunnableVcpu {
    fn drop(&mut self) {
        VCPU_THREAD.with(|v| {
            // This assumes that a failure in `BlockedSignal::new` means the signal is already
            // blocked, and therefore it should not be unblocked on exit.
            let _blocked_signal = &(*v.borrow())
                .as_ref()
                .and_then(|state| state.signal_num)
                .map(BlockedSignal::new);

            *v.borrow_mut() = None;
        });
    }
}

/// Wrapper for kvm_cpuid2 which has a zero length array at the end.
/// Hides the zero length array behind a bounds check.
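///
/// # Example
///
/// A minimal sketch (not from the original source); it assumes
/// `FlexibleArrayWrapper` exposes the `new` and `mut_entries_slice` helpers used
/// elsewhere in crosvm:
///
/// ```ignore
/// let mut cpuid = CpuId::new(2); // Space for two kvm_cpuid_entry2 entries.
/// let entries = cpuid.mut_entries_slice();
/// entries[0].function = 0;
/// entries[1].function = 1;
/// ```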
#[cfg(target_arch = "x86_64")]
pub type CpuId = FlexibleArrayWrapper<kvm_cpuid2, kvm_cpuid_entry2>;
