xref: /aosp_15_r20/external/crosvm/hypervisor/src/kvm/aarch64.rs (revision bb4ee6a4ae7042d18b07a98463b9c8b875e44b39)
1 // Copyright 2020 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
// We have u32 constants from bindings that are passed into architecture-dependent functions
6 // taking u32/64 parameters. So on 32 bit platforms we may have needless casts.
7 #![allow(clippy::useless_conversion)]
8 
9 use std::collections::BTreeMap;
10 use std::convert::TryFrom;
11 use std::mem::offset_of;
12 
13 use anyhow::Context;
14 use base::errno_result;
15 use base::error;
16 use base::ioctl_with_mut_ref;
17 use base::ioctl_with_ref;
18 use base::ioctl_with_val;
19 use base::warn;
20 use base::Error;
21 use base::Result;
22 use cros_fdt::Fdt;
23 use data_model::vec_with_array_field;
24 use kvm_sys::*;
25 use libc::EINVAL;
26 use libc::ENOMEM;
27 use libc::ENOTSUP;
28 use libc::ENXIO;
29 use serde::Deserialize;
30 use serde::Serialize;
31 use vm_memory::GuestAddress;
32 
33 use super::Config;
34 use super::Kvm;
35 use super::KvmCap;
36 use super::KvmVcpu;
37 use super::KvmVm;
38 use crate::AArch64SysRegId;
39 use crate::ClockState;
40 use crate::DeviceKind;
41 use crate::Hypervisor;
42 use crate::IrqSourceChip;
43 use crate::ProtectionType;
44 use crate::PsciVersion;
45 use crate::VcpuAArch64;
46 use crate::VcpuExit;
47 use crate::VcpuFeature;
48 use crate::VcpuRegAArch64;
49 use crate::VmAArch64;
50 use crate::VmCap;
51 use crate::AARCH64_MAX_REG_COUNT;
52 use crate::PSCI_0_2;
53 
54 impl Kvm {
55     // Compute the machine type, which should be the IPA range for the VM
56     // Ideally, this would take a description of the memory map and return
57     // the closest machine type for this VM. Here, we just return the maximum
58     // the kernel support.
get_vm_type(&self, protection_type: ProtectionType) -> Result<u32>59     pub fn get_vm_type(&self, protection_type: ProtectionType) -> Result<u32> {
60         // SAFETY:
61         // Safe because we know self is a real kvm fd
62         let ipa_size = match unsafe {
63             ioctl_with_val(self, KVM_CHECK_EXTENSION, KVM_CAP_ARM_VM_IPA_SIZE.into())
64         } {
65             // Not supported? Use 0 as the machine type, which implies 40bit IPA
66             ret if ret < 0 => 0,
67             ipa => ipa as u32,
68         };
69         let protection_flag = if protection_type.isolates_memory() {
70             KVM_VM_TYPE_ARM_PROTECTED
71         } else {
72             0
73         };
74         // Use the lower 8 bits representing the IPA space as the machine type
75         Ok((ipa_size & KVM_VM_TYPE_ARM_IPA_SIZE_MASK) | protection_flag)
76     }
77 
78     /// Get the size of guest physical addresses (IPA) in bits.
get_guest_phys_addr_bits(&self) -> u879     pub fn get_guest_phys_addr_bits(&self) -> u8 {
80         // SAFETY:
81         // Safe because we know self is a real kvm fd
82         match unsafe { ioctl_with_val(self, KVM_CHECK_EXTENSION, KVM_CAP_ARM_VM_IPA_SIZE.into()) } {
83             // Default physical address size is 40 bits if the extension is not supported.
84             ret if ret <= 0 => 40,
85             ipa => ipa as u8,
86         }
87     }
88 }
89 
impl KvmVm {
    /// Does platform specific initialization for the KvmVm.
    ///
    /// On aarch64 this enables the ARM MTE (memory tagging) capability when `cfg.mte` is set;
    /// on other architectures it is a no-op.
    pub fn init_arch(&self, cfg: &Config) -> Result<()> {
        #[cfg(target_arch = "aarch64")]
        if cfg.mte {
            // SAFETY:
            // Safe because it does not take pointer arguments.
            unsafe { self.enable_raw_capability(KvmCap::ArmMte, 0, &[0, 0, 0, 0])? }
        }
        #[cfg(not(target_arch = "aarch64"))]
        {
            // Suppress warning.
            let _ = cfg;
        }

        Ok(())
    }

    /// Whether running under pKVM.
    ///
    /// Determined by whether the pKVM-only protected-VM info query succeeds.
    pub fn is_pkvm(&self) -> bool {
        self.get_protected_vm_info().is_ok()
    }

    /// Checks if a particular `VmCap` is available, or returns None if arch-independent
    /// Vm.check_capability() should handle the check.
    ///
    /// There are no aarch64-specific capability checks, so this always defers to the common code.
    pub fn check_capability_arch(&self, _c: VmCap) -> Option<bool> {
        None
    }

    /// Returns the params to pass to KVM_CREATE_DEVICE for a `kind` device on this arch, or None to
    /// let the arch-independent `KvmVm::create_device` handle it.
    ///
    /// Only the in-kernel VGIC v2/v3 interrupt controllers are handled here.
    pub fn get_device_params_arch(&self, kind: DeviceKind) -> Option<kvm_create_device> {
        match kind {
            DeviceKind::ArmVgicV2 => Some(kvm_create_device {
                type_: kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V2,
                fd: 0,
                flags: 0,
            }),
            DeviceKind::ArmVgicV3 => Some(kvm_create_device {
                type_: kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V3,
                fd: 0,
                flags: 0,
            }),
            _ => None,
        }
    }

    /// Arch-specific implementation of `Vm::get_pvclock`.  Always returns an error on AArch64.
    pub fn get_pvclock_arch(&self) -> Result<ClockState> {
        Err(Error::new(ENXIO))
    }

    /// Arch-specific implementation of `Vm::set_pvclock`.  Always returns an error on AArch64.
    pub fn set_pvclock_arch(&self, _state: &ClockState) -> Result<()> {
        Err(Error::new(ENXIO))
    }

    /// Get pKVM hypervisor details, e.g. the firmware size.
    ///
    /// Returns `Err` if not running under pKVM.
    ///
    /// Uses `KVM_ENABLE_CAP` internally, but it is only a getter, there should be no side effects
    /// in KVM.
    fn get_protected_vm_info(&self) -> Result<KvmProtectedVmInfo> {
        // Zero-initialize; the kernel fills this struct in via the pointer passed below.
        let mut info = KvmProtectedVmInfo {
            firmware_size: 0,
            reserved: [0; 7],
        };
        // SAFETY:
        // Safe because we allocated the struct and we know the kernel won't write beyond the end of
        // the struct or keep a pointer to it.
        unsafe {
            self.enable_raw_capability(
                KvmCap::ArmProtectedVm,
                KVM_CAP_ARM_PROTECTED_VM_FLAGS_INFO,
                &[&mut info as *mut KvmProtectedVmInfo as u64, 0, 0, 0],
            )
        }?;
        Ok(info)
    }

    /// Tells the hypervisor the guest physical address (IPA) at which to place the pVM firmware.
    fn set_protected_vm_firmware_ipa(&self, fw_addr: GuestAddress) -> Result<()> {
        // SAFETY:
        // Safe because none of the args are pointers.
        unsafe {
            self.enable_raw_capability(
                KvmCap::ArmProtectedVm,
                KVM_CAP_ARM_PROTECTED_VM_FLAGS_SET_FW_IPA,
                &[fw_addr.0, 0, 0, 0],
            )
        }
    }
}
183 
/// Protected-VM details filled in by the kernel for the `KVM_CAP_ARM_PROTECTED_VM_FLAGS_INFO`
/// query (see `KvmVm::get_protected_vm_info`).
///
/// NOTE(review): `#[repr(C)]` because the layout must match the kernel's view of this struct;
/// do not reorder or resize fields.
#[repr(C)]
struct KvmProtectedVmInfo {
    // Size of the pVM firmware; callers compare it against a byte-size limit, and 0 is
    // treated as "no firmware available".
    firmware_size: u64,
    // Padding reserved by the kernel ABI for future fields.
    reserved: [u64; 7],
}
189 
190 impl VmAArch64 for KvmVm {
get_hypervisor(&self) -> &dyn Hypervisor191     fn get_hypervisor(&self) -> &dyn Hypervisor {
192         &self.kvm
193     }
194 
load_protected_vm_firmware( &mut self, fw_addr: GuestAddress, fw_max_size: u64, ) -> Result<()>195     fn load_protected_vm_firmware(
196         &mut self,
197         fw_addr: GuestAddress,
198         fw_max_size: u64,
199     ) -> Result<()> {
200         let info = self.get_protected_vm_info()?;
201         if info.firmware_size == 0 {
202             Err(Error::new(EINVAL))
203         } else {
204             if info.firmware_size > fw_max_size {
205                 return Err(Error::new(ENOMEM));
206             }
207             self.set_protected_vm_firmware_ipa(fw_addr)
208         }
209     }
210 
create_vcpu(&self, id: usize) -> Result<Box<dyn VcpuAArch64>>211     fn create_vcpu(&self, id: usize) -> Result<Box<dyn VcpuAArch64>> {
212         // create_vcpu is declared separately in VmAArch64 and VmX86, so it can return VcpuAArch64
213         // or VcpuX86.  But both use the same implementation in KvmVm::create_kvm_vcpu.
214         Ok(Box::new(self.create_kvm_vcpu(id)?))
215     }
216 
create_fdt(&self, _fdt: &mut Fdt, _phandles: &BTreeMap<&str, u32>) -> cros_fdt::Result<()>217     fn create_fdt(&self, _fdt: &mut Fdt, _phandles: &BTreeMap<&str, u32>) -> cros_fdt::Result<()> {
218         Ok(())
219     }
220 
init_arch( &self, _payload_entry_address: GuestAddress, _fdt_address: GuestAddress, _fdt_size: usize, ) -> Result<()>221     fn init_arch(
222         &self,
223         _payload_entry_address: GuestAddress,
224         _fdt_address: GuestAddress,
225         _fdt_size: usize,
226     ) -> Result<()> {
227         Ok(())
228     }
229 
set_counter_offset(&self, offset: u64) -> Result<()>230     fn set_counter_offset(&self, offset: u64) -> Result<()> {
231         let off = kvm_arm_counter_offset {
232             counter_offset: offset,
233             reserved: 0,
234         };
235         // SAFETY: self.vm is a valid KVM fd
236         let ret = unsafe { ioctl_with_ref(&self.vm, KVM_ARM_SET_COUNTER_OFFSET, &off) };
237         if ret != 0 {
238             return errno_result();
239         }
240         Ok(())
241     }
242 }
243 
impl KvmVcpu {
    /// Handles a `KVM_EXIT_SYSTEM_EVENT` with event type `KVM_SYSTEM_EVENT_RESET` with the given
    /// event flags and returns the appropriate `VcpuExit` value for the run loop to handle.
    ///
    /// `event_flags` should be one or more of the `KVM_SYSTEM_EVENT_RESET_FLAG_*` values defined by
    /// KVM.
    pub fn system_event_reset(&self, event_flags: u64) -> Result<VcpuExit> {
        if event_flags & u64::from(KVM_SYSTEM_EVENT_RESET_FLAG_PSCI_RESET2) != 0 {
            // Read reset_type and cookie from x1 and x2. Per the PSCI SYSTEM_RESET2 call
            // convention, these registers carry the caller's arguments; log them for debugging.
            let reset_type = self.get_one_reg(VcpuRegAArch64::X(1))?;
            let cookie = self.get_one_reg(VcpuRegAArch64::X(2))?;
            warn!(
                "PSCI SYSTEM_RESET2 with reset_type={:#x}, cookie={:#x}",
                reset_type, cookie
            );
        }
        Ok(VcpuExit::SystemEventReset)
    }

    /// Maps an architectural `VcpuRegAArch64` onto the `KvmVcpuRegister` used to access it via
    /// the KVM `ONE_REG` API.
    ///
    /// Returns `EINVAL` for registers this mapping does not support.
    fn kvm_reg_id(&self, reg: VcpuRegAArch64) -> Result<KvmVcpuRegister> {
        match reg {
            VcpuRegAArch64::X(n @ 0..=30) => Ok(KvmVcpuRegister::X(n)),
            VcpuRegAArch64::Sp => Ok(KvmVcpuRegister::Sp),
            VcpuRegAArch64::Pc => Ok(KvmVcpuRegister::Pc),
            VcpuRegAArch64::Pstate => Ok(KvmVcpuRegister::Pstate),
            // Special case for multiplexed KVM registers: CCSIDR_EL1 is demultiplexed by the
            // current CSSELR_EL1 value, so read that first to select the instance.
            VcpuRegAArch64::System(AArch64SysRegId::CCSIDR_EL1) => {
                let csselr =
                    self.get_one_reg(VcpuRegAArch64::System(AArch64SysRegId::CSSELR_EL1))?;
                Ok(KvmVcpuRegister::Ccsidr(csselr as u8))
            }
            VcpuRegAArch64::System(sysreg) => Ok(KvmVcpuRegister::System(sysreg)),
            _ => Err(Error::new(EINVAL)),
        }
    }

    /// Sets a 32-bit register through `KVM_SET_ONE_REG`, passing the value in native byte order.
    fn set_one_kvm_reg_u32(&self, kvm_reg_id: KvmVcpuRegister, data: u32) -> Result<()> {
        self.set_one_kvm_reg(kvm_reg_id, data.to_ne_bytes().as_slice())
    }

    /// Sets a 64-bit register through `KVM_SET_ONE_REG`, passing the value in native byte order.
    fn set_one_kvm_reg_u64(&self, kvm_reg_id: KvmVcpuRegister, data: u64) -> Result<()> {
        self.set_one_kvm_reg(kvm_reg_id, data.to_ne_bytes().as_slice())
    }

    /// Sets a 128-bit register through `KVM_SET_ONE_REG`, passing the value in native byte order.
    fn set_one_kvm_reg_u128(&self, kvm_reg_id: KvmVcpuRegister, data: u128) -> Result<()> {
        self.set_one_kvm_reg(kvm_reg_id, data.to_ne_bytes().as_slice())
    }

    /// Writes `data` to the given register with the `KVM_SET_ONE_REG` ioctl.
    ///
    /// Panics if `data`'s length does not match the size encoded in the register ID.
    fn set_one_kvm_reg(&self, kvm_reg_id: KvmVcpuRegister, data: &[u8]) -> Result<()> {
        assert_eq!(kvm_reg_id.size(), data.len());
        let id: u64 = kvm_reg_id.into();
        // The ioctl takes the data by guest-sized pointer in `addr`.
        let onereg = kvm_one_reg {
            id,
            addr: (data.as_ptr() as usize)
                .try_into()
                .expect("can't represent usize as u64"),
        };
        // SAFETY:
        // Safe because we allocated the struct and we know the kernel will read exactly the size of
        // the struct.
        let ret = unsafe { ioctl_with_ref(self, KVM_SET_ONE_REG, &onereg) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Reads a 32-bit register through `KVM_GET_ONE_REG` (native byte order).
    fn get_one_kvm_reg_u32(&self, kvm_reg_id: KvmVcpuRegister) -> Result<u32> {
        let mut bytes = 0u32.to_ne_bytes();
        self.get_one_kvm_reg(kvm_reg_id, bytes.as_mut_slice())?;
        Ok(u32::from_ne_bytes(bytes))
    }

    /// Reads a 64-bit register through `KVM_GET_ONE_REG` (native byte order).
    fn get_one_kvm_reg_u64(&self, kvm_reg_id: KvmVcpuRegister) -> Result<u64> {
        let mut bytes = 0u64.to_ne_bytes();
        self.get_one_kvm_reg(kvm_reg_id, bytes.as_mut_slice())?;
        Ok(u64::from_ne_bytes(bytes))
    }

    /// Reads a 128-bit register through `KVM_GET_ONE_REG` (native byte order).
    fn get_one_kvm_reg_u128(&self, kvm_reg_id: KvmVcpuRegister) -> Result<u128> {
        let mut bytes = 0u128.to_ne_bytes();
        self.get_one_kvm_reg(kvm_reg_id, bytes.as_mut_slice())?;
        Ok(u128::from_ne_bytes(bytes))
    }

    /// Fills `data` from the given register with the `KVM_GET_ONE_REG` ioctl.
    ///
    /// Panics if `data`'s length does not match the size encoded in the register ID.
    fn get_one_kvm_reg(&self, kvm_reg_id: KvmVcpuRegister, data: &mut [u8]) -> Result<()> {
        assert_eq!(kvm_reg_id.size(), data.len());
        let id: u64 = kvm_reg_id.into();
        let onereg = kvm_one_reg {
            id,
            addr: (data.as_mut_ptr() as usize)
                .try_into()
                .expect("can't represent usize as u64"),
        };

        // SAFETY:
        // Safe because we allocated the struct and we know the kernel will read exactly the size of
        // the struct.
        let ret = unsafe { ioctl_with_ref(self, KVM_GET_ONE_REG, &onereg) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Arch-specific hook for handling VM exits; returning `None` defers to the common handler.
    #[inline]
    pub(crate) fn handle_vm_exit_arch(&self, _run: &mut kvm_run) -> Option<VcpuExit> {
        // No aarch64-specific exits (for now)
        None
    }

    /// Returns the IDs of every register this vCPU exposes via `KVM_GET_REG_LIST`.
    fn get_reg_list(&self) -> Result<Vec<u64>> {
        // Allocate a kvm_reg_list whose trailing flexible array can hold the maximum number of
        // registers, and tell the kernel how much room it has.
        let mut kvm_reg_list = vec_with_array_field::<kvm_reg_list, u64>(AARCH64_MAX_REG_COUNT);
        kvm_reg_list[0].n = AARCH64_MAX_REG_COUNT as u64;
        let ret =
            // SAFETY:
            // We trust the kernel not to read/write past the end of kvm_reg_list struct.
            unsafe { ioctl_with_mut_ref(self, KVM_GET_REG_LIST, &mut kvm_reg_list[0]) };
        if ret < 0 {
            return errno_result();
        }
        // The kernel rewrites `n` to the actual number of registers it returned.
        let n = kvm_reg_list[0].n;
        assert!(
            n <= AARCH64_MAX_REG_COUNT as u64,
            "Get reg list returned more registers than possible"
        );
        // SAFETY:
        // Mapping the unsized array to a slice is unsafe because the length isn't known.
        // Providing the length used to create the struct guarantees the entire slice is valid.
        let reg_list: &[u64] = unsafe { kvm_reg_list[0].reg.as_slice(n as usize) };
        Ok(reg_list.to_vec())
    }

    /// Builds the `kvm_vcpu_init::features[0]` bitmap for the requested `VcpuFeature`s.
    ///
    /// Pointer authentication is additionally enabled whenever both the address and generic
    /// KVM capabilities are present. Returns `ENOTSUP` if SVE is requested but the kernel does
    /// not support it.
    fn get_features_bitmap(&self, features: &[VcpuFeature]) -> Result<u32> {
        let mut all_features = 0;
        let check_extension = |ext: u32| -> bool {
            // SAFETY:
            // Safe because we know self.vm is a real kvm fd
            unsafe { ioctl_with_val(&self.vm, KVM_CHECK_EXTENSION, ext.into()) == 1 }
        };

        for f in features {
            // Each KVM_ARM_VCPU_* constant is a bit position in the feature bitmap.
            let shift = match f {
                VcpuFeature::PsciV0_2 => KVM_ARM_VCPU_PSCI_0_2,
                VcpuFeature::PmuV3 => KVM_ARM_VCPU_PMU_V3,
                VcpuFeature::PowerOff => KVM_ARM_VCPU_POWER_OFF,
                VcpuFeature::Sve => {
                    if !check_extension(KVM_CAP_ARM_SVE) {
                        return Err(Error::new(ENOTSUP));
                    }
                    KVM_ARM_VCPU_SVE
                }
            };
            all_features |= 1 << shift;
        }

        // Enable both flavors of pointer authentication together when available.
        if check_extension(KVM_CAP_ARM_PTRAUTH_ADDRESS)
            && check_extension(KVM_CAP_ARM_PTRAUTH_GENERIC)
        {
            all_features |= 1 << KVM_ARM_VCPU_PTRAUTH_ADDRESS;
            all_features |= 1 << KVM_ARM_VCPU_PTRAUTH_GENERIC;
        }

        Ok(all_features)
    }

    /// Finalize VCPU features setup. This does not affect features that do not make use of
    /// finalize.
    ///
    /// Currently only SVE requires a `KVM_ARM_VCPU_FINALIZE` call.
    fn finalize(&self, features: u32) -> Result<()> {
        if (features & 1 << KVM_ARM_VCPU_SVE) != 0 {
            // SAFETY:
            // Safe because we know that our file is a Vcpu fd and we verify the return result.
            let ret = unsafe {
                ioctl_with_ref(
                    self,
                    KVM_ARM_VCPU_FINALIZE,
                    &std::os::raw::c_int::try_from(KVM_ARM_VCPU_SVE)
                        .map_err(|_| Error::new(EINVAL))?,
                )
            };
            if ret != 0 {
                return errno_result();
            }
        }

        Ok(())
    }
}
434 
/// KVM registers as used by the `GET_ONE_REG`/`SET_ONE_REG` ioctl API
///
/// These variants represent the registers as exposed by KVM which must be different from
/// `VcpuRegAArch64` to support registers which don't have an architectural definition such as
/// pseudo-registers (`Firmware`) and multiplexed registers (`Ccsidr`).
///
/// See https://docs.kernel.org/virt/kvm/api.html for more details.
#[derive(Debug, Copy, Clone, Serialize, Deserialize)]
pub enum KvmVcpuRegister {
    /// General Purpose Registers X0-X30 (index must be in 0..=30)
    X(u8),
    /// Stack Pointer
    Sp,
    /// Program Counter
    Pc,
    /// Processor State
    Pstate,
    /// FP & SIMD Registers V0-V31 (index must be in 0..=31)
    V(u8),
    /// KVM Firmware Pseudo-Registers, identified by their firmware register index
    Firmware(u16),
    /// System Registers
    System(AArch64SysRegId),
    /// CCSIDR_EL1 Demultiplexed by CSSELR_EL1 (the payload is the CSSELR_EL1 selector value)
    Ccsidr(u8),
}
461 
462 impl KvmVcpuRegister {
463     // Firmware pseudo-registers are part of the ARM KVM interface:
464     //     https://docs.kernel.org/virt/kvm/arm/hypercalls.html
465     pub const PSCI_VERSION: Self = Self::Firmware(0);
466     pub const SMCCC_ARCH_WORKAROUND_1: Self = Self::Firmware(1);
467     pub const SMCCC_ARCH_WORKAROUND_2: Self = Self::Firmware(2);
468     pub const SMCCC_ARCH_WORKAROUND_3: Self = Self::Firmware(3);
469 
470     /// Size of this register in bytes.
size(&self) -> usize471     pub fn size(&self) -> usize {
472         let kvm_reg = u64::from(*self);
473         let size_field = kvm_reg & KVM_REG_SIZE_MASK;
474         const REG_SIZE_U8: u64 = KVM_REG_SIZE_U8 as u64; // cast from bindgen's u32 to u64
475         match size_field {
476             REG_SIZE_U8 => 1,
477             KVM_REG_SIZE_U16 => 2,
478             KVM_REG_SIZE_U32 => 4,
479             KVM_REG_SIZE_U64 => 8,
480             KVM_REG_SIZE_U128 => 16,
481             KVM_REG_SIZE_U256 => 32,
482             KVM_REG_SIZE_U512 => 64,
483             KVM_REG_SIZE_U1024 => 128,
484             KVM_REG_SIZE_U2048 => 256,
485             // `From<KvmVcpuRegister> for u64` should always include a valid size.
486             _ => panic!("invalid size field {}", size_field),
487         }
488     }
489 }
490 
/// Gives the `u64` register ID expected by the `GET_ONE_REG`/`SET_ONE_REG` ioctl API.
///
/// See the KVM documentation of those ioctls for details about the format of the register ID.
impl From<KvmVcpuRegister> for u64 {
    fn from(register: KvmVcpuRegister) -> Self {
        // Assembles an arm64 register ID from its size field, register kind, and the
        // kind-specific field bits.
        const fn reg(size: u64, kind: u64, fields: u64) -> u64 {
            KVM_REG_ARM64 | size | kind | fields
        }

        // Core registers are addressed by their offset into `kvm_regs`, expressed in 32-bit
        // units (hence the division by size_of::<u32>()).
        const fn kvm_regs_reg(size: u64, offset: usize) -> u64 {
            let offset = offset / std::mem::size_of::<u32>();

            reg(size, KVM_REG_ARM_CORE as u64, offset as u64)
        }

        // A 64-bit core register located `offset` bytes into `kvm_regs`.
        const fn kvm_reg(offset: usize) -> u64 {
            kvm_regs_reg(KVM_REG_SIZE_U64, offset)
        }

        // One of the banked SPSRs: `spsr_reg` is a KVM_SPSR_* index into the `kvm_regs.spsr`
        // array of u64s.
        fn spsr_reg(spsr_reg: u32) -> u64 {
            let n = std::mem::size_of::<u64>() * (spsr_reg as usize);
            kvm_reg(offset_of!(kvm_regs, spsr) + n)
        }

        // A register inside the embedded `user_pt_regs` struct (`kvm_regs.regs`).
        fn user_pt_reg(offset: usize) -> u64 {
            kvm_regs_reg(KVM_REG_SIZE_U64, offset_of!(kvm_regs, regs) + offset)
        }

        // A register inside the embedded `user_fpsimd_state` struct (`kvm_regs.fp_regs`).
        fn user_fpsimd_state_reg(size: u64, offset: usize) -> u64 {
            kvm_regs_reg(size, offset_of!(kvm_regs, fp_regs) + offset)
        }

        // A 64-bit register of the given kind (used for system and firmware registers).
        const fn reg_u64(kind: u64, fields: u64) -> u64 {
            reg(KVM_REG_SIZE_U64, kind, fields)
        }

        // A demultiplexed register ID: `index` selects the demux group and `value` the
        // instance within it (e.g. CCSIDR_EL1 selected by a CSSELR_EL1 value).
        const fn demux_reg(size: u64, index: u64, value: u64) -> u64 {
            let index = (index << KVM_REG_ARM_DEMUX_ID_SHIFT) & (KVM_REG_ARM_DEMUX_ID_MASK as u64);
            let value =
                (value << KVM_REG_ARM_DEMUX_VAL_SHIFT) & (KVM_REG_ARM_DEMUX_VAL_MASK as u64);

            reg(size, KVM_REG_ARM_DEMUX as u64, index | value)
        }

        match register {
            KvmVcpuRegister::X(n @ 0..=30) => {
                // Xn lives at index n of the `user_pt_regs.regs` array of u64s.
                let n = std::mem::size_of::<u64>() * (n as usize);

                user_pt_reg(offset_of!(user_pt_regs, regs) + n)
            }
            KvmVcpuRegister::X(n) => unreachable!("invalid KvmVcpuRegister Xn index: {n}"),
            KvmVcpuRegister::Sp => user_pt_reg(offset_of!(user_pt_regs, sp)),
            KvmVcpuRegister::Pc => user_pt_reg(offset_of!(user_pt_regs, pc)),
            KvmVcpuRegister::Pstate => user_pt_reg(offset_of!(user_pt_regs, pstate)),
            KvmVcpuRegister::V(n @ 0..=31) => {
                // Vn lives at index n of the `user_fpsimd_state.vregs` array of u128s.
                let n = std::mem::size_of::<u128>() * (n as usize);

                user_fpsimd_state_reg(KVM_REG_SIZE_U128, offset_of!(user_fpsimd_state, vregs) + n)
            }
            KvmVcpuRegister::V(n) => unreachable!("invalid KvmVcpuRegister Vn index: {n}"),
            KvmVcpuRegister::System(AArch64SysRegId::FPSR) => {
                user_fpsimd_state_reg(KVM_REG_SIZE_U32, offset_of!(user_fpsimd_state, fpsr))
            }
            KvmVcpuRegister::System(AArch64SysRegId::FPCR) => {
                user_fpsimd_state_reg(KVM_REG_SIZE_U32, offset_of!(user_fpsimd_state, fpcr))
            }
            KvmVcpuRegister::System(AArch64SysRegId::SPSR_EL1) => spsr_reg(KVM_SPSR_EL1),
            KvmVcpuRegister::System(AArch64SysRegId::SPSR_abt) => spsr_reg(KVM_SPSR_ABT),
            KvmVcpuRegister::System(AArch64SysRegId::SPSR_und) => spsr_reg(KVM_SPSR_UND),
            KvmVcpuRegister::System(AArch64SysRegId::SPSR_irq) => spsr_reg(KVM_SPSR_IRQ),
            KvmVcpuRegister::System(AArch64SysRegId::SPSR_fiq) => spsr_reg(KVM_SPSR_FIQ),
            KvmVcpuRegister::System(AArch64SysRegId::SP_EL1) => {
                kvm_reg(offset_of!(kvm_regs, sp_el1))
            }
            KvmVcpuRegister::System(AArch64SysRegId::ELR_EL1) => {
                kvm_reg(offset_of!(kvm_regs, elr_el1))
            }
            // The KVM API accidentally swapped CNTV_CVAL_EL0 and CNTVCT_EL0, so each one is
            // deliberately encoded with the other's sysreg encoding to match the kernel.
            KvmVcpuRegister::System(AArch64SysRegId::CNTV_CVAL_EL0) => reg_u64(
                KVM_REG_ARM64_SYSREG.into(),
                AArch64SysRegId::CNTVCT_EL0.encoded().into(),
            ),
            KvmVcpuRegister::System(AArch64SysRegId::CNTVCT_EL0) => reg_u64(
                KVM_REG_ARM64_SYSREG.into(),
                AArch64SysRegId::CNTV_CVAL_EL0.encoded().into(),
            ),
            KvmVcpuRegister::System(sysreg) => {
                reg_u64(KVM_REG_ARM64_SYSREG.into(), sysreg.encoded().into())
            }
            KvmVcpuRegister::Firmware(n) => reg_u64(KVM_REG_ARM_FW.into(), n.into()),
            KvmVcpuRegister::Ccsidr(n) => demux_reg(KVM_REG_SIZE_U32, 0, n.into()),
        }
    }
}
585 
586 impl VcpuAArch64 for KvmVcpu {
init(&self, features: &[VcpuFeature]) -> Result<()>587     fn init(&self, features: &[VcpuFeature]) -> Result<()> {
588         let mut kvi = kvm_vcpu_init {
589             target: KVM_ARM_TARGET_GENERIC_V8,
590             features: [0; 7],
591         };
592         // SAFETY:
593         // Safe because we allocated the struct and we know the kernel will write exactly the size
594         // of the struct.
595         let ret = unsafe { ioctl_with_mut_ref(&self.vm, KVM_ARM_PREFERRED_TARGET, &mut kvi) };
596         if ret != 0 {
597             return errno_result();
598         }
599 
600         kvi.features[0] = self.get_features_bitmap(features)?;
601         // SAFETY:
602         // Safe because we allocated the struct and we know the kernel will read exactly the size of
603         // the struct.
604         let ret = unsafe { ioctl_with_ref(self, KVM_ARM_VCPU_INIT, &kvi) };
605         if ret != 0 {
606             return errno_result();
607         }
608 
609         self.finalize(kvi.features[0])?;
610         Ok(())
611     }
612 
init_pmu(&self, irq: u64) -> Result<()>613     fn init_pmu(&self, irq: u64) -> Result<()> {
614         let irq_addr = &irq as *const u64;
615 
616         // The in-kernel PMU virtualization is initialized by setting the irq
617         // with KVM_ARM_VCPU_PMU_V3_IRQ and then by KVM_ARM_VCPU_PMU_V3_INIT.
618 
619         let irq_attr = kvm_device_attr {
620             group: KVM_ARM_VCPU_PMU_V3_CTRL,
621             attr: KVM_ARM_VCPU_PMU_V3_IRQ as u64,
622             addr: irq_addr as u64,
623             flags: 0,
624         };
625         // SAFETY:
626         // Safe because we allocated the struct and we know the kernel will read exactly the size of
627         // the struct.
628         let ret = unsafe { ioctl_with_ref(self, kvm_sys::KVM_HAS_DEVICE_ATTR, &irq_attr) };
629         if ret < 0 {
630             return errno_result();
631         }
632 
633         // SAFETY:
634         // Safe because we allocated the struct and we know the kernel will read exactly the size of
635         // the struct.
636         let ret = unsafe { ioctl_with_ref(self, kvm_sys::KVM_SET_DEVICE_ATTR, &irq_attr) };
637         if ret < 0 {
638             return errno_result();
639         }
640 
641         let init_attr = kvm_device_attr {
642             group: KVM_ARM_VCPU_PMU_V3_CTRL,
643             attr: KVM_ARM_VCPU_PMU_V3_INIT as u64,
644             addr: 0,
645             flags: 0,
646         };
647         // SAFETY:
648         // Safe because we allocated the struct and we know the kernel will read exactly the size of
649         // the struct.
650         let ret = unsafe { ioctl_with_ref(self, kvm_sys::KVM_SET_DEVICE_ATTR, &init_attr) };
651         if ret < 0 {
652             return errno_result();
653         }
654 
655         Ok(())
656     }
657 
has_pvtime_support(&self) -> bool658     fn has_pvtime_support(&self) -> bool {
659         // The in-kernel PV time structure is initialized by setting the base
660         // address with KVM_ARM_VCPU_PVTIME_IPA
661         let pvtime_attr = kvm_device_attr {
662             group: KVM_ARM_VCPU_PVTIME_CTRL,
663             attr: KVM_ARM_VCPU_PVTIME_IPA as u64,
664             addr: 0,
665             flags: 0,
666         };
667         // SAFETY:
668         // Safe because we allocated the struct and we know the kernel will read exactly the size of
669         // the struct.
670         let ret = unsafe { ioctl_with_ref(self, kvm_sys::KVM_HAS_DEVICE_ATTR, &pvtime_attr) };
671         ret >= 0
672     }
673 
init_pvtime(&self, pvtime_ipa: u64) -> Result<()>674     fn init_pvtime(&self, pvtime_ipa: u64) -> Result<()> {
675         let pvtime_ipa_addr = &pvtime_ipa as *const u64;
676 
677         // The in-kernel PV time structure is initialized by setting the base
678         // address with KVM_ARM_VCPU_PVTIME_IPA
679         let pvtime_attr = kvm_device_attr {
680             group: KVM_ARM_VCPU_PVTIME_CTRL,
681             attr: KVM_ARM_VCPU_PVTIME_IPA as u64,
682             addr: pvtime_ipa_addr as u64,
683             flags: 0,
684         };
685 
686         // SAFETY:
687         // Safe because we allocated the struct and we know the kernel will read exactly the size of
688         // the struct.
689         let ret = unsafe { ioctl_with_ref(self, kvm_sys::KVM_SET_DEVICE_ATTR, &pvtime_attr) };
690         if ret < 0 {
691             return errno_result();
692         }
693 
694         Ok(())
695     }
696 
set_one_reg(&self, reg_id: VcpuRegAArch64, data: u64) -> Result<()>697     fn set_one_reg(&self, reg_id: VcpuRegAArch64, data: u64) -> Result<()> {
698         let kvm_reg = self.kvm_reg_id(reg_id)?;
699         match kvm_reg.size() {
700             4 => self.set_one_kvm_reg_u32(kvm_reg, data as u32),
701             8 => self.set_one_kvm_reg_u64(kvm_reg, data),
702             size => panic!("bad reg size {size}"),
703         }
704     }
705 
get_one_reg(&self, reg_id: VcpuRegAArch64) -> Result<u64>706     fn get_one_reg(&self, reg_id: VcpuRegAArch64) -> Result<u64> {
707         let kvm_reg = self.kvm_reg_id(reg_id)?;
708         match kvm_reg.size() {
709             4 => self.get_one_kvm_reg_u32(kvm_reg).map(u64::from),
710             8 => self.get_one_kvm_reg_u64(kvm_reg),
711             size => panic!("bad reg size {size}"),
712         }
713     }
714 
set_vector_reg(&self, reg_num: u8, data: u128) -> Result<()>715     fn set_vector_reg(&self, reg_num: u8, data: u128) -> Result<()> {
716         if reg_num > 31 {
717             return Err(Error::new(EINVAL));
718         }
719         self.set_one_kvm_reg_u128(KvmVcpuRegister::V(reg_num), data)
720     }
721 
get_vector_reg(&self, reg_num: u8) -> Result<u128>722     fn get_vector_reg(&self, reg_num: u8) -> Result<u128> {
723         if reg_num > 31 {
724             return Err(Error::new(EINVAL));
725         }
726         self.get_one_kvm_reg_u128(KvmVcpuRegister::V(reg_num))
727     }
728 
get_mpidr(&self) -> Result<u64>729     fn get_mpidr(&self) -> Result<u64> {
730         self.get_one_reg(VcpuRegAArch64::System(AArch64SysRegId::MPIDR_EL1))
731     }
732 
get_psci_version(&self) -> Result<PsciVersion>733     fn get_psci_version(&self) -> Result<PsciVersion> {
734         let version = if let Ok(v) = self.get_one_kvm_reg_u64(KvmVcpuRegister::PSCI_VERSION) {
735             let v = u32::try_from(v).map_err(|_| Error::new(EINVAL))?;
736             PsciVersion::try_from(v)?
737         } else {
738             // When `KVM_REG_ARM_PSCI_VERSION` is not supported, we can return PSCI 0.2, as vCPU
739             // has been initialized with `KVM_ARM_VCPU_PSCI_0_2` successfully.
740             PSCI_0_2
741         };
742 
743         if version < PSCI_0_2 {
744             // PSCI v0.1 isn't currently supported for guests
745             Err(Error::new(ENOTSUP))
746         } else {
747             Ok(version)
748         }
749     }
750 
get_max_hw_bps(&self) -> Result<usize>751     fn get_max_hw_bps(&self) -> Result<usize> {
752         // SAFETY:
753         // Safe because the kernel will only return the result of the ioctl.
754         let max_hw_bps = unsafe {
755             ioctl_with_val(
756                 &self.vm,
757                 KVM_CHECK_EXTENSION,
758                 KVM_CAP_GUEST_DEBUG_HW_BPS.into(),
759             )
760         };
761 
762         if max_hw_bps < 0 {
763             errno_result()
764         } else {
765             Ok(max_hw_bps.try_into().expect("can't represent u64 as usize"))
766         }
767     }
768 
    /// Reads every system register (coproc == KVM_REG_ARM64_SYSREG) that KVM
    /// lists for this vCPU, returning a map from register id to value.
    ///
    /// Timer-register quirk: KVM's KVM_REG_ARM_TIMER_CVAL and
    /// KVM_REG_ARM_TIMER_CNT constants were accidentally defined with each
    /// other's encodings, and the `KvmVcpuRegister` encoding compensates for
    /// that (asserted by the `system_timer_register_mixup` test in this
    /// file). So when mapping a raw KVM register id back to an
    /// `AArch64SysRegId`, the same swap must be applied in reverse: the id
    /// that encodes as CNTVCT_EL0 is really CNTV_CVAL_EL0 and vice versa.
    fn get_system_regs(&self) -> Result<BTreeMap<AArch64SysRegId, u64>> {
        let reg_list = self.get_reg_list()?;
        // Architectural (low-16-bit) encodings of the two mixed-up timer
        // registers, used to detect them in the KVM register list below.
        let cntvct_el0: u16 = AArch64SysRegId::CNTVCT_EL0.encoded();
        let cntv_cval_el0: u16 = AArch64SysRegId::CNTV_CVAL_EL0.encoded();
        let mut sys_regs = BTreeMap::new();
        for reg in reg_list {
            // Only system registers; other coprocessor groups (firmware,
            // demux/CCSIDR, ...) are snapshotted by other methods.
            if (reg as u32) & KVM_REG_ARM_COPROC_MASK == KVM_REG_ARM64_SYSREG {
                // Undo the CVAL/CNT mix-up described in the doc comment.
                let r = if reg as u16 == cntvct_el0 {
                    AArch64SysRegId::CNTV_CVAL_EL0
                } else if reg as u16 == cntv_cval_el0 {
                    AArch64SysRegId::CNTVCT_EL0
                } else {
                    AArch64SysRegId::from_encoded((reg & 0xFFFF) as u16)
                };
                sys_regs.insert(r, self.get_one_reg(VcpuRegAArch64::System(r))?);
                // The register representations are tricky. Double check they round trip correctly.
                assert_eq!(
                    Ok(reg),
                    self.kvm_reg_id(VcpuRegAArch64::System(r)).map(u64::from),
                );
            }
        }
        Ok(sys_regs)
    }
793 
get_cache_info(&self) -> Result<BTreeMap<u8, u64>>794     fn get_cache_info(&self) -> Result<BTreeMap<u8, u64>> {
795         const KVM_REG_CCSIDR: u64 = KVM_REG_ARM64 | KVM_REG_SIZE_U32 | (KVM_REG_ARM_DEMUX as u64);
796         const CCSIDR_INDEX_MASK: u64 = 0xFF;
797         let reg_list = self.get_reg_list()?;
798         let mut cache_info = BTreeMap::new();
799         for reg in reg_list {
800             if (reg & !CCSIDR_INDEX_MASK) == KVM_REG_CCSIDR {
801                 let idx = reg as u8;
802                 cache_info.insert(
803                     idx,
804                     self.get_one_kvm_reg_u32(KvmVcpuRegister::Ccsidr(idx))?
805                         .into(),
806                 );
807             }
808         }
809         Ok(cache_info)
810     }
811 
set_cache_info(&self, cache_info: BTreeMap<u8, u64>) -> Result<()>812     fn set_cache_info(&self, cache_info: BTreeMap<u8, u64>) -> Result<()> {
813         for (idx, val) in cache_info {
814             self.set_one_kvm_reg_u32(
815                 KvmVcpuRegister::Ccsidr(idx),
816                 val.try_into()
817                     .expect("trying to set a u32 register with a u64 value"),
818             )?;
819         }
820         Ok(())
821     }
822 
hypervisor_specific_snapshot(&self) -> anyhow::Result<serde_json::Value>823     fn hypervisor_specific_snapshot(&self) -> anyhow::Result<serde_json::Value> {
824         let reg_list = self.get_reg_list()?;
825         let mut firmware_regs = BTreeMap::new();
826         for reg in reg_list {
827             if (reg as u32) & KVM_REG_ARM_COPROC_MASK == KVM_REG_ARM_FW {
828                 firmware_regs.insert(
829                     reg as u16,
830                     self.get_one_kvm_reg_u64(KvmVcpuRegister::Firmware(reg as u16))?,
831                 );
832             }
833         }
834 
835         serde_json::to_value(KvmSnapshot { firmware_regs })
836             .context("Failed to serialize KVM specific data")
837     }
838 
hypervisor_specific_restore(&self, data: serde_json::Value) -> anyhow::Result<()>839     fn hypervisor_specific_restore(&self, data: serde_json::Value) -> anyhow::Result<()> {
840         let deser: KvmSnapshot =
841             serde_json::from_value(data).context("Failed to deserialize KVM specific data")?;
842         // TODO: need to set firmware registers before "create_fdt" is called, earlier in the
843         // stack.
844         for (id, val) in &deser.firmware_regs {
845             self.set_one_kvm_reg_u64(KvmVcpuRegister::Firmware(*id), *val)?;
846         }
847         Ok(())
848     }
849 
850     #[allow(clippy::unusual_byte_groupings)]
set_guest_debug(&self, addrs: &[GuestAddress], enable_singlestep: bool) -> Result<()>851     fn set_guest_debug(&self, addrs: &[GuestAddress], enable_singlestep: bool) -> Result<()> {
852         let mut dbg = kvm_guest_debug {
853             control: KVM_GUESTDBG_ENABLE,
854             ..Default::default()
855         };
856 
857         if enable_singlestep {
858             dbg.control |= KVM_GUESTDBG_SINGLESTEP;
859         }
860         if !addrs.is_empty() {
861             dbg.control |= KVM_GUESTDBG_USE_HW;
862         }
863 
864         for (i, guest_addr) in addrs.iter().enumerate() {
865             // From the ARMv8 Architecture Reference Manual (DDI0487H.a) D31.3.{2,3}:
866             // When DBGBCR<n>_EL1.BT == 0b000x:
867             //      DBGBVR<n>_EL1, Bits [1:0]: Reserved, RES0
868             if guest_addr.0 & 0b11 != 0 {
869                 return Err(Error::new(EINVAL));
870             }
871             let sign_ext = 15;
872             //      DBGBVR<n>_EL1.RESS[14:0], bits [63:49]: Reserved, Sign extended
873             dbg.arch.dbg_bvr[i] = (((guest_addr.0 << sign_ext) as i64) >> sign_ext) as u64;
874             // DBGBCR<n>_EL1.BT, bits [23:20]: Breakpoint Type
875             //      0b0000: Unlinked instruction address match.
876             //              DBGBVR<n>_EL1 is the address of an instruction.
877             // DBGBCR<n>_EL1.BAS, bits [8:5]: Byte address select
878             //      0b1111: Use for A64 and A32 instructions
879             // DBGBCR<n>_EL1.PMC, bits [2:1]: Privilege mode control
880             //      0b11: EL1 & EL0
881             // DBGBCR<n>_EL1.E, bit [0]: Enable breakpoint
882             //      0b1: Enabled
883             dbg.arch.dbg_bcr[i] = 0b1111_11_1;
884         }
885 
886         // SAFETY:
887         // Safe because the kernel won't read past the end of the kvm_guest_debug struct.
888         let ret = unsafe { ioctl_with_ref(self, KVM_SET_GUEST_DEBUG, &dbg) };
889         if ret == 0 {
890             Ok(())
891         } else {
892             errno_result()
893         }
894     }
895 }
896 
/// KVM-specific vCPU state captured by `hypervisor_specific_snapshot` and
/// reapplied by `hypervisor_specific_restore`: the values of KVM's aarch64
/// firmware pseudo-registers, keyed by the low 16 bits of each register's
/// KVM id.
#[derive(Debug, Serialize, Deserialize)]
struct KvmSnapshot {
    firmware_regs: BTreeMap<u16, u64>,
}
901 
902 // This function translates an IrqSrouceChip to the kvm u32 equivalent. It has a different
903 // implementation between x86_64 and aarch64 because the irqchip KVM constants are not defined on
904 // all architectures.
chip_to_kvm_chip(chip: IrqSourceChip) -> u32905 pub(super) fn chip_to_kvm_chip(chip: IrqSourceChip) -> u32 {
906     match chip {
907         // ARM does not have a constant for this, but the default routing
908         // setup seems to set this to 0
909         IrqSourceChip::Gic => 0,
910         _ => {
911             error!("Invalid IrqChipSource for ARM {:?}", chip);
912             0
913         }
914     }
915 }
916 
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn system_timer_register_mixup() {
        // Per https://docs.kernel.org/virt/kvm/api.html ARM64 system register encoding docs,
        // KVM_REG_ARM_TIMER_CVAL and KVM_REG_ARM_TIMER_CNT were accidentally defined backwards.
        // Ensure the AArch64SysRegId to KvmVcpuRegister encoding maps these to the expected
        // values.
        const KVM_REG_ARM_TIMER_CVAL: u64 = 0x6030_0000_0013_DF02;
        const KVM_REG_ARM_TIMER_CNT: u64 = 0x6030_0000_0013_DF1A;

        assert_eq!(
            u64::from(KvmVcpuRegister::System(AArch64SysRegId::CNTV_CVAL_EL0)),
            KVM_REG_ARM_TIMER_CVAL,
        );
        assert_eq!(
            u64::from(KvmVcpuRegister::System(AArch64SysRegId::CNTVCT_EL0)),
            KVM_REG_ARM_TIMER_CNT,
        );
    }
}
936 }
937