1 // Copyright 2020 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
// We have u32 constants from bindings that are passed into architecture-dependent functions
// taking u32/u64 parameters. So on 32-bit platforms we may have needless casts.
7 #![allow(clippy::useless_conversion)]
8
9 use std::collections::BTreeMap;
10 use std::convert::TryFrom;
11 use std::mem::offset_of;
12
13 use anyhow::Context;
14 use base::errno_result;
15 use base::error;
16 use base::ioctl_with_mut_ref;
17 use base::ioctl_with_ref;
18 use base::ioctl_with_val;
19 use base::warn;
20 use base::Error;
21 use base::Result;
22 use cros_fdt::Fdt;
23 use data_model::vec_with_array_field;
24 use kvm_sys::*;
25 use libc::EINVAL;
26 use libc::ENOMEM;
27 use libc::ENOTSUP;
28 use libc::ENXIO;
29 use serde::Deserialize;
30 use serde::Serialize;
31 use vm_memory::GuestAddress;
32
33 use super::Config;
34 use super::Kvm;
35 use super::KvmCap;
36 use super::KvmVcpu;
37 use super::KvmVm;
38 use crate::AArch64SysRegId;
39 use crate::ClockState;
40 use crate::DeviceKind;
41 use crate::Hypervisor;
42 use crate::IrqSourceChip;
43 use crate::ProtectionType;
44 use crate::PsciVersion;
45 use crate::VcpuAArch64;
46 use crate::VcpuExit;
47 use crate::VcpuFeature;
48 use crate::VcpuRegAArch64;
49 use crate::VmAArch64;
50 use crate::VmCap;
51 use crate::AARCH64_MAX_REG_COUNT;
52 use crate::PSCI_0_2;
53
54 impl Kvm {
55 // Compute the machine type, which should be the IPA range for the VM
56 // Ideally, this would take a description of the memory map and return
57 // the closest machine type for this VM. Here, we just return the maximum
58 // the kernel support.
get_vm_type(&self, protection_type: ProtectionType) -> Result<u32>59 pub fn get_vm_type(&self, protection_type: ProtectionType) -> Result<u32> {
60 // SAFETY:
61 // Safe because we know self is a real kvm fd
62 let ipa_size = match unsafe {
63 ioctl_with_val(self, KVM_CHECK_EXTENSION, KVM_CAP_ARM_VM_IPA_SIZE.into())
64 } {
65 // Not supported? Use 0 as the machine type, which implies 40bit IPA
66 ret if ret < 0 => 0,
67 ipa => ipa as u32,
68 };
69 let protection_flag = if protection_type.isolates_memory() {
70 KVM_VM_TYPE_ARM_PROTECTED
71 } else {
72 0
73 };
74 // Use the lower 8 bits representing the IPA space as the machine type
75 Ok((ipa_size & KVM_VM_TYPE_ARM_IPA_SIZE_MASK) | protection_flag)
76 }
77
78 /// Get the size of guest physical addresses (IPA) in bits.
get_guest_phys_addr_bits(&self) -> u879 pub fn get_guest_phys_addr_bits(&self) -> u8 {
80 // SAFETY:
81 // Safe because we know self is a real kvm fd
82 match unsafe { ioctl_with_val(self, KVM_CHECK_EXTENSION, KVM_CAP_ARM_VM_IPA_SIZE.into()) } {
83 // Default physical address size is 40 bits if the extension is not supported.
84 ret if ret <= 0 => 40,
85 ipa => ipa as u8,
86 }
87 }
88 }
89
90 impl KvmVm {
91 /// Does platform specific initialization for the KvmVm.
init_arch(&self, cfg: &Config) -> Result<()>92 pub fn init_arch(&self, cfg: &Config) -> Result<()> {
93 #[cfg(target_arch = "aarch64")]
94 if cfg.mte {
95 // SAFETY:
96 // Safe because it does not take pointer arguments.
97 unsafe { self.enable_raw_capability(KvmCap::ArmMte, 0, &[0, 0, 0, 0])? }
98 }
99 #[cfg(not(target_arch = "aarch64"))]
100 {
101 // Suppress warning.
102 let _ = cfg;
103 }
104
105 Ok(())
106 }
107
108 /// Whether running under pKVM.
is_pkvm(&self) -> bool109 pub fn is_pkvm(&self) -> bool {
110 self.get_protected_vm_info().is_ok()
111 }
112
113 /// Checks if a particular `VmCap` is available, or returns None if arch-independent
114 /// Vm.check_capability() should handle the check.
check_capability_arch(&self, _c: VmCap) -> Option<bool>115 pub fn check_capability_arch(&self, _c: VmCap) -> Option<bool> {
116 None
117 }
118
119 /// Returns the params to pass to KVM_CREATE_DEVICE for a `kind` device on this arch, or None to
120 /// let the arch-independent `KvmVm::create_device` handle it.
get_device_params_arch(&self, kind: DeviceKind) -> Option<kvm_create_device>121 pub fn get_device_params_arch(&self, kind: DeviceKind) -> Option<kvm_create_device> {
122 match kind {
123 DeviceKind::ArmVgicV2 => Some(kvm_create_device {
124 type_: kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V2,
125 fd: 0,
126 flags: 0,
127 }),
128 DeviceKind::ArmVgicV3 => Some(kvm_create_device {
129 type_: kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V3,
130 fd: 0,
131 flags: 0,
132 }),
133 _ => None,
134 }
135 }
136
137 /// Arch-specific implementation of `Vm::get_pvclock`. Always returns an error on AArch64.
get_pvclock_arch(&self) -> Result<ClockState>138 pub fn get_pvclock_arch(&self) -> Result<ClockState> {
139 Err(Error::new(ENXIO))
140 }
141
142 /// Arch-specific implementation of `Vm::set_pvclock`. Always returns an error on AArch64.
set_pvclock_arch(&self, _state: &ClockState) -> Result<()>143 pub fn set_pvclock_arch(&self, _state: &ClockState) -> Result<()> {
144 Err(Error::new(ENXIO))
145 }
146
147 /// Get pKVM hypervisor details, e.g. the firmware size.
148 ///
149 /// Returns `Err` if not running under pKVM.
150 ///
151 /// Uses `KVM_ENABLE_CAP` internally, but it is only a getter, there should be no side effects
152 /// in KVM.
get_protected_vm_info(&self) -> Result<KvmProtectedVmInfo>153 fn get_protected_vm_info(&self) -> Result<KvmProtectedVmInfo> {
154 let mut info = KvmProtectedVmInfo {
155 firmware_size: 0,
156 reserved: [0; 7],
157 };
158 // SAFETY:
159 // Safe because we allocated the struct and we know the kernel won't write beyond the end of
160 // the struct or keep a pointer to it.
161 unsafe {
162 self.enable_raw_capability(
163 KvmCap::ArmProtectedVm,
164 KVM_CAP_ARM_PROTECTED_VM_FLAGS_INFO,
165 &[&mut info as *mut KvmProtectedVmInfo as u64, 0, 0, 0],
166 )
167 }?;
168 Ok(info)
169 }
170
set_protected_vm_firmware_ipa(&self, fw_addr: GuestAddress) -> Result<()>171 fn set_protected_vm_firmware_ipa(&self, fw_addr: GuestAddress) -> Result<()> {
172 // SAFETY:
173 // Safe because none of the args are pointers.
174 unsafe {
175 self.enable_raw_capability(
176 KvmCap::ArmProtectedVm,
177 KVM_CAP_ARM_PROTECTED_VM_FLAGS_SET_FW_IPA,
178 &[fw_addr.0, 0, 0, 0],
179 )
180 }
181 }
182 }
183
/// Buffer filled in by the kernel when `KvmVm::get_protected_vm_info` enables the
/// `ArmProtectedVm` capability with the `KVM_CAP_ARM_PROTECTED_VM_FLAGS_INFO` flag.
///
/// `#[repr(C)]` because a raw pointer to this struct is handed to the kernel.
#[repr(C)]
struct KvmProtectedVmInfo {
    /// Size of the pKVM firmware (presumably in bytes -- confirm against the kernel ABI). Zero is
    /// rejected by `load_protected_vm_firmware`.
    firmware_size: u64,
    /// Reserved space; always initialized to zero by this file.
    reserved: [u64; 7],
}
189
190 impl VmAArch64 for KvmVm {
get_hypervisor(&self) -> &dyn Hypervisor191 fn get_hypervisor(&self) -> &dyn Hypervisor {
192 &self.kvm
193 }
194
load_protected_vm_firmware( &mut self, fw_addr: GuestAddress, fw_max_size: u64, ) -> Result<()>195 fn load_protected_vm_firmware(
196 &mut self,
197 fw_addr: GuestAddress,
198 fw_max_size: u64,
199 ) -> Result<()> {
200 let info = self.get_protected_vm_info()?;
201 if info.firmware_size == 0 {
202 Err(Error::new(EINVAL))
203 } else {
204 if info.firmware_size > fw_max_size {
205 return Err(Error::new(ENOMEM));
206 }
207 self.set_protected_vm_firmware_ipa(fw_addr)
208 }
209 }
210
create_vcpu(&self, id: usize) -> Result<Box<dyn VcpuAArch64>>211 fn create_vcpu(&self, id: usize) -> Result<Box<dyn VcpuAArch64>> {
212 // create_vcpu is declared separately in VmAArch64 and VmX86, so it can return VcpuAArch64
213 // or VcpuX86. But both use the same implementation in KvmVm::create_kvm_vcpu.
214 Ok(Box::new(self.create_kvm_vcpu(id)?))
215 }
216
create_fdt(&self, _fdt: &mut Fdt, _phandles: &BTreeMap<&str, u32>) -> cros_fdt::Result<()>217 fn create_fdt(&self, _fdt: &mut Fdt, _phandles: &BTreeMap<&str, u32>) -> cros_fdt::Result<()> {
218 Ok(())
219 }
220
init_arch( &self, _payload_entry_address: GuestAddress, _fdt_address: GuestAddress, _fdt_size: usize, ) -> Result<()>221 fn init_arch(
222 &self,
223 _payload_entry_address: GuestAddress,
224 _fdt_address: GuestAddress,
225 _fdt_size: usize,
226 ) -> Result<()> {
227 Ok(())
228 }
229
set_counter_offset(&self, offset: u64) -> Result<()>230 fn set_counter_offset(&self, offset: u64) -> Result<()> {
231 let off = kvm_arm_counter_offset {
232 counter_offset: offset,
233 reserved: 0,
234 };
235 // SAFETY: self.vm is a valid KVM fd
236 let ret = unsafe { ioctl_with_ref(&self.vm, KVM_ARM_SET_COUNTER_OFFSET, &off) };
237 if ret != 0 {
238 return errno_result();
239 }
240 Ok(())
241 }
242 }
243
244 impl KvmVcpu {
245 /// Handles a `KVM_EXIT_SYSTEM_EVENT` with event type `KVM_SYSTEM_EVENT_RESET` with the given
246 /// event flags and returns the appropriate `VcpuExit` value for the run loop to handle.
247 ///
248 /// `event_flags` should be one or more of the `KVM_SYSTEM_EVENT_RESET_FLAG_*` values defined by
249 /// KVM.
system_event_reset(&self, event_flags: u64) -> Result<VcpuExit>250 pub fn system_event_reset(&self, event_flags: u64) -> Result<VcpuExit> {
251 if event_flags & u64::from(KVM_SYSTEM_EVENT_RESET_FLAG_PSCI_RESET2) != 0 {
252 // Read reset_type and cookie from x1 and x2.
253 let reset_type = self.get_one_reg(VcpuRegAArch64::X(1))?;
254 let cookie = self.get_one_reg(VcpuRegAArch64::X(2))?;
255 warn!(
256 "PSCI SYSTEM_RESET2 with reset_type={:#x}, cookie={:#x}",
257 reset_type, cookie
258 );
259 }
260 Ok(VcpuExit::SystemEventReset)
261 }
262
kvm_reg_id(&self, reg: VcpuRegAArch64) -> Result<KvmVcpuRegister>263 fn kvm_reg_id(&self, reg: VcpuRegAArch64) -> Result<KvmVcpuRegister> {
264 match reg {
265 VcpuRegAArch64::X(n @ 0..=30) => Ok(KvmVcpuRegister::X(n)),
266 VcpuRegAArch64::Sp => Ok(KvmVcpuRegister::Sp),
267 VcpuRegAArch64::Pc => Ok(KvmVcpuRegister::Pc),
268 VcpuRegAArch64::Pstate => Ok(KvmVcpuRegister::Pstate),
269 // Special case for multiplexed KVM registers
270 VcpuRegAArch64::System(AArch64SysRegId::CCSIDR_EL1) => {
271 let csselr =
272 self.get_one_reg(VcpuRegAArch64::System(AArch64SysRegId::CSSELR_EL1))?;
273 Ok(KvmVcpuRegister::Ccsidr(csselr as u8))
274 }
275 VcpuRegAArch64::System(sysreg) => Ok(KvmVcpuRegister::System(sysreg)),
276 _ => Err(Error::new(EINVAL)),
277 }
278 }
279
set_one_kvm_reg_u32(&self, kvm_reg_id: KvmVcpuRegister, data: u32) -> Result<()>280 fn set_one_kvm_reg_u32(&self, kvm_reg_id: KvmVcpuRegister, data: u32) -> Result<()> {
281 self.set_one_kvm_reg(kvm_reg_id, data.to_ne_bytes().as_slice())
282 }
283
set_one_kvm_reg_u64(&self, kvm_reg_id: KvmVcpuRegister, data: u64) -> Result<()>284 fn set_one_kvm_reg_u64(&self, kvm_reg_id: KvmVcpuRegister, data: u64) -> Result<()> {
285 self.set_one_kvm_reg(kvm_reg_id, data.to_ne_bytes().as_slice())
286 }
287
set_one_kvm_reg_u128(&self, kvm_reg_id: KvmVcpuRegister, data: u128) -> Result<()>288 fn set_one_kvm_reg_u128(&self, kvm_reg_id: KvmVcpuRegister, data: u128) -> Result<()> {
289 self.set_one_kvm_reg(kvm_reg_id, data.to_ne_bytes().as_slice())
290 }
291
set_one_kvm_reg(&self, kvm_reg_id: KvmVcpuRegister, data: &[u8]) -> Result<()>292 fn set_one_kvm_reg(&self, kvm_reg_id: KvmVcpuRegister, data: &[u8]) -> Result<()> {
293 assert_eq!(kvm_reg_id.size(), data.len());
294 let id: u64 = kvm_reg_id.into();
295 let onereg = kvm_one_reg {
296 id,
297 addr: (data.as_ptr() as usize)
298 .try_into()
299 .expect("can't represent usize as u64"),
300 };
301 // SAFETY:
302 // Safe because we allocated the struct and we know the kernel will read exactly the size of
303 // the struct.
304 let ret = unsafe { ioctl_with_ref(self, KVM_SET_ONE_REG, &onereg) };
305 if ret == 0 {
306 Ok(())
307 } else {
308 errno_result()
309 }
310 }
311
get_one_kvm_reg_u32(&self, kvm_reg_id: KvmVcpuRegister) -> Result<u32>312 fn get_one_kvm_reg_u32(&self, kvm_reg_id: KvmVcpuRegister) -> Result<u32> {
313 let mut bytes = 0u32.to_ne_bytes();
314 self.get_one_kvm_reg(kvm_reg_id, bytes.as_mut_slice())?;
315 Ok(u32::from_ne_bytes(bytes))
316 }
317
get_one_kvm_reg_u64(&self, kvm_reg_id: KvmVcpuRegister) -> Result<u64>318 fn get_one_kvm_reg_u64(&self, kvm_reg_id: KvmVcpuRegister) -> Result<u64> {
319 let mut bytes = 0u64.to_ne_bytes();
320 self.get_one_kvm_reg(kvm_reg_id, bytes.as_mut_slice())?;
321 Ok(u64::from_ne_bytes(bytes))
322 }
323
get_one_kvm_reg_u128(&self, kvm_reg_id: KvmVcpuRegister) -> Result<u128>324 fn get_one_kvm_reg_u128(&self, kvm_reg_id: KvmVcpuRegister) -> Result<u128> {
325 let mut bytes = 0u128.to_ne_bytes();
326 self.get_one_kvm_reg(kvm_reg_id, bytes.as_mut_slice())?;
327 Ok(u128::from_ne_bytes(bytes))
328 }
329
get_one_kvm_reg(&self, kvm_reg_id: KvmVcpuRegister, data: &mut [u8]) -> Result<()>330 fn get_one_kvm_reg(&self, kvm_reg_id: KvmVcpuRegister, data: &mut [u8]) -> Result<()> {
331 assert_eq!(kvm_reg_id.size(), data.len());
332 let id: u64 = kvm_reg_id.into();
333 let onereg = kvm_one_reg {
334 id,
335 addr: (data.as_mut_ptr() as usize)
336 .try_into()
337 .expect("can't represent usize as u64"),
338 };
339
340 // SAFETY:
341 // Safe because we allocated the struct and we know the kernel will read exactly the size of
342 // the struct.
343 let ret = unsafe { ioctl_with_ref(self, KVM_GET_ONE_REG, &onereg) };
344 if ret == 0 {
345 Ok(())
346 } else {
347 errno_result()
348 }
349 }
350
351 #[inline]
handle_vm_exit_arch(&self, _run: &mut kvm_run) -> Option<VcpuExit>352 pub(crate) fn handle_vm_exit_arch(&self, _run: &mut kvm_run) -> Option<VcpuExit> {
353 // No aarch64-specific exits (for now)
354 None
355 }
356
get_reg_list(&self) -> Result<Vec<u64>>357 fn get_reg_list(&self) -> Result<Vec<u64>> {
358 let mut kvm_reg_list = vec_with_array_field::<kvm_reg_list, u64>(AARCH64_MAX_REG_COUNT);
359 kvm_reg_list[0].n = AARCH64_MAX_REG_COUNT as u64;
360 let ret =
361 // SAFETY:
362 // We trust the kernel not to read/write past the end of kvm_reg_list struct.
363 unsafe { ioctl_with_mut_ref(self, KVM_GET_REG_LIST, &mut kvm_reg_list[0]) };
364 if ret < 0 {
365 return errno_result();
366 }
367 let n = kvm_reg_list[0].n;
368 assert!(
369 n <= AARCH64_MAX_REG_COUNT as u64,
370 "Get reg list returned more registers than possible"
371 );
372 // SAFETY:
373 // Mapping the unsized array to a slice is unsafe because the length isn't known.
374 // Providing the length used to create the struct guarantees the entire slice is valid.
375 let reg_list: &[u64] = unsafe { kvm_reg_list[0].reg.as_slice(n as usize) };
376 Ok(reg_list.to_vec())
377 }
378
get_features_bitmap(&self, features: &[VcpuFeature]) -> Result<u32>379 fn get_features_bitmap(&self, features: &[VcpuFeature]) -> Result<u32> {
380 let mut all_features = 0;
381 let check_extension = |ext: u32| -> bool {
382 // SAFETY:
383 // Safe because we know self.vm is a real kvm fd
384 unsafe { ioctl_with_val(&self.vm, KVM_CHECK_EXTENSION, ext.into()) == 1 }
385 };
386
387 for f in features {
388 let shift = match f {
389 VcpuFeature::PsciV0_2 => KVM_ARM_VCPU_PSCI_0_2,
390 VcpuFeature::PmuV3 => KVM_ARM_VCPU_PMU_V3,
391 VcpuFeature::PowerOff => KVM_ARM_VCPU_POWER_OFF,
392 VcpuFeature::Sve => {
393 if !check_extension(KVM_CAP_ARM_SVE) {
394 return Err(Error::new(ENOTSUP));
395 }
396 KVM_ARM_VCPU_SVE
397 }
398 };
399 all_features |= 1 << shift;
400 }
401
402 if check_extension(KVM_CAP_ARM_PTRAUTH_ADDRESS)
403 && check_extension(KVM_CAP_ARM_PTRAUTH_GENERIC)
404 {
405 all_features |= 1 << KVM_ARM_VCPU_PTRAUTH_ADDRESS;
406 all_features |= 1 << KVM_ARM_VCPU_PTRAUTH_GENERIC;
407 }
408
409 Ok(all_features)
410 }
411
412 /// Finalize VCPU features setup. This does not affect features that do not make use of
413 /// finalize.
finalize(&self, features: u32) -> Result<()>414 fn finalize(&self, features: u32) -> Result<()> {
415 if (features & 1 << KVM_ARM_VCPU_SVE) != 0 {
416 // SAFETY:
417 // Safe because we know that our file is a Vcpu fd and we verify the return result.
418 let ret = unsafe {
419 ioctl_with_ref(
420 self,
421 KVM_ARM_VCPU_FINALIZE,
422 &std::os::raw::c_int::try_from(KVM_ARM_VCPU_SVE)
423 .map_err(|_| Error::new(EINVAL))?,
424 )
425 };
426 if ret != 0 {
427 return errno_result();
428 }
429 }
430
431 Ok(())
432 }
433 }
434
/// KVM registers as used by the `GET_ONE_REG`/`SET_ONE_REG` ioctl API
///
/// These variants represent the registers as exposed by KVM which must be different from
/// `VcpuRegAArch64` to support registers which don't have an architectural definition such as
/// pseudo-registers (`Firmware`) and multiplexed registers (`Ccsidr`).
///
/// Convert a value to `u64` to obtain the register ID that is passed to the ioctls.
///
/// See <https://docs.kernel.org/virt/kvm/api.html> for more details.
#[derive(Debug, Copy, Clone, Serialize, Deserialize)]
pub enum KvmVcpuRegister {
    /// General Purpose Registers X0-X30
    X(u8),
    /// Stack Pointer
    Sp,
    /// Program Counter
    Pc,
    /// Processor State
    Pstate,
    /// FP & SIMD Registers V0-V31
    V(u8),
    /// KVM Firmware Pseudo-Registers
    Firmware(u16),
    /// System Registers
    System(AArch64SysRegId),
    /// CCSIDR_EL1 Demultiplexed by CSSELR_EL1
    Ccsidr(u8),
}
461
462 impl KvmVcpuRegister {
463 // Firmware pseudo-registers are part of the ARM KVM interface:
464 // https://docs.kernel.org/virt/kvm/arm/hypercalls.html
465 pub const PSCI_VERSION: Self = Self::Firmware(0);
466 pub const SMCCC_ARCH_WORKAROUND_1: Self = Self::Firmware(1);
467 pub const SMCCC_ARCH_WORKAROUND_2: Self = Self::Firmware(2);
468 pub const SMCCC_ARCH_WORKAROUND_3: Self = Self::Firmware(3);
469
470 /// Size of this register in bytes.
size(&self) -> usize471 pub fn size(&self) -> usize {
472 let kvm_reg = u64::from(*self);
473 let size_field = kvm_reg & KVM_REG_SIZE_MASK;
474 const REG_SIZE_U8: u64 = KVM_REG_SIZE_U8 as u64; // cast from bindgen's u32 to u64
475 match size_field {
476 REG_SIZE_U8 => 1,
477 KVM_REG_SIZE_U16 => 2,
478 KVM_REG_SIZE_U32 => 4,
479 KVM_REG_SIZE_U64 => 8,
480 KVM_REG_SIZE_U128 => 16,
481 KVM_REG_SIZE_U256 => 32,
482 KVM_REG_SIZE_U512 => 64,
483 KVM_REG_SIZE_U1024 => 128,
484 KVM_REG_SIZE_U2048 => 256,
485 // `From<KvmVcpuRegister> for u64` should always include a valid size.
486 _ => panic!("invalid size field {}", size_field),
487 }
488 }
489 }
490
491 /// Gives the `u64` register ID expected by the `GET_ONE_REG`/`SET_ONE_REG` ioctl API.
492 ///
493 /// See the KVM documentation of those ioctls for details about the format of the register ID.
494 impl From<KvmVcpuRegister> for u64 {
from(register: KvmVcpuRegister) -> Self495 fn from(register: KvmVcpuRegister) -> Self {
496 const fn reg(size: u64, kind: u64, fields: u64) -> u64 {
497 KVM_REG_ARM64 | size | kind | fields
498 }
499
500 const fn kvm_regs_reg(size: u64, offset: usize) -> u64 {
501 let offset = offset / std::mem::size_of::<u32>();
502
503 reg(size, KVM_REG_ARM_CORE as u64, offset as u64)
504 }
505
506 const fn kvm_reg(offset: usize) -> u64 {
507 kvm_regs_reg(KVM_REG_SIZE_U64, offset)
508 }
509
510 fn spsr_reg(spsr_reg: u32) -> u64 {
511 let n = std::mem::size_of::<u64>() * (spsr_reg as usize);
512 kvm_reg(offset_of!(kvm_regs, spsr) + n)
513 }
514
515 fn user_pt_reg(offset: usize) -> u64 {
516 kvm_regs_reg(KVM_REG_SIZE_U64, offset_of!(kvm_regs, regs) + offset)
517 }
518
519 fn user_fpsimd_state_reg(size: u64, offset: usize) -> u64 {
520 kvm_regs_reg(size, offset_of!(kvm_regs, fp_regs) + offset)
521 }
522
523 const fn reg_u64(kind: u64, fields: u64) -> u64 {
524 reg(KVM_REG_SIZE_U64, kind, fields)
525 }
526
527 const fn demux_reg(size: u64, index: u64, value: u64) -> u64 {
528 let index = (index << KVM_REG_ARM_DEMUX_ID_SHIFT) & (KVM_REG_ARM_DEMUX_ID_MASK as u64);
529 let value =
530 (value << KVM_REG_ARM_DEMUX_VAL_SHIFT) & (KVM_REG_ARM_DEMUX_VAL_MASK as u64);
531
532 reg(size, KVM_REG_ARM_DEMUX as u64, index | value)
533 }
534
535 match register {
536 KvmVcpuRegister::X(n @ 0..=30) => {
537 let n = std::mem::size_of::<u64>() * (n as usize);
538
539 user_pt_reg(offset_of!(user_pt_regs, regs) + n)
540 }
541 KvmVcpuRegister::X(n) => unreachable!("invalid KvmVcpuRegister Xn index: {n}"),
542 KvmVcpuRegister::Sp => user_pt_reg(offset_of!(user_pt_regs, sp)),
543 KvmVcpuRegister::Pc => user_pt_reg(offset_of!(user_pt_regs, pc)),
544 KvmVcpuRegister::Pstate => user_pt_reg(offset_of!(user_pt_regs, pstate)),
545 KvmVcpuRegister::V(n @ 0..=31) => {
546 let n = std::mem::size_of::<u128>() * (n as usize);
547
548 user_fpsimd_state_reg(KVM_REG_SIZE_U128, offset_of!(user_fpsimd_state, vregs) + n)
549 }
550 KvmVcpuRegister::V(n) => unreachable!("invalid KvmVcpuRegister Vn index: {n}"),
551 KvmVcpuRegister::System(AArch64SysRegId::FPSR) => {
552 user_fpsimd_state_reg(KVM_REG_SIZE_U32, offset_of!(user_fpsimd_state, fpsr))
553 }
554 KvmVcpuRegister::System(AArch64SysRegId::FPCR) => {
555 user_fpsimd_state_reg(KVM_REG_SIZE_U32, offset_of!(user_fpsimd_state, fpcr))
556 }
557 KvmVcpuRegister::System(AArch64SysRegId::SPSR_EL1) => spsr_reg(KVM_SPSR_EL1),
558 KvmVcpuRegister::System(AArch64SysRegId::SPSR_abt) => spsr_reg(KVM_SPSR_ABT),
559 KvmVcpuRegister::System(AArch64SysRegId::SPSR_und) => spsr_reg(KVM_SPSR_UND),
560 KvmVcpuRegister::System(AArch64SysRegId::SPSR_irq) => spsr_reg(KVM_SPSR_IRQ),
561 KvmVcpuRegister::System(AArch64SysRegId::SPSR_fiq) => spsr_reg(KVM_SPSR_FIQ),
562 KvmVcpuRegister::System(AArch64SysRegId::SP_EL1) => {
563 kvm_reg(offset_of!(kvm_regs, sp_el1))
564 }
565 KvmVcpuRegister::System(AArch64SysRegId::ELR_EL1) => {
566 kvm_reg(offset_of!(kvm_regs, elr_el1))
567 }
568 // The KVM API accidentally swapped CNTV_CVAL_EL0 and CNTVCT_EL0.
569 KvmVcpuRegister::System(AArch64SysRegId::CNTV_CVAL_EL0) => reg_u64(
570 KVM_REG_ARM64_SYSREG.into(),
571 AArch64SysRegId::CNTVCT_EL0.encoded().into(),
572 ),
573 KvmVcpuRegister::System(AArch64SysRegId::CNTVCT_EL0) => reg_u64(
574 KVM_REG_ARM64_SYSREG.into(),
575 AArch64SysRegId::CNTV_CVAL_EL0.encoded().into(),
576 ),
577 KvmVcpuRegister::System(sysreg) => {
578 reg_u64(KVM_REG_ARM64_SYSREG.into(), sysreg.encoded().into())
579 }
580 KvmVcpuRegister::Firmware(n) => reg_u64(KVM_REG_ARM_FW.into(), n.into()),
581 KvmVcpuRegister::Ccsidr(n) => demux_reg(KVM_REG_SIZE_U32, 0, n.into()),
582 }
583 }
584 }
585
586 impl VcpuAArch64 for KvmVcpu {
init(&self, features: &[VcpuFeature]) -> Result<()>587 fn init(&self, features: &[VcpuFeature]) -> Result<()> {
588 let mut kvi = kvm_vcpu_init {
589 target: KVM_ARM_TARGET_GENERIC_V8,
590 features: [0; 7],
591 };
592 // SAFETY:
593 // Safe because we allocated the struct and we know the kernel will write exactly the size
594 // of the struct.
595 let ret = unsafe { ioctl_with_mut_ref(&self.vm, KVM_ARM_PREFERRED_TARGET, &mut kvi) };
596 if ret != 0 {
597 return errno_result();
598 }
599
600 kvi.features[0] = self.get_features_bitmap(features)?;
601 // SAFETY:
602 // Safe because we allocated the struct and we know the kernel will read exactly the size of
603 // the struct.
604 let ret = unsafe { ioctl_with_ref(self, KVM_ARM_VCPU_INIT, &kvi) };
605 if ret != 0 {
606 return errno_result();
607 }
608
609 self.finalize(kvi.features[0])?;
610 Ok(())
611 }
612
init_pmu(&self, irq: u64) -> Result<()>613 fn init_pmu(&self, irq: u64) -> Result<()> {
614 let irq_addr = &irq as *const u64;
615
616 // The in-kernel PMU virtualization is initialized by setting the irq
617 // with KVM_ARM_VCPU_PMU_V3_IRQ and then by KVM_ARM_VCPU_PMU_V3_INIT.
618
619 let irq_attr = kvm_device_attr {
620 group: KVM_ARM_VCPU_PMU_V3_CTRL,
621 attr: KVM_ARM_VCPU_PMU_V3_IRQ as u64,
622 addr: irq_addr as u64,
623 flags: 0,
624 };
625 // SAFETY:
626 // Safe because we allocated the struct and we know the kernel will read exactly the size of
627 // the struct.
628 let ret = unsafe { ioctl_with_ref(self, kvm_sys::KVM_HAS_DEVICE_ATTR, &irq_attr) };
629 if ret < 0 {
630 return errno_result();
631 }
632
633 // SAFETY:
634 // Safe because we allocated the struct and we know the kernel will read exactly the size of
635 // the struct.
636 let ret = unsafe { ioctl_with_ref(self, kvm_sys::KVM_SET_DEVICE_ATTR, &irq_attr) };
637 if ret < 0 {
638 return errno_result();
639 }
640
641 let init_attr = kvm_device_attr {
642 group: KVM_ARM_VCPU_PMU_V3_CTRL,
643 attr: KVM_ARM_VCPU_PMU_V3_INIT as u64,
644 addr: 0,
645 flags: 0,
646 };
647 // SAFETY:
648 // Safe because we allocated the struct and we know the kernel will read exactly the size of
649 // the struct.
650 let ret = unsafe { ioctl_with_ref(self, kvm_sys::KVM_SET_DEVICE_ATTR, &init_attr) };
651 if ret < 0 {
652 return errno_result();
653 }
654
655 Ok(())
656 }
657
has_pvtime_support(&self) -> bool658 fn has_pvtime_support(&self) -> bool {
659 // The in-kernel PV time structure is initialized by setting the base
660 // address with KVM_ARM_VCPU_PVTIME_IPA
661 let pvtime_attr = kvm_device_attr {
662 group: KVM_ARM_VCPU_PVTIME_CTRL,
663 attr: KVM_ARM_VCPU_PVTIME_IPA as u64,
664 addr: 0,
665 flags: 0,
666 };
667 // SAFETY:
668 // Safe because we allocated the struct and we know the kernel will read exactly the size of
669 // the struct.
670 let ret = unsafe { ioctl_with_ref(self, kvm_sys::KVM_HAS_DEVICE_ATTR, &pvtime_attr) };
671 ret >= 0
672 }
673
init_pvtime(&self, pvtime_ipa: u64) -> Result<()>674 fn init_pvtime(&self, pvtime_ipa: u64) -> Result<()> {
675 let pvtime_ipa_addr = &pvtime_ipa as *const u64;
676
677 // The in-kernel PV time structure is initialized by setting the base
678 // address with KVM_ARM_VCPU_PVTIME_IPA
679 let pvtime_attr = kvm_device_attr {
680 group: KVM_ARM_VCPU_PVTIME_CTRL,
681 attr: KVM_ARM_VCPU_PVTIME_IPA as u64,
682 addr: pvtime_ipa_addr as u64,
683 flags: 0,
684 };
685
686 // SAFETY:
687 // Safe because we allocated the struct and we know the kernel will read exactly the size of
688 // the struct.
689 let ret = unsafe { ioctl_with_ref(self, kvm_sys::KVM_SET_DEVICE_ATTR, &pvtime_attr) };
690 if ret < 0 {
691 return errno_result();
692 }
693
694 Ok(())
695 }
696
set_one_reg(&self, reg_id: VcpuRegAArch64, data: u64) -> Result<()>697 fn set_one_reg(&self, reg_id: VcpuRegAArch64, data: u64) -> Result<()> {
698 let kvm_reg = self.kvm_reg_id(reg_id)?;
699 match kvm_reg.size() {
700 4 => self.set_one_kvm_reg_u32(kvm_reg, data as u32),
701 8 => self.set_one_kvm_reg_u64(kvm_reg, data),
702 size => panic!("bad reg size {size}"),
703 }
704 }
705
get_one_reg(&self, reg_id: VcpuRegAArch64) -> Result<u64>706 fn get_one_reg(&self, reg_id: VcpuRegAArch64) -> Result<u64> {
707 let kvm_reg = self.kvm_reg_id(reg_id)?;
708 match kvm_reg.size() {
709 4 => self.get_one_kvm_reg_u32(kvm_reg).map(u64::from),
710 8 => self.get_one_kvm_reg_u64(kvm_reg),
711 size => panic!("bad reg size {size}"),
712 }
713 }
714
set_vector_reg(&self, reg_num: u8, data: u128) -> Result<()>715 fn set_vector_reg(&self, reg_num: u8, data: u128) -> Result<()> {
716 if reg_num > 31 {
717 return Err(Error::new(EINVAL));
718 }
719 self.set_one_kvm_reg_u128(KvmVcpuRegister::V(reg_num), data)
720 }
721
get_vector_reg(&self, reg_num: u8) -> Result<u128>722 fn get_vector_reg(&self, reg_num: u8) -> Result<u128> {
723 if reg_num > 31 {
724 return Err(Error::new(EINVAL));
725 }
726 self.get_one_kvm_reg_u128(KvmVcpuRegister::V(reg_num))
727 }
728
get_mpidr(&self) -> Result<u64>729 fn get_mpidr(&self) -> Result<u64> {
730 self.get_one_reg(VcpuRegAArch64::System(AArch64SysRegId::MPIDR_EL1))
731 }
732
get_psci_version(&self) -> Result<PsciVersion>733 fn get_psci_version(&self) -> Result<PsciVersion> {
734 let version = if let Ok(v) = self.get_one_kvm_reg_u64(KvmVcpuRegister::PSCI_VERSION) {
735 let v = u32::try_from(v).map_err(|_| Error::new(EINVAL))?;
736 PsciVersion::try_from(v)?
737 } else {
738 // When `KVM_REG_ARM_PSCI_VERSION` is not supported, we can return PSCI 0.2, as vCPU
739 // has been initialized with `KVM_ARM_VCPU_PSCI_0_2` successfully.
740 PSCI_0_2
741 };
742
743 if version < PSCI_0_2 {
744 // PSCI v0.1 isn't currently supported for guests
745 Err(Error::new(ENOTSUP))
746 } else {
747 Ok(version)
748 }
749 }
750
get_max_hw_bps(&self) -> Result<usize>751 fn get_max_hw_bps(&self) -> Result<usize> {
752 // SAFETY:
753 // Safe because the kernel will only return the result of the ioctl.
754 let max_hw_bps = unsafe {
755 ioctl_with_val(
756 &self.vm,
757 KVM_CHECK_EXTENSION,
758 KVM_CAP_GUEST_DEBUG_HW_BPS.into(),
759 )
760 };
761
762 if max_hw_bps < 0 {
763 errno_result()
764 } else {
765 Ok(max_hw_bps.try_into().expect("can't represent u64 as usize"))
766 }
767 }
768
get_system_regs(&self) -> Result<BTreeMap<AArch64SysRegId, u64>>769 fn get_system_regs(&self) -> Result<BTreeMap<AArch64SysRegId, u64>> {
770 let reg_list = self.get_reg_list()?;
771 let cntvct_el0: u16 = AArch64SysRegId::CNTVCT_EL0.encoded();
772 let cntv_cval_el0: u16 = AArch64SysRegId::CNTV_CVAL_EL0.encoded();
773 let mut sys_regs = BTreeMap::new();
774 for reg in reg_list {
775 if (reg as u32) & KVM_REG_ARM_COPROC_MASK == KVM_REG_ARM64_SYSREG {
776 let r = if reg as u16 == cntvct_el0 {
777 AArch64SysRegId::CNTV_CVAL_EL0
778 } else if reg as u16 == cntv_cval_el0 {
779 AArch64SysRegId::CNTVCT_EL0
780 } else {
781 AArch64SysRegId::from_encoded((reg & 0xFFFF) as u16)
782 };
783 sys_regs.insert(r, self.get_one_reg(VcpuRegAArch64::System(r))?);
784 // The register representations are tricky. Double check they round trip correctly.
785 assert_eq!(
786 Ok(reg),
787 self.kvm_reg_id(VcpuRegAArch64::System(r)).map(u64::from),
788 );
789 }
790 }
791 Ok(sys_regs)
792 }
793
get_cache_info(&self) -> Result<BTreeMap<u8, u64>>794 fn get_cache_info(&self) -> Result<BTreeMap<u8, u64>> {
795 const KVM_REG_CCSIDR: u64 = KVM_REG_ARM64 | KVM_REG_SIZE_U32 | (KVM_REG_ARM_DEMUX as u64);
796 const CCSIDR_INDEX_MASK: u64 = 0xFF;
797 let reg_list = self.get_reg_list()?;
798 let mut cache_info = BTreeMap::new();
799 for reg in reg_list {
800 if (reg & !CCSIDR_INDEX_MASK) == KVM_REG_CCSIDR {
801 let idx = reg as u8;
802 cache_info.insert(
803 idx,
804 self.get_one_kvm_reg_u32(KvmVcpuRegister::Ccsidr(idx))?
805 .into(),
806 );
807 }
808 }
809 Ok(cache_info)
810 }
811
set_cache_info(&self, cache_info: BTreeMap<u8, u64>) -> Result<()>812 fn set_cache_info(&self, cache_info: BTreeMap<u8, u64>) -> Result<()> {
813 for (idx, val) in cache_info {
814 self.set_one_kvm_reg_u32(
815 KvmVcpuRegister::Ccsidr(idx),
816 val.try_into()
817 .expect("trying to set a u32 register with a u64 value"),
818 )?;
819 }
820 Ok(())
821 }
822
hypervisor_specific_snapshot(&self) -> anyhow::Result<serde_json::Value>823 fn hypervisor_specific_snapshot(&self) -> anyhow::Result<serde_json::Value> {
824 let reg_list = self.get_reg_list()?;
825 let mut firmware_regs = BTreeMap::new();
826 for reg in reg_list {
827 if (reg as u32) & KVM_REG_ARM_COPROC_MASK == KVM_REG_ARM_FW {
828 firmware_regs.insert(
829 reg as u16,
830 self.get_one_kvm_reg_u64(KvmVcpuRegister::Firmware(reg as u16))?,
831 );
832 }
833 }
834
835 serde_json::to_value(KvmSnapshot { firmware_regs })
836 .context("Failed to serialize KVM specific data")
837 }
838
hypervisor_specific_restore(&self, data: serde_json::Value) -> anyhow::Result<()>839 fn hypervisor_specific_restore(&self, data: serde_json::Value) -> anyhow::Result<()> {
840 let deser: KvmSnapshot =
841 serde_json::from_value(data).context("Failed to deserialize KVM specific data")?;
842 // TODO: need to set firmware registers before "create_fdt" is called, earlier in the
843 // stack.
844 for (id, val) in &deser.firmware_regs {
845 self.set_one_kvm_reg_u64(KvmVcpuRegister::Firmware(*id), *val)?;
846 }
847 Ok(())
848 }
849
850 #[allow(clippy::unusual_byte_groupings)]
set_guest_debug(&self, addrs: &[GuestAddress], enable_singlestep: bool) -> Result<()>851 fn set_guest_debug(&self, addrs: &[GuestAddress], enable_singlestep: bool) -> Result<()> {
852 let mut dbg = kvm_guest_debug {
853 control: KVM_GUESTDBG_ENABLE,
854 ..Default::default()
855 };
856
857 if enable_singlestep {
858 dbg.control |= KVM_GUESTDBG_SINGLESTEP;
859 }
860 if !addrs.is_empty() {
861 dbg.control |= KVM_GUESTDBG_USE_HW;
862 }
863
864 for (i, guest_addr) in addrs.iter().enumerate() {
865 // From the ARMv8 Architecture Reference Manual (DDI0487H.a) D31.3.{2,3}:
866 // When DBGBCR<n>_EL1.BT == 0b000x:
867 // DBGBVR<n>_EL1, Bits [1:0]: Reserved, RES0
868 if guest_addr.0 & 0b11 != 0 {
869 return Err(Error::new(EINVAL));
870 }
871 let sign_ext = 15;
872 // DBGBVR<n>_EL1.RESS[14:0], bits [63:49]: Reserved, Sign extended
873 dbg.arch.dbg_bvr[i] = (((guest_addr.0 << sign_ext) as i64) >> sign_ext) as u64;
874 // DBGBCR<n>_EL1.BT, bits [23:20]: Breakpoint Type
875 // 0b0000: Unlinked instruction address match.
876 // DBGBVR<n>_EL1 is the address of an instruction.
877 // DBGBCR<n>_EL1.BAS, bits [8:5]: Byte address select
878 // 0b1111: Use for A64 and A32 instructions
879 // DBGBCR<n>_EL1.PMC, bits [2:1]: Privilege mode control
880 // 0b11: EL1 & EL0
881 // DBGBCR<n>_EL1.E, bit [0]: Enable breakpoint
882 // 0b1: Enabled
883 dbg.arch.dbg_bcr[i] = 0b1111_11_1;
884 }
885
886 // SAFETY:
887 // Safe because the kernel won't read past the end of the kvm_guest_debug struct.
888 let ret = unsafe { ioctl_with_ref(self, KVM_SET_GUEST_DEBUG, &dbg) };
889 if ret == 0 {
890 Ok(())
891 } else {
892 errno_result()
893 }
894 }
895 }
896
/// KVM-specific vCPU state written by `hypervisor_specific_snapshot` and consumed by
/// `hypervisor_specific_restore`.
#[derive(Debug, Serialize, Deserialize)]
struct KvmSnapshot {
    /// Firmware pseudo-register values, keyed by the low 16 bits of the KVM register ID (the
    /// value carried by `KvmVcpuRegister::Firmware`).
    firmware_regs: BTreeMap<u16, u64>,
}
901
902 // This function translates an IrqSrouceChip to the kvm u32 equivalent. It has a different
903 // implementation between x86_64 and aarch64 because the irqchip KVM constants are not defined on
904 // all architectures.
chip_to_kvm_chip(chip: IrqSourceChip) -> u32905 pub(super) fn chip_to_kvm_chip(chip: IrqSourceChip) -> u32 {
906 match chip {
907 // ARM does not have a constant for this, but the default routing
908 // setup seems to set this to 0
909 IrqSourceChip::Gic => 0,
910 _ => {
911 error!("Invalid IrqChipSource for ARM {:?}", chip);
912 0
913 }
914 }
915 }
916
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks the register IDs produced for the two virtual timer registers.
    #[test]
    fn system_timer_register_mixup() {
        // Per https://docs.kernel.org/virt/kvm/api.html ARM64 system register encoding docs,
        // KVM_REG_ARM_TIMER_CVAL and KVM_REG_ARM_TIMER_CNT were accidentally defined backwards.
        // Ensure the AArch64SysRegId to KvmVcpuRegister encoding maps these to the expected
        // values.
        const KVM_REG_ARM_TIMER_CVAL: u64 = 0x6030_0000_0013_DF02;
        const KVM_REG_ARM_TIMER_CNT: u64 = 0x6030_0000_0013_DF1A;

        let expectations = [
            (AArch64SysRegId::CNTV_CVAL_EL0, KVM_REG_ARM_TIMER_CVAL),
            (AArch64SysRegId::CNTVCT_EL0, KVM_REG_ARM_TIMER_CNT),
        ];
        for (sysreg, expected_id) in expectations {
            let kvm_reg = KvmVcpuRegister::System(sysreg);
            assert_eq!(u64::from(kvm_reg), expected_id);
        }
    }
}
937