1 // Copyright 2018 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 //! ARM 64-bit architecture support.
6
7 #![cfg(any(target_arch = "arm", target_arch = "aarch64"))]
8
9 use std::collections::BTreeMap;
10 use std::fs::File;
11 use std::io;
12 use std::path::PathBuf;
13 use std::sync::atomic::AtomicU32;
14 use std::sync::mpsc;
15 use std::sync::Arc;
16
17 use arch::get_serial_cmdline;
18 use arch::CpuSet;
19 use arch::DtbOverlay;
20 use arch::FdtPosition;
21 use arch::GetSerialCmdlineError;
22 use arch::MemoryRegionConfig;
23 use arch::RunnableLinuxVm;
24 use arch::SveConfig;
25 use arch::VcpuAffinity;
26 use arch::VmComponents;
27 use arch::VmImage;
28 use base::MemoryMappingBuilder;
29 use base::SendTube;
30 use base::Tube;
31 use devices::serial_device::SerialHardware;
32 use devices::serial_device::SerialParameters;
33 use devices::vmwdt::VMWDT_DEFAULT_CLOCK_HZ;
34 use devices::vmwdt::VMWDT_DEFAULT_TIMEOUT_SEC;
35 use devices::Bus;
36 use devices::BusDeviceObj;
37 use devices::BusError;
38 use devices::BusType;
39 use devices::IrqChip;
40 use devices::IrqChipAArch64;
41 use devices::IrqEventSource;
42 use devices::PciAddress;
43 use devices::PciConfigMmio;
44 use devices::PciDevice;
45 use devices::PciRootCommand;
46 use devices::Serial;
47 #[cfg(any(target_os = "android", target_os = "linux"))]
48 use devices::VirtCpufreq;
49 #[cfg(any(target_os = "android", target_os = "linux"))]
50 use devices::VirtCpufreqV2;
51 #[cfg(feature = "gdb")]
52 use gdbstub::arch::Arch;
53 #[cfg(feature = "gdb")]
54 use gdbstub_arch::aarch64::reg::id::AArch64RegId;
55 #[cfg(feature = "gdb")]
56 use gdbstub_arch::aarch64::AArch64 as GdbArch;
57 #[cfg(feature = "gdb")]
58 use hypervisor::AArch64SysRegId;
59 use hypervisor::CpuConfigAArch64;
60 use hypervisor::DeviceKind;
61 use hypervisor::Hypervisor;
62 use hypervisor::HypervisorCap;
63 use hypervisor::MemCacheType;
64 use hypervisor::ProtectionType;
65 use hypervisor::VcpuAArch64;
66 use hypervisor::VcpuFeature;
67 use hypervisor::VcpuInitAArch64;
68 use hypervisor::VcpuRegAArch64;
69 use hypervisor::Vm;
70 use hypervisor::VmAArch64;
71 #[cfg(windows)]
72 use jail::FakeMinijailStub as Minijail;
73 use kernel_loader::LoadedKernel;
74 #[cfg(any(target_os = "android", target_os = "linux"))]
75 use minijail::Minijail;
76 use remain::sorted;
77 use resources::address_allocator::AddressAllocator;
78 use resources::AddressRange;
79 use resources::MmioType;
80 use resources::SystemAllocator;
81 use resources::SystemAllocatorConfig;
82 use sync::Condvar;
83 use sync::Mutex;
84 use thiserror::Error;
85 use vm_control::BatControl;
86 use vm_control::BatteryType;
87 use vm_memory::GuestAddress;
88 use vm_memory::GuestMemory;
89 use vm_memory::GuestMemoryError;
90 use vm_memory::MemoryRegionOptions;
91 use vm_memory::MemoryRegionPurpose;
92
93 mod fdt;
94
// Maximum size reserved for the flattened device tree blob (FDT).
const AARCH64_FDT_MAX_SIZE: u64 = 0x200000;
// Alignment of the FDT load address.
const AARCH64_FDT_ALIGN: u64 = 0x200000;
// Alignment of the initrd load address.
const AARCH64_INITRD_ALIGN: u64 = 0x1000000;

// Maximum Linux arm64 kernel command line size (arch/arm64/include/uapi/asm/setup.h).
const AARCH64_CMDLINE_MAX_SIZE: usize = 2048;

// These constants indicate the address space used by the ARM vGIC.
const AARCH64_GIC_DIST_SIZE: u64 = 0x10000;
const AARCH64_GIC_CPUI_SIZE: u64 = 0x20000;

// This indicates the start of DRAM inside the physical address space.
const AARCH64_PHYS_MEM_START: u64 = 0x80000000;
const AARCH64_PLATFORM_MMIO_SIZE: u64 = 0x800000;

// The protected VM firmware region sits immediately below the start of RAM.
const AARCH64_PROTECTED_VM_FW_MAX_SIZE: u64 = 0x400000;
const AARCH64_PROTECTED_VM_FW_START: u64 =
    AARCH64_PHYS_MEM_START - AARCH64_PROTECTED_VM_FW_MAX_SIZE;

// Paravirtualized time (pvtime) guest-physical region; each vCPU gets one
// AARCH64_PVTIME_SIZE slot within it.
const AARCH64_PVTIME_IPA_MAX_SIZE: u64 = 0x10000;
const AARCH64_PVTIME_IPA_START: u64 = 0x1ff0000;
const AARCH64_PVTIME_SIZE: u64 = 64;

// These constants indicate the placement of the GIC registers in the physical
// address space.
const AARCH64_GIC_DIST_BASE: u64 = 0x40000000 - AARCH64_GIC_DIST_SIZE;
const AARCH64_GIC_CPUI_BASE: u64 = AARCH64_GIC_DIST_BASE - AARCH64_GIC_CPUI_SIZE;
const AARCH64_GIC_REDIST_SIZE: u64 = 0x20000;

// PSR (Processor State Register) bits
const PSR_MODE_EL1H: u64 = 0x00000005;
const PSR_F_BIT: u64 = 0x00000040;
const PSR_I_BIT: u64 = 0x00000080;
const PSR_A_BIT: u64 = 0x00000100;
const PSR_D_BIT: u64 = 0x00000200;

// This was the speed kvmtool used, not sure if it matters.
const AARCH64_SERIAL_SPEED: u32 = 1843200;
// The serial device gets the first interrupt line
// Which gets mapped to the first SPI interrupt (physical 32).
const AARCH64_SERIAL_1_3_IRQ: u32 = 0;
const AARCH64_SERIAL_2_4_IRQ: u32 = 2;

// Place the RTC device at page 2
const AARCH64_RTC_ADDR: u64 = 0x2000;
// The RTC device gets one 4k page
const AARCH64_RTC_SIZE: u64 = 0x1000;
// The RTC device gets the second interrupt line
const AARCH64_RTC_IRQ: u32 = 1;

// The Goldfish battery device gets the 3rd interrupt line
const AARCH64_BAT_IRQ: u32 = 3;

// Place the virtual watchdog device at page 3
const AARCH64_VMWDT_ADDR: u64 = 0x3000;
// The virtual watchdog device gets one 4k page
const AARCH64_VMWDT_SIZE: u64 = 0x1000;

// Default PCI MMIO configuration region base address.
const AARCH64_PCI_CAM_BASE_DEFAULT: u64 = 0x10000;
// Default PCI MMIO configuration region size.
const AARCH64_PCI_CAM_SIZE_DEFAULT: u64 = 0x1000000;
// Default PCI mem base address.
const AARCH64_PCI_MEM_BASE_DEFAULT: u64 = 0x2000000;
// Default PCI mem size.
const AARCH64_PCI_MEM_SIZE_DEFAULT: u64 = 0x2000000;
// Virtio devices start at SPI interrupt number 4
const AARCH64_IRQ_BASE: u32 = 4;

// Virtual CPU Frequency Device MMIO window. Per-vCPU device size differs
// between v1 (AARCH64_VIRTFREQ_SIZE) and v2 (AARCH64_VIRTFREQ_V2_SIZE).
const AARCH64_VIRTFREQ_BASE: u64 = 0x1040000;
const AARCH64_VIRTFREQ_SIZE: u64 = 0x8;
const AARCH64_VIRTFREQ_MAXSIZE: u64 = 0x10000;
const AARCH64_VIRTFREQ_V2_SIZE: u64 = 0x1000;

// PMU PPI interrupt, same as qemu
const AARCH64_PMU_IRQ: u32 = 7;

// VCPU stall detector interrupt
const AARCH64_VMWDT_IRQ: u32 = 15;
175
/// The guest payload placed in RAM: either a BIOS/firmware image or a Linux
/// kernel loaded by `load_kernel`.
enum PayloadType {
    Bios {
        // Guest address where the BIOS image was loaded (also its entry point).
        entry: GuestAddress,
        // Size in bytes of the loaded BIOS image.
        image_size: u64,
    },
    Kernel(LoadedKernel),
}
183
184 impl PayloadType {
entry(&self) -> GuestAddress185 fn entry(&self) -> GuestAddress {
186 match self {
187 Self::Bios {
188 entry,
189 image_size: _,
190 } => *entry,
191 Self::Kernel(k) => k.entry,
192 }
193 }
194
size(&self) -> u64195 fn size(&self) -> u64 {
196 match self {
197 Self::Bios {
198 entry: _,
199 image_size,
200 } => *image_size,
201 Self::Kernel(k) => k.size,
202 }
203 }
204 }
205
206 // When static swiotlb allocation is required, returns the address it should be allocated at.
207 // Otherwise, returns None.
get_swiotlb_addr( memory_size: u64, swiotlb_size: u64, hypervisor: &(impl Hypervisor + ?Sized), ) -> Option<GuestAddress>208 fn get_swiotlb_addr(
209 memory_size: u64,
210 swiotlb_size: u64,
211 hypervisor: &(impl Hypervisor + ?Sized),
212 ) -> Option<GuestAddress> {
213 if hypervisor.check_capability(HypervisorCap::StaticSwiotlbAllocationRequired) {
214 Some(GuestAddress(
215 AARCH64_PHYS_MEM_START + memory_size - swiotlb_size,
216 ))
217 } else {
218 None
219 }
220 }
221
/// Errors that can occur while building or configuring an AArch64 VM.
///
/// Variants are kept alphabetically sorted (enforced by `#[sorted]`).
#[sorted]
#[derive(Error, Debug)]
pub enum Error {
    #[error("failed to allocate IRQ number")]
    AllocateIrq,
    #[error("bios could not be loaded: {0}")]
    BiosLoadFailure(arch::LoadImageError),
    #[error("failed to build arm pvtime memory: {0}")]
    BuildPvtimeError(base::MmapError),
    #[error("unable to clone an Event: {0}")]
    CloneEvent(base::Error),
    #[error("failed to clone IRQ chip: {0}")]
    CloneIrqChip(base::Error),
    #[error("the given kernel command line was invalid: {0}")]
    Cmdline(kernel_cmdline::Error),
    #[error("bad PCI CAM configuration: {0}")]
    ConfigurePciCam(String),
    #[error("bad PCI mem configuration: {0}")]
    ConfigurePciMem(String),
    #[error("failed to configure CPU Frequencies: {0}")]
    CpuFrequencies(base::Error),
    #[error("failed to configure CPU topology: {0}")]
    CpuTopology(base::Error),
    #[error("unable to create battery devices: {0}")]
    CreateBatDevices(arch::DeviceRegistrationError),
    #[error("unable to make an Event: {0}")]
    CreateEvent(base::Error),
    #[error("FDT could not be created: {0}")]
    CreateFdt(cros_fdt::Error),
    #[error("failed to create GIC: {0}")]
    CreateGICFailure(base::Error),
    #[error("failed to create a PCI root hub: {0}")]
    CreatePciRoot(arch::DeviceRegistrationError),
    #[error("failed to create platform bus: {0}")]
    CreatePlatformBus(arch::DeviceRegistrationError),
    #[error("unable to create serial devices: {0}")]
    CreateSerialDevices(arch::DeviceRegistrationError),
    #[error("failed to create socket: {0}")]
    CreateSocket(io::Error),
    #[error("failed to create tube: {0}")]
    CreateTube(base::TubeError),
    #[error("failed to create VCPU: {0}")]
    CreateVcpu(base::Error),
    #[error("unable to create vm watchdog timer device: {0}")]
    CreateVmwdtDevice(anyhow::Error),
    #[error("custom pVM firmware could not be loaded: {0}")]
    CustomPvmFwLoadFailure(arch::LoadImageError),
    #[error("vm created wrong kind of vcpu")]
    DowncastVcpu,
    #[error("failed to enable singlestep execution: {0}")]
    EnableSinglestep(base::Error),
    #[error("failed to finalize IRQ chip: {0}")]
    FinalizeIrqChip(base::Error),
    #[error("failed to get HW breakpoint count: {0}")]
    GetMaxHwBreakPoint(base::Error),
    #[error("failed to get PSCI version: {0}")]
    GetPsciVersion(base::Error),
    #[error("failed to get serial cmdline: {0}")]
    GetSerialCmdline(GetSerialCmdlineError),
    #[error("failed to initialize arm pvtime: {0}")]
    InitPvtimeError(base::Error),
    #[error("initrd could not be loaded: {0}")]
    InitrdLoadFailure(arch::LoadImageError),
    #[error("failed to initialize virtual machine {0}")]
    InitVmError(base::Error),
    #[error("kernel could not be loaded: {0}")]
    KernelLoadFailure(kernel_loader::Error),
    #[error("error loading Kernel from Elf image: {0}")]
    LoadElfKernel(kernel_loader::Error),
    #[error("failed to map arm pvtime memory: {0}")]
    MapPvtimeError(base::Error),
    #[error("pVM firmware could not be loaded: {0}")]
    PvmFwLoadFailure(base::Error),
    #[error("ramoops address is different from high_mmio_base: {0} vs {1}")]
    RamoopsAddress(u64, u64),
    #[error("error reading guest memory: {0}")]
    ReadGuestMemory(vm_memory::GuestMemoryError),
    #[error("error reading CPU register: {0}")]
    ReadReg(base::Error),
    #[error("error reading CPU registers: {0}")]
    ReadRegs(base::Error),
    #[error("failed to register irq fd: {0}")]
    RegisterIrqfd(base::Error),
    #[error("error registering PCI bus: {0}")]
    RegisterPci(BusError),
    #[error("error registering virtual cpufreq device: {0}")]
    RegisterVirtCpufreq(BusError),
    #[error("error registering virtual socket device: {0}")]
    RegisterVsock(arch::DeviceRegistrationError),
    #[error("failed to set device attr: {0}")]
    SetDeviceAttr(base::Error),
    #[error("failed to set a hardware breakpoint: {0}")]
    SetHwBreakpoint(base::Error),
    #[error("failed to set register: {0}")]
    SetReg(base::Error),
    #[error("failed to set up guest memory: {0}")]
    SetupGuestMemory(GuestMemoryError),
    #[error("this function isn't supported")]
    Unsupported,
    #[error("failed to initialize VCPU: {0}")]
    VcpuInit(base::Error),
    #[error("error writing guest memory: {0}")]
    WriteGuestMemory(GuestMemoryError),
    #[error("error writing CPU register: {0}")]
    WriteReg(base::Error),
    #[error("error writing CPU registers: {0}")]
    WriteRegs(base::Error),
}
330
331 pub type Result<T> = std::result::Result<T, Error>;
332
load_kernel( guest_mem: &GuestMemory, kernel_start: GuestAddress, mut kernel_image: &mut File, ) -> Result<LoadedKernel>333 fn load_kernel(
334 guest_mem: &GuestMemory,
335 kernel_start: GuestAddress,
336 mut kernel_image: &mut File,
337 ) -> Result<LoadedKernel> {
338 if let Ok(elf_kernel) = kernel_loader::load_elf(
339 guest_mem,
340 kernel_start,
341 &mut kernel_image,
342 AARCH64_PHYS_MEM_START,
343 ) {
344 return Ok(elf_kernel);
345 }
346
347 if let Ok(lz4_kernel) =
348 kernel_loader::load_arm64_kernel_lz4(guest_mem, kernel_start, &mut kernel_image)
349 {
350 return Ok(lz4_kernel);
351 }
352
353 kernel_loader::load_arm64_kernel(guest_mem, kernel_start, kernel_image)
354 .map_err(Error::KernelLoadFailure)
355 }
356
357 pub struct AArch64;
358
get_block_size() -> u64359 fn get_block_size() -> u64 {
360 let page_size = base::pagesize();
361 // Each PTE entry being 8 bytes long, we can fit in one page (page_size / 8)
362 // entries.
363 let ptes_per_page = page_size / 8;
364 let block_size = page_size * ptes_per_page;
365
366 block_size as u64
367 }
368
get_vcpu_mpidr_aff<Vcpu: VcpuAArch64>(vcpus: &[Vcpu], index: usize) -> Option<u64>369 fn get_vcpu_mpidr_aff<Vcpu: VcpuAArch64>(vcpus: &[Vcpu], index: usize) -> Option<u64> {
370 const MPIDR_AFF_MASK: u64 = 0xff_00ff_ffff;
371
372 Some(vcpus.get(index)?.get_mpidr().ok()? & MPIDR_AFF_MASK)
373 }
374
main_memory_size(components: &VmComponents, hypervisor: &(impl Hypervisor + ?Sized)) -> u64375 fn main_memory_size(components: &VmComponents, hypervisor: &(impl Hypervisor + ?Sized)) -> u64 {
376 // Static swiotlb is allocated from the end of RAM as a separate memory region, so, if
377 // enabled, make the RAM memory region smaller to leave room for it.
378 let mut main_memory_size = components.memory_size;
379 if let Some(size) = components.swiotlb {
380 if hypervisor.check_capability(HypervisorCap::StaticSwiotlbAllocationRequired) {
381 main_memory_size -= size;
382 }
383 }
384 main_memory_size
385 }
386
/// Address-space layout decisions computed once per VM by `arch_memory_layout`.
pub struct ArchMemoryLayout {
    // MMIO window for the PCI configuration access mechanism (CAM).
    pci_cam: AddressRange,
    // Low MMIO window for PCI device memory.
    pci_mem: AddressRange,
}
391
392 impl arch::LinuxArch for AArch64 {
393 type Error = Error;
394 type ArchMemoryLayout = ArchMemoryLayout;
395
arch_memory_layout( components: &VmComponents, ) -> std::result::Result<Self::ArchMemoryLayout, Self::Error>396 fn arch_memory_layout(
397 components: &VmComponents,
398 ) -> std::result::Result<Self::ArchMemoryLayout, Self::Error> {
399 let (pci_cam_start, pci_cam_size) = match components.pci_config.cam {
400 Some(MemoryRegionConfig { start, size }) => {
401 (start, size.unwrap_or(AARCH64_PCI_CAM_SIZE_DEFAULT))
402 }
403 None => (AARCH64_PCI_CAM_BASE_DEFAULT, AARCH64_PCI_CAM_SIZE_DEFAULT),
404 };
405 // TODO: Make the PCI slot allocator aware of the CAM size so we can remove this check.
406 if pci_cam_size != AARCH64_PCI_CAM_SIZE_DEFAULT {
407 return Err(Error::ConfigurePciCam(format!(
408 "PCI CAM size must be {AARCH64_PCI_CAM_SIZE_DEFAULT:#x}, got {pci_cam_size:#x}"
409 )));
410 }
411 let pci_cam = AddressRange::from_start_and_size(pci_cam_start, pci_cam_size).ok_or(
412 Error::ConfigurePciCam("PCI CAM region overflowed".to_string()),
413 )?;
414 if pci_cam.end >= AARCH64_PHYS_MEM_START {
415 return Err(Error::ConfigurePciCam(format!(
416 "PCI CAM ({pci_cam:?}) must be before start of RAM ({AARCH64_PHYS_MEM_START:#x})"
417 )));
418 }
419
420 let pci_mem = match components.pci_config.mem {
421 Some(MemoryRegionConfig { start, size }) => AddressRange::from_start_and_size(
422 start,
423 size.unwrap_or(AARCH64_PCI_MEM_SIZE_DEFAULT),
424 )
425 .ok_or(Error::ConfigurePciMem("region overflowed".to_string()))?,
426 None => AddressRange::from_start_and_size(
427 AARCH64_PCI_MEM_BASE_DEFAULT,
428 AARCH64_PCI_MEM_SIZE_DEFAULT,
429 )
430 .unwrap(),
431 };
432
433 Ok(ArchMemoryLayout { pci_cam, pci_mem })
434 }
435
436 /// Returns a Vec of the valid memory addresses.
437 /// These should be used to configure the GuestMemory structure for the platform.
guest_memory_layout( components: &VmComponents, _arch_memory_layout: &Self::ArchMemoryLayout, hypervisor: &impl Hypervisor, ) -> std::result::Result<Vec<(GuestAddress, u64, MemoryRegionOptions)>, Self::Error>438 fn guest_memory_layout(
439 components: &VmComponents,
440 _arch_memory_layout: &Self::ArchMemoryLayout,
441 hypervisor: &impl Hypervisor,
442 ) -> std::result::Result<Vec<(GuestAddress, u64, MemoryRegionOptions)>, Self::Error> {
443 let main_memory_size = main_memory_size(components, hypervisor);
444
445 let mut memory_regions = vec![(
446 GuestAddress(AARCH64_PHYS_MEM_START),
447 main_memory_size,
448 MemoryRegionOptions::new().align(get_block_size()),
449 )];
450
451 // Allocate memory for the pVM firmware.
452 if components.hv_cfg.protection_type.runs_firmware() {
453 memory_regions.push((
454 GuestAddress(AARCH64_PROTECTED_VM_FW_START),
455 AARCH64_PROTECTED_VM_FW_MAX_SIZE,
456 MemoryRegionOptions::new().purpose(MemoryRegionPurpose::ProtectedFirmwareRegion),
457 ));
458 }
459
460 if let Some(size) = components.swiotlb {
461 if let Some(addr) = get_swiotlb_addr(components.memory_size, size, hypervisor) {
462 memory_regions.push((
463 addr,
464 size,
465 MemoryRegionOptions::new().purpose(MemoryRegionPurpose::StaticSwiotlbRegion),
466 ));
467 }
468 }
469
470 Ok(memory_regions)
471 }
472
get_system_allocator_config<V: Vm>( vm: &V, arch_memory_layout: &Self::ArchMemoryLayout, ) -> SystemAllocatorConfig473 fn get_system_allocator_config<V: Vm>(
474 vm: &V,
475 arch_memory_layout: &Self::ArchMemoryLayout,
476 ) -> SystemAllocatorConfig {
477 let guest_phys_end = 1u64 << vm.get_guest_phys_addr_bits();
478 // The platform MMIO region is immediately past the end of RAM.
479 let plat_mmio_base = vm.get_memory().end_addr().offset();
480 let plat_mmio_size = AARCH64_PLATFORM_MMIO_SIZE;
481 // The high MMIO region is the rest of the address space after the platform MMIO region.
482 let high_mmio_base = plat_mmio_base + plat_mmio_size;
483 let high_mmio_size = guest_phys_end
484 .checked_sub(high_mmio_base)
485 .unwrap_or_else(|| {
486 panic!(
487 "guest_phys_end {:#x} < high_mmio_base {:#x}",
488 guest_phys_end, high_mmio_base,
489 );
490 });
491 SystemAllocatorConfig {
492 io: None,
493 low_mmio: arch_memory_layout.pci_mem,
494 high_mmio: AddressRange::from_start_and_size(high_mmio_base, high_mmio_size)
495 .expect("invalid high mmio region"),
496 platform_mmio: Some(
497 AddressRange::from_start_and_size(plat_mmio_base, plat_mmio_size)
498 .expect("invalid platform mmio region"),
499 ),
500 first_irq: AARCH64_IRQ_BASE,
501 }
502 }
503
build_vm<V, Vcpu>( mut components: VmComponents, arch_memory_layout: &Self::ArchMemoryLayout, _vm_evt_wrtube: &SendTube, system_allocator: &mut SystemAllocator, serial_parameters: &BTreeMap<(SerialHardware, u8), SerialParameters>, serial_jail: Option<Minijail>, (bat_type, bat_jail): (Option<BatteryType>, Option<Minijail>), mut vm: V, ramoops_region: Option<arch::pstore::RamoopsRegion>, devs: Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)>, irq_chip: &mut dyn IrqChipAArch64, vcpu_ids: &mut Vec<usize>, dump_device_tree_blob: Option<PathBuf>, _debugcon_jail: Option<Minijail>, #[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>, _guest_suspended_cvar: Option<Arc<(Mutex<bool>, Condvar)>>, device_tree_overlays: Vec<DtbOverlay>, fdt_position: Option<FdtPosition>, no_pmu: bool, ) -> std::result::Result<RunnableLinuxVm<V, Vcpu>, Self::Error> where V: VmAArch64, Vcpu: VcpuAArch64,504 fn build_vm<V, Vcpu>(
505 mut components: VmComponents,
506 arch_memory_layout: &Self::ArchMemoryLayout,
507 _vm_evt_wrtube: &SendTube,
508 system_allocator: &mut SystemAllocator,
509 serial_parameters: &BTreeMap<(SerialHardware, u8), SerialParameters>,
510 serial_jail: Option<Minijail>,
511 (bat_type, bat_jail): (Option<BatteryType>, Option<Minijail>),
512 mut vm: V,
513 ramoops_region: Option<arch::pstore::RamoopsRegion>,
514 devs: Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)>,
515 irq_chip: &mut dyn IrqChipAArch64,
516 vcpu_ids: &mut Vec<usize>,
517 dump_device_tree_blob: Option<PathBuf>,
518 _debugcon_jail: Option<Minijail>,
519 #[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>,
520 _guest_suspended_cvar: Option<Arc<(Mutex<bool>, Condvar)>>,
521 device_tree_overlays: Vec<DtbOverlay>,
522 fdt_position: Option<FdtPosition>,
523 no_pmu: bool,
524 ) -> std::result::Result<RunnableLinuxVm<V, Vcpu>, Self::Error>
525 where
526 V: VmAArch64,
527 Vcpu: VcpuAArch64,
528 {
529 let has_bios = matches!(components.vm_image, VmImage::Bios(_));
530 let mem = vm.get_memory().clone();
531
532 let main_memory_size = main_memory_size(&components, vm.get_hypervisor());
533
534 let fdt_position = fdt_position.unwrap_or(if has_bios {
535 FdtPosition::Start
536 } else {
537 FdtPosition::End
538 });
539 let payload_address = match fdt_position {
540 // If FDT is at the start RAM, the payload needs to go somewhere after it.
541 FdtPosition::Start => GuestAddress(AARCH64_PHYS_MEM_START + AARCH64_FDT_MAX_SIZE),
542 // Otherwise, put the payload at the start of RAM.
543 FdtPosition::End | FdtPosition::AfterPayload => GuestAddress(AARCH64_PHYS_MEM_START),
544 };
545
546 // separate out image loading from other setup to get a specific error for
547 // image loading
548 let mut initrd = None;
549 let (payload, payload_end_address) = match components.vm_image {
550 VmImage::Bios(ref mut bios) => {
551 let image_size = arch::load_image(&mem, bios, payload_address, u64::MAX)
552 .map_err(Error::BiosLoadFailure)?;
553 (
554 PayloadType::Bios {
555 entry: payload_address,
556 image_size: image_size as u64,
557 },
558 payload_address
559 .checked_add(image_size.try_into().unwrap())
560 .unwrap(),
561 )
562 }
563 VmImage::Kernel(ref mut kernel_image) => {
564 let loaded_kernel = load_kernel(&mem, payload_address, kernel_image)?;
565 let kernel_end = loaded_kernel.address_range.end;
566 let mut payload_end = GuestAddress(kernel_end);
567 initrd = match components.initrd_image {
568 Some(initrd_file) => {
569 let mut initrd_file = initrd_file;
570 let initrd_addr =
571 (kernel_end + (AARCH64_INITRD_ALIGN - 1)) & !(AARCH64_INITRD_ALIGN - 1);
572 let initrd_max_size =
573 main_memory_size - (initrd_addr - AARCH64_PHYS_MEM_START);
574 let initrd_addr = GuestAddress(initrd_addr);
575 let initrd_size =
576 arch::load_image(&mem, &mut initrd_file, initrd_addr, initrd_max_size)
577 .map_err(Error::InitrdLoadFailure)?;
578 payload_end = initrd_addr
579 .checked_add(initrd_size.try_into().unwrap())
580 .unwrap();
581 Some((initrd_addr, initrd_size))
582 }
583 None => None,
584 };
585 (PayloadType::Kernel(loaded_kernel), payload_end)
586 }
587 };
588
589 let memory_end = GuestAddress(AARCH64_PHYS_MEM_START + main_memory_size);
590
591 let fdt_address = match fdt_position {
592 FdtPosition::Start => GuestAddress(AARCH64_PHYS_MEM_START),
593 FdtPosition::End => {
594 let addr = memory_end
595 .checked_sub(AARCH64_FDT_MAX_SIZE)
596 .expect("Not enough memory for FDT")
597 .align_down(AARCH64_FDT_ALIGN);
598 assert!(addr >= payload_end_address, "Not enough memory for FDT");
599 addr
600 }
601 FdtPosition::AfterPayload => payload_end_address
602 .align(AARCH64_FDT_ALIGN)
603 .expect("Not enough memory for FDT"),
604 };
605
606 let mut use_pmu = vm
607 .get_hypervisor()
608 .check_capability(HypervisorCap::ArmPmuV3);
609 use_pmu &= !no_pmu;
610 let vcpu_count = components.vcpu_count;
611 let mut has_pvtime = true;
612 let mut vcpus = Vec::with_capacity(vcpu_count);
613 let mut vcpu_init = Vec::with_capacity(vcpu_count);
614 for vcpu_id in 0..vcpu_count {
615 let vcpu: Vcpu = *vm
616 .create_vcpu(vcpu_id)
617 .map_err(Error::CreateVcpu)?
618 .downcast::<Vcpu>()
619 .map_err(|_| Error::DowncastVcpu)?;
620 let per_vcpu_init = if vm
621 .get_hypervisor()
622 .check_capability(HypervisorCap::HypervisorInitializedBootContext)
623 {
624 // No registers are initialized: VcpuInitAArch64.regs is an empty BTreeMap
625 Default::default()
626 } else {
627 Self::vcpu_init(
628 vcpu_id,
629 &payload,
630 fdt_address,
631 components.hv_cfg.protection_type,
632 components.boot_cpu,
633 )
634 };
635 has_pvtime &= vcpu.has_pvtime_support();
636 vcpus.push(vcpu);
637 vcpu_ids.push(vcpu_id);
638 vcpu_init.push(per_vcpu_init);
639 }
640
641 // Initialize Vcpus after all Vcpu objects have been created.
642 for (vcpu_id, vcpu) in vcpus.iter().enumerate() {
643 let features =
644 &Self::vcpu_features(vcpu_id, use_pmu, components.boot_cpu, components.sve_config);
645 vcpu.init(features).map_err(Error::VcpuInit)?;
646 }
647
648 irq_chip.finalize().map_err(Error::FinalizeIrqChip)?;
649
650 if has_pvtime {
651 let pvtime_mem = MemoryMappingBuilder::new(AARCH64_PVTIME_IPA_MAX_SIZE as usize)
652 .build()
653 .map_err(Error::BuildPvtimeError)?;
654 vm.add_memory_region(
655 GuestAddress(AARCH64_PVTIME_IPA_START),
656 Box::new(pvtime_mem),
657 false,
658 false,
659 MemCacheType::CacheCoherent,
660 )
661 .map_err(Error::MapPvtimeError)?;
662 }
663
664 if components.hv_cfg.protection_type.needs_firmware_loaded() {
665 arch::load_image(
666 &mem,
667 &mut components
668 .pvm_fw
669 .expect("pvmfw must be available if ProtectionType loads it"),
670 GuestAddress(AARCH64_PROTECTED_VM_FW_START),
671 AARCH64_PROTECTED_VM_FW_MAX_SIZE,
672 )
673 .map_err(Error::CustomPvmFwLoadFailure)?;
674 } else if components.hv_cfg.protection_type.runs_firmware() {
675 // Tell the hypervisor to load the pVM firmware.
676 vm.load_protected_vm_firmware(
677 GuestAddress(AARCH64_PROTECTED_VM_FW_START),
678 AARCH64_PROTECTED_VM_FW_MAX_SIZE,
679 )
680 .map_err(Error::PvmFwLoadFailure)?;
681 }
682
683 for (vcpu_id, vcpu) in vcpus.iter().enumerate() {
684 use_pmu &= vcpu.init_pmu(AARCH64_PMU_IRQ as u64 + 16).is_ok();
685 if has_pvtime {
686 vcpu.init_pvtime(AARCH64_PVTIME_IPA_START + (vcpu_id as u64 * AARCH64_PVTIME_SIZE))
687 .map_err(Error::InitPvtimeError)?;
688 }
689 }
690
691 let mmio_bus = Arc::new(devices::Bus::new(BusType::Mmio));
692
693 // ARM doesn't really use the io bus like x86, so just create an empty bus.
694 let io_bus = Arc::new(devices::Bus::new(BusType::Io));
695
696 // Event used by PMDevice to notify crosvm that
697 // guest OS is trying to suspend.
698 let (suspend_tube_send, suspend_tube_recv) =
699 Tube::directional_pair().map_err(Error::CreateTube)?;
700 let suspend_tube_send = Arc::new(Mutex::new(suspend_tube_send));
701
702 let (pci_devices, others): (Vec<_>, Vec<_>) = devs
703 .into_iter()
704 .partition(|(dev, _)| dev.as_pci_device().is_some());
705
706 let pci_devices = pci_devices
707 .into_iter()
708 .map(|(dev, jail_orig)| (dev.into_pci_device().unwrap(), jail_orig))
709 .collect();
710 let (pci, pci_irqs, mut pid_debug_label_map, _amls, _gpe_scope_amls) =
711 arch::generate_pci_root(
712 pci_devices,
713 irq_chip.as_irq_chip_mut(),
714 mmio_bus.clone(),
715 GuestAddress(arch_memory_layout.pci_cam.start),
716 8,
717 io_bus.clone(),
718 system_allocator,
719 &mut vm,
720 (devices::AARCH64_GIC_NR_SPIS - AARCH64_IRQ_BASE) as usize,
721 None,
722 #[cfg(feature = "swap")]
723 swap_controller,
724 )
725 .map_err(Error::CreatePciRoot)?;
726
727 let pci_root = Arc::new(Mutex::new(pci));
728 let pci_bus = Arc::new(Mutex::new(PciConfigMmio::new(pci_root.clone(), 8)));
729 let (platform_devices, _others): (Vec<_>, Vec<_>) = others
730 .into_iter()
731 .partition(|(dev, _)| dev.as_platform_device().is_some());
732
733 let platform_devices = platform_devices
734 .into_iter()
735 .map(|(dev, jail_orig)| (*(dev.into_platform_device().unwrap()), jail_orig))
736 .collect();
737 let (platform_devices, mut platform_pid_debug_label_map, dev_resources) =
738 arch::sys::linux::generate_platform_bus(
739 platform_devices,
740 irq_chip.as_irq_chip_mut(),
741 &mmio_bus,
742 system_allocator,
743 &mut vm,
744 #[cfg(feature = "swap")]
745 swap_controller,
746 components.hv_cfg.protection_type,
747 )
748 .map_err(Error::CreatePlatformBus)?;
749 pid_debug_label_map.append(&mut platform_pid_debug_label_map);
750
751 let (vmwdt_host_tube, vmwdt_control_tube) = Tube::pair().map_err(Error::CreateTube)?;
752 Self::add_arch_devs(
753 irq_chip.as_irq_chip_mut(),
754 &mmio_bus,
755 vcpu_count,
756 _vm_evt_wrtube,
757 vmwdt_control_tube,
758 )?;
759
760 let com_evt_1_3 = devices::IrqEdgeEvent::new().map_err(Error::CreateEvent)?;
761 let com_evt_2_4 = devices::IrqEdgeEvent::new().map_err(Error::CreateEvent)?;
762 let serial_devices = arch::add_serial_devices(
763 components.hv_cfg.protection_type,
764 &mmio_bus,
765 (AARCH64_SERIAL_1_3_IRQ, com_evt_1_3.get_trigger()),
766 (AARCH64_SERIAL_2_4_IRQ, com_evt_2_4.get_trigger()),
767 serial_parameters,
768 serial_jail,
769 #[cfg(feature = "swap")]
770 swap_controller,
771 )
772 .map_err(Error::CreateSerialDevices)?;
773
774 let source = IrqEventSource {
775 device_id: Serial::device_id(),
776 queue_id: 0,
777 device_name: Serial::debug_label(),
778 };
779 irq_chip
780 .register_edge_irq_event(AARCH64_SERIAL_1_3_IRQ, &com_evt_1_3, source.clone())
781 .map_err(Error::RegisterIrqfd)?;
782 irq_chip
783 .register_edge_irq_event(AARCH64_SERIAL_2_4_IRQ, &com_evt_2_4, source)
784 .map_err(Error::RegisterIrqfd)?;
785
786 mmio_bus
787 .insert(
788 pci_bus,
789 arch_memory_layout.pci_cam.start,
790 arch_memory_layout.pci_cam.len().unwrap(),
791 )
792 .map_err(Error::RegisterPci)?;
793
794 let (vcpufreq_host_tube, vcpufreq_control_tube) =
795 Tube::pair().map_err(Error::CreateTube)?;
796 let vcpufreq_shared_tube = Arc::new(Mutex::new(vcpufreq_control_tube));
797 #[cfg(any(target_os = "android", target_os = "linux"))]
798 if !components.cpu_frequencies.is_empty() {
799 let mut freq_domain_vcpus: BTreeMap<u32, Vec<usize>> = BTreeMap::new();
800 let mut freq_domain_perfs: BTreeMap<u32, Arc<AtomicU32>> = BTreeMap::new();
801 let mut vcpu_affinities: Vec<u32> = Vec::new();
802 for vcpu in 0..vcpu_count {
803 let freq_domain = *components.vcpu_domains.get(&vcpu).unwrap_or(&(vcpu as u32));
804 freq_domain_vcpus.entry(freq_domain).or_default().push(vcpu);
805 let vcpu_affinity = match components.vcpu_affinity.clone() {
806 Some(VcpuAffinity::Global(v)) => v,
807 Some(VcpuAffinity::PerVcpu(mut m)) => m.remove(&vcpu).unwrap_or_default(),
808 None => panic!("vcpu_affinity needs to be set for VirtCpufreq"),
809 };
810 vcpu_affinities.push(vcpu_affinity[0].try_into().unwrap());
811 }
812 for domain in freq_domain_vcpus.keys() {
813 let domain_perf = Arc::new(AtomicU32::new(0));
814 freq_domain_perfs.insert(*domain, domain_perf);
815 }
816 let largest_vcpu_affinity_idx = *vcpu_affinities.iter().max().unwrap() as usize;
817 for (vcpu, vcpu_affinity) in vcpu_affinities.iter().enumerate() {
818 let mut virtfreq_size = AARCH64_VIRTFREQ_SIZE;
819 if components.virt_cpufreq_v2 {
820 let domain = *components.vcpu_domains.get(&vcpu).unwrap_or(&(vcpu as u32));
821 virtfreq_size = AARCH64_VIRTFREQ_V2_SIZE;
822 let virt_cpufreq = Arc::new(Mutex::new(VirtCpufreqV2::new(
823 *vcpu_affinity,
824 components.cpu_frequencies.get(&vcpu).unwrap().clone(),
825 components.vcpu_domain_paths.get(&vcpu).cloned(),
826 domain,
827 *components.normalized_cpu_capacities.get(&vcpu).unwrap(),
828 largest_vcpu_affinity_idx,
829 vcpufreq_shared_tube.clone(),
830 freq_domain_vcpus.get(&domain).unwrap().clone(),
831 freq_domain_perfs.get(&domain).unwrap().clone(),
832 )));
833 mmio_bus
834 .insert(
835 virt_cpufreq,
836 AARCH64_VIRTFREQ_BASE + (vcpu as u64 * virtfreq_size),
837 virtfreq_size,
838 )
839 .map_err(Error::RegisterVirtCpufreq)?;
840 } else {
841 let virt_cpufreq = Arc::new(Mutex::new(VirtCpufreq::new(
842 *vcpu_affinity,
843 *components.normalized_cpu_capacities.get(&vcpu).unwrap(),
844 *components
845 .cpu_frequencies
846 .get(&vcpu)
847 .unwrap()
848 .iter()
849 .max()
850 .unwrap(),
851 )));
852 mmio_bus
853 .insert(
854 virt_cpufreq,
855 AARCH64_VIRTFREQ_BASE + (vcpu as u64 * virtfreq_size),
856 virtfreq_size,
857 )
858 .map_err(Error::RegisterVirtCpufreq)?;
859 }
860
861 if vcpu as u64 * AARCH64_VIRTFREQ_SIZE + virtfreq_size > AARCH64_VIRTFREQ_MAXSIZE {
862 panic!("Exceeded maximum number of virt cpufreq devices");
863 }
864 }
865 }
866
867 let mut cmdline = Self::get_base_linux_cmdline();
868 get_serial_cmdline(&mut cmdline, serial_parameters, "mmio", &serial_devices)
869 .map_err(Error::GetSerialCmdline)?;
870 for param in components.extra_kernel_params {
871 cmdline.insert_str(¶m).map_err(Error::Cmdline)?;
872 }
873
874 if let Some(ramoops_region) = ramoops_region {
875 arch::pstore::add_ramoops_kernel_cmdline(&mut cmdline, &ramoops_region)
876 .map_err(Error::Cmdline)?;
877 }
878
879 let psci_version = vcpus[0].get_psci_version().map_err(Error::GetPsciVersion)?;
880
881 let pci_cfg = fdt::PciConfigRegion {
882 base: arch_memory_layout.pci_cam.start,
883 size: arch_memory_layout.pci_cam.len().unwrap(),
884 };
885
886 let mut pci_ranges: Vec<fdt::PciRange> = Vec::new();
887
888 let mut add_pci_ranges = |alloc: &AddressAllocator, prefetchable: bool| {
889 pci_ranges.extend(alloc.pools().iter().map(|range| fdt::PciRange {
890 space: fdt::PciAddressSpace::Memory64,
891 bus_address: range.start,
892 cpu_physical_address: range.start,
893 size: range.len().unwrap(),
894 prefetchable,
895 }));
896 };
897
898 add_pci_ranges(system_allocator.mmio_allocator(MmioType::Low), false);
899 add_pci_ranges(system_allocator.mmio_allocator(MmioType::High), true);
900
901 let (bat_control, bat_mmio_base_and_irq) = match bat_type {
902 Some(BatteryType::Goldfish) => {
903 let bat_irq = AARCH64_BAT_IRQ;
904
905 // a dummy AML buffer. Aarch64 crosvm doesn't use ACPI.
906 let mut amls = Vec::new();
907 let (control_tube, mmio_base) = arch::sys::linux::add_goldfish_battery(
908 &mut amls,
909 bat_jail,
910 &mmio_bus,
911 irq_chip.as_irq_chip_mut(),
912 bat_irq,
913 system_allocator,
914 #[cfg(feature = "swap")]
915 swap_controller,
916 )
917 .map_err(Error::CreateBatDevices)?;
918 (
919 Some(BatControl {
920 type_: BatteryType::Goldfish,
921 control_tube,
922 }),
923 Some((mmio_base, bat_irq)),
924 )
925 }
926 None => (None, None),
927 };
928
929 let vmwdt_cfg = fdt::VmWdtConfig {
930 base: AARCH64_VMWDT_ADDR,
931 size: AARCH64_VMWDT_SIZE,
932 clock_hz: VMWDT_DEFAULT_CLOCK_HZ,
933 timeout_sec: VMWDT_DEFAULT_TIMEOUT_SEC,
934 };
935
936 fdt::create_fdt(
937 AARCH64_FDT_MAX_SIZE as usize,
938 &mem,
939 pci_irqs,
940 pci_cfg,
941 &pci_ranges,
942 dev_resources,
943 vcpu_count as u32,
944 &|n| get_vcpu_mpidr_aff(&vcpus, n),
945 components.cpu_clusters,
946 components.cpu_capacity,
947 components.cpu_frequencies,
948 fdt_address,
949 cmdline
950 .as_str_with_max_len(AARCH64_CMDLINE_MAX_SIZE - 1)
951 .map_err(Error::Cmdline)?,
952 (payload.entry(), payload.size() as usize),
953 initrd,
954 components.android_fstab,
955 irq_chip.get_vgic_version() == DeviceKind::ArmVgicV3,
956 use_pmu,
957 psci_version,
958 components.swiotlb.map(|size| {
959 (
960 get_swiotlb_addr(components.memory_size, size, vm.get_hypervisor()),
961 size,
962 )
963 }),
964 bat_mmio_base_and_irq,
965 vmwdt_cfg,
966 dump_device_tree_blob,
967 &|writer, phandles| vm.create_fdt(writer, phandles),
968 components.dynamic_power_coefficient,
969 device_tree_overlays,
970 &serial_devices,
971 components.virt_cpufreq_v2,
972 )
973 .map_err(Error::CreateFdt)?;
974
975 vm.init_arch(
976 payload.entry(),
977 fdt_address,
978 AARCH64_FDT_MAX_SIZE.try_into().unwrap(),
979 )
980 .map_err(Error::InitVmError)?;
981
982 let vm_request_tubes = vec![vmwdt_host_tube, vcpufreq_host_tube];
983
984 Ok(RunnableLinuxVm {
985 vm,
986 vcpu_count,
987 vcpus: Some(vcpus),
988 vcpu_init,
989 vcpu_affinity: components.vcpu_affinity,
990 no_smt: components.no_smt,
991 irq_chip: irq_chip.try_box_clone().map_err(Error::CloneIrqChip)?,
992 io_bus,
993 mmio_bus,
994 pid_debug_label_map,
995 suspend_tube: (suspend_tube_send, suspend_tube_recv),
996 rt_cpus: components.rt_cpus,
997 delay_rt: components.delay_rt,
998 bat_control,
999 pm: None,
1000 resume_notify_devices: Vec::new(),
1001 root_config: pci_root,
1002 platform_devices,
1003 hotplug_bus: BTreeMap::new(),
1004 devices_thread: None,
1005 vm_request_tubes,
1006 })
1007 }
1008
configure_vcpu<V: Vm>( _vm: &V, _hypervisor: &dyn Hypervisor, _irq_chip: &mut dyn IrqChipAArch64, vcpu: &mut dyn VcpuAArch64, vcpu_init: VcpuInitAArch64, _vcpu_id: usize, _num_cpus: usize, _cpu_config: Option<CpuConfigAArch64>, ) -> std::result::Result<(), Self::Error>1009 fn configure_vcpu<V: Vm>(
1010 _vm: &V,
1011 _hypervisor: &dyn Hypervisor,
1012 _irq_chip: &mut dyn IrqChipAArch64,
1013 vcpu: &mut dyn VcpuAArch64,
1014 vcpu_init: VcpuInitAArch64,
1015 _vcpu_id: usize,
1016 _num_cpus: usize,
1017 _cpu_config: Option<CpuConfigAArch64>,
1018 ) -> std::result::Result<(), Self::Error> {
1019 for (reg, value) in vcpu_init.regs.iter() {
1020 vcpu.set_one_reg(*reg, *value).map_err(Error::SetReg)?;
1021 }
1022 Ok(())
1023 }
1024
    /// Registers a hotplugged PCI device with the running VM.
    ///
    /// PCI device hotplug has not been verified on AArch64, so this always
    /// returns [`Error::Unsupported`] without touching the VM.
    fn register_pci_device<V: VmAArch64, Vcpu: VcpuAArch64>(
        _linux: &mut RunnableLinuxVm<V, Vcpu>,
        _device: Box<dyn PciDevice>,
        _minijail: Option<Minijail>,
        _resources: &mut SystemAllocator,
        _tube: &mpsc::Sender<PciRootCommand>,
        #[cfg(feature = "swap")] _swap_controller: &mut Option<swap::SwapController>,
    ) -> std::result::Result<PciAddress, Self::Error> {
        // hotplug function isn't verified on AArch64, so set it unsupported here.
        Err(Error::Unsupported)
    }
1036
get_host_cpu_max_freq_khz() -> std::result::Result<BTreeMap<usize, u32>, Self::Error>1037 fn get_host_cpu_max_freq_khz() -> std::result::Result<BTreeMap<usize, u32>, Self::Error> {
1038 Ok(Self::collect_for_each_cpu(base::logical_core_max_freq_khz)
1039 .map_err(Error::CpuFrequencies)?
1040 .into_iter()
1041 .enumerate()
1042 .collect())
1043 }
1044
get_host_cpu_frequencies_khz() -> std::result::Result<BTreeMap<usize, Vec<u32>>, Self::Error>1045 fn get_host_cpu_frequencies_khz() -> std::result::Result<BTreeMap<usize, Vec<u32>>, Self::Error>
1046 {
1047 Ok(
1048 Self::collect_for_each_cpu(base::logical_core_frequencies_khz)
1049 .map_err(Error::CpuFrequencies)?
1050 .into_iter()
1051 .enumerate()
1052 .collect(),
1053 )
1054 }
1055
1056 // Returns a (cpu_id -> value) map of the DMIPS/MHz capacities of logical cores
1057 // in the host system.
get_host_cpu_capacity() -> std::result::Result<BTreeMap<usize, u32>, Self::Error>1058 fn get_host_cpu_capacity() -> std::result::Result<BTreeMap<usize, u32>, Self::Error> {
1059 Ok(Self::collect_for_each_cpu(base::logical_core_capacity)
1060 .map_err(Error::CpuTopology)?
1061 .into_iter()
1062 .enumerate()
1063 .collect())
1064 }
1065
1066 // Creates CPU cluster mask for each CPU in the host system.
get_host_cpu_clusters() -> std::result::Result<Vec<CpuSet>, Self::Error>1067 fn get_host_cpu_clusters() -> std::result::Result<Vec<CpuSet>, Self::Error> {
1068 let cluster_ids = Self::collect_for_each_cpu(base::logical_core_cluster_id)
1069 .map_err(Error::CpuTopology)?;
1070 let mut unique_clusters: Vec<CpuSet> = cluster_ids
1071 .iter()
1072 .map(|&vcpu_cluster_id| {
1073 cluster_ids
1074 .iter()
1075 .enumerate()
1076 .filter(|(_, &cpu_cluster_id)| vcpu_cluster_id == cpu_cluster_id)
1077 .map(|(cpu_id, _)| cpu_id)
1078 .collect()
1079 })
1080 .collect();
1081 unique_clusters.sort_unstable();
1082 unique_clusters.dedup();
1083 Ok(unique_clusters)
1084 }
1085 }
1086
#[cfg(feature = "gdb")]
impl<T: VcpuAArch64> arch::GdbOps<T> for AArch64 {
    type Error = Error;

    /// Reads `len` bytes of guest memory starting at `vaddr`.
    ///
    /// NOTE(review): `vaddr` is handed straight to `GuestMemory`, i.e. it is
    /// used as a guest memory address with no stage-1 (MMU) translation —
    /// confirm this matches the debugger's addressing expectations.
    fn read_memory(
        _vcpu: &T,
        guest_mem: &GuestMemory,
        vaddr: GuestAddress,
        len: usize,
    ) -> Result<Vec<u8>> {
        let mut buf = vec![0; len];

        guest_mem
            .read_exact_at_addr(&mut buf, vaddr)
            .map_err(Error::ReadGuestMemory)?;

        Ok(buf)
    }

    /// Writes `buf` into guest memory at `vaddr`.
    ///
    /// NOTE(review): as with `read_memory`, `vaddr` is used directly as a
    /// guest memory address with no translation.
    fn write_memory(
        _vcpu: &T,
        guest_mem: &GuestMemory,
        vaddr: GuestAddress,
        buf: &[u8],
    ) -> Result<()> {
        guest_mem
            .write_all_at_addr(buf, vaddr)
            .map_err(Error::WriteGuestMemory)
    }

    /// Reads the full GDB register file from `vcpu`: X0-X30, SP, PC, CPSR,
    /// the V0-V31 vector registers, and FPCR/FPSR.
    fn read_registers(vcpu: &T) -> Result<<GdbArch as Arch>::Registers> {
        let mut regs: <GdbArch as Arch>::Registers = Default::default();
        // GDB's AArch64 register file has exactly X0-X30; guard against a
        // gdbstub_arch layout change.
        assert!(
            regs.x.len() == 31,
            "unexpected number of Xn general purpose registers"
        );
        for (i, reg) in regs.x.iter_mut().enumerate() {
            let n = u8::try_from(i).expect("invalid Xn general purpose register index");
            *reg = vcpu
                .get_one_reg(VcpuRegAArch64::X(n))
                .map_err(Error::ReadReg)?;
        }
        regs.sp = vcpu
            .get_one_reg(VcpuRegAArch64::Sp)
            .map_err(Error::ReadReg)?;
        regs.pc = vcpu
            .get_one_reg(VcpuRegAArch64::Pc)
            .map_err(Error::ReadReg)?;
        // hypervisor API gives a 64-bit value for Pstate, but GDB wants a 32-bit "CPSR".
        regs.cpsr = vcpu
            .get_one_reg(VcpuRegAArch64::Pstate)
            .map_err(Error::ReadReg)? as u32;
        for (i, reg) in regs.v.iter_mut().enumerate() {
            let n = u8::try_from(i).expect("invalid Vn general purpose register index");
            *reg = vcpu.get_vector_reg(n).map_err(Error::ReadReg)?;
        }
        // FPCR/FPSR are architecturally 32-bit; truncate the 64-bit sysreg value.
        regs.fpcr = vcpu
            .get_one_reg(VcpuRegAArch64::System(AArch64SysRegId::FPCR))
            .map_err(Error::ReadReg)? as u32;
        regs.fpsr = vcpu
            .get_one_reg(VcpuRegAArch64::System(AArch64SysRegId::FPSR))
            .map_err(Error::ReadReg)? as u32;

        Ok(regs)
    }

    /// Writes the full GDB register file (see [`Self::read_registers`]) back
    /// into `vcpu`.
    fn write_registers(vcpu: &T, regs: &<GdbArch as Arch>::Registers) -> Result<()> {
        assert!(
            regs.x.len() == 31,
            "unexpected number of Xn general purpose registers"
        );
        for (i, reg) in regs.x.iter().enumerate() {
            let n = u8::try_from(i).expect("invalid Xn general purpose register index");
            vcpu.set_one_reg(VcpuRegAArch64::X(n), *reg)
                .map_err(Error::WriteReg)?;
        }
        vcpu.set_one_reg(VcpuRegAArch64::Sp, regs.sp)
            .map_err(Error::WriteReg)?;
        vcpu.set_one_reg(VcpuRegAArch64::Pc, regs.pc)
            .map_err(Error::WriteReg)?;
        // GDB gives a 32-bit value for "CPSR", but hypervisor API wants a 64-bit Pstate.
        // Read-modify-write so the upper 32 bits of Pstate are preserved.
        let pstate = vcpu
            .get_one_reg(VcpuRegAArch64::Pstate)
            .map_err(Error::ReadReg)?;
        let pstate = (pstate & 0xffff_ffff_0000_0000) | (regs.cpsr as u64);
        vcpu.set_one_reg(VcpuRegAArch64::Pstate, pstate)
            .map_err(Error::WriteReg)?;
        for (i, reg) in regs.v.iter().enumerate() {
            let n = u8::try_from(i).expect("invalid Vn general purpose register index");
            vcpu.set_vector_reg(n, *reg).map_err(Error::WriteReg)?;
        }
        vcpu.set_one_reg(
            VcpuRegAArch64::System(AArch64SysRegId::FPCR),
            u64::from(regs.fpcr),
        )
        .map_err(Error::WriteReg)?;
        vcpu.set_one_reg(
            VcpuRegAArch64::System(AArch64SysRegId::FPSR),
            u64::from(regs.fpsr),
        )
        .map_err(Error::WriteReg)?;

        Ok(())
    }

    /// Reads a single register identified by a GDB register id, returning its
    /// value as native-endian bytes.
    ///
    /// Returns an empty `Vec` (not an error) when the hypervisor reports the
    /// register as unavailable (`ENOENT`).
    fn read_register(vcpu: &T, reg_id: <GdbArch as Arch>::RegId) -> Result<Vec<u8>> {
        let result = match reg_id {
            AArch64RegId::X(n) => vcpu
                .get_one_reg(VcpuRegAArch64::X(n))
                .map(|v| v.to_ne_bytes().to_vec()),
            AArch64RegId::Sp => vcpu
                .get_one_reg(VcpuRegAArch64::Sp)
                .map(|v| v.to_ne_bytes().to_vec()),
            AArch64RegId::Pc => vcpu
                .get_one_reg(VcpuRegAArch64::Pc)
                .map(|v| v.to_ne_bytes().to_vec()),
            // GDB expects a 32-bit CPSR; truncate the 64-bit Pstate value.
            AArch64RegId::Pstate => vcpu
                .get_one_reg(VcpuRegAArch64::Pstate)
                .map(|v| (v as u32).to_ne_bytes().to_vec()),
            AArch64RegId::V(n) => vcpu.get_vector_reg(n).map(|v| v.to_ne_bytes().to_vec()),
            AArch64RegId::System(op) => vcpu
                .get_one_reg(VcpuRegAArch64::System(AArch64SysRegId::from_encoded(op)))
                .map(|v| v.to_ne_bytes().to_vec()),
            _ => {
                base::error!("Unexpected AArch64RegId: {:?}", reg_id);
                Err(base::Error::new(libc::EINVAL))
            }
        };

        match result {
            Ok(bytes) => Ok(bytes),
            // ENOENT is returned when KVM is aware of the register but it is unavailable
            Err(e) if e.errno() == libc::ENOENT => Ok(Vec::new()),
            Err(e) => Err(Error::ReadReg(e)),
        }
    }

    /// Writes a single register identified by a GDB register id from
    /// native-endian bytes in `data`.
    ///
    /// Fails with `EINVAL` (wrapped in [`Error::WriteReg`]) if `data` is
    /// shorter than the register's width.
    fn write_register(vcpu: &T, reg_id: <GdbArch as Arch>::RegId, data: &[u8]) -> Result<()> {
        // Interpret the first 4 bytes of `data` as a native-endian u32.
        fn try_into_u32(data: &[u8]) -> Result<u32> {
            let s = data
                .get(..4)
                .ok_or(Error::WriteReg(base::Error::new(libc::EINVAL)))?;
            let a = s
                .try_into()
                .map_err(|_| Error::WriteReg(base::Error::new(libc::EINVAL)))?;
            Ok(u32::from_ne_bytes(a))
        }

        // Interpret the first 8 bytes of `data` as a native-endian u64.
        fn try_into_u64(data: &[u8]) -> Result<u64> {
            let s = data
                .get(..8)
                .ok_or(Error::WriteReg(base::Error::new(libc::EINVAL)))?;
            let a = s
                .try_into()
                .map_err(|_| Error::WriteReg(base::Error::new(libc::EINVAL)))?;
            Ok(u64::from_ne_bytes(a))
        }

        // Interpret the first 16 bytes of `data` as a native-endian u128
        // (vector register width).
        fn try_into_u128(data: &[u8]) -> Result<u128> {
            let s = data
                .get(..16)
                .ok_or(Error::WriteReg(base::Error::new(libc::EINVAL)))?;
            let a = s
                .try_into()
                .map_err(|_| Error::WriteReg(base::Error::new(libc::EINVAL)))?;
            Ok(u128::from_ne_bytes(a))
        }

        match reg_id {
            AArch64RegId::X(n) => vcpu.set_one_reg(VcpuRegAArch64::X(n), try_into_u64(data)?),
            AArch64RegId::Sp => vcpu.set_one_reg(VcpuRegAArch64::Sp, try_into_u64(data)?),
            AArch64RegId::Pc => vcpu.set_one_reg(VcpuRegAArch64::Pc, try_into_u64(data)?),
            // GDB supplies a 32-bit CPSR; zero-extend it to the 64-bit Pstate.
            AArch64RegId::Pstate => {
                vcpu.set_one_reg(VcpuRegAArch64::Pstate, u64::from(try_into_u32(data)?))
            }
            AArch64RegId::V(n) => vcpu.set_vector_reg(n, try_into_u128(data)?),
            AArch64RegId::System(op) => vcpu.set_one_reg(
                VcpuRegAArch64::System(AArch64SysRegId::from_encoded(op)),
                try_into_u64(data)?,
            ),
            _ => {
                base::error!("Unexpected AArch64RegId: {:?}", reg_id);
                Err(base::Error::new(libc::EINVAL))
            }
        }
        .map_err(Error::WriteReg)
    }

    /// Enables single-step execution on `vcpu` (clearing any breakpoints).
    fn enable_singlestep(vcpu: &T) -> Result<()> {
        const SINGLE_STEP: bool = true;
        vcpu.set_guest_debug(&[], SINGLE_STEP)
            .map_err(Error::EnableSinglestep)
    }

    /// Returns the number of hardware breakpoints the vcpu supports.
    fn get_max_hw_breakpoints(vcpu: &T) -> Result<usize> {
        vcpu.get_max_hw_bps().map_err(Error::GetMaxHwBreakPoint)
    }

    /// Installs hardware breakpoints at `breakpoints` (disabling single-step).
    fn set_hw_breakpoints(vcpu: &T, breakpoints: &[GuestAddress]) -> Result<()> {
        const SINGLE_STEP: bool = false;
        vcpu.set_guest_debug(breakpoints, SINGLE_STEP)
            .map_err(Error::SetHwBreakpoint)
    }
}
1291
1292 impl AArch64 {
1293 /// This returns a base part of the kernel command for this architecture
get_base_linux_cmdline() -> kernel_cmdline::Cmdline1294 fn get_base_linux_cmdline() -> kernel_cmdline::Cmdline {
1295 let mut cmdline = kernel_cmdline::Cmdline::new();
1296 cmdline.insert_str("panic=-1").unwrap();
1297 cmdline
1298 }
1299
1300 /// This adds any early platform devices for this architecture.
1301 ///
1302 /// # Arguments
1303 ///
1304 /// * `irq_chip` - The IRQ chip to add irqs to.
1305 /// * `bus` - The bus to add devices to.
1306 /// * `vcpu_count` - The number of virtual CPUs for this guest VM
1307 /// * `vm_evt_wrtube` - The notification channel
add_arch_devs( irq_chip: &mut dyn IrqChip, bus: &Bus, vcpu_count: usize, vm_evt_wrtube: &SendTube, vmwdt_request_tube: Tube, ) -> Result<()>1308 fn add_arch_devs(
1309 irq_chip: &mut dyn IrqChip,
1310 bus: &Bus,
1311 vcpu_count: usize,
1312 vm_evt_wrtube: &SendTube,
1313 vmwdt_request_tube: Tube,
1314 ) -> Result<()> {
1315 let rtc_evt = devices::IrqEdgeEvent::new().map_err(Error::CreateEvent)?;
1316 let rtc = devices::pl030::Pl030::new(rtc_evt.try_clone().map_err(Error::CloneEvent)?);
1317 irq_chip
1318 .register_edge_irq_event(AARCH64_RTC_IRQ, &rtc_evt, IrqEventSource::from_device(&rtc))
1319 .map_err(Error::RegisterIrqfd)?;
1320
1321 bus.insert(
1322 Arc::new(Mutex::new(rtc)),
1323 AARCH64_RTC_ADDR,
1324 AARCH64_RTC_SIZE,
1325 )
1326 .expect("failed to add rtc device");
1327
1328 let vmwdt_evt = devices::IrqEdgeEvent::new().map_err(Error::CreateEvent)?;
1329 let vm_wdt = devices::vmwdt::Vmwdt::new(
1330 vcpu_count,
1331 vm_evt_wrtube.try_clone().unwrap(),
1332 vmwdt_evt.try_clone().map_err(Error::CloneEvent)?,
1333 vmwdt_request_tube,
1334 )
1335 .map_err(Error::CreateVmwdtDevice)?;
1336 irq_chip
1337 .register_edge_irq_event(
1338 AARCH64_VMWDT_IRQ,
1339 &vmwdt_evt,
1340 IrqEventSource::from_device(&vm_wdt),
1341 )
1342 .map_err(Error::RegisterIrqfd)?;
1343
1344 bus.insert(
1345 Arc::new(Mutex::new(vm_wdt)),
1346 AARCH64_VMWDT_ADDR,
1347 AARCH64_VMWDT_SIZE,
1348 )
1349 .expect("failed to add vmwdt device");
1350
1351 Ok(())
1352 }
1353
1354 /// Get ARM-specific features for vcpu with index `vcpu_id`.
1355 ///
1356 /// # Arguments
1357 ///
1358 /// * `vcpu_id` - The VM's index for `vcpu`.
1359 /// * `use_pmu` - Should `vcpu` be configured to use the Performance Monitor Unit.
vcpu_features( vcpu_id: usize, use_pmu: bool, boot_cpu: usize, sve: SveConfig, ) -> Vec<VcpuFeature>1360 fn vcpu_features(
1361 vcpu_id: usize,
1362 use_pmu: bool,
1363 boot_cpu: usize,
1364 sve: SveConfig,
1365 ) -> Vec<VcpuFeature> {
1366 let mut features = vec![VcpuFeature::PsciV0_2];
1367 if use_pmu {
1368 features.push(VcpuFeature::PmuV3);
1369 }
1370 // Non-boot cpus are powered off initially
1371 if vcpu_id != boot_cpu {
1372 features.push(VcpuFeature::PowerOff);
1373 }
1374 if sve.enable {
1375 features.push(VcpuFeature::Sve);
1376 }
1377
1378 features
1379 }
1380
1381 /// Get initial register state for vcpu with index `vcpu_id`.
1382 ///
1383 /// # Arguments
1384 ///
1385 /// * `vcpu_id` - The VM's index for `vcpu`.
vcpu_init( vcpu_id: usize, payload: &PayloadType, fdt_address: GuestAddress, protection_type: ProtectionType, boot_cpu: usize, ) -> VcpuInitAArch641386 fn vcpu_init(
1387 vcpu_id: usize,
1388 payload: &PayloadType,
1389 fdt_address: GuestAddress,
1390 protection_type: ProtectionType,
1391 boot_cpu: usize,
1392 ) -> VcpuInitAArch64 {
1393 let mut regs: BTreeMap<VcpuRegAArch64, u64> = Default::default();
1394
1395 // All interrupts masked
1396 let pstate = PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT | PSR_MODE_EL1H;
1397 regs.insert(VcpuRegAArch64::Pstate, pstate);
1398
1399 // Other cpus are powered off initially
1400 if vcpu_id == boot_cpu {
1401 let entry_addr = if protection_type.needs_firmware_loaded() {
1402 Some(AARCH64_PROTECTED_VM_FW_START)
1403 } else if protection_type.runs_firmware() {
1404 None // Initial PC value is set by the hypervisor
1405 } else {
1406 Some(payload.entry().offset())
1407 };
1408
1409 /* PC -- entry point */
1410 if let Some(entry) = entry_addr {
1411 regs.insert(VcpuRegAArch64::Pc, entry);
1412 }
1413
1414 /* X0 -- fdt address */
1415 regs.insert(VcpuRegAArch64::X(0), fdt_address.offset());
1416
1417 if protection_type.runs_firmware() {
1418 /* X1 -- payload entry point */
1419 regs.insert(VcpuRegAArch64::X(1), payload.entry().offset());
1420
1421 /* X2 -- image size */
1422 regs.insert(VcpuRegAArch64::X(2), payload.size());
1423 }
1424 }
1425
1426 VcpuInitAArch64 { regs }
1427 }
1428
collect_for_each_cpu<F, T>(func: F) -> std::result::Result<Vec<T>, base::Error> where F: Fn(usize) -> std::result::Result<T, base::Error>,1429 fn collect_for_each_cpu<F, T>(func: F) -> std::result::Result<Vec<T>, base::Error>
1430 where
1431 F: Fn(usize) -> std::result::Result<T, base::Error>,
1432 {
1433 (0..base::number_of_logical_cores()?).map(func).collect()
1434 }
1435 }
1436
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn vcpu_init_unprotected_kernel() {
        let kernel = PayloadType::Kernel(LoadedKernel {
            address_range: AddressRange::from_start_and_size(0x8080_0000, 0x1000).unwrap(),
            size: 0x1000,
            entry: GuestAddress(0x8080_0000),
        });
        let fdt = GuestAddress(0x1234);

        let init = AArch64::vcpu_init(0, &kernel, fdt, ProtectionType::Unprotected, 0);

        // PC points at the kernel image entry point.
        assert_eq!(init.regs.get(&VcpuRegAArch64::Pc), Some(&0x8080_0000));

        // X0 carries the FDT address.
        assert_eq!(init.regs.get(&VcpuRegAArch64::X(0)), Some(&0x1234));
    }

    #[test]
    fn vcpu_init_unprotected_bios() {
        let bios = PayloadType::Bios {
            entry: GuestAddress(0x8020_0000),
            image_size: 0x1000,
        };
        let fdt = GuestAddress(0x1234);

        let init = AArch64::vcpu_init(0, &bios, fdt, ProtectionType::Unprotected, 0);

        // PC points at the bios image entry point.
        assert_eq!(init.regs.get(&VcpuRegAArch64::Pc), Some(&0x8020_0000));

        // X0 carries the FDT address.
        assert_eq!(init.regs.get(&VcpuRegAArch64::X(0)), Some(&0x1234));
    }

    #[test]
    fn vcpu_init_protected_kernel() {
        let kernel = PayloadType::Kernel(LoadedKernel {
            address_range: AddressRange::from_start_and_size(0x8080_0000, 0x1000).unwrap(),
            size: 0x1000,
            entry: GuestAddress(0x8080_0000),
        });
        let fdt = GuestAddress(0x1234);

        let init = AArch64::vcpu_init(0, &kernel, fdt, ProtectionType::Protected, 0);

        // The hypervisor provides the initial value of PC, so PC should not be
        // present in the vcpu_init register map.
        assert_eq!(init.regs.get(&VcpuRegAArch64::Pc), None);

        // X0 carries the FDT address.
        assert_eq!(init.regs.get(&VcpuRegAArch64::X(0)), Some(&0x1234));

        // X1 carries the kernel image entry point for the firmware to jump to.
        assert_eq!(init.regs.get(&VcpuRegAArch64::X(1)), Some(&0x8080_0000));

        // X2 carries the payload image size.
        assert_eq!(init.regs.get(&VcpuRegAArch64::X(2)), Some(&0x1000));
    }
}
1507