xref: /aosp_15_r20/external/crosvm/aarch64/src/lib.rs (revision bb4ee6a4ae7042d18b07a98463b9c8b875e44b39)
1 // Copyright 2018 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 //! ARM 64-bit architecture support.
6 
7 #![cfg(any(target_arch = "arm", target_arch = "aarch64"))]
8 
9 use std::collections::BTreeMap;
10 use std::fs::File;
11 use std::io;
12 use std::path::PathBuf;
13 use std::sync::atomic::AtomicU32;
14 use std::sync::mpsc;
15 use std::sync::Arc;
16 
17 use arch::get_serial_cmdline;
18 use arch::CpuSet;
19 use arch::DtbOverlay;
20 use arch::FdtPosition;
21 use arch::GetSerialCmdlineError;
22 use arch::MemoryRegionConfig;
23 use arch::RunnableLinuxVm;
24 use arch::SveConfig;
25 use arch::VcpuAffinity;
26 use arch::VmComponents;
27 use arch::VmImage;
28 use base::MemoryMappingBuilder;
29 use base::SendTube;
30 use base::Tube;
31 use devices::serial_device::SerialHardware;
32 use devices::serial_device::SerialParameters;
33 use devices::vmwdt::VMWDT_DEFAULT_CLOCK_HZ;
34 use devices::vmwdt::VMWDT_DEFAULT_TIMEOUT_SEC;
35 use devices::Bus;
36 use devices::BusDeviceObj;
37 use devices::BusError;
38 use devices::BusType;
39 use devices::IrqChip;
40 use devices::IrqChipAArch64;
41 use devices::IrqEventSource;
42 use devices::PciAddress;
43 use devices::PciConfigMmio;
44 use devices::PciDevice;
45 use devices::PciRootCommand;
46 use devices::Serial;
47 #[cfg(any(target_os = "android", target_os = "linux"))]
48 use devices::VirtCpufreq;
49 #[cfg(any(target_os = "android", target_os = "linux"))]
50 use devices::VirtCpufreqV2;
51 #[cfg(feature = "gdb")]
52 use gdbstub::arch::Arch;
53 #[cfg(feature = "gdb")]
54 use gdbstub_arch::aarch64::reg::id::AArch64RegId;
55 #[cfg(feature = "gdb")]
56 use gdbstub_arch::aarch64::AArch64 as GdbArch;
57 #[cfg(feature = "gdb")]
58 use hypervisor::AArch64SysRegId;
59 use hypervisor::CpuConfigAArch64;
60 use hypervisor::DeviceKind;
61 use hypervisor::Hypervisor;
62 use hypervisor::HypervisorCap;
63 use hypervisor::MemCacheType;
64 use hypervisor::ProtectionType;
65 use hypervisor::VcpuAArch64;
66 use hypervisor::VcpuFeature;
67 use hypervisor::VcpuInitAArch64;
68 use hypervisor::VcpuRegAArch64;
69 use hypervisor::Vm;
70 use hypervisor::VmAArch64;
71 #[cfg(windows)]
72 use jail::FakeMinijailStub as Minijail;
73 use kernel_loader::LoadedKernel;
74 #[cfg(any(target_os = "android", target_os = "linux"))]
75 use minijail::Minijail;
76 use remain::sorted;
77 use resources::address_allocator::AddressAllocator;
78 use resources::AddressRange;
79 use resources::MmioType;
80 use resources::SystemAllocator;
81 use resources::SystemAllocatorConfig;
82 use sync::Condvar;
83 use sync::Mutex;
84 use thiserror::Error;
85 use vm_control::BatControl;
86 use vm_control::BatteryType;
87 use vm_memory::GuestAddress;
88 use vm_memory::GuestMemory;
89 use vm_memory::GuestMemoryError;
90 use vm_memory::MemoryRegionOptions;
91 use vm_memory::MemoryRegionPurpose;
92 
93 mod fdt;
94 
95 const AARCH64_FDT_MAX_SIZE: u64 = 0x200000;
96 const AARCH64_FDT_ALIGN: u64 = 0x200000;
97 const AARCH64_INITRD_ALIGN: u64 = 0x1000000;
98 
99 // Maximum Linux arm64 kernel command line size (arch/arm64/include/uapi/asm/setup.h).
100 const AARCH64_CMDLINE_MAX_SIZE: usize = 2048;
101 
102 // These constants indicate the address space used by the ARM vGIC.
103 const AARCH64_GIC_DIST_SIZE: u64 = 0x10000;
104 const AARCH64_GIC_CPUI_SIZE: u64 = 0x20000;
105 
106 // This indicates the start of DRAM inside the physical address space.
107 const AARCH64_PHYS_MEM_START: u64 = 0x80000000;
108 const AARCH64_PLATFORM_MMIO_SIZE: u64 = 0x800000;
109 
110 const AARCH64_PROTECTED_VM_FW_MAX_SIZE: u64 = 0x400000;
111 const AARCH64_PROTECTED_VM_FW_START: u64 =
112     AARCH64_PHYS_MEM_START - AARCH64_PROTECTED_VM_FW_MAX_SIZE;
113 
114 const AARCH64_PVTIME_IPA_MAX_SIZE: u64 = 0x10000;
115 const AARCH64_PVTIME_IPA_START: u64 = 0x1ff0000;
116 const AARCH64_PVTIME_SIZE: u64 = 64;
117 
118 // These constants indicate the placement of the GIC registers in the physical
119 // address space.
120 const AARCH64_GIC_DIST_BASE: u64 = 0x40000000 - AARCH64_GIC_DIST_SIZE;
121 const AARCH64_GIC_CPUI_BASE: u64 = AARCH64_GIC_DIST_BASE - AARCH64_GIC_CPUI_SIZE;
122 const AARCH64_GIC_REDIST_SIZE: u64 = 0x20000;
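// With the sizes above, the distributor ends up at 0x3fff_0000 and the GICv2
// CPU interface at 0x3ffd_0000, both below the start of RAM at 0x8000_0000.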
123 
124 // PSR (Processor State Register) bits
125 const PSR_MODE_EL1H: u64 = 0x00000005;
126 const PSR_F_BIT: u64 = 0x00000040;
127 const PSR_I_BIT: u64 = 0x00000080;
128 const PSR_A_BIT: u64 = 0x00000100;
129 const PSR_D_BIT: u64 = 0x00000200;
130 
131 // This was the speed kvmtool used; it is not clear whether it matters.
132 const AARCH64_SERIAL_SPEED: u32 = 1843200;
133 // The serial device gets the first interrupt line,
134 // which gets mapped to the first SPI interrupt (physical 32).
135 const AARCH64_SERIAL_1_3_IRQ: u32 = 0;
136 const AARCH64_SERIAL_2_4_IRQ: u32 = 2;
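// (SPIs start at GIC INTID 32, so SPI 0 is INTID 32 and SPI 2 is INTID 34.)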
137 
138 // Place the RTC device at page 2
139 const AARCH64_RTC_ADDR: u64 = 0x2000;
140 // The RTC device gets one 4k page
141 const AARCH64_RTC_SIZE: u64 = 0x1000;
142 // The RTC device gets the second interrupt line
143 const AARCH64_RTC_IRQ: u32 = 1;
144 
145 // The Goldfish battery device gets the 3rd interrupt line
146 const AARCH64_BAT_IRQ: u32 = 3;
147 
148 // Place the virtual watchdog device at page 3
149 const AARCH64_VMWDT_ADDR: u64 = 0x3000;
150 // The virtual watchdog device gets one 4k page
151 const AARCH64_VMWDT_SIZE: u64 = 0x1000;
152 
153 // Default PCI MMIO configuration region base address.
154 const AARCH64_PCI_CAM_BASE_DEFAULT: u64 = 0x10000;
155 // Default PCI MMIO configuration region size.
156 const AARCH64_PCI_CAM_SIZE_DEFAULT: u64 = 0x1000000;
157 // Default PCI mem base address.
158 const AARCH64_PCI_MEM_BASE_DEFAULT: u64 = 0x2000000;
159 // Default PCI mem size.
160 const AARCH64_PCI_MEM_SIZE_DEFAULT: u64 = 0x2000000;
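// With these defaults, the CAM occupies [0x1_0000, 0x101_0000) and the PCI MMIO
// window [0x200_0000, 0x400_0000), both below the start of RAM at 0x8000_0000.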
161 // Virtio devices start at SPI interrupt number 4
162 const AARCH64_IRQ_BASE: u32 = 4;
163 
164 // Virtual CPU Frequency Device.
165 const AARCH64_VIRTFREQ_BASE: u64 = 0x1040000;
166 const AARCH64_VIRTFREQ_SIZE: u64 = 0x8;
167 const AARCH64_VIRTFREQ_MAXSIZE: u64 = 0x10000;
168 const AARCH64_VIRTFREQ_V2_SIZE: u64 = 0x1000;
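// Each vCPU gets its own register window starting at AARCH64_VIRTFREQ_BASE:
// 8 bytes per vCPU for the original device, one 4 KiB page per vCPU for v2.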
169 
170 // PMU PPI interrupt, same as qemu
171 const AARCH64_PMU_IRQ: u32 = 7;
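// (PPIs occupy GIC INTIDs 16..=31, so PPI 7 is delivered as INTID 23.)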
172 
173 // VCPU stall detector interrupt
174 const AARCH64_VMWDT_IRQ: u32 = 15;
175 
176 enum PayloadType {
177     Bios {
178         entry: GuestAddress,
179         image_size: u64,
180     },
181     Kernel(LoadedKernel),
182 }
183 
184 impl PayloadType {
185     fn entry(&self) -> GuestAddress {
186         match self {
187             Self::Bios {
188                 entry,
189                 image_size: _,
190             } => *entry,
191             Self::Kernel(k) => k.entry,
192         }
193     }
194 
195     fn size(&self) -> u64 {
196         match self {
197             Self::Bios {
198                 entry: _,
199                 image_size,
200             } => *image_size,
201             Self::Kernel(k) => k.size,
202         }
203     }
204 }
205 
206 // When static swiotlb allocation is required, returns the address it should be allocated at.
207 // Otherwise, returns None.
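// For example, with 4 GiB of guest RAM and a 64 MiB swiotlb, this returns
// 0x1_7c00_0000, i.e. the swiotlb occupies the last 64 MiB of guest RAM.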
208 fn get_swiotlb_addr(
209     memory_size: u64,
210     swiotlb_size: u64,
211     hypervisor: &(impl Hypervisor + ?Sized),
212 ) -> Option<GuestAddress> {
213     if hypervisor.check_capability(HypervisorCap::StaticSwiotlbAllocationRequired) {
214         Some(GuestAddress(
215             AARCH64_PHYS_MEM_START + memory_size - swiotlb_size,
216         ))
217     } else {
218         None
219     }
220 }
221 
222 #[sorted]
223 #[derive(Error, Debug)]
224 pub enum Error {
225     #[error("failed to allocate IRQ number")]
226     AllocateIrq,
227     #[error("bios could not be loaded: {0}")]
228     BiosLoadFailure(arch::LoadImageError),
229     #[error("failed to build arm pvtime memory: {0}")]
230     BuildPvtimeError(base::MmapError),
231     #[error("unable to clone an Event: {0}")]
232     CloneEvent(base::Error),
233     #[error("failed to clone IRQ chip: {0}")]
234     CloneIrqChip(base::Error),
235     #[error("the given kernel command line was invalid: {0}")]
236     Cmdline(kernel_cmdline::Error),
237     #[error("bad PCI CAM configuration: {0}")]
238     ConfigurePciCam(String),
239     #[error("bad PCI mem configuration: {0}")]
240     ConfigurePciMem(String),
241     #[error("failed to configure CPU Frequencies: {0}")]
242     CpuFrequencies(base::Error),
243     #[error("failed to configure CPU topology: {0}")]
244     CpuTopology(base::Error),
245     #[error("unable to create battery devices: {0}")]
246     CreateBatDevices(arch::DeviceRegistrationError),
247     #[error("unable to make an Event: {0}")]
248     CreateEvent(base::Error),
249     #[error("FDT could not be created: {0}")]
250     CreateFdt(cros_fdt::Error),
251     #[error("failed to create GIC: {0}")]
252     CreateGICFailure(base::Error),
253     #[error("failed to create a PCI root hub: {0}")]
254     CreatePciRoot(arch::DeviceRegistrationError),
255     #[error("failed to create platform bus: {0}")]
256     CreatePlatformBus(arch::DeviceRegistrationError),
257     #[error("unable to create serial devices: {0}")]
258     CreateSerialDevices(arch::DeviceRegistrationError),
259     #[error("failed to create socket: {0}")]
260     CreateSocket(io::Error),
261     #[error("failed to create tube: {0}")]
262     CreateTube(base::TubeError),
263     #[error("failed to create VCPU: {0}")]
264     CreateVcpu(base::Error),
265     #[error("unable to create vm watchdog timer device: {0}")]
266     CreateVmwdtDevice(anyhow::Error),
267     #[error("custom pVM firmware could not be loaded: {0}")]
268     CustomPvmFwLoadFailure(arch::LoadImageError),
269     #[error("vm created wrong kind of vcpu")]
270     DowncastVcpu,
271     #[error("failed to enable singlestep execution: {0}")]
272     EnableSinglestep(base::Error),
273     #[error("failed to finalize IRQ chip: {0}")]
274     FinalizeIrqChip(base::Error),
275     #[error("failed to get HW breakpoint count: {0}")]
276     GetMaxHwBreakPoint(base::Error),
277     #[error("failed to get PSCI version: {0}")]
278     GetPsciVersion(base::Error),
279     #[error("failed to get serial cmdline: {0}")]
280     GetSerialCmdline(GetSerialCmdlineError),
281     #[error("failed to initialize arm pvtime: {0}")]
282     InitPvtimeError(base::Error),
283     #[error("initrd could not be loaded: {0}")]
284     InitrdLoadFailure(arch::LoadImageError),
285     #[error("failed to initialize virtual machine {0}")]
286     InitVmError(base::Error),
287     #[error("kernel could not be loaded: {0}")]
288     KernelLoadFailure(kernel_loader::Error),
289     #[error("error loading Kernel from Elf image: {0}")]
290     LoadElfKernel(kernel_loader::Error),
291     #[error("failed to map arm pvtime memory: {0}")]
292     MapPvtimeError(base::Error),
293     #[error("pVM firmware could not be loaded: {0}")]
294     PvmFwLoadFailure(base::Error),
295     #[error("ramoops address is different from high_mmio_base: {0} vs {1}")]
296     RamoopsAddress(u64, u64),
297     #[error("error reading guest memory: {0}")]
298     ReadGuestMemory(vm_memory::GuestMemoryError),
299     #[error("error reading CPU register: {0}")]
300     ReadReg(base::Error),
301     #[error("error reading CPU registers: {0}")]
302     ReadRegs(base::Error),
303     #[error("failed to register irq fd: {0}")]
304     RegisterIrqfd(base::Error),
305     #[error("error registering PCI bus: {0}")]
306     RegisterPci(BusError),
307     #[error("error registering virtual cpufreq device: {0}")]
308     RegisterVirtCpufreq(BusError),
309     #[error("error registering virtual socket device: {0}")]
310     RegisterVsock(arch::DeviceRegistrationError),
311     #[error("failed to set device attr: {0}")]
312     SetDeviceAttr(base::Error),
313     #[error("failed to set a hardware breakpoint: {0}")]
314     SetHwBreakpoint(base::Error),
315     #[error("failed to set register: {0}")]
316     SetReg(base::Error),
317     #[error("failed to set up guest memory: {0}")]
318     SetupGuestMemory(GuestMemoryError),
319     #[error("this function isn't supported")]
320     Unsupported,
321     #[error("failed to initialize VCPU: {0}")]
322     VcpuInit(base::Error),
323     #[error("error writing guest memory: {0}")]
324     WriteGuestMemory(GuestMemoryError),
325     #[error("error writing CPU register: {0}")]
326     WriteReg(base::Error),
327     #[error("error writing CPU registers: {0}")]
328     WriteRegs(base::Error),
329 }
330 
331 pub type Result<T> = std::result::Result<T, Error>;
332 
333 fn load_kernel(
334     guest_mem: &GuestMemory,
335     kernel_start: GuestAddress,
336     mut kernel_image: &mut File,
337 ) -> Result<LoadedKernel> {
338     if let Ok(elf_kernel) = kernel_loader::load_elf(
339         guest_mem,
340         kernel_start,
341         &mut kernel_image,
342         AARCH64_PHYS_MEM_START,
343     ) {
344         return Ok(elf_kernel);
345     }
346 
347     if let Ok(lz4_kernel) =
348         kernel_loader::load_arm64_kernel_lz4(guest_mem, kernel_start, &mut kernel_image)
349     {
350         return Ok(lz4_kernel);
351     }
352 
353     kernel_loader::load_arm64_kernel(guest_mem, kernel_start, kernel_image)
354         .map_err(Error::KernelLoadFailure)
355 }
356 
357 pub struct AArch64;
358 
359 fn get_block_size() -> u64 {
360     let page_size = base::pagesize();
361     // Each page table entry (PTE) is 8 bytes, so one page can hold (page_size / 8)
362     // entries.
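    // For example, with 4 KiB pages that is 512 entries and a 2 MiB block size;
    // with 16 KiB pages it is 2048 entries and a 32 MiB block size.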
363     let ptes_per_page = page_size / 8;
364     let block_size = page_size * ptes_per_page;
365 
366     block_size as u64
367 }
368 
369 fn get_vcpu_mpidr_aff<Vcpu: VcpuAArch64>(vcpus: &[Vcpu], index: usize) -> Option<u64> {
370     const MPIDR_AFF_MASK: u64 = 0xff_00ff_ffff;
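    // Aff3 lives in bits [39:32] and Aff2/Aff1/Aff0 in bits [23:0]; the mask drops
    // the non-affinity MPIDR bits (MT, U, RES1) in between.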
371 
372     Some(vcpus.get(index)?.get_mpidr().ok()? & MPIDR_AFF_MASK)
373 }
374 
375 fn main_memory_size(components: &VmComponents, hypervisor: &(impl Hypervisor + ?Sized)) -> u64 {
376     // Static swiotlb is allocated from the end of RAM as a separate memory region, so, if
377     // enabled, make the RAM memory region smaller to leave room for it.
378     let mut main_memory_size = components.memory_size;
379     if let Some(size) = components.swiotlb {
380         if hypervisor.check_capability(HypervisorCap::StaticSwiotlbAllocationRequired) {
381             main_memory_size -= size;
382         }
383     }
384     main_memory_size
385 }
386 
387 pub struct ArchMemoryLayout {
388     pci_cam: AddressRange,
389     pci_mem: AddressRange,
390 }
391 
392 impl arch::LinuxArch for AArch64 {
393     type Error = Error;
394     type ArchMemoryLayout = ArchMemoryLayout;
395 
396     fn arch_memory_layout(
397         components: &VmComponents,
398     ) -> std::result::Result<Self::ArchMemoryLayout, Self::Error> {
399         let (pci_cam_start, pci_cam_size) = match components.pci_config.cam {
400             Some(MemoryRegionConfig { start, size }) => {
401                 (start, size.unwrap_or(AARCH64_PCI_CAM_SIZE_DEFAULT))
402             }
403             None => (AARCH64_PCI_CAM_BASE_DEFAULT, AARCH64_PCI_CAM_SIZE_DEFAULT),
404         };
405         // TODO: Make the PCI slot allocator aware of the CAM size so we can remove this check.
406         if pci_cam_size != AARCH64_PCI_CAM_SIZE_DEFAULT {
407             return Err(Error::ConfigurePciCam(format!(
408                 "PCI CAM size must be {AARCH64_PCI_CAM_SIZE_DEFAULT:#x}, got {pci_cam_size:#x}"
409             )));
410         }
411         let pci_cam = AddressRange::from_start_and_size(pci_cam_start, pci_cam_size).ok_or(
412             Error::ConfigurePciCam("PCI CAM region overflowed".to_string()),
413         )?;
414         if pci_cam.end >= AARCH64_PHYS_MEM_START {
415             return Err(Error::ConfigurePciCam(format!(
416                 "PCI CAM ({pci_cam:?}) must be before start of RAM ({AARCH64_PHYS_MEM_START:#x})"
417             )));
418         }
419 
420         let pci_mem = match components.pci_config.mem {
421             Some(MemoryRegionConfig { start, size }) => AddressRange::from_start_and_size(
422                 start,
423                 size.unwrap_or(AARCH64_PCI_MEM_SIZE_DEFAULT),
424             )
425             .ok_or(Error::ConfigurePciMem("region overflowed".to_string()))?,
426             None => AddressRange::from_start_and_size(
427                 AARCH64_PCI_MEM_BASE_DEFAULT,
428                 AARCH64_PCI_MEM_SIZE_DEFAULT,
429             )
430             .unwrap(),
431         };
432 
433         Ok(ArchMemoryLayout { pci_cam, pci_mem })
434     }
435 
436     /// Returns a Vec of the valid memory regions.
437     /// These should be used to configure the GuestMemory structure for the platform.
438     fn guest_memory_layout(
439         components: &VmComponents,
440         _arch_memory_layout: &Self::ArchMemoryLayout,
441         hypervisor: &impl Hypervisor,
442     ) -> std::result::Result<Vec<(GuestAddress, u64, MemoryRegionOptions)>, Self::Error> {
443         let main_memory_size = main_memory_size(components, hypervisor);
444 
445         let mut memory_regions = vec![(
446             GuestAddress(AARCH64_PHYS_MEM_START),
447             main_memory_size,
448             MemoryRegionOptions::new().align(get_block_size()),
449         )];
450 
451         // Allocate memory for the pVM firmware.
452         if components.hv_cfg.protection_type.runs_firmware() {
453             memory_regions.push((
454                 GuestAddress(AARCH64_PROTECTED_VM_FW_START),
455                 AARCH64_PROTECTED_VM_FW_MAX_SIZE,
456                 MemoryRegionOptions::new().purpose(MemoryRegionPurpose::ProtectedFirmwareRegion),
457             ));
458         }
459 
460         if let Some(size) = components.swiotlb {
461             if let Some(addr) = get_swiotlb_addr(components.memory_size, size, hypervisor) {
462                 memory_regions.push((
463                     addr,
464                     size,
465                     MemoryRegionOptions::new().purpose(MemoryRegionPurpose::StaticSwiotlbRegion),
466                 ));
467             }
468         }
469 
470         Ok(memory_regions)
471     }
472 
473     fn get_system_allocator_config<V: Vm>(
474         vm: &V,
475         arch_memory_layout: &Self::ArchMemoryLayout,
476     ) -> SystemAllocatorConfig {
477         let guest_phys_end = 1u64 << vm.get_guest_phys_addr_bits();
478         // The platform MMIO region is immediately past the end of RAM.
479         let plat_mmio_base = vm.get_memory().end_addr().offset();
480         let plat_mmio_size = AARCH64_PLATFORM_MMIO_SIZE;
481         // The high MMIO region is the rest of the address space after the platform MMIO region.
482         let high_mmio_base = plat_mmio_base + plat_mmio_size;
483         let high_mmio_size = guest_phys_end
484             .checked_sub(high_mmio_base)
485             .unwrap_or_else(|| {
486                 panic!(
487                     "guest_phys_end {:#x} < high_mmio_base {:#x}",
488                     guest_phys_end, high_mmio_base,
489                 );
490             });
491         SystemAllocatorConfig {
492             io: None,
493             low_mmio: arch_memory_layout.pci_mem,
494             high_mmio: AddressRange::from_start_and_size(high_mmio_base, high_mmio_size)
495                 .expect("invalid high mmio region"),
496             platform_mmio: Some(
497                 AddressRange::from_start_and_size(plat_mmio_base, plat_mmio_size)
498                     .expect("invalid platform mmio region"),
499             ),
500             first_irq: AARCH64_IRQ_BASE,
501         }
502     }
503 
504     fn build_vm<V, Vcpu>(
505         mut components: VmComponents,
506         arch_memory_layout: &Self::ArchMemoryLayout,
507         _vm_evt_wrtube: &SendTube,
508         system_allocator: &mut SystemAllocator,
509         serial_parameters: &BTreeMap<(SerialHardware, u8), SerialParameters>,
510         serial_jail: Option<Minijail>,
511         (bat_type, bat_jail): (Option<BatteryType>, Option<Minijail>),
512         mut vm: V,
513         ramoops_region: Option<arch::pstore::RamoopsRegion>,
514         devs: Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)>,
515         irq_chip: &mut dyn IrqChipAArch64,
516         vcpu_ids: &mut Vec<usize>,
517         dump_device_tree_blob: Option<PathBuf>,
518         _debugcon_jail: Option<Minijail>,
519         #[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>,
520         _guest_suspended_cvar: Option<Arc<(Mutex<bool>, Condvar)>>,
521         device_tree_overlays: Vec<DtbOverlay>,
522         fdt_position: Option<FdtPosition>,
523         no_pmu: bool,
524     ) -> std::result::Result<RunnableLinuxVm<V, Vcpu>, Self::Error>
525     where
526         V: VmAArch64,
527         Vcpu: VcpuAArch64,
528     {
529         let has_bios = matches!(components.vm_image, VmImage::Bios(_));
530         let mem = vm.get_memory().clone();
531 
532         let main_memory_size = main_memory_size(&components, vm.get_hypervisor());
533 
534         let fdt_position = fdt_position.unwrap_or(if has_bios {
535             FdtPosition::Start
536         } else {
537             FdtPosition::End
538         });
539         let payload_address = match fdt_position {
540             // If the FDT is at the start of RAM, the payload needs to go somewhere after it.
541             FdtPosition::Start => GuestAddress(AARCH64_PHYS_MEM_START + AARCH64_FDT_MAX_SIZE),
542             // Otherwise, put the payload at the start of RAM.
543             FdtPosition::End | FdtPosition::AfterPayload => GuestAddress(AARCH64_PHYS_MEM_START),
544         };
545 
546         // separate out image loading from other setup to get a specific error for
547         // image loading
548         let mut initrd = None;
549         let (payload, payload_end_address) = match components.vm_image {
550             VmImage::Bios(ref mut bios) => {
551                 let image_size = arch::load_image(&mem, bios, payload_address, u64::MAX)
552                     .map_err(Error::BiosLoadFailure)?;
553                 (
554                     PayloadType::Bios {
555                         entry: payload_address,
556                         image_size: image_size as u64,
557                     },
558                     payload_address
559                         .checked_add(image_size.try_into().unwrap())
560                         .unwrap(),
561                 )
562             }
563             VmImage::Kernel(ref mut kernel_image) => {
564                 let loaded_kernel = load_kernel(&mem, payload_address, kernel_image)?;
565                 let kernel_end = loaded_kernel.address_range.end;
566                 let mut payload_end = GuestAddress(kernel_end);
567                 initrd = match components.initrd_image {
568                     Some(initrd_file) => {
569                         let mut initrd_file = initrd_file;
570                         let initrd_addr =
571                             (kernel_end + (AARCH64_INITRD_ALIGN - 1)) & !(AARCH64_INITRD_ALIGN - 1);
572                         let initrd_max_size =
573                             main_memory_size - (initrd_addr - AARCH64_PHYS_MEM_START);
574                         let initrd_addr = GuestAddress(initrd_addr);
575                         let initrd_size =
576                             arch::load_image(&mem, &mut initrd_file, initrd_addr, initrd_max_size)
577                                 .map_err(Error::InitrdLoadFailure)?;
578                         payload_end = initrd_addr
579                             .checked_add(initrd_size.try_into().unwrap())
580                             .unwrap();
581                         Some((initrd_addr, initrd_size))
582                     }
583                     None => None,
584                 };
585                 (PayloadType::Kernel(loaded_kernel), payload_end)
586             }
587         };
588 
589         let memory_end = GuestAddress(AARCH64_PHYS_MEM_START + main_memory_size);
590 
591         let fdt_address = match fdt_position {
592             FdtPosition::Start => GuestAddress(AARCH64_PHYS_MEM_START),
593             FdtPosition::End => {
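                // Pick a 2 MiB-aligned address at least AARCH64_FDT_MAX_SIZE below
                // the top of RAM.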
594                 let addr = memory_end
595                     .checked_sub(AARCH64_FDT_MAX_SIZE)
596                     .expect("Not enough memory for FDT")
597                     .align_down(AARCH64_FDT_ALIGN);
598                 assert!(addr >= payload_end_address, "Not enough memory for FDT");
599                 addr
600             }
601             FdtPosition::AfterPayload => payload_end_address
602                 .align(AARCH64_FDT_ALIGN)
603                 .expect("Not enough memory for FDT"),
604         };
605 
606         let mut use_pmu = vm
607             .get_hypervisor()
608             .check_capability(HypervisorCap::ArmPmuV3);
609         use_pmu &= !no_pmu;
610         let vcpu_count = components.vcpu_count;
611         let mut has_pvtime = true;
612         let mut vcpus = Vec::with_capacity(vcpu_count);
613         let mut vcpu_init = Vec::with_capacity(vcpu_count);
614         for vcpu_id in 0..vcpu_count {
615             let vcpu: Vcpu = *vm
616                 .create_vcpu(vcpu_id)
617                 .map_err(Error::CreateVcpu)?
618                 .downcast::<Vcpu>()
619                 .map_err(|_| Error::DowncastVcpu)?;
620             let per_vcpu_init = if vm
621                 .get_hypervisor()
622                 .check_capability(HypervisorCap::HypervisorInitializedBootContext)
623             {
624                 // No registers are initialized: VcpuInitAArch64.regs is an empty BTreeMap
625                 Default::default()
626             } else {
627                 Self::vcpu_init(
628                     vcpu_id,
629                     &payload,
630                     fdt_address,
631                     components.hv_cfg.protection_type,
632                     components.boot_cpu,
633                 )
634             };
635             has_pvtime &= vcpu.has_pvtime_support();
636             vcpus.push(vcpu);
637             vcpu_ids.push(vcpu_id);
638             vcpu_init.push(per_vcpu_init);
639         }
640 
641         // Initialize Vcpus after all Vcpu objects have been created.
642         for (vcpu_id, vcpu) in vcpus.iter().enumerate() {
643             let features =
644                 &Self::vcpu_features(vcpu_id, use_pmu, components.boot_cpu, components.sve_config);
645             vcpu.init(features).map_err(Error::VcpuInit)?;
646         }
647 
648         irq_chip.finalize().map_err(Error::FinalizeIrqChip)?;
649 
650         if has_pvtime {
651             let pvtime_mem = MemoryMappingBuilder::new(AARCH64_PVTIME_IPA_MAX_SIZE as usize)
652                 .build()
653                 .map_err(Error::BuildPvtimeError)?;
654             vm.add_memory_region(
655                 GuestAddress(AARCH64_PVTIME_IPA_START),
656                 Box::new(pvtime_mem),
657                 false,
658                 false,
659                 MemCacheType::CacheCoherent,
660             )
661             .map_err(Error::MapPvtimeError)?;
662         }
663 
664         if components.hv_cfg.protection_type.needs_firmware_loaded() {
665             arch::load_image(
666                 &mem,
667                 &mut components
668                     .pvm_fw
669                     .expect("pvmfw must be available if ProtectionType loads it"),
670                 GuestAddress(AARCH64_PROTECTED_VM_FW_START),
671                 AARCH64_PROTECTED_VM_FW_MAX_SIZE,
672             )
673             .map_err(Error::CustomPvmFwLoadFailure)?;
674         } else if components.hv_cfg.protection_type.runs_firmware() {
675             // Tell the hypervisor to load the pVM firmware.
676             vm.load_protected_vm_firmware(
677                 GuestAddress(AARCH64_PROTECTED_VM_FW_START),
678                 AARCH64_PROTECTED_VM_FW_MAX_SIZE,
679             )
680             .map_err(Error::PvmFwLoadFailure)?;
681         }
682 
683         for (vcpu_id, vcpu) in vcpus.iter().enumerate() {
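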
684             use_pmu &= vcpu.init_pmu(AARCH64_PMU_IRQ as u64 + 16).is_ok();
685             if has_pvtime {
686                 vcpu.init_pvtime(AARCH64_PVTIME_IPA_START + (vcpu_id as u64 * AARCH64_PVTIME_SIZE))
687                     .map_err(Error::InitPvtimeError)?;
688             }
689         }
690 
691         let mmio_bus = Arc::new(devices::Bus::new(BusType::Mmio));
692 
693         // ARM doesn't really use the io bus like x86, so just create an empty bus.
694         let io_bus = Arc::new(devices::Bus::new(BusType::Io));
695 
696         // Event used by PMDevice to notify crosvm that
697         // the guest OS is trying to suspend.
698         let (suspend_tube_send, suspend_tube_recv) =
699             Tube::directional_pair().map_err(Error::CreateTube)?;
700         let suspend_tube_send = Arc::new(Mutex::new(suspend_tube_send));
701 
702         let (pci_devices, others): (Vec<_>, Vec<_>) = devs
703             .into_iter()
704             .partition(|(dev, _)| dev.as_pci_device().is_some());
705 
706         let pci_devices = pci_devices
707             .into_iter()
708             .map(|(dev, jail_orig)| (dev.into_pci_device().unwrap(), jail_orig))
709             .collect();
710         let (pci, pci_irqs, mut pid_debug_label_map, _amls, _gpe_scope_amls) =
711             arch::generate_pci_root(
712                 pci_devices,
713                 irq_chip.as_irq_chip_mut(),
714                 mmio_bus.clone(),
715                 GuestAddress(arch_memory_layout.pci_cam.start),
716                 8,
717                 io_bus.clone(),
718                 system_allocator,
719                 &mut vm,
720                 (devices::AARCH64_GIC_NR_SPIS - AARCH64_IRQ_BASE) as usize,
721                 None,
722                 #[cfg(feature = "swap")]
723                 swap_controller,
724             )
725             .map_err(Error::CreatePciRoot)?;
726 
727         let pci_root = Arc::new(Mutex::new(pci));
728         let pci_bus = Arc::new(Mutex::new(PciConfigMmio::new(pci_root.clone(), 8)));
729         let (platform_devices, _others): (Vec<_>, Vec<_>) = others
730             .into_iter()
731             .partition(|(dev, _)| dev.as_platform_device().is_some());
732 
733         let platform_devices = platform_devices
734             .into_iter()
735             .map(|(dev, jail_orig)| (*(dev.into_platform_device().unwrap()), jail_orig))
736             .collect();
737         let (platform_devices, mut platform_pid_debug_label_map, dev_resources) =
738             arch::sys::linux::generate_platform_bus(
739                 platform_devices,
740                 irq_chip.as_irq_chip_mut(),
741                 &mmio_bus,
742                 system_allocator,
743                 &mut vm,
744                 #[cfg(feature = "swap")]
745                 swap_controller,
746                 components.hv_cfg.protection_type,
747             )
748             .map_err(Error::CreatePlatformBus)?;
749         pid_debug_label_map.append(&mut platform_pid_debug_label_map);
750 
751         let (vmwdt_host_tube, vmwdt_control_tube) = Tube::pair().map_err(Error::CreateTube)?;
752         Self::add_arch_devs(
753             irq_chip.as_irq_chip_mut(),
754             &mmio_bus,
755             vcpu_count,
756             _vm_evt_wrtube,
757             vmwdt_control_tube,
758         )?;
759 
760         let com_evt_1_3 = devices::IrqEdgeEvent::new().map_err(Error::CreateEvent)?;
761         let com_evt_2_4 = devices::IrqEdgeEvent::new().map_err(Error::CreateEvent)?;
762         let serial_devices = arch::add_serial_devices(
763             components.hv_cfg.protection_type,
764             &mmio_bus,
765             (AARCH64_SERIAL_1_3_IRQ, com_evt_1_3.get_trigger()),
766             (AARCH64_SERIAL_2_4_IRQ, com_evt_2_4.get_trigger()),
767             serial_parameters,
768             serial_jail,
769             #[cfg(feature = "swap")]
770             swap_controller,
771         )
772         .map_err(Error::CreateSerialDevices)?;
773 
774         let source = IrqEventSource {
775             device_id: Serial::device_id(),
776             queue_id: 0,
777             device_name: Serial::debug_label(),
778         };
779         irq_chip
780             .register_edge_irq_event(AARCH64_SERIAL_1_3_IRQ, &com_evt_1_3, source.clone())
781             .map_err(Error::RegisterIrqfd)?;
782         irq_chip
783             .register_edge_irq_event(AARCH64_SERIAL_2_4_IRQ, &com_evt_2_4, source)
784             .map_err(Error::RegisterIrqfd)?;
785 
786         mmio_bus
787             .insert(
788                 pci_bus,
789                 arch_memory_layout.pci_cam.start,
790                 arch_memory_layout.pci_cam.len().unwrap(),
791             )
792             .map_err(Error::RegisterPci)?;
793 
794         let (vcpufreq_host_tube, vcpufreq_control_tube) =
795             Tube::pair().map_err(Error::CreateTube)?;
796         let vcpufreq_shared_tube = Arc::new(Mutex::new(vcpufreq_control_tube));
797         #[cfg(any(target_os = "android", target_os = "linux"))]
798         if !components.cpu_frequencies.is_empty() {
799             let mut freq_domain_vcpus: BTreeMap<u32, Vec<usize>> = BTreeMap::new();
800             let mut freq_domain_perfs: BTreeMap<u32, Arc<AtomicU32>> = BTreeMap::new();
801             let mut vcpu_affinities: Vec<u32> = Vec::new();
802             for vcpu in 0..vcpu_count {
803                 let freq_domain = *components.vcpu_domains.get(&vcpu).unwrap_or(&(vcpu as u32));
804                 freq_domain_vcpus.entry(freq_domain).or_default().push(vcpu);
805                 let vcpu_affinity = match components.vcpu_affinity.clone() {
806                     Some(VcpuAffinity::Global(v)) => v,
807                     Some(VcpuAffinity::PerVcpu(mut m)) => m.remove(&vcpu).unwrap_or_default(),
808                     None => panic!("vcpu_affinity needs to be set for VirtCpufreq"),
809                 };
810                 vcpu_affinities.push(vcpu_affinity[0].try_into().unwrap());
811             }
812             for domain in freq_domain_vcpus.keys() {
813                 let domain_perf = Arc::new(AtomicU32::new(0));
814                 freq_domain_perfs.insert(*domain, domain_perf);
815             }
816             let largest_vcpu_affinity_idx = *vcpu_affinities.iter().max().unwrap() as usize;
817             for (vcpu, vcpu_affinity) in vcpu_affinities.iter().enumerate() {
818                 let mut virtfreq_size = AARCH64_VIRTFREQ_SIZE;
819                 if components.virt_cpufreq_v2 {
820                     let domain = *components.vcpu_domains.get(&vcpu).unwrap_or(&(vcpu as u32));
821                     virtfreq_size = AARCH64_VIRTFREQ_V2_SIZE;
822                     let virt_cpufreq = Arc::new(Mutex::new(VirtCpufreqV2::new(
823                         *vcpu_affinity,
824                         components.cpu_frequencies.get(&vcpu).unwrap().clone(),
825                         components.vcpu_domain_paths.get(&vcpu).cloned(),
826                         domain,
827                         *components.normalized_cpu_capacities.get(&vcpu).unwrap(),
828                         largest_vcpu_affinity_idx,
829                         vcpufreq_shared_tube.clone(),
830                         freq_domain_vcpus.get(&domain).unwrap().clone(),
831                         freq_domain_perfs.get(&domain).unwrap().clone(),
832                     )));
833                     mmio_bus
834                         .insert(
835                             virt_cpufreq,
836                             AARCH64_VIRTFREQ_BASE + (vcpu as u64 * virtfreq_size),
837                             virtfreq_size,
838                         )
839                         .map_err(Error::RegisterVirtCpufreq)?;
840                 } else {
841                     let virt_cpufreq = Arc::new(Mutex::new(VirtCpufreq::new(
842                         *vcpu_affinity,
843                         *components.normalized_cpu_capacities.get(&vcpu).unwrap(),
844                         *components
845                             .cpu_frequencies
846                             .get(&vcpu)
847                             .unwrap()
848                             .iter()
849                             .max()
850                             .unwrap(),
851                     )));
852                     mmio_bus
853                         .insert(
854                             virt_cpufreq,
855                             AARCH64_VIRTFREQ_BASE + (vcpu as u64 * virtfreq_size),
856                             virtfreq_size,
857                         )
858                         .map_err(Error::RegisterVirtCpufreq)?;
859                 }
860 
861                 if vcpu as u64 * AARCH64_VIRTFREQ_SIZE + virtfreq_size > AARCH64_VIRTFREQ_MAXSIZE {
862                     panic!("Exceeded maximum number of virt cpufreq devices");
863                 }
864             }
865         }
866 
867         let mut cmdline = Self::get_base_linux_cmdline();
868         get_serial_cmdline(&mut cmdline, serial_parameters, "mmio", &serial_devices)
869             .map_err(Error::GetSerialCmdline)?;
870         for param in components.extra_kernel_params {
871             cmdline.insert_str(&param).map_err(Error::Cmdline)?;
872         }
873 
874         if let Some(ramoops_region) = ramoops_region {
875             arch::pstore::add_ramoops_kernel_cmdline(&mut cmdline, &ramoops_region)
876                 .map_err(Error::Cmdline)?;
877         }
878 
879         let psci_version = vcpus[0].get_psci_version().map_err(Error::GetPsciVersion)?;
880 
881         let pci_cfg = fdt::PciConfigRegion {
882             base: arch_memory_layout.pci_cam.start,
883             size: arch_memory_layout.pci_cam.len().unwrap(),
884         };
885 
886         let mut pci_ranges: Vec<fdt::PciRange> = Vec::new();
887 
888         let mut add_pci_ranges = |alloc: &AddressAllocator, prefetchable: bool| {
889             pci_ranges.extend(alloc.pools().iter().map(|range| fdt::PciRange {
890                 space: fdt::PciAddressSpace::Memory64,
891                 bus_address: range.start,
892                 cpu_physical_address: range.start,
893                 size: range.len().unwrap(),
894                 prefetchable,
895             }));
896         };
897 
898         add_pci_ranges(system_allocator.mmio_allocator(MmioType::Low), false);
899         add_pci_ranges(system_allocator.mmio_allocator(MmioType::High), true);
900 
901         let (bat_control, bat_mmio_base_and_irq) = match bat_type {
902             Some(BatteryType::Goldfish) => {
903                 let bat_irq = AARCH64_BAT_IRQ;
904 
905                 // A dummy AML buffer; AArch64 crosvm doesn't use ACPI.
906                 let mut amls = Vec::new();
907                 let (control_tube, mmio_base) = arch::sys::linux::add_goldfish_battery(
908                     &mut amls,
909                     bat_jail,
910                     &mmio_bus,
911                     irq_chip.as_irq_chip_mut(),
912                     bat_irq,
913                     system_allocator,
914                     #[cfg(feature = "swap")]
915                     swap_controller,
916                 )
917                 .map_err(Error::CreateBatDevices)?;
918                 (
919                     Some(BatControl {
920                         type_: BatteryType::Goldfish,
921                         control_tube,
922                     }),
923                     Some((mmio_base, bat_irq)),
924                 )
925             }
926             None => (None, None),
927         };
928 
929         let vmwdt_cfg = fdt::VmWdtConfig {
930             base: AARCH64_VMWDT_ADDR,
931             size: AARCH64_VMWDT_SIZE,
932             clock_hz: VMWDT_DEFAULT_CLOCK_HZ,
933             timeout_sec: VMWDT_DEFAULT_TIMEOUT_SEC,
934         };
935 
936         fdt::create_fdt(
937             AARCH64_FDT_MAX_SIZE as usize,
938             &mem,
939             pci_irqs,
940             pci_cfg,
941             &pci_ranges,
942             dev_resources,
943             vcpu_count as u32,
944             &|n| get_vcpu_mpidr_aff(&vcpus, n),
945             components.cpu_clusters,
946             components.cpu_capacity,
947             components.cpu_frequencies,
948             fdt_address,
949             cmdline
950                 .as_str_with_max_len(AARCH64_CMDLINE_MAX_SIZE - 1)
951                 .map_err(Error::Cmdline)?,
952             (payload.entry(), payload.size() as usize),
953             initrd,
954             components.android_fstab,
955             irq_chip.get_vgic_version() == DeviceKind::ArmVgicV3,
956             use_pmu,
957             psci_version,
958             components.swiotlb.map(|size| {
959                 (
960                     get_swiotlb_addr(components.memory_size, size, vm.get_hypervisor()),
961                     size,
962                 )
963             }),
964             bat_mmio_base_and_irq,
965             vmwdt_cfg,
966             dump_device_tree_blob,
967             &|writer, phandles| vm.create_fdt(writer, phandles),
968             components.dynamic_power_coefficient,
969             device_tree_overlays,
970             &serial_devices,
971             components.virt_cpufreq_v2,
972         )
973         .map_err(Error::CreateFdt)?;
974 
975         vm.init_arch(
976             payload.entry(),
977             fdt_address,
978             AARCH64_FDT_MAX_SIZE.try_into().unwrap(),
979         )
980         .map_err(Error::InitVmError)?;
981 
982         let vm_request_tubes = vec![vmwdt_host_tube, vcpufreq_host_tube];
983 
984         Ok(RunnableLinuxVm {
985             vm,
986             vcpu_count,
987             vcpus: Some(vcpus),
988             vcpu_init,
989             vcpu_affinity: components.vcpu_affinity,
990             no_smt: components.no_smt,
991             irq_chip: irq_chip.try_box_clone().map_err(Error::CloneIrqChip)?,
992             io_bus,
993             mmio_bus,
994             pid_debug_label_map,
995             suspend_tube: (suspend_tube_send, suspend_tube_recv),
996             rt_cpus: components.rt_cpus,
997             delay_rt: components.delay_rt,
998             bat_control,
999             pm: None,
1000             resume_notify_devices: Vec::new(),
1001             root_config: pci_root,
1002             platform_devices,
1003             hotplug_bus: BTreeMap::new(),
1004             devices_thread: None,
1005             vm_request_tubes,
1006         })
1007     }
1008 
1009     fn configure_vcpu<V: Vm>(
1010         _vm: &V,
1011         _hypervisor: &dyn Hypervisor,
1012         _irq_chip: &mut dyn IrqChipAArch64,
1013         vcpu: &mut dyn VcpuAArch64,
1014         vcpu_init: VcpuInitAArch64,
1015         _vcpu_id: usize,
1016         _num_cpus: usize,
1017         _cpu_config: Option<CpuConfigAArch64>,
1018     ) -> std::result::Result<(), Self::Error> {
1019         for (reg, value) in vcpu_init.regs.iter() {
1020             vcpu.set_one_reg(*reg, *value).map_err(Error::SetReg)?;
1021         }
1022         Ok(())
1023     }
1024 
1025     fn register_pci_device<V: VmAArch64, Vcpu: VcpuAArch64>(
1026         _linux: &mut RunnableLinuxVm<V, Vcpu>,
1027         _device: Box<dyn PciDevice>,
1028         _minijail: Option<Minijail>,
1029         _resources: &mut SystemAllocator,
1030         _tube: &mpsc::Sender<PciRootCommand>,
1031         #[cfg(feature = "swap")] _swap_controller: &mut Option<swap::SwapController>,
1032     ) -> std::result::Result<PciAddress, Self::Error> {
1033         // PCI hotplug isn't verified on AArch64, so report it as unsupported here.
1034         Err(Error::Unsupported)
1035     }
1036 
1037     fn get_host_cpu_max_freq_khz() -> std::result::Result<BTreeMap<usize, u32>, Self::Error> {
1038         Ok(Self::collect_for_each_cpu(base::logical_core_max_freq_khz)
1039             .map_err(Error::CpuFrequencies)?
1040             .into_iter()
1041             .enumerate()
1042             .collect())
1043     }
1044 
1045     fn get_host_cpu_frequencies_khz() -> std::result::Result<BTreeMap<usize, Vec<u32>>, Self::Error>
1046     {
1047         Ok(
1048             Self::collect_for_each_cpu(base::logical_core_frequencies_khz)
1049                 .map_err(Error::CpuFrequencies)?
1050                 .into_iter()
1051                 .enumerate()
1052                 .collect(),
1053         )
1054     }
1055 
1056     // Returns a (cpu_id -> value) map of the DMIPS/MHz capacities of logical cores
1057     // in the host system.
1058     fn get_host_cpu_capacity() -> std::result::Result<BTreeMap<usize, u32>, Self::Error> {
1059         Ok(Self::collect_for_each_cpu(base::logical_core_capacity)
1060             .map_err(Error::CpuTopology)?
1061             .into_iter()
1062             .enumerate()
1063             .collect())
1064     }
1065 
1066     // Creates CPU cluster mask for each CPU in the host system.
1067     fn get_host_cpu_clusters() -> std::result::Result<Vec<CpuSet>, Self::Error> {
1068         let cluster_ids = Self::collect_for_each_cpu(base::logical_core_cluster_id)
1069             .map_err(Error::CpuTopology)?;
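        // For example, cluster IDs [0, 0, 1, 1] produce the cluster sets
        // [{0, 1}, {2, 3}].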
1070         let mut unique_clusters: Vec<CpuSet> = cluster_ids
1071             .iter()
1072             .map(|&vcpu_cluster_id| {
1073                 cluster_ids
1074                     .iter()
1075                     .enumerate()
1076                     .filter(|(_, &cpu_cluster_id)| vcpu_cluster_id == cpu_cluster_id)
1077                     .map(|(cpu_id, _)| cpu_id)
1078                     .collect()
1079             })
1080             .collect();
1081         unique_clusters.sort_unstable();
1082         unique_clusters.dedup();
1083         Ok(unique_clusters)
1084     }
1085 }
1086 
1087 #[cfg(feature = "gdb")]
1088 impl<T: VcpuAArch64> arch::GdbOps<T> for AArch64 {
1089     type Error = Error;
1090 
1091     fn read_memory(
1092         _vcpu: &T,
1093         guest_mem: &GuestMemory,
1094         vaddr: GuestAddress,
1095         len: usize,
1096     ) -> Result<Vec<u8>> {
1097         let mut buf = vec![0; len];
1098 
1099         guest_mem
1100             .read_exact_at_addr(&mut buf, vaddr)
1101             .map_err(Error::ReadGuestMemory)?;
1102 
1103         Ok(buf)
1104     }
1105 
1106     fn write_memory(
1107         _vcpu: &T,
1108         guest_mem: &GuestMemory,
1109         vaddr: GuestAddress,
1110         buf: &[u8],
1111     ) -> Result<()> {
1112         guest_mem
1113             .write_all_at_addr(buf, vaddr)
1114             .map_err(Error::WriteGuestMemory)
1115     }
1116 
1117     fn read_registers(vcpu: &T) -> Result<<GdbArch as Arch>::Registers> {
1118         let mut regs: <GdbArch as Arch>::Registers = Default::default();
1119         assert!(
1120             regs.x.len() == 31,
1121             "unexpected number of Xn general purpose registers"
1122         );
1123         for (i, reg) in regs.x.iter_mut().enumerate() {
1124             let n = u8::try_from(i).expect("invalid Xn general purpose register index");
1125             *reg = vcpu
1126                 .get_one_reg(VcpuRegAArch64::X(n))
1127                 .map_err(Error::ReadReg)?;
1128         }
1129         regs.sp = vcpu
1130             .get_one_reg(VcpuRegAArch64::Sp)
1131             .map_err(Error::ReadReg)?;
1132         regs.pc = vcpu
1133             .get_one_reg(VcpuRegAArch64::Pc)
1134             .map_err(Error::ReadReg)?;
1135         // hypervisor API gives a 64-bit value for Pstate, but GDB wants a 32-bit "CPSR".
1136         regs.cpsr = vcpu
1137             .get_one_reg(VcpuRegAArch64::Pstate)
1138             .map_err(Error::ReadReg)? as u32;
1139         for (i, reg) in regs.v.iter_mut().enumerate() {
1140             let n = u8::try_from(i).expect("invalid Vn general purpose register index");
1141             *reg = vcpu.get_vector_reg(n).map_err(Error::ReadReg)?;
1142         }
1143         regs.fpcr = vcpu
1144             .get_one_reg(VcpuRegAArch64::System(AArch64SysRegId::FPCR))
1145             .map_err(Error::ReadReg)? as u32;
1146         regs.fpsr = vcpu
1147             .get_one_reg(VcpuRegAArch64::System(AArch64SysRegId::FPSR))
1148             .map_err(Error::ReadReg)? as u32;
1149 
1150         Ok(regs)
1151     }
1152 
1153     fn write_registers(vcpu: &T, regs: &<GdbArch as Arch>::Registers) -> Result<()> {
1154         assert!(
1155             regs.x.len() == 31,
1156             "unexpected number of Xn general purpose registers"
1157         );
1158         for (i, reg) in regs.x.iter().enumerate() {
1159             let n = u8::try_from(i).expect("invalid Xn general purpose register index");
1160             vcpu.set_one_reg(VcpuRegAArch64::X(n), *reg)
1161                 .map_err(Error::WriteReg)?;
1162         }
1163         vcpu.set_one_reg(VcpuRegAArch64::Sp, regs.sp)
1164             .map_err(Error::WriteReg)?;
1165         vcpu.set_one_reg(VcpuRegAArch64::Pc, regs.pc)
1166             .map_err(Error::WriteReg)?;
1167         // GDB gives a 32-bit value for "CPSR", but hypervisor API wants a 64-bit Pstate.
1168         let pstate = vcpu
1169             .get_one_reg(VcpuRegAArch64::Pstate)
1170             .map_err(Error::ReadReg)?;
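        // Keep the upper 32 bits of the existing Pstate value and splice in the
        // 32-bit CPSR supplied by GDB.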
1171         let pstate = (pstate & 0xffff_ffff_0000_0000) | (regs.cpsr as u64);
1172         vcpu.set_one_reg(VcpuRegAArch64::Pstate, pstate)
1173             .map_err(Error::WriteReg)?;
1174         for (i, reg) in regs.v.iter().enumerate() {
1175             let n = u8::try_from(i).expect("invalid Vn general purpose register index");
1176             vcpu.set_vector_reg(n, *reg).map_err(Error::WriteReg)?;
1177         }
1178         vcpu.set_one_reg(
1179             VcpuRegAArch64::System(AArch64SysRegId::FPCR),
1180             u64::from(regs.fpcr),
1181         )
1182         .map_err(Error::WriteReg)?;
1183         vcpu.set_one_reg(
1184             VcpuRegAArch64::System(AArch64SysRegId::FPSR),
1185             u64::from(regs.fpsr),
1186         )
1187         .map_err(Error::WriteReg)?;
1188 
1189         Ok(())
1190     }
1191 
1192     fn read_register(vcpu: &T, reg_id: <GdbArch as Arch>::RegId) -> Result<Vec<u8>> {
1193         let result = match reg_id {
1194             AArch64RegId::X(n) => vcpu
1195                 .get_one_reg(VcpuRegAArch64::X(n))
1196                 .map(|v| v.to_ne_bytes().to_vec()),
1197             AArch64RegId::Sp => vcpu
1198                 .get_one_reg(VcpuRegAArch64::Sp)
1199                 .map(|v| v.to_ne_bytes().to_vec()),
1200             AArch64RegId::Pc => vcpu
1201                 .get_one_reg(VcpuRegAArch64::Pc)
1202                 .map(|v| v.to_ne_bytes().to_vec()),
1203             AArch64RegId::Pstate => vcpu
1204                 .get_one_reg(VcpuRegAArch64::Pstate)
1205                 .map(|v| (v as u32).to_ne_bytes().to_vec()),
1206             AArch64RegId::V(n) => vcpu.get_vector_reg(n).map(|v| v.to_ne_bytes().to_vec()),
1207             AArch64RegId::System(op) => vcpu
1208                 .get_one_reg(VcpuRegAArch64::System(AArch64SysRegId::from_encoded(op)))
1209                 .map(|v| v.to_ne_bytes().to_vec()),
1210             _ => {
1211                 base::error!("Unexpected AArch64RegId: {:?}", reg_id);
1212                 Err(base::Error::new(libc::EINVAL))
1213             }
1214         };
1215 
1216         match result {
1217             Ok(bytes) => Ok(bytes),
1218             // ENOENT is returned when KVM is aware of the register but it is unavailable
1219             Err(e) if e.errno() == libc::ENOENT => Ok(Vec::new()),
1220             Err(e) => Err(Error::ReadReg(e)),
1221         }
1222     }
1223 
1224     fn write_register(vcpu: &T, reg_id: <GdbArch as Arch>::RegId, data: &[u8]) -> Result<()> {
1225         fn try_into_u32(data: &[u8]) -> Result<u32> {
1226             let s = data
1227                 .get(..4)
1228                 .ok_or(Error::WriteReg(base::Error::new(libc::EINVAL)))?;
1229             let a = s
1230                 .try_into()
1231                 .map_err(|_| Error::WriteReg(base::Error::new(libc::EINVAL)))?;
1232             Ok(u32::from_ne_bytes(a))
1233         }
1234 
1235         fn try_into_u64(data: &[u8]) -> Result<u64> {
1236             let s = data
1237                 .get(..8)
1238                 .ok_or(Error::WriteReg(base::Error::new(libc::EINVAL)))?;
1239             let a = s
1240                 .try_into()
1241                 .map_err(|_| Error::WriteReg(base::Error::new(libc::EINVAL)))?;
1242             Ok(u64::from_ne_bytes(a))
1243         }
1244 
1245         fn try_into_u128(data: &[u8]) -> Result<u128> {
1246             let s = data
1247                 .get(..16)
1248                 .ok_or(Error::WriteReg(base::Error::new(libc::EINVAL)))?;
1249             let a = s
1250                 .try_into()
1251                 .map_err(|_| Error::WriteReg(base::Error::new(libc::EINVAL)))?;
1252             Ok(u128::from_ne_bytes(a))
1253         }
1254 
1255         match reg_id {
1256             AArch64RegId::X(n) => vcpu.set_one_reg(VcpuRegAArch64::X(n), try_into_u64(data)?),
1257             AArch64RegId::Sp => vcpu.set_one_reg(VcpuRegAArch64::Sp, try_into_u64(data)?),
1258             AArch64RegId::Pc => vcpu.set_one_reg(VcpuRegAArch64::Pc, try_into_u64(data)?),
1259             AArch64RegId::Pstate => {
1260                 vcpu.set_one_reg(VcpuRegAArch64::Pstate, u64::from(try_into_u32(data)?))
1261             }
1262             AArch64RegId::V(n) => vcpu.set_vector_reg(n, try_into_u128(data)?),
1263             AArch64RegId::System(op) => vcpu.set_one_reg(
1264                 VcpuRegAArch64::System(AArch64SysRegId::from_encoded(op)),
1265                 try_into_u64(data)?,
1266             ),
1267             _ => {
1268                 base::error!("Unexpected AArch64RegId: {:?}", reg_id);
1269                 Err(base::Error::new(libc::EINVAL))
1270             }
1271         }
1272         .map_err(Error::WriteReg)
1273     }
1274 
1275     fn enable_singlestep(vcpu: &T) -> Result<()> {
1276         const SINGLE_STEP: bool = true;
1277         vcpu.set_guest_debug(&[], SINGLE_STEP)
1278             .map_err(Error::EnableSinglestep)
1279     }
1280 
1281     fn get_max_hw_breakpoints(vcpu: &T) -> Result<usize> {
1282         vcpu.get_max_hw_bps().map_err(Error::GetMaxHwBreakPoint)
1283     }
1284 
1285     fn set_hw_breakpoints(vcpu: &T, breakpoints: &[GuestAddress]) -> Result<()> {
1286         const SINGLE_STEP: bool = false;
1287         vcpu.set_guest_debug(breakpoints, SINGLE_STEP)
1288             .map_err(Error::SetHwBreakpoint)
1289     }
1290 }
1291 
1292 impl AArch64 {
1293     /// This returns the base portion of the kernel command line for this architecture.
1294     fn get_base_linux_cmdline() -> kernel_cmdline::Cmdline {
1295         let mut cmdline = kernel_cmdline::Cmdline::new();
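             // `panic=-1` makes the guest kernel reboot immediately on panic rather than hanging.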
1296         cmdline.insert_str("panic=-1").unwrap();
1297         cmdline
1298     }
1299 
1300     /// This adds any early platform devices for this architecture.
1301     ///
1302     /// # Arguments
1303     ///
1304     /// * `irq_chip` - The IRQ chip to add irqs to.
1305     /// * `bus` - The bus to add devices to.
1306     /// * `vcpu_count` - The number of virtual CPUs for this guest VM.
1307     /// * `vm_evt_wrtube` - Send side of the VM event tube; the watchdog device uses it to notify the VMM of guest events.
         /// * `vmwdt_request_tube` - Tube handed to the virtual watchdog device (see `Vmwdt::new`).
1308     fn add_arch_devs(
1309         irq_chip: &mut dyn IrqChip,
1310         bus: &Bus,
1311         vcpu_count: usize,
1312         vm_evt_wrtube: &SendTube,
1313         vmwdt_request_tube: Tube,
1314     ) -> Result<()> {
1315         let rtc_evt = devices::IrqEdgeEvent::new().map_err(Error::CreateEvent)?;
1316         let rtc = devices::pl030::Pl030::new(rtc_evt.try_clone().map_err(Error::CloneEvent)?);
1317         irq_chip
1318             .register_edge_irq_event(AARCH64_RTC_IRQ, &rtc_evt, IrqEventSource::from_device(&rtc))
1319             .map_err(Error::RegisterIrqfd)?;
1320 
1321         bus.insert(
1322             Arc::new(Mutex::new(rtc)),
1323             AARCH64_RTC_ADDR,
1324             AARCH64_RTC_SIZE,
1325         )
1326         .expect("failed to add rtc device");
1327 
1328         let vmwdt_evt = devices::IrqEdgeEvent::new().map_err(Error::CreateEvent)?;
1329         let vm_wdt = devices::vmwdt::Vmwdt::new(
1330             vcpu_count,
1331             vm_evt_wrtube.try_clone().unwrap(),
1332             vmwdt_evt.try_clone().map_err(Error::CloneEvent)?,
1333             vmwdt_request_tube,
1334         )
1335         .map_err(Error::CreateVmwdtDevice)?;
1336         irq_chip
1337             .register_edge_irq_event(
1338                 AARCH64_VMWDT_IRQ,
1339                 &vmwdt_evt,
1340                 IrqEventSource::from_device(&vm_wdt),
1341             )
1342             .map_err(Error::RegisterIrqfd)?;
1343 
1344         bus.insert(
1345             Arc::new(Mutex::new(vm_wdt)),
1346             AARCH64_VMWDT_ADDR,
1347             AARCH64_VMWDT_SIZE,
1348         )
1349         .expect("failed to add vmwdt device");
1350 
1351         Ok(())
1352     }
1353 
1354     /// Get ARM-specific features for vcpu with index `vcpu_id`.
1355     ///
1356     /// # Arguments
1357     ///
1358     /// * `vcpu_id` - The VM's index for `vcpu`.
1359     /// * `use_pmu` - Should `vcpu` be configured to use the Performance Monitor Unit.
         /// * `boot_cpu` - The VM's index of the boot vcpu; all other vcpus get the `PowerOff` feature.
         /// * `sve` - Scalable Vector Extension configuration; adds the `Sve` vcpu feature when enabled.
1360     fn vcpu_features(
1361         vcpu_id: usize,
1362         use_pmu: bool,
1363         boot_cpu: usize,
1364         sve: SveConfig,
1365     ) -> Vec<VcpuFeature> {
1366         let mut features = vec![VcpuFeature::PsciV0_2];
1367         if use_pmu {
1368             features.push(VcpuFeature::PmuV3);
1369         }
1370         // Non-boot cpus are powered off initially
1371         if vcpu_id != boot_cpu {
1372             features.push(VcpuFeature::PowerOff);
1373         }
1374         if sve.enable {
1375             features.push(VcpuFeature::Sve);
1376         }
1377 
1378         features
1379     }
1380 
1381     /// Get initial register state for vcpu with index `vcpu_id`.
1382     ///
1383     /// # Arguments
1384     ///
1385     /// * `vcpu_id` - The VM's index for `vcpu`.
         /// * `payload` - The kernel or BIOS payload; provides the entry point and image size.
         /// * `fdt_address` - Guest address of the device tree blob, passed to the boot vcpu in `X0`.
         /// * `protection_type` - Determines whether `PC` points at protected-VM firmware, is left for the hypervisor to set, or points at the payload entry.
         /// * `boot_cpu` - The VM's index of the boot vcpu; only the boot vcpu has `PC`/`X0` initialized here.
1386     fn vcpu_init(
1387         vcpu_id: usize,
1388         payload: &PayloadType,
1389         fdt_address: GuestAddress,
1390         protection_type: ProtectionType,
1391         boot_cpu: usize,
1392     ) -> VcpuInitAArch64 {
1393         let mut regs: BTreeMap<VcpuRegAArch64, u64> = Default::default();
1394 
1395         // All interrupts masked
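             // (D, A, I and F set: debug exceptions, SError, IRQs and FIQs masked; EL1h selects
             // EL1 using SP_EL1.)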
1396         let pstate = PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT | PSR_MODE_EL1H;
1397         regs.insert(VcpuRegAArch64::Pstate, pstate);
1398 
1399         // Only initialize boot registers for the boot cpu; other cpus are powered off initially
1400         if vcpu_id == boot_cpu {
1401             let entry_addr = if protection_type.needs_firmware_loaded() {
1402                 Some(AARCH64_PROTECTED_VM_FW_START)
1403             } else if protection_type.runs_firmware() {
1404                 None // Initial PC value is set by the hypervisor
1405             } else {
1406                 Some(payload.entry().offset())
1407             };
1408 
1409             /* PC -- entry point */
1410             if let Some(entry) = entry_addr {
1411                 regs.insert(VcpuRegAArch64::Pc, entry);
1412             }
1413 
1414             /* X0 -- fdt address */
1415             regs.insert(VcpuRegAArch64::X(0), fdt_address.offset());
1416 
1417             if protection_type.runs_firmware() {
1418                 /* X1 -- payload entry point */
1419                 regs.insert(VcpuRegAArch64::X(1), payload.entry().offset());
1420 
1421                 /* X2 -- image size */
1422                 regs.insert(VcpuRegAArch64::X(2), payload.size());
1423             }
1424         }
1425 
1426         VcpuInitAArch64 { regs }
1427     }
1428 
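         /// Runs `func` once per logical core on the host and collects the results, returning the
         /// first error encountered. A minimal sketch of the call shape (hypothetical closure):
         /// `AArch64::collect_for_each_cpu(|cpu| Ok(cpu as u64))`.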
1429     fn collect_for_each_cpu<F, T>(func: F) -> std::result::Result<Vec<T>, base::Error>
1430     where
1431         F: Fn(usize) -> std::result::Result<T, base::Error>,
1432     {
1433         (0..base::number_of_logical_cores()?).map(func).collect()
1434     }
1435 }
1436 
1437 #[cfg(test)]
1438 mod tests {
1439     use super::*;
1440 
1441     #[test]
1442     fn vcpu_init_unprotected_kernel() {
1443         let payload = PayloadType::Kernel(LoadedKernel {
1444             address_range: AddressRange::from_start_and_size(0x8080_0000, 0x1000).unwrap(),
1445             size: 0x1000,
1446             entry: GuestAddress(0x8080_0000),
1447         });
1448         let fdt_address = GuestAddress(0x1234);
1449         let prot = ProtectionType::Unprotected;
1450 
1451         let vcpu_init = AArch64::vcpu_init(0, &payload, fdt_address, prot, 0);
1452 
1453         // PC: kernel image entry point
1454         assert_eq!(vcpu_init.regs.get(&VcpuRegAArch64::Pc), Some(&0x8080_0000));
1455 
1456         // X0: fdt_offset
1457         assert_eq!(vcpu_init.regs.get(&VcpuRegAArch64::X(0)), Some(&0x1234));
1458     }
1459 
1460     #[test]
1461     fn vcpu_init_unprotected_bios() {
1462         let payload = PayloadType::Bios {
1463             entry: GuestAddress(0x8020_0000),
1464             image_size: 0x1000,
1465         };
1466         let fdt_address = GuestAddress(0x1234);
1467         let prot = ProtectionType::Unprotected;
1468 
1469         let vcpu_init = AArch64::vcpu_init(0, &payload, fdt_address, prot, 0);
1470 
1471         // PC: bios image entry point
1472         assert_eq!(vcpu_init.regs.get(&VcpuRegAArch64::Pc), Some(&0x8020_0000));
1473 
1474         // X0: fdt_offset
1475         assert_eq!(vcpu_init.regs.get(&VcpuRegAArch64::X(0)), Some(&0x1234));
1476     }
1477 
1478     #[test]
1479     fn vcpu_init_protected_kernel() {
1480         let payload = PayloadType::Kernel(LoadedKernel {
1481             address_range: AddressRange::from_start_and_size(0x8080_0000, 0x1000).unwrap(),
1482             size: 0x1000,
1483             entry: GuestAddress(0x8080_0000),
1484         });
1485         let fdt_address = GuestAddress(0x1234);
1486         let prot = ProtectionType::Protected;
1487 
1488         let vcpu_init = AArch64::vcpu_init(0, &payload, fdt_address, prot, 0);
1489 
1490         // The hypervisor provides the initial value of PC, so PC should not be present in the
1491         // vcpu_init register map.
1492         assert_eq!(vcpu_init.regs.get(&VcpuRegAArch64::Pc), None);
1493 
1494         // X0: fdt_offset
1495         assert_eq!(vcpu_init.regs.get(&VcpuRegAArch64::X(0)), Some(&0x1234));
1496 
1497         // X1: kernel image entry point
1498         assert_eq!(
1499             vcpu_init.regs.get(&VcpuRegAArch64::X(1)),
1500             Some(&0x8080_0000)
1501         );
1502 
1503         // X2: image size
1504         assert_eq!(vcpu_init.regs.get(&VcpuRegAArch64::X(2)), Some(&0x1000));
1505     }
1506 }
1507