xref: /aosp_15_r20/external/crosvm/aarch64/src/fdt.rs (revision bb4ee6a4ae7042d18b07a98463b9c8b875e44b39)
1 // Copyright 2018 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 use std::collections::BTreeMap;
6 use std::collections::HashSet;
7 use std::fs::File;
8 use std::fs::OpenOptions;
9 use std::io::Write;
10 use std::path::PathBuf;
11 
12 use arch::apply_device_tree_overlays;
13 use arch::serial::SerialDeviceInfo;
14 use arch::CpuSet;
15 use arch::DtbOverlay;
16 #[cfg(any(target_os = "android", target_os = "linux"))]
17 use arch::PlatformBusResources;
18 use base::open_file_or_duplicate;
19 use cros_fdt::Error;
20 use cros_fdt::Fdt;
21 use cros_fdt::Result;
22 // This is a Battery related constant
23 use devices::bat::GOLDFISHBAT_MMIO_LEN;
24 use devices::pl030::PL030_AMBA_ID;
25 use devices::IommuDevType;
26 use devices::PciAddress;
27 use devices::PciInterruptPin;
28 use hypervisor::PsciVersion;
29 use hypervisor::VmAArch64;
30 use hypervisor::PSCI_0_2;
31 use hypervisor::PSCI_1_0;
32 use rand::rngs::OsRng;
33 use rand::RngCore;
34 use vm_memory::GuestAddress;
35 use vm_memory::GuestMemory;
36 
37 // These are GIC address-space location constants.
38 use crate::AARCH64_GIC_CPUI_BASE;
39 use crate::AARCH64_GIC_CPUI_SIZE;
40 use crate::AARCH64_GIC_DIST_BASE;
41 use crate::AARCH64_GIC_DIST_SIZE;
42 use crate::AARCH64_GIC_REDIST_SIZE;
43 use crate::AARCH64_PMU_IRQ;
44 use crate::AARCH64_PROTECTED_VM_FW_START;
45 // These are RTC related constants
46 use crate::AARCH64_RTC_ADDR;
47 use crate::AARCH64_RTC_IRQ;
48 use crate::AARCH64_RTC_SIZE;
49 // These are serial device related constants.
50 use crate::AARCH64_SERIAL_SPEED;
51 use crate::AARCH64_VIRTFREQ_BASE;
52 use crate::AARCH64_VIRTFREQ_SIZE;
53 use crate::AARCH64_VIRTFREQ_V2_SIZE;
54 use crate::AARCH64_VMWDT_IRQ;
55 
56 // This is an arbitrary number to specify the node for the GIC.
57 // If we had a more complex interrupt architecture, then we'd need an enum for
58 // these.
59 const PHANDLE_GIC: u32 = 1;
60 const PHANDLE_RESTRICTED_DMA_POOL: u32 = 2;
61 
62 // CPUs are assigned phandles starting with this number.
63 const PHANDLE_CPU0: u32 = 0x100;
64 
65 const PHANDLE_OPP_DOMAIN_BASE: u32 = 0x1000;
66 
67 // pKVM pvIOMMUs are assigned phandles starting with this number.
68 const PHANDLE_PKVM_PVIOMMU: u32 = 0x2000;
69 
70 // These are specified by the Linux GIC bindings
71 const GIC_FDT_IRQ_NUM_CELLS: u32 = 3;
72 const GIC_FDT_IRQ_TYPE_SPI: u32 = 0;
73 const GIC_FDT_IRQ_TYPE_PPI: u32 = 1;
74 const GIC_FDT_IRQ_PPI_CPU_SHIFT: u32 = 8;
75 const GIC_FDT_IRQ_PPI_CPU_MASK: u32 = 0xff << GIC_FDT_IRQ_PPI_CPU_SHIFT;
76 const IRQ_TYPE_EDGE_RISING: u32 = 0x00000001;
77 const IRQ_TYPE_LEVEL_HIGH: u32 = 0x00000004;
78 const IRQ_TYPE_LEVEL_LOW: u32 = 0x00000008;
79 
create_memory_node(fdt: &mut Fdt, guest_mem: &GuestMemory) -> Result<()>80 fn create_memory_node(fdt: &mut Fdt, guest_mem: &GuestMemory) -> Result<()> {
81     let mut mem_reg_prop = Vec::new();
82     let mut previous_memory_region_end = None;
83     let mut regions = guest_mem.guest_memory_regions();
84     regions.sort();
85     for region in regions {
86         if region.0.offset() == AARCH64_PROTECTED_VM_FW_START {
87             continue;
88         }
89         // Merge with the previous region if possible.
90         if let Some(previous_end) = previous_memory_region_end {
91             if region.0 == previous_end {
92                 *mem_reg_prop.last_mut().unwrap() += region.1 as u64;
93                 previous_memory_region_end =
94                     Some(previous_end.checked_add(region.1 as u64).unwrap());
95                 continue;
96             }
97             assert!(region.0 > previous_end, "Memory regions overlap");
98         }
99 
100         mem_reg_prop.push(region.0.offset());
101         mem_reg_prop.push(region.1 as u64);
102         previous_memory_region_end = Some(region.0.checked_add(region.1 as u64).unwrap());
103     }
104 
105     let memory_node = fdt.root_mut().subnode_mut("memory")?;
106     memory_node.set_prop("device_type", "memory")?;
107     memory_node.set_prop("reg", mem_reg_prop)?;
108     Ok(())
109 }
110 
create_resv_memory_node( fdt: &mut Fdt, resv_addr_and_size: (Option<GuestAddress>, u64), ) -> Result<u32>111 fn create_resv_memory_node(
112     fdt: &mut Fdt,
113     resv_addr_and_size: (Option<GuestAddress>, u64),
114 ) -> Result<u32> {
115     let (resv_addr, resv_size) = resv_addr_and_size;
116 
117     let resv_memory_node = fdt.root_mut().subnode_mut("reserved-memory")?;
118     resv_memory_node.set_prop("#address-cells", 0x2u32)?;
119     resv_memory_node.set_prop("#size-cells", 0x2u32)?;
120     resv_memory_node.set_prop("ranges", ())?;
121 
122     let restricted_dma_pool_node = if let Some(resv_addr) = resv_addr {
123         let node =
124             resv_memory_node.subnode_mut(&format!("restricted_dma_reserved@{:x}", resv_addr.0))?;
125         node.set_prop("reg", &[resv_addr.0, resv_size])?;
126         node
127     } else {
128         let node = resv_memory_node.subnode_mut("restricted_dma_reserved")?;
129         node.set_prop("size", resv_size)?;
130         node
131     };
132     restricted_dma_pool_node.set_prop("phandle", PHANDLE_RESTRICTED_DMA_POOL)?;
133     restricted_dma_pool_node.set_prop("compatible", "restricted-dma-pool")?;
134     restricted_dma_pool_node.set_prop("alignment", base::pagesize() as u64)?;
135     Ok(PHANDLE_RESTRICTED_DMA_POOL)
136 }
137 
create_cpu_nodes( fdt: &mut Fdt, num_cpus: u32, cpu_mpidr_generator: &impl Fn(usize) -> Option<u64>, cpu_clusters: Vec<CpuSet>, cpu_capacity: BTreeMap<usize, u32>, dynamic_power_coefficient: BTreeMap<usize, u32>, cpu_frequencies: BTreeMap<usize, Vec<u32>>, ) -> Result<()>138 fn create_cpu_nodes(
139     fdt: &mut Fdt,
140     num_cpus: u32,
141     cpu_mpidr_generator: &impl Fn(usize) -> Option<u64>,
142     cpu_clusters: Vec<CpuSet>,
143     cpu_capacity: BTreeMap<usize, u32>,
144     dynamic_power_coefficient: BTreeMap<usize, u32>,
145     cpu_frequencies: BTreeMap<usize, Vec<u32>>,
146 ) -> Result<()> {
147     let root_node = fdt.root_mut();
148     let cpus_node = root_node.subnode_mut("cpus")?;
149     cpus_node.set_prop("#address-cells", 0x1u32)?;
150     cpus_node.set_prop("#size-cells", 0x0u32)?;
151 
152     for cpu_id in 0..num_cpus {
153         let reg = u32::try_from(
154             cpu_mpidr_generator(cpu_id.try_into().unwrap()).ok_or(Error::PropertyValueInvalid)?,
155         )
156         .map_err(|_| Error::PropertyValueTooLarge)?;
157         let cpu_name = format!("cpu@{:x}", reg);
158         let cpu_node = cpus_node.subnode_mut(&cpu_name)?;
159         cpu_node.set_prop("device_type", "cpu")?;
160         cpu_node.set_prop("compatible", "arm,armv8")?;
161         if num_cpus > 1 {
162             cpu_node.set_prop("enable-method", "psci")?;
163         }
164         cpu_node.set_prop("reg", reg)?;
165         cpu_node.set_prop("phandle", PHANDLE_CPU0 + cpu_id)?;
166 
167         if let Some(pwr_coefficient) = dynamic_power_coefficient.get(&(cpu_id as usize)) {
168             cpu_node.set_prop("dynamic-power-coefficient", *pwr_coefficient)?;
169         }
170         if let Some(capacity) = cpu_capacity.get(&(cpu_id as usize)) {
171             cpu_node.set_prop("capacity-dmips-mhz", *capacity)?;
172         }
173         // Placed inside cpu nodes for ease of parsing for some secure firmwares(PvmFw).
174         if let Some(frequencies) = cpu_frequencies.get(&(cpu_id as usize)) {
175             cpu_node.set_prop("operating-points-v2", PHANDLE_OPP_DOMAIN_BASE + cpu_id)?;
176             let opp_table_node = cpu_node.subnode_mut(&format!("opp_table{}", cpu_id))?;
177             opp_table_node.set_prop("phandle", PHANDLE_OPP_DOMAIN_BASE + cpu_id)?;
178             opp_table_node.set_prop("compatible", "operating-points-v2")?;
179             for freq in frequencies.iter() {
180                 let opp_hz = (*freq) as u64 * 1000;
181                 let opp_node = opp_table_node.subnode_mut(&format!("opp{}", opp_hz))?;
182                 opp_node.set_prop("opp-hz", opp_hz)?;
183             }
184         }
185     }
186 
187     if !cpu_clusters.is_empty() {
188         let cpu_map_node = cpus_node.subnode_mut("cpu-map")?;
189         for (cluster_idx, cpus) in cpu_clusters.iter().enumerate() {
190             let cluster_node = cpu_map_node.subnode_mut(&format!("cluster{}", cluster_idx))?;
191             for (core_idx, cpu_id) in cpus.iter().enumerate() {
192                 let core_node = cluster_node.subnode_mut(&format!("core{}", core_idx))?;
193                 core_node.set_prop("cpu", PHANDLE_CPU0 + *cpu_id as u32)?;
194             }
195         }
196     }
197 
198     Ok(())
199 }
200 
create_gic_node(fdt: &mut Fdt, is_gicv3: bool, num_cpus: u64) -> Result<()>201 fn create_gic_node(fdt: &mut Fdt, is_gicv3: bool, num_cpus: u64) -> Result<()> {
202     let mut gic_reg_prop = [AARCH64_GIC_DIST_BASE, AARCH64_GIC_DIST_SIZE, 0, 0];
203 
204     let intc_node = fdt.root_mut().subnode_mut("intc")?;
205     if is_gicv3 {
206         intc_node.set_prop("compatible", "arm,gic-v3")?;
207         gic_reg_prop[2] = AARCH64_GIC_DIST_BASE - (AARCH64_GIC_REDIST_SIZE * num_cpus);
208         gic_reg_prop[3] = AARCH64_GIC_REDIST_SIZE * num_cpus;
209     } else {
210         intc_node.set_prop("compatible", "arm,cortex-a15-gic")?;
211         gic_reg_prop[2] = AARCH64_GIC_CPUI_BASE;
212         gic_reg_prop[3] = AARCH64_GIC_CPUI_SIZE;
213     }
214     intc_node.set_prop("#interrupt-cells", GIC_FDT_IRQ_NUM_CELLS)?;
215     intc_node.set_prop("interrupt-controller", ())?;
216     intc_node.set_prop("reg", &gic_reg_prop)?;
217     intc_node.set_prop("phandle", PHANDLE_GIC)?;
218     intc_node.set_prop("#address-cells", 2u32)?;
219     intc_node.set_prop("#size-cells", 2u32)?;
220     add_symbols_entry(fdt, "intc", "/intc")?;
221     Ok(())
222 }
223 
create_timer_node(fdt: &mut Fdt, num_cpus: u32) -> Result<()>224 fn create_timer_node(fdt: &mut Fdt, num_cpus: u32) -> Result<()> {
225     // These are fixed interrupt numbers for the timer device.
226     let irqs = [13, 14, 11, 10];
227     let compatible = "arm,armv8-timer";
228     let cpu_mask: u32 =
229         (((1 << num_cpus) - 1) << GIC_FDT_IRQ_PPI_CPU_SHIFT) & GIC_FDT_IRQ_PPI_CPU_MASK;
230 
231     let mut timer_reg_cells = Vec::new();
232     for &irq in &irqs {
233         timer_reg_cells.push(GIC_FDT_IRQ_TYPE_PPI);
234         timer_reg_cells.push(irq);
235         timer_reg_cells.push(cpu_mask | IRQ_TYPE_LEVEL_LOW);
236     }
237 
238     let timer_node = fdt.root_mut().subnode_mut("timer")?;
239     timer_node.set_prop("compatible", compatible)?;
240     timer_node.set_prop("interrupts", timer_reg_cells)?;
241     timer_node.set_prop("always-on", ())?;
242     Ok(())
243 }
244 
create_virt_cpufreq_node(fdt: &mut Fdt, num_cpus: u64) -> Result<()>245 fn create_virt_cpufreq_node(fdt: &mut Fdt, num_cpus: u64) -> Result<()> {
246     let compatible = "virtual,android-v-only-cpufreq";
247     let vcf_node = fdt.root_mut().subnode_mut("cpufreq")?;
248     let reg = [AARCH64_VIRTFREQ_BASE, AARCH64_VIRTFREQ_SIZE * num_cpus];
249 
250     vcf_node.set_prop("compatible", compatible)?;
251     vcf_node.set_prop("reg", &reg)?;
252     Ok(())
253 }
254 
create_virt_cpufreq_v2_node(fdt: &mut Fdt, num_cpus: u64) -> Result<()>255 fn create_virt_cpufreq_v2_node(fdt: &mut Fdt, num_cpus: u64) -> Result<()> {
256     let compatible = "qemu,virtual-cpufreq";
257     let vcf_node = fdt.root_mut().subnode_mut("cpufreq")?;
258     let reg = [AARCH64_VIRTFREQ_BASE, AARCH64_VIRTFREQ_V2_SIZE * num_cpus];
259 
260     vcf_node.set_prop("compatible", compatible)?;
261     vcf_node.set_prop("reg", &reg)?;
262     Ok(())
263 }
264 
create_pmu_node(fdt: &mut Fdt, num_cpus: u32) -> Result<()>265 fn create_pmu_node(fdt: &mut Fdt, num_cpus: u32) -> Result<()> {
266     let compatible = "arm,armv8-pmuv3";
267     let cpu_mask: u32 =
268         (((1 << num_cpus) - 1) << GIC_FDT_IRQ_PPI_CPU_SHIFT) & GIC_FDT_IRQ_PPI_CPU_MASK;
269     let irq = [
270         GIC_FDT_IRQ_TYPE_PPI,
271         AARCH64_PMU_IRQ,
272         cpu_mask | IRQ_TYPE_LEVEL_HIGH,
273     ];
274 
275     let pmu_node = fdt.root_mut().subnode_mut("pmu")?;
276     pmu_node.set_prop("compatible", compatible)?;
277     pmu_node.set_prop("interrupts", &irq)?;
278     Ok(())
279 }
280 
create_serial_node(fdt: &mut Fdt, addr: u64, size: u64, irq: u32) -> Result<()>281 fn create_serial_node(fdt: &mut Fdt, addr: u64, size: u64, irq: u32) -> Result<()> {
282     let serial_reg_prop = [addr, size];
283     let irq = [GIC_FDT_IRQ_TYPE_SPI, irq, IRQ_TYPE_EDGE_RISING];
284 
285     let serial_node = fdt
286         .root_mut()
287         .subnode_mut(&format!("U6_16550A@{:x}", addr))?;
288     serial_node.set_prop("compatible", "ns16550a")?;
289     serial_node.set_prop("reg", &serial_reg_prop)?;
290     serial_node.set_prop("clock-frequency", AARCH64_SERIAL_SPEED)?;
291     serial_node.set_prop("interrupts", &irq)?;
292 
293     Ok(())
294 }
295 
create_serial_nodes(fdt: &mut Fdt, serial_devices: &[SerialDeviceInfo]) -> Result<()>296 fn create_serial_nodes(fdt: &mut Fdt, serial_devices: &[SerialDeviceInfo]) -> Result<()> {
297     for dev in serial_devices {
298         create_serial_node(fdt, dev.address, dev.size, dev.irq)?;
299     }
300 
301     Ok(())
302 }
303 
psci_compatible(version: &PsciVersion) -> Vec<&str>304 fn psci_compatible(version: &PsciVersion) -> Vec<&str> {
305     // The PSCI kernel driver only supports compatible strings for the following
306     // backward-compatible versions.
307     let supported = [(PSCI_1_0, "arm,psci-1.0"), (PSCI_0_2, "arm,psci-0.2")];
308 
309     let mut compatible: Vec<_> = supported
310         .iter()
311         .filter(|&(v, _)| *version >= *v)
312         .map(|&(_, c)| c)
313         .collect();
314 
315     // The PSCI kernel driver also supports PSCI v0.1, which is NOT forward-compatible.
316     if compatible.is_empty() {
317         compatible = vec!["arm,psci"];
318     }
319 
320     compatible
321 }
322 
create_psci_node(fdt: &mut Fdt, version: &PsciVersion) -> Result<()>323 fn create_psci_node(fdt: &mut Fdt, version: &PsciVersion) -> Result<()> {
324     let compatible = psci_compatible(version);
325     let psci_node = fdt.root_mut().subnode_mut("psci")?;
326     psci_node.set_prop("compatible", compatible.as_slice())?;
327     // Only support aarch64 guest
328     psci_node.set_prop("method", "hvc")?;
329     Ok(())
330 }
331 
create_chosen_node( fdt: &mut Fdt, cmdline: &str, initrd: Option<(GuestAddress, usize)>, stdout_path: Option<&str>, ) -> Result<()>332 fn create_chosen_node(
333     fdt: &mut Fdt,
334     cmdline: &str,
335     initrd: Option<(GuestAddress, usize)>,
336     stdout_path: Option<&str>,
337 ) -> Result<()> {
338     let chosen_node = fdt.root_mut().subnode_mut("chosen")?;
339     chosen_node.set_prop("linux,pci-probe-only", 1u32)?;
340     chosen_node.set_prop("bootargs", cmdline)?;
341     if let Some(stdout_path) = stdout_path {
342         // Used by android bootloader for boot console output
343         chosen_node.set_prop("stdout-path", stdout_path)?;
344     }
345 
346     let mut kaslr_seed_bytes = [0u8; 8];
347     OsRng.fill_bytes(&mut kaslr_seed_bytes);
348     let kaslr_seed = u64::from_le_bytes(kaslr_seed_bytes);
349     chosen_node.set_prop("kaslr-seed", kaslr_seed)?;
350 
351     let mut rng_seed_bytes = [0u8; 256];
352     OsRng.fill_bytes(&mut rng_seed_bytes);
353     chosen_node.set_prop("rng-seed", &rng_seed_bytes)?;
354 
355     if let Some((initrd_addr, initrd_size)) = initrd {
356         let initrd_start = initrd_addr.offset() as u32;
357         let initrd_end = initrd_start + initrd_size as u32;
358         chosen_node.set_prop("linux,initrd-start", initrd_start)?;
359         chosen_node.set_prop("linux,initrd-end", initrd_end)?;
360     }
361 
362     Ok(())
363 }
364 
create_config_node(fdt: &mut Fdt, (addr, size): (GuestAddress, usize)) -> Result<()>365 fn create_config_node(fdt: &mut Fdt, (addr, size): (GuestAddress, usize)) -> Result<()> {
366     let addr: u32 = addr
367         .offset()
368         .try_into()
369         .map_err(|_| Error::PropertyValueTooLarge)?;
370     let size: u32 = size.try_into().map_err(|_| Error::PropertyValueTooLarge)?;
371 
372     let config_node = fdt.root_mut().subnode_mut("config")?;
373     config_node.set_prop("kernel-address", addr)?;
374     config_node.set_prop("kernel-size", size)?;
375     Ok(())
376 }
377 
create_kvm_cpufreq_node(fdt: &mut Fdt) -> Result<()>378 fn create_kvm_cpufreq_node(fdt: &mut Fdt) -> Result<()> {
379     let vcf_node = fdt.root_mut().subnode_mut("cpufreq")?;
380     vcf_node.set_prop("compatible", "virtual,kvm-cpufreq")?;
381     Ok(())
382 }
383 
384 #[cfg(any(target_os = "android", target_os = "linux"))]
get_pkvm_pviommu_ids(platform_dev_resources: &Vec<PlatformBusResources>) -> Result<Vec<u32>>385 fn get_pkvm_pviommu_ids(platform_dev_resources: &Vec<PlatformBusResources>) -> Result<Vec<u32>> {
386     let mut ids = HashSet::new();
387 
388     for res in platform_dev_resources {
389         for iommu in &res.iommus {
390             if let (IommuDevType::PkvmPviommu, Some(id), _) = iommu {
391                 ids.insert(*id);
392             }
393         }
394     }
395 
396     Ok(Vec::from_iter(ids))
397 }
398 
create_pkvm_pviommu_node(fdt: &mut Fdt, index: usize, id: u32) -> Result<u32>399 fn create_pkvm_pviommu_node(fdt: &mut Fdt, index: usize, id: u32) -> Result<u32> {
400     let name = format!("pviommu{index}");
401     let phandle = PHANDLE_PKVM_PVIOMMU
402         .checked_add(index.try_into().unwrap())
403         .unwrap();
404 
405     let iommu_node = fdt.root_mut().subnode_mut(&name)?;
406     iommu_node.set_prop("phandle", phandle)?;
407     iommu_node.set_prop("#iommu-cells", 1u32)?;
408     iommu_node.set_prop("compatible", "pkvm,pviommu")?;
409     iommu_node.set_prop("id", id)?;
410 
411     Ok(phandle)
412 }
413 
/// PCI host controller address range.
///
/// This represents a single entry in the "ranges" property for a PCI host controller.
///
/// See [PCI Bus Binding to Open Firmware](https://www.openfirmware.info/data/docs/bus.pci.pdf)
/// and <https://www.kernel.org/doc/Documentation/devicetree/bindings/pci/host-generic-pci.txt>
/// for more information.
#[derive(Copy, Clone)]
pub struct PciRange {
    /// PCI address space the range belongs to.
    pub space: PciAddressSpace,
    /// Start of the range as seen on the PCI bus (emitted as the high and low bus address
    /// cells of the entry).
    pub bus_address: u64,
    /// Start of the range in CPU physical address space.
    pub cpu_physical_address: u64,
    /// Length of the range.
    pub size: u64,
    /// Whether the range is marked prefetchable in the encoded entry.
    pub prefetchable: bool,
}
429 
/// PCI address space.
///
/// The discriminant of each variant is the two-bit space code placed into the first cell of
/// an encoded PCI address.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
// Not every variant is constructed within this crate.
#[allow(dead_code)]
pub enum PciAddressSpace {
    /// PCI configuration space
    Configuration = 0b00,
    /// I/O space
    Io = 0b01,
    /// 32-bit memory space
    Memory = 0b10,
    /// 64-bit memory space
    Memory64 = 0b11,
}
443 
/// Location of memory-mapped PCI configuration space.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct PciConfigRegion {
    /// Physical address of the base of the memory-mapped PCI configuration region.
    pub base: u64,
    /// Size of the PCI configuration region in bytes.
    pub size: u64,
}
452 
/// Location and timing parameters of the memory-mapped vm watchdog.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct VmWdtConfig {
    /// Physical address of the base of the memory-mapped vm watchdog region.
    pub base: u64,
    /// Size of the vm watchdog region in bytes.
    pub size: u64,
    /// The internal clock frequency of the watchdog.
    pub clock_hz: u32,
    /// The expiration timeout measured in seconds.
    pub timeout_sec: u32,
}
465 
create_pci_nodes( fdt: &mut Fdt, pci_irqs: Vec<(PciAddress, u32, PciInterruptPin)>, cfg: PciConfigRegion, ranges: &[PciRange], dma_pool_phandle: Option<u32>, ) -> Result<()>466 fn create_pci_nodes(
467     fdt: &mut Fdt,
468     pci_irqs: Vec<(PciAddress, u32, PciInterruptPin)>,
469     cfg: PciConfigRegion,
470     ranges: &[PciRange],
471     dma_pool_phandle: Option<u32>,
472 ) -> Result<()> {
473     // Add devicetree nodes describing a PCI generic host controller.
474     // See Documentation/devicetree/bindings/pci/host-generic-pci.txt in the kernel
475     // and "PCI Bus Binding to IEEE Std 1275-1994".
476     let ranges: Vec<u32> = ranges
477         .iter()
478         .flat_map(|r| {
479             let ss = r.space as u32;
480             let p = r.prefetchable as u32;
481             [
482                 // BUS_ADDRESS(3) encoded as defined in OF PCI Bus Binding
483                 (ss << 24) | (p << 30),
484                 (r.bus_address >> 32) as u32,
485                 r.bus_address as u32,
486                 // CPU_PHYSICAL(2)
487                 (r.cpu_physical_address >> 32) as u32,
488                 r.cpu_physical_address as u32,
489                 // SIZE(2)
490                 (r.size >> 32) as u32,
491                 r.size as u32,
492             ]
493         })
494         .collect();
495 
496     let bus_range = [0u32, 0u32]; // Only bus 0
497     let reg = [cfg.base, cfg.size];
498 
499     let mut interrupts: Vec<u32> = Vec::new();
500     let mut masks: Vec<u32> = Vec::new();
501 
502     for (address, irq_num, irq_pin) in pci_irqs.iter() {
503         // PCI_DEVICE(3)
504         interrupts.push(address.to_config_address(0, 8));
505         interrupts.push(0);
506         interrupts.push(0);
507 
508         // INT#(1)
509         interrupts.push(irq_pin.to_mask() + 1);
510 
511         // CONTROLLER(PHANDLE)
512         interrupts.push(PHANDLE_GIC);
513         interrupts.push(0);
514         interrupts.push(0);
515 
516         // CONTROLLER_DATA(3)
517         interrupts.push(GIC_FDT_IRQ_TYPE_SPI);
518         interrupts.push(*irq_num);
519         interrupts.push(IRQ_TYPE_LEVEL_HIGH);
520 
521         // PCI_DEVICE(3)
522         masks.push(0xf800); // bits 11..15 (device)
523         masks.push(0);
524         masks.push(0);
525 
526         // INT#(1)
527         masks.push(0x7); // allow INTA#-INTD# (1 | 2 | 3 | 4)
528     }
529 
530     let pci_node = fdt.root_mut().subnode_mut("pci")?;
531     pci_node.set_prop("compatible", "pci-host-cam-generic")?;
532     pci_node.set_prop("device_type", "pci")?;
533     pci_node.set_prop("ranges", ranges)?;
534     pci_node.set_prop("bus-range", &bus_range)?;
535     pci_node.set_prop("#address-cells", 3u32)?;
536     pci_node.set_prop("#size-cells", 2u32)?;
537     pci_node.set_prop("reg", &reg)?;
538     pci_node.set_prop("#interrupt-cells", 1u32)?;
539     pci_node.set_prop("interrupt-map", interrupts)?;
540     pci_node.set_prop("interrupt-map-mask", masks)?;
541     pci_node.set_prop("dma-coherent", ())?;
542     if let Some(dma_pool_phandle) = dma_pool_phandle {
543         pci_node.set_prop("memory-region", dma_pool_phandle)?;
544     }
545     Ok(())
546 }
547 
create_rtc_node(fdt: &mut Fdt) -> Result<()>548 fn create_rtc_node(fdt: &mut Fdt) -> Result<()> {
549     // the kernel driver for pl030 really really wants a clock node
550     // associated with an AMBA device or it will fail to probe, so we
551     // need to make up a clock node to associate with the pl030 rtc
552     // node and an associated handle with a unique phandle value.
553     const CLK_PHANDLE: u32 = 24;
554     let clock_node = fdt.root_mut().subnode_mut("pclk@3M")?;
555     clock_node.set_prop("#clock-cells", 0u32)?;
556     clock_node.set_prop("compatible", "fixed-clock")?;
557     clock_node.set_prop("clock-frequency", 3141592u32)?;
558     clock_node.set_prop("phandle", CLK_PHANDLE)?;
559 
560     let rtc_name = format!("rtc@{:x}", AARCH64_RTC_ADDR);
561     let reg = [AARCH64_RTC_ADDR, AARCH64_RTC_SIZE];
562     let irq = [GIC_FDT_IRQ_TYPE_SPI, AARCH64_RTC_IRQ, IRQ_TYPE_LEVEL_HIGH];
563 
564     let rtc_node = fdt.root_mut().subnode_mut(&rtc_name)?;
565     rtc_node.set_prop("compatible", "arm,primecell")?;
566     rtc_node.set_prop("arm,primecell-periphid", PL030_AMBA_ID)?;
567     rtc_node.set_prop("reg", &reg)?;
568     rtc_node.set_prop("interrupts", &irq)?;
569     rtc_node.set_prop("clocks", CLK_PHANDLE)?;
570     rtc_node.set_prop("clock-names", "apb_pclk")?;
571     Ok(())
572 }
573 
574 /// Create a flattened device tree node for Goldfish Battery device.
575 ///
576 /// # Arguments
577 ///
578 /// * `fdt` - An Fdt in which the node is created
579 /// * `mmio_base` - The MMIO base address of the battery
580 /// * `irq` - The IRQ number of the battery
create_battery_node(fdt: &mut Fdt, mmio_base: u64, irq: u32) -> Result<()>581 fn create_battery_node(fdt: &mut Fdt, mmio_base: u64, irq: u32) -> Result<()> {
582     let reg = [mmio_base, GOLDFISHBAT_MMIO_LEN];
583     let irqs = [GIC_FDT_IRQ_TYPE_SPI, irq, IRQ_TYPE_LEVEL_HIGH];
584     let bat_node = fdt.root_mut().subnode_mut("goldfish_battery")?;
585     bat_node.set_prop("compatible", "google,goldfish-battery")?;
586     bat_node.set_prop("reg", &reg)?;
587     bat_node.set_prop("interrupts", &irqs)?;
588     Ok(())
589 }
590 
create_vmwdt_node(fdt: &mut Fdt, vmwdt_cfg: VmWdtConfig, num_cpus: u32) -> Result<()>591 fn create_vmwdt_node(fdt: &mut Fdt, vmwdt_cfg: VmWdtConfig, num_cpus: u32) -> Result<()> {
592     let vmwdt_name = format!("vmwdt@{:x}", vmwdt_cfg.base);
593     let reg = [vmwdt_cfg.base, vmwdt_cfg.size];
594     let cpu_mask: u32 =
595         (((1 << num_cpus) - 1) << GIC_FDT_IRQ_PPI_CPU_SHIFT) & GIC_FDT_IRQ_PPI_CPU_MASK;
596     let irq = [
597         GIC_FDT_IRQ_TYPE_PPI,
598         AARCH64_VMWDT_IRQ,
599         cpu_mask | IRQ_TYPE_EDGE_RISING,
600     ];
601 
602     let vmwdt_node = fdt.root_mut().subnode_mut(&vmwdt_name)?;
603     vmwdt_node.set_prop("compatible", "qemu,vcpu-stall-detector")?;
604     vmwdt_node.set_prop("reg", &reg)?;
605     vmwdt_node.set_prop("clock-frequency", vmwdt_cfg.clock_hz)?;
606     vmwdt_node.set_prop("timeout-sec", vmwdt_cfg.timeout_sec)?;
607     vmwdt_node.set_prop("interrupts", &irq)?;
608     Ok(())
609 }
610 
611 // Add a node path to __symbols__ node of the FDT, so it can be referenced by an overlay.
add_symbols_entry(fdt: &mut Fdt, symbol: &str, path: &str) -> Result<()>612 fn add_symbols_entry(fdt: &mut Fdt, symbol: &str, path: &str) -> Result<()> {
613     // Ensure the path points to a valid node with a defined phandle
614     let Some(target_node) = fdt.get_node(path) else {
615         return Err(Error::InvalidPath(format!("{path} does not exist")));
616     };
617     target_node
618         .get_prop::<u32>("phandle")
619         .or_else(|| target_node.get_prop("linux,phandle"))
620         .ok_or_else(|| Error::InvalidPath(format!("{path} must have a phandle")))?;
621     // Add the label -> path mapping.
622     let symbols_node = fdt.root_mut().subnode_mut("__symbols__")?;
623     symbols_node.set_prop(symbol, path)?;
624     Ok(())
625 }
626 
627 /// Creates a flattened device tree containing all of the parameters for the
628 /// kernel and loads it into the guest memory at the specified offset.
629 ///
630 /// # Arguments
631 ///
632 /// * `fdt_max_size` - The amount of space reserved for the device tree
633 /// * `guest_mem` - The guest memory object
634 /// * `pci_irqs` - List of PCI device address to PCI interrupt number and pin mappings
635 /// * `pci_cfg` - Location of the memory-mapped PCI configuration space.
636 /// * `pci_ranges` - Memory ranges accessible via the PCI host controller.
637 /// * `num_cpus` - Number of virtual CPUs the guest will have
638 /// * `fdt_address` - The offset into physical memory for the device tree
639 /// * `cmdline` - The kernel commandline
640 /// * `initrd` - An optional tuple of initrd guest physical address and size
641 /// * `android_fstab` - An optional file holding Android fstab entries
642 /// * `is_gicv3` - True if gicv3, false if v2
643 /// * `psci_version` - the current PSCI version
644 /// * `swiotlb` - Reserve a memory pool for DMA. Tuple of base address and size.
645 /// * `bat_mmio_base_and_irq` - The battery base address and irq number
646 /// * `vmwdt_cfg` - The virtual watchdog configuration
647 /// * `dump_device_tree_blob` - Option path to write DTB to
648 /// * `vm_generator` - Callback to add additional nodes to DTB. create_vm uses Aarch64Vm::create_fdt
create_fdt( fdt_max_size: usize, guest_mem: &GuestMemory, pci_irqs: Vec<(PciAddress, u32, PciInterruptPin)>, pci_cfg: PciConfigRegion, pci_ranges: &[PciRange], #[cfg(any(target_os = "android", target_os = "linux"))] platform_dev_resources: Vec< PlatformBusResources, >, num_cpus: u32, cpu_mpidr_generator: &impl Fn(usize) -> Option<u64>, cpu_clusters: Vec<CpuSet>, cpu_capacity: BTreeMap<usize, u32>, cpu_frequencies: BTreeMap<usize, Vec<u32>>, fdt_address: GuestAddress, cmdline: &str, image: (GuestAddress, usize), initrd: Option<(GuestAddress, usize)>, android_fstab: Option<File>, is_gicv3: bool, use_pmu: bool, psci_version: PsciVersion, swiotlb: Option<(Option<GuestAddress>, u64)>, bat_mmio_base_and_irq: Option<(u64, u32)>, vmwdt_cfg: VmWdtConfig, dump_device_tree_blob: Option<PathBuf>, vm_generator: &impl Fn(&mut Fdt, &BTreeMap<&str, u32>) -> cros_fdt::Result<()>, dynamic_power_coefficient: BTreeMap<usize, u32>, device_tree_overlays: Vec<DtbOverlay>, serial_devices: &[SerialDeviceInfo], virt_cpufreq_v2: bool, ) -> Result<()>649 pub fn create_fdt(
650     fdt_max_size: usize,
651     guest_mem: &GuestMemory,
652     pci_irqs: Vec<(PciAddress, u32, PciInterruptPin)>,
653     pci_cfg: PciConfigRegion,
654     pci_ranges: &[PciRange],
655     #[cfg(any(target_os = "android", target_os = "linux"))] platform_dev_resources: Vec<
656         PlatformBusResources,
657     >,
658     num_cpus: u32,
659     cpu_mpidr_generator: &impl Fn(usize) -> Option<u64>,
660     cpu_clusters: Vec<CpuSet>,
661     cpu_capacity: BTreeMap<usize, u32>,
662     cpu_frequencies: BTreeMap<usize, Vec<u32>>,
663     fdt_address: GuestAddress,
664     cmdline: &str,
665     image: (GuestAddress, usize),
666     initrd: Option<(GuestAddress, usize)>,
667     android_fstab: Option<File>,
668     is_gicv3: bool,
669     use_pmu: bool,
670     psci_version: PsciVersion,
671     swiotlb: Option<(Option<GuestAddress>, u64)>,
672     bat_mmio_base_and_irq: Option<(u64, u32)>,
673     vmwdt_cfg: VmWdtConfig,
674     dump_device_tree_blob: Option<PathBuf>,
675     vm_generator: &impl Fn(&mut Fdt, &BTreeMap<&str, u32>) -> cros_fdt::Result<()>,
676     dynamic_power_coefficient: BTreeMap<usize, u32>,
677     device_tree_overlays: Vec<DtbOverlay>,
678     serial_devices: &[SerialDeviceInfo],
679     virt_cpufreq_v2: bool,
680 ) -> Result<()> {
681     let mut fdt = Fdt::new(&[]);
682     let mut phandles_key_cache = Vec::new();
683     let mut phandles = BTreeMap::new();
684 
685     // The whole thing is put into one giant node with some top level properties
686     let root_node = fdt.root_mut();
687     root_node.set_prop("interrupt-parent", PHANDLE_GIC)?;
688     phandles.insert("intc", PHANDLE_GIC);
689     root_node.set_prop("compatible", "linux,dummy-virt")?;
690     root_node.set_prop("#address-cells", 0x2u32)?;
691     root_node.set_prop("#size-cells", 0x2u32)?;
692     if let Some(android_fstab) = android_fstab {
693         arch::android::create_android_fdt(&mut fdt, android_fstab)?;
694     }
695     let stdout_path = serial_devices
696         .first()
697         .map(|first_serial| format!("/U6_16550A@{:x}", first_serial.address));
698     create_chosen_node(&mut fdt, cmdline, initrd, stdout_path.as_deref())?;
699     create_config_node(&mut fdt, image)?;
700     create_memory_node(&mut fdt, guest_mem)?;
701     let dma_pool_phandle = match swiotlb {
702         Some(x) => {
703             let phandle = create_resv_memory_node(&mut fdt, x)?;
704             phandles.insert("restricted_dma_reserved", phandle);
705             Some(phandle)
706         }
707         None => None,
708     };
709     create_cpu_nodes(
710         &mut fdt,
711         num_cpus,
712         cpu_mpidr_generator,
713         cpu_clusters,
714         cpu_capacity,
715         dynamic_power_coefficient,
716         cpu_frequencies.clone(),
717     )?;
718     create_gic_node(&mut fdt, is_gicv3, num_cpus as u64)?;
719     create_timer_node(&mut fdt, num_cpus)?;
720     if use_pmu {
721         create_pmu_node(&mut fdt, num_cpus)?;
722     }
723     create_serial_nodes(&mut fdt, serial_devices)?;
724     create_psci_node(&mut fdt, &psci_version)?;
725     create_pci_nodes(&mut fdt, pci_irqs, pci_cfg, pci_ranges, dma_pool_phandle)?;
726     create_rtc_node(&mut fdt)?;
727     if let Some((bat_mmio_base, bat_irq)) = bat_mmio_base_and_irq {
728         create_battery_node(&mut fdt, bat_mmio_base, bat_irq)?;
729     }
730     create_vmwdt_node(&mut fdt, vmwdt_cfg, num_cpus)?;
731     create_kvm_cpufreq_node(&mut fdt)?;
732     vm_generator(&mut fdt, &phandles)?;
733     if !cpu_frequencies.is_empty() {
734         if virt_cpufreq_v2 {
735             create_virt_cpufreq_v2_node(&mut fdt, num_cpus as u64)?;
736         } else {
737             create_virt_cpufreq_node(&mut fdt, num_cpus as u64)?;
738         }
739     }
740 
741     let pviommu_ids = get_pkvm_pviommu_ids(&platform_dev_resources)?;
742 
743     let cache_offset = phandles_key_cache.len();
744     // Hack to extend the lifetime of the Strings as keys of phandles (i.e. &str).
745     phandles_key_cache.extend(pviommu_ids.iter().map(|id| format!("pviommu{id}")));
746     let pviommu_phandle_keys = &phandles_key_cache[cache_offset..];
747 
748     for (index, (id, key)) in pviommu_ids.iter().zip(pviommu_phandle_keys).enumerate() {
749         let phandle = create_pkvm_pviommu_node(&mut fdt, index, *id)?;
750         phandles.insert(key, phandle);
751     }
752 
753     // Done writing base FDT, now apply DT overlays
754     apply_device_tree_overlays(
755         &mut fdt,
756         device_tree_overlays,
757         #[cfg(any(target_os = "android", target_os = "linux"))]
758         platform_dev_resources,
759         #[cfg(any(target_os = "android", target_os = "linux"))]
760         &phandles,
761     )?;
762 
763     let fdt_final = fdt.finish()?;
764 
765     if let Some(file_path) = dump_device_tree_blob {
766         let mut fd = open_file_or_duplicate(
767             &file_path,
768             OpenOptions::new()
769                 .read(true)
770                 .create(true)
771                 .truncate(true)
772                 .write(true),
773         )
774         .map_err(|e| Error::FdtIoError(e.into()))?;
775         fd.write_all(&fdt_final)
776             .map_err(|e| Error::FdtDumpIoError(e, file_path.clone()))?;
777     }
778 
779     if fdt_final.len() > fdt_max_size {
780         return Err(Error::TotalSizeTooLarge);
781     }
782 
783     let written = guest_mem
784         .write_at_addr(fdt_final.as_slice(), fdt_address)
785         .map_err(|_| Error::FdtGuestMemoryWriteError)?;
786     if written < fdt_final.len() {
787         return Err(Error::FdtGuestMemoryWriteError);
788     }
789 
790     Ok(())
791 }
792 
#[cfg(test)]
mod tests {
    use super::*;

    // PSCI 0.1 is expected to yield only the generic "arm,psci" string.
    #[test]
    fn psci_compatible_v0_1() {
        let version = PsciVersion::new(0, 1).unwrap();
        let compatible = psci_compatible(&version);
        assert_eq!(compatible, vec!["arm,psci"]);
    }

    // PSCI 0.2 is expected to yield exactly the 0.2 string.
    #[test]
    fn psci_compatible_v0_2() {
        let version = PsciVersion::new(0, 2).unwrap();
        let compatible = psci_compatible(&version);
        assert_eq!(compatible, vec!["arm,psci-0.2"]);
    }

    // Only the 0.2 version supported by the kernel should be added.
    #[test]
    fn psci_compatible_v0_5() {
        let version = PsciVersion::new(0, 5).unwrap();
        let compatible = psci_compatible(&version);
        assert_eq!(compatible, vec!["arm,psci-0.2"]);
    }

    // Both 1.0 and 0.2 should be listed, in that order.
    #[test]
    fn psci_compatible_v1_0() {
        let version = PsciVersion::new(1, 0).unwrap();
        let compatible = psci_compatible(&version);
        assert_eq!(compatible, vec!["arm,psci-1.0", "arm,psci-0.2"]);
    }

    // Only the 1.0 and 0.2 versions supported by the kernel should be listed.
    #[test]
    fn psci_compatible_v1_5() {
        let version = PsciVersion::new(1, 5).unwrap();
        let compatible = psci_compatible(&version);
        assert_eq!(compatible, vec!["arm,psci-1.0", "arm,psci-0.2"]);
    }

    // Walks `add_symbols_entry` through its two expected failures (no node at the
    // path, then a node without a phandle) before checking the success path.
    #[test]
    fn symbols_entries() {
        const TEST_SYMBOL: &str = "dev";
        const TEST_PATH: &str = "/dev";

        let mut fdt = Fdt::new(&[]);

        // Nothing exists at TEST_PATH yet, so the call must fail.
        let without_node = add_symbols_entry(&mut fdt, TEST_SYMBOL, TEST_PATH);
        without_node.expect_err("missing node");

        // Create the node under the root; it still has no "phandle" property.
        fdt.root_mut().subnode_mut(TEST_SYMBOL).unwrap();
        let without_phandle = add_symbols_entry(&mut fdt, TEST_SYMBOL, TEST_PATH);
        without_phandle.expect_err("missing phandle");

        // Give the node a phandle; the call is now expected to succeed.
        fdt.get_node_mut(TEST_PATH)
            .unwrap()
            .set_prop("phandle", 1u32)
            .unwrap();
        let with_phandle = add_symbols_entry(&mut fdt, TEST_SYMBOL, TEST_PATH);
        with_phandle.expect("valid path");

        // The symbol must be recorded under /__symbols__ and resolve to the node path.
        let symbols_node = fdt.get_node("/__symbols__").unwrap();
        let recorded_path = symbols_node.get_prop::<String>(TEST_SYMBOL).unwrap();
        assert_eq!(recorded_path, TEST_PATH);
    }
}
859