1 // Copyright 2018 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 use std::collections::BTreeMap;
6 use std::collections::HashSet;
7 use std::fs::File;
8 use std::fs::OpenOptions;
9 use std::io::Write;
10 use std::path::PathBuf;
11
12 use arch::apply_device_tree_overlays;
13 use arch::serial::SerialDeviceInfo;
14 use arch::CpuSet;
15 use arch::DtbOverlay;
16 #[cfg(any(target_os = "android", target_os = "linux"))]
17 use arch::PlatformBusResources;
18 use base::open_file_or_duplicate;
19 use cros_fdt::Error;
20 use cros_fdt::Fdt;
21 use cros_fdt::Result;
22 // This is a Battery related constant
23 use devices::bat::GOLDFISHBAT_MMIO_LEN;
24 use devices::pl030::PL030_AMBA_ID;
25 use devices::IommuDevType;
26 use devices::PciAddress;
27 use devices::PciInterruptPin;
28 use hypervisor::PsciVersion;
29 use hypervisor::VmAArch64;
30 use hypervisor::PSCI_0_2;
31 use hypervisor::PSCI_1_0;
32 use rand::rngs::OsRng;
33 use rand::RngCore;
34 use vm_memory::GuestAddress;
35 use vm_memory::GuestMemory;
36
37 // These are GIC address-space location constants.
38 use crate::AARCH64_GIC_CPUI_BASE;
39 use crate::AARCH64_GIC_CPUI_SIZE;
40 use crate::AARCH64_GIC_DIST_BASE;
41 use crate::AARCH64_GIC_DIST_SIZE;
42 use crate::AARCH64_GIC_REDIST_SIZE;
43 use crate::AARCH64_PMU_IRQ;
44 use crate::AARCH64_PROTECTED_VM_FW_START;
45 // These are RTC related constants
46 use crate::AARCH64_RTC_ADDR;
47 use crate::AARCH64_RTC_IRQ;
48 use crate::AARCH64_RTC_SIZE;
49 // These are serial device related constants.
50 use crate::AARCH64_SERIAL_SPEED;
51 use crate::AARCH64_VIRTFREQ_BASE;
52 use crate::AARCH64_VIRTFREQ_SIZE;
53 use crate::AARCH64_VIRTFREQ_V2_SIZE;
54 use crate::AARCH64_VMWDT_IRQ;
55
// Fixed phandle values used by the nodes generated in this file.
//
// This is an arbitrary number to specify the node for the GIC.
// If we had a more complex interrupt architecture, then we'd need an enum for
// these.
const PHANDLE_GIC: u32 = 1;
// Phandle assigned to the "restricted-dma-pool" reserved-memory node.
const PHANDLE_RESTRICTED_DMA_POOL: u32 = 2;

// CPUs are assigned phandles starting with this number.
const PHANDLE_CPU0: u32 = 0x100;

// Per-CPU OPP tables are assigned phandles starting with this number.
const PHANDLE_OPP_DOMAIN_BASE: u32 = 0x1000;

// pKVM pvIOMMUs are assigned phandles starting with this number.
const PHANDLE_PKVM_PVIOMMU: u32 = 0x2000;

// These are specified by the Linux GIC bindings
// Number of cells in one interrupt specifier (written as "#interrupt-cells").
const GIC_FDT_IRQ_NUM_CELLS: u32 = 3;
// Values for the first specifier cell: the interrupt type.
const GIC_FDT_IRQ_TYPE_SPI: u32 = 0;
const GIC_FDT_IRQ_TYPE_PPI: u32 = 1;
// Position and mask of the PPI CPU bitmap inside the third specifier cell.
const GIC_FDT_IRQ_PPI_CPU_SHIFT: u32 = 8;
const GIC_FDT_IRQ_PPI_CPU_MASK: u32 = 0xff << GIC_FDT_IRQ_PPI_CPU_SHIFT;
// Trigger-type flags OR-ed into the third specifier cell.
const IRQ_TYPE_EDGE_RISING: u32 = 0x00000001;
const IRQ_TYPE_LEVEL_HIGH: u32 = 0x00000004;
const IRQ_TYPE_LEVEL_LOW: u32 = 0x00000008;
79
create_memory_node(fdt: &mut Fdt, guest_mem: &GuestMemory) -> Result<()>80 fn create_memory_node(fdt: &mut Fdt, guest_mem: &GuestMemory) -> Result<()> {
81 let mut mem_reg_prop = Vec::new();
82 let mut previous_memory_region_end = None;
83 let mut regions = guest_mem.guest_memory_regions();
84 regions.sort();
85 for region in regions {
86 if region.0.offset() == AARCH64_PROTECTED_VM_FW_START {
87 continue;
88 }
89 // Merge with the previous region if possible.
90 if let Some(previous_end) = previous_memory_region_end {
91 if region.0 == previous_end {
92 *mem_reg_prop.last_mut().unwrap() += region.1 as u64;
93 previous_memory_region_end =
94 Some(previous_end.checked_add(region.1 as u64).unwrap());
95 continue;
96 }
97 assert!(region.0 > previous_end, "Memory regions overlap");
98 }
99
100 mem_reg_prop.push(region.0.offset());
101 mem_reg_prop.push(region.1 as u64);
102 previous_memory_region_end = Some(region.0.checked_add(region.1 as u64).unwrap());
103 }
104
105 let memory_node = fdt.root_mut().subnode_mut("memory")?;
106 memory_node.set_prop("device_type", "memory")?;
107 memory_node.set_prop("reg", mem_reg_prop)?;
108 Ok(())
109 }
110
create_resv_memory_node( fdt: &mut Fdt, resv_addr_and_size: (Option<GuestAddress>, u64), ) -> Result<u32>111 fn create_resv_memory_node(
112 fdt: &mut Fdt,
113 resv_addr_and_size: (Option<GuestAddress>, u64),
114 ) -> Result<u32> {
115 let (resv_addr, resv_size) = resv_addr_and_size;
116
117 let resv_memory_node = fdt.root_mut().subnode_mut("reserved-memory")?;
118 resv_memory_node.set_prop("#address-cells", 0x2u32)?;
119 resv_memory_node.set_prop("#size-cells", 0x2u32)?;
120 resv_memory_node.set_prop("ranges", ())?;
121
122 let restricted_dma_pool_node = if let Some(resv_addr) = resv_addr {
123 let node =
124 resv_memory_node.subnode_mut(&format!("restricted_dma_reserved@{:x}", resv_addr.0))?;
125 node.set_prop("reg", &[resv_addr.0, resv_size])?;
126 node
127 } else {
128 let node = resv_memory_node.subnode_mut("restricted_dma_reserved")?;
129 node.set_prop("size", resv_size)?;
130 node
131 };
132 restricted_dma_pool_node.set_prop("phandle", PHANDLE_RESTRICTED_DMA_POOL)?;
133 restricted_dma_pool_node.set_prop("compatible", "restricted-dma-pool")?;
134 restricted_dma_pool_node.set_prop("alignment", base::pagesize() as u64)?;
135 Ok(PHANDLE_RESTRICTED_DMA_POOL)
136 }
137
create_cpu_nodes( fdt: &mut Fdt, num_cpus: u32, cpu_mpidr_generator: &impl Fn(usize) -> Option<u64>, cpu_clusters: Vec<CpuSet>, cpu_capacity: BTreeMap<usize, u32>, dynamic_power_coefficient: BTreeMap<usize, u32>, cpu_frequencies: BTreeMap<usize, Vec<u32>>, ) -> Result<()>138 fn create_cpu_nodes(
139 fdt: &mut Fdt,
140 num_cpus: u32,
141 cpu_mpidr_generator: &impl Fn(usize) -> Option<u64>,
142 cpu_clusters: Vec<CpuSet>,
143 cpu_capacity: BTreeMap<usize, u32>,
144 dynamic_power_coefficient: BTreeMap<usize, u32>,
145 cpu_frequencies: BTreeMap<usize, Vec<u32>>,
146 ) -> Result<()> {
147 let root_node = fdt.root_mut();
148 let cpus_node = root_node.subnode_mut("cpus")?;
149 cpus_node.set_prop("#address-cells", 0x1u32)?;
150 cpus_node.set_prop("#size-cells", 0x0u32)?;
151
152 for cpu_id in 0..num_cpus {
153 let reg = u32::try_from(
154 cpu_mpidr_generator(cpu_id.try_into().unwrap()).ok_or(Error::PropertyValueInvalid)?,
155 )
156 .map_err(|_| Error::PropertyValueTooLarge)?;
157 let cpu_name = format!("cpu@{:x}", reg);
158 let cpu_node = cpus_node.subnode_mut(&cpu_name)?;
159 cpu_node.set_prop("device_type", "cpu")?;
160 cpu_node.set_prop("compatible", "arm,armv8")?;
161 if num_cpus > 1 {
162 cpu_node.set_prop("enable-method", "psci")?;
163 }
164 cpu_node.set_prop("reg", reg)?;
165 cpu_node.set_prop("phandle", PHANDLE_CPU0 + cpu_id)?;
166
167 if let Some(pwr_coefficient) = dynamic_power_coefficient.get(&(cpu_id as usize)) {
168 cpu_node.set_prop("dynamic-power-coefficient", *pwr_coefficient)?;
169 }
170 if let Some(capacity) = cpu_capacity.get(&(cpu_id as usize)) {
171 cpu_node.set_prop("capacity-dmips-mhz", *capacity)?;
172 }
173 // Placed inside cpu nodes for ease of parsing for some secure firmwares(PvmFw).
174 if let Some(frequencies) = cpu_frequencies.get(&(cpu_id as usize)) {
175 cpu_node.set_prop("operating-points-v2", PHANDLE_OPP_DOMAIN_BASE + cpu_id)?;
176 let opp_table_node = cpu_node.subnode_mut(&format!("opp_table{}", cpu_id))?;
177 opp_table_node.set_prop("phandle", PHANDLE_OPP_DOMAIN_BASE + cpu_id)?;
178 opp_table_node.set_prop("compatible", "operating-points-v2")?;
179 for freq in frequencies.iter() {
180 let opp_hz = (*freq) as u64 * 1000;
181 let opp_node = opp_table_node.subnode_mut(&format!("opp{}", opp_hz))?;
182 opp_node.set_prop("opp-hz", opp_hz)?;
183 }
184 }
185 }
186
187 if !cpu_clusters.is_empty() {
188 let cpu_map_node = cpus_node.subnode_mut("cpu-map")?;
189 for (cluster_idx, cpus) in cpu_clusters.iter().enumerate() {
190 let cluster_node = cpu_map_node.subnode_mut(&format!("cluster{}", cluster_idx))?;
191 for (core_idx, cpu_id) in cpus.iter().enumerate() {
192 let core_node = cluster_node.subnode_mut(&format!("core{}", core_idx))?;
193 core_node.set_prop("cpu", PHANDLE_CPU0 + *cpu_id as u32)?;
194 }
195 }
196 }
197
198 Ok(())
199 }
200
create_gic_node(fdt: &mut Fdt, is_gicv3: bool, num_cpus: u64) -> Result<()>201 fn create_gic_node(fdt: &mut Fdt, is_gicv3: bool, num_cpus: u64) -> Result<()> {
202 let mut gic_reg_prop = [AARCH64_GIC_DIST_BASE, AARCH64_GIC_DIST_SIZE, 0, 0];
203
204 let intc_node = fdt.root_mut().subnode_mut("intc")?;
205 if is_gicv3 {
206 intc_node.set_prop("compatible", "arm,gic-v3")?;
207 gic_reg_prop[2] = AARCH64_GIC_DIST_BASE - (AARCH64_GIC_REDIST_SIZE * num_cpus);
208 gic_reg_prop[3] = AARCH64_GIC_REDIST_SIZE * num_cpus;
209 } else {
210 intc_node.set_prop("compatible", "arm,cortex-a15-gic")?;
211 gic_reg_prop[2] = AARCH64_GIC_CPUI_BASE;
212 gic_reg_prop[3] = AARCH64_GIC_CPUI_SIZE;
213 }
214 intc_node.set_prop("#interrupt-cells", GIC_FDT_IRQ_NUM_CELLS)?;
215 intc_node.set_prop("interrupt-controller", ())?;
216 intc_node.set_prop("reg", &gic_reg_prop)?;
217 intc_node.set_prop("phandle", PHANDLE_GIC)?;
218 intc_node.set_prop("#address-cells", 2u32)?;
219 intc_node.set_prop("#size-cells", 2u32)?;
220 add_symbols_entry(fdt, "intc", "/intc")?;
221 Ok(())
222 }
223
create_timer_node(fdt: &mut Fdt, num_cpus: u32) -> Result<()>224 fn create_timer_node(fdt: &mut Fdt, num_cpus: u32) -> Result<()> {
225 // These are fixed interrupt numbers for the timer device.
226 let irqs = [13, 14, 11, 10];
227 let compatible = "arm,armv8-timer";
228 let cpu_mask: u32 =
229 (((1 << num_cpus) - 1) << GIC_FDT_IRQ_PPI_CPU_SHIFT) & GIC_FDT_IRQ_PPI_CPU_MASK;
230
231 let mut timer_reg_cells = Vec::new();
232 for &irq in &irqs {
233 timer_reg_cells.push(GIC_FDT_IRQ_TYPE_PPI);
234 timer_reg_cells.push(irq);
235 timer_reg_cells.push(cpu_mask | IRQ_TYPE_LEVEL_LOW);
236 }
237
238 let timer_node = fdt.root_mut().subnode_mut("timer")?;
239 timer_node.set_prop("compatible", compatible)?;
240 timer_node.set_prop("interrupts", timer_reg_cells)?;
241 timer_node.set_prop("always-on", ())?;
242 Ok(())
243 }
244
create_virt_cpufreq_node(fdt: &mut Fdt, num_cpus: u64) -> Result<()>245 fn create_virt_cpufreq_node(fdt: &mut Fdt, num_cpus: u64) -> Result<()> {
246 let compatible = "virtual,android-v-only-cpufreq";
247 let vcf_node = fdt.root_mut().subnode_mut("cpufreq")?;
248 let reg = [AARCH64_VIRTFREQ_BASE, AARCH64_VIRTFREQ_SIZE * num_cpus];
249
250 vcf_node.set_prop("compatible", compatible)?;
251 vcf_node.set_prop("reg", ®)?;
252 Ok(())
253 }
254
create_virt_cpufreq_v2_node(fdt: &mut Fdt, num_cpus: u64) -> Result<()>255 fn create_virt_cpufreq_v2_node(fdt: &mut Fdt, num_cpus: u64) -> Result<()> {
256 let compatible = "qemu,virtual-cpufreq";
257 let vcf_node = fdt.root_mut().subnode_mut("cpufreq")?;
258 let reg = [AARCH64_VIRTFREQ_BASE, AARCH64_VIRTFREQ_V2_SIZE * num_cpus];
259
260 vcf_node.set_prop("compatible", compatible)?;
261 vcf_node.set_prop("reg", ®)?;
262 Ok(())
263 }
264
create_pmu_node(fdt: &mut Fdt, num_cpus: u32) -> Result<()>265 fn create_pmu_node(fdt: &mut Fdt, num_cpus: u32) -> Result<()> {
266 let compatible = "arm,armv8-pmuv3";
267 let cpu_mask: u32 =
268 (((1 << num_cpus) - 1) << GIC_FDT_IRQ_PPI_CPU_SHIFT) & GIC_FDT_IRQ_PPI_CPU_MASK;
269 let irq = [
270 GIC_FDT_IRQ_TYPE_PPI,
271 AARCH64_PMU_IRQ,
272 cpu_mask | IRQ_TYPE_LEVEL_HIGH,
273 ];
274
275 let pmu_node = fdt.root_mut().subnode_mut("pmu")?;
276 pmu_node.set_prop("compatible", compatible)?;
277 pmu_node.set_prop("interrupts", &irq)?;
278 Ok(())
279 }
280
create_serial_node(fdt: &mut Fdt, addr: u64, size: u64, irq: u32) -> Result<()>281 fn create_serial_node(fdt: &mut Fdt, addr: u64, size: u64, irq: u32) -> Result<()> {
282 let serial_reg_prop = [addr, size];
283 let irq = [GIC_FDT_IRQ_TYPE_SPI, irq, IRQ_TYPE_EDGE_RISING];
284
285 let serial_node = fdt
286 .root_mut()
287 .subnode_mut(&format!("U6_16550A@{:x}", addr))?;
288 serial_node.set_prop("compatible", "ns16550a")?;
289 serial_node.set_prop("reg", &serial_reg_prop)?;
290 serial_node.set_prop("clock-frequency", AARCH64_SERIAL_SPEED)?;
291 serial_node.set_prop("interrupts", &irq)?;
292
293 Ok(())
294 }
295
create_serial_nodes(fdt: &mut Fdt, serial_devices: &[SerialDeviceInfo]) -> Result<()>296 fn create_serial_nodes(fdt: &mut Fdt, serial_devices: &[SerialDeviceInfo]) -> Result<()> {
297 for dev in serial_devices {
298 create_serial_node(fdt, dev.address, dev.size, dev.irq)?;
299 }
300
301 Ok(())
302 }
303
psci_compatible(version: &PsciVersion) -> Vec<&str>304 fn psci_compatible(version: &PsciVersion) -> Vec<&str> {
305 // The PSCI kernel driver only supports compatible strings for the following
306 // backward-compatible versions.
307 let supported = [(PSCI_1_0, "arm,psci-1.0"), (PSCI_0_2, "arm,psci-0.2")];
308
309 let mut compatible: Vec<_> = supported
310 .iter()
311 .filter(|&(v, _)| *version >= *v)
312 .map(|&(_, c)| c)
313 .collect();
314
315 // The PSCI kernel driver also supports PSCI v0.1, which is NOT forward-compatible.
316 if compatible.is_empty() {
317 compatible = vec!["arm,psci"];
318 }
319
320 compatible
321 }
322
create_psci_node(fdt: &mut Fdt, version: &PsciVersion) -> Result<()>323 fn create_psci_node(fdt: &mut Fdt, version: &PsciVersion) -> Result<()> {
324 let compatible = psci_compatible(version);
325 let psci_node = fdt.root_mut().subnode_mut("psci")?;
326 psci_node.set_prop("compatible", compatible.as_slice())?;
327 // Only support aarch64 guest
328 psci_node.set_prop("method", "hvc")?;
329 Ok(())
330 }
331
create_chosen_node( fdt: &mut Fdt, cmdline: &str, initrd: Option<(GuestAddress, usize)>, stdout_path: Option<&str>, ) -> Result<()>332 fn create_chosen_node(
333 fdt: &mut Fdt,
334 cmdline: &str,
335 initrd: Option<(GuestAddress, usize)>,
336 stdout_path: Option<&str>,
337 ) -> Result<()> {
338 let chosen_node = fdt.root_mut().subnode_mut("chosen")?;
339 chosen_node.set_prop("linux,pci-probe-only", 1u32)?;
340 chosen_node.set_prop("bootargs", cmdline)?;
341 if let Some(stdout_path) = stdout_path {
342 // Used by android bootloader for boot console output
343 chosen_node.set_prop("stdout-path", stdout_path)?;
344 }
345
346 let mut kaslr_seed_bytes = [0u8; 8];
347 OsRng.fill_bytes(&mut kaslr_seed_bytes);
348 let kaslr_seed = u64::from_le_bytes(kaslr_seed_bytes);
349 chosen_node.set_prop("kaslr-seed", kaslr_seed)?;
350
351 let mut rng_seed_bytes = [0u8; 256];
352 OsRng.fill_bytes(&mut rng_seed_bytes);
353 chosen_node.set_prop("rng-seed", &rng_seed_bytes)?;
354
355 if let Some((initrd_addr, initrd_size)) = initrd {
356 let initrd_start = initrd_addr.offset() as u32;
357 let initrd_end = initrd_start + initrd_size as u32;
358 chosen_node.set_prop("linux,initrd-start", initrd_start)?;
359 chosen_node.set_prop("linux,initrd-end", initrd_end)?;
360 }
361
362 Ok(())
363 }
364
create_config_node(fdt: &mut Fdt, (addr, size): (GuestAddress, usize)) -> Result<()>365 fn create_config_node(fdt: &mut Fdt, (addr, size): (GuestAddress, usize)) -> Result<()> {
366 let addr: u32 = addr
367 .offset()
368 .try_into()
369 .map_err(|_| Error::PropertyValueTooLarge)?;
370 let size: u32 = size.try_into().map_err(|_| Error::PropertyValueTooLarge)?;
371
372 let config_node = fdt.root_mut().subnode_mut("config")?;
373 config_node.set_prop("kernel-address", addr)?;
374 config_node.set_prop("kernel-size", size)?;
375 Ok(())
376 }
377
create_kvm_cpufreq_node(fdt: &mut Fdt) -> Result<()>378 fn create_kvm_cpufreq_node(fdt: &mut Fdt) -> Result<()> {
379 let vcf_node = fdt.root_mut().subnode_mut("cpufreq")?;
380 vcf_node.set_prop("compatible", "virtual,kvm-cpufreq")?;
381 Ok(())
382 }
383
384 #[cfg(any(target_os = "android", target_os = "linux"))]
get_pkvm_pviommu_ids(platform_dev_resources: &Vec<PlatformBusResources>) -> Result<Vec<u32>>385 fn get_pkvm_pviommu_ids(platform_dev_resources: &Vec<PlatformBusResources>) -> Result<Vec<u32>> {
386 let mut ids = HashSet::new();
387
388 for res in platform_dev_resources {
389 for iommu in &res.iommus {
390 if let (IommuDevType::PkvmPviommu, Some(id), _) = iommu {
391 ids.insert(*id);
392 }
393 }
394 }
395
396 Ok(Vec::from_iter(ids))
397 }
398
create_pkvm_pviommu_node(fdt: &mut Fdt, index: usize, id: u32) -> Result<u32>399 fn create_pkvm_pviommu_node(fdt: &mut Fdt, index: usize, id: u32) -> Result<u32> {
400 let name = format!("pviommu{index}");
401 let phandle = PHANDLE_PKVM_PVIOMMU
402 .checked_add(index.try_into().unwrap())
403 .unwrap();
404
405 let iommu_node = fdt.root_mut().subnode_mut(&name)?;
406 iommu_node.set_prop("phandle", phandle)?;
407 iommu_node.set_prop("#iommu-cells", 1u32)?;
408 iommu_node.set_prop("compatible", "pkvm,pviommu")?;
409 iommu_node.set_prop("id", id)?;
410
411 Ok(phandle)
412 }
413
/// PCI host controller address range.
///
/// This represents a single entry in the "ranges" property for a PCI host controller.
/// `create_pci_nodes` encodes each entry as seven 32-bit cells: three for the bus address,
/// two for the CPU physical address and two for the size.
///
/// See [PCI Bus Binding to Open Firmware](https://www.openfirmware.info/data/docs/bus.pci.pdf)
/// and https://www.kernel.org/doc/Documentation/devicetree/bindings/pci/host-generic-pci.txt
/// for more information.
#[derive(Copy, Clone)]
pub struct PciRange {
    /// Address space of the range; encoded starting at bit 24 of the first bus address cell.
    pub space: PciAddressSpace,
    /// Address of the range as seen on the PCI bus.
    pub bus_address: u64,
    /// Address of the range as seen by the CPU.
    pub cpu_physical_address: u64,
    /// Size of the range.
    pub size: u64,
    /// Whether the range is prefetchable; encoded as bit 30 of the first bus address cell.
    pub prefetchable: bool,
}
429
/// PCI address space.
///
/// The discriminant of each variant is the two-bit space code that `create_pci_nodes`
/// shifts into the first cell of a "ranges" entry.
#[derive(Copy, Clone)]
// NOTE(review): presumably not every variant is constructed in this crate — confirm before
// removing the allow.
#[allow(dead_code)]
pub enum PciAddressSpace {
    /// PCI configuration space
    Configuration = 0b00,
    /// I/O space
    Io = 0b01,
    /// 32-bit memory space
    Memory = 0b10,
    /// 64-bit memory space
    Memory64 = 0b11,
}
443
/// Location of memory-mapped PCI configuration space.
///
/// `create_pci_nodes` writes `base` and `size` as the "reg" property of the PCI host
/// controller node.
#[derive(Copy, Clone)]
pub struct PciConfigRegion {
    /// Physical address of the base of the memory-mapped PCI configuration region.
    pub base: u64,
    /// Size of the PCI configuration region in bytes.
    pub size: u64,
}
452
/// Location and parameters of the memory-mapped vm watchdog.
///
/// `create_vmwdt_node` writes these values into the `vmwdt@<base>` node.
#[derive(Copy, Clone)]
pub struct VmWdtConfig {
    /// Physical address of the base of the memory-mapped vm watchdog region.
    pub base: u64,
    /// Size of the vm watchdog region in bytes.
    pub size: u64,
    /// The internal clock frequency of the watchdog.
    pub clock_hz: u32,
    /// The expiration timeout measured in seconds.
    pub timeout_sec: u32,
}
465
create_pci_nodes( fdt: &mut Fdt, pci_irqs: Vec<(PciAddress, u32, PciInterruptPin)>, cfg: PciConfigRegion, ranges: &[PciRange], dma_pool_phandle: Option<u32>, ) -> Result<()>466 fn create_pci_nodes(
467 fdt: &mut Fdt,
468 pci_irqs: Vec<(PciAddress, u32, PciInterruptPin)>,
469 cfg: PciConfigRegion,
470 ranges: &[PciRange],
471 dma_pool_phandle: Option<u32>,
472 ) -> Result<()> {
473 // Add devicetree nodes describing a PCI generic host controller.
474 // See Documentation/devicetree/bindings/pci/host-generic-pci.txt in the kernel
475 // and "PCI Bus Binding to IEEE Std 1275-1994".
476 let ranges: Vec<u32> = ranges
477 .iter()
478 .flat_map(|r| {
479 let ss = r.space as u32;
480 let p = r.prefetchable as u32;
481 [
482 // BUS_ADDRESS(3) encoded as defined in OF PCI Bus Binding
483 (ss << 24) | (p << 30),
484 (r.bus_address >> 32) as u32,
485 r.bus_address as u32,
486 // CPU_PHYSICAL(2)
487 (r.cpu_physical_address >> 32) as u32,
488 r.cpu_physical_address as u32,
489 // SIZE(2)
490 (r.size >> 32) as u32,
491 r.size as u32,
492 ]
493 })
494 .collect();
495
496 let bus_range = [0u32, 0u32]; // Only bus 0
497 let reg = [cfg.base, cfg.size];
498
499 let mut interrupts: Vec<u32> = Vec::new();
500 let mut masks: Vec<u32> = Vec::new();
501
502 for (address, irq_num, irq_pin) in pci_irqs.iter() {
503 // PCI_DEVICE(3)
504 interrupts.push(address.to_config_address(0, 8));
505 interrupts.push(0);
506 interrupts.push(0);
507
508 // INT#(1)
509 interrupts.push(irq_pin.to_mask() + 1);
510
511 // CONTROLLER(PHANDLE)
512 interrupts.push(PHANDLE_GIC);
513 interrupts.push(0);
514 interrupts.push(0);
515
516 // CONTROLLER_DATA(3)
517 interrupts.push(GIC_FDT_IRQ_TYPE_SPI);
518 interrupts.push(*irq_num);
519 interrupts.push(IRQ_TYPE_LEVEL_HIGH);
520
521 // PCI_DEVICE(3)
522 masks.push(0xf800); // bits 11..15 (device)
523 masks.push(0);
524 masks.push(0);
525
526 // INT#(1)
527 masks.push(0x7); // allow INTA#-INTD# (1 | 2 | 3 | 4)
528 }
529
530 let pci_node = fdt.root_mut().subnode_mut("pci")?;
531 pci_node.set_prop("compatible", "pci-host-cam-generic")?;
532 pci_node.set_prop("device_type", "pci")?;
533 pci_node.set_prop("ranges", ranges)?;
534 pci_node.set_prop("bus-range", &bus_range)?;
535 pci_node.set_prop("#address-cells", 3u32)?;
536 pci_node.set_prop("#size-cells", 2u32)?;
537 pci_node.set_prop("reg", ®)?;
538 pci_node.set_prop("#interrupt-cells", 1u32)?;
539 pci_node.set_prop("interrupt-map", interrupts)?;
540 pci_node.set_prop("interrupt-map-mask", masks)?;
541 pci_node.set_prop("dma-coherent", ())?;
542 if let Some(dma_pool_phandle) = dma_pool_phandle {
543 pci_node.set_prop("memory-region", dma_pool_phandle)?;
544 }
545 Ok(())
546 }
547
create_rtc_node(fdt: &mut Fdt) -> Result<()>548 fn create_rtc_node(fdt: &mut Fdt) -> Result<()> {
549 // the kernel driver for pl030 really really wants a clock node
550 // associated with an AMBA device or it will fail to probe, so we
551 // need to make up a clock node to associate with the pl030 rtc
552 // node and an associated handle with a unique phandle value.
553 const CLK_PHANDLE: u32 = 24;
554 let clock_node = fdt.root_mut().subnode_mut("pclk@3M")?;
555 clock_node.set_prop("#clock-cells", 0u32)?;
556 clock_node.set_prop("compatible", "fixed-clock")?;
557 clock_node.set_prop("clock-frequency", 3141592u32)?;
558 clock_node.set_prop("phandle", CLK_PHANDLE)?;
559
560 let rtc_name = format!("rtc@{:x}", AARCH64_RTC_ADDR);
561 let reg = [AARCH64_RTC_ADDR, AARCH64_RTC_SIZE];
562 let irq = [GIC_FDT_IRQ_TYPE_SPI, AARCH64_RTC_IRQ, IRQ_TYPE_LEVEL_HIGH];
563
564 let rtc_node = fdt.root_mut().subnode_mut(&rtc_name)?;
565 rtc_node.set_prop("compatible", "arm,primecell")?;
566 rtc_node.set_prop("arm,primecell-periphid", PL030_AMBA_ID)?;
567 rtc_node.set_prop("reg", ®)?;
568 rtc_node.set_prop("interrupts", &irq)?;
569 rtc_node.set_prop("clocks", CLK_PHANDLE)?;
570 rtc_node.set_prop("clock-names", "apb_pclk")?;
571 Ok(())
572 }
573
574 /// Create a flattened device tree node for Goldfish Battery device.
575 ///
576 /// # Arguments
577 ///
578 /// * `fdt` - An Fdt in which the node is created
579 /// * `mmio_base` - The MMIO base address of the battery
580 /// * `irq` - The IRQ number of the battery
create_battery_node(fdt: &mut Fdt, mmio_base: u64, irq: u32) -> Result<()>581 fn create_battery_node(fdt: &mut Fdt, mmio_base: u64, irq: u32) -> Result<()> {
582 let reg = [mmio_base, GOLDFISHBAT_MMIO_LEN];
583 let irqs = [GIC_FDT_IRQ_TYPE_SPI, irq, IRQ_TYPE_LEVEL_HIGH];
584 let bat_node = fdt.root_mut().subnode_mut("goldfish_battery")?;
585 bat_node.set_prop("compatible", "google,goldfish-battery")?;
586 bat_node.set_prop("reg", ®)?;
587 bat_node.set_prop("interrupts", &irqs)?;
588 Ok(())
589 }
590
create_vmwdt_node(fdt: &mut Fdt, vmwdt_cfg: VmWdtConfig, num_cpus: u32) -> Result<()>591 fn create_vmwdt_node(fdt: &mut Fdt, vmwdt_cfg: VmWdtConfig, num_cpus: u32) -> Result<()> {
592 let vmwdt_name = format!("vmwdt@{:x}", vmwdt_cfg.base);
593 let reg = [vmwdt_cfg.base, vmwdt_cfg.size];
594 let cpu_mask: u32 =
595 (((1 << num_cpus) - 1) << GIC_FDT_IRQ_PPI_CPU_SHIFT) & GIC_FDT_IRQ_PPI_CPU_MASK;
596 let irq = [
597 GIC_FDT_IRQ_TYPE_PPI,
598 AARCH64_VMWDT_IRQ,
599 cpu_mask | IRQ_TYPE_EDGE_RISING,
600 ];
601
602 let vmwdt_node = fdt.root_mut().subnode_mut(&vmwdt_name)?;
603 vmwdt_node.set_prop("compatible", "qemu,vcpu-stall-detector")?;
604 vmwdt_node.set_prop("reg", ®)?;
605 vmwdt_node.set_prop("clock-frequency", vmwdt_cfg.clock_hz)?;
606 vmwdt_node.set_prop("timeout-sec", vmwdt_cfg.timeout_sec)?;
607 vmwdt_node.set_prop("interrupts", &irq)?;
608 Ok(())
609 }
610
611 // Add a node path to __symbols__ node of the FDT, so it can be referenced by an overlay.
add_symbols_entry(fdt: &mut Fdt, symbol: &str, path: &str) -> Result<()>612 fn add_symbols_entry(fdt: &mut Fdt, symbol: &str, path: &str) -> Result<()> {
613 // Ensure the path points to a valid node with a defined phandle
614 let Some(target_node) = fdt.get_node(path) else {
615 return Err(Error::InvalidPath(format!("{path} does not exist")));
616 };
617 target_node
618 .get_prop::<u32>("phandle")
619 .or_else(|| target_node.get_prop("linux,phandle"))
620 .ok_or_else(|| Error::InvalidPath(format!("{path} must have a phandle")))?;
621 // Add the label -> path mapping.
622 let symbols_node = fdt.root_mut().subnode_mut("__symbols__")?;
623 symbols_node.set_prop(symbol, path)?;
624 Ok(())
625 }
626
627 /// Creates a flattened device tree containing all of the parameters for the
628 /// kernel and loads it into the guest memory at the specified offset.
629 ///
630 /// # Arguments
631 ///
632 /// * `fdt_max_size` - The amount of space reserved for the device tree
633 /// * `guest_mem` - The guest memory object
634 /// * `pci_irqs` - List of PCI device address to PCI interrupt number and pin mappings
635 /// * `pci_cfg` - Location of the memory-mapped PCI configuration space.
636 /// * `pci_ranges` - Memory ranges accessible via the PCI host controller.
637 /// * `num_cpus` - Number of virtual CPUs the guest will have
638 /// * `fdt_address` - The offset into physical memory for the device tree
639 /// * `cmdline` - The kernel commandline
640 /// * `initrd` - An optional tuple of initrd guest physical address and size
641 /// * `android_fstab` - An optional file holding Android fstab entries
642 /// * `is_gicv3` - True if gicv3, false if v2
643 /// * `psci_version` - the current PSCI version
644 /// * `swiotlb` - Reserve a memory pool for DMA. Tuple of base address and size.
645 /// * `bat_mmio_base_and_irq` - The battery base address and irq number
646 /// * `vmwdt_cfg` - The virtual watchdog configuration
647 /// * `dump_device_tree_blob` - Option path to write DTB to
648 /// * `vm_generator` - Callback to add additional nodes to DTB. create_vm uses Aarch64Vm::create_fdt
create_fdt( fdt_max_size: usize, guest_mem: &GuestMemory, pci_irqs: Vec<(PciAddress, u32, PciInterruptPin)>, pci_cfg: PciConfigRegion, pci_ranges: &[PciRange], #[cfg(any(target_os = "android", target_os = "linux"))] platform_dev_resources: Vec< PlatformBusResources, >, num_cpus: u32, cpu_mpidr_generator: &impl Fn(usize) -> Option<u64>, cpu_clusters: Vec<CpuSet>, cpu_capacity: BTreeMap<usize, u32>, cpu_frequencies: BTreeMap<usize, Vec<u32>>, fdt_address: GuestAddress, cmdline: &str, image: (GuestAddress, usize), initrd: Option<(GuestAddress, usize)>, android_fstab: Option<File>, is_gicv3: bool, use_pmu: bool, psci_version: PsciVersion, swiotlb: Option<(Option<GuestAddress>, u64)>, bat_mmio_base_and_irq: Option<(u64, u32)>, vmwdt_cfg: VmWdtConfig, dump_device_tree_blob: Option<PathBuf>, vm_generator: &impl Fn(&mut Fdt, &BTreeMap<&str, u32>) -> cros_fdt::Result<()>, dynamic_power_coefficient: BTreeMap<usize, u32>, device_tree_overlays: Vec<DtbOverlay>, serial_devices: &[SerialDeviceInfo], virt_cpufreq_v2: bool, ) -> Result<()>649 pub fn create_fdt(
650 fdt_max_size: usize,
651 guest_mem: &GuestMemory,
652 pci_irqs: Vec<(PciAddress, u32, PciInterruptPin)>,
653 pci_cfg: PciConfigRegion,
654 pci_ranges: &[PciRange],
655 #[cfg(any(target_os = "android", target_os = "linux"))] platform_dev_resources: Vec<
656 PlatformBusResources,
657 >,
658 num_cpus: u32,
659 cpu_mpidr_generator: &impl Fn(usize) -> Option<u64>,
660 cpu_clusters: Vec<CpuSet>,
661 cpu_capacity: BTreeMap<usize, u32>,
662 cpu_frequencies: BTreeMap<usize, Vec<u32>>,
663 fdt_address: GuestAddress,
664 cmdline: &str,
665 image: (GuestAddress, usize),
666 initrd: Option<(GuestAddress, usize)>,
667 android_fstab: Option<File>,
668 is_gicv3: bool,
669 use_pmu: bool,
670 psci_version: PsciVersion,
671 swiotlb: Option<(Option<GuestAddress>, u64)>,
672 bat_mmio_base_and_irq: Option<(u64, u32)>,
673 vmwdt_cfg: VmWdtConfig,
674 dump_device_tree_blob: Option<PathBuf>,
675 vm_generator: &impl Fn(&mut Fdt, &BTreeMap<&str, u32>) -> cros_fdt::Result<()>,
676 dynamic_power_coefficient: BTreeMap<usize, u32>,
677 device_tree_overlays: Vec<DtbOverlay>,
678 serial_devices: &[SerialDeviceInfo],
679 virt_cpufreq_v2: bool,
680 ) -> Result<()> {
681 let mut fdt = Fdt::new(&[]);
682 let mut phandles_key_cache = Vec::new();
683 let mut phandles = BTreeMap::new();
684
685 // The whole thing is put into one giant node with some top level properties
686 let root_node = fdt.root_mut();
687 root_node.set_prop("interrupt-parent", PHANDLE_GIC)?;
688 phandles.insert("intc", PHANDLE_GIC);
689 root_node.set_prop("compatible", "linux,dummy-virt")?;
690 root_node.set_prop("#address-cells", 0x2u32)?;
691 root_node.set_prop("#size-cells", 0x2u32)?;
692 if let Some(android_fstab) = android_fstab {
693 arch::android::create_android_fdt(&mut fdt, android_fstab)?;
694 }
695 let stdout_path = serial_devices
696 .first()
697 .map(|first_serial| format!("/U6_16550A@{:x}", first_serial.address));
698 create_chosen_node(&mut fdt, cmdline, initrd, stdout_path.as_deref())?;
699 create_config_node(&mut fdt, image)?;
700 create_memory_node(&mut fdt, guest_mem)?;
701 let dma_pool_phandle = match swiotlb {
702 Some(x) => {
703 let phandle = create_resv_memory_node(&mut fdt, x)?;
704 phandles.insert("restricted_dma_reserved", phandle);
705 Some(phandle)
706 }
707 None => None,
708 };
709 create_cpu_nodes(
710 &mut fdt,
711 num_cpus,
712 cpu_mpidr_generator,
713 cpu_clusters,
714 cpu_capacity,
715 dynamic_power_coefficient,
716 cpu_frequencies.clone(),
717 )?;
718 create_gic_node(&mut fdt, is_gicv3, num_cpus as u64)?;
719 create_timer_node(&mut fdt, num_cpus)?;
720 if use_pmu {
721 create_pmu_node(&mut fdt, num_cpus)?;
722 }
723 create_serial_nodes(&mut fdt, serial_devices)?;
724 create_psci_node(&mut fdt, &psci_version)?;
725 create_pci_nodes(&mut fdt, pci_irqs, pci_cfg, pci_ranges, dma_pool_phandle)?;
726 create_rtc_node(&mut fdt)?;
727 if let Some((bat_mmio_base, bat_irq)) = bat_mmio_base_and_irq {
728 create_battery_node(&mut fdt, bat_mmio_base, bat_irq)?;
729 }
730 create_vmwdt_node(&mut fdt, vmwdt_cfg, num_cpus)?;
731 create_kvm_cpufreq_node(&mut fdt)?;
732 vm_generator(&mut fdt, &phandles)?;
733 if !cpu_frequencies.is_empty() {
734 if virt_cpufreq_v2 {
735 create_virt_cpufreq_v2_node(&mut fdt, num_cpus as u64)?;
736 } else {
737 create_virt_cpufreq_node(&mut fdt, num_cpus as u64)?;
738 }
739 }
740
741 let pviommu_ids = get_pkvm_pviommu_ids(&platform_dev_resources)?;
742
743 let cache_offset = phandles_key_cache.len();
744 // Hack to extend the lifetime of the Strings as keys of phandles (i.e. &str).
745 phandles_key_cache.extend(pviommu_ids.iter().map(|id| format!("pviommu{id}")));
746 let pviommu_phandle_keys = &phandles_key_cache[cache_offset..];
747
748 for (index, (id, key)) in pviommu_ids.iter().zip(pviommu_phandle_keys).enumerate() {
749 let phandle = create_pkvm_pviommu_node(&mut fdt, index, *id)?;
750 phandles.insert(key, phandle);
751 }
752
753 // Done writing base FDT, now apply DT overlays
754 apply_device_tree_overlays(
755 &mut fdt,
756 device_tree_overlays,
757 #[cfg(any(target_os = "android", target_os = "linux"))]
758 platform_dev_resources,
759 #[cfg(any(target_os = "android", target_os = "linux"))]
760 &phandles,
761 )?;
762
763 let fdt_final = fdt.finish()?;
764
765 if let Some(file_path) = dump_device_tree_blob {
766 let mut fd = open_file_or_duplicate(
767 &file_path,
768 OpenOptions::new()
769 .read(true)
770 .create(true)
771 .truncate(true)
772 .write(true),
773 )
774 .map_err(|e| Error::FdtIoError(e.into()))?;
775 fd.write_all(&fdt_final)
776 .map_err(|e| Error::FdtDumpIoError(e, file_path.clone()))?;
777 }
778
779 if fdt_final.len() > fdt_max_size {
780 return Err(Error::TotalSizeTooLarge);
781 }
782
783 let written = guest_mem
784 .write_at_addr(fdt_final.as_slice(), fdt_address)
785 .map_err(|_| Error::FdtGuestMemoryWriteError)?;
786 if written < fdt_final.len() {
787 return Err(Error::FdtGuestMemoryWriteError);
788 }
789
790 Ok(())
791 }
792
#[cfg(test)]
mod tests {
    use super::*;

    // v0.1 gets the single, non-versioned compatible string.
    #[test]
    fn psci_compatible_v0_1() {
        let version = PsciVersion::new(0, 1).unwrap();
        let expected = vec!["arm,psci"];
        assert_eq!(psci_compatible(&version), expected);
    }

    #[test]
    fn psci_compatible_v0_2() {
        let version = PsciVersion::new(0, 2).unwrap();
        let expected = vec!["arm,psci-0.2"];
        assert_eq!(psci_compatible(&version), expected);
    }

    #[test]
    fn psci_compatible_v0_5() {
        // Only the 0.2 version supported by the kernel should be added.
        let version = PsciVersion::new(0, 5).unwrap();
        let expected = vec!["arm,psci-0.2"];
        assert_eq!(psci_compatible(&version), expected);
    }

    #[test]
    fn psci_compatible_v1_0() {
        // Both 1.0 and 0.2 should be listed, in that order.
        let version = PsciVersion::new(1, 0).unwrap();
        let expected = vec!["arm,psci-1.0", "arm,psci-0.2"];
        assert_eq!(psci_compatible(&version), expected);
    }

    #[test]
    fn psci_compatible_v1_5() {
        // Only the 1.0 and 0.2 versions supported by the kernel should be listed.
        let version = PsciVersion::new(1, 5).unwrap();
        let expected = vec!["arm,psci-1.0", "arm,psci-0.2"];
        assert_eq!(psci_compatible(&version), expected);
    }

    #[test]
    fn symbols_entries() {
        const TEST_SYMBOL: &str = "dev";
        const TEST_PATH: &str = "/dev";

        let mut fdt = Fdt::new(&[]);

        // The target node does not exist yet.
        add_symbols_entry(&mut fdt, TEST_SYMBOL, TEST_PATH).expect_err("missing node");

        // The node exists but has no phandle.
        fdt.root_mut().subnode_mut(TEST_SYMBOL).unwrap();
        add_symbols_entry(&mut fdt, TEST_SYMBOL, TEST_PATH).expect_err("missing phandle");

        // With a phandle in place the entry is accepted.
        let dev_node = fdt.get_node_mut(TEST_PATH).unwrap();
        dev_node.set_prop("phandle", 1u32).unwrap();
        add_symbols_entry(&mut fdt, TEST_SYMBOL, TEST_PATH).expect("valid path");

        // The label must now map to the node path.
        let symbols_node = fdt.get_node("/__symbols__").unwrap();
        let recorded_path = symbols_node.get_prop::<String>(TEST_SYMBOL).unwrap();
        assert_eq!(recorded_path, TEST_PATH);
    }
}
859