1 // Copyright 2022, The Android Open Source Project
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 //! High-level FDT functions.
16 
17 use crate::bootargs::BootArgsIterator;
18 use crate::device_assignment::{self, DeviceAssignmentInfo, VmDtbo};
19 use crate::Box;
20 use crate::RebootReason;
21 use alloc::collections::BTreeMap;
22 use alloc::ffi::CString;
23 use alloc::format;
24 use alloc::vec::Vec;
25 use core::cmp::max;
26 use core::cmp::min;
27 use core::ffi::CStr;
28 use core::fmt;
29 use core::mem::size_of;
30 use core::ops::Range;
31 use cstr::cstr;
32 use hypervisor_backends::get_device_assigner;
33 use hypervisor_backends::get_mem_sharer;
34 use libfdt::AddressRange;
35 use libfdt::CellIterator;
36 use libfdt::Fdt;
37 use libfdt::FdtError;
38 use libfdt::FdtNode;
39 use libfdt::FdtNodeMut;
40 use libfdt::Phandle;
41 use log::debug;
42 use log::error;
43 use log::info;
44 use log::warn;
45 use static_assertions::const_assert;
46 use tinyvec::ArrayVec;
47 use vmbase::fdt::pci::PciMemoryFlags;
48 use vmbase::fdt::pci::PciRangeType;
49 use vmbase::fdt::SwiotlbInfo;
50 use vmbase::layout::{crosvm::MEM_START, MAX_VIRT_ADDR};
51 use vmbase::memory::SIZE_4KB;
52 use vmbase::util::RangeExt as _;
53 use zerocopy::AsBytes as _;
54 
/// Pre-built template device tree, wrapping the raw bytes of `pvmfw_fdt_template::RAW`.
///
/// The slice is not checked when the `Fdt` reference is created (see the SAFETY note below).
// SAFETY: The template DT is automatically generated through DTC, which should produce valid DTBs.
const FDT_TEMPLATE: &Fdt = unsafe { Fdt::unchecked_from_slice(pvmfw_fdt_template::RAW) };
57 
/// Errors reported when the contents of the FDT fail validation.
#[derive(Clone, Debug)]
pub enum FdtValidationError {
    /// The number of CPUs is not acceptable; carries the offending count.
    InvalidCpuCount(usize),
    /// The vcpufreq region is not acceptable; carries its `(address, size)`.
    InvalidVcpufreq(u64, u64),
    /// A property that must not appear under /avf/untrusted was found; carries its name.
    ForbiddenUntrustedProp(&'static CStr),
}

impl fmt::Display for FdtValidationError {
    /// Renders a human-readable, single-line description of the error.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // All payloads are `Copy`, so the variants can be matched by value.
        match *self {
            Self::ForbiddenUntrustedProp(name) => {
                write!(f, "Forbidden /avf/untrusted property '{name:?}'")
            }
            Self::InvalidVcpufreq(addr, size) => {
                write!(f, "Invalid vcpufreq region: ({addr:#x}, {size:#x})")
            }
            Self::InvalidCpuCount(num_cpus) => write!(f, "Invalid CPU count: {num_cpus}"),
        }
    }
}
82 
83 /// Extract from /config the address range containing the pre-loaded kernel. Absence of /config is
84 /// not an error.
read_kernel_range_from(fdt: &Fdt) -> libfdt::Result<Option<Range<usize>>>85 pub fn read_kernel_range_from(fdt: &Fdt) -> libfdt::Result<Option<Range<usize>>> {
86     let addr = cstr!("kernel-address");
87     let size = cstr!("kernel-size");
88 
89     if let Some(config) = fdt.node(cstr!("/config"))? {
90         if let (Some(addr), Some(size)) = (config.getprop_u32(addr)?, config.getprop_u32(size)?) {
91             let addr = addr as usize;
92             let size = size as usize;
93 
94             return Ok(Some(addr..(addr + size)));
95         }
96     }
97 
98     Ok(None)
99 }
100 
101 /// Extract from /chosen the address range containing the pre-loaded ramdisk. Absence is not an
102 /// error as there can be initrd-less VM.
read_initrd_range_from(fdt: &Fdt) -> libfdt::Result<Option<Range<usize>>>103 pub fn read_initrd_range_from(fdt: &Fdt) -> libfdt::Result<Option<Range<usize>>> {
104     let start = cstr!("linux,initrd-start");
105     let end = cstr!("linux,initrd-end");
106 
107     if let Some(chosen) = fdt.chosen()? {
108         if let (Some(start), Some(end)) = (chosen.getprop_u32(start)?, chosen.getprop_u32(end)?) {
109             return Ok(Some((start as usize)..(end as usize)));
110         }
111     }
112 
113     Ok(None)
114 }
115 
patch_initrd_range(fdt: &mut Fdt, initrd_range: &Range<usize>) -> libfdt::Result<()>116 fn patch_initrd_range(fdt: &mut Fdt, initrd_range: &Range<usize>) -> libfdt::Result<()> {
117     let start = u32::try_from(initrd_range.start).unwrap();
118     let end = u32::try_from(initrd_range.end).unwrap();
119 
120     let mut node = fdt.chosen_mut()?.ok_or(FdtError::NotFound)?;
121     node.setprop(cstr!("linux,initrd-start"), &start.to_be_bytes())?;
122     node.setprop(cstr!("linux,initrd-end"), &end.to_be_bytes())?;
123     Ok(())
124 }
125 
read_bootargs_from(fdt: &Fdt) -> libfdt::Result<Option<CString>>126 fn read_bootargs_from(fdt: &Fdt) -> libfdt::Result<Option<CString>> {
127     if let Some(chosen) = fdt.chosen()? {
128         if let Some(bootargs) = chosen.getprop_str(cstr!("bootargs"))? {
129             // We need to copy the string to heap because the original fdt will be invalidated
130             // by the templated DT
131             let copy = CString::new(bootargs.to_bytes()).map_err(|_| FdtError::BadValue)?;
132             return Ok(Some(copy));
133         }
134     }
135     Ok(None)
136 }
137 
patch_bootargs(fdt: &mut Fdt, bootargs: &CStr) -> libfdt::Result<()>138 fn patch_bootargs(fdt: &mut Fdt, bootargs: &CStr) -> libfdt::Result<()> {
139     let mut node = fdt.chosen_mut()?.ok_or(FdtError::NotFound)?;
140     // This function is called before the verification is done. So, we just copy the bootargs to
141     // the new FDT unmodified. This will be filtered again in the modify_for_next_stage function
142     // if the VM is not debuggable.
143     node.setprop(cstr!("bootargs"), bootargs.to_bytes_with_nul())
144 }
145 
146 /// Reads and validates the memory range in the DT.
147 ///
148 /// Only one memory range is expected with the crosvm setup for now.
read_and_validate_memory_range( fdt: &Fdt, guest_page_size: usize, ) -> Result<Range<usize>, RebootReason>149 fn read_and_validate_memory_range(
150     fdt: &Fdt,
151     guest_page_size: usize,
152 ) -> Result<Range<usize>, RebootReason> {
153     let mut memory = fdt.memory().map_err(|e| {
154         error!("Failed to read memory range from DT: {e}");
155         RebootReason::InvalidFdt
156     })?;
157     let range = memory.next().ok_or_else(|| {
158         error!("The /memory node in the DT contains no range.");
159         RebootReason::InvalidFdt
160     })?;
161     if memory.next().is_some() {
162         warn!(
163             "The /memory node in the DT contains more than one memory range, \
164              while only one is expected."
165         );
166     }
167     let base = range.start;
168     if base != MEM_START {
169         error!("Memory base address {:#x} is not {:#x}", base, MEM_START);
170         return Err(RebootReason::InvalidFdt);
171     }
172 
173     let size = range.len();
174     if size % guest_page_size != 0 {
175         error!("Memory size {:#x} is not a multiple of page size {:#x}", size, guest_page_size);
176         return Err(RebootReason::InvalidFdt);
177     }
178 
179     if size == 0 {
180         error!("Memory size is 0");
181         return Err(RebootReason::InvalidFdt);
182     }
183     Ok(range)
184 }
185 
patch_memory_range(fdt: &mut Fdt, memory_range: &Range<usize>) -> libfdt::Result<()>186 fn patch_memory_range(fdt: &mut Fdt, memory_range: &Range<usize>) -> libfdt::Result<()> {
187     let addr = u64::try_from(MEM_START).unwrap();
188     let size = u64::try_from(memory_range.len()).unwrap();
189     fdt.node_mut(cstr!("/memory"))?
190         .ok_or(FdtError::NotFound)?
191         .setprop_inplace(cstr!("reg"), [addr.to_be(), size.to_be()].as_bytes())
192 }
193 
/// Per-CPU information read from a CPU node of the DT.
#[derive(Debug, Default)]
struct CpuInfo {
    /// `opp-hz` values of the OPP table referenced by the CPU, if it references one.
    opptable_info: Option<ArrayVec<[u64; CpuInfo::MAX_OPPTABLES]>>,
    /// Value of the `capacity-dmips-mhz` property, if present.
    cpu_capacity: Option<u32>,
}

impl CpuInfo {
    /// Maximum number of OPP table entries kept per CPU.
    const MAX_OPPTABLES: usize = 20;
}
203 
read_opp_info_from( opp_node: FdtNode, ) -> libfdt::Result<ArrayVec<[u64; CpuInfo::MAX_OPPTABLES]>>204 fn read_opp_info_from(
205     opp_node: FdtNode,
206 ) -> libfdt::Result<ArrayVec<[u64; CpuInfo::MAX_OPPTABLES]>> {
207     let mut table = ArrayVec::new();
208     let mut opp_nodes = opp_node.subnodes()?;
209     for subnode in opp_nodes.by_ref().take(table.capacity()) {
210         let prop = subnode.getprop_u64(cstr!("opp-hz"))?.ok_or(FdtError::NotFound)?;
211         table.push(prop);
212     }
213 
214     if opp_nodes.next().is_some() {
215         warn!("OPP table has more than {} entries: discarding extra nodes.", table.capacity());
216     }
217 
218     Ok(table)
219 }
220 
/// Cores of a single cluster of the CPU topology.
#[derive(Debug, Default)]
struct ClusterTopology {
    // TODO: Support multi-level clusters & threads.
    /// One slot per possible core of the cluster; a populated slot holds a CPU index.
    cores: [Option<usize>; ClusterTopology::MAX_CORES_PER_CLUSTER],
}

impl ClusterTopology {
    /// Maximum number of cores tracked per cluster.
    const MAX_CORES_PER_CLUSTER: usize = 10;
}
230 
/// CPU topology, as a fixed-size list of optional clusters.
#[derive(Debug, Default)]
struct CpuTopology {
    // TODO: Support sockets.
    /// One slot per possible cluster; `None` marks a cluster that is not populated.
    clusters: [Option<ClusterTopology>; CpuTopology::MAX_CLUSTERS],
}

impl CpuTopology {
    /// Maximum number of clusters tracked.
    const MAX_CLUSTERS: usize = 3;
}
240 
read_cpu_map_from(fdt: &Fdt) -> libfdt::Result<Option<BTreeMap<Phandle, (usize, usize)>>>241 fn read_cpu_map_from(fdt: &Fdt) -> libfdt::Result<Option<BTreeMap<Phandle, (usize, usize)>>> {
242     let Some(cpu_map) = fdt.node(cstr!("/cpus/cpu-map"))? else {
243         return Ok(None);
244     };
245 
246     let mut topology = BTreeMap::new();
247     for n in 0..CpuTopology::MAX_CLUSTERS {
248         let name = CString::new(format!("cluster{n}")).unwrap();
249         let Some(cluster) = cpu_map.subnode(&name)? else {
250             break;
251         };
252         for m in 0..ClusterTopology::MAX_CORES_PER_CLUSTER {
253             let name = CString::new(format!("core{m}")).unwrap();
254             let Some(core) = cluster.subnode(&name)? else {
255                 break;
256             };
257             let cpu = core.getprop_u32(cstr!("cpu"))?.ok_or(FdtError::NotFound)?;
258             let prev = topology.insert(cpu.try_into()?, (n, m));
259             if prev.is_some() {
260                 return Err(FdtError::BadValue);
261             }
262         }
263     }
264 
265     Ok(Some(topology))
266 }
267 
read_cpu_info_from( fdt: &Fdt, ) -> libfdt::Result<(ArrayVec<[CpuInfo; DeviceTreeInfo::MAX_CPUS]>, Option<CpuTopology>)>268 fn read_cpu_info_from(
269     fdt: &Fdt,
270 ) -> libfdt::Result<(ArrayVec<[CpuInfo; DeviceTreeInfo::MAX_CPUS]>, Option<CpuTopology>)> {
271     let mut cpus = ArrayVec::new();
272 
273     let cpu_map = read_cpu_map_from(fdt)?;
274     let mut topology: CpuTopology = Default::default();
275 
276     let mut cpu_nodes = fdt.compatible_nodes(cstr!("arm,armv8"))?;
277     for (idx, cpu) in cpu_nodes.by_ref().take(cpus.capacity()).enumerate() {
278         let cpu_capacity = cpu.getprop_u32(cstr!("capacity-dmips-mhz"))?;
279         let opp_phandle = cpu.getprop_u32(cstr!("operating-points-v2"))?;
280         let opptable_info = if let Some(phandle) = opp_phandle {
281             let phandle = phandle.try_into()?;
282             let node = fdt.node_with_phandle(phandle)?.ok_or(FdtError::NotFound)?;
283             Some(read_opp_info_from(node)?)
284         } else {
285             None
286         };
287         let info = CpuInfo { opptable_info, cpu_capacity };
288         cpus.push(info);
289 
290         if let Some(ref cpu_map) = cpu_map {
291             let phandle = cpu.get_phandle()?.ok_or(FdtError::NotFound)?;
292             let (cluster, core_idx) = cpu_map.get(&phandle).ok_or(FdtError::BadValue)?;
293             let cluster = topology.clusters[*cluster].get_or_insert(Default::default());
294             if cluster.cores[*core_idx].is_some() {
295                 return Err(FdtError::BadValue);
296             }
297             cluster.cores[*core_idx] = Some(idx);
298         }
299     }
300 
301     if cpu_nodes.next().is_some() {
302         warn!("DT has more than {} CPU nodes: discarding extra nodes.", cpus.capacity());
303     }
304 
305     Ok((cpus, cpu_map.map(|_| topology)))
306 }
307 
validate_cpu_info(cpus: &[CpuInfo]) -> Result<(), FdtValidationError>308 fn validate_cpu_info(cpus: &[CpuInfo]) -> Result<(), FdtValidationError> {
309     if cpus.is_empty() {
310         return Err(FdtValidationError::InvalidCpuCount(0));
311     }
312     Ok(())
313 }
314 
read_vcpufreq_info(fdt: &Fdt) -> libfdt::Result<Option<VcpufreqInfo>>315 fn read_vcpufreq_info(fdt: &Fdt) -> libfdt::Result<Option<VcpufreqInfo>> {
316     let mut nodes = fdt.compatible_nodes(cstr!("virtual,android-v-only-cpufreq"))?;
317     let Some(node) = nodes.next() else {
318         return Ok(None);
319     };
320 
321     if nodes.next().is_some() {
322         warn!("DT has more than 1 cpufreq node: discarding extra nodes.");
323     }
324 
325     let mut regs = node.reg()?.ok_or(FdtError::NotFound)?;
326     let reg = regs.next().ok_or(FdtError::NotFound)?;
327     let size = reg.size.ok_or(FdtError::NotFound)?;
328 
329     Ok(Some(VcpufreqInfo { addr: reg.addr, size }))
330 }
331 
validate_vcpufreq_info( vcpufreq_info: &VcpufreqInfo, cpus: &[CpuInfo], ) -> Result<(), FdtValidationError>332 fn validate_vcpufreq_info(
333     vcpufreq_info: &VcpufreqInfo,
334     cpus: &[CpuInfo],
335 ) -> Result<(), FdtValidationError> {
336     const VCPUFREQ_BASE_ADDR: u64 = 0x1040000;
337     const VCPUFREQ_SIZE_PER_CPU: u64 = 0x8;
338 
339     let base = vcpufreq_info.addr;
340     let size = vcpufreq_info.size;
341     let expected_size = VCPUFREQ_SIZE_PER_CPU * cpus.len() as u64;
342 
343     if (base, size) != (VCPUFREQ_BASE_ADDR, expected_size) {
344         return Err(FdtValidationError::InvalidVcpufreq(base, size));
345     }
346 
347     Ok(())
348 }
349 
/// Patches the next "operating-points-v2"-compatible node found after `node`.
///
/// If `opptable` is `None`, that node is NOP-ed out. Otherwise each entry of `opptable` is
/// written in place to the `opp-hz` property of the successive subnodes of that node, and any
/// subnodes left over are deleted.
///
/// Returns `FdtError::NoSpace` if there is no such compatible node or if it has fewer subnodes
/// than `opptable` has entries.
fn patch_opptable(
    node: FdtNodeMut,
    opptable: Option<ArrayVec<[u64; CpuInfo::MAX_OPPTABLES]>>,
) -> libfdt::Result<()> {
    let oppcompat = cstr!("operating-points-v2");
    let next = node.next_compatible(oppcompat)?.ok_or(FdtError::NoSpace)?;

    // No OPP table for this CPU: drop the table node altogether.
    let Some(opptable) = opptable else {
        return next.nop();
    };

    let mut next_subnode = next.first_subnode()?;

    // Fill the existing subnodes, one per entry.
    for entry in opptable {
        let mut subnode = next_subnode.ok_or(FdtError::NoSpace)?;
        subnode.setprop_inplace(cstr!("opp-hz"), &entry.to_be_bytes())?;
        next_subnode = subnode.next_subnode()?;
    }

    // Remove the subnodes that were not needed.
    while let Some(current) = next_subnode {
        next_subnode = current.delete_and_next_subnode()?;
    }

    Ok(())
}
375 
/// Returns the `n`-th (0-based) node compatible with `compat`, searching from the root.
///
/// Returns `Ok(None)` when exactly `n` such nodes exist, and `FdtError::NoSpace` when the
/// search runs out of nodes before reaching index `n`.
// TODO(ptosi): Rework FdtNodeMut and replace this function.
fn get_nth_compatible<'a>(
    fdt: &'a mut Fdt,
    n: usize,
    compat: &CStr,
) -> libfdt::Result<Option<FdtNodeMut<'a>>> {
    let mut node = fdt.root_mut().next_compatible(compat)?;
    // Step forward `n` times from the first match.
    for _ in 0..n {
        node = node.ok_or(FdtError::NoSpace)?.next_compatible(compat)?;
    }
    Ok(node)
}
388 
/// Patches the CPU nodes, their OPP tables and the cpu-map of `fdt` to match `cpus` and
/// `topology`.
///
/// For each entry of `cpus`, the "arm,armv8" node of the same index gets its
/// `capacity-dmips-mhz` updated in place (when a capacity is known) and its OPP table patched
/// through `patch_opptable`. Remaining "arm,armv8" nodes are deleted. The cpu-map is then
/// either rewritten to reference the phandles of the kept CPU nodes, or NOP-ed out when
/// `topology` is `None`.
///
/// Returns `FdtError::NoSpace` if `fdt` has fewer "arm,armv8" nodes than `cpus` has entries.
///
/// # Panics
///
/// Panics if a CPU node has no phandle, if an expected cpu-map, cluster or core node is
/// missing from `fdt`, if a core index is out of range of the patched CPUs, or if a cluster
/// node has more subnodes than `ClusterTopology` has core slots.
fn patch_cpus(
    fdt: &mut Fdt,
    cpus: &[CpuInfo],
    topology: &Option<CpuTopology>,
) -> libfdt::Result<()> {
    const COMPAT: &CStr = cstr!("arm,armv8");
    // Phandle of each patched CPU node, indexed like `cpus`.
    let mut cpu_phandles = Vec::new();
    for (idx, cpu) in cpus.iter().enumerate() {
        let mut cur = get_nth_compatible(fdt, idx, COMPAT)?.ok_or(FdtError::NoSpace)?;
        let phandle = cur.as_node().get_phandle()?.unwrap();
        cpu_phandles.push(phandle);
        if let Some(cpu_capacity) = cpu.cpu_capacity {
            cur.setprop_inplace(cstr!("capacity-dmips-mhz"), &cpu_capacity.to_be_bytes())?;
        }
        patch_opptable(cur, cpu.opptable_info)?;
    }
    // Delete the CPU nodes beyond the ones that were patched.
    let mut next = get_nth_compatible(fdt, cpus.len(), COMPAT)?;
    while let Some(current) = next {
        next = current.delete_and_next_compatible(COMPAT)?;
    }

    if let Some(topology) = topology {
        for (n, cluster) in topology.clusters.iter().enumerate() {
            let path = CString::new(format!("/cpus/cpu-map/cluster{n}")).unwrap();
            let cluster_node = fdt.node_mut(&path)?.unwrap();
            if let Some(cluster) = cluster {
                // Walk the core subnodes in lockstep with the core slots of the cluster.
                let mut iter = cluster_node.first_subnode()?;
                for core in cluster.cores {
                    let mut core_node = iter.unwrap();
                    iter = if let Some(core_idx) = core {
                        // Populated slot: point the core at the matching CPU node.
                        let phandle = *cpu_phandles.get(core_idx).unwrap();
                        let value = u32::from(phandle).to_be_bytes();
                        core_node.setprop_inplace(cstr!("cpu"), &value)?;
                        core_node.next_subnode()?
                    } else {
                        // Empty slot: the core subnode is not needed.
                        core_node.delete_and_next_subnode()?
                    };
                }
                assert!(iter.is_none());
            } else {
                // Unpopulated cluster: drop its node.
                cluster_node.nop()?;
            }
        }
    } else {
        // No topology information: drop the whole cpu-map.
        fdt.node_mut(cstr!("/cpus/cpu-map"))?.unwrap().nop()?;
    }

    Ok(())
}
438 
439 /// Reads the /avf/untrusted DT node, which the host can use to pass properties (no subnodes) to
440 /// the guest that don't require being validated by pvmfw.
parse_untrusted_props(fdt: &Fdt) -> libfdt::Result<BTreeMap<CString, Vec<u8>>>441 fn parse_untrusted_props(fdt: &Fdt) -> libfdt::Result<BTreeMap<CString, Vec<u8>>> {
442     let mut props = BTreeMap::new();
443     if let Some(node) = fdt.node(cstr!("/avf/untrusted"))? {
444         for property in node.properties()? {
445             let name = property.name()?;
446             let value = property.value()?;
447             props.insert(CString::from(name), value.to_vec());
448         }
449         if node.subnodes()?.next().is_some() {
450             warn!("Discarding unexpected /avf/untrusted subnodes.");
451         }
452     }
453 
454     Ok(props)
455 }
456 
457 /// Read candidate properties' names from DT which could be overlaid
parse_vm_ref_dt(fdt: &Fdt) -> libfdt::Result<BTreeMap<CString, Vec<u8>>>458 fn parse_vm_ref_dt(fdt: &Fdt) -> libfdt::Result<BTreeMap<CString, Vec<u8>>> {
459     let mut property_map = BTreeMap::new();
460     if let Some(avf_node) = fdt.node(cstr!("/avf"))? {
461         for property in avf_node.properties()? {
462             let name = property.name()?;
463             let value = property.value()?;
464             property_map.insert(
465                 CString::new(name.to_bytes()).map_err(|_| FdtError::BadValue)?,
466                 value.to_vec(),
467             );
468         }
469     }
470     Ok(property_map)
471 }
472 
validate_untrusted_props(props: &BTreeMap<CString, Vec<u8>>) -> Result<(), FdtValidationError>473 fn validate_untrusted_props(props: &BTreeMap<CString, Vec<u8>>) -> Result<(), FdtValidationError> {
474     const FORBIDDEN_PROPS: &[&CStr] =
475         &[cstr!("compatible"), cstr!("linux,phandle"), cstr!("phandle")];
476 
477     for name in FORBIDDEN_PROPS {
478         if props.contains_key(*name) {
479             return Err(FdtValidationError::ForbiddenUntrustedProp(name));
480         }
481     }
482 
483     Ok(())
484 }
485 
486 /// Overlay VM reference DT into VM DT based on the props_info. Property is overlaid in vm_dt only
487 /// when it exists both in vm_ref_dt and props_info. If the values mismatch, it returns error.
validate_vm_ref_dt( vm_dt: &mut Fdt, vm_ref_dt: &Fdt, props_info: &BTreeMap<CString, Vec<u8>>, ) -> libfdt::Result<()>488 fn validate_vm_ref_dt(
489     vm_dt: &mut Fdt,
490     vm_ref_dt: &Fdt,
491     props_info: &BTreeMap<CString, Vec<u8>>,
492 ) -> libfdt::Result<()> {
493     let root_vm_dt = vm_dt.root_mut();
494     let mut avf_vm_dt = root_vm_dt.add_subnode(cstr!("avf"))?;
495     // TODO(b/318431677): Validate nodes beyond /avf.
496     let avf_node = vm_ref_dt.node(cstr!("/avf"))?.ok_or(FdtError::NotFound)?;
497     for (name, value) in props_info.iter() {
498         if let Some(ref_value) = avf_node.getprop(name)? {
499             if value != ref_value {
500                 error!(
501                     "Property mismatches while applying overlay VM reference DT. \
502                     Name:{:?}, Value from host as hex:{:x?}, Value from VM reference DT as hex:{:x?}",
503                     name, value, ref_value
504                 );
505                 return Err(FdtError::BadValue);
506             }
507             avf_vm_dt.setprop(name, ref_value)?;
508         }
509     }
510     Ok(())
511 }
512 
/// PCI host controller information read from the DT.
#[derive(Debug)]
struct PciInfo {
    /// The first two entries of the controller's `ranges` property.
    ranges: [PciAddrRange; 2],
    /// Entries of the `interrupt-map-mask` property.
    irq_masks: ArrayVec<[PciIrqMask; PciInfo::MAX_IRQS]>,
    /// Entries of the `interrupt-map` property.
    irq_maps: ArrayVec<[PciIrqMap; PciInfo::MAX_IRQS]>,
}

impl PciInfo {
    /// Number of cells per `interrupt-map-mask` entry.
    const IRQ_MASK_CELLS: usize = 4;
    /// Number of cells per `interrupt-map` entry.
    const IRQ_MAP_CELLS: usize = 10;
    /// Maximum number of interrupt mask/map entries kept.
    const MAX_IRQS: usize = 16;
}

/// A PCI `ranges` entry: `(flags cell, bus address)` child address, parent address and size.
type PciAddrRange = AddressRange<(u32, u64), u64, u64>;
/// Cells of one `interrupt-map-mask` entry.
type PciIrqMask = [u32; PciInfo::IRQ_MASK_CELLS];
/// Cells of one `interrupt-map` entry.
type PciIrqMap = [u32; PciInfo::IRQ_MAP_CELLS];
529 
530 /// Iterator that takes N cells as a chunk
531 struct CellChunkIterator<'a, const N: usize> {
532     cells: CellIterator<'a>,
533 }
534 
535 impl<'a, const N: usize> CellChunkIterator<'a, N> {
new(cells: CellIterator<'a>) -> Self536     fn new(cells: CellIterator<'a>) -> Self {
537         Self { cells }
538     }
539 }
540 
541 impl<'a, const N: usize> Iterator for CellChunkIterator<'a, N> {
542     type Item = [u32; N];
next(&mut self) -> Option<Self::Item>543     fn next(&mut self) -> Option<Self::Item> {
544         let mut ret: Self::Item = [0; N];
545         for i in ret.iter_mut() {
546             *i = self.cells.next()?;
547         }
548         Some(ret)
549     }
550 }
551 
552 /// Read pci host controller ranges, irq maps, and irq map masks from DT
read_pci_info_from(fdt: &Fdt) -> libfdt::Result<PciInfo>553 fn read_pci_info_from(fdt: &Fdt) -> libfdt::Result<PciInfo> {
554     let node =
555         fdt.compatible_nodes(cstr!("pci-host-cam-generic"))?.next().ok_or(FdtError::NotFound)?;
556 
557     let mut ranges = node.ranges::<(u32, u64), u64, u64>()?.ok_or(FdtError::NotFound)?;
558     let range0 = ranges.next().ok_or(FdtError::NotFound)?;
559     let range1 = ranges.next().ok_or(FdtError::NotFound)?;
560 
561     let irq_masks = node.getprop_cells(cstr!("interrupt-map-mask"))?.ok_or(FdtError::NotFound)?;
562     let mut chunks = CellChunkIterator::<{ PciInfo::IRQ_MASK_CELLS }>::new(irq_masks);
563     let irq_masks = (&mut chunks).take(PciInfo::MAX_IRQS).collect();
564 
565     if chunks.next().is_some() {
566         warn!("Input DT has more than {} PCI entries!", PciInfo::MAX_IRQS);
567         return Err(FdtError::NoSpace);
568     }
569 
570     let irq_maps = node.getprop_cells(cstr!("interrupt-map"))?.ok_or(FdtError::NotFound)?;
571     let mut chunks = CellChunkIterator::<{ PciInfo::IRQ_MAP_CELLS }>::new(irq_maps);
572     let irq_maps = (&mut chunks).take(PciInfo::MAX_IRQS).collect();
573 
574     if chunks.next().is_some() {
575         warn!("Input DT has more than {} PCI entries!", PciInfo::MAX_IRQS);
576         return Err(FdtError::NoSpace);
577     }
578 
579     Ok(PciInfo { ranges: [range0, range1], irq_masks, irq_maps })
580 }
581 
validate_pci_info(pci_info: &PciInfo, memory_range: &Range<usize>) -> Result<(), RebootReason>582 fn validate_pci_info(pci_info: &PciInfo, memory_range: &Range<usize>) -> Result<(), RebootReason> {
583     for range in pci_info.ranges.iter() {
584         validate_pci_addr_range(range, memory_range)?;
585     }
586     for irq_mask in pci_info.irq_masks.iter() {
587         validate_pci_irq_mask(irq_mask)?;
588     }
589     for (idx, irq_map) in pci_info.irq_maps.iter().enumerate() {
590         validate_pci_irq_map(irq_map, idx)?;
591     }
592     Ok(())
593 }
594 
validate_pci_addr_range( range: &PciAddrRange, memory_range: &Range<usize>, ) -> Result<(), RebootReason>595 fn validate_pci_addr_range(
596     range: &PciAddrRange,
597     memory_range: &Range<usize>,
598 ) -> Result<(), RebootReason> {
599     let mem_flags = PciMemoryFlags(range.addr.0);
600     let range_type = mem_flags.range_type();
601     let bus_addr = range.addr.1;
602     let cpu_addr = range.parent_addr;
603     let size = range.size;
604 
605     if range_type != PciRangeType::Memory64 {
606         error!("Invalid range type {:?} for bus address {:#x} in PCI node", range_type, bus_addr);
607         return Err(RebootReason::InvalidFdt);
608     }
609     // Enforce ID bus-to-cpu mappings, as used by crosvm.
610     if bus_addr != cpu_addr {
611         error!("PCI bus address: {:#x} is different from CPU address: {:#x}", bus_addr, cpu_addr);
612         return Err(RebootReason::InvalidFdt);
613     }
614 
615     let Some(bus_end) = bus_addr.checked_add(size) else {
616         error!("PCI address range size {:#x} overflows", size);
617         return Err(RebootReason::InvalidFdt);
618     };
619     if bus_end > MAX_VIRT_ADDR.try_into().unwrap() {
620         error!("PCI address end {:#x} is outside of translatable range", bus_end);
621         return Err(RebootReason::InvalidFdt);
622     }
623 
624     let memory_start = memory_range.start.try_into().unwrap();
625     let memory_end = memory_range.end.try_into().unwrap();
626 
627     if max(bus_addr, memory_start) < min(bus_end, memory_end) {
628         error!(
629             "PCI address range {:#x}-{:#x} overlaps with main memory range {:#x}-{:#x}",
630             bus_addr, bus_end, memory_start, memory_end
631         );
632         return Err(RebootReason::InvalidFdt);
633     }
634 
635     Ok(())
636 }
637 
validate_pci_irq_mask(irq_mask: &PciIrqMask) -> Result<(), RebootReason>638 fn validate_pci_irq_mask(irq_mask: &PciIrqMask) -> Result<(), RebootReason> {
639     const IRQ_MASK_ADDR_HI: u32 = 0xf800;
640     const IRQ_MASK_ADDR_ME: u32 = 0x0;
641     const IRQ_MASK_ADDR_LO: u32 = 0x0;
642     const IRQ_MASK_ANY_IRQ: u32 = 0x7;
643     const EXPECTED: PciIrqMask =
644         [IRQ_MASK_ADDR_HI, IRQ_MASK_ADDR_ME, IRQ_MASK_ADDR_LO, IRQ_MASK_ANY_IRQ];
645     if *irq_mask != EXPECTED {
646         error!("Invalid PCI irq mask {:#?}", irq_mask);
647         return Err(RebootReason::InvalidFdt);
648     }
649     Ok(())
650 }
651 
validate_pci_irq_map(irq_map: &PciIrqMap, idx: usize) -> Result<(), RebootReason>652 fn validate_pci_irq_map(irq_map: &PciIrqMap, idx: usize) -> Result<(), RebootReason> {
653     const PCI_DEVICE_IDX: usize = 11;
654     const PCI_IRQ_ADDR_ME: u32 = 0;
655     const PCI_IRQ_ADDR_LO: u32 = 0;
656     const PCI_IRQ_INTC: u32 = 1;
657     const AARCH64_IRQ_BASE: u32 = 4; // from external/crosvm/aarch64/src/lib.rs
658     const GIC_SPI: u32 = 0;
659     const IRQ_TYPE_LEVEL_HIGH: u32 = 4;
660 
661     let pci_addr = (irq_map[0], irq_map[1], irq_map[2]);
662     let pci_irq_number = irq_map[3];
663     let _controller_phandle = irq_map[4]; // skipped.
664     let gic_addr = (irq_map[5], irq_map[6]); // address-cells is <2> for GIC
665                                              // interrupt-cells is <3> for GIC
666     let gic_peripheral_interrupt_type = irq_map[7];
667     let gic_irq_number = irq_map[8];
668     let gic_irq_type = irq_map[9];
669 
670     let phys_hi: u32 = (0x1 << PCI_DEVICE_IDX) * (idx + 1) as u32;
671     let expected_pci_addr = (phys_hi, PCI_IRQ_ADDR_ME, PCI_IRQ_ADDR_LO);
672 
673     if pci_addr != expected_pci_addr {
674         error!("PCI device address {:#x} {:#x} {:#x} in interrupt-map is different from expected address \
675                {:#x} {:#x} {:#x}",
676                pci_addr.0, pci_addr.1, pci_addr.2, expected_pci_addr.0, expected_pci_addr.1, expected_pci_addr.2);
677         return Err(RebootReason::InvalidFdt);
678     }
679 
680     if pci_irq_number != PCI_IRQ_INTC {
681         error!(
682             "PCI INT# {:#x} in interrupt-map is different from expected value {:#x}",
683             pci_irq_number, PCI_IRQ_INTC
684         );
685         return Err(RebootReason::InvalidFdt);
686     }
687 
688     if gic_addr != (0, 0) {
689         error!(
690             "GIC address {:#x} {:#x} in interrupt-map is different from expected address \
691                {:#x} {:#x}",
692             gic_addr.0, gic_addr.1, 0, 0
693         );
694         return Err(RebootReason::InvalidFdt);
695     }
696 
697     if gic_peripheral_interrupt_type != GIC_SPI {
698         error!("GIC peripheral interrupt type {:#x} in interrupt-map is different from expected value \
699                {:#x}", gic_peripheral_interrupt_type, GIC_SPI);
700         return Err(RebootReason::InvalidFdt);
701     }
702 
703     let irq_nr: u32 = AARCH64_IRQ_BASE + (idx as u32);
704     if gic_irq_number != irq_nr {
705         error!(
706             "GIC irq number {:#x} in interrupt-map is unexpected. Expected {:#x}",
707             gic_irq_number, irq_nr
708         );
709         return Err(RebootReason::InvalidFdt);
710     }
711 
712     if gic_irq_type != IRQ_TYPE_LEVEL_HIGH {
713         error!(
714             "IRQ type in {:#x} is invalid. Must be LEVEL_HIGH {:#x}",
715             gic_irq_type, IRQ_TYPE_LEVEL_HIGH
716         );
717         return Err(RebootReason::InvalidFdt);
718     }
719     Ok(())
720 }
721 
patch_pci_info(fdt: &mut Fdt, pci_info: &PciInfo) -> libfdt::Result<()>722 fn patch_pci_info(fdt: &mut Fdt, pci_info: &PciInfo) -> libfdt::Result<()> {
723     let mut node =
724         fdt.root_mut().next_compatible(cstr!("pci-host-cam-generic"))?.ok_or(FdtError::NotFound)?;
725 
726     let irq_masks_size = pci_info.irq_masks.len() * size_of::<PciIrqMask>();
727     node.trimprop(cstr!("interrupt-map-mask"), irq_masks_size)?;
728 
729     let irq_maps_size = pci_info.irq_maps.len() * size_of::<PciIrqMap>();
730     node.trimprop(cstr!("interrupt-map"), irq_maps_size)?;
731 
732     node.setprop_inplace(
733         cstr!("ranges"),
734         [pci_info.ranges[0].to_cells(), pci_info.ranges[1].to_cells()].as_flattened(),
735     )
736 }
737 
/// Serial device information read from the DT.
#[derive(Default, Debug)]
struct SerialInfo {
    /// Address of the first `reg` entry of each serial node, in DT order.
    addrs: ArrayVec<[u64; Self::MAX_SERIALS]>,
}

impl SerialInfo {
    /// Maximum number of serial devices kept.
    const MAX_SERIALS: usize = 4;
}
746 
read_serial_info_from(fdt: &Fdt) -> libfdt::Result<SerialInfo>747 fn read_serial_info_from(fdt: &Fdt) -> libfdt::Result<SerialInfo> {
748     let mut addrs = ArrayVec::new();
749 
750     let mut serial_nodes = fdt.compatible_nodes(cstr!("ns16550a"))?;
751     for node in serial_nodes.by_ref().take(addrs.capacity()) {
752         let reg = node.first_reg()?;
753         addrs.push(reg.addr);
754     }
755     if serial_nodes.next().is_some() {
756         warn!("DT has more than {} UART nodes: discarding extra nodes.", addrs.capacity());
757     }
758 
759     Ok(SerialInfo { addrs })
760 }
761 
/// Watchdog timer (vCPU stall detector) information read from the DT.
#[derive(Default, Debug, PartialEq)]
struct WdtInfo {
    /// Address of the first `reg` entry.
    addr: u64,
    /// Size of the first `reg` entry.
    size: u64,
    /// Cells of the first `interrupts` entry.
    irq: [u32; WdtInfo::IRQ_CELLS],
}

impl WdtInfo {
    /// Number of cells per `interrupts` entry.
    const IRQ_CELLS: usize = 3;
    /// Expected interrupt number (second interrupt cell).
    const IRQ_NR: u32 = 0xf;
    /// Expected address of the device.
    const ADDR: u64 = 0x3000;
    /// Expected size of the device.
    const SIZE: u64 = 0x1000;
    /// Expected first interrupt cell.
    const GIC_PPI: u32 = 1;
    /// Expected trigger-type bits of the third interrupt cell.
    const IRQ_TYPE_EDGE_RISING: u32 = 1;
    /// Position of the CPU bitmap within the third interrupt cell.
    const GIC_FDT_IRQ_PPI_CPU_SHIFT: u32 = 8;
    // TODO(b/350498812): Rework this for >8 vCPUs.
    /// Bits of the third interrupt cell holding the CPU bitmap.
    const GIC_FDT_IRQ_PPI_CPU_MASK: u32 = 0xff << Self::GIC_FDT_IRQ_PPI_CPU_SHIFT;

    /// Returns the only accepted watchdog configuration for a VM with `num_cpus` CPUs.
    ///
    /// The third interrupt cell carries one bit per CPU (CPU 0 in the lowest bit of the bitmap),
    /// truncated to the width of `GIC_FDT_IRQ_PPI_CPU_MASK`, combined with the trigger type.
    const fn get_expected(num_cpus: usize) -> Self {
        // `1 << num_cpus` would overflow the shift for `num_cpus >= 32`; saturate instead, which
        // yields the same result once truncated to the CPU mask.
        let cpu_bits: u32 =
            if num_cpus >= u32::BITS as usize { u32::MAX } else { (1 << num_cpus) - 1 };
        let cpu_field = (cpu_bits << Self::GIC_FDT_IRQ_PPI_CPU_SHIFT)
            & Self::GIC_FDT_IRQ_PPI_CPU_MASK;

        Self {
            addr: Self::ADDR,
            size: Self::SIZE,
            irq: [Self::GIC_PPI, Self::IRQ_NR, cpu_field | Self::IRQ_TYPE_EDGE_RISING],
        }
    }
}
794 
read_wdt_info_from(fdt: &Fdt) -> libfdt::Result<WdtInfo>795 fn read_wdt_info_from(fdt: &Fdt) -> libfdt::Result<WdtInfo> {
796     let mut node_iter = fdt.compatible_nodes(cstr!("qemu,vcpu-stall-detector"))?;
797     let node = node_iter.next().ok_or(FdtError::NotFound)?;
798     let mut ranges = node.reg()?.ok_or(FdtError::NotFound)?;
799 
800     let reg = ranges.next().ok_or(FdtError::NotFound)?;
801     let size = reg.size.ok_or(FdtError::NotFound)?;
802     if ranges.next().is_some() {
803         warn!("Discarding extra vmwdt <reg> entries.");
804     }
805 
806     let interrupts = node.getprop_cells(cstr!("interrupts"))?.ok_or(FdtError::NotFound)?;
807     let mut chunks = CellChunkIterator::<{ WdtInfo::IRQ_CELLS }>::new(interrupts);
808     let irq = chunks.next().ok_or(FdtError::NotFound)?;
809 
810     if chunks.next().is_some() {
811         warn!("Discarding extra vmwdt <interrupts> entries.");
812     }
813 
814     Ok(WdtInfo { addr: reg.addr, size, irq })
815 }
816 
validate_wdt_info(wdt: &WdtInfo, num_cpus: usize) -> Result<(), RebootReason>817 fn validate_wdt_info(wdt: &WdtInfo, num_cpus: usize) -> Result<(), RebootReason> {
818     if *wdt != WdtInfo::get_expected(num_cpus) {
819         error!("Invalid watchdog timer: {wdt:?}");
820         return Err(RebootReason::InvalidFdt);
821     }
822 
823     Ok(())
824 }
825 
patch_wdt_info(fdt: &mut Fdt, num_cpus: usize) -> libfdt::Result<()>826 fn patch_wdt_info(fdt: &mut Fdt, num_cpus: usize) -> libfdt::Result<()> {
827     let mut interrupts = WdtInfo::get_expected(num_cpus).irq;
828     for v in interrupts.iter_mut() {
829         *v = v.to_be();
830     }
831 
832     let mut node = fdt
833         .root_mut()
834         .next_compatible(cstr!("qemu,vcpu-stall-detector"))?
835         .ok_or(libfdt::FdtError::NotFound)?;
836     node.setprop_inplace(cstr!("interrupts"), interrupts.as_bytes())?;
837     Ok(())
838 }
839 
840 /// Patch the DT by deleting the ns16550a compatible nodes whose address are unknown
patch_serial_info(fdt: &mut Fdt, serial_info: &SerialInfo) -> libfdt::Result<()>841 fn patch_serial_info(fdt: &mut Fdt, serial_info: &SerialInfo) -> libfdt::Result<()> {
842     let name = cstr!("ns16550a");
843     let mut next = fdt.root_mut().next_compatible(name);
844     while let Some(current) = next? {
845         let reg =
846             current.as_node().reg()?.ok_or(FdtError::NotFound)?.next().ok_or(FdtError::NotFound)?;
847         next = if !serial_info.addrs.contains(&reg.addr) {
848             current.delete_and_next_compatible(name)
849         } else {
850             current.next_compatible(name)
851         }
852     }
853     Ok(())
854 }
855 
validate_swiotlb_info( swiotlb_info: &SwiotlbInfo, memory: &Range<usize>, guest_page_size: usize, ) -> Result<(), RebootReason>856 fn validate_swiotlb_info(
857     swiotlb_info: &SwiotlbInfo,
858     memory: &Range<usize>,
859     guest_page_size: usize,
860 ) -> Result<(), RebootReason> {
861     let size = swiotlb_info.size;
862     let align = swiotlb_info.align;
863 
864     if size == 0 || (size % guest_page_size) != 0 {
865         error!("Invalid swiotlb size {:#x}", size);
866         return Err(RebootReason::InvalidFdt);
867     }
868 
869     if let Some(align) = align.filter(|&a| a % guest_page_size != 0) {
870         error!("Invalid swiotlb alignment {:#x}", align);
871         return Err(RebootReason::InvalidFdt);
872     }
873 
874     if let Some(addr) = swiotlb_info.addr {
875         if addr.checked_add(size).is_none() {
876             error!("Invalid swiotlb range: addr:{addr:#x} size:{size:#x}");
877             return Err(RebootReason::InvalidFdt);
878         }
879     }
880     if let Some(range) = swiotlb_info.fixed_range() {
881         if !range.is_within(memory) {
882             error!("swiotlb range {range:#x?} not part of memory range {memory:#x?}");
883             return Err(RebootReason::InvalidFdt);
884         }
885     }
886 
887     Ok(())
888 }
889 
patch_swiotlb_info(fdt: &mut Fdt, swiotlb_info: &SwiotlbInfo) -> libfdt::Result<()>890 fn patch_swiotlb_info(fdt: &mut Fdt, swiotlb_info: &SwiotlbInfo) -> libfdt::Result<()> {
891     let mut node =
892         fdt.root_mut().next_compatible(cstr!("restricted-dma-pool"))?.ok_or(FdtError::NotFound)?;
893 
894     if let Some(range) = swiotlb_info.fixed_range() {
895         node.setprop_addrrange_inplace(
896             cstr!("reg"),
897             range.start.try_into().unwrap(),
898             range.len().try_into().unwrap(),
899         )?;
900         node.nop_property(cstr!("size"))?;
901         node.nop_property(cstr!("alignment"))?;
902     } else {
903         node.nop_property(cstr!("reg"))?;
904         node.setprop_inplace(cstr!("size"), &swiotlb_info.size.to_be_bytes())?;
905         node.setprop_inplace(cstr!("alignment"), &swiotlb_info.align.unwrap().to_be_bytes())?;
906     }
907 
908     Ok(())
909 }
910 
patch_gic(fdt: &mut Fdt, num_cpus: usize) -> libfdt::Result<()>911 fn patch_gic(fdt: &mut Fdt, num_cpus: usize) -> libfdt::Result<()> {
912     let node = fdt.compatible_nodes(cstr!("arm,gic-v3"))?.next().ok_or(FdtError::NotFound)?;
913     let mut ranges = node.reg()?.ok_or(FdtError::NotFound)?;
914     let range0 = ranges.next().ok_or(FdtError::NotFound)?;
915     let mut range1 = ranges.next().ok_or(FdtError::NotFound)?;
916 
917     let addr = range0.addr;
918     // `read_cpu_info_from()` guarantees that we have at most MAX_CPUS.
919     const_assert!(DeviceTreeInfo::gic_patched_size(DeviceTreeInfo::MAX_CPUS).is_some());
920     let size = u64::try_from(DeviceTreeInfo::gic_patched_size(num_cpus).unwrap()).unwrap();
921 
922     // range1 is just below range0
923     range1.addr = addr - size;
924     range1.size = Some(size);
925 
926     let (addr0, size0) = range0.to_cells();
927     let (addr1, size1) = range1.to_cells();
928     let value = [addr0, size0.unwrap(), addr1, size1.unwrap()];
929 
930     let mut node =
931         fdt.root_mut().next_compatible(cstr!("arm,gic-v3"))?.ok_or(FdtError::NotFound)?;
932     node.setprop_inplace(cstr!("reg"), value.as_flattened())
933 }
934 
patch_timer(fdt: &mut Fdt, num_cpus: usize) -> libfdt::Result<()>935 fn patch_timer(fdt: &mut Fdt, num_cpus: usize) -> libfdt::Result<()> {
936     const NUM_INTERRUPTS: usize = 4;
937     const CELLS_PER_INTERRUPT: usize = 3;
938     let node = fdt.compatible_nodes(cstr!("arm,armv8-timer"))?.next().ok_or(FdtError::NotFound)?;
939     let interrupts = node.getprop_cells(cstr!("interrupts"))?.ok_or(FdtError::NotFound)?;
940     let mut value: ArrayVec<[u32; NUM_INTERRUPTS * CELLS_PER_INTERRUPT]> =
941         interrupts.take(NUM_INTERRUPTS * CELLS_PER_INTERRUPT).collect();
942 
943     let num_cpus: u32 = num_cpus.try_into().unwrap();
944     // TODO(b/350498812): Rework this for >8 vCPUs.
945     let cpu_mask: u32 = (((0x1 << num_cpus) - 1) & 0xff) << 8;
946 
947     for v in value.iter_mut().skip(2).step_by(CELLS_PER_INTERRUPT) {
948         *v |= cpu_mask;
949     }
950     for v in value.iter_mut() {
951         *v = v.to_be();
952     }
953 
954     let value = value.into_inner();
955 
956     let mut node =
957         fdt.root_mut().next_compatible(cstr!("arm,armv8-timer"))?.ok_or(FdtError::NotFound)?;
958     node.setprop_inplace(cstr!("interrupts"), value.as_bytes())
959 }
960 
patch_untrusted_props(fdt: &mut Fdt, props: &BTreeMap<CString, Vec<u8>>) -> libfdt::Result<()>961 fn patch_untrusted_props(fdt: &mut Fdt, props: &BTreeMap<CString, Vec<u8>>) -> libfdt::Result<()> {
962     let avf_node = if let Some(node) = fdt.node_mut(cstr!("/avf"))? {
963         node
964     } else {
965         fdt.root_mut().add_subnode(cstr!("avf"))?
966     };
967 
968     // The node shouldn't already be present; if it is, return the error.
969     let mut node = avf_node.add_subnode(cstr!("untrusted"))?;
970 
971     for (name, value) in props {
972         node.setprop(name, value)?;
973     }
974 
975     Ok(())
976 }
977 
/// Address range of the virtual cpufreq device, written to the `reg` property of `/cpufreq` by
/// `patch_vcpufreq()`.
#[derive(Debug)]
struct VcpufreqInfo {
    /// Address written to `reg`.
    addr: u64,
    /// Size written to `reg`.
    size: u64,
}
983 
patch_vcpufreq(fdt: &mut Fdt, vcpufreq_info: &Option<VcpufreqInfo>) -> libfdt::Result<()>984 fn patch_vcpufreq(fdt: &mut Fdt, vcpufreq_info: &Option<VcpufreqInfo>) -> libfdt::Result<()> {
985     let mut node = fdt.node_mut(cstr!("/cpufreq"))?.unwrap();
986     if let Some(info) = vcpufreq_info {
987         node.setprop_addrrange_inplace(cstr!("reg"), info.addr, info.size)
988     } else {
989         node.nop()
990     }
991 }
992 
/// Values extracted from the input DT by `parse_device_tree()` and later written to the
/// sanitized DT by `patch_device_tree()`.
#[derive(Debug)]
pub struct DeviceTreeInfo {
    /// Range of the initrd, if the input DT described one.
    pub initrd_range: Option<Range<usize>>,
    /// Validated memory range.
    pub memory_range: Range<usize>,
    /// Boot arguments, if the input DT had any.
    bootargs: Option<CString>,
    /// Per-CPU information; at most `MAX_CPUS` entries.
    cpus: ArrayVec<[CpuInfo; DeviceTreeInfo::MAX_CPUS]>,
    /// CPU topology, if any was read along with the CPU information.
    cpu_topology: Option<CpuTopology>,
    /// Validated PCI information.
    pci_info: PciInfo,
    /// Addresses of the serial devices to keep in the sanitized DT.
    serial_info: SerialInfo,
    /// Validated swiotlb information.
    pub swiotlb_info: SwiotlbInfo,
    /// Device assignment information; only parsed when a VM DTBO is provided and a device
    /// assigner is available.
    device_assignment: Option<DeviceAssignmentInfo>,
    /// Validated properties stored under `/avf/untrusted` by `patch_untrusted_props()`.
    untrusted_props: BTreeMap<CString, Vec<u8>>,
    /// Properties returned by `parse_vm_ref_dt()`, passed to `validate_vm_ref_dt()` when a VM
    /// reference DT is provided.
    vm_ref_dt_props_info: BTreeMap<CString, Vec<u8>>,
    /// Virtual cpufreq device information, if any.
    vcpufreq_info: Option<VcpufreqInfo>,
}
1008 
1009 impl DeviceTreeInfo {
1010     const MAX_CPUS: usize = 16;
1011 
gic_patched_size(num_cpus: usize) -> Option<usize>1012     const fn gic_patched_size(num_cpus: usize) -> Option<usize> {
1013         const GIC_REDIST_SIZE_PER_CPU: usize = 32 * SIZE_4KB;
1014 
1015         GIC_REDIST_SIZE_PER_CPU.checked_mul(num_cpus)
1016     }
1017 }
1018 
sanitize_device_tree( fdt: &mut Fdt, vm_dtbo: Option<&mut [u8]>, vm_ref_dt: Option<&[u8]>, guest_page_size: usize, ) -> Result<DeviceTreeInfo, RebootReason>1019 pub fn sanitize_device_tree(
1020     fdt: &mut Fdt,
1021     vm_dtbo: Option<&mut [u8]>,
1022     vm_ref_dt: Option<&[u8]>,
1023     guest_page_size: usize,
1024 ) -> Result<DeviceTreeInfo, RebootReason> {
1025     let vm_dtbo = match vm_dtbo {
1026         Some(vm_dtbo) => Some(VmDtbo::from_mut_slice(vm_dtbo).map_err(|e| {
1027             error!("Failed to load VM DTBO: {e}");
1028             RebootReason::InvalidFdt
1029         })?),
1030         None => None,
1031     };
1032 
1033     let info = parse_device_tree(fdt, vm_dtbo.as_deref(), guest_page_size)?;
1034 
1035     fdt.clone_from(FDT_TEMPLATE).map_err(|e| {
1036         error!("Failed to instantiate FDT from the template DT: {e}");
1037         RebootReason::InvalidFdt
1038     })?;
1039 
1040     fdt.unpack().map_err(|e| {
1041         error!("Failed to unpack DT for patching: {e}");
1042         RebootReason::InvalidFdt
1043     })?;
1044 
1045     if let Some(device_assignment_info) = &info.device_assignment {
1046         let vm_dtbo = vm_dtbo.unwrap();
1047         device_assignment_info.filter(vm_dtbo).map_err(|e| {
1048             error!("Failed to filter VM DTBO: {e}");
1049             RebootReason::InvalidFdt
1050         })?;
1051         // SAFETY: Damaged VM DTBO isn't used in this API after this unsafe block.
1052         // VM DTBO can't be reused in any way as Fdt nor VmDtbo outside of this API because
1053         // it can only be instantiated after validation.
1054         unsafe {
1055             fdt.apply_overlay(vm_dtbo.as_mut()).map_err(|e| {
1056                 error!("Failed to apply filtered VM DTBO: {e}");
1057                 RebootReason::InvalidFdt
1058             })?;
1059         }
1060     }
1061 
1062     if let Some(vm_ref_dt) = vm_ref_dt {
1063         let vm_ref_dt = Fdt::from_slice(vm_ref_dt).map_err(|e| {
1064             error!("Failed to load VM reference DT: {e}");
1065             RebootReason::InvalidFdt
1066         })?;
1067 
1068         validate_vm_ref_dt(fdt, vm_ref_dt, &info.vm_ref_dt_props_info).map_err(|e| {
1069             error!("Failed to apply VM reference DT: {e}");
1070             RebootReason::InvalidFdt
1071         })?;
1072     }
1073 
1074     patch_device_tree(fdt, &info)?;
1075 
1076     // TODO(b/317201360): Ensure no overlapping in <reg> among devices
1077 
1078     fdt.pack().map_err(|e| {
1079         error!("Failed to unpack DT after patching: {e}");
1080         RebootReason::InvalidFdt
1081     })?;
1082 
1083     Ok(info)
1084 }
1085 
/// Reads and validates from the input DT everything needed to build the sanitized DT.
///
/// `vm_dtbo`, when provided, is used to parse device assignment information; this is skipped
/// with a warning if no device assigner is available.
///
/// # Errors
///
/// Every failure is logged. Read failures handled here are reported as
/// `RebootReason::InvalidFdt`, except for a missing memory sharer or a failure to get its
/// granule, which are reported as `RebootReason::InternalError`. Errors from validation helpers
/// that already return a `RebootReason` are propagated as is.
fn parse_device_tree(
    fdt: &Fdt,
    vm_dtbo: Option<&VmDtbo>,
    guest_page_size: usize,
) -> Result<DeviceTreeInfo, RebootReason> {
    let initrd_range = read_initrd_range_from(fdt).map_err(|e| {
        error!("Failed to read initrd range from DT: {e}");
        RebootReason::InvalidFdt
    })?;

    let memory_range = read_and_validate_memory_range(fdt, guest_page_size)?;

    let bootargs = read_bootargs_from(fdt).map_err(|e| {
        error!("Failed to read bootargs from DT: {e}");
        RebootReason::InvalidFdt
    })?;

    let (cpus, cpu_topology) = read_cpu_info_from(fdt).map_err(|e| {
        error!("Failed to read CPU info from DT: {e}");
        RebootReason::InvalidFdt
    })?;
    validate_cpu_info(&cpus).map_err(|e| {
        error!("Failed to validate CPU info from DT: {e}");
        RebootReason::InvalidFdt
    })?;

    // The vcpufreq information is optional but must be valid when present.
    let vcpufreq_info = read_vcpufreq_info(fdt).map_err(|e| {
        error!("Failed to read vcpufreq info from DT: {e}");
        RebootReason::InvalidFdt
    })?;
    if let Some(ref info) = vcpufreq_info {
        validate_vcpufreq_info(info, &cpus).map_err(|e| {
            error!("Failed to validate vcpufreq info from DT: {e}");
            RebootReason::InvalidFdt
        })?;
    }

    let pci_info = read_pci_info_from(fdt).map_err(|e| {
        error!("Failed to read pci info from DT: {e}");
        RebootReason::InvalidFdt
    })?;
    validate_pci_info(&pci_info, &memory_range)?;

    // The watchdog description is only validated, not stored: `patch_wdt_info()` regenerates
    // the expected interrupts from the number of CPUs.
    let wdt_info = read_wdt_info_from(fdt).map_err(|e| {
        error!("Failed to read vCPU stall detector info from DT: {e}");
        RebootReason::InvalidFdt
    })?;
    validate_wdt_info(&wdt_info, cpus.len())?;

    let serial_info = read_serial_info_from(fdt).map_err(|e| {
        error!("Failed to read serial info from DT: {e}");
        RebootReason::InvalidFdt
    })?;

    // Unlike the vcpufreq information, the swiotlb information is mandatory.
    let swiotlb_info = SwiotlbInfo::new_from_fdt(fdt)
        .map_err(|e| {
            error!("Failed to read swiotlb info from DT: {e}");
            RebootReason::InvalidFdt
        })?
        .ok_or_else(|| {
            error!("Swiotlb info missing from DT");
            RebootReason::InvalidFdt
        })?;
    validate_swiotlb_info(&swiotlb_info, &memory_range, guest_page_size)?;

    let device_assignment = match vm_dtbo {
        Some(vm_dtbo) => {
            if let Some(hypervisor) = get_device_assigner() {
                // TODO(ptosi): Cache the (single?) granule once, in vmbase.
                let granule = get_mem_sharer()
                    .ok_or_else(|| {
                        error!("No MEM_SHARE found during device assignment validation");
                        RebootReason::InternalError
                    })?
                    .granule()
                    .map_err(|e| {
                        error!("Failed to get granule for device assignment validation: {e}");
                        RebootReason::InternalError
                    })?;
                DeviceAssignmentInfo::parse(fdt, vm_dtbo, hypervisor, granule).map_err(|e| {
                    error!("Failed to parse device assignment from DT and VM DTBO: {e}");
                    RebootReason::InvalidFdt
                })?
            } else {
                warn!(
                    "Device assignment is ignored because device assigning hypervisor is missing"
                );
                None
            }
        }
        None => None,
    };

    let untrusted_props = parse_untrusted_props(fdt).map_err(|e| {
        error!("Failed to read untrusted properties: {e}");
        RebootReason::InvalidFdt
    })?;
    validate_untrusted_props(&untrusted_props).map_err(|e| {
        error!("Failed to validate untrusted properties: {e}");
        RebootReason::InvalidFdt
    })?;

    let vm_ref_dt_props_info = parse_vm_ref_dt(fdt).map_err(|e| {
        error!("Failed to read names of properties under /avf from DT: {e}");
        RebootReason::InvalidFdt
    })?;

    Ok(DeviceTreeInfo {
        initrd_range,
        memory_range,
        bootargs,
        cpus,
        cpu_topology,
        pci_info,
        serial_info,
        swiotlb_info,
        device_assignment,
        untrusted_props,
        vm_ref_dt_props_info,
        vcpufreq_info,
    })
}
1208 
/// Writes the values of `info` to `fdt`, one patching step at a time.
///
/// The initrd range and the bootargs are only patched when present in `info`. When `info` has
/// no device assignment information, `device_assignment::clean()` is called on `fdt` instead of
/// patching it.
///
/// # Errors
///
/// The first failing step is logged and reported as `RebootReason::InvalidFdt`; later steps are
/// not run.
fn patch_device_tree(fdt: &mut Fdt, info: &DeviceTreeInfo) -> Result<(), RebootReason> {
    if let Some(initrd_range) = &info.initrd_range {
        patch_initrd_range(fdt, initrd_range).map_err(|e| {
            error!("Failed to patch initrd range to DT: {e}");
            RebootReason::InvalidFdt
        })?;
    }
    patch_memory_range(fdt, &info.memory_range).map_err(|e| {
        error!("Failed to patch memory range to DT: {e}");
        RebootReason::InvalidFdt
    })?;
    if let Some(bootargs) = &info.bootargs {
        patch_bootargs(fdt, bootargs.as_c_str()).map_err(|e| {
            error!("Failed to patch bootargs to DT: {e}");
            RebootReason::InvalidFdt
        })?;
    }
    patch_cpus(fdt, &info.cpus, &info.cpu_topology).map_err(|e| {
        error!("Failed to patch cpus to DT: {e}");
        RebootReason::InvalidFdt
    })?;
    patch_vcpufreq(fdt, &info.vcpufreq_info).map_err(|e| {
        error!("Failed to patch vcpufreq info to DT: {e}");
        RebootReason::InvalidFdt
    })?;
    patch_pci_info(fdt, &info.pci_info).map_err(|e| {
        error!("Failed to patch pci info to DT: {e}");
        RebootReason::InvalidFdt
    })?;
    // The watchdog, GIC and timer nodes only depend on the number of CPUs.
    patch_wdt_info(fdt, info.cpus.len()).map_err(|e| {
        error!("Failed to patch wdt info to DT: {e}");
        RebootReason::InvalidFdt
    })?;
    patch_serial_info(fdt, &info.serial_info).map_err(|e| {
        error!("Failed to patch serial info to DT: {e}");
        RebootReason::InvalidFdt
    })?;
    patch_swiotlb_info(fdt, &info.swiotlb_info).map_err(|e| {
        error!("Failed to patch swiotlb info to DT: {e}");
        RebootReason::InvalidFdt
    })?;
    patch_gic(fdt, info.cpus.len()).map_err(|e| {
        error!("Failed to patch gic info to DT: {e}");
        RebootReason::InvalidFdt
    })?;
    patch_timer(fdt, info.cpus.len()).map_err(|e| {
        error!("Failed to patch timer info to DT: {e}");
        RebootReason::InvalidFdt
    })?;
    if let Some(device_assignment) = &info.device_assignment {
        // Note: We patch values after the VM DTBO is overlaid because the patch may require more
        // space than the VM DTBO's underlying slice has allocated.
        device_assignment.patch(fdt).map_err(|e| {
            error!("Failed to patch device assignment info to DT: {e}");
            RebootReason::InvalidFdt
        })?;
    } else {
        device_assignment::clean(fdt).map_err(|e| {
            error!("Failed to clean pre-polulated DT nodes for device assignment: {e}");
            RebootReason::InvalidFdt
        })?;
    }
    patch_untrusted_props(fdt, &info.untrusted_props).map_err(|e| {
        error!("Failed to patch untrusted properties: {e}");
        RebootReason::InvalidFdt
    })?;

    Ok(())
}
1278 
1279 /// Modifies the input DT according to the fields of the configuration.
modify_for_next_stage( fdt: &mut Fdt, bcc: &[u8], new_instance: bool, strict_boot: bool, debug_policy: Option<&[u8]>, debuggable: bool, kaslr_seed: u64, ) -> libfdt::Result<()>1280 pub fn modify_for_next_stage(
1281     fdt: &mut Fdt,
1282     bcc: &[u8],
1283     new_instance: bool,
1284     strict_boot: bool,
1285     debug_policy: Option<&[u8]>,
1286     debuggable: bool,
1287     kaslr_seed: u64,
1288 ) -> libfdt::Result<()> {
1289     if let Some(debug_policy) = debug_policy {
1290         let backup = Vec::from(fdt.as_slice());
1291         fdt.unpack()?;
1292         let backup_fdt = Fdt::from_slice(backup.as_slice()).unwrap();
1293         if apply_debug_policy(fdt, backup_fdt, debug_policy)? {
1294             info!("Debug policy applied.");
1295         } else {
1296             // apply_debug_policy restored fdt to backup_fdt so unpack it again.
1297             fdt.unpack()?;
1298         }
1299     } else {
1300         info!("No debug policy found.");
1301         fdt.unpack()?;
1302     }
1303 
1304     patch_dice_node(fdt, bcc.as_ptr() as usize, bcc.len())?;
1305 
1306     if let Some(mut chosen) = fdt.chosen_mut()? {
1307         empty_or_delete_prop(&mut chosen, cstr!("avf,strict-boot"), strict_boot)?;
1308         empty_or_delete_prop(&mut chosen, cstr!("avf,new-instance"), new_instance)?;
1309         chosen.setprop_inplace(cstr!("kaslr-seed"), &kaslr_seed.to_be_bytes())?;
1310     };
1311     if !debuggable {
1312         if let Some(bootargs) = read_bootargs_from(fdt)? {
1313             filter_out_dangerous_bootargs(fdt, &bootargs)?;
1314         }
1315     }
1316 
1317     fdt.pack()?;
1318 
1319     Ok(())
1320 }
1321 
1322 /// Patch the "google,open-dice"-compatible reserved-memory node to point to the bcc range
patch_dice_node(fdt: &mut Fdt, addr: usize, size: usize) -> libfdt::Result<()>1323 fn patch_dice_node(fdt: &mut Fdt, addr: usize, size: usize) -> libfdt::Result<()> {
1324     // We reject DTs with missing reserved-memory node as validation should have checked that the
1325     // "swiotlb" subnode (compatible = "restricted-dma-pool") was present.
1326     let node = fdt.node_mut(cstr!("/reserved-memory"))?.ok_or(libfdt::FdtError::NotFound)?;
1327 
1328     let mut node = node.next_compatible(cstr!("google,open-dice"))?.ok_or(FdtError::NotFound)?;
1329 
1330     let addr: u64 = addr.try_into().unwrap();
1331     let size: u64 = size.try_into().unwrap();
1332     node.setprop_inplace(cstr!("reg"), [addr.to_be_bytes(), size.to_be_bytes()].as_flattened())
1333 }
1334 
empty_or_delete_prop( fdt_node: &mut FdtNodeMut, prop_name: &CStr, keep_prop: bool, ) -> libfdt::Result<()>1335 fn empty_or_delete_prop(
1336     fdt_node: &mut FdtNodeMut,
1337     prop_name: &CStr,
1338     keep_prop: bool,
1339 ) -> libfdt::Result<()> {
1340     if keep_prop {
1341         fdt_node.setprop_empty(prop_name)
1342     } else {
1343         fdt_node
1344             .delprop(prop_name)
1345             .or_else(|e| if e == FdtError::NotFound { Ok(()) } else { Err(e) })
1346     }
1347 }
1348 
1349 /// Apply the debug policy overlay to the guest DT.
1350 ///
1351 /// Returns Ok(true) on success, Ok(false) on recovered failure and Err(_) on corruption of the DT.
apply_debug_policy( fdt: &mut Fdt, backup_fdt: &Fdt, debug_policy: &[u8], ) -> libfdt::Result<bool>1352 fn apply_debug_policy(
1353     fdt: &mut Fdt,
1354     backup_fdt: &Fdt,
1355     debug_policy: &[u8],
1356 ) -> libfdt::Result<bool> {
1357     let mut debug_policy = Vec::from(debug_policy);
1358     let overlay = match Fdt::from_mut_slice(debug_policy.as_mut_slice()) {
1359         Ok(overlay) => overlay,
1360         Err(e) => {
1361             warn!("Corrupted debug policy found: {e}. Not applying.");
1362             return Ok(false);
1363         }
1364     };
1365 
1366     // SAFETY: on failure, the corrupted DT is restored using the backup.
1367     if let Err(e) = unsafe { fdt.apply_overlay(overlay) } {
1368         warn!("Failed to apply debug policy: {e}. Recovering...");
1369         fdt.clone_from(backup_fdt)?;
1370         // A successful restoration is considered success because an invalid debug policy
1371         // shouldn't DOS the pvmfw
1372         Ok(false)
1373     } else {
1374         Ok(true)
1375     }
1376 }
1377 
has_common_debug_policy(fdt: &Fdt, debug_feature_name: &CStr) -> libfdt::Result<bool>1378 fn has_common_debug_policy(fdt: &Fdt, debug_feature_name: &CStr) -> libfdt::Result<bool> {
1379     if let Some(node) = fdt.node(cstr!("/avf/guest/common"))? {
1380         if let Some(value) = node.getprop_u32(debug_feature_name)? {
1381             return Ok(value == 1);
1382         }
1383     }
1384     Ok(false) // if the policy doesn't exist or not 1, don't enable the debug feature
1385 }
1386 
filter_out_dangerous_bootargs(fdt: &mut Fdt, bootargs: &CStr) -> libfdt::Result<()>1387 fn filter_out_dangerous_bootargs(fdt: &mut Fdt, bootargs: &CStr) -> libfdt::Result<()> {
1388     let has_crashkernel = has_common_debug_policy(fdt, cstr!("ramdump"))?;
1389     let has_console = has_common_debug_policy(fdt, cstr!("log"))?;
1390 
1391     let accepted: &[(&str, Box<dyn Fn(Option<&str>) -> bool>)] = &[
1392         ("panic", Box::new(|v| if let Some(v) = v { v == "=-1" } else { false })),
1393         ("crashkernel", Box::new(|_| has_crashkernel)),
1394         ("console", Box::new(|_| has_console)),
1395     ];
1396 
1397     // parse and filter out unwanted
1398     let mut filtered = Vec::new();
1399     for arg in BootArgsIterator::new(bootargs).map_err(|e| {
1400         info!("Invalid bootarg: {e}");
1401         FdtError::BadValue
1402     })? {
1403         match accepted.iter().find(|&t| t.0 == arg.name()) {
1404             Some((_, pred)) if pred(arg.value()) => filtered.push(arg),
1405             _ => debug!("Rejected bootarg {}", arg.as_ref()),
1406         }
1407     }
1408 
1409     // flatten into a new C-string
1410     let mut new_bootargs = Vec::new();
1411     for (i, arg) in filtered.iter().enumerate() {
1412         if i != 0 {
1413             new_bootargs.push(b' '); // separator
1414         }
1415         new_bootargs.extend_from_slice(arg.as_ref().as_bytes());
1416     }
1417     new_bootargs.push(b'\0');
1418 
1419     let mut node = fdt.chosen_mut()?.ok_or(FdtError::NotFound)?;
1420     node.setprop(cstr!("bootargs"), new_bootargs.as_slice())
1421 }
1422