xref: /aosp_15_r20/external/crosvm/devices/src/virtio/virtio_pci_device.rs (revision bb4ee6a4ae7042d18b07a98463b9c8b875e44b39)
1 // Copyright 2018 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 use std::collections::BTreeMap;
6 use std::sync::Arc;
7 
8 #[cfg(target_arch = "x86_64")]
9 use acpi_tables::sdt::SDT;
10 use anyhow::anyhow;
11 use anyhow::Context;
12 use base::debug;
13 use base::error;
14 use base::trace;
15 use base::AsRawDescriptor;
16 use base::AsRawDescriptors;
17 use base::Event;
18 use base::Protection;
19 use base::RawDescriptor;
20 use base::Result;
21 use base::SharedMemory;
22 use base::Tube;
23 use data_model::Le32;
24 use hypervisor::Datamatch;
25 use hypervisor::MemCacheType;
26 use libc::ERANGE;
27 #[cfg(target_arch = "x86_64")]
28 use metrics::MetricEventType;
29 use resources::Alloc;
30 use resources::AllocOptions;
31 use resources::SystemAllocator;
32 use serde::Deserialize;
33 use serde::Serialize;
34 use sync::Mutex;
35 use virtio_sys::virtio_config::VIRTIO_CONFIG_S_ACKNOWLEDGE;
36 use virtio_sys::virtio_config::VIRTIO_CONFIG_S_DRIVER;
37 use virtio_sys::virtio_config::VIRTIO_CONFIG_S_DRIVER_OK;
38 use virtio_sys::virtio_config::VIRTIO_CONFIG_S_FAILED;
39 use virtio_sys::virtio_config::VIRTIO_CONFIG_S_FEATURES_OK;
40 use virtio_sys::virtio_config::VIRTIO_CONFIG_S_NEEDS_RESET;
41 use virtio_sys::virtio_config::VIRTIO_CONFIG_S_SUSPEND;
42 use vm_control::api::VmMemoryClient;
43 use vm_control::VmMemoryDestination;
44 use vm_control::VmMemoryRegionId;
45 use vm_control::VmMemorySource;
46 use vm_memory::GuestAddress;
47 use vm_memory::GuestMemory;
48 use zerocopy::AsBytes;
49 use zerocopy::FromBytes;
50 use zerocopy::FromZeroes;
51 
52 use self::virtio_pci_common_config::VirtioPciCommonConfig;
53 use super::*;
54 #[cfg(target_arch = "x86_64")]
55 use crate::acpi::PmWakeupEvent;
56 #[cfg(target_arch = "x86_64")]
57 use crate::pci::pm::PciDevicePower;
58 use crate::pci::pm::PciPmCap;
59 use crate::pci::pm::PmConfig;
60 use crate::pci::pm::PmStatusChange;
61 use crate::pci::BarRange;
62 use crate::pci::MsixCap;
63 use crate::pci::MsixConfig;
64 use crate::pci::MsixStatus;
65 use crate::pci::PciAddress;
66 use crate::pci::PciBarConfiguration;
67 use crate::pci::PciBarIndex;
68 use crate::pci::PciBarPrefetchable;
69 use crate::pci::PciBarRegionType;
70 use crate::pci::PciBaseSystemPeripheralSubclass;
71 use crate::pci::PciCapability;
72 use crate::pci::PciCapabilityID;
73 use crate::pci::PciClassCode;
74 use crate::pci::PciConfiguration;
75 use crate::pci::PciDevice;
76 use crate::pci::PciDeviceError;
77 use crate::pci::PciDisplaySubclass;
78 use crate::pci::PciHeaderType;
79 use crate::pci::PciId;
80 use crate::pci::PciInputDeviceSubclass;
81 use crate::pci::PciInterruptPin;
82 use crate::pci::PciMassStorageSubclass;
83 use crate::pci::PciMultimediaSubclass;
84 use crate::pci::PciNetworkControllerSubclass;
85 use crate::pci::PciSimpleCommunicationControllerSubclass;
86 use crate::pci::PciSubclass;
87 use crate::pci::PciWirelessControllerSubclass;
88 use crate::virtio::ipc_memory_mapper::IpcMemoryMapper;
89 #[cfg(feature = "pci-hotplug")]
90 use crate::HotPluggable;
91 use crate::IrqLevelEvent;
92 use crate::Suspendable;
93 
/// Type of a virtio-over-PCI capability structure, written to the `cfg_type`
/// field of `VirtioPciCap` (values per the virtio PCI transport spec).
#[repr(u8)]
#[derive(Debug, Copy, Clone, enumn::N)]
pub enum PciCapabilityType {
    CommonConfig = 1,
    NotifyConfig = 2,
    IsrConfig = 3,
    DeviceConfig = 4,
    PciConfig = 5,
    // Doorbell, Notification and SharedMemory are Virtio Vhost User related PCI
    // capabilities. Specified in 5.7.7.4 here
    // https://stefanha.github.io/virtio/vhost-user-slave.html#x1-2830007.
    DoorbellConfig = 6,
    NotificationConfig = 7,
    SharedMemoryConfig = 8,
}
109 
/// `virtio_pci_cap`: a vendor-specific PCI capability pointing at a region of
/// a BAR that holds one of the virtio configuration structures.
#[allow(dead_code)]
#[repr(C)]
#[derive(Clone, Copy, FromZeroes, FromBytes, AsBytes)]
pub struct VirtioPciCap {
    // cap_vndr and cap_next are autofilled based on id() in pci configuration
    pub cap_vndr: u8, // Generic PCI field: PCI_CAP_ID_VNDR
    pub cap_next: u8, // Generic PCI field: next ptr
    pub cap_len: u8,  // Generic PCI field: capability length
    pub cfg_type: u8, // Identifies the structure (see `PciCapabilityType`).
    pub bar: u8,      // Where to find it.
    id: u8,           // Multiple capabilities of the same type
    padding: [u8; 2], // Pad to full dword.
    pub offset: Le32, // Offset within bar.
    pub length: Le32, // Length of the structure, in bytes.
}
125 
126 impl PciCapability for VirtioPciCap {
bytes(&self) -> &[u8]127     fn bytes(&self) -> &[u8] {
128         self.as_bytes()
129     }
130 
id(&self) -> PciCapabilityID131     fn id(&self) -> PciCapabilityID {
132         PciCapabilityID::VendorSpecific
133     }
134 
writable_bits(&self) -> Vec<u32>135     fn writable_bits(&self) -> Vec<u32> {
136         vec![0u32; 4]
137     }
138 }
139 
140 impl VirtioPciCap {
new(cfg_type: PciCapabilityType, bar: u8, offset: u32, length: u32) -> Self141     pub fn new(cfg_type: PciCapabilityType, bar: u8, offset: u32, length: u32) -> Self {
142         VirtioPciCap {
143             cap_vndr: 0,
144             cap_next: 0,
145             cap_len: std::mem::size_of::<VirtioPciCap>() as u8,
146             cfg_type: cfg_type as u8,
147             bar,
148             id: 0,
149             padding: [0; 2],
150             offset: Le32::from(offset),
151             length: Le32::from(length),
152         }
153     }
154 
set_cap_len(&mut self, cap_len: u8)155     pub fn set_cap_len(&mut self, cap_len: u8) {
156         self.cap_len = cap_len;
157     }
158 }
159 
/// `virtio_pci_notify_cap`: a `VirtioPciCap` extended with the multiplier
/// used to compute each queue's notification address within the BAR.
#[allow(dead_code)]
#[repr(C)]
#[derive(Clone, Copy, AsBytes, FromZeroes, FromBytes)]
pub struct VirtioPciNotifyCap {
    cap: VirtioPciCap,
    // Multiplier applied to a queue's notify_off to locate its doorbell.
    notify_off_multiplier: Le32,
}
167 
168 impl PciCapability for VirtioPciNotifyCap {
bytes(&self) -> &[u8]169     fn bytes(&self) -> &[u8] {
170         self.as_bytes()
171     }
172 
id(&self) -> PciCapabilityID173     fn id(&self) -> PciCapabilityID {
174         PciCapabilityID::VendorSpecific
175     }
176 
writable_bits(&self) -> Vec<u32>177     fn writable_bits(&self) -> Vec<u32> {
178         vec![0u32; 5]
179     }
180 }
181 
impl VirtioPciNotifyCap {
    /// Builds a notify capability covering `length` bytes at `offset` within
    /// `bar`, carrying the queue-notify offset `multiplier`.
    pub fn new(
        cfg_type: PciCapabilityType,
        bar: u8,
        offset: u32,
        length: u32,
        multiplier: Le32,
    ) -> Self {
        VirtioPciNotifyCap {
            cap: VirtioPciCap {
                // cap_vndr/cap_next are filled in when the capability is added
                // to the PCI configuration.
                cap_vndr: 0,
                cap_next: 0,
                cap_len: std::mem::size_of::<VirtioPciNotifyCap>() as u8,
                cfg_type: cfg_type as u8,
                bar,
                id: 0,
                padding: [0; 2],
                offset: Le32::from(offset),
                length: Le32::from(length),
            },
            notify_off_multiplier: multiplier,
        }
    }
}
206 
/// `virtio_pci_shm_cap`: describes a shared memory region. Extends
/// `VirtioPciCap` (whose `offset`/`length` fields hold the low 32 bits) with
/// the high 32 bits of the region's offset and length.
#[repr(C)]
#[derive(Clone, Copy, AsBytes, FromZeroes, FromBytes)]
pub struct VirtioPciShmCap {
    cap: VirtioPciCap,
    offset_hi: Le32, // Most sig 32 bits of offset
    length_hi: Le32, // Most sig 32 bits of length
}
214 
215 impl PciCapability for VirtioPciShmCap {
bytes(&self) -> &[u8]216     fn bytes(&self) -> &[u8] {
217         self.as_bytes()
218     }
219 
id(&self) -> PciCapabilityID220     fn id(&self) -> PciCapabilityID {
221         PciCapabilityID::VendorSpecific
222     }
223 
writable_bits(&self) -> Vec<u32>224     fn writable_bits(&self) -> Vec<u32> {
225         vec![0u32; 6]
226     }
227 }
228 
229 impl VirtioPciShmCap {
new(cfg_type: PciCapabilityType, bar: u8, offset: u64, length: u64, shmid: u8) -> Self230     pub fn new(cfg_type: PciCapabilityType, bar: u8, offset: u64, length: u64, shmid: u8) -> Self {
231         VirtioPciShmCap {
232             cap: VirtioPciCap {
233                 cap_vndr: 0,
234                 cap_next: 0,
235                 cap_len: std::mem::size_of::<VirtioPciShmCap>() as u8,
236                 cfg_type: cfg_type as u8,
237                 bar,
238                 id: shmid,
239                 padding: [0; 2],
240                 offset: Le32::from(offset as u32),
241                 length: Le32::from(length as u32),
242             },
243             offset_hi: Le32::from((offset >> 32) as u32),
244             length_hi: Le32::from((length >> 32) as u32),
245         }
246     }
247 }
248 
// Allocate one bar for the structs pointed to by the capability structures.
// Each structure gets its own aligned region within the settings BAR.
const COMMON_CONFIG_BAR_OFFSET: u64 = 0x0000;
const COMMON_CONFIG_SIZE: u64 = 56;
const COMMON_CONFIG_LAST: u64 = COMMON_CONFIG_BAR_OFFSET + COMMON_CONFIG_SIZE - 1;
const ISR_CONFIG_BAR_OFFSET: u64 = 0x1000;
const ISR_CONFIG_SIZE: u64 = 1;
const ISR_CONFIG_LAST: u64 = ISR_CONFIG_BAR_OFFSET + ISR_CONFIG_SIZE - 1;
const DEVICE_CONFIG_BAR_OFFSET: u64 = 0x2000;
const DEVICE_CONFIG_SIZE: u64 = 0x1000;
const DEVICE_CONFIG_LAST: u64 = DEVICE_CONFIG_BAR_OFFSET + DEVICE_CONFIG_SIZE - 1;
const NOTIFICATION_BAR_OFFSET: u64 = 0x3000;
const NOTIFICATION_SIZE: u64 = 0x1000;
const NOTIFICATION_LAST: u64 = NOTIFICATION_BAR_OFFSET + NOTIFICATION_SIZE - 1;
const MSIX_TABLE_BAR_OFFSET: u64 = 0x6000;
const MSIX_TABLE_SIZE: u64 = 0x1000;
const MSIX_TABLE_LAST: u64 = MSIX_TABLE_BAR_OFFSET + MSIX_TABLE_SIZE - 1;
const MSIX_PBA_BAR_OFFSET: u64 = 0x7000;
const MSIX_PBA_SIZE: u64 = 0x1000;
const MSIX_PBA_LAST: u64 = MSIX_PBA_BAR_OFFSET + MSIX_PBA_SIZE - 1;
// Total size of the settings BAR (covers all of the regions above).
const CAPABILITY_BAR_SIZE: u64 = 0x8000;

const NOTIFY_OFF_MULTIPLIER: u32 = 4; // A dword per notification address.

// Virtio PCI vendor/device identification.
const VIRTIO_PCI_VENDOR_ID: u16 = 0x1af4;
const VIRTIO_PCI_DEVICE_ID_BASE: u16 = 0x1040; // Add to device type to get device ID.
const VIRTIO_PCI_REVISION_ID: u8 = 1;

// BAR index holding the capability structures laid out above.
const CAPABILITIES_BAR_NUM: usize = 0;
// BAR index used for the device's shared memory regions, when it has any.
const SHMEM_BAR_NUM: usize = 2;
278 
/// Notify ("kick") event for a single virtqueue.
struct QueueEvent {
    event: Event,
    // Whether `event` is currently registered as an ioevent with the
    // hypervisor (see `activate` / `unregister_ioevents`).
    ioevent_registered: bool,
}
283 
/// Implements the
/// [PCI](http://docs.oasis-open.org/virtio/virtio/v1.0/cs04/virtio-v1.0-cs04.html#x1-650001)
/// transport for virtio devices.
pub struct VirtioPciDevice {
    // PCI configuration space state (IDs, BARs, capabilities).
    config_regs: PciConfiguration,
    // Address the wrapped device requested, if any.
    preferred_address: Option<PciAddress>,
    // Address assigned by `allocate_address`, once it has run.
    pci_address: Option<PciAddress>,

    // The wrapped virtio device backend.
    device: Box<dyn VirtioDevice>,
    // True once `device.activate` has succeeded.
    device_activated: bool,
    // If true, `assign_irq` does not configure legacy INTx routing.
    disable_intx: bool,

    // Created in `activate`; absent until then.
    interrupt: Option<Interrupt>,
    // Level-triggered interrupt event supplied via `assign_irq`.
    interrupt_evt: Option<IrqLevelEvent>,
    // One `QueueConfig` per virtqueue advertised by `device`.
    queues: Vec<QueueConfig>,
    // One notify event per virtqueue, parallel to `queues`.
    queue_evts: Vec<QueueEvent>,
    mem: GuestMemory,
    // Index of the BAR holding the virtio capability structures.
    settings_bar: PciBarIndex,
    msix_config: Arc<Mutex<MsixConfig>>,
    pm_config: Arc<Mutex<PmConfig>>,
    common_config: VirtioPciCommonConfig,

    iommu: Option<Arc<Mutex<IpcMemoryMapper>>>,

    // API client that is present if the device has shared memory regions, and
    // is used to map/unmap files into the shared memory region.
    shared_memory_vm_memory_client: Option<VmMemoryClient>,

    // API client for registration of ioevents when PCI BAR reprogramming is detected.
    ioevent_vm_memory_client: VmMemoryClient,

    // State only present while asleep.
    sleep_state: Option<SleepState>,

    // Tube used for power-management wakeup events (see `activate`).
    vm_control_tube: Arc<Mutex<Tube>>,
}
320 
/// Sleep status recorded in `VirtioPciDevice::sleep_state`; present only
/// while the device is asleep.
enum SleepState {
    // Asleep and device hasn't been activated yet by the guest.
    Inactive,
    // Asleep and device has been activated by the guest.
    Active {
        /// The queues returned from `VirtioDevice::virtio_sleep`.
        /// Map is from queue index -> Queue.
        activated_queues: BTreeMap<usize, Queue>,
    },
}
331 
/// Serialized form of `VirtioPciDevice` used for snapshot/restore.
#[derive(Serialize, Deserialize)]
struct VirtioPciDeviceSnapshot {
    config_regs: serde_json::Value,

    // Snapshot of the wrapped `VirtioDevice`.
    inner_device: serde_json::Value,
    device_activated: bool,

    interrupt: Option<InterruptSnapshot>,
    msix_config: serde_json::Value,
    common_config: VirtioPciCommonConfig,

    // Snapshots of each `QueueConfig` in `queues`.
    queues: Vec<serde_json::Value>,
    // Snapshots of the activated queues (queue index -> queue), present only
    // if the device was activated.
    activated_queues: Option<Vec<(usize, serde_json::Value)>>,
}
346 
347 impl VirtioPciDevice {
    /// Constructs a new PCI transport for the given virtio device.
    ///
    /// * `mem` - guest memory the device operates on.
    /// * `device` - the virtio device backend to expose over PCI.
    /// * `msi_device_tube` - passed to `MsixConfig` for MSI routing.
    /// * `disable_intx` - if true, legacy INTx routing is not configured.
    /// * `shared_memory_vm_memory_client` - must be `Some` iff `device`
    ///   exposes a shared memory region; used to map files into that region.
    /// * `ioevent_vm_memory_client` - used to (un)register queue notify
    ///   ioevents with the hypervisor.
    /// * `vm_control_tube` - used for power-management wakeup events.
    pub fn new(
        mem: GuestMemory,
        device: Box<dyn VirtioDevice>,
        msi_device_tube: Tube,
        disable_intx: bool,
        shared_memory_vm_memory_client: Option<VmMemoryClient>,
        ioevent_vm_memory_client: VmMemoryClient,
        vm_control_tube: Tube,
    ) -> Result<Self> {
        // shared_memory_vm_memory_client is required if there are shared memory regions.
        assert_eq!(
            device.get_shared_memory_region().is_none(),
            shared_memory_vm_memory_client.is_none()
        );

        // One notify event per virtqueue; none is registered as an ioevent yet.
        let mut queue_evts = Vec::new();
        for _ in device.queue_max_sizes() {
            queue_evts.push(QueueEvent {
                event: Event::new()?,
                ioevent_registered: false,
            });
        }
        let queues = device
            .queue_max_sizes()
            .iter()
            .map(|&s| QueueConfig::new(s, device.features()))
            .collect();

        let pci_device_id = VIRTIO_PCI_DEVICE_ID_BASE + device.device_type() as u16;

        // Map each virtio device type to a PCI class/subclass pair.
        let (pci_device_class, pci_device_subclass) = match device.device_type() {
            DeviceType::Net => (
                PciClassCode::NetworkController,
                &PciNetworkControllerSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Block => (
                PciClassCode::MassStorage,
                &PciMassStorageSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Console => (
                PciClassCode::SimpleCommunicationController,
                &PciSimpleCommunicationControllerSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Rng => (
                PciClassCode::BaseSystemPeripheral,
                &PciBaseSystemPeripheralSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Balloon => (
                PciClassCode::BaseSystemPeripheral,
                &PciBaseSystemPeripheralSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Scsi => (
                PciClassCode::MassStorage,
                &PciMassStorageSubclass::Scsi as &dyn PciSubclass,
            ),
            DeviceType::P9 => (
                PciClassCode::NetworkController,
                &PciNetworkControllerSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Gpu => (
                PciClassCode::DisplayController,
                &PciDisplaySubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Input => (
                PciClassCode::InputDevice,
                &PciInputDeviceSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Vsock => (
                PciClassCode::NetworkController,
                &PciNetworkControllerSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Iommu => (
                PciClassCode::BaseSystemPeripheral,
                &PciBaseSystemPeripheralSubclass::Iommu as &dyn PciSubclass,
            ),
            DeviceType::Sound => (
                PciClassCode::MultimediaController,
                &PciMultimediaSubclass::AudioController as &dyn PciSubclass,
            ),
            DeviceType::Fs => (
                PciClassCode::MassStorage,
                &PciMassStorageSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Pmem => (
                PciClassCode::MassStorage,
                &PciMassStorageSubclass::NonVolatileMemory as &dyn PciSubclass,
            ),
            DeviceType::Mac80211HwSim => (
                PciClassCode::WirelessController,
                &PciWirelessControllerSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::VideoEncoder => (
                PciClassCode::MultimediaController,
                &PciMultimediaSubclass::VideoController as &dyn PciSubclass,
            ),
            DeviceType::VideoDecoder => (
                PciClassCode::MultimediaController,
                &PciMultimediaSubclass::VideoController as &dyn PciSubclass,
            ),
            DeviceType::Media => (
                PciClassCode::MultimediaController,
                &PciMultimediaSubclass::VideoController as &dyn PciSubclass,
            ),
            DeviceType::Scmi => (
                PciClassCode::BaseSystemPeripheral,
                &PciBaseSystemPeripheralSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Wl => (
                PciClassCode::DisplayController,
                &PciDisplaySubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Tpm => (
                PciClassCode::BaseSystemPeripheral,
                &PciBaseSystemPeripheralSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Pvclock => (
                PciClassCode::BaseSystemPeripheral,
                &PciBaseSystemPeripheralSubclass::Other as &dyn PciSubclass,
            ),
        };

        let num_interrupts = device.num_interrupts();

        // One MSI-X vector per interrupt source plus one for configuration changes.
        let msix_num = u16::try_from(num_interrupts + 1).map_err(|_| base::Error::new(ERANGE))?;
        let msix_config = Arc::new(Mutex::new(MsixConfig::new(
            msix_num,
            msi_device_tube,
            PciId::new(VIRTIO_PCI_VENDOR_ID, pci_device_id).into(),
            device.debug_label(),
        )));

        let config_regs = PciConfiguration::new(
            VIRTIO_PCI_VENDOR_ID,
            pci_device_id,
            pci_device_class,
            pci_device_subclass,
            None,
            PciHeaderType::Device,
            VIRTIO_PCI_VENDOR_ID,
            pci_device_id,
            VIRTIO_PCI_REVISION_ID,
        );

        Ok(VirtioPciDevice {
            config_regs,
            preferred_address: device.pci_address(),
            pci_address: None,
            device,
            device_activated: false,
            disable_intx,
            interrupt: None,
            interrupt_evt: None,
            queues,
            queue_evts,
            mem,
            settings_bar: 0,
            msix_config,
            pm_config: Arc::new(Mutex::new(PmConfig::new(true))),
            common_config: VirtioPciCommonConfig {
                driver_status: 0,
                config_generation: 0,
                device_feature_select: 0,
                driver_feature_select: 0,
                queue_select: 0,
                msix_config: VIRTIO_MSI_NO_VECTOR,
            },
            iommu: None,
            shared_memory_vm_memory_client,
            ioevent_vm_memory_client,
            sleep_state: None,
            vm_control_tube: Arc::new(Mutex::new(vm_control_tube)),
        })
    }
523 
is_driver_ready(&self) -> bool524     fn is_driver_ready(&self) -> bool {
525         let ready_bits = (VIRTIO_CONFIG_S_ACKNOWLEDGE
526             | VIRTIO_CONFIG_S_DRIVER
527             | VIRTIO_CONFIG_S_DRIVER_OK
528             | VIRTIO_CONFIG_S_FEATURES_OK) as u8;
529         (self.common_config.driver_status & ready_bits) == ready_bits
530             && self.common_config.driver_status & VIRTIO_CONFIG_S_FAILED as u8 == 0
531     }
532 
is_device_suspended(&self) -> bool533     fn is_device_suspended(&self) -> bool {
534         (self.common_config.driver_status & VIRTIO_CONFIG_S_SUSPEND as u8) != 0
535     }
536 
    /// Determines if the driver has requested the device reset itself by
    /// writing `DEVICE_RESET` to the device status register.
    fn is_reset_requested(&self) -> bool {
        self.common_config.driver_status == DEVICE_RESET as u8
    }
541 
    /// Adds the virtio transport capabilities (common/ISR/device/notify/PCI
    /// config) plus the MSI-X and power-management capabilities, all pointing
    /// into `settings_bar`, and records that BAR as the settings BAR.
    fn add_settings_pci_capabilities(
        &mut self,
        settings_bar: u8,
    ) -> std::result::Result<(), PciDeviceError> {
        // Add pointers to the different configuration structures from the PCI capabilities.
        let common_cap = VirtioPciCap::new(
            PciCapabilityType::CommonConfig,
            settings_bar,
            COMMON_CONFIG_BAR_OFFSET as u32,
            COMMON_CONFIG_SIZE as u32,
        );
        self.config_regs
            .add_capability(&common_cap, None)
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        let isr_cap = VirtioPciCap::new(
            PciCapabilityType::IsrConfig,
            settings_bar,
            ISR_CONFIG_BAR_OFFSET as u32,
            ISR_CONFIG_SIZE as u32,
        );
        self.config_regs
            .add_capability(&isr_cap, None)
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        // TODO(dgreid) - set based on device's configuration size?
        let device_cap = VirtioPciCap::new(
            PciCapabilityType::DeviceConfig,
            settings_bar,
            DEVICE_CONFIG_BAR_OFFSET as u32,
            DEVICE_CONFIG_SIZE as u32,
        );
        self.config_regs
            .add_capability(&device_cap, None)
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        let notify_cap = VirtioPciNotifyCap::new(
            PciCapabilityType::NotifyConfig,
            settings_bar,
            NOTIFICATION_BAR_OFFSET as u32,
            NOTIFICATION_SIZE as u32,
            Le32::from(NOTIFY_OFF_MULTIPLIER),
        );
        self.config_regs
            .add_capability(&notify_cap, None)
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        // TODO(dgreid) - How will the configuration_cap work?
        let configuration_cap = VirtioPciCap::new(PciCapabilityType::PciConfig, 0, 0, 0);
        self.config_regs
            .add_capability(&configuration_cap, None)
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        // MSI-X table and PBA both live in the settings BAR.
        let msix_cap = MsixCap::new(
            settings_bar,
            self.msix_config.lock().num_vectors(),
            MSIX_TABLE_BAR_OFFSET as u32,
            settings_bar,
            MSIX_PBA_BAR_OFFSET as u32,
        );
        self.config_regs
            .add_capability(&msix_cap, Some(Box::new(self.msix_config.clone())))
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        self.config_regs
            .add_capability(&PciPmCap::new(), Some(Box::new(self.pm_config.clone())))
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        self.settings_bar = settings_bar as PciBarIndex;
        Ok(())
    }
613 
    /// Activates the underlying `VirtioDevice`. `assign_irq` has to be called first.
    ///
    /// Builds the `Interrupt`, registers an ioevent for each ready queue's
    /// notify address, activates the ready queues, and hands everything to
    /// the device. A device-level activation failure is reported to the guest
    /// via NEEDS_RESET rather than returned as an error.
    fn activate(&mut self) -> anyhow::Result<()> {
        let interrupt = Interrupt::new(
            self.interrupt_evt
                .as_ref()
                .ok_or_else(|| anyhow!("{} interrupt_evt is none", self.debug_label()))?
                .try_clone()
                .with_context(|| format!("{} failed to clone interrupt_evt", self.debug_label()))?,
            Some(self.msix_config.clone()),
            self.common_config.msix_config,
            #[cfg(target_arch = "x86_64")]
            Some((
                PmWakeupEvent::new(self.vm_control_tube.clone(), self.pm_config.clone()),
                MetricEventType::VirtioWakeup {
                    virtio_id: self.device.device_type() as u32,
                },
            )),
        );
        self.interrupt = Some(interrupt.clone());

        // Queue notify addresses are laid out contiguously within the
        // notification region of the settings BAR.
        let bar0 = self.config_regs.get_bar_addr(self.settings_bar);
        let notify_base = bar0 + NOTIFICATION_BAR_OFFSET;

        // Use ready queues and their events.
        let queues = self
            .queues
            .iter_mut()
            .enumerate()
            .zip(self.queue_evts.iter_mut())
            .filter(|((_, q), _)| q.ready())
            .map(|((queue_index, queue), evt)| {
                // Register the kick event as an ioevent unless it already is
                // (e.g. after a previous activation).
                if !evt.ioevent_registered {
                    self.ioevent_vm_memory_client
                        .register_io_event(
                            evt.event.try_clone().context("failed to clone Event")?,
                            notify_base + queue_index as u64 * u64::from(NOTIFY_OFF_MULTIPLIER),
                            Datamatch::AnyLength,
                        )
                        .context("failed to register ioevent")?;
                    evt.ioevent_registered = true;
                }
                let queue_evt = evt.event.try_clone().context("failed to clone queue_evt")?;
                Ok((
                    queue_index,
                    queue
                        .activate(&self.mem, queue_evt, interrupt.clone())
                        .context("failed to activate queue")?,
                ))
            })
            .collect::<anyhow::Result<BTreeMap<usize, Queue>>>()?;

        if let Err(e) = self.device.activate(self.mem.clone(), interrupt, queues) {
            error!("{} activate failed: {:#}", self.debug_label(), e);
            // Tell the driver the device needs a reset instead of failing here.
            self.common_config.driver_status |= VIRTIO_CONFIG_S_NEEDS_RESET as u8;
        } else {
            self.device_activated = true;
        }

        Ok(())
    }
674 
    /// Unregisters every queue notify event previously registered as an
    /// ioevent in `activate`, clearing each `ioevent_registered` flag. The
    /// events themselves remain usable.
    fn unregister_ioevents(&mut self) -> anyhow::Result<()> {
        let bar0 = self.config_regs.get_bar_addr(self.settings_bar);
        let notify_base = bar0 + NOTIFICATION_BAR_OFFSET;

        for (queue_index, evt) in self.queue_evts.iter_mut().enumerate() {
            if evt.ioevent_registered {
                // Must match the address/datamatch used at registration time.
                self.ioevent_vm_memory_client
                    .unregister_io_event(
                        evt.event.try_clone().context("failed to clone Event")?,
                        notify_base + queue_index as u64 * u64::from(NOTIFY_OFF_MULTIPLIER),
                        Datamatch::AnyLength,
                    )
                    .context("failed to unregister ioevent")?;
                evt.ioevent_registered = false;
            }
        }
        Ok(())
    }
693 
virtio_device(&self) -> &dyn VirtioDevice694     pub fn virtio_device(&self) -> &dyn VirtioDevice {
695         self.device.as_ref()
696     }
697 
    /// Returns the PCI address assigned to this device, if one has been
    /// allocated yet (see `allocate_address`).
    pub fn pci_address(&self) -> Option<PciAddress> {
        self.pci_address
    }
701 
702     #[cfg(target_arch = "x86_64")]
handle_pm_status_change(&mut self, status: &PmStatusChange)703     fn handle_pm_status_change(&mut self, status: &PmStatusChange) {
704         if let Some(interrupt) = self.interrupt.as_mut() {
705             interrupt.set_wakeup_event_active(status.to == PciDevicePower::D3)
706         }
707     }
708 
    // Wakeup events are only wired up on x86_64; this is a no-op elsewhere.
    #[cfg(not(target_arch = "x86_64"))]
    fn handle_pm_status_change(&mut self, _status: &PmStatusChange) {}
711 }
712 
713 impl PciDevice for VirtioPciDevice {
debug_label(&self) -> String714     fn debug_label(&self) -> String {
715         format!("pci{}", self.device.debug_label())
716     }
717 
    /// Returns the address the wrapped device asked to be placed at, if any.
    fn preferred_address(&self) -> Option<PciAddress> {
        self.preferred_address
    }
721 
    /// Allocates (or reserves) this device's PCI address.
    ///
    /// If a preferred address exists it is reserved from `resources`;
    /// otherwise any free address is allocated. Idempotent: a previously
    /// assigned address is returned unchanged.
    fn allocate_address(
        &mut self,
        resources: &mut SystemAllocator,
    ) -> std::result::Result<PciAddress, PciDeviceError> {
        if self.pci_address.is_none() {
            if let Some(address) = self.preferred_address {
                // Reserve the exact address the device asked for; failure to
                // reserve it is an allocation failure, not a fallback.
                if !resources.reserve_pci(
                    Alloc::PciBar {
                        bus: address.bus,
                        dev: address.dev,
                        func: address.func,
                        bar: 0,
                    },
                    self.debug_label(),
                ) {
                    return Err(PciDeviceError::PciAllocationFailed);
                }
                self.pci_address = Some(address);
            } else {
                self.pci_address = match resources.allocate_pci(0, self.debug_label()) {
                    Some(Alloc::PciBar {
                        bus,
                        dev,
                        func,
                        bar: _,
                    }) => Some(PciAddress { bus, dev, func }),
                    _ => None,
                }
            }
        }
        self.pci_address.ok_or(PciDeviceError::PciAllocationFailed)
    }
754 
    /// Collects every raw descriptor this device and its transport state hold
    /// open: the inner device's descriptors, queue events, interrupt events,
    /// the MSI socket, IOMMU descriptors, and the control/ioevent channels.
    fn keep_rds(&self) -> Vec<RawDescriptor> {
        let mut rds = self.device.keep_rds();
        rds.extend(
            self.queue_evts
                .iter()
                .map(|qe| qe.event.as_raw_descriptor()),
        );
        if let Some(interrupt_evt) = &self.interrupt_evt {
            rds.extend(interrupt_evt.as_raw_descriptors());
        }
        let descriptor = self.msix_config.lock().get_msi_socket();
        rds.push(descriptor);
        if let Some(iommu) = &self.iommu {
            rds.append(&mut iommu.lock().as_raw_descriptors());
        }
        rds.push(self.ioevent_vm_memory_client.as_raw_descriptor());
        rds.push(self.vm_control_tube.lock().as_raw_descriptor());
        rds
    }
774 
assign_irq(&mut self, irq_evt: IrqLevelEvent, pin: PciInterruptPin, irq_num: u32)775     fn assign_irq(&mut self, irq_evt: IrqLevelEvent, pin: PciInterruptPin, irq_num: u32) {
776         self.interrupt_evt = Some(irq_evt);
777         if !self.disable_intx {
778             self.config_regs.set_irq(irq_num as u8, pin);
779         }
780     }
781 
    /// Allocates the capability (settings) BAR.
    ///
    /// Delegates to the transport-level `allocate_io_bars` helper, supplying
    /// a closure that performs the actual MMIO allocation from `resources`.
    fn allocate_io_bars(
        &mut self,
        resources: &mut SystemAllocator,
    ) -> std::result::Result<Vec<BarRange>, PciDeviceError> {
        let device_type = self.device.device_type();
        allocate_io_bars(
            self,
            |size: u64, alloc: Alloc, alloc_option: &AllocOptions| {
                resources
                    .allocate_mmio(
                        size,
                        alloc,
                        format!("virtio-{}-cap_bar", device_type),
                        alloc_option,
                    )
                    .map_err(|e| PciDeviceError::IoAllocationFailed(size, e))
            },
        )
    }
801 
allocate_device_bars( &mut self, resources: &mut SystemAllocator, ) -> std::result::Result<Vec<BarRange>, PciDeviceError>802     fn allocate_device_bars(
803         &mut self,
804         resources: &mut SystemAllocator,
805     ) -> std::result::Result<Vec<BarRange>, PciDeviceError> {
806         let device_type = self.device.device_type();
807         allocate_device_bars(
808             self,
809             |size: u64, alloc: Alloc, alloc_option: &AllocOptions| {
810                 resources
811                     .allocate_mmio(
812                         size,
813                         alloc,
814                         format!("virtio-{}-custom_bar", device_type),
815                         alloc_option,
816                     )
817                     .map_err(|e| PciDeviceError::IoAllocationFailed(size, e))
818             },
819         )
820     }
821 
destroy_device(&mut self)822     fn destroy_device(&mut self) {
823         if let Err(e) = self.unregister_ioevents() {
824             error!("error destroying {}: {:?}", &self.debug_label(), &e);
825         }
826     }
827 
    /// Returns the configuration registered for the given BAR index, if any.
    fn get_bar_configuration(&self, bar_num: usize) -> Option<PciBarConfiguration> {
        self.config_regs.get_bar_configuration(bar_num)
    }
831 
register_device_capabilities(&mut self) -> std::result::Result<(), PciDeviceError>832     fn register_device_capabilities(&mut self) -> std::result::Result<(), PciDeviceError> {
833         let mut caps = self.device.get_device_caps();
834         if let Some(region) = self.device.get_shared_memory_region() {
835             caps.push(Box::new(VirtioPciShmCap::new(
836                 PciCapabilityType::SharedMemoryConfig,
837                 SHMEM_BAR_NUM as u8,
838                 0,
839                 region.length,
840                 region.id,
841             )));
842         }
843 
844         for cap in caps {
845             self.config_regs
846                 .add_capability(&*cap, None)
847                 .map_err(PciDeviceError::CapabilitiesSetup)?;
848         }
849 
850         Ok(())
851     }
852 
    /// Reads one 32-bit register from the PCI configuration space.
    fn read_config_register(&self, reg_idx: usize) -> u32 {
        self.config_regs.read_reg(reg_idx)
    }
856 
write_config_register(&mut self, reg_idx: usize, offset: u64, data: &[u8])857     fn write_config_register(&mut self, reg_idx: usize, offset: u64, data: &[u8]) {
858         if let Some(res) = self.config_regs.write_reg(reg_idx, offset, data) {
859             if let Some(msix_behavior) = res.downcast_ref::<MsixStatus>() {
860                 self.device.control_notify(*msix_behavior);
861             } else if let Some(status) = res.downcast_ref::<PmStatusChange>() {
862                 self.handle_pm_status_change(status);
863             }
864         }
865     }
866 
setup_pci_config_mapping( &mut self, shmem: &SharedMemory, base: usize, len: usize, ) -> std::result::Result<bool, PciDeviceError>867     fn setup_pci_config_mapping(
868         &mut self,
869         shmem: &SharedMemory,
870         base: usize,
871         len: usize,
872     ) -> std::result::Result<bool, PciDeviceError> {
873         self.config_regs
874             .setup_mapping(shmem, base, len)
875             .map(|_| true)
876             .map_err(PciDeviceError::MmioSetup)
877     }
878 
read_bar(&mut self, bar_index: usize, offset: u64, data: &mut [u8])879     fn read_bar(&mut self, bar_index: usize, offset: u64, data: &mut [u8]) {
880         if bar_index == self.settings_bar {
881             match offset {
882                 COMMON_CONFIG_BAR_OFFSET..=COMMON_CONFIG_LAST => self.common_config.read(
883                     offset - COMMON_CONFIG_BAR_OFFSET,
884                     data,
885                     &mut self.queues,
886                     self.device.as_mut(),
887                 ),
888                 ISR_CONFIG_BAR_OFFSET..=ISR_CONFIG_LAST => {
889                     if let Some(v) = data.get_mut(0) {
890                         // Reading this register resets it to 0.
891                         *v = if let Some(interrupt) = &self.interrupt {
892                             interrupt.read_and_reset_interrupt_status()
893                         } else {
894                             0
895                         };
896                     }
897                 }
898                 DEVICE_CONFIG_BAR_OFFSET..=DEVICE_CONFIG_LAST => {
899                     self.device
900                         .read_config(offset - DEVICE_CONFIG_BAR_OFFSET, data);
901                 }
902                 NOTIFICATION_BAR_OFFSET..=NOTIFICATION_LAST => {
903                     // Handled with ioevents.
904                 }
905                 MSIX_TABLE_BAR_OFFSET..=MSIX_TABLE_LAST => {
906                     self.msix_config
907                         .lock()
908                         .read_msix_table(offset - MSIX_TABLE_BAR_OFFSET, data);
909                 }
910                 MSIX_PBA_BAR_OFFSET..=MSIX_PBA_LAST => {
911                     self.msix_config
912                         .lock()
913                         .read_pba_entries(offset - MSIX_PBA_BAR_OFFSET, data);
914                 }
915                 _ => (),
916             }
917         } else {
918             self.device.read_bar(bar_index, offset, data);
919         }
920     }
921 
    /// Handles a write to one of the device's BARs, then performs the
    /// transport-level state transitions that may result from the write:
    /// device activation once the driver signals readiness, suspend-state
    /// propagation to the interrupt object, and driver-initiated reset.
    fn write_bar(&mut self, bar_index: usize, offset: u64, data: &[u8]) {
        // Sampled before the write so a suspend/resume transition caused by
        // this write can be detected afterwards.
        let was_suspended = self.is_device_suspended();

        if bar_index == self.settings_bar {
            match offset {
                COMMON_CONFIG_BAR_OFFSET..=COMMON_CONFIG_LAST => self.common_config.write(
                    offset - COMMON_CONFIG_BAR_OFFSET,
                    data,
                    &mut self.queues,
                    self.device.as_mut(),
                ),
                ISR_CONFIG_BAR_OFFSET..=ISR_CONFIG_LAST => {
                    // Writing a bit to the ISR register clears that status bit.
                    if let Some(v) = data.first() {
                        if let Some(interrupt) = &self.interrupt {
                            interrupt.clear_interrupt_status_bits(*v);
                        }
                    }
                }
                DEVICE_CONFIG_BAR_OFFSET..=DEVICE_CONFIG_LAST => {
                    self.device
                        .write_config(offset - DEVICE_CONFIG_BAR_OFFSET, data);
                }
                NOTIFICATION_BAR_OFFSET..=NOTIFICATION_LAST => {
                    // Notifications are normally handled with ioevents inside the hypervisor and
                    // do not reach write_bar(). However, if the ioevent registration hasn't
                    // finished yet, it is possible for a write to the notification region to make
                    // it through as a normal MMIO exit and end up here. To handle that case,
                    // provide a fallback that looks up the corresponding queue for the offset and
                    // triggers its event, which is equivalent to what the ioevent would do.
                    let queue_index = (offset - NOTIFICATION_BAR_OFFSET) as usize
                        / NOTIFY_OFF_MULTIPLIER as usize;
                    trace!("write_bar notification fallback for queue {}", queue_index);
                    if let Some(evt) = self.queue_evts.get(queue_index) {
                        let _ = evt.event.signal();
                    }
                }
                MSIX_TABLE_BAR_OFFSET..=MSIX_TABLE_LAST => {
                    // MSI-X table writes can change masking; tell the device.
                    let behavior = self
                        .msix_config
                        .lock()
                        .write_msix_table(offset - MSIX_TABLE_BAR_OFFSET, data);
                    self.device.control_notify(behavior);
                }
                MSIX_PBA_BAR_OFFSET..=MSIX_PBA_LAST => {
                    self.msix_config
                        .lock()
                        .write_pba_entries(offset - MSIX_PBA_BAR_OFFSET, data);
                }
                _ => (),
            }
        } else {
            self.device.write_bar(bar_index, offset, data);
        }

        // Activate the device once the driver has completed its handshake.
        if !self.device_activated && self.is_driver_ready() {
            if let Err(e) = self.activate() {
                error!("failed to activate device: {:#}", e);
            }
        }

        // Propagate suspend/resume transitions to the interrupt object.
        let is_suspended = self.is_device_suspended();
        if is_suspended != was_suspended {
            if let Some(interrupt) = self.interrupt.as_mut() {
                interrupt.set_suspended(is_suspended);
            }
        }

        // Device has been reset by the driver
        if self.device_activated && self.is_reset_requested() {
            if let Err(e) = self.device.reset() {
                error!("failed to reset {} device: {:#}", self.debug_label(), e);
            } else {
                self.device_activated = false;
                // reset queues
                self.queues.iter_mut().for_each(QueueConfig::reset);
                // select queue 0 by default
                self.common_config.queue_select = 0;
                if let Err(e) = self.unregister_ioevents() {
                    error!("failed to unregister ioevents: {:#}", e);
                }
            }
        }
    }
1005 
    /// Forwards the sandboxed notification to the inner virtio device.
    fn on_device_sandboxed(&mut self) {
        self.device.on_device_sandboxed();
    }
1009 
    #[cfg(target_arch = "x86_64")]
    /// Delegates ACPI table generation to the inner virtio device, passing the
    /// device's PCI address along with the existing tables.
    fn generate_acpi(&mut self, sdts: Vec<SDT>) -> Option<Vec<SDT>> {
        self.device.generate_acpi(&self.pci_address, sdts)
    }
1014 
    /// Lets callers holding the trait object downcast back to the concrete
    /// virtio PCI transport.
    fn as_virtio_pci_device(&self) -> Option<&VirtioPciDevice> {
        Some(self)
    }
1018 }
1019 
allocate_io_bars<F>( virtio_pci_device: &mut VirtioPciDevice, mut alloc_fn: F, ) -> std::result::Result<Vec<BarRange>, PciDeviceError> where F: FnMut(u64, Alloc, &AllocOptions) -> std::result::Result<u64, PciDeviceError>,1020 fn allocate_io_bars<F>(
1021     virtio_pci_device: &mut VirtioPciDevice,
1022     mut alloc_fn: F,
1023 ) -> std::result::Result<Vec<BarRange>, PciDeviceError>
1024 where
1025     F: FnMut(u64, Alloc, &AllocOptions) -> std::result::Result<u64, PciDeviceError>,
1026 {
1027     let address = virtio_pci_device
1028         .pci_address
1029         .expect("allocate_address must be called prior to allocate_io_bars");
1030     // Allocate one bar for the structures pointed to by the capability structures.
1031     let settings_config_addr = alloc_fn(
1032         CAPABILITY_BAR_SIZE,
1033         Alloc::PciBar {
1034             bus: address.bus,
1035             dev: address.dev,
1036             func: address.func,
1037             bar: 0,
1038         },
1039         AllocOptions::new()
1040             .max_address(u32::MAX.into())
1041             .align(CAPABILITY_BAR_SIZE),
1042     )?;
1043     let config = PciBarConfiguration::new(
1044         CAPABILITIES_BAR_NUM,
1045         CAPABILITY_BAR_SIZE,
1046         PciBarRegionType::Memory32BitRegion,
1047         PciBarPrefetchable::NotPrefetchable,
1048     )
1049     .set_address(settings_config_addr);
1050     let settings_bar = virtio_pci_device
1051         .config_regs
1052         .add_pci_bar(config)
1053         .map_err(|e| PciDeviceError::IoRegistrationFailed(settings_config_addr, e))?
1054         as u8;
1055     // Once the BARs are allocated, the capabilities can be added to the PCI configuration.
1056     virtio_pci_device.add_settings_pci_capabilities(settings_bar)?;
1057 
1058     Ok(vec![BarRange {
1059         addr: settings_config_addr,
1060         size: CAPABILITY_BAR_SIZE,
1061         prefetchable: false,
1062     }])
1063 }
1064 
/// Allocates the device-specific PCI BARs for `virtio_pci_device`.
///
/// BAR configurations supplied by the device itself take priority. When the
/// device supplies none but exposes a shared memory region, a single 64-bit
/// prefetchable BAR sized to the next power of two is created for it, and the
/// device is handed a `VmRequester` that manages mappings into that BAR.
/// `alloc_fn` performs the actual address-space allocation.
fn allocate_device_bars<F>(
    virtio_pci_device: &mut VirtioPciDevice,
    mut alloc_fn: F,
) -> std::result::Result<Vec<BarRange>, PciDeviceError>
where
    F: FnMut(u64, Alloc, &AllocOptions) -> std::result::Result<u64, PciDeviceError>,
{
    let address = virtio_pci_device
        .pci_address
        .expect("allocate_address must be called prior to allocate_device_bars");

    // Prefer the device's own BAR configs; otherwise fall back to building a
    // shared-memory BAR (or nothing at all).
    let configs = virtio_pci_device.device.get_device_bars(address);
    let configs = if !configs.is_empty() {
        configs
    } else {
        let region = match virtio_pci_device.device.get_shared_memory_region() {
            None => return Ok(Vec::new()),
            Some(r) => r,
        };
        let config = PciBarConfiguration::new(
            SHMEM_BAR_NUM,
            region
                .length
                .checked_next_power_of_two()
                .expect("bar too large"),
            PciBarRegionType::Memory64BitRegion,
            PciBarPrefetchable::Prefetchable,
        );

        let alloc = Alloc::PciBar {
            bus: address.bus,
            dev: address.dev,
            func: address.func,
            bar: config.bar_index() as u8,
        };

        let vm_memory_client = virtio_pci_device
            .shared_memory_vm_memory_client
            .take()
            .expect("missing shared_memory_tube");

        // See comment VmMemoryRequest::execute
        let can_prepare = !virtio_pci_device
            .device
            .expose_shmem_descriptors_with_viommu();
        let prepare_type = if can_prepare {
            virtio_pci_device.device.get_shared_memory_prepare_type()
        } else {
            SharedMemoryPrepareType::DynamicPerMapping
        };

        // Hand the device a mapper that routes shmem mappings into this BAR.
        let vm_requester = Box::new(VmRequester::new(vm_memory_client, alloc, prepare_type));
        virtio_pci_device
            .device
            .set_shared_memory_mapper(vm_requester);

        vec![config]
    };
    let mut ranges = vec![];
    for config in configs {
        let device_addr = alloc_fn(
            config.size(),
            Alloc::PciBar {
                bus: address.bus,
                dev: address.dev,
                func: address.func,
                bar: config.bar_index() as u8,
            },
            AllocOptions::new()
                .prefetchable(config.is_prefetchable())
                .align(config.size()),
        )?;
        let config = config.set_address(device_addr);
        let _device_bar = virtio_pci_device
            .config_regs
            .add_pci_bar(config)
            .map_err(|e| PciDeviceError::IoRegistrationFailed(device_addr, e))?;
        ranges.push(BarRange {
            addr: device_addr,
            size: config.size(),
            // NOTE(review): reported as non-prefetchable even when
            // config.is_prefetchable() is true — confirm this is intentional.
            prefetchable: false,
        });
    }

    // NOTE(review): assumes the shared memory BAR is ranges[0]; this holds for
    // the fallback path above, but verify for devices that return their own
    // BARs from get_device_bars() *and* expose a shared memory region.
    if virtio_pci_device
        .device
        .get_shared_memory_region()
        .is_some()
    {
        virtio_pci_device
            .device
            .set_shared_memory_region_base(GuestAddress(ranges[0].addr));
    }

    Ok(ranges)
}
1161 
#[cfg(feature = "pci-hotplug")]
impl HotPluggable for VirtioPciDevice {
    /// Assigns the device's PCI address for hotplug.
    fn set_pci_address(&mut self, pci_addr: PciAddress) -> std::result::Result<(), PciDeviceError> {
        self.pci_address = Some(pci_addr);
        Ok(())
    }

    /// Lays out the capability (IO) BAR with placeholder addresses; no real
    /// memory allocation takes place.
    fn configure_io_bars(&mut self) -> std::result::Result<(), PciDeviceError> {
        let mut layout = SimpleAllocator::new(0);
        let _ = allocate_io_bars(self, |size, _, _| layout.alloc(size, size))?;
        Ok(())
    }

    /// Lays out the device-specific BARs with placeholder addresses; no real
    /// memory allocation takes place.
    fn configure_device_bars(&mut self) -> std::result::Result<(), PciDeviceError> {
        // Start past the capability BAR region so the layouts don't overlap.
        let mut layout = SimpleAllocator::new(CAPABILITY_BAR_SIZE);
        let _ = allocate_device_bars(self, |size, _, _| layout.alloc(size, size))?;
        Ok(())
    }
}
1183 
#[cfg(feature = "pci-hotplug")]
/// A simple allocator that can allocate non-overlapping aligned intervals.
///
/// The addresses allocated are not exclusively reserved for the device, and cannot be used for a
/// static device. The allocated placeholder address describes the layout of PCI BAR for hotplugged
/// devices. Actual memory allocation is handled by PCI BAR reprogramming initiated by guest OS.
struct SimpleAllocator {
    // Next candidate address; bumped (after alignment) on each `alloc` call.
    current_address: u64,
}
1193 
#[cfg(feature = "pci-hotplug")]
impl SimpleAllocator {
    /// Constructs SimpleAllocator. Address will start at or after base_address.
    fn new(base_address: u64) -> Self {
        Self {
            current_address: base_address,
        }
    }

    /// Allocate memory with size and align. Returns the start of address.
    ///
    /// An `align` of zero means no alignment. The round-up uses remainder
    /// arithmetic so that, unlike the `addr + align - 1` idiom, it cannot
    /// overflow for addresses near `u64::MAX`.
    fn alloc(&mut self, size: u64, align: u64) -> std::result::Result<u64, PciDeviceError> {
        if align > 0 {
            // Align current_address upward to the next multiple of `align`.
            let misalignment = self.current_address % align;
            if misalignment != 0 {
                self.current_address += align - misalignment;
            }
        }
        let start_address = self.current_address;
        self.current_address += size;
        Ok(start_address)
    }
}
1214 
impl Suspendable for VirtioPciDevice {
    /// Puts the device to sleep, recording whether it was active (running
    /// queues were returned) or inactive so `wake` can restore the same state.
    fn sleep(&mut self) -> anyhow::Result<()> {
        // If the device is already asleep, we should not request it to sleep again.
        if self.sleep_state.is_some() {
            return Ok(());
        }

        // An activated device must hand back its running queues; an
        // unactivated one must return None. Anything else is an invariant
        // violation.
        if let Some(queues) = self.device.virtio_sleep()? {
            anyhow::ensure!(
                self.device_activated,
                format!(
                    "unactivated device {} returned queues on sleep",
                    self.debug_label()
                ),
            );
            self.sleep_state = Some(SleepState::Active {
                activated_queues: queues,
            });
        } else {
            anyhow::ensure!(
                !self.device_activated,
                format!(
                    "activated device {} didn't return queues on sleep",
                    self.debug_label()
                ),
            );
            self.sleep_state = Some(SleepState::Inactive);
        }
        Ok(())
    }

    /// Wakes the device, handing back the queues (plus guest memory and
    /// interrupt) recorded by `sleep` if the device was active.
    fn wake(&mut self) -> anyhow::Result<()> {
        match self.sleep_state.take() {
            None => {
                // If the device is already awake, we should not request it to wake again.
            }
            Some(SleepState::Inactive) => {
                self.device.virtio_wake(None).with_context(|| {
                    format!(
                        "virtio_wake failed for {}, can't recover",
                        self.debug_label(),
                    )
                })?;
            }
            Some(SleepState::Active { activated_queues }) => {
                self.device
                    .virtio_wake(Some((
                        self.mem.clone(),
                        self.interrupt
                            .clone()
                            .expect("interrupt missing for already active queues"),
                        activated_queues,
                    )))
                    .with_context(|| {
                        format!(
                            "virtio_wake failed for {}, can't recover",
                            self.debug_label(),
                        )
                    })?;
            }
        };
        Ok(())
    }

    /// Serializes the transport and inner-device state to JSON.
    ///
    /// Must be called while the device is asleep (`sleep_state` is `Some`);
    /// snapshotting with an IOMMU attached is not supported.
    fn snapshot(&mut self) -> anyhow::Result<serde_json::Value> {
        if self.iommu.is_some() {
            return Err(anyhow!("Cannot snapshot if iommu is present."));
        }

        serde_json::to_value(VirtioPciDeviceSnapshot {
            config_regs: self.config_regs.snapshot()?,
            inner_device: self.device.virtio_snapshot()?,
            device_activated: self.device_activated,
            interrupt: self.interrupt.as_ref().map(|i| i.snapshot()),
            msix_config: self.msix_config.lock().snapshot()?,
            common_config: self.common_config,
            queues: self
                .queues
                .iter()
                .map(|q| q.snapshot())
                .collect::<anyhow::Result<Vec<_>>>()?,
            activated_queues: match &self.sleep_state {
                None => {
                    anyhow::bail!("tried snapshotting while awake")
                }
                Some(SleepState::Inactive) => None,
                Some(SleepState::Active { activated_queues }) => {
                    let mut serialized_queues = Vec::new();
                    for (index, queue) in activated_queues.iter() {
                        serialized_queues.push((*index, queue.snapshot()?));
                    }
                    Some(serialized_queues)
                }
            },
        })
        .context("failed to serialize VirtioPciDeviceSnapshot")
    }

    /// Restores state captured by `snapshot`.
    ///
    /// Only supported while asleep and before the device has been activated.
    /// Restores config registers, MSI-X, the interrupt, queue configs, and the
    /// activated queues; re-registers ioevents and re-signals every doorbell.
    fn restore(&mut self, data: serde_json::Value) -> anyhow::Result<()> {
        // Restoring from an activated state is more complex and low priority, so just fail for
        // now. We'll need to reset the device before restoring, e.g. must call
        // self.unregister_ioevents().
        anyhow::ensure!(
            !self.device_activated,
            "tried to restore after virtio device activated. not supported yet"
        );

        let deser: VirtioPciDeviceSnapshot = serde_json::from_value(data)?;

        self.config_regs.restore(deser.config_regs)?;
        self.device_activated = deser.device_activated;

        self.msix_config.lock().restore(deser.msix_config)?;
        self.common_config = deser.common_config;

        // Restore the interrupt. This must be done after restoring the MSI-X configuration, but
        // before restoring the queues.
        if let Some(deser_interrupt) = deser.interrupt {
            self.interrupt = Some(Interrupt::new_from_snapshot(
                self.interrupt_evt
                    .as_ref()
                    .ok_or_else(|| anyhow!("{} interrupt_evt is none", self.debug_label()))?
                    .try_clone()
                    .with_context(|| {
                        format!("{} failed to clone interrupt_evt", self.debug_label())
                    })?,
                Some(self.msix_config.clone()),
                self.common_config.msix_config,
                deser_interrupt,
                #[cfg(target_arch = "x86_64")]
                Some((
                    PmWakeupEvent::new(self.vm_control_tube.clone(), self.pm_config.clone()),
                    MetricEventType::VirtioWakeup {
                        virtio_id: self.device.device_type() as u32,
                    },
                )),
            ));
        }

        assert_eq!(
            self.queues.len(),
            deser.queues.len(),
            "device must have the same number of queues"
        );
        for (q, s) in self.queues.iter_mut().zip(deser.queues.into_iter()) {
            q.restore(s)?;
        }

        // Verify we are asleep and inactive.
        match &self.sleep_state {
            None => {
                anyhow::bail!("tried restoring while awake")
            }
            Some(SleepState::Inactive) => {}
            Some(SleepState::Active { .. }) => {
                anyhow::bail!("tried to restore after virtio device activated. not supported yet")
            }
        };
        // Restore `sleep_state`.
        if let Some(activated_queues_snapshot) = deser.activated_queues {
            let interrupt = self
                .interrupt
                .as_ref()
                .context("tried to restore active queues without an interrupt")?;
            let mut activated_queues = BTreeMap::new();
            for (index, queue_snapshot) in activated_queues_snapshot {
                let queue_config = self
                    .queues
                    .get(index)
                    .with_context(|| format!("missing queue config for activated queue {index}"))?;
                let queue_evt = self
                    .queue_evts
                    .get(index)
                    .with_context(|| format!("missing queue event for activated queue {index}"))?
                    .event
                    .try_clone()
                    .context("failed to clone queue event")?;
                activated_queues.insert(
                    index,
                    Queue::restore(
                        queue_config,
                        queue_snapshot,
                        &self.mem,
                        queue_evt,
                        interrupt.clone(),
                    )?,
                );
            }

            // Restore the activated queues.
            self.sleep_state = Some(SleepState::Active { activated_queues });
        } else {
            self.sleep_state = Some(SleepState::Inactive);
        }

        // Call register_io_events for the activated queue events.
        let bar0 = self.config_regs.get_bar_addr(self.settings_bar);
        let notify_base = bar0 + NOTIFICATION_BAR_OFFSET;
        self.queues
            .iter()
            .enumerate()
            .zip(self.queue_evts.iter_mut())
            .filter(|((_, q), _)| q.ready())
            .try_for_each(|((queue_index, _queue), evt)| {
                if !evt.ioevent_registered {
                    self.ioevent_vm_memory_client
                        .register_io_event(
                            evt.event.try_clone().context("failed to clone Event")?,
                            notify_base + queue_index as u64 * u64::from(NOTIFY_OFF_MULTIPLIER),
                            Datamatch::AnyLength,
                        )
                        .context("failed to register ioevent")?;
                    evt.ioevent_registered = true;
                }
                Ok::<(), anyhow::Error>(())
            })?;

        // There might be data in the queue that wasn't drained by the device
        // at the time it was snapshotted. In this case, the doorbell should
        // still be signaled. If it is not, the driver may never re-trigger the
        // doorbell, and the device will stall. So here, we explicitly signal
        // every doorbell. Spurious doorbells are safe (devices will check their
        // queue, realize nothing is there, and go back to sleep.)
        self.queue_evts.iter_mut().try_for_each(|queue_event| {
            queue_event
                .event
                .signal()
                .context("failed to wake doorbell")
        })?;

        self.device.virtio_restore(deser.inner_device)?;

        Ok(())
    }
}
1450 
/// Forwards a virtio device's shared-memory mapping requests to the VM's
/// memory subsystem over a `VmMemoryClient`.
struct VmRequester {
    // Channel used to register/unregister guest memory mappings.
    vm_memory_client: VmMemoryClient,
    // The PCI BAR allocation the mappings are placed inside.
    alloc: Alloc,
    // Maps mapping offset -> registered region id, for later removal.
    mappings: BTreeMap<u64, VmMemoryRegionId>,
    // Whether the region is prepared once up front or per mapping.
    prepare_type: SharedMemoryPrepareType,
    // Set once the lazy single-mapping preparation has been performed.
    prepared: bool,
}
1458 
1459 impl VmRequester {
new( vm_memory_client: VmMemoryClient, alloc: Alloc, prepare_type: SharedMemoryPrepareType, ) -> Self1460     fn new(
1461         vm_memory_client: VmMemoryClient,
1462         alloc: Alloc,
1463         prepare_type: SharedMemoryPrepareType,
1464     ) -> Self {
1465         Self {
1466             vm_memory_client,
1467             alloc,
1468             mappings: BTreeMap::new(),
1469             prepare_type,
1470             prepared: false,
1471         }
1472     }
1473 }
1474 
impl SharedMemoryMapper for VmRequester {
    /// Maps `source` into the shared memory BAR at `offset` with the given
    /// protection and cache type.
    ///
    /// On the first call, lazily prepares the whole region when the device
    /// requested `SingleMappingOnFirst`. A `CacheNonCoherent` mapping is
    /// rejected if the region was prepared as cache-coherent.
    fn add_mapping(
        &mut self,
        source: VmMemorySource,
        offset: u64,
        prot: Protection,
        cache: MemCacheType,
    ) -> anyhow::Result<()> {
        if !self.prepared {
            if let SharedMemoryPrepareType::SingleMappingOnFirst(prepare_cache_type) =
                self.prepare_type
            {
                debug!(
                    "lazy prepare_shared_memory_region with {:?}",
                    prepare_cache_type
                );
                self.vm_memory_client
                    .prepare_shared_memory_region(self.alloc, prepare_cache_type)
                    .context("lazy prepare_shared_memory_region failed")?;
            }
            self.prepared = true;
        }

        // devices must implement VirtioDevice::get_shared_memory_prepare_type(), returning
        // SharedMemoryPrepareType::SingleMappingOnFirst(MemCacheType::CacheNonCoherent) in order to
        // add any mapping that requests MemCacheType::CacheNonCoherent.
        if cache == MemCacheType::CacheNonCoherent {
            if let SharedMemoryPrepareType::SingleMappingOnFirst(MemCacheType::CacheCoherent) =
                self.prepare_type
            {
                error!("invalid request to map with CacheNonCoherent for device with prepared CacheCoherent memory");
                return Err(anyhow!("invalid MemCacheType"));
            }
        }

        let id = self
            .vm_memory_client
            .register_memory(
                source,
                VmMemoryDestination::ExistingAllocation {
                    allocation: self.alloc,
                    offset,
                },
                prot,
                cache,
            )
            .context("register_memory failed")?;

        // Remember the region id so remove_mapping can unregister it later.
        self.mappings.insert(offset, id);
        Ok(())
    }

    /// Removes the mapping previously added at `offset`.
    fn remove_mapping(&mut self, offset: u64) -> anyhow::Result<()> {
        let id = self.mappings.remove(&offset).context("invalid offset")?;
        self.vm_memory_client
            .unregister_memory(id)
            .context("unregister_memory failed")
    }

    /// Returns the raw descriptor of the underlying `VmMemoryClient`.
    fn as_raw_descriptor(&self) -> Option<RawDescriptor> {
        Some(self.vm_memory_client.as_raw_descriptor())
    }
}
1538 
#[cfg(test)]
mod tests {

    #[cfg(feature = "pci-hotplug")]
    #[test]
    fn allocate_aligned_address() {
        // Table of (requested size, alignment, expected base address),
        // applied in order against a single allocator starting at 0.
        let cases = [
            // start at 0, aligned to 0x80. Interval end at 0x20.
            (0x20, 0x80, 0),
            // 0x20 => start at 0x40. Interval end at 0x80.
            (0x40, 0x40, 0x40),
            // 0x80 => start at 0x80, Interval end at 0x108.
            (0x88, 0x80, 0x80),
            // 0x108 => start at 0x180. Interval end at 0x1b0.
            (0x30, 0x80, 0x180),
        ];
        let mut simple_allocator = super::SimpleAllocator::new(0);
        for (size, align, expected) in cases {
            assert_eq!(simple_allocator.alloc(size, align).unwrap(), expected);
        }
    }
}
1556