// Copyright 2018 The ChromiumOS Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

use std::collections::BTreeMap;
use std::sync::Arc;

#[cfg(target_arch = "x86_64")]
use acpi_tables::sdt::SDT;
use anyhow::anyhow;
use anyhow::Context;
use base::debug;
use base::error;
use base::trace;
use base::AsRawDescriptor;
use base::AsRawDescriptors;
use base::Event;
use base::Protection;
use base::RawDescriptor;
use base::Result;
use base::SharedMemory;
use base::Tube;
use data_model::Le32;
use hypervisor::Datamatch;
use hypervisor::MemCacheType;
use libc::ERANGE;
#[cfg(target_arch = "x86_64")]
use metrics::MetricEventType;
use resources::Alloc;
use resources::AllocOptions;
use resources::SystemAllocator;
use serde::Deserialize;
use serde::Serialize;
use sync::Mutex;
use virtio_sys::virtio_config::VIRTIO_CONFIG_S_ACKNOWLEDGE;
use virtio_sys::virtio_config::VIRTIO_CONFIG_S_DRIVER;
use virtio_sys::virtio_config::VIRTIO_CONFIG_S_DRIVER_OK;
use virtio_sys::virtio_config::VIRTIO_CONFIG_S_FAILED;
use virtio_sys::virtio_config::VIRTIO_CONFIG_S_FEATURES_OK;
use virtio_sys::virtio_config::VIRTIO_CONFIG_S_NEEDS_RESET;
use virtio_sys::virtio_config::VIRTIO_CONFIG_S_SUSPEND;
use vm_control::api::VmMemoryClient;
use vm_control::VmMemoryDestination;
use vm_control::VmMemoryRegionId;
use vm_control::VmMemorySource;
use vm_memory::GuestAddress;
use vm_memory::GuestMemory;
use zerocopy::AsBytes;
use zerocopy::FromBytes;
use zerocopy::FromZeroes;

use self::virtio_pci_common_config::VirtioPciCommonConfig;
use super::*;
#[cfg(target_arch = "x86_64")]
use crate::acpi::PmWakeupEvent;
#[cfg(target_arch = "x86_64")]
use crate::pci::pm::PciDevicePower;
use crate::pci::pm::PciPmCap;
use crate::pci::pm::PmConfig;
use crate::pci::pm::PmStatusChange;
use crate::pci::BarRange;
use crate::pci::MsixCap;
use crate::pci::MsixConfig;
use crate::pci::MsixStatus;
use crate::pci::PciAddress;
use crate::pci::PciBarConfiguration;
use crate::pci::PciBarIndex;
use crate::pci::PciBarPrefetchable;
use crate::pci::PciBarRegionType;
use crate::pci::PciBaseSystemPeripheralSubclass;
use crate::pci::PciCapability;
use crate::pci::PciCapabilityID;
use crate::pci::PciClassCode;
use crate::pci::PciConfiguration;
use crate::pci::PciDevice;
use crate::pci::PciDeviceError;
use crate::pci::PciDisplaySubclass;
use crate::pci::PciHeaderType;
use crate::pci::PciId;
use crate::pci::PciInputDeviceSubclass;
use crate::pci::PciInterruptPin;
use crate::pci::PciMassStorageSubclass;
use crate::pci::PciMultimediaSubclass;
use crate::pci::PciNetworkControllerSubclass;
use crate::pci::PciSimpleCommunicationControllerSubclass;
use crate::pci::PciSubclass;
use crate::pci::PciWirelessControllerSubclass;
use crate::virtio::ipc_memory_mapper::IpcMemoryMapper;
#[cfg(feature = "pci-hotplug")]
use crate::HotPluggable;
use crate::IrqLevelEvent;
use crate::Suspendable;

#[repr(u8)]
#[derive(Debug, Copy, Clone, enumn::N)]
pub enum PciCapabilityType {
    CommonConfig = 1,
    NotifyConfig = 2,
    IsrConfig = 3,
    DeviceConfig = 4,
    PciConfig = 5,
    // Doorbell, Notification and SharedMemory are Virtio Vhost User related PCI
    // capabilities. Specified in 5.7.7.4 here
    // https://stefanha.github.io/virtio/vhost-user-slave.html#x1-2830007.
    DoorbellConfig = 6,
    NotificationConfig = 7,
    SharedMemoryConfig = 8,
}

#[allow(dead_code)]
#[repr(C)]
#[derive(Clone, Copy, FromZeroes, FromBytes, AsBytes)]
pub struct VirtioPciCap {
    // cap_vndr and cap_next are autofilled based on id() in pci configuration
    pub cap_vndr: u8,    // Generic PCI field: PCI_CAP_ID_VNDR
    pub cap_next: u8,    // Generic PCI field: next ptr
    pub cap_len: u8,     // Generic PCI field: capability length
    pub cfg_type: u8,    // Identifies the structure.
    pub bar: u8,         // Where to find it.
    id: u8,              // Multiple capabilities of the same type
    padding: [u8; 2],    // Pad to full dword.
    pub offset: Le32,    // Offset within bar.
    pub length: Le32,    // Length of the structure, in bytes.
}

impl PciCapability for VirtioPciCap {
    fn bytes(&self) -> &[u8] {
        self.as_bytes()
    }

    fn id(&self) -> PciCapabilityID {
        PciCapabilityID::VendorSpecific
    }

    fn writable_bits(&self) -> Vec<u32> {
        vec![0u32; 4]
    }
}

impl VirtioPciCap {
    pub fn new(cfg_type: PciCapabilityType, bar: u8, offset: u32, length: u32) -> Self {
        VirtioPciCap {
            cap_vndr: 0,
            cap_next: 0,
            cap_len: std::mem::size_of::<VirtioPciCap>() as u8,
            cfg_type: cfg_type as u8,
            bar,
            id: 0,
            padding: [0; 2],
            offset: Le32::from(offset),
            length: Le32::from(length),
        }
    }

    pub fn set_cap_len(&mut self, cap_len: u8) {
        self.cap_len = cap_len;
    }
}

#[allow(dead_code)]
#[repr(C)]
#[derive(Clone, Copy, AsBytes, FromZeroes, FromBytes)]
pub struct VirtioPciNotifyCap {
    cap: VirtioPciCap,
    notify_off_multiplier: Le32,
}

impl PciCapability for VirtioPciNotifyCap {
    fn bytes(&self) -> &[u8] {
        self.as_bytes()
    }

    fn id(&self) -> PciCapabilityID {
        PciCapabilityID::VendorSpecific
    }

    fn writable_bits(&self) -> Vec<u32> {
        vec![0u32; 5]
    }
}

impl VirtioPciNotifyCap {
    pub fn new(
        cfg_type: PciCapabilityType,
        bar: u8,
        offset: u32,
        length: u32,
        multiplier: Le32,
    ) -> Self {
        VirtioPciNotifyCap {
            cap: VirtioPciCap {
                cap_vndr: 0,
                cap_next: 0,
                cap_len: std::mem::size_of::<VirtioPciNotifyCap>() as u8,
                cfg_type: cfg_type as u8,
                bar,
                id: 0,
                padding: [0; 2],
                offset: Le32::from(offset),
                length: Le32::from(length),
            },
            notify_off_multiplier: multiplier,
        }
    }
}

#[repr(C)]
#[derive(Clone, Copy, AsBytes, FromZeroes, FromBytes)]
pub struct VirtioPciShmCap {
    cap: VirtioPciCap,
    offset_hi: Le32, // Most sig 32 bits of offset
    length_hi: Le32, // Most sig 32 bits of length
}

impl PciCapability for VirtioPciShmCap {
    fn bytes(&self) -> &[u8] {
        self.as_bytes()
    }

    fn id(&self) -> PciCapabilityID {
        PciCapabilityID::VendorSpecific
    }

    fn writable_bits(&self) -> Vec<u32> {
        vec![0u32; 6]
    }
}

impl VirtioPciShmCap {
    pub fn new(cfg_type: PciCapabilityType, bar: u8, offset: u64, length: u64, shmid: u8) -> Self {
        VirtioPciShmCap {
            cap: VirtioPciCap {
                cap_vndr: 0,
                cap_next: 0,
                cap_len: std::mem::size_of::<VirtioPciShmCap>() as u8,
                cfg_type: cfg_type as u8,
                bar,
                id: shmid,
                padding: [0; 2],
                offset: Le32::from(offset as u32),
                length: Le32::from(length as u32),
            },
            offset_hi: Le32::from((offset >> 32) as u32),
            length_hi: Le32::from((length >> 32) as u32),
        }
    }
}

// Allocate one bar for the structs pointed to by the capability structures.
const COMMON_CONFIG_BAR_OFFSET: u64 = 0x0000;
const COMMON_CONFIG_SIZE: u64 = 56;
const COMMON_CONFIG_LAST: u64 = COMMON_CONFIG_BAR_OFFSET + COMMON_CONFIG_SIZE - 1;
const ISR_CONFIG_BAR_OFFSET: u64 = 0x1000;
const ISR_CONFIG_SIZE: u64 = 1;
const ISR_CONFIG_LAST: u64 = ISR_CONFIG_BAR_OFFSET + ISR_CONFIG_SIZE - 1;
const DEVICE_CONFIG_BAR_OFFSET: u64 = 0x2000;
const DEVICE_CONFIG_SIZE: u64 = 0x1000;
const DEVICE_CONFIG_LAST: u64 = DEVICE_CONFIG_BAR_OFFSET + DEVICE_CONFIG_SIZE - 1;
const NOTIFICATION_BAR_OFFSET: u64 = 0x3000;
const NOTIFICATION_SIZE: u64 = 0x1000;
const NOTIFICATION_LAST: u64 = NOTIFICATION_BAR_OFFSET + NOTIFICATION_SIZE - 1;
const MSIX_TABLE_BAR_OFFSET: u64 = 0x6000;
const MSIX_TABLE_SIZE: u64 = 0x1000;
const MSIX_TABLE_LAST: u64 = MSIX_TABLE_BAR_OFFSET + MSIX_TABLE_SIZE - 1;
const MSIX_PBA_BAR_OFFSET: u64 = 0x7000;
const MSIX_PBA_SIZE: u64 = 0x1000;
const MSIX_PBA_LAST: u64 = MSIX_PBA_BAR_OFFSET + MSIX_PBA_SIZE - 1;
const CAPABILITY_BAR_SIZE: u64 = 0x8000;

const NOTIFY_OFF_MULTIPLIER: u32 = 4; // A dword per notification address.
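// With this multiplier, queue N's notify ("doorbell") offset within the
// settings BAR is NOTIFICATION_BAR_OFFSET + N * NOTIFY_OFF_MULTIPLIER; see
// `activate` and the write_bar() notification fallback below.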

const VIRTIO_PCI_VENDOR_ID: u16 = 0x1af4;
const VIRTIO_PCI_DEVICE_ID_BASE: u16 = 0x1040; // Add to device type to get device ID.
const VIRTIO_PCI_REVISION_ID: u8 = 1;

const CAPABILITIES_BAR_NUM: usize = 0;
const SHMEM_BAR_NUM: usize = 2;

struct QueueEvent {
    event: Event,
    ioevent_registered: bool,
}

/// Implements the
/// [PCI](http://docs.oasis-open.org/virtio/virtio/v1.0/cs04/virtio-v1.0-cs04.html#x1-650001)
/// transport for virtio devices.
pub struct VirtioPciDevice {
    config_regs: PciConfiguration,
    preferred_address: Option<PciAddress>,
    pci_address: Option<PciAddress>,

    device: Box<dyn VirtioDevice>,
    device_activated: bool,
    disable_intx: bool,

    interrupt: Option<Interrupt>,
    interrupt_evt: Option<IrqLevelEvent>,
    queues: Vec<QueueConfig>,
    queue_evts: Vec<QueueEvent>,
    mem: GuestMemory,
    settings_bar: PciBarIndex,
    msix_config: Arc<Mutex<MsixConfig>>,
    pm_config: Arc<Mutex<PmConfig>>,
    common_config: VirtioPciCommonConfig,

    iommu: Option<Arc<Mutex<IpcMemoryMapper>>>,

    // API client that is present if the device has shared memory regions, and
    // is used to map/unmap files into the shared memory region.
    shared_memory_vm_memory_client: Option<VmMemoryClient>,

    // API client for registration of ioevents when PCI BAR reprogramming is detected.
    ioevent_vm_memory_client: VmMemoryClient,

    // State only present while asleep.
    sleep_state: Option<SleepState>,

    vm_control_tube: Arc<Mutex<Tube>>,
}

enum SleepState {
    // Asleep and device hasn't been activated yet by the guest.
    Inactive,
    // Asleep and device has been activated by the guest.
    Active {
        /// The queues returned from `VirtioDevice::virtio_sleep`.
        /// Map is from queue index -> Queue.
        activated_queues: BTreeMap<usize, Queue>,
    },
}

#[derive(Serialize, Deserialize)]
struct VirtioPciDeviceSnapshot {
    config_regs: serde_json::Value,

    inner_device: serde_json::Value,
    device_activated: bool,

    interrupt: Option<InterruptSnapshot>,
    msix_config: serde_json::Value,
    common_config: VirtioPciCommonConfig,

    queues: Vec<serde_json::Value>,
    activated_queues: Option<Vec<(usize, serde_json::Value)>>,
}

impl VirtioPciDevice {
    /// Constructs a new PCI transport for the given virtio device.
    pub fn new(
        mem: GuestMemory,
        device: Box<dyn VirtioDevice>,
        msi_device_tube: Tube,
        disable_intx: bool,
        shared_memory_vm_memory_client: Option<VmMemoryClient>,
        ioevent_vm_memory_client: VmMemoryClient,
        vm_control_tube: Tube,
    ) -> Result<Self> {
        // shared_memory_vm_memory_client is required if there are shared memory regions.
        assert_eq!(
            device.get_shared_memory_region().is_none(),
            shared_memory_vm_memory_client.is_none()
        );

        let mut queue_evts = Vec::new();
        for _ in device.queue_max_sizes() {
            queue_evts.push(QueueEvent {
                event: Event::new()?,
                ioevent_registered: false,
            });
        }
        let queues = device
            .queue_max_sizes()
            .iter()
            .map(|&s| QueueConfig::new(s, device.features()))
            .collect();

        let pci_device_id = VIRTIO_PCI_DEVICE_ID_BASE + device.device_type() as u16;

        let (pci_device_class, pci_device_subclass) = match device.device_type() {
            DeviceType::Net => (
                PciClassCode::NetworkController,
                &PciNetworkControllerSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Block => (
                PciClassCode::MassStorage,
                &PciMassStorageSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Console => (
                PciClassCode::SimpleCommunicationController,
                &PciSimpleCommunicationControllerSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Rng => (
                PciClassCode::BaseSystemPeripheral,
                &PciBaseSystemPeripheralSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Balloon => (
                PciClassCode::BaseSystemPeripheral,
                &PciBaseSystemPeripheralSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Scsi => (
                PciClassCode::MassStorage,
                &PciMassStorageSubclass::Scsi as &dyn PciSubclass,
            ),
            DeviceType::P9 => (
                PciClassCode::NetworkController,
                &PciNetworkControllerSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Gpu => (
                PciClassCode::DisplayController,
                &PciDisplaySubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Input => (
                PciClassCode::InputDevice,
                &PciInputDeviceSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Vsock => (
                PciClassCode::NetworkController,
                &PciNetworkControllerSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Iommu => (
                PciClassCode::BaseSystemPeripheral,
                &PciBaseSystemPeripheralSubclass::Iommu as &dyn PciSubclass,
            ),
            DeviceType::Sound => (
                PciClassCode::MultimediaController,
                &PciMultimediaSubclass::AudioController as &dyn PciSubclass,
            ),
            DeviceType::Fs => (
                PciClassCode::MassStorage,
                &PciMassStorageSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Pmem => (
                PciClassCode::MassStorage,
                &PciMassStorageSubclass::NonVolatileMemory as &dyn PciSubclass,
            ),
            DeviceType::Mac80211HwSim => (
                PciClassCode::WirelessController,
                &PciWirelessControllerSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::VideoEncoder => (
                PciClassCode::MultimediaController,
                &PciMultimediaSubclass::VideoController as &dyn PciSubclass,
            ),
            DeviceType::VideoDecoder => (
                PciClassCode::MultimediaController,
                &PciMultimediaSubclass::VideoController as &dyn PciSubclass,
            ),
            DeviceType::Media => (
                PciClassCode::MultimediaController,
                &PciMultimediaSubclass::VideoController as &dyn PciSubclass,
            ),
            DeviceType::Scmi => (
                PciClassCode::BaseSystemPeripheral,
                &PciBaseSystemPeripheralSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Wl => (
                PciClassCode::DisplayController,
                &PciDisplaySubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Tpm => (
                PciClassCode::BaseSystemPeripheral,
                &PciBaseSystemPeripheralSubclass::Other as &dyn PciSubclass,
            ),
            DeviceType::Pvclock => (
                PciClassCode::BaseSystemPeripheral,
                &PciBaseSystemPeripheralSubclass::Other as &dyn PciSubclass,
            ),
        };

        let num_interrupts = device.num_interrupts();

        // One MSI-X vector per queue plus one for configuration changes.
        let msix_num = u16::try_from(num_interrupts + 1).map_err(|_| base::Error::new(ERANGE))?;
        let msix_config = Arc::new(Mutex::new(MsixConfig::new(
            msix_num,
            msi_device_tube,
            PciId::new(VIRTIO_PCI_VENDOR_ID, pci_device_id).into(),
            device.debug_label(),
        )));

        let config_regs = PciConfiguration::new(
            VIRTIO_PCI_VENDOR_ID,
            pci_device_id,
            pci_device_class,
            pci_device_subclass,
            None,
            PciHeaderType::Device,
            VIRTIO_PCI_VENDOR_ID,
            pci_device_id,
            VIRTIO_PCI_REVISION_ID,
        );

        Ok(VirtioPciDevice {
            config_regs,
            preferred_address: device.pci_address(),
            pci_address: None,
            device,
            device_activated: false,
            disable_intx,
            interrupt: None,
            interrupt_evt: None,
            queues,
            queue_evts,
            mem,
            settings_bar: 0,
            msix_config,
            pm_config: Arc::new(Mutex::new(PmConfig::new(true))),
            common_config: VirtioPciCommonConfig {
                driver_status: 0,
                config_generation: 0,
                device_feature_select: 0,
                driver_feature_select: 0,
                queue_select: 0,
                msix_config: VIRTIO_MSI_NO_VECTOR,
            },
            iommu: None,
            shared_memory_vm_memory_client,
            ioevent_vm_memory_client,
            sleep_state: None,
            vm_control_tube: Arc::new(Mutex::new(vm_control_tube)),
        })
    }

    fn is_driver_ready(&self) -> bool {
        let ready_bits = (VIRTIO_CONFIG_S_ACKNOWLEDGE
            | VIRTIO_CONFIG_S_DRIVER
            | VIRTIO_CONFIG_S_DRIVER_OK
            | VIRTIO_CONFIG_S_FEATURES_OK) as u8;
        (self.common_config.driver_status & ready_bits) == ready_bits
            && self.common_config.driver_status & VIRTIO_CONFIG_S_FAILED as u8 == 0
    }

    fn is_device_suspended(&self) -> bool {
        (self.common_config.driver_status & VIRTIO_CONFIG_S_SUSPEND as u8) != 0
    }

    /// Determines if the driver has requested that the device reset itself.
    fn is_reset_requested(&self) -> bool {
        self.common_config.driver_status == DEVICE_RESET as u8
    }

    fn add_settings_pci_capabilities(
        &mut self,
        settings_bar: u8,
    ) -> std::result::Result<(), PciDeviceError> {
        // Add pointers to the different configuration structures from the PCI capabilities.
        let common_cap = VirtioPciCap::new(
            PciCapabilityType::CommonConfig,
            settings_bar,
            COMMON_CONFIG_BAR_OFFSET as u32,
            COMMON_CONFIG_SIZE as u32,
        );
        self.config_regs
            .add_capability(&common_cap, None)
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        let isr_cap = VirtioPciCap::new(
            PciCapabilityType::IsrConfig,
            settings_bar,
            ISR_CONFIG_BAR_OFFSET as u32,
            ISR_CONFIG_SIZE as u32,
        );
        self.config_regs
            .add_capability(&isr_cap, None)
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        // TODO(dgreid) - set based on device's configuration size?
        let device_cap = VirtioPciCap::new(
            PciCapabilityType::DeviceConfig,
            settings_bar,
            DEVICE_CONFIG_BAR_OFFSET as u32,
            DEVICE_CONFIG_SIZE as u32,
        );
        self.config_regs
            .add_capability(&device_cap, None)
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        let notify_cap = VirtioPciNotifyCap::new(
            PciCapabilityType::NotifyConfig,
            settings_bar,
            NOTIFICATION_BAR_OFFSET as u32,
            NOTIFICATION_SIZE as u32,
            Le32::from(NOTIFY_OFF_MULTIPLIER),
        );
        self.config_regs
            .add_capability(&notify_cap, None)
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        // TODO(dgreid) - How will the configuration_cap work?
        let configuration_cap = VirtioPciCap::new(PciCapabilityType::PciConfig, 0, 0, 0);
        self.config_regs
            .add_capability(&configuration_cap, None)
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        let msix_cap = MsixCap::new(
            settings_bar,
            self.msix_config.lock().num_vectors(),
            MSIX_TABLE_BAR_OFFSET as u32,
            settings_bar,
            MSIX_PBA_BAR_OFFSET as u32,
        );
        self.config_regs
            .add_capability(&msix_cap, Some(Box::new(self.msix_config.clone())))
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        self.config_regs
            .add_capability(&PciPmCap::new(), Some(Box::new(self.pm_config.clone())))
            .map_err(PciDeviceError::CapabilitiesSetup)?;

        self.settings_bar = settings_bar as PciBarIndex;
        Ok(())
    }

    /// Activates the underlying `VirtioDevice`. `assign_irq` has to be called first.
    fn activate(&mut self) -> anyhow::Result<()> {
        let interrupt = Interrupt::new(
            self.interrupt_evt
                .as_ref()
                .ok_or_else(|| anyhow!("{} interrupt_evt is none", self.debug_label()))?
                .try_clone()
                .with_context(|| format!("{} failed to clone interrupt_evt", self.debug_label()))?,
            Some(self.msix_config.clone()),
            self.common_config.msix_config,
            #[cfg(target_arch = "x86_64")]
            Some((
                PmWakeupEvent::new(self.vm_control_tube.clone(), self.pm_config.clone()),
                MetricEventType::VirtioWakeup {
                    virtio_id: self.device.device_type() as u32,
                },
            )),
        );
        self.interrupt = Some(interrupt.clone());

        let bar0 = self.config_regs.get_bar_addr(self.settings_bar);
        let notify_base = bar0 + NOTIFICATION_BAR_OFFSET;

        // Use ready queues and their events.
        let queues = self
            .queues
            .iter_mut()
            .enumerate()
            .zip(self.queue_evts.iter_mut())
            .filter(|((_, q), _)| q.ready())
            .map(|((queue_index, queue), evt)| {
                if !evt.ioevent_registered {
                    self.ioevent_vm_memory_client
                        .register_io_event(
                            evt.event.try_clone().context("failed to clone Event")?,
                            notify_base + queue_index as u64 * u64::from(NOTIFY_OFF_MULTIPLIER),
                            Datamatch::AnyLength,
                        )
                        .context("failed to register ioevent")?;
                    evt.ioevent_registered = true;
                }
                let queue_evt = evt.event.try_clone().context("failed to clone queue_evt")?;
                Ok((
                    queue_index,
                    queue
                        .activate(&self.mem, queue_evt, interrupt.clone())
                        .context("failed to activate queue")?,
                ))
            })
            .collect::<anyhow::Result<BTreeMap<usize, Queue>>>()?;

        if let Err(e) = self.device.activate(self.mem.clone(), interrupt, queues) {
            error!("{} activate failed: {:#}", self.debug_label(), e);
            self.common_config.driver_status |= VIRTIO_CONFIG_S_NEEDS_RESET as u8;
        } else {
            self.device_activated = true;
        }

        Ok(())
    }

    fn unregister_ioevents(&mut self) -> anyhow::Result<()> {
        let bar0 = self.config_regs.get_bar_addr(self.settings_bar);
        let notify_base = bar0 + NOTIFICATION_BAR_OFFSET;

        for (queue_index, evt) in self.queue_evts.iter_mut().enumerate() {
            if evt.ioevent_registered {
                self.ioevent_vm_memory_client
                    .unregister_io_event(
                        evt.event.try_clone().context("failed to clone Event")?,
                        notify_base + queue_index as u64 * u64::from(NOTIFY_OFF_MULTIPLIER),
                        Datamatch::AnyLength,
                    )
                    .context("failed to unregister ioevent")?;
                evt.ioevent_registered = false;
            }
        }
        Ok(())
    }

    pub fn virtio_device(&self) -> &dyn VirtioDevice {
        self.device.as_ref()
    }

    pub fn pci_address(&self) -> Option<PciAddress> {
        self.pci_address
    }

    #[cfg(target_arch = "x86_64")]
    fn handle_pm_status_change(&mut self, status: &PmStatusChange) {
        if let Some(interrupt) = self.interrupt.as_mut() {
            interrupt.set_wakeup_event_active(status.to == PciDevicePower::D3)
        }
    }

    #[cfg(not(target_arch = "x86_64"))]
    fn handle_pm_status_change(&mut self, _status: &PmStatusChange) {}
}

impl PciDevice for VirtioPciDevice {
    fn debug_label(&self) -> String {
        format!("pci{}", self.device.debug_label())
    }

    fn preferred_address(&self) -> Option<PciAddress> {
        self.preferred_address
    }

    fn allocate_address(
        &mut self,
        resources: &mut SystemAllocator,
    ) -> std::result::Result<PciAddress, PciDeviceError> {
        if self.pci_address.is_none() {
            if let Some(address) = self.preferred_address {
                if !resources.reserve_pci(
                    Alloc::PciBar {
                        bus: address.bus,
                        dev: address.dev,
                        func: address.func,
                        bar: 0,
                    },
                    self.debug_label(),
                ) {
                    return Err(PciDeviceError::PciAllocationFailed);
                }
                self.pci_address = Some(address);
            } else {
                self.pci_address = match resources.allocate_pci(0, self.debug_label()) {
                    Some(Alloc::PciBar {
                        bus,
                        dev,
                        func,
                        bar: _,
                    }) => Some(PciAddress { bus, dev, func }),
                    _ => None,
                }
            }
        }
        self.pci_address.ok_or(PciDeviceError::PciAllocationFailed)
    }

    fn keep_rds(&self) -> Vec<RawDescriptor> {
        let mut rds = self.device.keep_rds();
        rds.extend(
            self.queue_evts
                .iter()
                .map(|qe| qe.event.as_raw_descriptor()),
        );
        if let Some(interrupt_evt) = &self.interrupt_evt {
            rds.extend(interrupt_evt.as_raw_descriptors());
        }
        let descriptor = self.msix_config.lock().get_msi_socket();
        rds.push(descriptor);
        if let Some(iommu) = &self.iommu {
            rds.append(&mut iommu.lock().as_raw_descriptors());
        }
        rds.push(self.ioevent_vm_memory_client.as_raw_descriptor());
        rds.push(self.vm_control_tube.lock().as_raw_descriptor());
        rds
    }

    fn assign_irq(&mut self, irq_evt: IrqLevelEvent, pin: PciInterruptPin, irq_num: u32) {
        self.interrupt_evt = Some(irq_evt);
        if !self.disable_intx {
            self.config_regs.set_irq(irq_num as u8, pin);
        }
    }

    fn allocate_io_bars(
        &mut self,
        resources: &mut SystemAllocator,
    ) -> std::result::Result<Vec<BarRange>, PciDeviceError> {
        let device_type = self.device.device_type();
        allocate_io_bars(
            self,
            |size: u64, alloc: Alloc, alloc_option: &AllocOptions| {
                resources
                    .allocate_mmio(
                        size,
                        alloc,
                        format!("virtio-{}-cap_bar", device_type),
                        alloc_option,
                    )
                    .map_err(|e| PciDeviceError::IoAllocationFailed(size, e))
            },
        )
    }

    fn allocate_device_bars(
        &mut self,
        resources: &mut SystemAllocator,
    ) -> std::result::Result<Vec<BarRange>, PciDeviceError> {
        let device_type = self.device.device_type();
        allocate_device_bars(
            self,
            |size: u64, alloc: Alloc, alloc_option: &AllocOptions| {
                resources
                    .allocate_mmio(
                        size,
                        alloc,
                        format!("virtio-{}-custom_bar", device_type),
                        alloc_option,
                    )
                    .map_err(|e| PciDeviceError::IoAllocationFailed(size, e))
            },
        )
    }

    fn destroy_device(&mut self) {
        if let Err(e) = self.unregister_ioevents() {
            error!("error destroying {}: {:?}", &self.debug_label(), &e);
        }
    }

    fn get_bar_configuration(&self, bar_num: usize) -> Option<PciBarConfiguration> {
        self.config_regs.get_bar_configuration(bar_num)
    }

    fn register_device_capabilities(&mut self) -> std::result::Result<(), PciDeviceError> {
        let mut caps = self.device.get_device_caps();
        if let Some(region) = self.device.get_shared_memory_region() {
            caps.push(Box::new(VirtioPciShmCap::new(
                PciCapabilityType::SharedMemoryConfig,
                SHMEM_BAR_NUM as u8,
                0,
                region.length,
                region.id,
            )));
        }

        for cap in caps {
            self.config_regs
                .add_capability(&*cap, None)
                .map_err(PciDeviceError::CapabilitiesSetup)?;
        }

        Ok(())
    }

    fn read_config_register(&self, reg_idx: usize) -> u32 {
        self.config_regs.read_reg(reg_idx)
    }

    fn write_config_register(&mut self, reg_idx: usize, offset: u64, data: &[u8]) {
        if let Some(res) = self.config_regs.write_reg(reg_idx, offset, data) {
            if let Some(msix_behavior) = res.downcast_ref::<MsixStatus>() {
                self.device.control_notify(*msix_behavior);
            } else if let Some(status) = res.downcast_ref::<PmStatusChange>() {
                self.handle_pm_status_change(status);
            }
        }
    }

    fn setup_pci_config_mapping(
        &mut self,
        shmem: &SharedMemory,
        base: usize,
        len: usize,
    ) -> std::result::Result<bool, PciDeviceError> {
        self.config_regs
            .setup_mapping(shmem, base, len)
            .map(|_| true)
            .map_err(PciDeviceError::MmioSetup)
    }

    fn read_bar(&mut self, bar_index: usize, offset: u64, data: &mut [u8]) {
        if bar_index == self.settings_bar {
            match offset {
                COMMON_CONFIG_BAR_OFFSET..=COMMON_CONFIG_LAST => self.common_config.read(
                    offset - COMMON_CONFIG_BAR_OFFSET,
                    data,
                    &mut self.queues,
                    self.device.as_mut(),
                ),
                ISR_CONFIG_BAR_OFFSET..=ISR_CONFIG_LAST => {
                    if let Some(v) = data.get_mut(0) {
                        // Reading this register resets it to 0.
                        *v = if let Some(interrupt) = &self.interrupt {
                            interrupt.read_and_reset_interrupt_status()
                        } else {
                            0
                        };
                    }
                }
                DEVICE_CONFIG_BAR_OFFSET..=DEVICE_CONFIG_LAST => {
                    self.device
                        .read_config(offset - DEVICE_CONFIG_BAR_OFFSET, data);
                }
                NOTIFICATION_BAR_OFFSET..=NOTIFICATION_LAST => {
                    // Handled with ioevents.
                }
                MSIX_TABLE_BAR_OFFSET..=MSIX_TABLE_LAST => {
                    self.msix_config
                        .lock()
                        .read_msix_table(offset - MSIX_TABLE_BAR_OFFSET, data);
                }
                MSIX_PBA_BAR_OFFSET..=MSIX_PBA_LAST => {
                    self.msix_config
                        .lock()
                        .read_pba_entries(offset - MSIX_PBA_BAR_OFFSET, data);
                }
                _ => (),
            }
        } else {
            self.device.read_bar(bar_index, offset, data);
        }
    }

    fn write_bar(&mut self, bar_index: usize, offset: u64, data: &[u8]) {
        let was_suspended = self.is_device_suspended();

        if bar_index == self.settings_bar {
            match offset {
                COMMON_CONFIG_BAR_OFFSET..=COMMON_CONFIG_LAST => self.common_config.write(
                    offset - COMMON_CONFIG_BAR_OFFSET,
                    data,
                    &mut self.queues,
                    self.device.as_mut(),
                ),
                ISR_CONFIG_BAR_OFFSET..=ISR_CONFIG_LAST => {
                    if let Some(v) = data.first() {
                        if let Some(interrupt) = &self.interrupt {
                            interrupt.clear_interrupt_status_bits(*v);
                        }
                    }
                }
                DEVICE_CONFIG_BAR_OFFSET..=DEVICE_CONFIG_LAST => {
                    self.device
                        .write_config(offset - DEVICE_CONFIG_BAR_OFFSET, data);
                }
                NOTIFICATION_BAR_OFFSET..=NOTIFICATION_LAST => {
                    // Notifications are normally handled with ioevents inside the hypervisor and
                    // do not reach write_bar(). However, if the ioevent registration hasn't
                    // finished yet, it is possible for a write to the notification region to make
                    // it through as a normal MMIO exit and end up here. To handle that case,
                    // provide a fallback that looks up the corresponding queue for the offset and
                    // triggers its event, which is equivalent to what the ioevent would do.
                    let queue_index = (offset - NOTIFICATION_BAR_OFFSET) as usize
                        / NOTIFY_OFF_MULTIPLIER as usize;
                    trace!("write_bar notification fallback for queue {}", queue_index);
                    if let Some(evt) = self.queue_evts.get(queue_index) {
                        let _ = evt.event.signal();
                    }
                }
                MSIX_TABLE_BAR_OFFSET..=MSIX_TABLE_LAST => {
                    let behavior = self
                        .msix_config
                        .lock()
                        .write_msix_table(offset - MSIX_TABLE_BAR_OFFSET, data);
                    self.device.control_notify(behavior);
                }
                MSIX_PBA_BAR_OFFSET..=MSIX_PBA_LAST => {
                    self.msix_config
                        .lock()
                        .write_pba_entries(offset - MSIX_PBA_BAR_OFFSET, data);
                }
                _ => (),
            }
        } else {
            self.device.write_bar(bar_index, offset, data);
        }

        if !self.device_activated && self.is_driver_ready() {
            if let Err(e) = self.activate() {
                error!("failed to activate device: {:#}", e);
            }
        }

        let is_suspended = self.is_device_suspended();
        if is_suspended != was_suspended {
            if let Some(interrupt) = self.interrupt.as_mut() {
                interrupt.set_suspended(is_suspended);
            }
        }

        // Device has been reset by the driver
        if self.device_activated && self.is_reset_requested() {
            if let Err(e) = self.device.reset() {
                error!("failed to reset {} device: {:#}", self.debug_label(), e);
            } else {
                self.device_activated = false;
                // reset queues
                self.queues.iter_mut().for_each(QueueConfig::reset);
                // select queue 0 by default
                self.common_config.queue_select = 0;
                if let Err(e) = self.unregister_ioevents() {
                    error!("failed to unregister ioevents: {:#}", e);
                }
            }
        }
    }

    fn on_device_sandboxed(&mut self) {
        self.device.on_device_sandboxed();
    }

    #[cfg(target_arch = "x86_64")]
    fn generate_acpi(&mut self, sdts: Vec<SDT>) -> Option<Vec<SDT>> {
        self.device.generate_acpi(&self.pci_address, sdts)
    }

    fn as_virtio_pci_device(&self) -> Option<&VirtioPciDevice> {
        Some(self)
    }
}

fn allocate_io_bars<F>(
    virtio_pci_device: &mut VirtioPciDevice,
    mut alloc_fn: F,
) -> std::result::Result<Vec<BarRange>, PciDeviceError>
where
    F: FnMut(u64, Alloc, &AllocOptions) -> std::result::Result<u64, PciDeviceError>,
{
    let address = virtio_pci_device
        .pci_address
        .expect("allocate_address must be called prior to allocate_io_bars");
    // Allocate one bar for the structures pointed to by the capability structures.
    let settings_config_addr = alloc_fn(
        CAPABILITY_BAR_SIZE,
        Alloc::PciBar {
            bus: address.bus,
            dev: address.dev,
            func: address.func,
            bar: 0,
        },
        AllocOptions::new()
            .max_address(u32::MAX.into())
            .align(CAPABILITY_BAR_SIZE),
    )?;
    let config = PciBarConfiguration::new(
        CAPABILITIES_BAR_NUM,
        CAPABILITY_BAR_SIZE,
        PciBarRegionType::Memory32BitRegion,
        PciBarPrefetchable::NotPrefetchable,
    )
    .set_address(settings_config_addr);
    let settings_bar = virtio_pci_device
        .config_regs
        .add_pci_bar(config)
        .map_err(|e| PciDeviceError::IoRegistrationFailed(settings_config_addr, e))?
        as u8;
    // Once the BARs are allocated, the capabilities can be added to the PCI configuration.
    virtio_pci_device.add_settings_pci_capabilities(settings_bar)?;

    Ok(vec![BarRange {
        addr: settings_config_addr,
        size: CAPABILITY_BAR_SIZE,
        prefetchable: false,
    }])
}

fn allocate_device_bars<F>(
    virtio_pci_device: &mut VirtioPciDevice,
    mut alloc_fn: F,
) -> std::result::Result<Vec<BarRange>, PciDeviceError>
where
    F: FnMut(u64, Alloc, &AllocOptions) -> std::result::Result<u64, PciDeviceError>,
{
    let address = virtio_pci_device
        .pci_address
        .expect("allocate_address must be called prior to allocate_device_bars");

    let configs = virtio_pci_device.device.get_device_bars(address);
    let configs = if !configs.is_empty() {
        configs
    } else {
        let region = match virtio_pci_device.device.get_shared_memory_region() {
            None => return Ok(Vec::new()),
            Some(r) => r,
        };
        let config = PciBarConfiguration::new(
            SHMEM_BAR_NUM,
            region
                .length
                .checked_next_power_of_two()
                .expect("bar too large"),
            PciBarRegionType::Memory64BitRegion,
            PciBarPrefetchable::Prefetchable,
        );

        let alloc = Alloc::PciBar {
            bus: address.bus,
            dev: address.dev,
            func: address.func,
            bar: config.bar_index() as u8,
        };

        let vm_memory_client = virtio_pci_device
            .shared_memory_vm_memory_client
            .take()
            .expect("missing shared_memory_tube");

        // See the comment on VmMemoryRequest::execute.
        let can_prepare = !virtio_pci_device
            .device
            .expose_shmem_descriptors_with_viommu();
        let prepare_type = if can_prepare {
            virtio_pci_device.device.get_shared_memory_prepare_type()
        } else {
            SharedMemoryPrepareType::DynamicPerMapping
        };

        let vm_requester = Box::new(VmRequester::new(vm_memory_client, alloc, prepare_type));
        virtio_pci_device
            .device
            .set_shared_memory_mapper(vm_requester);

        vec![config]
    };
    let mut ranges = vec![];
    for config in configs {
        let device_addr = alloc_fn(
            config.size(),
            Alloc::PciBar {
                bus: address.bus,
                dev: address.dev,
                func: address.func,
                bar: config.bar_index() as u8,
            },
            AllocOptions::new()
                .prefetchable(config.is_prefetchable())
                .align(config.size()),
        )?;
        let config = config.set_address(device_addr);
        let _device_bar = virtio_pci_device
            .config_regs
            .add_pci_bar(config)
            .map_err(|e| PciDeviceError::IoRegistrationFailed(device_addr, e))?;
        ranges.push(BarRange {
            addr: device_addr,
            size: config.size(),
            prefetchable: false,
        });
    }

    if virtio_pci_device
        .device
        .get_shared_memory_region()
        .is_some()
    {
        virtio_pci_device
            .device
            .set_shared_memory_region_base(GuestAddress(ranges[0].addr));
    }

    Ok(ranges)
}

#[cfg(feature = "pci-hotplug")]
impl HotPluggable for VirtioPciDevice {
    /// Sets PciAddress to pci_addr.
    fn set_pci_address(&mut self, pci_addr: PciAddress) -> std::result::Result<(), PciDeviceError> {
        self.pci_address = Some(pci_addr);
        Ok(())
    }

    /// Configures IO BAR layout without memory alloc.
    fn configure_io_bars(&mut self) -> std::result::Result<(), PciDeviceError> {
        let mut simple_allocator = SimpleAllocator::new(0);
        allocate_io_bars(self, |size, _, _| simple_allocator.alloc(size, size)).map(|_| ())
    }

    /// Configures device BAR layout without memory alloc.
    fn configure_device_bars(&mut self) -> std::result::Result<(), PciDeviceError> {
        // For device BARs, the space for CAPABILITY_BAR_SIZE should be skipped.
        let mut simple_allocator = SimpleAllocator::new(CAPABILITY_BAR_SIZE);
        allocate_device_bars(self, |size, _, _| simple_allocator.alloc(size, size)).map(|_| ())
    }
}

#[cfg(feature = "pci-hotplug")]
/// A simple allocator that can allocate non-overlapping aligned intervals.
///
/// The addresses allocated are not exclusively reserved for the device, and cannot be used for a
/// static device. The allocated placeholder address describes the PCI BAR layout for hotplugged
/// devices. Actual memory allocation is handled by PCI BAR reprogramming initiated by the guest
/// OS.
struct SimpleAllocator {
    current_address: u64,
}

#[cfg(feature = "pci-hotplug")]
impl SimpleAllocator {
    /// Constructs a SimpleAllocator. Addresses will start at or after base_address.
    fn new(base_address: u64) -> Self {
        Self {
            current_address: base_address,
        }
    }

    /// Allocates memory with the given size and alignment. Returns the start address.
    fn alloc(&mut self, size: u64, align: u64) -> std::result::Result<u64, PciDeviceError> {
        if align > 0 {
            // aligns current_address upward to align.
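            // e.g. current_address = 0x108, align = 0x80 -> 0x180
            // (matching the `allocate_aligned_address` test below).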
            self.current_address = (self.current_address + align - 1) / align * align;
        }
        let start_address = self.current_address;
        self.current_address += size;
        Ok(start_address)
    }
}

impl Suspendable for VirtioPciDevice {
    fn sleep(&mut self) -> anyhow::Result<()> {
        // If the device is already asleep, we should not request it to sleep again.
        if self.sleep_state.is_some() {
            return Ok(());
        }

        if let Some(queues) = self.device.virtio_sleep()? {
            anyhow::ensure!(
                self.device_activated,
                format!(
                    "unactivated device {} returned queues on sleep",
                    self.debug_label()
                ),
            );
            self.sleep_state = Some(SleepState::Active {
                activated_queues: queues,
            });
        } else {
            anyhow::ensure!(
                !self.device_activated,
                format!(
                    "activated device {} didn't return queues on sleep",
                    self.debug_label()
                ),
            );
            self.sleep_state = Some(SleepState::Inactive);
        }
        Ok(())
    }

    fn wake(&mut self) -> anyhow::Result<()> {
        match self.sleep_state.take() {
            None => {
                // If the device is already awake, we should not request it to wake again.
            }
            Some(SleepState::Inactive) => {
                self.device.virtio_wake(None).with_context(|| {
                    format!(
                        "virtio_wake failed for {}, can't recover",
                        self.debug_label(),
                    )
                })?;
            }
            Some(SleepState::Active { activated_queues }) => {
                self.device
                    .virtio_wake(Some((
                        self.mem.clone(),
                        self.interrupt
                            .clone()
                            .expect("interrupt missing for already active queues"),
                        activated_queues,
                    )))
                    .with_context(|| {
                        format!(
                            "virtio_wake failed for {}, can't recover",
                            self.debug_label(),
                        )
                    })?;
            }
        };
        Ok(())
    }

    fn snapshot(&mut self) -> anyhow::Result<serde_json::Value> {
        if self.iommu.is_some() {
            return Err(anyhow!("Cannot snapshot if iommu is present."));
        }

        serde_json::to_value(VirtioPciDeviceSnapshot {
            config_regs: self.config_regs.snapshot()?,
            inner_device: self.device.virtio_snapshot()?,
            device_activated: self.device_activated,
            interrupt: self.interrupt.as_ref().map(|i| i.snapshot()),
            msix_config: self.msix_config.lock().snapshot()?,
            common_config: self.common_config,
            queues: self
                .queues
                .iter()
                .map(|q| q.snapshot())
                .collect::<anyhow::Result<Vec<_>>>()?,
            activated_queues: match &self.sleep_state {
                None => {
                    anyhow::bail!("tried snapshotting while awake")
                }
                Some(SleepState::Inactive) => None,
                Some(SleepState::Active { activated_queues }) => {
                    let mut serialized_queues = Vec::new();
                    for (index, queue) in activated_queues.iter() {
                        serialized_queues.push((*index, queue.snapshot()?));
                    }
                    Some(serialized_queues)
                }
            },
        })
        .context("failed to serialize VirtioPciDeviceSnapshot")
    }

    fn restore(&mut self, data: serde_json::Value) -> anyhow::Result<()> {
        // Restoring from an activated state is more complex and low priority, so just fail for
        // now. We'll need to reset the device before restoring, e.g. must call
        // self.unregister_ioevents().
        anyhow::ensure!(
            !self.device_activated,
            "tried to restore after virtio device activated. not supported yet"
        );

        let deser: VirtioPciDeviceSnapshot = serde_json::from_value(data)?;

        self.config_regs.restore(deser.config_regs)?;
        self.device_activated = deser.device_activated;

        self.msix_config.lock().restore(deser.msix_config)?;
        self.common_config = deser.common_config;

        // Restore the interrupt. This must be done after restoring the MSI-X configuration, but
        // before restoring the queues.
        if let Some(deser_interrupt) = deser.interrupt {
            self.interrupt = Some(Interrupt::new_from_snapshot(
                self.interrupt_evt
                    .as_ref()
                    .ok_or_else(|| anyhow!("{} interrupt_evt is none", self.debug_label()))?
                    .try_clone()
                    .with_context(|| {
                        format!("{} failed to clone interrupt_evt", self.debug_label())
                    })?,
                Some(self.msix_config.clone()),
                self.common_config.msix_config,
                deser_interrupt,
                #[cfg(target_arch = "x86_64")]
                Some((
                    PmWakeupEvent::new(self.vm_control_tube.clone(), self.pm_config.clone()),
                    MetricEventType::VirtioWakeup {
                        virtio_id: self.device.device_type() as u32,
                    },
                )),
            ));
        }

        assert_eq!(
            self.queues.len(),
            deser.queues.len(),
            "device must have the same number of queues"
        );
        for (q, s) in self.queues.iter_mut().zip(deser.queues.into_iter()) {
            q.restore(s)?;
        }

        // Verify we are asleep and inactive.
        match &self.sleep_state {
            None => {
                anyhow::bail!("tried restoring while awake")
            }
            Some(SleepState::Inactive) => {}
            Some(SleepState::Active { .. }) => {
                anyhow::bail!("tried to restore after virtio device activated. not supported yet")
            }
        };
        // Restore `sleep_state`.
        if let Some(activated_queues_snapshot) = deser.activated_queues {
            let interrupt = self
                .interrupt
                .as_ref()
                .context("tried to restore active queues without an interrupt")?;
            let mut activated_queues = BTreeMap::new();
            for (index, queue_snapshot) in activated_queues_snapshot {
                let queue_config = self
                    .queues
                    .get(index)
                    .with_context(|| format!("missing queue config for activated queue {index}"))?;
                let queue_evt = self
                    .queue_evts
                    .get(index)
                    .with_context(|| format!("missing queue event for activated queue {index}"))?
                    .event
                    .try_clone()
                    .context("failed to clone queue event")?;
                activated_queues.insert(
                    index,
                    Queue::restore(
                        queue_config,
                        queue_snapshot,
                        &self.mem,
                        queue_evt,
                        interrupt.clone(),
                    )?,
                );
            }

            // Restore the activated queues.
            self.sleep_state = Some(SleepState::Active { activated_queues });
        } else {
            self.sleep_state = Some(SleepState::Inactive);
        }

        // Call register_io_events for the activated queue events.
        let bar0 = self.config_regs.get_bar_addr(self.settings_bar);
        let notify_base = bar0 + NOTIFICATION_BAR_OFFSET;
        self.queues
            .iter()
            .enumerate()
            .zip(self.queue_evts.iter_mut())
            .filter(|((_, q), _)| q.ready())
            .try_for_each(|((queue_index, _queue), evt)| {
                if !evt.ioevent_registered {
                    self.ioevent_vm_memory_client
                        .register_io_event(
                            evt.event.try_clone().context("failed to clone Event")?,
                            notify_base + queue_index as u64 * u64::from(NOTIFY_OFF_MULTIPLIER),
                            Datamatch::AnyLength,
                        )
                        .context("failed to register ioevent")?;
                    evt.ioevent_registered = true;
                }
                Ok::<(), anyhow::Error>(())
            })?;

        // There might be data in the queue that wasn't drained by the device
        // at the time it was snapshotted. In this case, the doorbell should
        // still be signaled. If it is not, the driver may never re-trigger the
        // doorbell, and the device will stall. So here, we explicitly signal
        // every doorbell. Spurious doorbells are safe (devices will check their
        // queue, realize nothing is there, and go back to sleep.)
        self.queue_evts.iter_mut().try_for_each(|queue_event| {
            queue_event
                .event
                .signal()
                .context("failed to wake doorbell")
        })?;

        self.device.virtio_restore(deser.inner_device)?;

        Ok(())
    }
}

struct VmRequester {
    vm_memory_client: VmMemoryClient,
    alloc: Alloc,
    mappings: BTreeMap<u64, VmMemoryRegionId>,
    prepare_type: SharedMemoryPrepareType,
    prepared: bool,
}

impl VmRequester {
    fn new(
        vm_memory_client: VmMemoryClient,
        alloc: Alloc,
        prepare_type: SharedMemoryPrepareType,
    ) -> Self {
        Self {
            vm_memory_client,
            alloc,
            mappings: BTreeMap::new(),
            prepare_type,
            prepared: false,
        }
    }
}

impl SharedMemoryMapper for VmRequester {
    fn add_mapping(
        &mut self,
        source: VmMemorySource,
        offset: u64,
        prot: Protection,
        cache: MemCacheType,
    ) -> anyhow::Result<()> {
        if !self.prepared {
            if let SharedMemoryPrepareType::SingleMappingOnFirst(prepare_cache_type) =
                self.prepare_type
            {
                debug!(
                    "lazy prepare_shared_memory_region with {:?}",
                    prepare_cache_type
                );
                self.vm_memory_client
                    .prepare_shared_memory_region(self.alloc, prepare_cache_type)
                    .context("lazy prepare_shared_memory_region failed")?;
            }
            self.prepared = true;
        }

        // devices must implement VirtioDevice::get_shared_memory_prepare_type(), returning
        // SharedMemoryPrepareType::SingleMappingOnFirst(MemCacheType::CacheNonCoherent) in order
        // to add any mapping that requests MemCacheType::CacheNonCoherent.
        if cache == MemCacheType::CacheNonCoherent {
            if let SharedMemoryPrepareType::SingleMappingOnFirst(MemCacheType::CacheCoherent) =
                self.prepare_type
            {
                error!("invalid request to map with CacheNonCoherent for device with prepared CacheCoherent memory");
                return Err(anyhow!("invalid MemCacheType"));
            }
        }

        let id = self
            .vm_memory_client
            .register_memory(
                source,
                VmMemoryDestination::ExistingAllocation {
                    allocation: self.alloc,
                    offset,
                },
                prot,
                cache,
            )
            .context("register_memory failed")?;

        self.mappings.insert(offset, id);
        Ok(())
    }

    fn remove_mapping(&mut self, offset: u64) -> anyhow::Result<()> {
        let id = self.mappings.remove(&offset).context("invalid offset")?;
        self.vm_memory_client
            .unregister_memory(id)
            .context("unregister_memory failed")
    }

    fn as_raw_descriptor(&self) -> Option<RawDescriptor> {
        Some(self.vm_memory_client.as_raw_descriptor())
    }
}

#[cfg(test)]
mod tests {

    #[cfg(feature = "pci-hotplug")]
    #[test]
    fn allocate_aligned_address() {
        let mut simple_allocator = super::SimpleAllocator::new(0);
        // start at 0, aligned to 0x80. Interval end at 0x20.
        assert_eq!(simple_allocator.alloc(0x20, 0x80).unwrap(), 0);
        // 0x20 => start at 0x40. Interval end at 0x80.
        assert_eq!(simple_allocator.alloc(0x40, 0x40).unwrap(), 0x40);
        // 0x80 => start at 0x80, Interval end at 0x108.
        assert_eq!(simple_allocator.alloc(0x88, 0x80).unwrap(), 0x80);
        // 0x108 => start at 0x180. Interval end at 0x1b0.
        assert_eq!(simple_allocator.alloc(0x30, 0x80).unwrap(), 0x180);
    }
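
    // Layout sanity checks added alongside the allocator test above. These are
    // minimal sketches that only rely on items defined in this file.
    #[test]
    fn virtio_pci_cap_size() {
        // The virtio spec's `struct virtio_pci_cap` is 16 bytes, and
        // `VirtioPciCap::new` fills `cap_len` with the full structure size.
        let cap = super::VirtioPciCap::new(super::PciCapabilityType::CommonConfig, 0, 0, 56);
        assert_eq!(std::mem::size_of::<super::VirtioPciCap>(), 16);
        assert_eq!(
            cap.cap_len as usize,
            std::mem::size_of::<super::VirtioPciCap>()
        );
    }

    #[test]
    fn capability_bar_regions_do_not_overlap() {
        // Each fixed region in the settings BAR must end before the next one
        // begins, and the whole layout must fit inside CAPABILITY_BAR_SIZE.
        assert!(super::COMMON_CONFIG_LAST < super::ISR_CONFIG_BAR_OFFSET);
        assert!(super::ISR_CONFIG_LAST < super::DEVICE_CONFIG_BAR_OFFSET);
        assert!(super::DEVICE_CONFIG_LAST < super::NOTIFICATION_BAR_OFFSET);
        assert!(super::NOTIFICATION_LAST < super::MSIX_TABLE_BAR_OFFSET);
        assert!(super::MSIX_TABLE_LAST < super::MSIX_PBA_BAR_OFFSET);
        assert!(super::MSIX_PBA_LAST < super::CAPABILITY_BAR_SIZE);
    }

    #[test]
    fn shm_cap_splits_64bit_fields() {
        // `VirtioPciShmCap::new` splits the 64-bit offset and length across the
        // low words (in the embedded cap) and the `*_hi` words.
        let cap = super::VirtioPciShmCap::new(
            super::PciCapabilityType::SharedMemoryConfig,
            super::SHMEM_BAR_NUM as u8,
            0x1_0000_0004,
            0x2_0000_0008,
            0,
        );
        assert_eq!(cap.cap.offset, super::Le32::from(0x4));
        assert_eq!(cap.offset_hi, super::Le32::from(0x1));
        assert_eq!(cap.cap.length, super::Le32::from(0x8));
        assert_eq!(cap.length_hi, super::Le32::from(0x2));
    }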
}