xref: /aosp_15_r20/external/crosvm/devices/src/irqchip/kvm/x86_64.rs (revision bb4ee6a4ae7042d18b07a98463b9c8b875e44b39)
1 // Copyright 2020 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 use std::sync::Arc;
6 
7 use anyhow::anyhow;
8 use anyhow::Context;
9 use base::error;
10 #[cfg(not(test))]
11 use base::Clock;
12 use base::Error;
13 use base::Event;
14 #[cfg(test)]
15 use base::FakeClock as Clock;
16 use base::Result;
17 use base::Tube;
18 use hypervisor::kvm::KvmVcpu;
19 use hypervisor::kvm::KvmVm;
20 use hypervisor::HypervisorCap;
21 use hypervisor::IoapicState;
22 use hypervisor::IrqRoute;
23 use hypervisor::IrqSource;
24 use hypervisor::IrqSourceChip;
25 use hypervisor::LapicState;
26 use hypervisor::MPState;
27 use hypervisor::PicSelect;
28 use hypervisor::PicState;
29 use hypervisor::PitState;
30 use hypervisor::Vcpu;
31 use hypervisor::VcpuX86_64;
32 use hypervisor::Vm;
33 use hypervisor::VmX86_64;
34 use kvm_sys::*;
35 use resources::SystemAllocator;
36 use serde::Deserialize;
37 use serde::Serialize;
38 use sync::Mutex;
39 
40 use crate::irqchip::Ioapic;
41 use crate::irqchip::IrqEvent;
42 use crate::irqchip::IrqEventIndex;
43 use crate::irqchip::Pic;
44 use crate::irqchip::VcpuRunState;
45 use crate::irqchip::IOAPIC_BASE_ADDRESS;
46 use crate::irqchip::IOAPIC_MEM_LENGTH_BYTES;
47 use crate::Bus;
48 use crate::IrqChip;
49 use crate::IrqChipCap;
50 use crate::IrqChipX86_64;
51 use crate::IrqEdgeEvent;
52 use crate::IrqEventSource;
53 use crate::IrqLevelEvent;
54 use crate::Pit;
55 use crate::PitError;
56 
/// PIT channel 0 timer is connected to IRQ 0
58 const PIT_CHANNEL0_IRQ: u32 = 0;
59 
60 /// Default x86 routing table.  Pins 0-7 go to primary pic and ioapic, pins 8-15 go to secondary
61 /// pic and ioapic, and pins 16-23 go only to the ioapic.
kvm_default_irq_routing_table(ioapic_pins: usize) -> Vec<IrqRoute>62 fn kvm_default_irq_routing_table(ioapic_pins: usize) -> Vec<IrqRoute> {
63     let mut routes: Vec<IrqRoute> = Vec::new();
64 
65     for i in 0..8 {
66         routes.push(IrqRoute::pic_irq_route(IrqSourceChip::PicPrimary, i));
67         routes.push(IrqRoute::ioapic_irq_route(i));
68     }
69     for i in 8..16 {
70         routes.push(IrqRoute::pic_irq_route(IrqSourceChip::PicSecondary, i));
71         routes.push(IrqRoute::ioapic_irq_route(i));
72     }
73     for i in 16..ioapic_pins as u32 {
74         routes.push(IrqRoute::ioapic_irq_route(i));
75     }
76 
77     routes
78 }
79 
/// IrqChip implementation where the entire IrqChip is emulated by KVM.
///
/// This implementation will use the KVM API to create and configure the in-kernel irqchip.
pub struct KvmKernelIrqChip {
    /// VM handle on which the in-kernel irqchip and PIT are created and queried.
    pub(super) vm: KvmVm,
    /// One slot per vcpu, all initialized to `None` by `new`. The list is shared (via `Arc`)
    /// with every clone produced by `arch_try_clone`.
    pub(super) vcpus: Arc<Mutex<Vec<Option<KvmVcpu>>>>,
    /// Current irq routes, initialized to the default x86 routing table by `new`. Shared (via
    /// `Arc`) with every clone produced by `arch_try_clone`.
    pub(super) routes: Arc<Mutex<Vec<IrqRoute>>>,
}
88 
/// Serialized form of the chip-specific state written by `snapshot_chip_specific` and read back
/// by `restore_chip_specific`.
#[derive(Serialize, Deserialize)]
struct KvmKernelIrqChipSnapshot {
    /// Copy of the irq routes at snapshot time.
    routes: Vec<IrqRoute>,
    // apic_base and interrupt_bitmap are part of the IrqChip, despite the
    // fact that we get the values from the Vcpu ioctl "KVM_GET_SREGS".
    // Contains 1 entry per Vcpu.
    apic_base: Vec<u64>,
    interrupt_bitmap: Vec<[u64; 4usize]>,
}
98 
99 impl KvmKernelIrqChip {
100     /// Construct a new KvmKernelIrqchip.
new(vm: KvmVm, num_vcpus: usize) -> Result<KvmKernelIrqChip>101     pub fn new(vm: KvmVm, num_vcpus: usize) -> Result<KvmKernelIrqChip> {
102         vm.create_irq_chip()?;
103         vm.create_pit()?;
104         let ioapic_pins = vm.get_ioapic_num_pins()?;
105 
106         Ok(KvmKernelIrqChip {
107             vm,
108             vcpus: Arc::new(Mutex::new((0..num_vcpus).map(|_| None).collect())),
109             routes: Arc::new(Mutex::new(kvm_default_irq_routing_table(ioapic_pins))),
110         })
111     }
112     /// Attempt to create a shallow clone of this x86_64 KvmKernelIrqChip instance.
arch_try_clone(&self) -> Result<Self>113     pub(super) fn arch_try_clone(&self) -> Result<Self> {
114         Ok(KvmKernelIrqChip {
115             vm: self.vm.try_clone()?,
116             vcpus: self.vcpus.clone(),
117             routes: self.routes.clone(),
118         })
119     }
120 }
121 
122 impl IrqChipX86_64 for KvmKernelIrqChip {
try_box_clone(&self) -> Result<Box<dyn IrqChipX86_64>>123     fn try_box_clone(&self) -> Result<Box<dyn IrqChipX86_64>> {
124         Ok(Box::new(self.try_clone()?))
125     }
126 
as_irq_chip(&self) -> &dyn IrqChip127     fn as_irq_chip(&self) -> &dyn IrqChip {
128         self
129     }
130 
as_irq_chip_mut(&mut self) -> &mut dyn IrqChip131     fn as_irq_chip_mut(&mut self) -> &mut dyn IrqChip {
132         self
133     }
134 
135     /// Get the current state of the PIC
get_pic_state(&self, select: PicSelect) -> Result<PicState>136     fn get_pic_state(&self, select: PicSelect) -> Result<PicState> {
137         Ok(PicState::from(&self.vm.get_pic_state(select)?))
138     }
139 
140     /// Set the current state of the PIC
set_pic_state(&mut self, select: PicSelect, state: &PicState) -> Result<()>141     fn set_pic_state(&mut self, select: PicSelect, state: &PicState) -> Result<()> {
142         self.vm.set_pic_state(select, &kvm_pic_state::from(state))
143     }
144 
145     /// Get the current state of the IOAPIC
get_ioapic_state(&self) -> Result<IoapicState>146     fn get_ioapic_state(&self) -> Result<IoapicState> {
147         Ok(IoapicState::from(&self.vm.get_ioapic_state()?))
148     }
149 
150     /// Set the current state of the IOAPIC
set_ioapic_state(&mut self, state: &IoapicState) -> Result<()>151     fn set_ioapic_state(&mut self, state: &IoapicState) -> Result<()> {
152         self.vm.set_ioapic_state(&kvm_ioapic_state::from(state))
153     }
154 
155     /// Get the current state of the specified VCPU's local APIC
get_lapic_state(&self, vcpu_id: usize) -> Result<LapicState>156     fn get_lapic_state(&self, vcpu_id: usize) -> Result<LapicState> {
157         match self.vcpus.lock().get(vcpu_id) {
158             Some(Some(vcpu)) => Ok(LapicState::from(&vcpu.get_lapic()?)),
159             _ => Err(Error::new(libc::ENOENT)),
160         }
161     }
162 
163     /// Set the current state of the specified VCPU's local APIC
set_lapic_state(&mut self, vcpu_id: usize, state: &LapicState) -> Result<()>164     fn set_lapic_state(&mut self, vcpu_id: usize, state: &LapicState) -> Result<()> {
165         match self.vcpus.lock().get(vcpu_id) {
166             Some(Some(vcpu)) => vcpu.set_lapic(&kvm_lapic_state::from(state)),
167             _ => Err(Error::new(libc::ENOENT)),
168         }
169     }
170 
171     /// Get the lapic frequency in Hz
lapic_frequency(&self) -> u32172     fn lapic_frequency(&self) -> u32 {
173         // KVM emulates the lapic to have a bus frequency of 1GHz
174         1_000_000_000
175     }
176 
177     /// Retrieves the state of the PIT. Gets the pit state via the KVM API.
get_pit(&self) -> Result<PitState>178     fn get_pit(&self) -> Result<PitState> {
179         Ok(PitState::from(&self.vm.get_pit_state()?))
180     }
181 
182     /// Sets the state of the PIT. Sets the pit state via the KVM API.
set_pit(&mut self, state: &PitState) -> Result<()>183     fn set_pit(&mut self, state: &PitState) -> Result<()> {
184         self.vm.set_pit_state(&kvm_pit_state2::from(state))
185     }
186 
187     /// Returns true if the PIT uses port 0x61 for the PC speaker, false if 0x61 is unused.
188     /// KVM's kernel PIT doesn't use 0x61.
pit_uses_speaker_port(&self) -> bool189     fn pit_uses_speaker_port(&self) -> bool {
190         false
191     }
192 
snapshot_chip_specific(&self) -> anyhow::Result<serde_json::Value>193     fn snapshot_chip_specific(&self) -> anyhow::Result<serde_json::Value> {
194         let mut apics: Vec<u64> = Vec::new();
195         let mut interrupt_bitmaps: Vec<[u64; 4usize]> = Vec::new();
196         {
197             let vcpus_lock = self.vcpus.lock();
198             for vcpu in (*vcpus_lock).iter().flatten() {
199                 apics.push(vcpu.get_apic_base()?);
200                 interrupt_bitmaps.push(vcpu.get_interrupt_bitmap()?);
201             }
202         }
203         serde_json::to_value(KvmKernelIrqChipSnapshot {
204             routes: self.routes.lock().clone(),
205             apic_base: apics,
206             interrupt_bitmap: interrupt_bitmaps,
207         })
208         .context("failed to serialize KvmKernelIrqChip")
209     }
210 
restore_chip_specific(&mut self, data: serde_json::Value) -> anyhow::Result<()>211     fn restore_chip_specific(&mut self, data: serde_json::Value) -> anyhow::Result<()> {
212         let deser: KvmKernelIrqChipSnapshot =
213             serde_json::from_value(data).context("failed to deserialize data")?;
214         self.set_irq_routes(&deser.routes)?;
215         let vcpus_lock = self.vcpus.lock();
216         assert_eq!(deser.interrupt_bitmap.len(), vcpus_lock.len());
217         assert_eq!(deser.apic_base.len(), vcpus_lock.len());
218         for (i, vcpu) in vcpus_lock.iter().enumerate() {
219             if let Some(vcpu) = vcpu {
220                 vcpu.set_apic_base(*deser.apic_base.get(i).unwrap())?;
221                 vcpu.set_interrupt_bitmap(*deser.interrupt_bitmap.get(i).unwrap())?;
222             } else {
223                 return Err(anyhow!(
224                     "Received None instead of Vcpu while restoring apic_base and interrupt_bitmap"
225                 ));
226             }
227         }
228         Ok(())
229     }
230 }
231 
/// The KvmSplitIrqChip supports KVM's SPLIT_IRQCHIP feature, where the PIC and IOAPIC
/// are emulated in userspace, while the local APICs are emulated in the kernel.
/// The SPLIT_IRQCHIP feature only supports x86/x86_64 so we only define this IrqChip in crosvm
/// for x86/x86_64.
pub struct KvmSplitIrqChip {
    /// VM handle used for irqfd registration and GSI routing.
    vm: KvmVm,
    /// One slot per vcpu; a slot is `None` until `add_vcpu` stores a clone of the vcpu in it.
    vcpus: Arc<Mutex<Vec<Option<KvmVcpu>>>>,
    /// All current irq routes (irqchip and MSI). Only the MSI routes are forwarded to KVM.
    routes: Arc<Mutex<Vec<IrqRoute>>>,
    /// Userspace PIT, attached to the io bus by `finalize_devices`.
    pit: Arc<Mutex<Pit>>,
    /// Userspace PIC, attached to the io bus by `finalize_devices`.
    pic: Arc<Mutex<Pic>>,
    /// Userspace IOAPIC, attached to the mmio bus by `finalize_devices`.
    ioapic: Arc<Mutex<Ioapic>>,
    /// Number of IOAPIC pins. Irq events for GSIs below this value are tracked in `irq_events`;
    /// GSIs at or above it are registered with KVM as irqfds.
    ioapic_pins: usize,
    /// Vec of ioapic irq events that have been delayed because the ioapic was locked when
    /// service_irq was called on the irqchip. This prevents deadlocks when a Vcpu thread has
    /// locked the ioapic and the ioapic sends an AddMsiRoute signal to the main thread (which
    /// itself may be busy trying to call service_irq).
    delayed_ioapic_irq_events: Arc<Mutex<Vec<usize>>>,
    /// Event which is meant to trigger processing of any irq events that were delayed.
    delayed_ioapic_irq_trigger: Event,
    /// Array of Events that devices will use to assert ioapic pins.
    irq_events: Arc<Mutex<Vec<Option<IrqEvent>>>>,
}
254 
kvm_dummy_msi_routes(ioapic_pins: usize) -> Vec<IrqRoute>255 fn kvm_dummy_msi_routes(ioapic_pins: usize) -> Vec<IrqRoute> {
256     let mut routes: Vec<IrqRoute> = Vec::new();
257     for i in 0..ioapic_pins {
258         routes.push(
259             // Add dummy MSI routes to replace the default IRQChip routes.
260             IrqRoute {
261                 gsi: i as u32,
262                 source: IrqSource::Msi {
263                     address: 0,
264                     data: 0,
265                 },
266             },
267         );
268     }
269     routes
270 }
271 
272 impl KvmSplitIrqChip {
273     /// Construct a new KvmSplitIrqChip.
new( vm: KvmVm, num_vcpus: usize, irq_tube: Tube, ioapic_pins: Option<usize>, ) -> Result<Self>274     pub fn new(
275         vm: KvmVm,
276         num_vcpus: usize,
277         irq_tube: Tube,
278         ioapic_pins: Option<usize>,
279     ) -> Result<Self> {
280         let ioapic_pins = ioapic_pins.unwrap_or(vm.get_ioapic_num_pins()?);
281         vm.enable_split_irqchip(ioapic_pins)?;
282         let pit_evt = IrqEdgeEvent::new()?;
283         let pit = Pit::new(pit_evt.try_clone()?, Arc::new(Mutex::new(Clock::new()))).map_err(
284             |e| match e {
285                 PitError::CloneEvent(err) => err,
286                 PitError::CreateEvent(err) => err,
287                 PitError::CreateWaitContext(err) => err,
288                 PitError::WaitError(err) => err,
289                 PitError::TimerCreateError(err) => err,
290                 PitError::SpawnThread(_) => Error::new(libc::EIO),
291             },
292         )?;
293 
294         let pit_event_source = IrqEventSource::from_device(&pit);
295 
296         let mut chip = KvmSplitIrqChip {
297             vm,
298             vcpus: Arc::new(Mutex::new((0..num_vcpus).map(|_| None).collect())),
299             routes: Arc::new(Mutex::new(Vec::new())),
300             pit: Arc::new(Mutex::new(pit)),
301             pic: Arc::new(Mutex::new(Pic::new())),
302             ioapic: Arc::new(Mutex::new(Ioapic::new(irq_tube, ioapic_pins)?)),
303             ioapic_pins,
304             delayed_ioapic_irq_events: Arc::new(Mutex::new(Vec::new())),
305             delayed_ioapic_irq_trigger: Event::new()?,
306             irq_events: Arc::new(Mutex::new(Default::default())),
307         };
308 
309         // Setup standard x86 irq routes
310         let mut routes = kvm_default_irq_routing_table(ioapic_pins);
311         // Add dummy MSI routes for the first ioapic_pins GSIs
312         routes.append(&mut kvm_dummy_msi_routes(ioapic_pins));
313 
314         // Set the routes so they get sent to KVM
315         chip.set_irq_routes(&routes)?;
316 
317         chip.register_edge_irq_event(PIT_CHANNEL0_IRQ, &pit_evt, pit_event_source)?;
318         Ok(chip)
319     }
320 }
321 
322 impl KvmSplitIrqChip {
323     /// Convenience function for determining which chips the supplied irq routes to.
routes_to_chips(&self, irq: u32) -> Vec<(IrqSourceChip, u32)>324     fn routes_to_chips(&self, irq: u32) -> Vec<(IrqSourceChip, u32)> {
325         let mut chips = Vec::new();
326         for route in self.routes.lock().iter() {
327             match route {
328                 IrqRoute {
329                     gsi,
330                     source: IrqSource::Irqchip { chip, pin },
331                 } if *gsi == irq => match chip {
332                     IrqSourceChip::PicPrimary
333                     | IrqSourceChip::PicSecondary
334                     | IrqSourceChip::Ioapic => chips.push((*chip, *pin)),
335                     IrqSourceChip::Gic => {
336                         error!("gic irq should not be possible on a KvmSplitIrqChip")
337                     }
338                     IrqSourceChip::Aia => {
339                         error!("Aia irq should not be possible on x86_64")
340                     }
341                 },
342                 // Ignore MSIs and other routes
343                 _ => {}
344             }
345         }
346         chips
347     }
348 
349     /// Return true if there is a pending interrupt for the specified vcpu. For KvmSplitIrqChip
350     /// this calls interrupt_requested on the pic.
interrupt_requested(&self, vcpu_id: usize) -> bool351     pub fn interrupt_requested(&self, vcpu_id: usize) -> bool {
352         // Pic interrupts for the split irqchip only go to vcpu 0
353         if vcpu_id != 0 {
354             return false;
355         }
356         self.pic.lock().interrupt_requested()
357     }
358 
359     /// Check if the specified vcpu has any pending interrupts. Returns [`None`] for no interrupts,
360     /// otherwise [`Some::<u8>`] should be the injected interrupt vector. For [`KvmSplitIrqChip`]
361     /// this calls `get_external_interrupt` on the pic.
get_external_interrupt(&self, vcpu_id: usize) -> Option<u8>362     pub fn get_external_interrupt(&self, vcpu_id: usize) -> Option<u8> {
363         // Pic interrupts for the split irqchip only go to vcpu 0
364         if vcpu_id != 0 {
365             return None;
366         }
367         self.pic.lock().get_external_interrupt()
368     }
369 
370     /// Register an event that can trigger an interrupt for a particular GSI.
register_irq_event( &mut self, irq: u32, irq_event: &Event, resample_event: Option<&Event>, source: IrqEventSource, ) -> Result<Option<IrqEventIndex>>371     fn register_irq_event(
372         &mut self,
373         irq: u32,
374         irq_event: &Event,
375         resample_event: Option<&Event>,
376         source: IrqEventSource,
377     ) -> Result<Option<IrqEventIndex>> {
378         if irq < self.ioapic_pins as u32 {
379             let mut evt = IrqEvent {
380                 gsi: irq,
381                 event: irq_event.try_clone()?,
382                 resample_event: None,
383                 source,
384             };
385 
386             if let Some(resample_event) = resample_event {
387                 evt.resample_event = Some(resample_event.try_clone()?);
388             }
389 
390             let mut irq_events = self.irq_events.lock();
391             let index = irq_events.len();
392             irq_events.push(Some(evt));
393             Ok(Some(index))
394         } else {
395             self.vm.register_irqfd(irq, irq_event, resample_event)?;
396             Ok(None)
397         }
398     }
399 
400     /// Unregister an event for a particular GSI.
unregister_irq_event(&mut self, irq: u32, irq_event: &Event) -> Result<()>401     fn unregister_irq_event(&mut self, irq: u32, irq_event: &Event) -> Result<()> {
402         if irq < self.ioapic_pins as u32 {
403             let mut irq_events = self.irq_events.lock();
404             for (index, evt) in irq_events.iter().enumerate() {
405                 if let Some(evt) = evt {
406                     if evt.gsi == irq && irq_event.eq(&evt.event) {
407                         irq_events[index] = None;
408                         break;
409                     }
410                 }
411             }
412             Ok(())
413         } else {
414             self.vm.unregister_irqfd(irq, irq_event)
415         }
416     }
417 }
418 
419 /// Convenience function for determining whether or not two irq routes conflict.
420 /// Returns true if they conflict.
routes_conflict(route: &IrqRoute, other: &IrqRoute) -> bool421 fn routes_conflict(route: &IrqRoute, other: &IrqRoute) -> bool {
422     // They don't conflict if they have different GSIs.
423     if route.gsi != other.gsi {
424         return false;
425     }
426 
427     // If they're both MSI with the same GSI then they conflict.
428     if let (IrqSource::Msi { .. }, IrqSource::Msi { .. }) = (route.source, other.source) {
429         return true;
430     }
431 
432     // If the route chips match and they have the same GSI then they conflict.
433     if let (
434         IrqSource::Irqchip {
435             chip: route_chip, ..
436         },
437         IrqSource::Irqchip {
438             chip: other_chip, ..
439         },
440     ) = (route.source, other.source)
441     {
442         return route_chip == other_chip;
443     }
444 
445     // Otherwise they do not conflict.
446     false
447 }
448 
449 /// This IrqChip only works with Kvm so we only implement it for KvmVcpu.
450 impl IrqChip for KvmSplitIrqChip {
451     /// Add a vcpu to the irq chip.
add_vcpu(&mut self, vcpu_id: usize, vcpu: &dyn Vcpu) -> Result<()>452     fn add_vcpu(&mut self, vcpu_id: usize, vcpu: &dyn Vcpu) -> Result<()> {
453         let vcpu: &KvmVcpu = vcpu
454             .downcast_ref()
455             .expect("KvmSplitIrqChip::add_vcpu called with non-KvmVcpu");
456         self.vcpus.lock()[vcpu_id] = Some(vcpu.try_clone()?);
457         Ok(())
458     }
459 
460     /// Register an event that can trigger an interrupt for a particular GSI.
register_edge_irq_event( &mut self, irq: u32, irq_event: &IrqEdgeEvent, source: IrqEventSource, ) -> Result<Option<IrqEventIndex>>461     fn register_edge_irq_event(
462         &mut self,
463         irq: u32,
464         irq_event: &IrqEdgeEvent,
465         source: IrqEventSource,
466     ) -> Result<Option<IrqEventIndex>> {
467         self.register_irq_event(irq, irq_event.get_trigger(), None, source)
468     }
469 
unregister_edge_irq_event(&mut self, irq: u32, irq_event: &IrqEdgeEvent) -> Result<()>470     fn unregister_edge_irq_event(&mut self, irq: u32, irq_event: &IrqEdgeEvent) -> Result<()> {
471         self.unregister_irq_event(irq, irq_event.get_trigger())
472     }
473 
register_level_irq_event( &mut self, irq: u32, irq_event: &IrqLevelEvent, source: IrqEventSource, ) -> Result<Option<IrqEventIndex>>474     fn register_level_irq_event(
475         &mut self,
476         irq: u32,
477         irq_event: &IrqLevelEvent,
478         source: IrqEventSource,
479     ) -> Result<Option<IrqEventIndex>> {
480         self.register_irq_event(
481             irq,
482             irq_event.get_trigger(),
483             Some(irq_event.get_resample()),
484             source,
485         )
486     }
487 
unregister_level_irq_event(&mut self, irq: u32, irq_event: &IrqLevelEvent) -> Result<()>488     fn unregister_level_irq_event(&mut self, irq: u32, irq_event: &IrqLevelEvent) -> Result<()> {
489         self.unregister_irq_event(irq, irq_event.get_trigger())
490     }
491 
492     /// Route an IRQ line to an interrupt controller, or to a particular MSI vector.
route_irq(&mut self, route: IrqRoute) -> Result<()>493     fn route_irq(&mut self, route: IrqRoute) -> Result<()> {
494         let mut routes = self.routes.lock();
495         routes.retain(|r| !routes_conflict(r, &route));
496 
497         routes.push(route);
498 
499         // We only call set_gsi_routing with the msi routes
500         let mut msi_routes = routes.clone();
501         msi_routes.retain(|r| matches!(r.source, IrqSource::Msi { .. }));
502 
503         self.vm.set_gsi_routing(&msi_routes)
504     }
505 
506     /// Replace all irq routes with the supplied routes
set_irq_routes(&mut self, routes: &[IrqRoute]) -> Result<()>507     fn set_irq_routes(&mut self, routes: &[IrqRoute]) -> Result<()> {
508         let mut current_routes = self.routes.lock();
509         *current_routes = routes.to_vec();
510 
511         // We only call set_gsi_routing with the msi routes
512         let mut msi_routes = routes.to_vec();
513         msi_routes.retain(|r| matches!(r.source, IrqSource::Msi { .. }));
514 
515         self.vm.set_gsi_routing(&msi_routes)
516     }
517 
518     /// Return a vector of all registered irq numbers and their associated events and event
519     /// indices. These should be used by the main thread to wait for irq events.
irq_event_tokens(&self) -> Result<Vec<(IrqEventIndex, IrqEventSource, Event)>>520     fn irq_event_tokens(&self) -> Result<Vec<(IrqEventIndex, IrqEventSource, Event)>> {
521         let mut tokens = vec![];
522         for (index, evt) in self.irq_events.lock().iter().enumerate() {
523             if let Some(evt) = evt {
524                 tokens.push((index, evt.source.clone(), evt.event.try_clone()?));
525             }
526         }
527         Ok(tokens)
528     }
529 
530     /// Either assert or deassert an IRQ line.  Sends to either an interrupt controller, or does
531     /// a send_msi if the irq is associated with an MSI.
service_irq(&mut self, irq: u32, level: bool) -> Result<()>532     fn service_irq(&mut self, irq: u32, level: bool) -> Result<()> {
533         let chips = self.routes_to_chips(irq);
534         for (chip, pin) in chips {
535             match chip {
536                 IrqSourceChip::PicPrimary | IrqSourceChip::PicSecondary => {
537                     self.pic.lock().service_irq(pin as u8, level);
538                 }
539                 IrqSourceChip::Ioapic => {
540                     self.ioapic.lock().service_irq(pin as usize, level);
541                 }
542                 _ => {}
543             }
544         }
545         Ok(())
546     }
547 
548     /// Service an IRQ event by asserting then deasserting an IRQ line. The associated Event
549     /// that triggered the irq event will be read from. If the irq is associated with a resample
550     /// Event, then the deassert will only happen after an EOI is broadcast for a vector
551     /// associated with the irq line.
552     /// For the KvmSplitIrqChip, this function identifies which chips the irq routes to, then
553     /// attempts to call service_irq on those chips. If the ioapic is unable to be immediately
554     /// locked, we add the irq to the delayed_ioapic_irq_events Vec (though we still read
555     /// from the Event that triggered the irq event).
service_irq_event(&mut self, event_index: IrqEventIndex) -> Result<()>556     fn service_irq_event(&mut self, event_index: IrqEventIndex) -> Result<()> {
557         if let Some(evt) = &self.irq_events.lock()[event_index] {
558             evt.event.wait()?;
559             let chips = self.routes_to_chips(evt.gsi);
560 
561             for (chip, pin) in chips {
562                 match chip {
563                     IrqSourceChip::PicPrimary | IrqSourceChip::PicSecondary => {
564                         let mut pic = self.pic.lock();
565                         pic.service_irq(pin as u8, true);
566                         if evt.resample_event.is_none() {
567                             pic.service_irq(pin as u8, false);
568                         }
569                     }
570                     IrqSourceChip::Ioapic => {
571                         if let Ok(mut ioapic) = self.ioapic.try_lock() {
572                             ioapic.service_irq(pin as usize, true);
573                             if evt.resample_event.is_none() {
574                                 ioapic.service_irq(pin as usize, false);
575                             }
576                         } else {
577                             self.delayed_ioapic_irq_events.lock().push(event_index);
578                             self.delayed_ioapic_irq_trigger.signal().unwrap();
579                         }
580                     }
581                     _ => {}
582                 }
583             }
584         }
585 
586         Ok(())
587     }
588 
589     /// Broadcast an end of interrupt. For KvmSplitIrqChip this sends the EOI to the ioapic
broadcast_eoi(&self, vector: u8) -> Result<()>590     fn broadcast_eoi(&self, vector: u8) -> Result<()> {
591         self.ioapic.lock().end_of_interrupt(vector);
592         Ok(())
593     }
594 
595     /// Injects any pending interrupts for `vcpu`.
596     /// For KvmSplitIrqChip this injects any PIC interrupts on vcpu_id 0.
inject_interrupts(&self, vcpu: &dyn Vcpu) -> Result<()>597     fn inject_interrupts(&self, vcpu: &dyn Vcpu) -> Result<()> {
598         let vcpu: &KvmVcpu = vcpu
599             .downcast_ref()
600             .expect("KvmSplitIrqChip::add_vcpu called with non-KvmVcpu");
601 
602         let vcpu_id = vcpu.id();
603         if !self.interrupt_requested(vcpu_id) || !vcpu.ready_for_interrupt() {
604             return Ok(());
605         }
606 
607         if let Some(vector) = self.get_external_interrupt(vcpu_id) {
608             vcpu.interrupt(vector)?;
609         }
610 
611         // The second interrupt request should be handled immediately, so ask vCPU to exit as soon
612         // as possible.
613         if self.interrupt_requested(vcpu_id) {
614             vcpu.set_interrupt_window_requested(true);
615         }
616         Ok(())
617     }
618 
619     /// Notifies the irq chip that the specified VCPU has executed a halt instruction.
620     /// For KvmSplitIrqChip this is a no-op because KVM handles VCPU blocking.
halted(&self, _vcpu_id: usize)621     fn halted(&self, _vcpu_id: usize) {}
622 
623     /// Blocks until `vcpu` is in a runnable state or until interrupted by
624     /// `IrqChip::kick_halted_vcpus`.  Returns `VcpuRunState::Runnable if vcpu is runnable, or
625     /// `VcpuRunState::Interrupted` if the wait was interrupted.
626     /// For KvmSplitIrqChip this is a no-op and always returns Runnable because KVM handles VCPU
627     /// blocking.
wait_until_runnable(&self, _vcpu: &dyn Vcpu) -> Result<VcpuRunState>628     fn wait_until_runnable(&self, _vcpu: &dyn Vcpu) -> Result<VcpuRunState> {
629         Ok(VcpuRunState::Runnable)
630     }
631 
632     /// Makes unrunnable VCPUs return immediately from `wait_until_runnable`.
633     /// For KvmSplitIrqChip this is a no-op because KVM handles VCPU blocking.
kick_halted_vcpus(&self)634     fn kick_halted_vcpus(&self) {}
635 
636     /// Get the current MP state of the specified VCPU.
get_mp_state(&self, vcpu_id: usize) -> Result<MPState>637     fn get_mp_state(&self, vcpu_id: usize) -> Result<MPState> {
638         match self.vcpus.lock().get(vcpu_id) {
639             Some(Some(vcpu)) => Ok(MPState::from(&vcpu.get_mp_state()?)),
640             _ => Err(Error::new(libc::ENOENT)),
641         }
642     }
643 
644     /// Set the current MP state of the specified VCPU.
set_mp_state(&mut self, vcpu_id: usize, state: &MPState) -> Result<()>645     fn set_mp_state(&mut self, vcpu_id: usize, state: &MPState) -> Result<()> {
646         match self.vcpus.lock().get(vcpu_id) {
647             Some(Some(vcpu)) => vcpu.set_mp_state(&kvm_mp_state::from(state)),
648             _ => Err(Error::new(libc::ENOENT)),
649         }
650     }
651 
652     /// Attempt to clone this IrqChip instance.
try_clone(&self) -> Result<Self>653     fn try_clone(&self) -> Result<Self> {
654         Ok(KvmSplitIrqChip {
655             vm: self.vm.try_clone()?,
656             vcpus: self.vcpus.clone(),
657             routes: self.routes.clone(),
658             pit: self.pit.clone(),
659             pic: self.pic.clone(),
660             ioapic: self.ioapic.clone(),
661             ioapic_pins: self.ioapic_pins,
662             delayed_ioapic_irq_events: self.delayed_ioapic_irq_events.clone(),
663             delayed_ioapic_irq_trigger: Event::new()?,
664             irq_events: self.irq_events.clone(),
665         })
666     }
667 
668     /// Finalize irqchip setup. Should be called once all devices have registered irq events and
669     /// been added to the io_bus and mmio_bus.
finalize_devices( &mut self, resources: &mut SystemAllocator, io_bus: &Bus, mmio_bus: &Bus, ) -> Result<()>670     fn finalize_devices(
671         &mut self,
672         resources: &mut SystemAllocator,
673         io_bus: &Bus,
674         mmio_bus: &Bus,
675     ) -> Result<()> {
676         // Insert pit into io_bus
677         io_bus.insert(self.pit.clone(), 0x040, 0x8).unwrap();
678         io_bus.insert(self.pit.clone(), 0x061, 0x1).unwrap();
679 
680         // Insert pic into io_bus
681         io_bus.insert(self.pic.clone(), 0x20, 0x2).unwrap();
682         io_bus.insert(self.pic.clone(), 0xa0, 0x2).unwrap();
683         io_bus.insert(self.pic.clone(), 0x4d0, 0x2).unwrap();
684 
685         // Insert ioapic into mmio_bus
686         mmio_bus
687             .insert(
688                 self.ioapic.clone(),
689                 IOAPIC_BASE_ADDRESS,
690                 IOAPIC_MEM_LENGTH_BYTES,
691             )
692             .unwrap();
693 
694         // At this point, all of our devices have been created and they have registered their
695         // irq events, so we can clone our resample events
696         let mut ioapic_resample_events: Vec<Vec<Event>> =
697             (0..self.ioapic_pins).map(|_| Vec::new()).collect();
698         let mut pic_resample_events: Vec<Vec<Event>> =
699             (0..self.ioapic_pins).map(|_| Vec::new()).collect();
700 
701         for evt in self.irq_events.lock().iter().flatten() {
702             if (evt.gsi as usize) >= self.ioapic_pins {
703                 continue;
704             }
705             if let Some(resample_evt) = &evt.resample_event {
706                 ioapic_resample_events[evt.gsi as usize].push(resample_evt.try_clone()?);
707                 pic_resample_events[evt.gsi as usize].push(resample_evt.try_clone()?);
708             }
709         }
710 
711         // Register resample events with the ioapic
712         self.ioapic
713             .lock()
714             .register_resample_events(ioapic_resample_events);
715         // Register resample events with the pic
716         self.pic
717             .lock()
718             .register_resample_events(pic_resample_events);
719 
720         // Make sure all future irq numbers are beyond IO-APIC range.
721         let mut irq_num = resources.allocate_irq().unwrap();
722         while irq_num < self.ioapic_pins as u32 {
723             irq_num = resources.allocate_irq().unwrap();
724         }
725 
726         Ok(())
727     }
728 
729     /// The KvmSplitIrqChip's ioapic may be locked because a vcpu thread is currently writing to
730     /// the ioapic, and the ioapic may be blocking on adding MSI routes, which requires blocking
731     /// socket communication back to the main thread.  Thus, we do not want the main thread to
732     /// block on a locked ioapic, so any irqs that could not be serviced because the ioapic could
733     /// not be immediately locked are added to the delayed_ioapic_irq_events Vec. This function
734     /// processes each delayed event in the vec each time it's called. If the ioapic is still
735     /// locked, we keep the queued irqs for the next time this function is called.
process_delayed_irq_events(&mut self) -> Result<()>736     fn process_delayed_irq_events(&mut self) -> Result<()> {
737         self.delayed_ioapic_irq_events
738             .lock()
739             .retain(|&event_index| {
740                 if let Some(evt) = &self.irq_events.lock()[event_index] {
741                     if let Ok(mut ioapic) = self.ioapic.try_lock() {
742                         ioapic.service_irq(evt.gsi as usize, true);
743                         if evt.resample_event.is_none() {
744                             ioapic.service_irq(evt.gsi as usize, false);
745                         }
746 
747                         false
748                     } else {
749                         true
750                     }
751                 } else {
752                     true
753                 }
754             });
755 
756         if self.delayed_ioapic_irq_events.lock().is_empty() {
757             self.delayed_ioapic_irq_trigger.wait()?;
758         }
759 
760         Ok(())
761     }
762 
irq_delayed_event_token(&self) -> Result<Option<Event>>763     fn irq_delayed_event_token(&self) -> Result<Option<Event>> {
764         Ok(Some(self.delayed_ioapic_irq_trigger.try_clone()?))
765     }
766 
check_capability(&self, c: IrqChipCap) -> bool767     fn check_capability(&self, c: IrqChipCap) -> bool {
768         match c {
769             IrqChipCap::TscDeadlineTimer => self
770                 .vm
771                 .get_hypervisor()
772                 .check_capability(HypervisorCap::TscDeadlineTimer),
773             IrqChipCap::X2Apic => true,
774             IrqChipCap::MpStateGetSet => true,
775         }
776     }
777 }
778 
/// Serialized form of the state that is specific to `KvmSplitIrqChip`: produced by
/// `snapshot_chip_specific` and consumed by `restore_chip_specific`.
#[derive(Serialize, Deserialize)]
struct KvmSplitIrqChipSnapshot {
    // Copy of the chip's irq routing table (`self.routes`) taken at snapshot time; on restore
    // it is passed to `set_irq_routes`.
    routes: Vec<IrqRoute>,
}
783 
784 impl IrqChipX86_64 for KvmSplitIrqChip {
try_box_clone(&self) -> Result<Box<dyn IrqChipX86_64>>785     fn try_box_clone(&self) -> Result<Box<dyn IrqChipX86_64>> {
786         Ok(Box::new(self.try_clone()?))
787     }
788 
as_irq_chip(&self) -> &dyn IrqChip789     fn as_irq_chip(&self) -> &dyn IrqChip {
790         self
791     }
792 
as_irq_chip_mut(&mut self) -> &mut dyn IrqChip793     fn as_irq_chip_mut(&mut self) -> &mut dyn IrqChip {
794         self
795     }
796 
797     /// Get the current state of the PIC
get_pic_state(&self, select: PicSelect) -> Result<PicState>798     fn get_pic_state(&self, select: PicSelect) -> Result<PicState> {
799         Ok(self.pic.lock().get_pic_state(select))
800     }
801 
802     /// Set the current state of the PIC
set_pic_state(&mut self, select: PicSelect, state: &PicState) -> Result<()>803     fn set_pic_state(&mut self, select: PicSelect, state: &PicState) -> Result<()> {
804         self.pic.lock().set_pic_state(select, state);
805         Ok(())
806     }
807 
808     /// Get the current state of the IOAPIC
get_ioapic_state(&self) -> Result<IoapicState>809     fn get_ioapic_state(&self) -> Result<IoapicState> {
810         Ok(self.ioapic.lock().get_ioapic_state())
811     }
812 
813     /// Set the current state of the IOAPIC
set_ioapic_state(&mut self, state: &IoapicState) -> Result<()>814     fn set_ioapic_state(&mut self, state: &IoapicState) -> Result<()> {
815         self.ioapic.lock().set_ioapic_state(state);
816         Ok(())
817     }
818 
819     /// Get the current state of the specified VCPU's local APIC
get_lapic_state(&self, vcpu_id: usize) -> Result<LapicState>820     fn get_lapic_state(&self, vcpu_id: usize) -> Result<LapicState> {
821         match self.vcpus.lock().get(vcpu_id) {
822             Some(Some(vcpu)) => Ok(LapicState::from(&vcpu.get_lapic()?)),
823             _ => Err(Error::new(libc::ENOENT)),
824         }
825     }
826 
827     /// Set the current state of the specified VCPU's local APIC
set_lapic_state(&mut self, vcpu_id: usize, state: &LapicState) -> Result<()>828     fn set_lapic_state(&mut self, vcpu_id: usize, state: &LapicState) -> Result<()> {
829         match self.vcpus.lock().get(vcpu_id) {
830             Some(Some(vcpu)) => vcpu.set_lapic(&kvm_lapic_state::from(state)),
831             _ => Err(Error::new(libc::ENOENT)),
832         }
833     }
834 
835     /// Get the lapic frequency in Hz
lapic_frequency(&self) -> u32836     fn lapic_frequency(&self) -> u32 {
837         // KVM emulates the lapic to have a bus frequency of 1GHz
838         1_000_000_000
839     }
840 
841     /// Retrieves the state of the PIT. Gets the pit state via the KVM API.
get_pit(&self) -> Result<PitState>842     fn get_pit(&self) -> Result<PitState> {
843         Ok(self.pit.lock().get_pit_state())
844     }
845 
846     /// Sets the state of the PIT. Sets the pit state via the KVM API.
set_pit(&mut self, state: &PitState) -> Result<()>847     fn set_pit(&mut self, state: &PitState) -> Result<()> {
848         self.pit.lock().set_pit_state(state);
849         Ok(())
850     }
851 
852     /// Returns true if the PIT uses port 0x61 for the PC speaker, false if 0x61 is unused.
853     /// devices::Pit uses 0x61.
pit_uses_speaker_port(&self) -> bool854     fn pit_uses_speaker_port(&self) -> bool {
855         true
856     }
857 
snapshot_chip_specific(&self) -> anyhow::Result<serde_json::Value>858     fn snapshot_chip_specific(&self) -> anyhow::Result<serde_json::Value> {
859         serde_json::to_value(KvmSplitIrqChipSnapshot {
860             routes: self.routes.lock().clone(),
861         })
862         .context("failed to serialize KvmSplitIrqChip")
863     }
864 
restore_chip_specific(&mut self, data: serde_json::Value) -> anyhow::Result<()>865     fn restore_chip_specific(&mut self, data: serde_json::Value) -> anyhow::Result<()> {
866         let deser: KvmSplitIrqChipSnapshot =
867             serde_json::from_value(data).context("failed to deserialize KvmSplitIrqChip")?;
868         self.set_irq_routes(&deser.routes)?;
869         Ok(())
870     }
871 }
872