xref: /aosp_15_r20/external/crosvm/src/sys/windows/irq_wait.rs (revision bb4ee6a4ae7042d18b07a98463b9c8b875e44b39)
1 // Copyright 2022 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 //! Handles the main wait loop for IRQs.
6 //! Should be started on a background thread.
7 
8 use std::collections::HashMap;
9 use std::io;
10 use std::sync::Arc;
11 use std::thread;
12 use std::thread::JoinHandle;
13 use std::time::Duration;
14 use std::time::Instant;
15 
16 use anyhow::Context;
17 use arch::IrqChipArch;
18 use base::error;
19 use base::info;
20 use base::warn;
21 use base::Event;
22 use base::EventToken;
23 use base::ReadNotifier;
24 use base::Result;
25 use base::Tube;
26 use base::TubeError;
27 use base::WaitContext;
28 use base::MAXIMUM_WAIT_OBJECTS;
29 use devices::IrqEdgeEvent;
30 use devices::IrqEventIndex;
31 use devices::IrqEventSource;
32 use metrics::log_high_frequency_descriptor_event;
33 use metrics::MetricEventType;
34 use resources::SystemAllocator;
35 use sync::Mutex;
36 use vm_control::IrqHandlerRequest;
37 use vm_control::IrqHandlerResponse;
38 use vm_control::IrqSetup;
39 use vm_control::VmIrqRequest;
40 
41 pub struct IrqWaitWorker {
42     irq_handler_control: Tube,
43     irq_chip: Box<dyn IrqChipArch>,
44     irq_control_tubes: Vec<Tube>,
45     sys_allocator: Arc<Mutex<SystemAllocator>>,
46 }
47 
48 #[derive(EventToken)]
49 enum Token {
50     VmControl { index: usize },
51     IrqHandlerControl,
52     DelayedIrqEvent,
53 }
54 
55 impl IrqWaitWorker {
start( irq_handler_control: Tube, irq_chip: Box<dyn IrqChipArch>, irq_control_tubes: Vec<Tube>, sys_allocator: Arc<Mutex<SystemAllocator>>, ) -> JoinHandle<anyhow::Result<()>>56     pub fn start(
57         irq_handler_control: Tube,
58         irq_chip: Box<dyn IrqChipArch>,
59         irq_control_tubes: Vec<Tube>,
60         sys_allocator: Arc<Mutex<SystemAllocator>>,
61     ) -> JoinHandle<anyhow::Result<()>> {
62         let mut irq_worker = IrqWaitWorker {
63             irq_handler_control,
64             irq_chip,
65             irq_control_tubes,
66             sys_allocator,
67         };
68         thread::Builder::new()
69             .name("irq_wait_loop".into())
70             .spawn(move || irq_worker.run())
71             .unwrap()
72     }
73 
add_child( wait_ctx: &WaitContext<Token>, children: &mut Vec<JoinHandle<Result<()>>>, child_control_tubes: &mut Vec<Tube>, irq_chip: Box<dyn IrqChipArch>, irq_frequencies: Arc<Mutex<Vec<u64>>>, ) -> Result<Arc<WaitContext<ChildToken>>>74     fn add_child(
75         wait_ctx: &WaitContext<Token>,
76         children: &mut Vec<JoinHandle<Result<()>>>,
77         child_control_tubes: &mut Vec<Tube>,
78         irq_chip: Box<dyn IrqChipArch>,
79         irq_frequencies: Arc<Mutex<Vec<u64>>>,
80     ) -> Result<Arc<WaitContext<ChildToken>>> {
81         let (child_control_tube, child_control_tube_for_child) = Tube::pair().map_err(|e| {
82             error!("failed to create IRQ child control tube: {:?}", e);
83             base::Error::from(io::Error::new(io::ErrorKind::Other, e))
84         })?;
85         let (child_wait_ctx, child_join_handle) =
86             IrqWaitWorkerChild::start(child_control_tube_for_child, irq_chip, irq_frequencies)?;
87         children.push(child_join_handle);
88         child_control_tubes.push(child_control_tube);
89         Ok(child_wait_ctx)
90     }
91 
run(&mut self) -> anyhow::Result<()>92     fn run(&mut self) -> anyhow::Result<()> {
93         let wait_ctx = WaitContext::new()?;
94 
95         let mut max_event_index: usize = 0;
96         let mut vm_control_added_irq_events: Vec<Event> = Vec::new();
97         let mut irq_event_sources: HashMap<IrqEventIndex, IrqEventSource> = HashMap::new();
98         // TODO(b/190828888): Move irq logging into the irqchip impls.
99         let irq_frequencies = Arc::new(Mutex::new(vec![0; max_event_index + 1]));
100         let irq_events = self.irq_chip.irq_event_tokens()?;
101         let mut children = vec![];
102         let mut child_control_tubes = vec![];
103 
104         let mut child_wait_ctx = Self::add_child(
105             &wait_ctx,
106             &mut children,
107             &mut child_control_tubes,
108             self.irq_chip.try_box_clone()?,
109             irq_frequencies.clone(),
110         )
111         .context("failed to create IRQ wait child")?;
112 
113         wait_ctx.add(
114             self.irq_handler_control.get_read_notifier(),
115             Token::IrqHandlerControl,
116         )?;
117 
118         for (event_index, source, evt) in irq_events {
119             child_wait_ctx.add(&evt, ChildToken::IrqEvent { event_index })?;
120             max_event_index = std::cmp::max(max_event_index, event_index);
121             irq_event_sources.insert(event_index, source);
122 
123             vm_control_added_irq_events.push(evt);
124         }
125 
126         irq_frequencies.lock().resize(max_event_index + 1, 0);
127 
128         for (index, control_tube) in self.irq_control_tubes.iter().enumerate() {
129             wait_ctx.add(control_tube.get_read_notifier(), Token::VmControl { index })?;
130         }
131 
132         let mut _delayed_event_token: Option<Event> = None;
133         if let Some(delayed_token) = self.irq_chip.irq_delayed_event_token()? {
134             wait_ctx.add(&delayed_token, Token::DelayedIrqEvent)?;
135             // store the token, so that it lasts outside this scope.
136             // We must store the event as try_clone creates a new event. It won't keep
137             // the current event valid that is waited on inside wait_ctx.
138             _delayed_event_token = Some(delayed_token);
139         }
140 
141         let mut intr_stat_sample_time = Instant::now();
142 
143         'poll: loop {
144             let events = {
145                 match wait_ctx.wait() {
146                     Ok(v) => v,
147                     Err(e) => {
148                         error!("failed to wait on irq thread: {}", e);
149                         break 'poll;
150                     }
151                 }
152             };
153 
154             let mut token_count = events.len();
155             let mut notify_control_on_iteration_end = false;
156             let mut vm_control_indices_to_remove = Vec::new();
157             for event in events.iter().filter(|e| e.is_readable) {
158                 match event.token {
159                     Token::IrqHandlerControl => {
160                         match self.irq_handler_control.recv::<IrqHandlerRequest>() {
161                             Ok(request) => match request {
162                                 IrqHandlerRequest::Exit => {
163                                     info!("irq event loop got exit request");
164                                     break 'poll;
165                                 }
166                                 IrqHandlerRequest::AddIrqControlTubes(_tubes) => {
167                                     panic!("CrosVM on Windows does not support adding devices on the fly yet.");
168                                 }
169                                 IrqHandlerRequest::WakeAndNotifyIteration => {
170                                     for child_control_tube in child_control_tubes.iter() {
171                                         child_control_tube
172                                             .send(&IrqHandlerRequest::WakeAndNotifyIteration)
173                                             .context("failed to send flush command to IRQ handler child thread")?;
174                                         let resp = child_control_tube
175                                             .recv()
176                                             .context("failed to recv flush response from IRQ handler child thread")?;
177                                         match resp {
178                                             IrqHandlerResponse::HandlerIterationComplete(
179                                                 tokens_serviced,
180                                             ) => {
181                                                 token_count += tokens_serviced;
182                                             }
183                                             unexpected_resp => panic!(
184                                                 "got unexpected response: {:?}",
185                                                 unexpected_resp
186                                             ),
187                                         }
188                                     }
189                                     notify_control_on_iteration_end = true;
190                                 }
191                                 IrqHandlerRequest::RefreshIrqEventTokens => {
192                                     // TODO(b/282755619): when we want to restore different shaped
193                                     // VMs, we'll have to implement this. For now, we'll just ack
194                                     // the message and not actually do the refresh.
195                                     self.irq_handler_control.send(&IrqHandlerResponse::IrqEventTokenRefreshComplete)
196                                         .context("failed to send reply to irq event token refresh request")?;
197                                 }
198                             },
199                             Err(e) => {
200                                 if let TubeError::Disconnected = e {
201                                     panic!("irq handler control tube disconnected.");
202                                 } else {
203                                     error!("failed to recv IrqHandlerRequest: {}", e);
204                                 }
205                             }
206                         }
207                     }
208                     Token::VmControl { index } => {
209                         if let Some(tube) = self.irq_control_tubes.get(index) {
210                             match tube.recv::<VmIrqRequest>() {
211                                 Ok(request) => {
212                                     let response = {
213                                         let irq_chip = &mut self.irq_chip;
214                                         // TODO(b/229262201): Refactor the closure into a standalone
215                                         // function to reduce indentation.
216                                         request.execute(
217                                             |setup| match setup {
218                                                 IrqSetup::Event(
219                                                     irq,
220                                                     ev,
221                                                     device_id,
222                                                     queue_id,
223                                                     device_name,
224                                                 ) => {
225                                                     let irqevent = IrqEdgeEvent::from_event(
226                                                         ev.try_clone()
227                                                             .expect("Failed to clone irq event."),
228                                                     );
229                                                     let source = IrqEventSource {
230                                                         device_id: device_id.try_into()?,
231                                                         queue_id,
232                                                         device_name,
233                                                     };
234                                                     let event_index = irq_chip
235                                                         .register_edge_irq_event(
236                                                             irq,
237                                                             &irqevent,
238                                                             source.clone(),
239                                                         )?;
240                                                     if let Some(event_index) = event_index {
241                                                         max_event_index = std::cmp::max(
242                                                             event_index,
243                                                             irq as usize,
244                                                         );
245                                                         irq_frequencies
246                                                             .lock()
247                                                             .resize(max_event_index + 1, 0);
248                                                         irq_event_sources
249                                                             .insert(event_index, source);
250                                                         // Make new thread if needed, including buffer space for any
251                                                         // events we didn't explicitly add (exit/reset/etc)
252                                                         if irq_event_sources.len()
253                                                             % (MAXIMUM_WAIT_OBJECTS - 3)
254                                                             == 0
255                                                         {
256                                                             // The child wait thread has reached max capacity, we
257                                                             // need to add another.
258                                                             child_wait_ctx = Self::add_child(
259                                                                 &wait_ctx, &mut children, &mut child_control_tubes,
260                                                                 irq_chip.try_box_clone()?, irq_frequencies.clone())?;
261 
262                                                         }
263                                                         let irqevent =
264                                                             irqevent.get_trigger().try_clone()?;
265                                                         match child_wait_ctx.add(
266                                                             &irqevent,
267                                                             ChildToken::IrqEvent { event_index },
268                                                         ) {
269                                                             Err(e) => {
270                                                                 warn!("failed to add IrqEvent to synchronization context: {}", e);
271                                                                 Err(e)
272                                                             },
273                                                             Ok(_) => {
274                                                                 vm_control_added_irq_events
275                                                                     .push(irqevent);
276                                                                 Ok(())
277                                                             }
278                                                         }
279                                                     } else {
280                                                         Ok(())
281                                                     }
282                                                 }
283                                                 IrqSetup::Route(route) => irq_chip.route_irq(route),
284                                                 IrqSetup::UnRegister(irq, ev) => irq_chip
285                                                     .unregister_edge_irq_event(
286                                                         irq,
287                                                         &IrqEdgeEvent::from_event(ev.try_clone()?),
288                                                     ),
289                                             },
290                                             &mut self.sys_allocator.lock(),
291                                         )
292                                     };
293                                     if let Err(e) = tube.send(&response) {
294                                         error!("failed to send VmIrqResponse: {}", e);
295                                     }
296                                 }
297                                 Err(e) => {
298                                     if let TubeError::Disconnected = e {
299                                         vm_control_indices_to_remove.push(index);
300                                     } else {
301                                         error!("failed to recv VmIrqRequest: {}", e);
302                                     }
303                                 }
304                             }
305                         }
306                     }
307                     Token::DelayedIrqEvent => {
308                         if let Err(e) = self.irq_chip.process_delayed_irq_events() {
309                             warn!("can't deliver delayed irqs: {}", e);
310                         }
311                     }
312                 }
313             }
314 
315             let now = Instant::now();
316             let intr_stat_duration = now.duration_since(intr_stat_sample_time);
317 
318             // include interrupt stats every 10 seconds
319             if intr_stat_duration > Duration::from_secs(10) {
320                 let mut event_indices: Vec<(&usize, &IrqEventSource)> =
321                     irq_event_sources.iter().collect();
322                 // sort the devices by irq_frequency
323                 let mut locked_irq_frequencies = irq_frequencies.lock();
324                 event_indices
325                     .sort_by_key(|(idx, _)| std::cmp::Reverse(locked_irq_frequencies[**idx]));
326                 let rates: Vec<String> = event_indices
327                     .iter()
328                     .filter(|(idx, _)| locked_irq_frequencies[**idx] > 0)
329                     .map(|(idx, source)| {
330                         let rate = locked_irq_frequencies[**idx] / intr_stat_duration.as_secs();
331                         // As the descriptor, use a 64bit int containing two 32bit ids.
332                         // low bits: queue_id, high bits: device_id
333                         let descriptor_bytes: [u8; 8] = {
334                             let mut bytes: [u8; 8] = [0; 8];
335                             for (i, byte) in
336                                 (source.queue_id as u32).to_le_bytes().iter().enumerate()
337                             {
338                                 bytes[i] = *byte
339                             }
340                             let device_id: u32 = source.device_id.into();
341                             for (i, byte) in device_id.to_le_bytes().iter().enumerate() {
342                                 bytes[i + 4] = *byte
343                             }
344                             bytes
345                         };
346                         log_high_frequency_descriptor_event(
347                             MetricEventType::Interrupts,
348                             i64::from_le_bytes(descriptor_bytes),
349                             rate as i64,
350                         );
351                         format!("{}({})->{}/s", source.device_name, source.queue_id, rate,)
352                     })
353                     .collect();
354 
355                 info!("crosvm-interrupt-rates: {}", rates.join(", "));
356 
357                 // reset sample time and counters
358                 intr_stat_sample_time = now;
359                 *locked_irq_frequencies = vec![0; max_event_index + 1];
360             }
361 
362             vm_control_indices_to_remove.dedup();
363             for index in vm_control_indices_to_remove {
364                 self.irq_control_tubes.swap_remove(index);
365             }
366 
367             if notify_control_on_iteration_end {
368                 if let Err(e) =
369                     // We want to send the number of IRQ events processed in
370                     // this iteration. Token count is almost what we want,
371                     // except it counts the IrqHandlerControl token, so we just
372                     // subtract it off to get the desired value.
373                     self.irq_handler_control.send(
374                         &IrqHandlerResponse::HandlerIterationComplete(token_count - 1),
375                     )
376                 {
377                     error!(
378                         "failed to notify on iteration completion (snapshotting may fail): {}",
379                         e
380                     );
381                 }
382             }
383         }
384 
385         // Ensure all children have exited.
386         for child_control_tube in child_control_tubes.iter() {
387             if let Err(e) = child_control_tube.send(&IrqHandlerRequest::Exit) {
388                 warn!("failed to send exit signal to IRQ worker: {}", e);
389             }
390         }
391 
392         // Ensure all children have exited and aren't stalled / stuck.
393         for child in children {
394             match child.join() {
395                 Ok(Err(e)) => warn!("IRQ worker child ended in error: {}", e),
396                 Err(e) => warn!("IRQ worker child panicked with error: {:?}", e),
397                 _ => {}
398             }
399         }
400 
401         Ok(())
402     }
403 }
404 
405 #[derive(EventToken)]
406 enum ChildToken {
407     IrqHandlerControl,
408     IrqEvent { event_index: IrqEventIndex },
409 }
410 /// An arbitrarily expandible worker for waiting on irq events.
411 /// This worker is responsible for hadling the irq events, whereas
412 /// the parent worker's job is just to handle the irq control tube requests.
413 struct IrqWaitWorkerChild {
414     wait_ctx: Arc<WaitContext<ChildToken>>,
415     irq_handler_control: Tube,
416     irq_chip: Box<dyn IrqChipArch>,
417     irq_frequencies: Arc<Mutex<Vec<u64>>>,
418 }
419 
420 impl IrqWaitWorkerChild {
start( irq_handler_control: Tube, irq_chip: Box<dyn IrqChipArch>, irq_frequencies: Arc<Mutex<Vec<u64>>>, ) -> Result<(Arc<WaitContext<ChildToken>>, JoinHandle<Result<()>>)>421     fn start(
422         irq_handler_control: Tube,
423         irq_chip: Box<dyn IrqChipArch>,
424         irq_frequencies: Arc<Mutex<Vec<u64>>>,
425     ) -> Result<(Arc<WaitContext<ChildToken>>, JoinHandle<Result<()>>)> {
426         let child_wait_ctx = Arc::new(WaitContext::new()?);
427         let mut child = IrqWaitWorkerChild {
428             wait_ctx: child_wait_ctx.clone(),
429             irq_handler_control,
430             irq_chip,
431             irq_frequencies,
432         };
433         let join_handle = thread::Builder::new()
434             .name("irq_child_wait_loop".into())
435             .spawn(move || child.run())?;
436 
437         Ok((child_wait_ctx, join_handle))
438     }
439 
run(&mut self) -> Result<()>440     fn run(&mut self) -> Result<()> {
441         self.wait_ctx.add(
442             self.irq_handler_control.get_read_notifier(),
443             ChildToken::IrqHandlerControl,
444         )?;
445         'poll: loop {
446             let events = {
447                 match self.wait_ctx.wait() {
448                     Ok(v) => v,
449                     Err(e) => {
450                         error!("failed to wait on irq child thread: {}", e);
451                         break 'poll;
452                     }
453                 }
454             };
455 
456             let token_count = events.len();
457             let mut notify_control_on_iteration_end = false;
458 
459             for event in events.iter().filter(|e| e.is_readable) {
460                 match event.token {
461                     ChildToken::IrqHandlerControl => {
462                         match self.irq_handler_control.recv::<IrqHandlerRequest>() {
463                             Ok(request) => match request {
464                                 IrqHandlerRequest::Exit => {
465                                     info!("irq child event loop got exit event");
466                                     break 'poll;
467                                 }
468                                 IrqHandlerRequest::AddIrqControlTubes(_tubes) => {
469                                     panic!("Windows does not support adding devices on the fly.");
470                                 }
471                                 IrqHandlerRequest::WakeAndNotifyIteration => {
472                                     notify_control_on_iteration_end = true;
473                                 }
474                                 IrqHandlerRequest::RefreshIrqEventTokens => {
475                                     // TODO(b/282755619): when we want to restore different shaped
476                                     // VMs, we'll have to implement this.
477                                     todo!("not implemented yet");
478                                 }
479                             },
480                             Err(e) => {
481                                 if let TubeError::Disconnected = e {
482                                     panic!("irq handler control tube disconnected.");
483                                 } else {
484                                     error!("failed to recv IrqHandlerRequest: {}", e);
485                                 }
486                             }
487                         }
488                     }
489                     ChildToken::IrqEvent { event_index } => {
490                         self.irq_frequencies.lock()[event_index] += 1;
491                         if let Err(e) = self.irq_chip.service_irq_event(event_index) {
492                             error!("failed to signal irq {}: {}", event_index, e);
493                         }
494                     }
495                 }
496             }
497 
498             if notify_control_on_iteration_end {
499                 if let Err(e) =
500                     // We want to send the number of IRQ events processed in
501                     // this iteration. Token count is almost what we want,
502                     // except it counts the IrqHandlerControl token, so we just
503                     // subtract it off to get the desired value.
504                     self.irq_handler_control.send(
505                         &IrqHandlerResponse::HandlerIterationComplete(token_count - 1),
506                     )
507                 {
508                     error!(
509                         "failed to notify on child iteration completion (snapshotting may fail): {}",
510                         e
511                     );
512                 }
513             }
514         }
515         Ok(())
516     }
517 }
518