xref: /aosp_15_r20/external/crosvm/net_util/src/slirp/sys/windows/handler.rs (revision bb4ee6a4ae7042d18b07a98463b9c8b875e44b39)
1 // Copyright 2020 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 use std::collections::HashMap;
6 #[cfg(any(feature = "slirp-ring-capture", feature = "slirp-debug"))]
7 use std::fs::File;
8 use std::io;
9 #[cfg(any(feature = "slirp-ring-capture", feature = "slirp-debug"))]
10 use std::io::BufWriter;
11 use std::net::Ipv4Addr;
12 use std::net::Ipv6Addr;
13 use std::time::Duration;
14 use std::time::Instant;
15 
16 use base::error;
17 use base::named_pipes::OverlappedWrapper;
18 use base::named_pipes::PipeConnection;
19 use base::warn;
20 use base::AsRawDescriptor;
21 use base::Descriptor;
22 use base::Error as SysError;
23 use base::Event;
24 use base::EventExt;
25 use base::EventToken;
26 use base::RawDescriptor;
27 use base::Timer;
28 use base::TimerTrait;
29 use base::WaitContext;
30 use base::WaitContextExt;
31 use metrics::MetricEventType;
32 use metrics::PeriodicLogger;
33 #[cfg(any(feature = "slirp-ring-capture", feature = "slirp-debug"))]
34 use pcap_file::pcap::PcapWriter;
35 use smallvec::SmallVec;
36 use virtio_sys::virtio_net::virtio_net_hdr;
37 use virtio_sys::virtio_net::virtio_net_hdr_mrg_rxbuf;
38 use winapi::shared::minwindef::MAKEWORD;
39 use winapi::um::winnt::LONG;
40 use winapi::um::winnt::SHORT;
41 use winapi::um::winsock2::WSACleanup;
42 use winapi::um::winsock2::WSAEventSelect;
43 use winapi::um::winsock2::WSAGetLastError;
44 use winapi::um::winsock2::WSAPoll;
45 use winapi::um::winsock2::WSAStartup;
46 use winapi::um::winsock2::FD_CLOSE;
47 use winapi::um::winsock2::FD_READ;
48 use winapi::um::winsock2::FD_WRITE;
49 use winapi::um::winsock2::POLLERR;
50 use winapi::um::winsock2::POLLHUP;
51 use winapi::um::winsock2::POLLRDBAND;
52 use winapi::um::winsock2::POLLRDNORM;
53 use winapi::um::winsock2::POLLWRNORM;
54 use winapi::um::winsock2::SOCKET;
55 use winapi::um::winsock2::SOCKET_ERROR;
56 use winapi::um::winsock2::WSADATA;
57 use winapi::um::winsock2::WSAPOLLFD;
58 use zerocopy::AsBytes;
59 
60 use crate::slirp::context::CallbackHandler;
61 use crate::slirp::context::Context;
62 use crate::slirp::context::PollEvents;
63 #[cfg(feature = "slirp-ring-capture")]
64 use crate::slirp::packet_ring_buffer::PacketRingBuffer;
65 use crate::slirp::SlirpError;
66 use crate::slirp::ETHERNET_FRAME_SIZE;
67 use crate::Error;
68 use crate::Result;
69 
70 #[cfg(feature = "slirp-debug")]
71 const SLIRP_CAPTURE_FILE_NAME: &str = "slirp_capture.pcap";
72 
73 #[cfg(any(feature = "slirp-ring-capture", feature = "slirp-debug"))]
74 const PCAP_FILE_BUFFER_SIZE: usize = 1024 * 1024; // 1MiB
75 
76 const VETH_HEADER_LENGTH: usize = 12;
77 
78 #[cfg(feature = "slirp-ring-capture")]
79 const PACKET_RING_BUFFER_SIZE_IN_BYTES: usize = 30000000; // 30MBs
80 
81 struct Handler {
82     start: Instant,
83     pipe: PipeConnection,
84     read_overlapped_wrapper: OverlappedWrapper,
85     buf: [u8; ETHERNET_FRAME_SIZE],
86     write_overlapped_wrapper: OverlappedWrapper,
87     // Stores the actual timer (Event) and callback. Note that Event ownership is held by libslirp,
88     // and created/released via `timer_new` and `timer_free`.
89     timer_callbacks: HashMap<RawDescriptor, Box<dyn FnMut()>>,
90     tx_logger: PeriodicLogger,
91     rx_logger: PeriodicLogger,
92     #[allow(unused)]
93     handler_debug: Option<HandlerDebug>,
94 }
95 
/// Extra state that is only populated when the slirp connection is being debugged. Without
/// the `slirp-ring-capture` / `slirp-debug` features this struct has no fields.
struct HandlerDebug {
    /// Buffered pcap writer that captured packets are written to.
    #[cfg(any(feature = "slirp-ring-capture", feature = "slirp-debug"))]
    pcap_writer: PcapWriter<BufWriter<File>>,
    /// Ring buffer of packets that were sent to the guest.
    #[cfg(feature = "slirp-ring-capture")]
    tx_packet_ring_buffer: PacketRingBuffer,
    /// Ring buffer of packets that were read from the guest.
    #[cfg(feature = "slirp-ring-capture")]
    rx_packet_ring_buffer: PacketRingBuffer,
}
105 
106 impl CallbackHandler for Handler {
107     type Timer = base::Timer;
108 
clock_get_ns(&mut self) -> i64109     fn clock_get_ns(&mut self) -> i64 {
110         const NANOS_PER_SEC: u64 = 1_000_000_000;
111         let running_duration = self.start.elapsed();
112         (running_duration.as_secs() * NANOS_PER_SEC + running_duration.subsec_nanos() as u64) as i64
113     }
114 
115     /// Sends a packet to the guest.
send_packet(&mut self, buf: &[u8]) -> io::Result<usize>116     fn send_packet(&mut self, buf: &[u8]) -> io::Result<usize> {
117         let vnet_hdr = virtio_net_hdr_mrg_rxbuf {
118             hdr: virtio_net_hdr {
119                 flags: 0,
120                 gso_size: 0,
121                 hdr_len: 0,
122                 csum_start: 0,
123                 csum_offset: 0,
124                 gso_type: virtio_sys::virtio_net::VIRTIO_NET_HDR_GSO_NONE as u8,
125             },
126             num_buffers: 1,
127         };
128         let send_buf = [vnet_hdr.as_bytes(), buf].concat();
129 
130         #[allow(unused)]
131         if let Some(handler_debug) = self.handler_debug.as_mut() {
132             let d = self.start.elapsed();
133             #[cfg(feature = "slirp-debug")]
134             {
135                 handler_debug
136                     .pcap_writer
137                     .write(d.as_secs() as u32, d.subsec_nanos(), buf, buf.len() as u32)
138                     .unwrap();
139             }
140             #[cfg(feature = "slirp-ring-capture")]
141             {
142                 handler_debug
143                     .tx_packet_ring_buffer
144                     .add_packet(buf, d)
145                     .expect("Failed to add packet.");
146             }
147         }
148 
149         // Log as rx from the guest's perspective
150         self.rx_logger.log(buf.len() as i64);
151         // SAFETY: safe because the operation ends with send_buf and
152         // write_overlapped_wrapper still in scope.
153         unsafe {
154             self.pipe
155                 .write_overlapped(&send_buf, &mut self.write_overlapped_wrapper)?;
156         }
157         self.pipe
158             .get_overlapped_result(&mut self.write_overlapped_wrapper)
159             .map(|x| x as usize)
160     }
161 
162     // Not required per https://github.com/rootless-containers/slirp4netns/blob/7f6a4a654a84d4356c881a10417bab77fd5be325/slirp4netns.c
register_poll_fd(&mut self, _fd: i32)163     fn register_poll_fd(&mut self, _fd: i32) {}
unregister_poll_fd(&mut self, _fd: i32)164     fn unregister_poll_fd(&mut self, _fd: i32) {}
165 
guest_error(&mut self, msg: &str)166     fn guest_error(&mut self, msg: &str) {
167         warn!("guest error: {}", msg);
168     }
169 
170     // Not required per https://github.com/rootless-containers/slirp4netns/blob/7f6a4a654a84d4356c881a10417bab77fd5be325/slirp4netns.c
notify(&mut self)171     fn notify(&mut self) {}
172 
timer_new(&mut self, callback: Box<dyn FnMut()>) -> Box<Self::Timer>173     fn timer_new(&mut self, callback: Box<dyn FnMut()>) -> Box<Self::Timer> {
174         let timer = Timer::new().expect("failed to create network timer");
175         self.timer_callbacks
176             .insert(timer.as_raw_descriptor(), callback);
177         Box::new(timer)
178     }
179 
timer_mod(&mut self, timer: &mut Self::Timer, expire_time: i64)180     fn timer_mod(&mut self, timer: &mut Self::Timer, expire_time: i64) {
181         // expire_time is a clock_get_ns relative deadline.
182         let timer_duration = Duration::from_millis(expire_time as u64)
183             - Duration::from_nanos(self.clock_get_ns() as u64);
184 
185         timer
186             .reset_oneshot(timer_duration)
187             .expect("failed to modify network timer");
188     }
189 
timer_free(&mut self, timer: Box<Self::Timer>)190     fn timer_free(&mut self, timer: Box<Self::Timer>) {
191         self.timer_callbacks.remove(&timer.as_raw_descriptor());
192         // The actual Timer is freed implicitly by the Box drop.
193     }
194 
get_timers<'a>(&'a self) -> Box<dyn Iterator<Item = &RawDescriptor> + 'a>195     fn get_timers<'a>(&'a self) -> Box<dyn Iterator<Item = &RawDescriptor> + 'a> {
196         Box::new(self.timer_callbacks.keys())
197     }
198 
execute_timer(&mut self, timer: RawDescriptor)199     fn execute_timer(&mut self, timer: RawDescriptor) {
200         let timer_callback = self
201             .timer_callbacks
202             .get_mut(&timer)
203             .expect("tried to run timer that has no callback");
204         timer_callback()
205     }
206 
begin_read_from_guest(&mut self) -> io::Result<()>207     fn begin_read_from_guest(&mut self) -> io::Result<()> {
208         // SAFETY:
209         // Safe because we are writing simple bytes.
210         unsafe {
211             self.pipe
212                 .read_overlapped(&mut self.buf, &mut self.read_overlapped_wrapper)
213         }
214     }
215 
end_read_from_guest(&mut self) -> io::Result<&[u8]>216     fn end_read_from_guest(&mut self) -> io::Result<&[u8]> {
217         match self
218             .pipe
219             .try_get_overlapped_result(&mut self.read_overlapped_wrapper)
220         {
221             Ok(len) if len as usize >= VETH_HEADER_LENGTH => {
222                 // Skip over the veth header (12 bytes, created by the frontend per the
223                 // virtio spec).
224                 let ethernet_pkt = &self.buf[VETH_HEADER_LENGTH..len as usize];
225 
226                 #[allow(unused)]
227                 if let Some(handler_debug) = self.handler_debug.as_mut() {
228                     let d = self.start.elapsed();
229 
230                     #[cfg(feature = "slirp-debug")]
231                     {
232                         handler_debug
233                             .pcap_writer
234                             .write(
235                                 d.as_secs() as u32,
236                                 d.subsec_nanos(),
237                                 ethernet_pkt,
238                                 len - VETH_HEADER_LENGTH as u32,
239                             )
240                             .unwrap();
241                     }
242                     #[cfg(feature = "slirp-ring-capture")]
243                     {
244                         handler_debug
245                             .rx_packet_ring_buffer
246                             .add_packet(ethernet_pkt, d)
247                             .expect("Failed to add packet.");
248                     }
249                 };
250 
251                 // Log as tx from the guest's perspective
252                 self.tx_logger.log(len as i64);
253                 Ok(ethernet_pkt)
254             }
255             Ok(len) => Err(io::Error::new(
256                 io::ErrorKind::InvalidData,
257                 format!(
258                     "Too few bytes ({}) read from the guest's virtio-net frontend.",
259                     len
260                 ),
261             )),
262             Err(e) => Err(e),
263         }
264     }
265 }
266 
/// Flushes the ring-buffered packets to the pcap file when the handler goes away.
#[cfg(feature = "slirp-ring-capture")]
impl Drop for Handler {
    fn drop(&mut self) {
        if let Some(handler_debug) = &mut self.handler_debug {
            let packets = PacketRingBuffer::pop_ring_buffers_and_aggregate(
                &mut handler_debug.rx_packet_ring_buffer,
                &mut handler_debug.tx_packet_ring_buffer,
            );

            for packet in packets {
                let write_result = handler_debug.pcap_writer.write(
                    packet.timestamp.as_secs() as u32,
                    packet.timestamp.subsec_nanos(),
                    &packet.buf,
                    packet.buf.len() as u32,
                );
                // Never panic inside `drop`: a panic while already unwinding aborts the
                // process. Report the failure and stop writing instead.
                if let Err(e) = write_result {
                    error!("Failed to write captured packet to pcap file: {:?}", e);
                    break;
                }
            }
        }
    }
}
290 
last_wsa_error() -> io::Error291 fn last_wsa_error() -> io::Error {
292     io::Error::from_raw_os_error(
293         // SAFETY: trivially safe
294         unsafe { WSAGetLastError() },
295     )
296 }
297 
poll_sockets(mut sockets: Vec<WSAPOLLFD>) -> io::Result<Vec<WSAPOLLFD>>298 fn poll_sockets(mut sockets: Vec<WSAPOLLFD>) -> io::Result<Vec<WSAPOLLFD>> {
299     // SAFETY:
300     // Safe because sockets is guaranteed to be valid, and we handle error return codes below.
301     let poll_result = unsafe {
302         WSAPoll(
303             sockets.as_mut_ptr(),
304             sockets.len() as u32,
305             1, /* timeout in ms */
306         )
307     };
308 
309     match poll_result {
310         SOCKET_ERROR => Err(last_wsa_error()),
311         _ => Ok(sockets),
312     }
313 }
314 
315 /// Converts WSA poll events into the network event bitfield used by WSAEventSelect.
wsa_events_to_wsa_network_events(events: SHORT) -> LONG316 fn wsa_events_to_wsa_network_events(events: SHORT) -> LONG {
317     let mut net_events = 0;
318     if events & (POLLRDNORM | POLLRDBAND) != 0 {
319         net_events |= FD_READ;
320     }
321     if events & POLLWRNORM > 0 {
322         net_events |= FD_WRITE;
323     }
324     net_events
325 }
326 
wsa_events_to_slirp_events(events: SHORT) -> PollEvents327 fn wsa_events_to_slirp_events(events: SHORT) -> PollEvents {
328     // On Windows, revents have the following meaning:
329     // Linux POLLIN == POLLRDBAND | POLLRDNORM
330     // Linux POLLOUT == POLLWRNORM
331     // Linux POLLERR == POLLERR
332     // Windows: POLLPRI is not implemented.
333     // POLLNVAL is not a supported Slirp polling flag.
334     // Further details at
335     //      https://docs.microsoft.com/en-us/windows/win32/api/winsock2/nf-winsock2-wsapoll
336     let mut poll_events = PollEvents::empty();
337     if events & (POLLRDNORM | POLLRDBAND) != 0 {
338         poll_events |= PollEvents::poll_in();
339     }
340     if events & POLLWRNORM != 0 {
341         poll_events |= PollEvents::poll_out();
342     }
343     if events & POLLERR != 0 {
344         poll_events |= PollEvents::poll_err();
345     }
346     if events & POLLHUP != 0 {
347         poll_events |= PollEvents::poll_hup();
348     }
349     poll_events
350 }
351 
slirp_events_to_wsa_events(events: PollEvents) -> SHORT352 fn slirp_events_to_wsa_events(events: PollEvents) -> SHORT {
353     // Note that the events that get sent into WSAPoll are a subset of the events that are returned
354     // by WSAPoll. As such, this function is not an inverse of wsa_events_to_slirp_events.
355     let mut wsa_events: SHORT = 0;
356     if events.has_in() {
357         wsa_events |= POLLRDNORM | POLLRDBAND;
358     }
359     if events.has_out() {
360         wsa_events |= POLLWRNORM;
361     }
362     // NOTE: POLLHUP cannot be supplied to WSAPoll.
363 
364     wsa_events
365 }
366 
367 #[derive(EventToken, Eq, PartialEq, Copy, Clone)]
368 enum Token {
369     EventHandleReady(usize),
370     SocketReady,
371 }
372 
373 /// Associates a WSAPOLLFD's events with an Event object, disassociating on drop.
374 struct EventSelectedSocket<'a> {
375     socket: WSAPOLLFD,
376     event: &'a Event,
377 }
378 
379 impl<'a> EventSelectedSocket<'a> {
new(socket: WSAPOLLFD, event: &'a Event) -> Result<EventSelectedSocket>380     fn new(socket: WSAPOLLFD, event: &'a Event) -> Result<EventSelectedSocket> {
381         // SAFETY:
382         // Safe because socket.fd exists, the event handle is guaranteed to exist, and we check the
383         // return code below.
384         let res = unsafe {
385             WSAEventSelect(
386                 socket.fd as SOCKET,
387                 event.as_raw_descriptor(),
388                 // Because WSAPOLLFD cannot contain POLLHUP (even if libslirp wanted to specify it,
389                 // WSAPoll does not accept it), we assume it is always present.
390                 wsa_events_to_wsa_network_events(socket.events) | FD_CLOSE,
391             )
392         };
393         if res == SOCKET_ERROR {
394             return Err(Error::Slirp(SlirpError::SlirpIOPollError(last_wsa_error())));
395         }
396         Ok(EventSelectedSocket { socket, event })
397     }
398 }
399 
400 impl<'a> Drop for EventSelectedSocket<'a> {
drop(&mut self)401     fn drop(&mut self) {
402         // SAFETY:
403         // Safe because socket.fd exists, the event handle is guaranteed to exist, and we check the
404         // return code below.
405         let res = unsafe {
406             WSAEventSelect(
407                 self.socket.fd as SOCKET,
408                 self.event.as_raw_descriptor(),
409                 /* listen for no events */ 0,
410             )
411         };
412         if res == SOCKET_ERROR {
413             warn!("failed to unselect socket: {}", last_wsa_error());
414         }
415     }
416 }
417 
418 /// Rough equivalent of select(...) for Windows.
419 /// The following behavior is guaranteed:
420 ///   1. The position of sockets in the sockets vector is maintained on return.
421 ///   2. Sockets are always polled on any wakeup.
422 ///
423 /// For optimization reasons, takes a utility event & WaitContext to avoid having to re-create
424 /// those objects if poll is called from an event loop. The Event and WaitContext MUST NOT be used
425 /// for any other purpose in between calls to `poll`.
poll<'a>( wait_ctx: &WaitContext<Token>, socket_event_handle: &Event, handles: Vec<&'a dyn AsRawDescriptor>, sockets: Vec<WSAPOLLFD>, timeout: Option<Duration>, ) -> Result<(Vec<&'a dyn AsRawDescriptor>, Vec<WSAPOLLFD>)>426 fn poll<'a>(
427     wait_ctx: &WaitContext<Token>,
428     socket_event_handle: &Event,
429     handles: Vec<&'a dyn AsRawDescriptor>,
430     sockets: Vec<WSAPOLLFD>,
431     timeout: Option<Duration>,
432 ) -> Result<(Vec<&'a dyn AsRawDescriptor>, Vec<WSAPOLLFD>)> {
433     let mut selected_sockets = Vec::with_capacity(sockets.len());
434     for socket in sockets.iter() {
435         selected_sockets.push(EventSelectedSocket::new(*socket, socket_event_handle)?);
436     }
437 
438     wait_ctx
439         .clear()
440         .map_err(|e| Error::Slirp(SlirpError::SlirpPollError(e)))?;
441     for (i, handle) in handles.iter().enumerate() {
442         match wait_ctx.add(*handle, Token::EventHandleReady(i)) {
443             Ok(v) => v,
444             Err(e) => {
445                 return Err(Error::Slirp(SlirpError::SlirpPollError(e)));
446             }
447         }
448     }
449     match wait_ctx.add(socket_event_handle, Token::SocketReady) {
450         Ok(v) => v,
451         Err(e) => {
452             return Err(Error::Slirp(SlirpError::SlirpPollError(e)));
453         }
454     }
455 
456     let events = if let Some(timeout) = timeout {
457         wait_ctx
458             .wait_timeout(timeout)
459             .map_err(|e| Error::Slirp(SlirpError::SlirpPollError(e)))?
460     } else {
461         wait_ctx
462             .wait()
463             .map_err(|e| Error::Slirp(SlirpError::SlirpPollError(e)))?
464     };
465 
466     let tokens: Vec<Token> = events
467         .iter()
468         .filter(|e| e.is_readable)
469         .map(|e| e.token)
470         .collect();
471     let mut handle_results = Vec::new();
472     for token in tokens {
473         match token {
474             Token::EventHandleReady(i) => {
475                 handle_results.push(handles[i]);
476             }
477             Token::SocketReady => {
478                 // We always call poll_sockets, so whether the token is present doesn't matter.
479             }
480         };
481     }
482 
483     let socket_results = if sockets.is_empty() {
484         Vec::new()
485     } else {
486         poll_sockets(sockets).map_err(|e| Error::Slirp(SlirpError::SlirpIOPollError(e)))?
487     };
488 
489     Ok((handle_results, socket_results))
490 }
491 
492 /// Opens a WSAStartup/WSACleanup context; in other words, while a context is held, winsock calls
493 /// can be made.
494 struct WSAContext {
495     data: WSADATA,
496 }
497 
498 impl WSAContext {
new() -> Result<WSAContext>499     fn new() -> Result<WSAContext> {
500         // SAFETY:
501         // Trivially safe (initialization of this memory is not required).
502         let mut ctx: WSAContext = unsafe { std::mem::zeroed() };
503 
504         // SAFETY:
505         // Safe because ctx.data is guaranteed to exist, and we check the return code.
506         let err = unsafe { WSAStartup(MAKEWORD(2, 0), &mut ctx.data) };
507         if err != 0 {
508             Err(Error::Slirp(SlirpError::WSAStartupError(SysError::new(
509                 err,
510             ))))
511         } else {
512             Ok(ctx)
513         }
514     }
515 }
516 
517 impl Drop for WSAContext {
drop(&mut self)518     fn drop(&mut self) {
519         // SAFETY: trivially safe with return value checked.
520         let err = unsafe { WSACleanup() };
521         if err != 0 {
522             error!("WSACleanup failed: {}", last_wsa_error())
523         }
524     }
525 }
526 
527 /// Starts libslirp's main loop attached to host_pipe. Packets are exchanged between host_pipe and
528 /// the host's network stack.
529 ///
530 /// host_pipe must be non blocking & in message mode.
start_slirp( host_pipe: PipeConnection, shutdown_event: Event, disable_access_to_host: bool, #[cfg(any(feature = "slirp-ring-capture", feature = "slirp-debug"))] slirp_capture_file: Option< String, >, ) -> Result<()>531 pub fn start_slirp(
532     host_pipe: PipeConnection,
533     shutdown_event: Event,
534     disable_access_to_host: bool,
535     #[cfg(any(feature = "slirp-ring-capture", feature = "slirp-debug"))] slirp_capture_file: Option<
536         String,
537     >,
538 ) -> Result<()> {
539     // This call is not strictly required because libslirp currently calls WSAStartup for us, but
540     // relying on that is brittle and a potential source of bugs as we have our own socket code that
541     // runs on the Rust side.
542     let _wsa_context = WSAContext::new()?;
543 
544     let (mut context, host_pipe_notifier_handle) = create_slirp_context(
545         host_pipe,
546         disable_access_to_host,
547         #[cfg(any(feature = "slirp-ring-capture", feature = "slirp-debug"))]
548         slirp_capture_file,
549     )?;
550     let shutdown_event_handle = shutdown_event.as_raw_descriptor();
551 
552     // Stack data for the poll function.
553     let wait_ctx: WaitContext<Token> =
554         WaitContext::new().map_err(|e| Error::Slirp(SlirpError::SlirpPollError(e)))?;
555     let socket_event_handle =
556         Event::new_auto_reset().map_err(|e| Error::Slirp(SlirpError::SlirpPollError(e)))?;
557 
558     'slirp: loop {
559         // Request the FDs that we should poll from Slirp. Slirp provides them to us by way of a
560         // callback, which is invoked for each FD. This callback requires us to assign each FD an
561         // index which will be used by a subsequent Slirp call to get the poll events for
562         // each FD. The data flow can be thought of as follows:
563         //    1. pollfds_fill creates a map of index -> fd inside Slirp based on the return values
564         //       from the pollfds_fill callback.
565         //    2. crosvm invokes poll on the FDs provided by Slirp.
566         //    3. crosvm notifies Slirp via pollfds_poll that polling completed for the provided FDs.
567         //    4. Slirp calls into crosvm via the pollfds_poll callback and asks for the statuses
568         //       using the fd indicies registered in step #1.
569         let mut poll_fds = Vec::new();
570         // We'd like to sleep as long as possible (assuming no actionable notifications arrive).
571         let mut timeout_ms: u32 = u32::MAX;
572         context.pollfds_fill(&mut timeout_ms, |fd: i32, events: PollEvents| {
573             poll_fds.push(WSAPOLLFD {
574                 fd: fd as usize,
575                 events: slirp_events_to_wsa_events(events),
576                 revents: 0,
577             });
578             (poll_fds.len() - 1) as i32
579         });
580 
581         // There are relatively few concurrent timer_callbacks used by libslirp, so we set the small
582         // vector size low.
583         let timer_callbacks = context
584             .get_timers()
585             .map(|timer| Descriptor(*timer))
586             .collect::<SmallVec<[Descriptor; 8]>>();
587         let mut handles: Vec<&dyn AsRawDescriptor> = Vec::with_capacity(timer_callbacks.len() + 2);
588         handles.extend(
589             timer_callbacks
590                 .iter()
591                 .map(|timer| timer as &dyn AsRawDescriptor),
592         );
593 
594         let host_pipe_notifier = Descriptor(host_pipe_notifier_handle);
595         handles.push(&host_pipe_notifier);
596         handles.push(&shutdown_event);
597 
598         let (handle_results, socket_results) = poll(
599             &wait_ctx,
600             &socket_event_handle,
601             handles,
602             poll_fds,
603             Some(Duration::from_millis(timeout_ms.into())),
604         )?;
605 
606         for handle in handle_results.iter() {
607             match handle.as_raw_descriptor() {
608                 h if h == host_pipe_notifier_handle => {
609                     // Collect input from the guest & inject into Slirp. It seems that this input
610                     // step should be between pollfds_fill & pollfds_poll.
611                     context.handle_guest_input()?;
612                 }
613                 h if h == shutdown_event_handle => {
614                     break 'slirp;
615                 }
616                 timer_handle => {
617                     // All other handles are timer_callbacks.
618                     context.execute_timer(timer_handle);
619                 }
620             }
621         }
622 
623         // It's possible no socket notified and we got here from a timeout. This is fine, because
624         // libslirp wants to be woken up if timeout has expired (even if no sockets are ready).
625         context.pollfds_poll(false, |fd_index: i32| {
626             wsa_events_to_slirp_events(socket_results[fd_index as usize].revents)
627         })
628     }
629 
630     // Never reached.
631     Ok(())
632 }
633 
create_slirp_context( host_pipe: PipeConnection, disable_access_to_host: bool, #[cfg(any(feature = "slirp-ring-capture", feature = "slirp-debug"))] slirp_capture_file: Option< String, >, ) -> Result<(Box<Context<Handler>>, RawDescriptor)>634 fn create_slirp_context(
635     host_pipe: PipeConnection,
636     disable_access_to_host: bool,
637     #[cfg(any(feature = "slirp-ring-capture", feature = "slirp-debug"))] slirp_capture_file: Option<
638         String,
639     >,
640 ) -> Result<(Box<Context<Handler>>, RawDescriptor)> {
641     // Set up handler_debug:
642     // - If slirp-debug is used, write to SLIRP_CAPTURE_FILE_NAME if slirp_capture_file not set.
643     // - If slirp-ring-capture is used, write to slirp_capture_file.
644     //     - If slirp_capture_file not set, don't debug.
645     // - Otherwise, set to None.
646     cfg_if::cfg_if! {
647         if #[cfg(any(feature = "slirp-ring-capture", feature = "slirp-debug"))] {
648             #[cfg(feature = "slirp-ring-capture")]
649             let capture_path = slirp_capture_file;
650             #[cfg(feature = "slirp-debug")]
651             let capture_path = slirp_capture_file.or(Some(SLIRP_CAPTURE_FILE_NAME.to_owned()));
652 
653             let handler_debug = capture_path
654                 .as_ref()
655                 .map(File::create)
656                 .transpose()
657                 .unwrap_or_default()
658                 .map(|capture_file| HandlerDebug {
659                     pcap_writer: PcapWriter::new(BufWriter::with_capacity(
660                         PCAP_FILE_BUFFER_SIZE,
661                         capture_file,
662                     ))
663                     .unwrap(),
664                     #[cfg(feature = "slirp-ring-capture")]
665                     tx_packet_ring_buffer: PacketRingBuffer::new(PACKET_RING_BUFFER_SIZE_IN_BYTES),
666                     #[cfg(feature = "slirp-ring-capture")]
667                     rx_packet_ring_buffer: PacketRingBuffer::new(PACKET_RING_BUFFER_SIZE_IN_BYTES),
668                 });
669 
670             // If there is a target capture, but no debug, let the dev know. In prod, capture_path
671             // won't exist, so we won't log.
672             if capture_path.is_some() && handler_debug.is_none() {
673                 error!("Failed to start packet capture! Check provided file path or sandboxing?");
674             }
675         } else {
676             let handler_debug = None;
677         }
678     }
679 
680     let overlapped_wrapper = OverlappedWrapper::new(true).unwrap();
681     let read_notifier = overlapped_wrapper
682         .get_h_event_ref()
683         .unwrap()
684         .as_raw_descriptor();
685     let handler = Handler {
686         start: Instant::now(),
687         pipe: host_pipe,
688         read_overlapped_wrapper: overlapped_wrapper,
689         buf: [0; ETHERNET_FRAME_SIZE],
690         write_overlapped_wrapper: OverlappedWrapper::new(true).unwrap(),
691         timer_callbacks: HashMap::new(),
692         tx_logger: PeriodicLogger::new(MetricEventType::NetworkTxRate, Duration::from_secs(1))
693             .unwrap(),
694         rx_logger: PeriodicLogger::new(MetricEventType::NetworkRxRate, Duration::from_secs(1))
695             .unwrap(),
696         handler_debug,
697     };
698 
699     // Address & mask of the virtual network.
700     let v4_network_addr = Ipv4Addr::new(10, 0, 2, 0);
701     let v4_network_mask = Ipv4Addr::new(255, 255, 255, 0);
702 
703     // Address of the host machine on the virtual network (if the feature is enabled).
704     let host_v4_addr = Ipv4Addr::new(10, 0, 2, 2);
705 
706     // Address of the libslirp provided DNS proxy (packets to this address are intercepted by
707     // libslirp & routed to the first nameserver configured on the machine's NICs by libslirp).
708     let dns_addr = Ipv4Addr::new(10, 0, 2, 3);
709 
710     // DHCP range should start *after* the statically assigned addresses.
711     let dhcp_start_addr = Ipv4Addr::new(10, 0, 2, 4);
712 
713     // IPv6 network address. This is a ULA (unique local address) network, with a randomly generated
714     // ID (0x13624603218). The "prefix" or network address is 64 bits, incorporating both the
715     // network ID, and the subnet (0x0001).
716     let v6_network_addr = Ipv6Addr::new(0xfd13, 0x6246, 0x3218, 0x0001, 0, 0, 0, 0);
717 
718     let v6_host_addr = Ipv6Addr::new(0xfd13, 0x6246, 0x3218, 0x0001, 0, 0, 0, 2);
719     let v6_dns_addr = Ipv6Addr::new(0xfd13, 0x6246, 0x3218, 0x0001, 0, 0, 0, 3);
720     Ok((
721         Context::new(
722             disable_access_to_host,
723             /* IPv4 enabled */
724             true,
725             v4_network_addr,
726             v4_network_mask,
727             host_v4_addr,
728             /* IPv6 enabled */ true,
729             v6_network_addr,
730             /* virtual_network_v6_prefix_len */ 64,
731             /* host_v6_address */ v6_host_addr,
732             /* host_hostname */ None,
733             dhcp_start_addr,
734             dns_addr,
735             /* dns_server_v6_addr */ v6_dns_addr,
736             /* virtual_network_dns_search_domains */ Vec::new(),
737             /* dns_server_domain_name */ None,
738             handler,
739         )?,
740         read_notifier,
741     ))
742 }
743 
744 #[cfg(test)]
745 mod tests {
746     use std::net::UdpSocket;
747     use std::os::windows::io::AsRawSocket;
748 
749     use base::named_pipes;
750     use base::named_pipes::BlockingMode;
751     use base::named_pipes::FramingMode;
752 
753     use super::super::SLIRP_BUFFER_SIZE;
754     use super::*;
755 
create_socket() -> (UdpSocket, WSAPOLLFD)756     fn create_socket() -> (UdpSocket, WSAPOLLFD) {
757         let socket = UdpSocket::bind("127.0.0.1:0").unwrap();
758         socket
759             .set_nonblocking(true)
760             .expect("Socket failed to set non_blocking.");
761 
762         let poll_fd = WSAPOLLFD {
763             fd: socket.as_raw_socket() as usize,
764             events: POLLRDNORM | POLLRDBAND, // POLLIN equivalent
765             revents: 0,
766         };
767 
768         (socket, poll_fd)
769     }
770 
create_readable_socket() -> (UdpSocket, WSAPOLLFD)771     fn create_readable_socket() -> (UdpSocket, WSAPOLLFD) {
772         let (socket, poll_fd) = create_socket();
773         let receiving_addr = socket.local_addr().unwrap();
774         let buf = [0; 10];
775         socket.send_to(&buf, receiving_addr).unwrap();
776 
777         // Wait for the socket to really be readable before we return it back to the test. We've
778         // seen cases in CI where send_to completes, but WSAPoll won't find the socket to be
779         // readable.
780         let mut sockets = vec![poll_fd];
781         for _ in 0..5 {
782             sockets = poll_sockets(sockets).expect("poll_sockets failed");
783             if sockets[0].revents & (POLLRDNORM | POLLRDBAND) > 0 {
784                 return (socket, poll_fd);
785             }
786         }
787         panic!("socket never became readable");
788     }
789 
790     #[test]
test_polling_timeout_works()791     fn test_polling_timeout_works() {
792         let wait_ctx: WaitContext<Token> = WaitContext::new().unwrap();
793         let socket_event_handle = Event::new_auto_reset().unwrap();
794 
795         let (_socket, poll_fd) = create_socket();
796         let event_fd = Event::new_auto_reset().unwrap();
797         let (handles, sockets) = poll(
798             &wait_ctx,
799             &socket_event_handle,
800             vec![&event_fd],
801             vec![poll_fd],
802             Some(Duration::from_millis(2)),
803         )
804         .unwrap();
805 
806         // Asserts that we woke up because of a timeout.
807         assert_eq!(handles.len(), 0);
808         assert_eq!(sockets[0].revents, 0);
809     }
810 
811     #[test]
test_polling_handle_only()812     fn test_polling_handle_only() {
813         let wait_ctx: WaitContext<Token> = WaitContext::new().unwrap();
814         let socket_event_handle = Event::new_auto_reset().unwrap();
815 
816         // Required to ensure winsock is ready (needed by poll).
817         let (_sock, _poll_fd) = create_readable_socket();
818 
819         let event_fd = Event::new_auto_reset().unwrap();
820         event_fd.signal().expect("Failed to write event");
821         let (handles, _sockets) = poll(
822             &wait_ctx,
823             &socket_event_handle,
824             vec![&event_fd],
825             Vec::new(),
826             None,
827         )
828         .unwrap();
829 
830         assert_eq!(handles.len(), 1);
831         assert_eq!(handles[0].as_raw_descriptor(), event_fd.as_raw_descriptor());
832     }
833 
834     #[test]
test_polling_socket_only()835     fn test_polling_socket_only() {
836         let wait_ctx: WaitContext<Token> = WaitContext::new().unwrap();
837         let socket_event_handle = Event::new_auto_reset().unwrap();
838 
839         let (sock, poll_fd) = create_readable_socket();
840         let (_handles, sockets) = poll(
841             &wait_ctx,
842             &socket_event_handle,
843             Vec::new(),
844             vec![poll_fd],
845             None,
846         )
847         .unwrap();
848 
849         assert_eq!(sockets.len(), 1);
850         assert_eq!(sockets[0].fd, sock.as_raw_socket() as usize);
851     }
852 
853     #[test]
test_polling_two_notifies()854     fn test_polling_two_notifies() {
855         let wait_ctx: WaitContext<Token> = WaitContext::new().unwrap();
856         let socket_event_handle = Event::new_auto_reset().unwrap();
857 
858         let (sock, poll_fd) = create_readable_socket();
859         let event_fd = Event::new_auto_reset().unwrap();
860         event_fd.signal().expect("Failed to write event");
861 
862         let (handles, sockets) = poll(
863             &wait_ctx,
864             &socket_event_handle,
865             vec![&event_fd],
866             vec![poll_fd],
867             None,
868         )
869         .unwrap();
870 
871         assert_eq!(sockets.len(), 1);
872         assert_eq!(sockets[0].fd, sock.as_raw_socket() as usize);
873 
874         assert_eq!(handles.len(), 1);
875         assert_eq!(handles[0].as_raw_descriptor(), event_fd.as_raw_descriptor());
876     }
877 
878     #[test]
test_slirp_stops_on_shutdown()879     fn test_slirp_stops_on_shutdown() {
880         let event_fd = Event::new_auto_reset().unwrap();
881         let (host_pipe, mut _guest_pipe) = named_pipes::pair_with_buffer_size(
882             &FramingMode::Message,
883             &BlockingMode::Wait,
884             0,
885             SLIRP_BUFFER_SIZE,
886             true,
887         )
888         .unwrap();
889         event_fd.signal().expect("Failed to write event");
890         start_slirp(
891             host_pipe,
892             event_fd.try_clone().unwrap(),
893             /* disable_access_to_host= */ false,
894             #[cfg(any(feature = "slirp-ring-capture", feature = "slirp-debug"))]
895             None,
896         )
897         .expect("Failed to start slirp");
898     }
899 
900     // A gratuitous ARP from 52:55:0A:00:02:0F for IP 10.0.2.15
901     const VETH_ARP_ANNOUNCEMENT: [u8; 54] = [
902         // VETH header
903         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
904         // Ethernet frame
905         0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x52, 0x55, 0x0a, 0x00, 0x02, 0x0f, 0x08, 0x06, 0x00,
906         0x01, 0x08, 0x00, 0x06, 0x04, 0x00, 0x01, 0x52, 0x55, 0x0a, 0x00, 0x02, 0x0f, 0x0a, 0x00,
907         0x02, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x0a, 0x00, 0x02, 0x0f,
908     ];
909 
910     // TCP SYN from 52:55:0A:00:02:0F to 52:55:0A:00:02:01 (latter MAC should be arbitrary with
911     // Slirp) IP 10.0.2.15(5678) -> 127.0.0.1(19422)
912     // Note: MAC addresses in Slirp are arbitrary
913     const VETH_TCP_SYN: [u8; 66] = [
914         // VETH header
915         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
916         // Ethernet frame
917         0x52, 0x55, 0x0a, 0x00, 0x02, 0x01, 0x52, 0x55, 0x0a, 0x00, 0x02, 0x0f, 0x08, 0x00, 0x45,
918         0x00, 0x00, 0x28, 0x12, 0x34, 0x40, 0x00, 0xff, 0x06, 0xde, 0x8b, 0x0a, 0x00, 0x02, 0x0f,
919         0x7f, 0x00, 0x00, 0x01, 0x16, 0x2e, 0x4b, 0xde, 0x00, 0x00, 0x04, 0xd2, 0x00, 0x00, 0x0d,
920         0x80, 0x50, 0x02, 0x0f, 0xa0, 0xa0, 0xd4, 0x00, 0x00,
921     ];
922 
923     // This is built into the TCP_SYN packet above; changing it will require a change to the TCP
924     // checksum
925     const LOOPBACK_SOCKET: &str = "127.0.0.1:19422";
926 
927     const TIMEOUT_MILLIS: u64 = 400;
928 
929     #[test]
test_send_tcp_syn()930     fn test_send_tcp_syn() {
931         use std::net::TcpListener;
932         use std::thread;
933         use std::time::Duration;
934 
935         let (mut guest_pipe, host_pipe) = named_pipes::pair_with_buffer_size(
936             &FramingMode::Message,
937             &BlockingMode::Wait,
938             0,
939             SLIRP_BUFFER_SIZE,
940             true,
941         )
942         .unwrap();
943         let mut overlapped_wrapper = OverlappedWrapper::new(true).unwrap();
944 
945         // Start Slirp in another thread
946         let shutdown_sender = Event::new_auto_reset().unwrap();
947         let shutdown_receiver = shutdown_sender.try_clone().unwrap();
948 
949         // Run the slirp handling in a background thread
950         thread::spawn(move || {
951             start_slirp(
952                 host_pipe,
953                 shutdown_receiver,
954                 /* disable_access_to_host= */ false,
955                 #[cfg(any(feature = "slirp-ring-capture", feature = "slirp-debug"))]
956                 None,
957             )
958             .unwrap();
959         });
960 
961         // Create a timeout thread so the test doesn't block forever if something is amiss
962         thread::spawn(move || {
963             thread::sleep(Duration::from_millis(TIMEOUT_MILLIS));
964             shutdown_sender
965                 .signal()
966                 .expect("Failed to write to shutdown sender");
967         });
968 
969         // Start a local TCP server for our Slirp to connect to
970         let _listener = TcpListener::bind(LOOPBACK_SOCKET).unwrap();
971 
972         // This ARP is required or else Slirp will send us an ARP request before it returns an ACK
973         // SAFETY: safe because the buffer & overlapped wrapper are in scope for
974         // the duration of the overlapped operation.
975         unsafe {
976             guest_pipe
977                 .write_overlapped(&VETH_ARP_ANNOUNCEMENT, &mut overlapped_wrapper)
978                 .expect("Failed to write ARP to guest pipe");
979         }
980         guest_pipe
981             .get_overlapped_result(&mut overlapped_wrapper)
982             .unwrap();
983         // SAFETY: safe because the buffer & overlapped wrapper are in scope for
984         // the duration of the overlapped operation.
985         unsafe {
986             guest_pipe
987                 .write_overlapped(&VETH_TCP_SYN, &mut overlapped_wrapper)
988                 .expect("Failed to write SYN to guest pipe")
989         };
990         guest_pipe
991             .get_overlapped_result(&mut overlapped_wrapper)
992             .unwrap();
993 
994         let mut recv_buffer: [u8; 512] = [0; 512];
995         // SAFETY: safe because the buffer & overlapped wrapper are in scope for
996         // the duration of the overlapped operation.
997         unsafe { guest_pipe.read_overlapped(&mut recv_buffer, &mut overlapped_wrapper) }.unwrap();
998         let size = guest_pipe
999             .get_overlapped_result(&mut overlapped_wrapper)
1000             .unwrap() as usize;
1001 
1002         // This output is printed to aid in debugging; it can be parsed with https://hpd.gasmi.net/
1003         println!("Received frame:");
1004         for byte in recv_buffer[0..size].iter() {
1005             print!("{:01$x} ", byte, 2);
1006         }
1007         println!();
1008 
1009         // This test expects a VETH header + SYN+ACK response. It doesn't inspect every byte of
1010         // the response frame because some fields may be dependent on the host or OS.
1011         assert_eq!(size, VETH_HEADER_LENGTH + 58);
1012 
1013         // Strip off the VETH header and ignore it
1014         recv_buffer.copy_within(VETH_HEADER_LENGTH.., 0);
1015 
1016         // Check Ethernet header
1017         const ETH_RESPONSE_HEADER: [u8; 14] = [
1018             0x52, 0x55, 0x0A, 0x00, 0x02, 0x0F, 0x52, 0x55, 0x0A, 0x00, 0x02, 0x02, 0x08, 0x00,
1019         ];
1020         assert_eq!(
1021             recv_buffer[0..ETH_RESPONSE_HEADER.len()],
1022             ETH_RESPONSE_HEADER
1023         );
1024 
1025         // Check source IP
1026         assert_eq!(recv_buffer[26..=29], [0x7f, 0x00, 0x00, 0x01]); // 127.0.0.1
1027 
1028         // Check dest IP
1029         assert_eq!(recv_buffer[30..=33], [0x0A, 0x00, 0x02, 0x0F]); // 10.0.2.15
1030 
1031         // Check source port
1032         assert_eq!(recv_buffer[34..=35], [0x4b, 0xde]); // 19422
1033 
1034         // Check destination port
1035         assert_eq!(recv_buffer[36..=37], [0x16, 0x2e]); // 5678
1036 
1037         // Check TCP flags are SYN+ACK
1038         assert_eq!(recv_buffer[47], 0x12);
1039     }
1040 }
1041