1 // Copyright 2020 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 use std::collections::HashMap;
6 #[cfg(any(feature = "slirp-ring-capture", feature = "slirp-debug"))]
7 use std::fs::File;
8 use std::io;
9 #[cfg(any(feature = "slirp-ring-capture", feature = "slirp-debug"))]
10 use std::io::BufWriter;
11 use std::net::Ipv4Addr;
12 use std::net::Ipv6Addr;
13 use std::time::Duration;
14 use std::time::Instant;
15
16 use base::error;
17 use base::named_pipes::OverlappedWrapper;
18 use base::named_pipes::PipeConnection;
19 use base::warn;
20 use base::AsRawDescriptor;
21 use base::Descriptor;
22 use base::Error as SysError;
23 use base::Event;
24 use base::EventExt;
25 use base::EventToken;
26 use base::RawDescriptor;
27 use base::Timer;
28 use base::TimerTrait;
29 use base::WaitContext;
30 use base::WaitContextExt;
31 use metrics::MetricEventType;
32 use metrics::PeriodicLogger;
33 #[cfg(any(feature = "slirp-ring-capture", feature = "slirp-debug"))]
34 use pcap_file::pcap::PcapWriter;
35 use smallvec::SmallVec;
36 use virtio_sys::virtio_net::virtio_net_hdr;
37 use virtio_sys::virtio_net::virtio_net_hdr_mrg_rxbuf;
38 use winapi::shared::minwindef::MAKEWORD;
39 use winapi::um::winnt::LONG;
40 use winapi::um::winnt::SHORT;
41 use winapi::um::winsock2::WSACleanup;
42 use winapi::um::winsock2::WSAEventSelect;
43 use winapi::um::winsock2::WSAGetLastError;
44 use winapi::um::winsock2::WSAPoll;
45 use winapi::um::winsock2::WSAStartup;
46 use winapi::um::winsock2::FD_CLOSE;
47 use winapi::um::winsock2::FD_READ;
48 use winapi::um::winsock2::FD_WRITE;
49 use winapi::um::winsock2::POLLERR;
50 use winapi::um::winsock2::POLLHUP;
51 use winapi::um::winsock2::POLLRDBAND;
52 use winapi::um::winsock2::POLLRDNORM;
53 use winapi::um::winsock2::POLLWRNORM;
54 use winapi::um::winsock2::SOCKET;
55 use winapi::um::winsock2::SOCKET_ERROR;
56 use winapi::um::winsock2::WSADATA;
57 use winapi::um::winsock2::WSAPOLLFD;
58 use zerocopy::AsBytes;
59
60 use crate::slirp::context::CallbackHandler;
61 use crate::slirp::context::Context;
62 use crate::slirp::context::PollEvents;
63 #[cfg(feature = "slirp-ring-capture")]
64 use crate::slirp::packet_ring_buffer::PacketRingBuffer;
65 use crate::slirp::SlirpError;
66 use crate::slirp::ETHERNET_FRAME_SIZE;
67 use crate::Error;
68 use crate::Result;
69
// Capture file name used by `slirp-debug` builds when the caller does not supply a path.
#[cfg(feature = "slirp-debug")]
const SLIRP_CAPTURE_FILE_NAME: &str = "slirp_capture.pcap";

// Capacity of the `BufWriter` placed in front of the pcap capture file.
#[cfg(any(feature = "slirp-ring-capture", feature = "slirp-debug"))]
const PCAP_FILE_BUFFER_SIZE: usize = 1024 * 1024; // 1MiB

// Number of leading bytes skipped on every buffer read from the guest before the ethernet frame
// starts.
const VETH_HEADER_LENGTH: usize = 12;

// Size handed to `PacketRingBuffer::new` for each of the tx and rx ring buffers.
#[cfg(feature = "slirp-ring-capture")]
const PACKET_RING_BUFFER_SIZE_IN_BYTES: usize = 30000000; // 30 MB
80
/// Callback state handed to the slirp `Context`; implements `CallbackHandler`.
struct Handler {
    // Reference instant for `clock_get_ns` and for packet capture timestamps.
    start: Instant,
    // Pipe over which packets are exchanged with the guest.
    pipe: PipeConnection,
    // Overlapped state for reads from `pipe`.
    read_overlapped_wrapper: OverlappedWrapper,
    // Destination buffer for reads from `pipe`.
    buf: [u8; ETHERNET_FRAME_SIZE],
    // Overlapped state for writes to `pipe`.
    write_overlapped_wrapper: OverlappedWrapper,
    // Stores the actual timer (Event) and callback. Note that Event ownership is held by libslirp,
    // and created/released via `timer_new` and `timer_free`.
    timer_callbacks: HashMap<RawDescriptor, Box<dyn FnMut()>>,
    // Fed with the size of every buffer read from the guest.
    tx_logger: PeriodicLogger,
    // Fed with the size of every packet sent to the guest.
    rx_logger: PeriodicLogger,
    // Packet capture state; `None` when no capture is active.
    #[allow(unused)]
    handler_debug: Option<HandlerDebug>,
}
95
/// Additional fields that exist only when debugging the slirp connection.
///
/// Every field is feature gated, so the struct is empty when neither `slirp-debug` nor
/// `slirp-ring-capture` is enabled.
struct HandlerDebug {
    // Writer for the pcap capture file.
    #[cfg(any(feature = "slirp-ring-capture", feature = "slirp-debug"))]
    pcap_writer: PcapWriter<BufWriter<File>>,
    // Packets sent to the guest, buffered until the `Handler` is dropped.
    #[cfg(feature = "slirp-ring-capture")]
    tx_packet_ring_buffer: PacketRingBuffer,
    // Packets read from the guest, buffered until the `Handler` is dropped.
    #[cfg(feature = "slirp-ring-capture")]
    rx_packet_ring_buffer: PacketRingBuffer,
}
105
106 impl CallbackHandler for Handler {
107 type Timer = base::Timer;
108
clock_get_ns(&mut self) -> i64109 fn clock_get_ns(&mut self) -> i64 {
110 const NANOS_PER_SEC: u64 = 1_000_000_000;
111 let running_duration = self.start.elapsed();
112 (running_duration.as_secs() * NANOS_PER_SEC + running_duration.subsec_nanos() as u64) as i64
113 }
114
115 /// Sends a packet to the guest.
send_packet(&mut self, buf: &[u8]) -> io::Result<usize>116 fn send_packet(&mut self, buf: &[u8]) -> io::Result<usize> {
117 let vnet_hdr = virtio_net_hdr_mrg_rxbuf {
118 hdr: virtio_net_hdr {
119 flags: 0,
120 gso_size: 0,
121 hdr_len: 0,
122 csum_start: 0,
123 csum_offset: 0,
124 gso_type: virtio_sys::virtio_net::VIRTIO_NET_HDR_GSO_NONE as u8,
125 },
126 num_buffers: 1,
127 };
128 let send_buf = [vnet_hdr.as_bytes(), buf].concat();
129
130 #[allow(unused)]
131 if let Some(handler_debug) = self.handler_debug.as_mut() {
132 let d = self.start.elapsed();
133 #[cfg(feature = "slirp-debug")]
134 {
135 handler_debug
136 .pcap_writer
137 .write(d.as_secs() as u32, d.subsec_nanos(), buf, buf.len() as u32)
138 .unwrap();
139 }
140 #[cfg(feature = "slirp-ring-capture")]
141 {
142 handler_debug
143 .tx_packet_ring_buffer
144 .add_packet(buf, d)
145 .expect("Failed to add packet.");
146 }
147 }
148
149 // Log as rx from the guest's perspective
150 self.rx_logger.log(buf.len() as i64);
151 // SAFETY: safe because the operation ends with send_buf and
152 // write_overlapped_wrapper still in scope.
153 unsafe {
154 self.pipe
155 .write_overlapped(&send_buf, &mut self.write_overlapped_wrapper)?;
156 }
157 self.pipe
158 .get_overlapped_result(&mut self.write_overlapped_wrapper)
159 .map(|x| x as usize)
160 }
161
162 // Not required per https://github.com/rootless-containers/slirp4netns/blob/7f6a4a654a84d4356c881a10417bab77fd5be325/slirp4netns.c
register_poll_fd(&mut self, _fd: i32)163 fn register_poll_fd(&mut self, _fd: i32) {}
unregister_poll_fd(&mut self, _fd: i32)164 fn unregister_poll_fd(&mut self, _fd: i32) {}
165
guest_error(&mut self, msg: &str)166 fn guest_error(&mut self, msg: &str) {
167 warn!("guest error: {}", msg);
168 }
169
170 // Not required per https://github.com/rootless-containers/slirp4netns/blob/7f6a4a654a84d4356c881a10417bab77fd5be325/slirp4netns.c
notify(&mut self)171 fn notify(&mut self) {}
172
timer_new(&mut self, callback: Box<dyn FnMut()>) -> Box<Self::Timer>173 fn timer_new(&mut self, callback: Box<dyn FnMut()>) -> Box<Self::Timer> {
174 let timer = Timer::new().expect("failed to create network timer");
175 self.timer_callbacks
176 .insert(timer.as_raw_descriptor(), callback);
177 Box::new(timer)
178 }
179
timer_mod(&mut self, timer: &mut Self::Timer, expire_time: i64)180 fn timer_mod(&mut self, timer: &mut Self::Timer, expire_time: i64) {
181 // expire_time is a clock_get_ns relative deadline.
182 let timer_duration = Duration::from_millis(expire_time as u64)
183 - Duration::from_nanos(self.clock_get_ns() as u64);
184
185 timer
186 .reset_oneshot(timer_duration)
187 .expect("failed to modify network timer");
188 }
189
timer_free(&mut self, timer: Box<Self::Timer>)190 fn timer_free(&mut self, timer: Box<Self::Timer>) {
191 self.timer_callbacks.remove(&timer.as_raw_descriptor());
192 // The actual Timer is freed implicitly by the Box drop.
193 }
194
get_timers<'a>(&'a self) -> Box<dyn Iterator<Item = &RawDescriptor> + 'a>195 fn get_timers<'a>(&'a self) -> Box<dyn Iterator<Item = &RawDescriptor> + 'a> {
196 Box::new(self.timer_callbacks.keys())
197 }
198
execute_timer(&mut self, timer: RawDescriptor)199 fn execute_timer(&mut self, timer: RawDescriptor) {
200 let timer_callback = self
201 .timer_callbacks
202 .get_mut(&timer)
203 .expect("tried to run timer that has no callback");
204 timer_callback()
205 }
206
begin_read_from_guest(&mut self) -> io::Result<()>207 fn begin_read_from_guest(&mut self) -> io::Result<()> {
208 // SAFETY:
209 // Safe because we are writing simple bytes.
210 unsafe {
211 self.pipe
212 .read_overlapped(&mut self.buf, &mut self.read_overlapped_wrapper)
213 }
214 }
215
end_read_from_guest(&mut self) -> io::Result<&[u8]>216 fn end_read_from_guest(&mut self) -> io::Result<&[u8]> {
217 match self
218 .pipe
219 .try_get_overlapped_result(&mut self.read_overlapped_wrapper)
220 {
221 Ok(len) if len as usize >= VETH_HEADER_LENGTH => {
222 // Skip over the veth header (12 bytes, created by the frontend per the
223 // virtio spec).
224 let ethernet_pkt = &self.buf[VETH_HEADER_LENGTH..len as usize];
225
226 #[allow(unused)]
227 if let Some(handler_debug) = self.handler_debug.as_mut() {
228 let d = self.start.elapsed();
229
230 #[cfg(feature = "slirp-debug")]
231 {
232 handler_debug
233 .pcap_writer
234 .write(
235 d.as_secs() as u32,
236 d.subsec_nanos(),
237 ethernet_pkt,
238 len - VETH_HEADER_LENGTH as u32,
239 )
240 .unwrap();
241 }
242 #[cfg(feature = "slirp-ring-capture")]
243 {
244 handler_debug
245 .rx_packet_ring_buffer
246 .add_packet(ethernet_pkt, d)
247 .expect("Failed to add packet.");
248 }
249 };
250
251 // Log as tx from the guest's perspective
252 self.tx_logger.log(len as i64);
253 Ok(ethernet_pkt)
254 }
255 Ok(len) => Err(io::Error::new(
256 io::ErrorKind::InvalidData,
257 format!(
258 "Too few bytes ({}) read from the guest's virtio-net frontend.",
259 len
260 ),
261 )),
262 Err(e) => Err(e),
263 }
264 }
265 }
266
#[cfg(feature = "slirp-ring-capture")]
impl Drop for Handler {
    /// Drains both packet ring buffers into the pcap file when capture is active.
    ///
    /// Write failures are logged rather than unwrapped: panicking inside `drop` while another
    /// panic is unwinding aborts the process.
    fn drop(&mut self) {
        if let Some(handler_debug) = &mut self.handler_debug {
            let packets = PacketRingBuffer::pop_ring_buffers_and_aggregate(
                &mut handler_debug.rx_packet_ring_buffer,
                &mut handler_debug.tx_packet_ring_buffer,
            );

            for packet in packets {
                let write_result = handler_debug.pcap_writer.write(
                    packet.timestamp.as_secs() as u32,
                    packet.timestamp.subsec_nanos(),
                    &packet.buf,
                    packet.buf.len() as u32,
                );
                if let Err(e) = write_result {
                    error!("Failed to write captured packet to pcap file: {:?}", e);
                    // Stop at the first failure instead of logging once per remaining packet.
                    break;
                }
            }
        }
    }
}
290
last_wsa_error() -> io::Error291 fn last_wsa_error() -> io::Error {
292 io::Error::from_raw_os_error(
293 // SAFETY: trivially safe
294 unsafe { WSAGetLastError() },
295 )
296 }
297
poll_sockets(mut sockets: Vec<WSAPOLLFD>) -> io::Result<Vec<WSAPOLLFD>>298 fn poll_sockets(mut sockets: Vec<WSAPOLLFD>) -> io::Result<Vec<WSAPOLLFD>> {
299 // SAFETY:
300 // Safe because sockets is guaranteed to be valid, and we handle error return codes below.
301 let poll_result = unsafe {
302 WSAPoll(
303 sockets.as_mut_ptr(),
304 sockets.len() as u32,
305 1, /* timeout in ms */
306 )
307 };
308
309 match poll_result {
310 SOCKET_ERROR => Err(last_wsa_error()),
311 _ => Ok(sockets),
312 }
313 }
314
315 /// Converts WSA poll events into the network event bitfield used by WSAEventSelect.
wsa_events_to_wsa_network_events(events: SHORT) -> LONG316 fn wsa_events_to_wsa_network_events(events: SHORT) -> LONG {
317 let mut net_events = 0;
318 if events & (POLLRDNORM | POLLRDBAND) != 0 {
319 net_events |= FD_READ;
320 }
321 if events & POLLWRNORM > 0 {
322 net_events |= FD_WRITE;
323 }
324 net_events
325 }
326
wsa_events_to_slirp_events(events: SHORT) -> PollEvents327 fn wsa_events_to_slirp_events(events: SHORT) -> PollEvents {
328 // On Windows, revents have the following meaning:
329 // Linux POLLIN == POLLRDBAND | POLLRDNORM
330 // Linux POLLOUT == POLLWRNORM
331 // Linux POLLERR == POLLERR
332 // Windows: POLLPRI is not implemented.
333 // POLLNVAL is not a supported Slirp polling flag.
334 // Further details at
335 // https://docs.microsoft.com/en-us/windows/win32/api/winsock2/nf-winsock2-wsapoll
336 let mut poll_events = PollEvents::empty();
337 if events & (POLLRDNORM | POLLRDBAND) != 0 {
338 poll_events |= PollEvents::poll_in();
339 }
340 if events & POLLWRNORM != 0 {
341 poll_events |= PollEvents::poll_out();
342 }
343 if events & POLLERR != 0 {
344 poll_events |= PollEvents::poll_err();
345 }
346 if events & POLLHUP != 0 {
347 poll_events |= PollEvents::poll_hup();
348 }
349 poll_events
350 }
351
slirp_events_to_wsa_events(events: PollEvents) -> SHORT352 fn slirp_events_to_wsa_events(events: PollEvents) -> SHORT {
353 // Note that the events that get sent into WSAPoll are a subset of the events that are returned
354 // by WSAPoll. As such, this function is not an inverse of wsa_events_to_slirp_events.
355 let mut wsa_events: SHORT = 0;
356 if events.has_in() {
357 wsa_events |= POLLRDNORM | POLLRDBAND;
358 }
359 if events.has_out() {
360 wsa_events |= POLLWRNORM;
361 }
362 // NOTE: POLLHUP cannot be supplied to WSAPoll.
363
364 wsa_events
365 }
366
// Tokens registered with the `WaitContext` used by `poll`.
#[derive(EventToken, Eq, PartialEq, Copy, Clone)]
enum Token {
    // One of the caller's handles; the payload is its index in the `handles` vector given to
    // `poll`.
    EventHandleReady(usize),
    // The event that the polled sockets are associated with via `WSAEventSelect`.
    SocketReady,
}
372
/// Associates a WSAPOLLFD's events with an Event object, disassociating on drop.
struct EventSelectedSocket<'a> {
    // Socket (and requested poll events) that was registered with `WSAEventSelect`.
    socket: WSAPOLLFD,
    // Event the socket's network events are delivered to.
    event: &'a Event,
}
378
379 impl<'a> EventSelectedSocket<'a> {
new(socket: WSAPOLLFD, event: &'a Event) -> Result<EventSelectedSocket>380 fn new(socket: WSAPOLLFD, event: &'a Event) -> Result<EventSelectedSocket> {
381 // SAFETY:
382 // Safe because socket.fd exists, the event handle is guaranteed to exist, and we check the
383 // return code below.
384 let res = unsafe {
385 WSAEventSelect(
386 socket.fd as SOCKET,
387 event.as_raw_descriptor(),
388 // Because WSAPOLLFD cannot contain POLLHUP (even if libslirp wanted to specify it,
389 // WSAPoll does not accept it), we assume it is always present.
390 wsa_events_to_wsa_network_events(socket.events) | FD_CLOSE,
391 )
392 };
393 if res == SOCKET_ERROR {
394 return Err(Error::Slirp(SlirpError::SlirpIOPollError(last_wsa_error())));
395 }
396 Ok(EventSelectedSocket { socket, event })
397 }
398 }
399
400 impl<'a> Drop for EventSelectedSocket<'a> {
drop(&mut self)401 fn drop(&mut self) {
402 // SAFETY:
403 // Safe because socket.fd exists, the event handle is guaranteed to exist, and we check the
404 // return code below.
405 let res = unsafe {
406 WSAEventSelect(
407 self.socket.fd as SOCKET,
408 self.event.as_raw_descriptor(),
409 /* listen for no events */ 0,
410 )
411 };
412 if res == SOCKET_ERROR {
413 warn!("failed to unselect socket: {}", last_wsa_error());
414 }
415 }
416 }
417
418 /// Rough equivalent of select(...) for Windows.
419 /// The following behavior is guaranteed:
420 /// 1. The position of sockets in the sockets vector is maintained on return.
421 /// 2. Sockets are always polled on any wakeup.
422 ///
423 /// For optimization reasons, takes a utility event & WaitContext to avoid having to re-create
424 /// those objects if poll is called from an event loop. The Event and WaitContext MUST NOT be used
425 /// for any other purpose in between calls to `poll`.
poll<'a>( wait_ctx: &WaitContext<Token>, socket_event_handle: &Event, handles: Vec<&'a dyn AsRawDescriptor>, sockets: Vec<WSAPOLLFD>, timeout: Option<Duration>, ) -> Result<(Vec<&'a dyn AsRawDescriptor>, Vec<WSAPOLLFD>)>426 fn poll<'a>(
427 wait_ctx: &WaitContext<Token>,
428 socket_event_handle: &Event,
429 handles: Vec<&'a dyn AsRawDescriptor>,
430 sockets: Vec<WSAPOLLFD>,
431 timeout: Option<Duration>,
432 ) -> Result<(Vec<&'a dyn AsRawDescriptor>, Vec<WSAPOLLFD>)> {
433 let mut selected_sockets = Vec::with_capacity(sockets.len());
434 for socket in sockets.iter() {
435 selected_sockets.push(EventSelectedSocket::new(*socket, socket_event_handle)?);
436 }
437
438 wait_ctx
439 .clear()
440 .map_err(|e| Error::Slirp(SlirpError::SlirpPollError(e)))?;
441 for (i, handle) in handles.iter().enumerate() {
442 match wait_ctx.add(*handle, Token::EventHandleReady(i)) {
443 Ok(v) => v,
444 Err(e) => {
445 return Err(Error::Slirp(SlirpError::SlirpPollError(e)));
446 }
447 }
448 }
449 match wait_ctx.add(socket_event_handle, Token::SocketReady) {
450 Ok(v) => v,
451 Err(e) => {
452 return Err(Error::Slirp(SlirpError::SlirpPollError(e)));
453 }
454 }
455
456 let events = if let Some(timeout) = timeout {
457 wait_ctx
458 .wait_timeout(timeout)
459 .map_err(|e| Error::Slirp(SlirpError::SlirpPollError(e)))?
460 } else {
461 wait_ctx
462 .wait()
463 .map_err(|e| Error::Slirp(SlirpError::SlirpPollError(e)))?
464 };
465
466 let tokens: Vec<Token> = events
467 .iter()
468 .filter(|e| e.is_readable)
469 .map(|e| e.token)
470 .collect();
471 let mut handle_results = Vec::new();
472 for token in tokens {
473 match token {
474 Token::EventHandleReady(i) => {
475 handle_results.push(handles[i]);
476 }
477 Token::SocketReady => {
478 // We always call poll_sockets, so whether the token is present doesn't matter.
479 }
480 };
481 }
482
483 let socket_results = if sockets.is_empty() {
484 Vec::new()
485 } else {
486 poll_sockets(sockets).map_err(|e| Error::Slirp(SlirpError::SlirpIOPollError(e)))?
487 };
488
489 Ok((handle_results, socket_results))
490 }
491
/// Opens a WSAStartup/WSACleanup context; in other words, while a context is held, winsock calls
/// can be made.
struct WSAContext {
    // Output structure passed to `WSAStartup`.
    data: WSADATA,
}
497
498 impl WSAContext {
new() -> Result<WSAContext>499 fn new() -> Result<WSAContext> {
500 // SAFETY:
501 // Trivially safe (initialization of this memory is not required).
502 let mut ctx: WSAContext = unsafe { std::mem::zeroed() };
503
504 // SAFETY:
505 // Safe because ctx.data is guaranteed to exist, and we check the return code.
506 let err = unsafe { WSAStartup(MAKEWORD(2, 0), &mut ctx.data) };
507 if err != 0 {
508 Err(Error::Slirp(SlirpError::WSAStartupError(SysError::new(
509 err,
510 ))))
511 } else {
512 Ok(ctx)
513 }
514 }
515 }
516
517 impl Drop for WSAContext {
drop(&mut self)518 fn drop(&mut self) {
519 // SAFETY: trivially safe with return value checked.
520 let err = unsafe { WSACleanup() };
521 if err != 0 {
522 error!("WSACleanup failed: {}", last_wsa_error())
523 }
524 }
525 }
526
527 /// Starts libslirp's main loop attached to host_pipe. Packets are exchanged between host_pipe and
528 /// the host's network stack.
529 ///
530 /// host_pipe must be non blocking & in message mode.
start_slirp( host_pipe: PipeConnection, shutdown_event: Event, disable_access_to_host: bool, #[cfg(any(feature = "slirp-ring-capture", feature = "slirp-debug"))] slirp_capture_file: Option< String, >, ) -> Result<()>531 pub fn start_slirp(
532 host_pipe: PipeConnection,
533 shutdown_event: Event,
534 disable_access_to_host: bool,
535 #[cfg(any(feature = "slirp-ring-capture", feature = "slirp-debug"))] slirp_capture_file: Option<
536 String,
537 >,
538 ) -> Result<()> {
539 // This call is not strictly required because libslirp currently calls WSAStartup for us, but
540 // relying on that is brittle and a potential source of bugs as we have our own socket code that
541 // runs on the Rust side.
542 let _wsa_context = WSAContext::new()?;
543
544 let (mut context, host_pipe_notifier_handle) = create_slirp_context(
545 host_pipe,
546 disable_access_to_host,
547 #[cfg(any(feature = "slirp-ring-capture", feature = "slirp-debug"))]
548 slirp_capture_file,
549 )?;
550 let shutdown_event_handle = shutdown_event.as_raw_descriptor();
551
552 // Stack data for the poll function.
553 let wait_ctx: WaitContext<Token> =
554 WaitContext::new().map_err(|e| Error::Slirp(SlirpError::SlirpPollError(e)))?;
555 let socket_event_handle =
556 Event::new_auto_reset().map_err(|e| Error::Slirp(SlirpError::SlirpPollError(e)))?;
557
558 'slirp: loop {
559 // Request the FDs that we should poll from Slirp. Slirp provides them to us by way of a
560 // callback, which is invoked for each FD. This callback requires us to assign each FD an
561 // index which will be used by a subsequent Slirp call to get the poll events for
562 // each FD. The data flow can be thought of as follows:
563 // 1. pollfds_fill creates a map of index -> fd inside Slirp based on the return values
564 // from the pollfds_fill callback.
565 // 2. crosvm invokes poll on the FDs provided by Slirp.
566 // 3. crosvm notifies Slirp via pollfds_poll that polling completed for the provided FDs.
567 // 4. Slirp calls into crosvm via the pollfds_poll callback and asks for the statuses
568 // using the fd indicies registered in step #1.
569 let mut poll_fds = Vec::new();
570 // We'd like to sleep as long as possible (assuming no actionable notifications arrive).
571 let mut timeout_ms: u32 = u32::MAX;
572 context.pollfds_fill(&mut timeout_ms, |fd: i32, events: PollEvents| {
573 poll_fds.push(WSAPOLLFD {
574 fd: fd as usize,
575 events: slirp_events_to_wsa_events(events),
576 revents: 0,
577 });
578 (poll_fds.len() - 1) as i32
579 });
580
581 // There are relatively few concurrent timer_callbacks used by libslirp, so we set the small
582 // vector size low.
583 let timer_callbacks = context
584 .get_timers()
585 .map(|timer| Descriptor(*timer))
586 .collect::<SmallVec<[Descriptor; 8]>>();
587 let mut handles: Vec<&dyn AsRawDescriptor> = Vec::with_capacity(timer_callbacks.len() + 2);
588 handles.extend(
589 timer_callbacks
590 .iter()
591 .map(|timer| timer as &dyn AsRawDescriptor),
592 );
593
594 let host_pipe_notifier = Descriptor(host_pipe_notifier_handle);
595 handles.push(&host_pipe_notifier);
596 handles.push(&shutdown_event);
597
598 let (handle_results, socket_results) = poll(
599 &wait_ctx,
600 &socket_event_handle,
601 handles,
602 poll_fds,
603 Some(Duration::from_millis(timeout_ms.into())),
604 )?;
605
606 for handle in handle_results.iter() {
607 match handle.as_raw_descriptor() {
608 h if h == host_pipe_notifier_handle => {
609 // Collect input from the guest & inject into Slirp. It seems that this input
610 // step should be between pollfds_fill & pollfds_poll.
611 context.handle_guest_input()?;
612 }
613 h if h == shutdown_event_handle => {
614 break 'slirp;
615 }
616 timer_handle => {
617 // All other handles are timer_callbacks.
618 context.execute_timer(timer_handle);
619 }
620 }
621 }
622
623 // It's possible no socket notified and we got here from a timeout. This is fine, because
624 // libslirp wants to be woken up if timeout has expired (even if no sockets are ready).
625 context.pollfds_poll(false, |fd_index: i32| {
626 wsa_events_to_slirp_events(socket_results[fd_index as usize].revents)
627 })
628 }
629
630 // Never reached.
631 Ok(())
632 }
633
create_slirp_context( host_pipe: PipeConnection, disable_access_to_host: bool, #[cfg(any(feature = "slirp-ring-capture", feature = "slirp-debug"))] slirp_capture_file: Option< String, >, ) -> Result<(Box<Context<Handler>>, RawDescriptor)>634 fn create_slirp_context(
635 host_pipe: PipeConnection,
636 disable_access_to_host: bool,
637 #[cfg(any(feature = "slirp-ring-capture", feature = "slirp-debug"))] slirp_capture_file: Option<
638 String,
639 >,
640 ) -> Result<(Box<Context<Handler>>, RawDescriptor)> {
641 // Set up handler_debug:
642 // - If slirp-debug is used, write to SLIRP_CAPTURE_FILE_NAME if slirp_capture_file not set.
643 // - If slirp-ring-capture is used, write to slirp_capture_file.
644 // - If slirp_capture_file not set, don't debug.
645 // - Otherwise, set to None.
646 cfg_if::cfg_if! {
647 if #[cfg(any(feature = "slirp-ring-capture", feature = "slirp-debug"))] {
648 #[cfg(feature = "slirp-ring-capture")]
649 let capture_path = slirp_capture_file;
650 #[cfg(feature = "slirp-debug")]
651 let capture_path = slirp_capture_file.or(Some(SLIRP_CAPTURE_FILE_NAME.to_owned()));
652
653 let handler_debug = capture_path
654 .as_ref()
655 .map(File::create)
656 .transpose()
657 .unwrap_or_default()
658 .map(|capture_file| HandlerDebug {
659 pcap_writer: PcapWriter::new(BufWriter::with_capacity(
660 PCAP_FILE_BUFFER_SIZE,
661 capture_file,
662 ))
663 .unwrap(),
664 #[cfg(feature = "slirp-ring-capture")]
665 tx_packet_ring_buffer: PacketRingBuffer::new(PACKET_RING_BUFFER_SIZE_IN_BYTES),
666 #[cfg(feature = "slirp-ring-capture")]
667 rx_packet_ring_buffer: PacketRingBuffer::new(PACKET_RING_BUFFER_SIZE_IN_BYTES),
668 });
669
670 // If there is a target capture, but no debug, let the dev know. In prod, capture_path
671 // won't exist, so we won't log.
672 if capture_path.is_some() && handler_debug.is_none() {
673 error!("Failed to start packet capture! Check provided file path or sandboxing?");
674 }
675 } else {
676 let handler_debug = None;
677 }
678 }
679
680 let overlapped_wrapper = OverlappedWrapper::new(true).unwrap();
681 let read_notifier = overlapped_wrapper
682 .get_h_event_ref()
683 .unwrap()
684 .as_raw_descriptor();
685 let handler = Handler {
686 start: Instant::now(),
687 pipe: host_pipe,
688 read_overlapped_wrapper: overlapped_wrapper,
689 buf: [0; ETHERNET_FRAME_SIZE],
690 write_overlapped_wrapper: OverlappedWrapper::new(true).unwrap(),
691 timer_callbacks: HashMap::new(),
692 tx_logger: PeriodicLogger::new(MetricEventType::NetworkTxRate, Duration::from_secs(1))
693 .unwrap(),
694 rx_logger: PeriodicLogger::new(MetricEventType::NetworkRxRate, Duration::from_secs(1))
695 .unwrap(),
696 handler_debug,
697 };
698
699 // Address & mask of the virtual network.
700 let v4_network_addr = Ipv4Addr::new(10, 0, 2, 0);
701 let v4_network_mask = Ipv4Addr::new(255, 255, 255, 0);
702
703 // Address of the host machine on the virtual network (if the feature is enabled).
704 let host_v4_addr = Ipv4Addr::new(10, 0, 2, 2);
705
706 // Address of the libslirp provided DNS proxy (packets to this address are intercepted by
707 // libslirp & routed to the first nameserver configured on the machine's NICs by libslirp).
708 let dns_addr = Ipv4Addr::new(10, 0, 2, 3);
709
710 // DHCP range should start *after* the statically assigned addresses.
711 let dhcp_start_addr = Ipv4Addr::new(10, 0, 2, 4);
712
713 // IPv6 network address. This is a ULA (unique local address) network, with a randomly generated
714 // ID (0x13624603218). The "prefix" or network address is 64 bits, incorporating both the
715 // network ID, and the subnet (0x0001).
716 let v6_network_addr = Ipv6Addr::new(0xfd13, 0x6246, 0x3218, 0x0001, 0, 0, 0, 0);
717
718 let v6_host_addr = Ipv6Addr::new(0xfd13, 0x6246, 0x3218, 0x0001, 0, 0, 0, 2);
719 let v6_dns_addr = Ipv6Addr::new(0xfd13, 0x6246, 0x3218, 0x0001, 0, 0, 0, 3);
720 Ok((
721 Context::new(
722 disable_access_to_host,
723 /* IPv4 enabled */
724 true,
725 v4_network_addr,
726 v4_network_mask,
727 host_v4_addr,
728 /* IPv6 enabled */ true,
729 v6_network_addr,
730 /* virtual_network_v6_prefix_len */ 64,
731 /* host_v6_address */ v6_host_addr,
732 /* host_hostname */ None,
733 dhcp_start_addr,
734 dns_addr,
735 /* dns_server_v6_addr */ v6_dns_addr,
736 /* virtual_network_dns_search_domains */ Vec::new(),
737 /* dns_server_domain_name */ None,
738 handler,
739 )?,
740 read_notifier,
741 ))
742 }
743
744 #[cfg(test)]
745 mod tests {
746 use std::net::UdpSocket;
747 use std::os::windows::io::AsRawSocket;
748
749 use base::named_pipes;
750 use base::named_pipes::BlockingMode;
751 use base::named_pipes::FramingMode;
752
753 use super::super::SLIRP_BUFFER_SIZE;
754 use super::*;
755
create_socket() -> (UdpSocket, WSAPOLLFD)756 fn create_socket() -> (UdpSocket, WSAPOLLFD) {
757 let socket = UdpSocket::bind("127.0.0.1:0").unwrap();
758 socket
759 .set_nonblocking(true)
760 .expect("Socket failed to set non_blocking.");
761
762 let poll_fd = WSAPOLLFD {
763 fd: socket.as_raw_socket() as usize,
764 events: POLLRDNORM | POLLRDBAND, // POLLIN equivalent
765 revents: 0,
766 };
767
768 (socket, poll_fd)
769 }
770
create_readable_socket() -> (UdpSocket, WSAPOLLFD)771 fn create_readable_socket() -> (UdpSocket, WSAPOLLFD) {
772 let (socket, poll_fd) = create_socket();
773 let receiving_addr = socket.local_addr().unwrap();
774 let buf = [0; 10];
775 socket.send_to(&buf, receiving_addr).unwrap();
776
777 // Wait for the socket to really be readable before we return it back to the test. We've
778 // seen cases in CI where send_to completes, but WSAPoll won't find the socket to be
779 // readable.
780 let mut sockets = vec![poll_fd];
781 for _ in 0..5 {
782 sockets = poll_sockets(sockets).expect("poll_sockets failed");
783 if sockets[0].revents & (POLLRDNORM | POLLRDBAND) > 0 {
784 return (socket, poll_fd);
785 }
786 }
787 panic!("socket never became readable");
788 }
789
#[test]
fn test_polling_timeout_works() {
    let wait_ctx: WaitContext<Token> = WaitContext::new().unwrap();
    let socket_event_handle = Event::new_auto_reset().unwrap();

    // Nothing is sent to the socket and the event is never signaled, so only the timeout can
    // end the wait.
    let (_socket, idle_poll_fd) = create_socket();
    let unsignaled_event = Event::new_auto_reset().unwrap();
    let short_timeout = Some(Duration::from_millis(2));

    let (ready_handles, polled_sockets) = poll(
        &wait_ctx,
        &socket_event_handle,
        vec![&unsignaled_event],
        vec![idle_poll_fd],
        short_timeout,
    )
    .unwrap();

    // Asserts that we woke up because of a timeout.
    assert_eq!(ready_handles.len(), 0);
    assert_eq!(polled_sockets[0].revents, 0);
}
810
#[test]
fn test_polling_handle_only() {
    let wait_ctx: WaitContext<Token> = WaitContext::new().unwrap();
    let socket_event_handle = Event::new_auto_reset().unwrap();

    // Required to ensure winsock is ready (needed by poll).
    let (_sock, _poll_fd) = create_readable_socket();

    // Signal the only handle up front so the wait returns immediately.
    let signaled_event = Event::new_auto_reset().unwrap();
    signaled_event.signal().expect("Failed to write event");
    let expected_descriptor = signaled_event.as_raw_descriptor();

    let (ready_handles, _sockets) = poll(
        &wait_ctx,
        &socket_event_handle,
        vec![&signaled_event],
        Vec::new(),
        None,
    )
    .unwrap();

    assert_eq!(ready_handles.len(), 1);
    assert_eq!(ready_handles[0].as_raw_descriptor(), expected_descriptor);
}
833
834 #[test]
test_polling_socket_only()835 fn test_polling_socket_only() {
836 let wait_ctx: WaitContext<Token> = WaitContext::new().unwrap();
837 let socket_event_handle = Event::new_auto_reset().unwrap();
838
839 let (sock, poll_fd) = create_readable_socket();
840 let (_handles, sockets) = poll(
841 &wait_ctx,
842 &socket_event_handle,
843 Vec::new(),
844 vec![poll_fd],
845 None,
846 )
847 .unwrap();
848
849 assert_eq!(sockets.len(), 1);
850 assert_eq!(sockets[0].fd, sock.as_raw_socket() as usize);
851 }
852
853 #[test]
test_polling_two_notifies()854 fn test_polling_two_notifies() {
855 let wait_ctx: WaitContext<Token> = WaitContext::new().unwrap();
856 let socket_event_handle = Event::new_auto_reset().unwrap();
857
858 let (sock, poll_fd) = create_readable_socket();
859 let event_fd = Event::new_auto_reset().unwrap();
860 event_fd.signal().expect("Failed to write event");
861
862 let (handles, sockets) = poll(
863 &wait_ctx,
864 &socket_event_handle,
865 vec![&event_fd],
866 vec![poll_fd],
867 None,
868 )
869 .unwrap();
870
871 assert_eq!(sockets.len(), 1);
872 assert_eq!(sockets[0].fd, sock.as_raw_socket() as usize);
873
874 assert_eq!(handles.len(), 1);
875 assert_eq!(handles[0].as_raw_descriptor(), event_fd.as_raw_descriptor());
876 }
877
#[test]
fn test_slirp_stops_on_shutdown() {
    let shutdown_event = Event::new_auto_reset().unwrap();
    // The guest end is never used, but is kept alive for the duration of the test.
    let (host_pipe, mut _guest_pipe) = named_pipes::pair_with_buffer_size(
        &FramingMode::Message,
        &BlockingMode::Wait,
        0,
        SLIRP_BUFFER_SIZE,
        true,
    )
    .unwrap();

    // Signal shutdown before starting, so start_slirp is expected to return on its own.
    shutdown_event.signal().expect("Failed to write event");
    let shutdown_receiver = shutdown_event.try_clone().unwrap();

    let slirp_result = start_slirp(
        host_pipe,
        shutdown_receiver,
        /* disable_access_to_host= */ false,
        #[cfg(any(feature = "slirp-ring-capture", feature = "slirp-debug"))]
        None,
    );
    slirp_result.expect("Failed to start slirp");
}
899
900 // A gratuitous ARP from 52:55:0A:00:02:0F for IP 10.0.2.15
901 const VETH_ARP_ANNOUNCEMENT: [u8; 54] = [
902 // VETH header
903 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
904 // Ethernet frame
905 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x52, 0x55, 0x0a, 0x00, 0x02, 0x0f, 0x08, 0x06, 0x00,
906 0x01, 0x08, 0x00, 0x06, 0x04, 0x00, 0x01, 0x52, 0x55, 0x0a, 0x00, 0x02, 0x0f, 0x0a, 0x00,
907 0x02, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x0a, 0x00, 0x02, 0x0f,
908 ];
909
// TCP SYN from 52:55:0A:00:02:0F to 52:55:0A:00:02:01, IP 10.0.2.15(5678) -> 127.0.0.1(19422),
// preceded by a VETH header.
// Note: MAC addresses in Slirp are arbitrary, so the destination MAC used here should not matter.
const VETH_TCP_SYN: [u8; 66] = [
    // VETH header (12 bytes, all zero)
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    // Ethernet: destination MAC 52:55:0A:00:02:01
    0x52, 0x55, 0x0a, 0x00, 0x02, 0x01,
    // Ethernet: source MAC 52:55:0A:00:02:0F
    0x52, 0x55, 0x0a, 0x00, 0x02, 0x0f,
    // Ethernet: EtherType 0x0800 (IPv4)
    0x08, 0x00,
    // IPv4: version 4 / header length 5 words, DSCP/ECN 0, total length 40
    0x45, 0x00, 0x00, 0x28,
    // IPv4: identification 0x1234, flags/fragment offset 0x4000 (don't fragment)
    0x12, 0x34, 0x40, 0x00,
    // IPv4: TTL 255, protocol 6 (TCP), header checksum 0xde8b
    0xff, 0x06, 0xde, 0x8b,
    // IPv4: source address 10.0.2.15
    0x0a, 0x00, 0x02, 0x0f,
    // IPv4: destination address 127.0.0.1
    0x7f, 0x00, 0x00, 0x01,
    // TCP: source port 5678, destination port 19422
    0x16, 0x2e, 0x4b, 0xde,
    // TCP: sequence number 1234
    0x00, 0x00, 0x04, 0xd2,
    // TCP: acknowledgment number field 0x00000d80
    0x00, 0x00, 0x0d, 0x80,
    // TCP: data offset 5 words (no options), flags 0x02 (SYN), window 4000
    0x50, 0x02, 0x0f, 0xa0,
    // TCP: checksum 0xa0d4, urgent pointer 0
    0xa0, 0xd4, 0x00, 0x00,
];
922
// Host address and port that the SYN in VETH_TCP_SYN is addressed to. Both values are encoded
// in that packet, so changing them here also requires updating the packet bytes and its TCP
// checksum (and, if the address changes, its IPv4 header checksum).
const LOOPBACK_SOCKET: &str = "127.0.0.1:19422";

// Delay, in milliseconds, before test_send_tcp_syn signals the Slirp shutdown event so that the
// test cannot block forever.
const TIMEOUT_MILLIS: u64 = 400;
928
// Sends a gratuitous ARP followed by a TCP SYN through the guest end of the pipe while Slirp
// runs on the host end, then checks that the next frame read back on the guest end is a SYN+ACK
// from the loopback listener.
#[test]
fn test_send_tcp_syn() {
    use std::net::TcpListener;
    use std::thread;
    use std::time::Duration;

    let (mut guest_pipe, host_pipe) = named_pipes::pair_with_buffer_size(
        &FramingMode::Message,
        &BlockingMode::Wait,
        0,
        SLIRP_BUFFER_SIZE,
        true,
    )
    .unwrap();
    let mut overlapped = OverlappedWrapper::new(true).unwrap();

    // Two handles to one shutdown event: Slirp gets the receiver, the timeout thread the sender.
    let shutdown_sender = Event::new_auto_reset().unwrap();
    let shutdown_receiver = shutdown_sender.try_clone().unwrap();

    // Slirp runs on its own thread for the duration of the test.
    thread::spawn(move || {
        let slirp_result = start_slirp(
            host_pipe,
            shutdown_receiver,
            /* disable_access_to_host= */ false,
            #[cfg(any(feature = "slirp-ring-capture", feature = "slirp-debug"))]
            None,
        );
        slirp_result.unwrap();
    });

    // Watchdog: request shutdown after the timeout so the test doesn't block forever if
    // something is amiss.
    thread::spawn(move || {
        thread::sleep(Duration::from_millis(TIMEOUT_MILLIS));
        shutdown_sender
            .signal()
            .expect("Failed to write to shutdown sender");
    });

    // Local TCP server that Slirp connects to on behalf of the guest.
    let _listener = TcpListener::bind(LOOPBACK_SOCKET).unwrap();

    // Writes one frame to the guest pipe and waits for the overlapped write to finish.
    let mut send_frame = |frame: &[u8], failure_message: &str| {
        // SAFETY: safe because the buffer & overlapped wrapper are in scope for
        // the duration of the overlapped operation.
        unsafe {
            guest_pipe
                .write_overlapped(frame, &mut overlapped)
                .expect(failure_message);
        }
        guest_pipe.get_overlapped_result(&mut overlapped).unwrap();
    };

    // The ARP has to go first, or else Slirp will send us an ARP request before it returns an
    // ACK.
    send_frame(&VETH_ARP_ANNOUNCEMENT, "Failed to write ARP to guest pipe");
    send_frame(&VETH_TCP_SYN, "Failed to write SYN to guest pipe");

    let mut recv_buffer = [0u8; 512];
    // SAFETY: safe because the buffer & overlapped wrapper are in scope for
    // the duration of the overlapped operation.
    unsafe { guest_pipe.read_overlapped(&mut recv_buffer, &mut overlapped) }.unwrap();
    let size = guest_pipe.get_overlapped_result(&mut overlapped).unwrap() as usize;

    // Hex dump to aid in debugging; it can be parsed with https://hpd.gasmi.net/
    println!("Received frame:");
    recv_buffer[..size]
        .iter()
        .for_each(|byte| print!("{:01$x} ", byte, 2));
    println!();

    // A VETH header followed by a 58-byte SYN+ACK frame is expected. Only selected fields are
    // inspected because some bytes may be dependent on the host or OS.
    assert_eq!(size, VETH_HEADER_LENGTH + 58);

    // Everything after the VETH header, which is ignored.
    let frame = &recv_buffer[VETH_HEADER_LENGTH..size];

    // Ethernet header: destination 52:55:0A:00:02:0F, source 52:55:0A:00:02:02, EtherType IPv4.
    const ETH_RESPONSE_HEADER: [u8; 14] = [
        0x52, 0x55, 0x0A, 0x00, 0x02, 0x0F, 0x52, 0x55, 0x0A, 0x00, 0x02, 0x02, 0x08, 0x00,
    ];
    assert_eq!(frame[..ETH_RESPONSE_HEADER.len()], ETH_RESPONSE_HEADER);

    // Source IP: 127.0.0.1
    assert_eq!(frame[26..30], [0x7f, 0x00, 0x00, 0x01]);

    // Destination IP: 10.0.2.15
    assert_eq!(frame[30..34], [0x0A, 0x00, 0x02, 0x0F]);

    // Source port: 19422
    assert_eq!(frame[34..36], [0x4b, 0xde]);

    // Destination port: 5678
    assert_eq!(frame[36..38], [0x16, 0x2e]);

    // TCP flags: SYN+ACK
    assert_eq!(frame[47], 0x12);
}
1040 }
1041