xref: /aosp_15_r20/external/crosvm/devices/src/virtio/pmem.rs (revision bb4ee6a4ae7042d18b07a98463b9c8b875e44b39)
1 // Copyright 2019 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 use std::collections::BTreeMap;
6 use std::fs::File;
7 use std::io;
8 use std::mem::size_of;
9 use std::time::Duration;
10 
11 use anyhow::anyhow;
12 use anyhow::Context;
13 use base::error;
14 use base::AsRawDescriptor;
15 use base::Error as SysError;
16 use base::Event;
17 use base::RawDescriptor;
18 use base::Result as SysResult;
19 use base::Timer;
20 use base::Tube;
21 use base::TubeError;
22 use base::WorkerThread;
23 use cros_async::select3;
24 use cros_async::select4;
25 use cros_async::AsyncError;
26 use cros_async::EventAsync;
27 use cros_async::Executor;
28 use cros_async::TimerAsync;
29 use data_model::Le32;
30 use data_model::Le64;
31 use futures::pin_mut;
32 use remain::sorted;
33 use thiserror::Error;
34 use vm_control::MemSlot;
35 use vm_control::VmMemoryMappingRequest;
36 use vm_control::VmMemoryMappingResponse;
37 use vm_memory::GuestAddress;
38 use vm_memory::GuestMemory;
39 use zerocopy::AsBytes;
40 use zerocopy::FromBytes;
41 use zerocopy::FromZeroes;
42 
43 use super::async_utils;
44 use super::copy_config;
45 use super::DescriptorChain;
46 use super::DeviceType;
47 use super::Interrupt;
48 use super::Queue;
49 use super::VirtioDevice;
50 
// virtio-pmem exposes a single request virtqueue.
const QUEUE_SIZE: u16 = 256;
const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE];

/* Feature bits */
// Advertised only when the mapping is writable (see `Pmem::new`); gates the
// DISCARD request type below.
const VIRTIO_PMEM_F_DISCARD: u32 = 63;

// Request type codes read from the virtqueue.
const VIRTIO_PMEM_REQ_TYPE_FLUSH: u32 = 0;
// NOTE(review): u32::MAX is not a value from the virtio spec; presumably a
// crosvm-private extension matched by the guest driver — confirm before reuse.
const VIRTIO_PMEM_REQ_TYPE_DISCARD: u32 = u32::MAX;
// Status codes written back to the guest.
const VIRTIO_PMEM_RESP_TYPE_OK: u32 = 0;
const VIRTIO_PMEM_RESP_TYPE_EIO: u32 = 1;
61 
/// Device configuration space layout: the guest-physical address and size of
/// the pmem region (served to the guest by `read_config`).
#[derive(Copy, Clone, Debug, Default, AsBytes, FromZeroes, FromBytes)]
#[repr(C)]
struct virtio_pmem_config {
    // Guest physical address where the region is mapped (little-endian).
    start_address: Le64,
    // Size of the mapped region in bytes (little-endian).
    size: Le64,
}
68 
/// Response written back to the guest for every request.
#[derive(Copy, Clone, Debug, Default, AsBytes, FromZeroes, FromBytes)]
#[repr(C)]
struct virtio_pmem_resp {
    // One of VIRTIO_PMEM_RESP_TYPE_OK / VIRTIO_PMEM_RESP_TYPE_EIO.
    status_code: Le32,
}
74 
/// Short request form: carries only a request type (used for FLUSH).
#[derive(Copy, Clone, Debug, Default, AsBytes, FromZeroes, FromBytes)]
#[repr(C)]
struct virtio_pmem_req {
    // One of the VIRTIO_PMEM_REQ_TYPE_* constants.
    type_: Le32,
}
80 
/// Extended request form: a request type plus a target range (used for
/// DISCARD). Distinguished from `virtio_pmem_req` by descriptor length in
/// `handle_request`.
#[derive(Copy, Clone, Debug, Default, AsBytes, FromZeroes, FromBytes)]
#[repr(C)]
struct virtio_pmem_range_req {
    // One of the VIRTIO_PMEM_REQ_TYPE_* constants.
    type_: Le32,
    // Explicit padding so start_address is 8-byte aligned in the C layout.
    padding_: Le32,
    // Offset of the range within the mapping, in bytes.
    start_address: Le64,
    // Length of the range in bytes.
    size: Le64,
}
89 
/// Errors produced by the virtio-pmem worker while servicing the virtqueue or
/// the periodic pageout timer.
#[sorted]
#[derive(Error, Debug)]
enum Error {
    /// Failed to get value from pageout timer.
    #[error("failed to get value from pageout timer: {0}")]
    PageoutTimer(AsyncError),
    /// Failed to read from virtqueue.
    #[error("failed to read from virtqueue: {0}")]
    ReadQueue(io::Error),
    /// Failed to receive tube response.
    #[error("failed to receive tube response: {0}")]
    ReceiveResponse(TubeError),
    /// Failed to send tube request.
    #[error("failed to send tube request: {0}")]
    SendingRequest(TubeError),
    /// Failed to write to virtqueue.
    #[error("failed to write to virtqueue: {0}")]
    WriteQueue(io::Error),
}

// Local result alias used throughout the worker.
type Result<T> = ::std::result::Result<T, Error>;
111 
pageout( ex: &Executor, swap_interval: Duration, pmem_device_tube: &Tube, mapping_arena_slot: u32, mapping_size: usize, ) -> Result<()>112 async fn pageout(
113     ex: &Executor,
114     swap_interval: Duration,
115     pmem_device_tube: &Tube,
116     mapping_arena_slot: u32,
117     mapping_size: usize,
118 ) -> Result<()> {
119     let timer = Timer::new().expect("Failed to create a timer");
120     let mut pageout_timer =
121         TimerAsync::new(timer, ex).expect("Failed to create an async pageout timer");
122     pageout_timer
123         .reset_repeating(swap_interval)
124         .expect("Failed to reset pageout timer");
125 
126     loop {
127         pageout_timer.wait().await.map_err(Error::PageoutTimer)?;
128         let request = VmMemoryMappingRequest::MadvisePageout {
129             slot: mapping_arena_slot,
130             offset: 0,
131             size: mapping_size,
132         };
133 
134         pmem_device_tube
135             .send(&request)
136             .map_err(Error::SendingRequest)?;
137         match pmem_device_tube
138             .recv::<VmMemoryMappingResponse>()
139             .map_err(Error::ReceiveResponse)?
140         {
141             VmMemoryMappingResponse::Ok => {}
142             VmMemoryMappingResponse::Err(e) => {
143                 error!("failed to page out the memory mapping: {}", e);
144             }
145         };
146     }
147 }
148 
execute_request( request_type: u32, start_address: u64, size: u64, pmem_device_tube: &Tube, mapping_arena_slot: u32, mapping_size: usize, ) -> u32149 fn execute_request(
150     request_type: u32,
151     start_address: u64,
152     size: u64,
153     pmem_device_tube: &Tube,
154     mapping_arena_slot: u32,
155     mapping_size: usize,
156 ) -> u32 {
157     match request_type {
158         VIRTIO_PMEM_REQ_TYPE_FLUSH => {
159             let request = VmMemoryMappingRequest::MsyncArena {
160                 slot: mapping_arena_slot,
161                 offset: 0, // The pmem backing file is always at offset 0 in the arena.
162                 size: mapping_size,
163             };
164 
165             if let Err(e) = pmem_device_tube.send(&request) {
166                 error!("failed to send request: {}", e);
167                 return VIRTIO_PMEM_RESP_TYPE_EIO;
168             }
169 
170             match pmem_device_tube.recv() {
171                 Ok(response) => match response {
172                     VmMemoryMappingResponse::Ok => VIRTIO_PMEM_RESP_TYPE_OK,
173                     VmMemoryMappingResponse::Err(e) => {
174                         error!("failed flushing disk image: {}", e);
175                         VIRTIO_PMEM_RESP_TYPE_EIO
176                     }
177                 },
178                 Err(e) => {
179                     error!("failed to receive data: {}", e);
180                     VIRTIO_PMEM_RESP_TYPE_EIO
181                 }
182             }
183         }
184 
185         VIRTIO_PMEM_REQ_TYPE_DISCARD => {
186             let request = VmMemoryMappingRequest::MadviseRemove {
187                 slot: mapping_arena_slot,
188                 offset: usize::try_from(start_address).unwrap(),
189                 size: usize::try_from(size).unwrap(),
190             };
191 
192             if let Err(e) = pmem_device_tube.send(&request) {
193                 error!("failed to send request: {}", e);
194                 return VIRTIO_PMEM_RESP_TYPE_EIO;
195             }
196 
197             match pmem_device_tube.recv() {
198                 Ok(response) => match response {
199                     VmMemoryMappingResponse::Ok => VIRTIO_PMEM_RESP_TYPE_OK,
200                     VmMemoryMappingResponse::Err(e) => {
201                         error!("failed to discard memory range: {}", e);
202                         VIRTIO_PMEM_RESP_TYPE_EIO
203                     }
204                 },
205                 Err(e) => {
206                     error!("failed to receive data: {}", e);
207                     VIRTIO_PMEM_RESP_TYPE_EIO
208                 }
209             }
210         }
211 
212         _ => {
213             error!("unknown request type: {}", request_type);
214             VIRTIO_PMEM_RESP_TYPE_EIO
215         }
216     }
217 }
218 
handle_request( avail_desc: &mut DescriptorChain, pmem_device_tube: &Tube, mapping_arena_slot: u32, mapping_size: usize, ) -> Result<usize>219 fn handle_request(
220     avail_desc: &mut DescriptorChain,
221     pmem_device_tube: &Tube,
222     mapping_arena_slot: u32,
223     mapping_size: usize,
224 ) -> Result<usize> {
225     let (request_type, start_address, size) =
226         if avail_desc.reader.available_bytes() == size_of::<virtio_pmem_req>() {
227             let request = avail_desc
228                 .reader
229                 .read_obj::<virtio_pmem_req>()
230                 .map_err(Error::ReadQueue)?;
231             (request.type_.to_native(), 0, 0)
232         } else {
233             let request = avail_desc
234                 .reader
235                 .read_obj::<virtio_pmem_range_req>()
236                 .map_err(Error::ReadQueue)?;
237             (
238                 request.type_.to_native(),
239                 request.start_address.to_native(),
240                 request.size.to_native(),
241             )
242         };
243     let status_code = execute_request(
244         request_type,
245         start_address,
246         size,
247         pmem_device_tube,
248         mapping_arena_slot,
249         mapping_size,
250     );
251 
252     let response = virtio_pmem_resp {
253         status_code: status_code.into(),
254     };
255 
256     avail_desc
257         .writer
258         .write_obj(response)
259         .map_err(Error::WriteQueue)?;
260 
261     Ok(avail_desc.writer.bytes_written())
262 }
263 
handle_queue( queue: &mut Queue, mut queue_event: EventAsync, pmem_device_tube: &Tube, mapping_arena_slot: u32, mapping_size: usize, )264 async fn handle_queue(
265     queue: &mut Queue,
266     mut queue_event: EventAsync,
267     pmem_device_tube: &Tube,
268     mapping_arena_slot: u32,
269     mapping_size: usize,
270 ) {
271     loop {
272         let mut avail_desc = match queue.next_async(&mut queue_event).await {
273             Err(e) => {
274                 error!("Failed to read descriptor {}", e);
275                 return;
276             }
277             Ok(d) => d,
278         };
279 
280         let written = match handle_request(
281             &mut avail_desc,
282             pmem_device_tube,
283             mapping_arena_slot,
284             mapping_size,
285         ) {
286             Ok(n) => n,
287             Err(e) => {
288                 error!("pmem: failed to handle request: {}", e);
289                 0
290             }
291         };
292         queue.add_used(avail_desc, written as u32);
293         queue.trigger_interrupt();
294     }
295 }
296 
/// Worker-thread entry point: drives the request queue, IRQ resampling, and
/// (when `swap_interval` is a non-zero duration) the periodic pageout future
/// on a local executor until `kill_evt` fires or a future completes.
fn run_worker(
    queue: &mut Queue,
    pmem_device_tube: &Tube,
    interrupt: Interrupt,
    kill_evt: Event,
    mapping_arena_slot: u32,
    mapping_size: usize,
    swap_interval: Option<Duration>,
) {
    let ex = Executor::new().unwrap();

    let queue_evt = queue
        .event()
        .try_clone()
        .expect("failed to clone queue event");
    let queue_evt = EventAsync::new(queue_evt, &ex).expect("failed to set up the queue event");

    // Process requests from the virtio queue.
    let queue_fut = handle_queue(
        queue,
        queue_evt,
        pmem_device_tube,
        mapping_arena_slot,
        mapping_size,
    );
    pin_mut!(queue_fut);

    // Process any requests to resample the irq value.
    let resample = async_utils::handle_irq_resample(&ex, interrupt);
    pin_mut!(resample);

    // Exit if the kill event is triggered.
    let kill = async_utils::await_and_exit(&ex, kill_evt);
    pin_mut!(kill);

    // A missing or zero interval disables periodic pageout, so only three
    // futures are raced; otherwise the pageout future is added as a fourth.
    let interval = swap_interval.unwrap_or(Duration::ZERO);
    if interval.is_zero() {
        if let Err(e) = ex.run_until(select3(queue_fut, resample, kill)) {
            error!("error happened in executor: {}", e);
        }
    } else {
        let pageout_fut = pageout(
            &ex,
            interval,
            pmem_device_tube,
            mapping_arena_slot,
            mapping_size,
        );
        pin_mut!(pageout_fut);
        if let Err(e) = ex.run_until(select4(queue_fut, resample, kill, pageout_fut)) {
            error!("error happened in executor: {}", e);
        }
    }
}
351 
/// Specifies how memory slot is initialized.
pub enum MemSlotConfig {
    /// The memory region has already been mapped to the guest.
    MemSlot {
        /// index of the guest-mapped memory regions.
        idx: MemSlot,
    },
    /// The memory region that is not initialized yet and whose slot index will be provided via
    /// `Tube` later. e.g. pmem-ext2 device, where fs construction will be done in the main
    /// process.
    LazyInit {
        /// Channel over which the slot index (`u32`) arrives; received in `activate`.
        tube: Tube,
    },
}
364 
/// Virtio persistent-memory device state.
pub struct Pmem {
    // Running worker; returns the queue and device tube on stop so the
    // device can be reset or put to sleep and reactivated.
    worker_thread: Option<WorkerThread<(Queue, Tube)>>,
    // Negotiated/advertised virtio feature bits.
    features: u64,
    // Backing file exposed to the guest, if any (kept only for keep_rds).
    disk_image: Option<File>,
    // Guest physical address of the mapping (reported via config space).
    mapping_address: GuestAddress,
    // How the memory slot index is obtained (already known vs. lazy-init).
    mem_slot: MemSlotConfig,
    // Size of the mapping in bytes (reported via config space).
    mapping_size: u64,
    // Tube to the main process; `take`n by activate and handed to the worker.
    pmem_device_tube: Option<Tube>,
    // Period for the pageout timer; None or zero disables pageout.
    swap_interval: Option<Duration>,
}
375 
/// Serialized device identity used by snapshot/restore; restore only checks
/// that the mapping parameters match the current configuration.
#[derive(serde::Serialize, serde::Deserialize)]
struct PmemSnapshot {
    mapping_address: GuestAddress,
    mapping_size: u64,
}
381 
/// Configuration of a virtio-pmem device.
pub struct PmemConfig {
    /// Disk image exposed to the guest.
    /// If the memory region is not backed by a file, this should be `None`.
    pub disk_image: Option<File>,
    /// Guest physical address where the memory will be mapped.
    pub mapping_address: GuestAddress,
    /// How the memory slot for the mapping is provided (already mapped, or
    /// lazily delivered over a tube).
    pub mem_slot: MemSlotConfig,
    /// The size of the mapped region.
    pub mapping_size: u64,
    /// A communication channel to the main process to send memory requests.
    pub pmem_device_tube: Tube,
    /// Interval for periodic swap out of memory mapping
    pub swap_interval: Option<Duration>,
    /// Whether the region is writable or not.
    pub mapping_writable: bool,
}
399 
400 impl Pmem {
new(base_features: u64, cfg: PmemConfig) -> SysResult<Pmem>401     pub fn new(base_features: u64, cfg: PmemConfig) -> SysResult<Pmem> {
402         if cfg.mapping_size > usize::MAX as u64 {
403             return Err(SysError::new(libc::EOVERFLOW));
404         }
405 
406         let mut avail_features = base_features;
407         if cfg.mapping_writable {
408             if let MemSlotConfig::LazyInit { .. } = cfg.mem_slot {
409                 error!("pmem-ext2 must be a read-only device");
410                 return Err(SysError::new(libc::EINVAL));
411             }
412 
413             avail_features |= 1 << VIRTIO_PMEM_F_DISCARD;
414         }
415 
416         Ok(Pmem {
417             worker_thread: None,
418             features: avail_features,
419             disk_image: cfg.disk_image,
420             mapping_address: cfg.mapping_address,
421             mem_slot: cfg.mem_slot,
422             mapping_size: cfg.mapping_size,
423             pmem_device_tube: Some(cfg.pmem_device_tube),
424             swap_interval: cfg.swap_interval,
425         })
426     }
427 }
428 
impl VirtioDevice for Pmem {
    // Raw descriptors that must stay open for the device: the backing image,
    // the tube to the main process, and the lazy-init slot tube if present.
    fn keep_rds(&self) -> Vec<RawDescriptor> {
        let mut keep_rds = Vec::new();
        if let Some(disk_image) = &self.disk_image {
            keep_rds.push(disk_image.as_raw_descriptor());
        }

        if let Some(ref pmem_device_tube) = self.pmem_device_tube {
            keep_rds.push(pmem_device_tube.as_raw_descriptor());
        }

        if let MemSlotConfig::LazyInit { tube } = &self.mem_slot {
            keep_rds.push(tube.as_raw_descriptor());
        }

        keep_rds
    }

    fn device_type(&self) -> DeviceType {
        DeviceType::Pmem
    }

    fn queue_max_sizes(&self) -> &[u16] {
        QUEUE_SIZES
    }

    fn features(&self) -> u64 {
        self.features
    }

    // Serves the mapping address/size via the virtio config space.
    fn read_config(&self, offset: u64, data: &mut [u8]) {
        let config = virtio_pmem_config {
            start_address: Le64::from(self.mapping_address.offset()),
            size: Le64::from(self.mapping_size),
        };
        copy_config(data, 0, config.as_bytes(), offset);
    }

    // Starts the worker thread that services the single request queue.
    fn activate(
        &mut self,
        _memory: GuestMemory,
        interrupt: Interrupt,
        mut queues: BTreeMap<usize, Queue>,
    ) -> anyhow::Result<()> {
        if queues.len() != 1 {
            return Err(anyhow!("expected 1 queue, got {}", queues.len()));
        }

        let mut queue = queues.remove(&0).unwrap();

        // We checked that this fits in a usize in `Pmem::new`.
        let mapping_size = self.mapping_size as usize;

        // The tube is moved into the worker and handed back when it stops
        // (see `reset`/`virtio_sleep`); a second activate without an
        // intervening stop would find it missing.
        let pmem_device_tube = self
            .pmem_device_tube
            .take()
            .context("missing pmem device tube")?;

        let swap_interval = self.swap_interval;

        // For lazy-init regions the slot index arrives over the tube; this
        // blocks until the main process has finished constructing the region.
        let mapping_arena_slot = match &self.mem_slot {
            MemSlotConfig::MemSlot { idx } => *idx,
            MemSlotConfig::LazyInit { tube } => tube
                .recv::<u32>()
                .context("failed to receive memory slot for ext2 pmem device")?,
        };

        self.worker_thread = Some(WorkerThread::start("v_pmem", move |kill_event| {
            run_worker(
                &mut queue,
                &pmem_device_tube,
                interrupt,
                kill_event,
                mapping_arena_slot,
                mapping_size,
                swap_interval,
            );
            // Return ownership so reset/sleep can reclaim the queue and tube.
            (queue, pmem_device_tube)
        }));

        Ok(())
    }

    // Stops the worker and reclaims the device tube for a later activate.
    fn reset(&mut self) -> anyhow::Result<()> {
        if let Some(worker_thread) = self.worker_thread.take() {
            let (_queue, pmem_device_tube) = worker_thread.stop();
            self.pmem_device_tube = Some(pmem_device_tube);
        }
        Ok(())
    }

    // Stops the worker and hands the queue back so it can be restored on wake.
    fn virtio_sleep(&mut self) -> anyhow::Result<Option<BTreeMap<usize, Queue>>> {
        if let Some(worker_thread) = self.worker_thread.take() {
            let (queue, pmem_device_tube) = worker_thread.stop();
            self.pmem_device_tube = Some(pmem_device_tube);
            return Ok(Some(BTreeMap::from([(0, queue)])));
        }
        Ok(None)
    }

    // Re-activates with the saved queue state; a sleeping-but-inactive device
    // (no queues_state) stays inactive.
    fn virtio_wake(
        &mut self,
        queues_state: Option<(GuestMemory, Interrupt, BTreeMap<usize, Queue>)>,
    ) -> anyhow::Result<()> {
        if let Some((mem, interrupt, queues)) = queues_state {
            self.activate(mem, interrupt, queues)?;
        }
        Ok(())
    }

    fn virtio_snapshot(&mut self) -> anyhow::Result<serde_json::Value> {
        serde_json::to_value(PmemSnapshot {
            mapping_address: self.mapping_address,
            mapping_size: self.mapping_size,
        })
        .context("failed to serialize pmem snapshot")
    }

    // The device is stateless apart from its configuration, so restore only
    // verifies that the snapshot was taken from an identically-shaped mapping.
    fn virtio_restore(&mut self, data: serde_json::Value) -> anyhow::Result<()> {
        let snapshot: PmemSnapshot =
            serde_json::from_value(data).context("failed to deserialize pmem snapshot")?;
        anyhow::ensure!(
            snapshot.mapping_address == self.mapping_address
                && snapshot.mapping_size == self.mapping_size,
            "pmem snapshot doesn't match config: expected {:?}, got {:?}",
            (self.mapping_address, self.mapping_size),
            (snapshot.mapping_address, snapshot.mapping_size),
        );
        Ok(())
    }
}
560