xref: /aosp_15_r20/external/crosvm/src/crosvm/plugin/vcpu.rs (revision bb4ee6a4ae7042d18b07a98463b9c8b875e44b39)
1 // Copyright 2018 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 use std::alloc::Layout;
6 use std::cell::Cell;
7 use std::cell::RefCell;
8 use std::cmp;
9 use std::cmp::min;
10 use std::cmp::Ord;
11 use std::cmp::PartialEq;
12 use std::cmp::PartialOrd;
13 use std::collections::btree_set::BTreeSet;
14 use std::io::Read;
15 use std::io::Write;
16 use std::mem;
17 use std::sync::Arc;
18 use std::sync::RwLock;
19 
20 use base::error;
21 use base::LayoutAllocation;
22 use kvm::CpuId;
23 use kvm::Vcpu;
24 use kvm_sys::kvm_debugregs;
25 use kvm_sys::kvm_enable_cap;
26 use kvm_sys::kvm_fpu;
27 use kvm_sys::kvm_lapic_state;
28 use kvm_sys::kvm_mp_state;
29 use kvm_sys::kvm_msr_entry;
30 use kvm_sys::kvm_msrs;
31 use kvm_sys::kvm_regs;
32 use kvm_sys::kvm_sregs;
33 use kvm_sys::kvm_vcpu_events;
34 use kvm_sys::kvm_xcrs;
35 use kvm_sys::KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
36 use libc::EINVAL;
37 use libc::ENOENT;
38 use libc::ENOTTY;
39 use libc::EPERM;
40 use libc::EPIPE;
41 use libc::EPROTO;
42 use protobuf::CodedOutputStream;
43 use protobuf::EnumOrUnknown;
44 use protobuf::Message;
45 use protos::plugin::*;
46 use static_assertions::const_assert;
47 use sync::Mutex;
48 use zerocopy::AsBytes;
49 use zerocopy::FromBytes;
50 
51 use super::*;
52 
/// Identifier for an address space in the VM.
#[derive(Copy, Clone, PartialEq, Eq)]
pub enum IoSpace {
    /// Port I/O address space.
    Ioport,
    /// Memory-mapped I/O address space.
    Mmio,
}
59 
/// A reserved span in an address space: `(start, length, async_write)`.
///
/// Equality and ordering deliberately look only at the start address, so a
/// `BTreeSet<Range>` acts as a map keyed by where each reservation begins.
#[derive(Debug, Copy, Clone)]
struct Range(u64, u64, bool);

impl PartialEq for Range {
    fn eq(&self, rhs: &Range) -> bool {
        // Only the start address participates in equality.
        let Range(lhs_start, _, _) = *self;
        let Range(rhs_start, _, _) = *rhs;
        lhs_start == rhs_start
    }
}

impl Eq for Range {}

impl PartialOrd for Range {
    fn partial_cmp(&self, rhs: &Range) -> Option<cmp::Ordering> {
        Some(self.cmp(rhs))
    }
}

impl Ord for Range {
    fn cmp(&self, rhs: &Range) -> cmp::Ordering {
        // Order by start address only, consistent with `PartialEq`.
        let Range(lhs_start, _, _) = *self;
        let Range(rhs_start, _, _) = *rhs;
        lhs_start.cmp(&rhs_start)
    }
}
82 
get_vcpu_state_enum_or_unknown( vcpu: &Vcpu, state_set: EnumOrUnknown<vcpu_request::StateSet>, ) -> SysResult<Vec<u8>>83 fn get_vcpu_state_enum_or_unknown(
84     vcpu: &Vcpu,
85     state_set: EnumOrUnknown<vcpu_request::StateSet>,
86 ) -> SysResult<Vec<u8>> {
87     get_vcpu_state(
88         vcpu,
89         state_set.enum_value().map_err(|_| SysError::new(EINVAL))?,
90     )
91 }
92 
get_vcpu_state(vcpu: &Vcpu, state_set: vcpu_request::StateSet) -> SysResult<Vec<u8>>93 fn get_vcpu_state(vcpu: &Vcpu, state_set: vcpu_request::StateSet) -> SysResult<Vec<u8>> {
94     Ok(match state_set {
95         vcpu_request::StateSet::REGS => vcpu.get_regs()?.as_bytes().to_vec(),
96         vcpu_request::StateSet::SREGS => vcpu.get_sregs()?.as_bytes().to_vec(),
97         vcpu_request::StateSet::FPU => vcpu.get_fpu()?.as_bytes().to_vec(),
98         vcpu_request::StateSet::DEBUGREGS => vcpu.get_debugregs()?.as_bytes().to_vec(),
99         vcpu_request::StateSet::XCREGS => vcpu.get_xcrs()?.as_bytes().to_vec(),
100         vcpu_request::StateSet::LAPIC => vcpu.get_lapic()?.as_bytes().to_vec(),
101         vcpu_request::StateSet::MP => vcpu.get_mp_state()?.as_bytes().to_vec(),
102         vcpu_request::StateSet::EVENTS => vcpu.get_vcpu_events()?.as_bytes().to_vec(),
103     })
104 }
105 
set_vcpu_state_enum_or_unknown( vcpu: &Vcpu, state_set: EnumOrUnknown<vcpu_request::StateSet>, state: &[u8], ) -> SysResult<()>106 fn set_vcpu_state_enum_or_unknown(
107     vcpu: &Vcpu,
108     state_set: EnumOrUnknown<vcpu_request::StateSet>,
109     state: &[u8],
110 ) -> SysResult<()> {
111     set_vcpu_state(
112         vcpu,
113         state_set.enum_value().map_err(|_| SysError::new(EINVAL))?,
114         state,
115     )
116 }
117 
set_vcpu_state(vcpu: &Vcpu, state_set: vcpu_request::StateSet, state: &[u8]) -> SysResult<()>118 fn set_vcpu_state(vcpu: &Vcpu, state_set: vcpu_request::StateSet, state: &[u8]) -> SysResult<()> {
119     match state_set {
120         vcpu_request::StateSet::REGS => {
121             let regs = kvm_regs::read_from(state).ok_or(SysError::new(EINVAL))?;
122             vcpu.set_regs(&regs)
123         }
124         vcpu_request::StateSet::SREGS => {
125             let sregs = kvm_sregs::read_from(state).ok_or(SysError::new(EINVAL))?;
126             vcpu.set_sregs(&sregs)
127         }
128         vcpu_request::StateSet::FPU => {
129             let fpu = kvm_fpu::read_from(state).ok_or(SysError::new(EINVAL))?;
130             vcpu.set_fpu(&fpu)
131         }
132         vcpu_request::StateSet::DEBUGREGS => {
133             let debugregs = kvm_debugregs::read_from(state).ok_or(SysError::new(EINVAL))?;
134             vcpu.set_debugregs(&debugregs)
135         }
136         vcpu_request::StateSet::XCREGS => {
137             let xcrs = kvm_xcrs::read_from(state).ok_or(SysError::new(EINVAL))?;
138             vcpu.set_xcrs(&xcrs)
139         }
140         vcpu_request::StateSet::LAPIC => {
141             let lapic_state = kvm_lapic_state::read_from(state).ok_or(SysError::new(EINVAL))?;
142             vcpu.set_lapic(&lapic_state)
143         }
144         vcpu_request::StateSet::MP => {
145             let mp_state = kvm_mp_state::read_from(state).ok_or(SysError::new(EINVAL))?;
146             vcpu.set_mp_state(&mp_state)
147         }
148         vcpu_request::StateSet::EVENTS => {
149             let vcpu_events = kvm_vcpu_events::read_from(state).ok_or(SysError::new(EINVAL))?;
150             vcpu.set_vcpu_events(&vcpu_events)
151         }
152     }
153 }
154 
/// One register-match entry for a call hint.
///
/// An entry matches when every enabled `match_*` flag's expected value equals the guest's
/// current register value (see `SharedVcpuState::check_hint_details`). On a match,
/// `send_sregs`/`send_debugregs` select which extra VCPU state is bundled into the IO wait
/// message sent to the plugin.
pub struct CallHintDetails {
    /// Compare `rax` below against the guest's rax register.
    pub match_rax: bool,
    /// Compare `rbx` below against the guest's rbx register.
    pub match_rbx: bool,
    /// Compare `rcx` below against the guest's rcx register.
    pub match_rcx: bool,
    /// Compare `rdx` below against the guest's rdx register.
    pub match_rdx: bool,
    /// Expected rax value; ignored unless `match_rax` is set.
    pub rax: u64,
    /// Expected rbx value; ignored unless `match_rbx` is set.
    pub rbx: u64,
    /// Expected rcx value; ignored unless `match_rcx` is set.
    pub rcx: u64,
    /// Expected rdx value; ignored unless `match_rdx` is set.
    pub rdx: u64,
    /// Include sregs in the IO wait message when this entry matches.
    pub send_sregs: bool,
    /// Include debugregs in the IO wait message when this entry matches.
    pub send_debugregs: bool,
}
167 
/// A plugin-registered hint: when a vmexit hits (`io_space`, `addr`) with the matching access
/// direction, the guest's general registers (and optionally sregs/debugregs, per `regs`) are
/// attached to the wait message sent to the plugin.
pub struct CallHint {
    /// Address space the hinted access lives in.
    io_space: IoSpace,
    /// Guest address to match; hints are only installed for non-zero addresses (see `set_hint`).
    addr: u64,
    /// Match writes when true, reads when false.
    on_write: bool,
    /// Register filters checked in order; the first match decides what extra state to send.
    regs: Vec<CallHintDetails>,
}
174 
/// State shared by every VCPU, grouped together to make edits to the state coherent across VCPUs.
#[derive(Default)]
pub struct SharedVcpuState {
    /// Port I/O ranges reserved for plugin handling, keyed by start address.
    ioport_regions: BTreeSet<Range>,
    /// MMIO ranges reserved for plugin handling, keyed by start address.
    mmio_regions: BTreeSet<Range>,
    /// Optional hint controlling when register state accompanies an IO wait message.
    hint: Option<CallHint>,
}
182 
183 impl SharedVcpuState {
184     /// Reserves the given range for handling by the plugin process.
185     ///
186     /// This will reject any reservation that overlaps with an existing reservation.
reserve_range( &mut self, space: IoSpace, start: u64, length: u64, async_write: bool, ) -> SysResult<()>187     pub fn reserve_range(
188         &mut self,
189         space: IoSpace,
190         start: u64,
191         length: u64,
192         async_write: bool,
193     ) -> SysResult<()> {
194         if length == 0 {
195             return Err(SysError::new(EINVAL));
196         }
197 
198         // Reject all cases where this reservation is part of another reservation.
199         if self.is_reserved(space, start) {
200             return Err(SysError::new(EPERM));
201         }
202 
203         let last_address = match start.checked_add(length) {
204             Some(end) => end - 1,
205             None => return Err(SysError::new(EINVAL)),
206         };
207 
208         let space = match space {
209             IoSpace::Ioport => &mut self.ioport_regions,
210             IoSpace::Mmio => &mut self.mmio_regions,
211         };
212 
213         match space
214             .range(..Range(last_address, 0, false))
215             .next_back()
216             .cloned()
217         {
218             Some(Range(existing_start, _, _)) if existing_start >= start => {
219                 Err(SysError::new(EPERM))
220             }
221             _ => {
222                 space.insert(Range(start, length, async_write));
223                 Ok(())
224             }
225         }
226     }
227 
228     //// Releases a reservation previously made at `start` in the given `space`.
unreserve_range(&mut self, space: IoSpace, start: u64) -> SysResult<()>229     pub fn unreserve_range(&mut self, space: IoSpace, start: u64) -> SysResult<()> {
230         let range = Range(start, 0, false);
231         let space = match space {
232             IoSpace::Ioport => &mut self.ioport_regions,
233             IoSpace::Mmio => &mut self.mmio_regions,
234         };
235         if space.remove(&range) {
236             Ok(())
237         } else {
238             Err(SysError::new(ENOENT))
239         }
240     }
241 
set_hint( &mut self, space: IoSpace, addr: u64, on_write: bool, regs: Vec<CallHintDetails>, )242     pub fn set_hint(
243         &mut self,
244         space: IoSpace,
245         addr: u64,
246         on_write: bool,
247         regs: Vec<CallHintDetails>,
248     ) {
249         if addr == 0 {
250             self.hint = None;
251         } else {
252             let hint = CallHint {
253                 io_space: space,
254                 addr,
255                 on_write,
256                 regs,
257             };
258             self.hint = Some(hint);
259         }
260     }
261 
is_reserved(&self, space: IoSpace, addr: u64) -> bool262     fn is_reserved(&self, space: IoSpace, addr: u64) -> bool {
263         if let Some(Range(start, len, _)) = self.first_before(space, addr) {
264             let offset = addr - start;
265             if offset < len {
266                 return true;
267             }
268         }
269         false
270     }
271 
first_before(&self, io_space: IoSpace, addr: u64) -> Option<Range>272     fn first_before(&self, io_space: IoSpace, addr: u64) -> Option<Range> {
273         let space = match io_space {
274             IoSpace::Ioport => &self.ioport_regions,
275             IoSpace::Mmio => &self.mmio_regions,
276         };
277 
278         match addr.checked_add(1) {
279             Some(next_addr) => space
280                 .range(..Range(next_addr, 0, false))
281                 .next_back()
282                 .cloned(),
283             None => None,
284         }
285     }
286 
matches_hint(&self, io_space: IoSpace, addr: u64, is_write: bool) -> bool287     fn matches_hint(&self, io_space: IoSpace, addr: u64, is_write: bool) -> bool {
288         if let Some(hint) = &self.hint {
289             return io_space == hint.io_space && addr == hint.addr && is_write == hint.on_write;
290         }
291         false
292     }
293 
check_hint_details(&self, regs: &kvm_regs) -> (bool, bool)294     fn check_hint_details(&self, regs: &kvm_regs) -> (bool, bool) {
295         if let Some(hint) = &self.hint {
296             for entry in hint.regs.iter() {
297                 if (!entry.match_rax || entry.rax == regs.rax)
298                     && (!entry.match_rbx || entry.rbx == regs.rbx)
299                     && (!entry.match_rcx || entry.rcx == regs.rcx)
300                     && (!entry.match_rdx || entry.rdx == regs.rdx)
301                 {
302                     return (entry.send_sregs, entry.send_debugregs);
303                 }
304             }
305         }
306         (false, false)
307     }
308 }
309 
/// State specific to a VCPU, grouped so that each `PluginVcpu` object will share a canonical
/// version.
#[derive(Default)]
pub struct PerVcpuState {
    /// Data for a pause requested via `request_pause`, consumed by `PluginVcpu::pre_run`;
    /// `None` when no pause is pending.
    pause_request: Option<u64>,
}
316 
317 impl PerVcpuState {
318     /// Indicates that a VCPU should wait until the plugin process resumes the VCPU.
319     ///
320     /// This method will not cause a VCPU to pause immediately. Instead, the VCPU thread will
321     /// continue running until a interrupted, at which point it will check for a pending pause. If
322     /// there is another call to `request_pause` for this VCPU before that happens, the last pause
323     /// request's `data` will be overwritten with the most recent `data.
324     ///
325     /// To get an immediate pause after calling `request_pause`, send a signal (with a registered
326     /// handler) to the thread handling the VCPU corresponding to this state. This should interrupt
327     /// the running VCPU, which should check for a pause with `PluginVcpu::pre_run`.
request_pause(&mut self, data: u64)328     pub fn request_pause(&mut self, data: u64) {
329         self.pause_request = Some(data);
330     }
331 }
332 
/// The data of an in-flight vmexit access: a destination buffer to fill for guest reads, or the
/// bytes the guest wrote for guest writes.
enum VcpuRunData<'a> {
    Read(&'a mut [u8]),
    Write(&'a [u8]),
}

impl<'a> VcpuRunData<'a> {
    /// Returns true if this access is a guest write.
    fn is_write(&self) -> bool {
        matches!(self, VcpuRunData::Write(_))
    }

    /// Returns the underlying bytes, regardless of direction.
    fn as_slice(&self) -> &[u8] {
        match self {
            VcpuRunData::Read(s) => s,
            VcpuRunData::Write(s) => s,
        }
    }

    /// Copies as much of `data` as fits into the destination of a `Read`; no-op for a `Write`.
    fn copy_from_slice(&mut self, data: &[u8]) {
        if let VcpuRunData::Read(s) = self {
            let copy_size = min(s.len(), data.len());
            // Both sides must be sliced to `copy_size`: `slice::copy_from_slice` panics unless
            // source and destination lengths are identical, so a `data` shorter than the
            // destination must only overwrite the prefix instead of panicking.
            s[..copy_size].copy_from_slice(&data[..copy_size]);
        }
    }
}
357 
/// State object for a VCPU's connection with the plugin process.
///
/// This is used by a VCPU thread to allow the plugin process to handle vmexits. Each method may
/// block indefinitely while the plugin process is handling requests. In order to cleanly shutdown
/// during these blocking calls, the `connection` socket should be shutdown. This will end the
/// blocking calls.
pub struct PluginVcpu {
    /// Reservation and hint state shared with every other VCPU.
    shared_vcpu_state: Arc<RwLock<SharedVcpuState>>,
    /// This VCPU's pending pause request, if any.
    per_vcpu_state: Arc<Mutex<PerVcpuState>>,
    /// Pipe end for receiving requests from the plugin process.
    read_pipe: File,
    /// Pipe end for sending responses to the plugin process.
    write_pipe: File,
    /// Wait reason to report to the plugin on its next wait request, if one is pending.
    wait_reason: Cell<Option<vcpu_response::Wait>>,
    /// Scratch buffer reused when reading requests off `read_pipe`.
    request_buffer: RefCell<Vec<u8>>,
    /// Scratch buffer reused when serializing responses for `write_pipe`.
    response_buffer: RefCell<Vec<u8>>,
}
373 
374 impl PluginVcpu {
375     /// Creates the plugin state and connection container for a VCPU thread.
new( shared_vcpu_state: Arc<RwLock<SharedVcpuState>>, per_vcpu_state: Arc<Mutex<PerVcpuState>>, read_pipe: File, write_pipe: File, ) -> PluginVcpu376     pub fn new(
377         shared_vcpu_state: Arc<RwLock<SharedVcpuState>>,
378         per_vcpu_state: Arc<Mutex<PerVcpuState>>,
379         read_pipe: File,
380         write_pipe: File,
381     ) -> PluginVcpu {
382         PluginVcpu {
383             shared_vcpu_state,
384             per_vcpu_state,
385             read_pipe,
386             write_pipe,
387             wait_reason: Default::default(),
388             request_buffer: Default::default(),
389             response_buffer: Default::default(),
390         }
391     }
392 
393     /// Tells the plugin process to initialize this VCPU.
394     ///
395     /// This should be called for each VCPU before the first run of any of the VCPUs in the VM.
init(&self, vcpu: &Vcpu) -> SysResult<()>396     pub fn init(&self, vcpu: &Vcpu) -> SysResult<()> {
397         let mut wait_reason = vcpu_response::Wait::new();
398         wait_reason.mut_init();
399         self.wait_reason.set(Some(wait_reason));
400         self.handle_until_resume(vcpu)?;
401         Ok(())
402     }
403 
404     /// The VCPU thread should call this before rerunning a VM in order to handle pending requests
405     /// to this VCPU.
pre_run(&self, vcpu: &Vcpu) -> SysResult<()>406     pub fn pre_run(&self, vcpu: &Vcpu) -> SysResult<()> {
407         let request = {
408             let mut lock = self.per_vcpu_state.lock();
409             lock.pause_request.take()
410         };
411 
412         if let Some(user_data) = request {
413             let mut wait_reason = vcpu_response::Wait::new();
414             wait_reason.mut_user().user = user_data;
415             self.wait_reason.set(Some(wait_reason));
416             self.handle_until_resume(vcpu)?;
417         }
418         Ok(())
419     }
420 
process(&self, io_space: IoSpace, addr: u64, mut data: VcpuRunData, vcpu: &Vcpu) -> bool421     fn process(&self, io_space: IoSpace, addr: u64, mut data: VcpuRunData, vcpu: &Vcpu) -> bool {
422         let vcpu_state_lock = match self.shared_vcpu_state.read() {
423             Ok(l) => l,
424             Err(e) => {
425                 error!("error read locking shared cpu state: {}", e);
426                 return false;
427             }
428         };
429 
430         let first_before_addr = vcpu_state_lock.first_before(io_space, addr);
431 
432         match first_before_addr {
433             Some(Range(start, len, async_write)) => {
434                 let offset = addr - start;
435                 if offset >= len {
436                     return false;
437                 }
438                 if async_write && !data.is_write() {
439                     return false;
440                 }
441 
442                 let mut wait_reason = vcpu_response::Wait::new();
443                 let io = wait_reason.mut_io();
444                 io.space = match io_space {
445                     IoSpace::Ioport => AddressSpace::IOPORT,
446                     IoSpace::Mmio => AddressSpace::MMIO,
447                 }
448                 .into();
449                 io.address = addr;
450                 io.is_write = data.is_write();
451                 io.data = data.as_slice().to_vec();
452                 io.no_resume = async_write;
453                 if !async_write && vcpu_state_lock.matches_hint(io_space, addr, io.is_write) {
454                     if let Ok(regs) = vcpu.get_regs() {
455                         let (has_sregs, has_debugregs) = vcpu_state_lock.check_hint_details(&regs);
456                         io.regs = regs.as_bytes().to_vec();
457                         if has_sregs {
458                             if let Ok(state) = get_vcpu_state(vcpu, vcpu_request::StateSet::SREGS) {
459                                 io.sregs = state;
460                             }
461                         }
462                         if has_debugregs {
463                             if let Ok(state) =
464                                 get_vcpu_state(vcpu, vcpu_request::StateSet::DEBUGREGS)
465                             {
466                                 io.debugregs = state;
467                             }
468                         }
469                     }
470                 }
471                 // don't hold lock while blocked in `handle_until_resume`.
472                 drop(vcpu_state_lock);
473 
474                 if async_write {
475                     let mut response = VcpuResponse::new();
476                     response.set_wait(wait_reason);
477 
478                     let mut response_buffer = self.response_buffer.borrow_mut();
479                     response_buffer.clear();
480                     let mut stream = CodedOutputStream::vec(&mut response_buffer);
481                     match response.write_length_delimited_to(&mut stream) {
482                         Ok(_) => {
483                             if let Err(e) = stream.flush() {
484                                 error!("failed to flush to vec: {}", e);
485                             }
486                             drop(stream);
487                             let mut write_pipe = &self.write_pipe;
488                             if let Err(e) = write_pipe.write_all(&response_buffer) {
489                                 error!("failed to write to pipe: {}", e);
490                             }
491                         }
492                         Err(e) => error!("failed to write to buffer: {}", e),
493                     }
494                 } else {
495                     self.wait_reason.set(Some(wait_reason));
496                     match self.handle_until_resume(vcpu) {
497                         Ok(resume_data) => data.copy_from_slice(&resume_data),
498                         Err(e) if e.errno() == EPIPE => {}
499                         Err(e) => error!("failed to process vcpu requests: {}", e),
500                     }
501                 }
502                 true
503             }
504             None => false,
505         }
506     }
507 
508     /// Has the plugin process handle a IO port read.
io_read(&self, addr: u64, data: &mut [u8], vcpu: &Vcpu) -> bool509     pub fn io_read(&self, addr: u64, data: &mut [u8], vcpu: &Vcpu) -> bool {
510         self.process(IoSpace::Ioport, addr, VcpuRunData::Read(data), vcpu)
511     }
512 
513     /// Has the plugin process handle a IO port write.
io_write(&self, addr: u64, data: &[u8], vcpu: &Vcpu) -> bool514     pub fn io_write(&self, addr: u64, data: &[u8], vcpu: &Vcpu) -> bool {
515         self.process(IoSpace::Ioport, addr, VcpuRunData::Write(data), vcpu)
516     }
517 
518     /// Has the plugin process handle a MMIO read.
mmio_read(&self, addr: u64, data: &mut [u8], vcpu: &Vcpu) -> bool519     pub fn mmio_read(&self, addr: u64, data: &mut [u8], vcpu: &Vcpu) -> bool {
520         self.process(IoSpace::Mmio, addr, VcpuRunData::Read(data), vcpu)
521     }
522 
523     /// Has the plugin process handle a MMIO write.
mmio_write(&self, addr: u64, data: &[u8], vcpu: &Vcpu) -> bool524     pub fn mmio_write(&self, addr: u64, data: &[u8], vcpu: &Vcpu) -> bool {
525         self.process(IoSpace::Mmio, addr, VcpuRunData::Write(data), vcpu)
526     }
527 
528     /// Has the plugin process handle a hyper-v call.
hyperv_call(&self, input: u64, params: [u64; 2], data: &mut [u8], vcpu: &Vcpu) -> bool529     pub fn hyperv_call(&self, input: u64, params: [u64; 2], data: &mut [u8], vcpu: &Vcpu) -> bool {
530         let mut wait_reason = vcpu_response::Wait::new();
531         let hv = wait_reason.mut_hyperv_call();
532         hv.input = input;
533         hv.params0 = params[0];
534         hv.params1 = params[1];
535 
536         self.wait_reason.set(Some(wait_reason));
537         match self.handle_until_resume(vcpu) {
538             Ok(resume_data) => {
539                 data.copy_from_slice(&resume_data);
540                 true
541             }
542             Err(e) if e.errno() == EPIPE => false,
543             Err(e) => {
544                 error!("failed to process hyperv call request: {}", e);
545                 false
546             }
547         }
548     }
549 
550     /// Has the plugin process handle a synic config change.
hyperv_synic( &self, msr: u32, control: u64, evt_page: u64, msg_page: u64, vcpu: &Vcpu, ) -> bool551     pub fn hyperv_synic(
552         &self,
553         msr: u32,
554         control: u64,
555         evt_page: u64,
556         msg_page: u64,
557         vcpu: &Vcpu,
558     ) -> bool {
559         let mut wait_reason = vcpu_response::Wait::new();
560         let hv = wait_reason.mut_hyperv_synic();
561         hv.msr = msr;
562         hv.control = control;
563         hv.evt_page = evt_page;
564         hv.msg_page = msg_page;
565         self.wait_reason.set(Some(wait_reason));
566         match self.handle_until_resume(vcpu) {
567             Ok(_resume_data) => true,
568             Err(e) if e.errno() == EPIPE => false,
569             Err(e) => {
570                 error!("failed to process hyperv synic request: {}", e);
571                 false
572             }
573         }
574     }
575 
handle_request(&self, vcpu: &Vcpu) -> SysResult<Option<Vec<u8>>>576     fn handle_request(&self, vcpu: &Vcpu) -> SysResult<Option<Vec<u8>>> {
577         let mut wait_reason = self.wait_reason.take();
578         let mut do_recv = true;
579         let mut resume_data = None;
580         let mut response = VcpuResponse::new();
581         let mut send_response = true;
582 
583         // Typically a response is sent for every request received.  The odd (yet common)
584         // case is when a resume request is received.  This function will skip sending
585         // a resume reply, and instead we'll go run the VM and then later reply with a wait
586         // response message.  This code block handles checking if a wait reason is pending (where
587         // the wait reason isn't the first-time init [first time init needs to first
588         // receive a wait request from the plugin]) to send it as a reply before doing a recv()
589         // for the next request.  Note that if a wait reply is pending then this function
590         // will send the reply and do nothing else--the expectation is that handle_until_resume()
591         // is the only caller of this function, so the function will immediately get called again
592         // and this second call will no longer see a pending wait reason and do a recv() for the
593         // next message.
594         if let Some(reason) = wait_reason {
595             if reason.has_init() {
596                 wait_reason = Some(reason);
597             } else {
598                 response.set_wait(reason);
599                 do_recv = false;
600                 wait_reason = None;
601             }
602         }
603 
604         if do_recv {
605             let mut request_buffer = self.request_buffer.borrow_mut();
606             request_buffer.resize(MAX_VCPU_DATAGRAM_SIZE, 0);
607 
608             let mut read_pipe = &self.read_pipe;
609             let msg_size = read_pipe.read(&mut request_buffer).map_err(io_to_sys_err)?;
610 
611             let mut request: VcpuRequest =
612                 Message::parse_from_bytes(&request_buffer[..msg_size]).map_err(proto_to_sys_err)?;
613 
614             let res = if request.has_wait() {
615                 match wait_reason {
616                     Some(wait_reason) => {
617                         response.set_wait(wait_reason);
618                         Ok(())
619                     }
620                     None => Err(SysError::new(EPROTO)),
621                 }
622             } else if wait_reason.is_some() {
623                 // Any request other than getting the wait_reason while there is one pending is
624                 // invalid.
625                 self.wait_reason.set(wait_reason);
626                 Err(SysError::new(EPROTO))
627             } else if request.has_resume() {
628                 send_response = false;
629                 let resume = request.take_resume();
630                 if !resume.regs.is_empty() {
631                     set_vcpu_state(vcpu, vcpu_request::StateSet::REGS, &resume.regs)?;
632                 }
633                 if !resume.sregs.is_empty() {
634                     set_vcpu_state(vcpu, vcpu_request::StateSet::SREGS, &resume.sregs)?;
635                 }
636                 if !resume.debugregs.is_empty() {
637                     set_vcpu_state(vcpu, vcpu_request::StateSet::DEBUGREGS, &resume.debugregs)?;
638                 }
639                 resume_data = Some(resume.data);
640                 Ok(())
641             } else if request.has_get_state() {
642                 let response_state = response.mut_get_state();
643                 match get_vcpu_state_enum_or_unknown(vcpu, request.get_state().set) {
644                     Ok(state) => {
645                         response_state.state = state;
646                         Ok(())
647                     }
648                     Err(e) => Err(e),
649                 }
650             } else if request.has_set_state() {
651                 response.mut_set_state();
652                 let set_state = request.set_state();
653                 set_vcpu_state_enum_or_unknown(vcpu, set_state.set, &set_state.state)
654             } else if request.has_get_hyperv_cpuid() {
655                 let cpuid_response = &mut response.mut_get_hyperv_cpuid().entries;
656                 match vcpu.get_hyperv_cpuid() {
657                     Ok(mut cpuid) => {
658                         for entry in cpuid.mut_entries_slice() {
659                             cpuid_response.push(cpuid_kvm_to_proto(entry));
660                         }
661                         Ok(())
662                     }
663                     Err(e) => Err(e),
664                 }
665             } else if request.has_get_msrs() {
666                 let entry_data = &mut response.mut_get_msrs().entry_data;
667                 let entry_indices = &request.get_msrs().entry_indices;
668                 let mut msr_entries = Vec::with_capacity(entry_indices.len());
669                 for &index in entry_indices {
670                     msr_entries.push(kvm_msr_entry {
671                         index,
672                         ..Default::default()
673                     });
674                 }
675                 match vcpu.get_msrs(&mut msr_entries) {
676                     Ok(()) => {
677                         for msr_entry in msr_entries {
678                             entry_data.push(msr_entry.data);
679                         }
680                         Ok(())
681                     }
682                     Err(e) => Err(e),
683                 }
684             } else if request.has_set_msrs() {
685                 const SIZE_OF_MSRS: usize = mem::size_of::<kvm_msrs>();
686                 const SIZE_OF_ENTRY: usize = mem::size_of::<kvm_msr_entry>();
687                 const ALIGN_OF_MSRS: usize = mem::align_of::<kvm_msrs>();
688                 const_assert!(ALIGN_OF_MSRS >= mem::align_of::<kvm_msr_entry>());
689 
690                 response.mut_set_msrs();
691                 let request_entries = &request.set_msrs().entries;
692 
693                 let size = SIZE_OF_MSRS + request_entries.len() * SIZE_OF_ENTRY;
694                 let layout =
695                     Layout::from_size_align(size, ALIGN_OF_MSRS).expect("impossible layout");
696                 let mut allocation = LayoutAllocation::zeroed(layout);
697 
698                 // SAFETY:
699                 // Safe to obtain an exclusive reference because there are no other
700                 // references to the allocation yet and all-zero is a valid bit
701                 // pattern.
702                 let kvm_msrs = unsafe { allocation.as_mut::<kvm_msrs>() };
703 
704                 // SAFETY:
705                 // Mapping the unsized array to a slice is unsafe becase the length isn't known.
706                 // Providing the length used to create the struct guarantees the entire slice is
707                 // valid.
708                 unsafe {
709                     let kvm_msr_entries: &mut [kvm_msr_entry] =
710                         kvm_msrs.entries.as_mut_slice(request_entries.len());
711                     for (msr_entry, entry) in kvm_msr_entries.iter_mut().zip(request_entries) {
712                         msr_entry.index = entry.index;
713                         msr_entry.data = entry.data;
714                     }
715                 }
716                 kvm_msrs.nmsrs = request_entries.len() as u32;
717                 vcpu.set_msrs(kvm_msrs)
718             } else if request.has_set_cpuid() {
719                 response.mut_set_cpuid();
720                 let request_entries = &request.set_cpuid().entries;
721                 let mut cpuid = CpuId::new(request_entries.len());
722                 let cpuid_entries = cpuid.mut_entries_slice();
723                 for (request_entry, cpuid_entry) in request_entries.iter().zip(cpuid_entries) {
724                     cpuid_entry.function = request_entry.function;
725                     if request_entry.has_index {
726                         cpuid_entry.index = request_entry.index;
727                         cpuid_entry.flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
728                     }
729                     cpuid_entry.eax = request_entry.eax;
730                     cpuid_entry.ebx = request_entry.ebx;
731                     cpuid_entry.ecx = request_entry.ecx;
732                     cpuid_entry.edx = request_entry.edx;
733                 }
734                 vcpu.set_cpuid2(&cpuid)
735             } else if request.has_enable_capability() {
736                 response.mut_enable_capability();
737                 let capability = request.enable_capability().capability;
738                 if capability != kvm_sys::KVM_CAP_HYPERV_SYNIC
739                     && capability != kvm_sys::KVM_CAP_HYPERV_SYNIC2
740                 {
741                     Err(SysError::new(EINVAL))
742                 } else {
743                     let cap = kvm_enable_cap {
744                         cap: capability,
745                         ..Default::default()
746                     };
747                     // SAFETY:
748                     // Safe because the allowed capabilities don't take pointer arguments.
749                     unsafe { vcpu.kvm_enable_cap(&cap) }
750                 }
751             } else if request.has_shutdown() {
752                 return Err(SysError::new(EPIPE));
753             } else {
754                 Err(SysError::new(ENOTTY))
755             };
756 
757             if let Err(e) = res {
758                 response.errno = e.errno();
759             }
760         }
761 
762         // Send the response, except if it's a resume response (in which case
763         // we'll go run the VM and afterwards send a wait response message).
764         if send_response {
765             let mut response_buffer = self.response_buffer.borrow_mut();
766             response_buffer.clear();
767             {
768                 let mut stream = CodedOutputStream::vec(&mut response_buffer);
769                 response
770                     .write_length_delimited_to(&mut stream)
771                     .map_err(proto_to_sys_err)?;
772                 stream.flush().map_err(proto_to_sys_err)?;
773             }
774             let mut write_pipe = &self.write_pipe;
775             write_pipe
776                 .write(&response_buffer[..])
777                 .map_err(io_to_sys_err)?;
778         }
779 
780         Ok(resume_data)
781     }
782 
handle_until_resume(&self, vcpu: &Vcpu) -> SysResult<Vec<u8>>783     fn handle_until_resume(&self, vcpu: &Vcpu) -> SysResult<Vec<u8>> {
784         loop {
785             if let Some(resume_data) = self.handle_request(vcpu)? {
786                 return Ok(resume_data);
787             }
788         }
789     }
790 }
791 
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn shared_vcpu_reserve() {
        let mut state = SharedVcpuState::default();
        // Zero-length reservations are invalid.
        state
            .reserve_range(IoSpace::Ioport, 0x10, 0, false)
            .unwrap_err();
        // Reserve [0x10, 0x20).
        state
            .reserve_range(IoSpace::Ioport, 0x10, 0x10, false)
            .unwrap();
        // Overlaps the front of [0x10, 0x20).
        state
            .reserve_range(IoSpace::Ioport, 0x0f, 0x10, false)
            .unwrap_err();
        // Exact duplicate of the existing reservation.
        state
            .reserve_range(IoSpace::Ioport, 0x10, 0x10, false)
            .unwrap_err();
        // Same start, longer length.
        state
            .reserve_range(IoSpace::Ioport, 0x10, 0x15, false)
            .unwrap_err();
        // Starts inside the existing reservation.
        state
            .reserve_range(IoSpace::Ioport, 0x12, 0x15, false)
            .unwrap_err();
        // Entirely contained in the existing reservation.
        state
            .reserve_range(IoSpace::Ioport, 0x12, 0x01, false)
            .unwrap_err();
        // Fully covers the existing reservation.
        state
            .reserve_range(IoSpace::Ioport, 0x0, 0x20, false)
            .unwrap_err();
        // Adjacent after the existing reservation: [0x20, 0x25).
        state
            .reserve_range(IoSpace::Ioport, 0x20, 0x05, false)
            .unwrap();
        // Adjacent after that: [0x25, 0x2a).
        state
            .reserve_range(IoSpace::Ioport, 0x25, 0x05, false)
            .unwrap();
        // Adjacent before the first reservation: [0x0, 0x10).
        state
            .reserve_range(IoSpace::Ioport, 0x0, 0x10, false)
            .unwrap();
    }
}
834