// Copyright 2017 The ChromiumOS Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

use std::collections::BTreeMap;
use std::mem;
use std::result;

use base::warn;
use hypervisor::Sregs;
use hypervisor::VcpuX86_64;
use hypervisor::Vm;
use remain::sorted;
use thiserror::Error;
use vm_memory::GuestAddress;
use vm_memory::GuestMemory;

use crate::gdt;

#[sorted]
#[derive(Error, Debug)]
pub enum Error {
    /// Failed to get sregs for this cpu.
    #[error("failed to get sregs for this cpu: {0}")]
    GetSRegsIoctlFailed(base::Error),
    /// Failed to get base registers for this cpu.
    #[error("failed to get base registers for this cpu: {0}")]
    GettingRegistersIoctl(base::Error),
    /// Failed to set sregs for this cpu.
    #[error("failed to set sregs for this cpu: {0}")]
    SetSRegsIoctlFailed(base::Error),
    /// Failed to set base registers for this cpu.
    #[error("failed to set base registers for this cpu: {0}")]
    SettingRegistersIoctl(base::Error),
    /// Writing the GDT to RAM failed.
    #[error("writing the GDT to RAM failed")]
    WriteGDTFailure,
    /// Writing the IDT to RAM failed.
    #[error("writing the IDT to RAM failed")]
    WriteIDTFailure,
    /// Writing PDE to RAM failed.
    #[error("writing PDE to RAM failed")]
    WritePDEAddress,
    /// Writing PDPTE to RAM failed.
    #[error("writing PDPTE to RAM failed")]
    WritePDPTEAddress,
    /// Writing PML4 to RAM failed.
    #[error("writing PML4 to RAM failed")]
    WritePML4Address,
}

pub type Result<T> = result::Result<T, Error>;

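// MTRR register layout (Intel SDM vol. 3, "Memory Type Range Registers"):
// bit 11 of IA32_MTRR_DEF_TYPE ("E") enables MTRRs, bit 11 of each
// IA32_MTRR_PHYSMASKn ("V") marks a variable-range entry valid, and
// bits 7:0 of IA32_MTRRCAP ("VCNT") report the variable-range entry count.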
const MTRR_MEMTYPE_UC: u8 = 0x0;
const MTRR_MEMTYPE_WB: u8 = 0x6;
const MTRR_VAR_VALID: u64 = 0x800;
const MTRR_ENABLE: u64 = 0x800;
const MTRR_PHYS_BASE_MSR: u32 = 0x200;
const MTRR_PHYS_MASK_MSR: u32 = 0x201;
const VAR_MTRR_NUM_MASK: u64 = 0xFF;

// Returns the value of the highest set bit in a 64-bit value; equivalent to
// 1 << HighBitSet(x). `data` must be nonzero.
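// For example, get_power_of_two(0x6000) == 0x4000.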
fn get_power_of_two(data: u64) -> u64 {
    1 << (64 - data.leading_zeros() - 1)
}

// Returns the maximum length suitable for an MTRR entry covering the
// specified (base, len): the largest power of two no greater than `len`
// to which `base` is aligned.
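// For example, get_max_len(0xd000_0000, 0x3000_0000) == 0x1000_0000.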
fn get_max_len(base: u64, len: u64) -> u64 {
    let mut ret = get_power_of_two(len);

    while base % ret != 0 {
        ret >>= 1;
    }

    ret
}

// For the specified (base, len), returns a list of (base, len) pairs that can
// be programmed into the variable MTRR registers. The MTRRs require that each
// range's base address is aligned to its length.
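// Worked example: get_mtrr_pairs(0xd000_0000, 0x3000_0000) splits the 768 MiB
// range starting at 3.25 GiB into two aligned power-of-two ranges:
// (0xd000_0000, 0x1000_0000) and (0xe000_0000, 0x2000_0000).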
fn get_mtrr_pairs(base: u64, len: u64) -> Vec<(u64, u64)> {
    let mut vecs = Vec::new();

    let mut remains = len;
    let mut new = base;
    while remains != 0 {
        let max = get_max_len(new, remains);
        vecs.push((new, max));
        remains -= max;
        new += max;
    }

    vecs
}

/// Returns the number of variable MTRR entries supported by `vcpu`.
pub fn vcpu_supported_variable_mtrrs(vcpu: &dyn VcpuX86_64) -> usize {
    // Get VAR MTRR num from MSR_MTRRcap
    match vcpu.get_msr(crate::msr_index::MSR_MTRRcap) {
        Ok(value) => (value & VAR_MTRR_NUM_MASK) as usize,
        Err(_e) => {
            warn!("failed to get MSR_MTRRcap, guests with passthrough devices may be very slow");
            0
        }
    }
}

/// Returns `true` if the given MSR `id` is an MTRR entry.
pub fn is_mtrr_msr(id: u32) -> bool {
    // Variable MTRR MSRs are pairs starting at 0x200 (MTRR_PHYS_BASE_MSR) / 0x201
    // (MTRR_PHYS_MASK_MSR) and extending up to 0xFF pairs at most.
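    // IA32_MTRR_DEF_TYPE itself is MSR 0x2FF, checked separately below.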
    (id >= MTRR_PHYS_BASE_MSR && id <= MTRR_PHYS_BASE_MSR + 2 * VAR_MTRR_NUM_MASK as u32)
        || id == crate::msr_index::MSR_MTRRdefType
}

/// Returns the count of variable MTRR entries specified by the list of `msrs`.
pub fn count_variable_mtrrs(msrs: &BTreeMap<u32, u64>) -> usize {
    // Each variable MTRR takes up two MSRs (base + mask), so divide by 2. This will also count the
    // MTRRdefType entry, but that is only one extra and the division truncates, so it won't affect
    // the final count.
    msrs.keys().filter(|&msr| is_mtrr_msr(*msr)).count() / 2
}

/// Inserts MTRR configuration MSRs into `msrs`, marking `pci_start..4G` as UC
/// and leaving the default memory type as WB.
pub fn set_mtrr_msrs(msrs: &mut BTreeMap<u32, u64>, vm: &dyn Vm, pci_start: u64) {
    // Set pci_start .. 4G as UC
    // all others are set to default WB
    let pci_len = (1 << 32) - pci_start;
    let vecs = get_mtrr_pairs(pci_start, pci_len);

    let phys_mask: u64 = (1 << vm.get_guest_phys_addr_bits()) - 1;
    for (idx, (base, len)) in vecs.iter().enumerate() {
        let reg_idx = idx as u32 * 2;
        msrs.insert(MTRR_PHYS_BASE_MSR + reg_idx, base | MTRR_MEMTYPE_UC as u64);
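        // For a power-of-two `len`, len.wrapping_neg() == !(len - 1), so the
        // mask keeps every address bit above the range size.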
        let mask: u64 = (len.wrapping_neg() & phys_mask) | MTRR_VAR_VALID;
        msrs.insert(MTRR_PHYS_MASK_MSR + reg_idx, mask);
    }
    // Disable fixed MTRRs and enable variable MTRRs, set default type as WB
    msrs.insert(
        crate::msr_index::MSR_MTRRdefType,
        MTRR_ENABLE | MTRR_MEMTYPE_WB as u64,
    );
}

/// Inserts the default (reset) values of MSRs into `msrs`.
///
/// Sets IA32_TSC to 0 and enables fast-string operations via
/// IA32_MISC_ENABLE.
pub fn set_default_msrs(msrs: &mut BTreeMap<u32, u64>) {
    msrs.insert(crate::msr_index::MSR_IA32_TSC, 0x0);
    msrs.insert(
        crate::msr_index::MSR_IA32_MISC_ENABLE,
        crate::msr_index::MSR_IA32_MISC_ENABLE_FAST_STRING as u64,
    );
}

/// Configures model-specific registers (MSRs) for long (64-bit) mode.
pub fn set_long_mode_msrs(msrs: &mut BTreeMap<u32, u64>) {
    msrs.insert(crate::msr_index::MSR_IA32_SYSENTER_CS, 0x0);
    msrs.insert(crate::msr_index::MSR_IA32_SYSENTER_ESP, 0x0);
    msrs.insert(crate::msr_index::MSR_IA32_SYSENTER_EIP, 0x0);

    // x86_64 specific msrs, we only run on x86_64 not x86
    msrs.insert(crate::msr_index::MSR_STAR, 0x0);
    msrs.insert(crate::msr_index::MSR_CSTAR, 0x0);
    msrs.insert(crate::msr_index::MSR_KERNEL_GS_BASE, 0x0);
    msrs.insert(crate::msr_index::MSR_SYSCALL_MASK, 0x0);
    msrs.insert(crate::msr_index::MSR_LSTAR, 0x0);
    // end of x86_64 specific code

    msrs.insert(crate::msr_index::MSR_IA32_TSC, 0x0);
    msrs.insert(
        crate::msr_index::MSR_IA32_MISC_ENABLE,
        crate::msr_index::MSR_IA32_MISC_ENABLE_FAST_STRING as u64,
    );
}

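// Control-register and EFER bits used below: CR0.PE (protection enable),
// CR0.PG (paging), CR4.PAE (physical address extension), EFER.LME/LMA
// (long mode enable / long mode active).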
const X86_CR0_PE: u64 = 0x1;
const X86_CR0_PG: u64 = 0x80000000;
const X86_CR4_PAE: u64 = 0x20;

const EFER_LME: u64 = 0x100;
const EFER_LMA: u64 = 0x400;

const BOOT_GDT_OFFSET: u64 = 0x1500;
const BOOT_IDT_OFFSET: u64 = 0x1528;
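// (BOOT_IDT_OFFSET == BOOT_GDT_OFFSET + BOOT_GDT_MAX * 8; the IDT slot sits
// immediately after the 5-entry GDT.)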

const BOOT_GDT_MAX: usize = 5;

fn write_gdt_table(table: &[u64], guest_mem: &GuestMemory) -> Result<()> {
    let boot_gdt_addr = GuestAddress(BOOT_GDT_OFFSET);
    for (index, entry) in table.iter().enumerate() {
        let addr = boot_gdt_addr
            .checked_add((index * mem::size_of::<u64>()) as u64)
            .ok_or(Error::WriteGDTFailure)?;
        if !guest_mem.is_valid_range(addr, mem::size_of::<u64>() as u64) {
            return Err(Error::WriteGDTFailure);
        }

        guest_mem
            .write_obj_at_addr(*entry, addr)
            .map_err(|_| Error::WriteGDTFailure)?;
    }
    Ok(())
}

fn write_idt_value(val: u64, guest_mem: &GuestMemory) -> Result<()> {
    let boot_idt_addr = GuestAddress(BOOT_IDT_OFFSET);
    guest_mem
        .write_obj_at_addr(val, boot_idt_addr)
        .map_err(|_| Error::WriteIDTFailure)
}

/// Configures the GDT, IDT, and segment registers for long mode.
pub fn configure_segments_and_sregs(mem: &GuestMemory, sregs: &mut Sregs) -> Result<()> {
    // reference: https://docs.kernel.org/arch/x86/boot.html?highlight=__BOOT_CS#id1
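    // Descriptor flags: 0xa09b is a present ring-0 code segment with the L bit
    // set (64-bit); 0xc093 is a present ring-0 read/write data segment; 0x808b
    // is a TSS descriptor. All use limit 0xfffff with 4 KiB granularity.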
    let gdt_table: [u64; BOOT_GDT_MAX] = [
        gdt::gdt_entry(0, 0, 0),            // NULL
        gdt::gdt_entry(0, 0, 0),            // NULL
        gdt::gdt_entry(0xa09b, 0, 0xfffff), // CODE
        gdt::gdt_entry(0xc093, 0, 0xfffff), // DATA
        gdt::gdt_entry(0x808b, 0, 0xfffff), // TSS
    ];

    let code_seg = gdt::segment_from_gdt(gdt_table[2], 2);
    let data_seg = gdt::segment_from_gdt(gdt_table[3], 3);
    let tss_seg = gdt::segment_from_gdt(gdt_table[4], 4);

    // Write segments
    write_gdt_table(&gdt_table[..], mem)?;
    sregs.gdt.base = BOOT_GDT_OFFSET;
    sregs.gdt.limit = mem::size_of_val(&gdt_table) as u16 - 1;

    write_idt_value(0, mem)?;
    sregs.idt.base = BOOT_IDT_OFFSET;
    sregs.idt.limit = mem::size_of::<u64>() as u16 - 1;

    sregs.cs = code_seg;
    sregs.ds = data_seg;
    sregs.es = data_seg;
    sregs.fs = data_seg;
    sregs.gs = data_seg;
    sregs.ss = data_seg;
    sregs.tr = tss_seg;

    /* 64-bit protected mode */
    sregs.cr0 |= X86_CR0_PE;
    sregs.efer |= EFER_LME;

    Ok(())
}

/// Configures the GDT, IDT, and segment registers for 32-bit protected mode with paging disabled.
pub fn configure_segments_and_sregs_flat32(mem: &GuestMemory, sregs: &mut Sregs) -> Result<()> {
    // reference: https://docs.kernel.org/arch/x86/boot.html?highlight=__BOOT_CS#id1
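    // Descriptor flags: 0xc09b is a present ring-0 code segment with D/B set
    // (32-bit default operand size); 0xc093 and 0x808b are as in the 64-bit
    // variant above.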
    let gdt_table: [u64; BOOT_GDT_MAX] = [
        gdt::gdt_entry(0, 0, 0),            // NULL
        gdt::gdt_entry(0, 0, 0),            // NULL
        gdt::gdt_entry(0xc09b, 0, 0xfffff), // CODE
        gdt::gdt_entry(0xc093, 0, 0xfffff), // DATA
        gdt::gdt_entry(0x808b, 0, 0xfffff), // TSS
    ];

    let code_seg = gdt::segment_from_gdt(gdt_table[2], 2);
    let data_seg = gdt::segment_from_gdt(gdt_table[3], 3);
    let tss_seg = gdt::segment_from_gdt(gdt_table[4], 4);

    // Write segments
    write_gdt_table(&gdt_table[..], mem)?;
    sregs.gdt.base = BOOT_GDT_OFFSET;
    sregs.gdt.limit = mem::size_of_val(&gdt_table) as u16 - 1;

    write_idt_value(0, mem)?;
    sregs.idt.base = BOOT_IDT_OFFSET;
    sregs.idt.limit = mem::size_of::<u64>() as u16 - 1;

    sregs.cs = code_seg;
    sregs.ds = data_seg;
    sregs.es = data_seg;
    sregs.fs = data_seg;
    sregs.gs = data_seg;
    sregs.ss = data_seg;
    sregs.tr = tss_seg;

    /* 32-bit protected mode with paging disabled */
    sregs.cr0 |= X86_CR0_PE;
    sregs.cr0 &= !X86_CR0_PG;

    Ok(())
}

/// Configures the system page tables and control registers for long mode with paging.
/// Prepares identity mapping for the low 4GB of memory.
pub fn setup_page_tables(mem: &GuestMemory, sregs: &mut Sregs) -> Result<()> {
    // Puts PML4 right after zero page but aligned to 4k.
    let boot_pml4_addr = GuestAddress(0x9000);
    let boot_pdpte_addr = GuestAddress(0xa000);
    let boot_pde_addr = GuestAddress(0xb000);

    const PDE_FLAGS_TABLE_REFERENCE: u64 = 0x03; // Present | Read/Write
    const PDE_FLAGS_PAGE_MAPPING: u64 = 0x83; // Present | Read/Write | Page Size
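
    // Layout: the single PML4 entry at 0x9000 points at a PDPT at 0xa000; its
    // four entries point at four page directories at 0xb000..0xf000, each
    // holding 512 2 MiB mappings, identity-mapping VA [0..4GB) in total.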

    // Entry covering VA [0..512GB)
    mem.write_obj_at_addr(
        boot_pdpte_addr.offset() | PDE_FLAGS_TABLE_REFERENCE,
        boot_pml4_addr,
    )
    .map_err(|_| Error::WritePML4Address)?;

    // Identity mapping for VA [0..4GB)
    for i in 0..4 {
        let pde_addr = boot_pde_addr.unchecked_add(i * 0x1000);

        // Entry covering a single 1GB VA area
        mem.write_obj_at_addr(
            pde_addr.offset() | PDE_FLAGS_TABLE_REFERENCE,
            boot_pdpte_addr.unchecked_add(i * 8),
        )
        .map_err(|_| Error::WritePDPTEAddress)?;

        // 512 2MB entries together covering a single 1GB VA area. Note we are assuming
        // the CPU supports 2MB pages (/proc/cpuinfo has 'pse'). All modern CPUs do.
        for j in 0..512 {
            mem.write_obj_at_addr(
                (i << 30) | (j << 21) | PDE_FLAGS_PAGE_MAPPING,
                pde_addr.unchecked_add(j * 8),
            )
            .map_err(|_| Error::WritePDEAddress)?;
        }
    }

    sregs.cr3 = boot_pml4_addr.offset();
    sregs.cr4 |= X86_CR4_PAE;
    sregs.cr0 |= X86_CR0_PG;
    // Long mode is active (EFER.LMA). On hardware this bit is set by the CPU
    // when paging is enabled with EFER.LME set; here we set it directly.
    sregs.efer |= EFER_LMA;
    Ok(())
}

#[cfg(test)]
mod tests {
    use vm_memory::GuestAddress;
    use vm_memory::GuestMemory;

    use super::*;

    fn create_guest_mem() -> GuestMemory {
        GuestMemory::new(&[(GuestAddress(0), 0x10000)]).unwrap()
    }

    fn read_u64(gm: &GuestMemory, offset: u64) -> u64 {
        let read_addr = GuestAddress(offset);
        gm.read_obj_from_addr(read_addr).unwrap()
    }

    #[test]
    fn segments_and_sregs() {
        let mut sregs = Default::default();
        let gm = create_guest_mem();
        configure_segments_and_sregs(&gm, &mut sregs).unwrap();

        assert_eq!(0x0, read_u64(&gm, BOOT_GDT_OFFSET));
        assert_eq!(0xaf9b000000ffff, read_u64(&gm, BOOT_GDT_OFFSET + 0x10));
        assert_eq!(0xcf93000000ffff, read_u64(&gm, BOOT_GDT_OFFSET + 0x18));
        assert_eq!(0x8f8b000000ffff, read_u64(&gm, BOOT_GDT_OFFSET + 0x20));
        assert_eq!(0x0, read_u64(&gm, BOOT_IDT_OFFSET));

        assert_eq!(0, sregs.cs.base);
        assert_eq!(0xffffffff, sregs.ds.limit_bytes);
        assert_eq!(0x10, sregs.cs.selector);
        assert_eq!(0x18, sregs.ds.selector);
        assert_eq!(0x18, sregs.es.selector);
        assert_eq!(0x18, sregs.ss.selector);
        assert_eq!(1, sregs.fs.present);
        assert_eq!(1, sregs.gs.g);
        assert_eq!(0, sregs.ss.avl);
        assert_eq!(0, sregs.tr.base);
        assert_eq!(0xffffffff, sregs.tr.limit_bytes);
        assert_eq!(0, sregs.tr.avl);
        assert_eq!(X86_CR0_PE, sregs.cr0 & X86_CR0_PE);
        assert_eq!(EFER_LME, sregs.efer);
    }

    #[test]
    fn page_tables() {
        let mut sregs = Default::default();
        let gm = create_guest_mem();
        setup_page_tables(&gm, &mut sregs).unwrap();

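        // PML4[0] should point at the PDPT (0xa000 | Present | RW) and
        // PDPT[0] at the first page directory (0xb000 | Present | RW).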
        assert_eq!(0xa003, read_u64(&gm, 0x9000));
        assert_eq!(0xb003, read_u64(&gm, 0xa000));
        for i in 0..512 {
            assert_eq!((i << 21) + 0x83u64, read_u64(&gm, 0xb000 + i * 8));
        }

        assert_eq!(0x9000, sregs.cr3);
        assert_eq!(X86_CR4_PAE, sregs.cr4);
        assert_eq!(X86_CR0_PG, sregs.cr0 & X86_CR0_PG);
    }
}