/*
 * Copyright (c) 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files
 * (the "Software"), to deal in the Software without restriction,
 * including without limitation the rights to use, copy, modify, merge,
 * publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so,
 * subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include <trace.h>
#include <arch/x86.h>
#include <arch/fpu.h>
#include <string.h>
#include <kernel/thread.h>

#define LOCAL_TRACE 0

#if X86_WITH_FPU

#define FPU_MASK_ALL_EXCEPTIONS 1

/* CPUID EAX = 1 return values */

#define ECX_SSE3    (0x00000001 << 0)
#define ECX_SSSE3   (0x00000001 << 9)
#define ECX_SSE4_1  (0x00000001 << 19)
#define ECX_SSE4_2  (0x00000001 << 20)
#define EDX_FXSR    (0x00000001 << 24)
#define EDX_SSE     (0x00000001 << 25)
#define EDX_SSE2    (0x00000001 << 26)
#define EDX_FPU     (0x00000001 << 0)

#define FPU_CAP(ecx, edx) (((edx) & EDX_FPU) != 0)

#define SSE_CAP(ecx, edx) ( \
    (((ecx) & (ECX_SSE3 | ECX_SSSE3 | ECX_SSE4_1 | ECX_SSE4_2)) != 0) || \
    (((edx) & (EDX_SSE | EDX_SSE2)) != 0) \
    )

#define FXSAVE_CAP(ecx, edx) (((edx) & EDX_FXSR) != 0)

static int fp_supported;
static thread_t *fp_owner;

/* The FXSAVE area is 512 bytes and must be 16-byte aligned */
static uint8_t __ALIGNED(16) fpu_init_states[512] = {0};

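/* Execute CPUID with EAX = 1 and return the feature flag registers ECX and EDX */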
static void get_cpu_cap(uint32_t *ecx, uint32_t *edx)
{
    uint32_t eax = 1;

    /* CPUID leaf 1; EAX and EBX are also overwritten, so declare them to the compiler */
    __asm__ __volatile__
    ("cpuid" : "+a" (eax), "=c" (*ecx), "=d" (*edx) : : "ebx");
}

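/*
 * Probe CPUID for x87/SSE/FXSR support. If present, enable the FPU and SSE
 * in CR0/CR4, program the default x87 and SSE control words, and capture a
 * pristine FXSAVE image used to seed every new thread's FPU state.
 */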
void fpu_init(void)
{
    uint32_t ecx = 0, edx = 0;
    uint16_t fcw;
    uint32_t mxcsr;

#ifdef ARCH_X86_64
    uint64_t x;
#else
    uint32_t x;
#endif

    fp_supported = 0;
    fp_owner = NULL;

    get_cpu_cap(&ecx, &edx);

    if (!FPU_CAP(ecx, edx) || !SSE_CAP(ecx, edx) || !FXSAVE_CAP(ecx, edx))
        return;

    fp_supported = 1;

    /* No x87 emulation, native exception handling, monitor co-processor */

    x = x86_get_cr0();
    x &= ~X86_CR0_EM;
    x |= X86_CR0_NE;
    x |= X86_CR0_MP;
    x86_set_cr0(x);

    /* Init x87 */
    __asm__ __volatile__ ("finit");
    __asm__ __volatile__("fstcw %0" : "=m" (fcw));
#if FPU_MASK_ALL_EXCEPTIONS
    /* mask all exceptions */
    fcw |= 0x3f;
#else
    /* unmask all exceptions */
    fcw &= 0xffc0;
#endif
    __asm__ __volatile__("fldcw %0" : : "m" (fcw));

    /* Init SSE: enable FXSAVE/FXRSTOR and SSE exception handling, leave XSAVE disabled */
    x = x86_get_cr4();
    x |= X86_CR4_OSXMMEXPT;
    x |= X86_CR4_OSFXSR;
    x &= ~X86_CR4_OSXSAVE;
    x86_set_cr4(x);

    __asm__ __volatile__("stmxcsr %0" : "=m" (mxcsr));
#if FPU_MASK_ALL_EXCEPTIONS
    /* mask all exceptions */
    mxcsr = (0x3f << 7);
#else
    /* unmask all exceptions */
    mxcsr &= 0x0000003f;
#endif
    __asm__ __volatile__("ldmxcsr %0" : : "m" (mxcsr));

    /* Save the FPU's initial state; it is copied into each newly created thread */
    __asm__ __volatile__("fxsave %0" : "=m" (fpu_init_states));

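    /* Set CR0.TS so the first FPU instruction traps to fpu_dev_na_handler() (lazy FPU switching) */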
    x86_set_cr0(x86_get_cr0() | X86_CR0_TS);
    return;
}

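/*
 * Align the thread's FPU save area to 16 bytes (an FXSAVE requirement) and
 * seed it with the pristine state captured in fpu_init().
 */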
void fpu_init_thread_states(thread_t *t)
{
    t->arch.fpu_states = (vaddr_t *)round_up(((vaddr_t)t->arch.fpu_buffer), 16);
    memcpy(t->arch.fpu_states, fpu_init_states, sizeof(fpu_init_states));
}

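/*
 * Lazy FPU switch: set CR0.TS when the incoming thread does not own the FPU,
 * so its first FPU instruction raises #NM, and clear it when it does.
 * Ownership is dropped if the outgoing owner is exiting.
 */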
void fpu_context_switch(thread_t *old_thread, thread_t *new_thread)
{
    if (fp_supported == 0)
        return;

    if (new_thread != fp_owner)
        x86_set_cr0(x86_get_cr0() | X86_CR0_TS);
    else
        x86_set_cr0(x86_get_cr0() & ~X86_CR0_TS);

    if (old_thread == fp_owner && old_thread->state == THREAD_DEATH) {
        LTRACEF("dead fp_owner thread\n");
        fp_owner = NULL;
    }

    return;
}

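/*
 * Device Not Available (#NM) exception handler: clear CR0.TS, then if another
 * thread owns the FPU, spill its registers and restore the current thread's
 * saved state before claiming ownership.
 */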
void fpu_dev_na_handler(void)
{
    thread_t *self;

    x86_set_cr0(x86_get_cr0() & ~X86_CR0_TS);

    if (fp_supported == 0)
        return;

    self = get_current_thread();

    LTRACEF("owner %p self %p\n", fp_owner, self);
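    /* Lazily migrate the FPU: save the previous owner's registers, then load this thread's image */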
    if ((fp_owner != NULL) && (fp_owner != self)) {
        __asm__ __volatile__("fxsave %0" : "=m" (*fp_owner->arch.fpu_states));
        __asm__ __volatile__("fxrstor %0" : : "m" (*self->arch.fpu_states));
    }

    fp_owner = self;
    return;
}
#endif  /* X86_WITH_FPU */

/* End of file */