//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef _fpcontrol_h
#define _fpcontrol_h

#include <cstdint>

// In order to get tests for correctly rounded operations (e.g. multiply) to
// work properly we need to be able to set the reference hardware to FTZ mode if
// the device hardware is running in that mode. We have explored all other
// options short of writing correctly rounded operations in integer code, and
// have found this is the only way to correctly verify operation.
//
// Non-Apple implementations will need to provide their own implementation for
// these features. If the reference hardware and device are both running in the
// same state (either FTZ or IEEE compliant modes) then these functions may be
// empty. If the device is running in non-default rounding mode (e.g. round
// toward zero), then these functions should also set the reference device into
// that rounding mode.
#if defined(__APPLE__) || defined(_MSC_VER) || defined(__linux__) \
    || defined(__MINGW32__)
// Saved FP-environment word. MSVC builds hold the 32-bit MXCSR in a plain
// int; everywhere else int64_t is used so the value is wide enough for the
// 64-bit AArch64 FPCR as well.
#ifdef _MSC_VER
typedef int FPU_mode_type;
#else
typedef int64_t FPU_mode_type;
#endif
#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \
    || defined(__MINGW32__)
// x86 paths read/write MXCSR through the _mm_getcsr/_mm_setcsr intrinsics.
#include <xmmintrin.h>
#elif defined(__PPC__)
// PPC keeps a per-thread shadow copy of the FPU control word; it is defined
// in another translation unit — TODO(review): confirm where it is defined.
#include <fpu_control.h>
extern __thread fpu_control_t fpu_control;
#endif
47*6467f958SSadaf Ebrahimi // Set the reference hardware floating point unit to FTZ mode
ForceFTZ(FPU_mode_type * mode)48*6467f958SSadaf Ebrahimi inline void ForceFTZ(FPU_mode_type *mode)
49*6467f958SSadaf Ebrahimi {
50*6467f958SSadaf Ebrahimi #if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \
51*6467f958SSadaf Ebrahimi || defined(__MINGW32__)
52*6467f958SSadaf Ebrahimi *mode = _mm_getcsr();
53*6467f958SSadaf Ebrahimi _mm_setcsr(*mode | 0x8040);
54*6467f958SSadaf Ebrahimi #elif defined(__PPC__)
55*6467f958SSadaf Ebrahimi *mode = fpu_control;
56*6467f958SSadaf Ebrahimi fpu_control |= _FPU_MASK_NI;
57*6467f958SSadaf Ebrahimi #elif defined(__arm__)
58*6467f958SSadaf Ebrahimi unsigned fpscr;
59*6467f958SSadaf Ebrahimi __asm__ volatile("fmrx %0, fpscr" : "=r"(fpscr));
60*6467f958SSadaf Ebrahimi *mode = fpscr;
61*6467f958SSadaf Ebrahimi __asm__ volatile("fmxr fpscr, %0" ::"r"(fpscr | (1U << 24)));
62*6467f958SSadaf Ebrahimi // Add 64 bit support
63*6467f958SSadaf Ebrahimi #elif defined(__aarch64__)
64*6467f958SSadaf Ebrahimi uint64_t fpscr;
65*6467f958SSadaf Ebrahimi __asm__ volatile("mrs %0, fpcr" : "=r"(fpscr));
66*6467f958SSadaf Ebrahimi *mode = fpscr;
67*6467f958SSadaf Ebrahimi __asm__ volatile("msr fpcr, %0" ::"r"(fpscr | (1U << 24)));
68*6467f958SSadaf Ebrahimi #else
69*6467f958SSadaf Ebrahimi #error ForceFTZ needs an implentation
70*6467f958SSadaf Ebrahimi #endif
71*6467f958SSadaf Ebrahimi }
72*6467f958SSadaf Ebrahimi
73*6467f958SSadaf Ebrahimi // Disable the denorm flush to zero
DisableFTZ(FPU_mode_type * mode)74*6467f958SSadaf Ebrahimi inline void DisableFTZ(FPU_mode_type *mode)
75*6467f958SSadaf Ebrahimi {
76*6467f958SSadaf Ebrahimi #if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \
77*6467f958SSadaf Ebrahimi || defined(__MINGW32__)
78*6467f958SSadaf Ebrahimi *mode = _mm_getcsr();
79*6467f958SSadaf Ebrahimi _mm_setcsr(*mode & ~0x8040);
80*6467f958SSadaf Ebrahimi #elif defined(__PPC__)
81*6467f958SSadaf Ebrahimi *mode = fpu_control;
82*6467f958SSadaf Ebrahimi fpu_control &= ~_FPU_MASK_NI;
83*6467f958SSadaf Ebrahimi #elif defined(__arm__)
84*6467f958SSadaf Ebrahimi unsigned fpscr;
85*6467f958SSadaf Ebrahimi __asm__ volatile("fmrx %0, fpscr" : "=r"(fpscr));
86*6467f958SSadaf Ebrahimi *mode = fpscr;
87*6467f958SSadaf Ebrahimi __asm__ volatile("fmxr fpscr, %0" ::"r"(fpscr & ~(1U << 24)));
88*6467f958SSadaf Ebrahimi // Add 64 bit support
89*6467f958SSadaf Ebrahimi #elif defined(__aarch64__)
90*6467f958SSadaf Ebrahimi uint64_t fpscr;
91*6467f958SSadaf Ebrahimi __asm__ volatile("mrs %0, fpcr" : "=r"(fpscr));
92*6467f958SSadaf Ebrahimi *mode = fpscr;
93*6467f958SSadaf Ebrahimi __asm__ volatile("msr fpcr, %0" ::"r"(fpscr & ~(1U << 24)));
94*6467f958SSadaf Ebrahimi #else
95*6467f958SSadaf Ebrahimi #error DisableFTZ needs an implentation
96*6467f958SSadaf Ebrahimi #endif
97*6467f958SSadaf Ebrahimi }
98*6467f958SSadaf Ebrahimi
99*6467f958SSadaf Ebrahimi // Restore the reference hardware to floating point state indicated by *mode
RestoreFPState(FPU_mode_type * mode)100*6467f958SSadaf Ebrahimi inline void RestoreFPState(FPU_mode_type *mode)
101*6467f958SSadaf Ebrahimi {
102*6467f958SSadaf Ebrahimi #if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \
103*6467f958SSadaf Ebrahimi || defined(__MINGW32__)
104*6467f958SSadaf Ebrahimi _mm_setcsr(*mode);
105*6467f958SSadaf Ebrahimi #elif defined(__PPC__)
106*6467f958SSadaf Ebrahimi fpu_control = *mode;
107*6467f958SSadaf Ebrahimi #elif defined(__arm__)
108*6467f958SSadaf Ebrahimi __asm__ volatile("fmxr fpscr, %0" ::"r"(*mode));
109*6467f958SSadaf Ebrahimi // Add 64 bit support
110*6467f958SSadaf Ebrahimi #elif defined(__aarch64__)
111*6467f958SSadaf Ebrahimi __asm__ volatile("msr fpcr, %0" ::"r"(*mode));
112*6467f958SSadaf Ebrahimi #else
113*6467f958SSadaf Ebrahimi #error RestoreFPState needs an implementation
114*6467f958SSadaf Ebrahimi #endif
115*6467f958SSadaf Ebrahimi }
116*6467f958SSadaf Ebrahimi #else
117*6467f958SSadaf Ebrahimi #error ForceFTZ and RestoreFPState need implentations
118*6467f958SSadaf Ebrahimi #endif
119*6467f958SSadaf Ebrahimi
120*6467f958SSadaf Ebrahimi #endif
121