1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #ifndef _fpcontrol_h
17 #define _fpcontrol_h
18
19 #include <cstdint>
20
21 // In order to get tests for correctly rounded operations (e.g. multiply) to
22 // work properly we need to be able to set the reference hardware to FTZ mode if
23 // the device hardware is running in that mode. We have explored all other
24 // options short of writing correctly rounded operations in integer code, and
25 // have found this is the only way to correctly verify operation.
26 //
// Non-Apple implementations will need to provide their own implementation for
28 // these features. If the reference hardware and device are both running in the
29 // same state (either FTZ or IEEE compliant modes) then these functions may be
30 // empty. If the device is running in non-default rounding mode (e.g. round
31 // toward zero), then these functions should also set the reference device into
32 // that rounding mode.
33 #if defined(__APPLE__) || defined(_MSC_VER) || defined(__linux__) \
34 || defined(__MINGW32__)
// Integer type used to carry a saved floating-point control word between
// ForceFTZ()/DisableFTZ() and RestoreFPState(). MSVC builds use a plain int;
// every other toolchain uses a 64-bit integer.
#ifndef _MSC_VER
typedef int64_t FPU_mode_type;
#else
typedef int FPU_mode_type;
#endif
40 #if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \
41 || defined(__MINGW32__)
42 #include <xmmintrin.h>
43 #elif defined(__PPC__)
44 #include <fpu_control.h>
45 extern __thread fpu_control_t fpu_control;
46 #endif
47 // Set the reference hardware floating point unit to FTZ mode
ForceFTZ(FPU_mode_type * mode)48 inline void ForceFTZ(FPU_mode_type *mode)
49 {
50 #if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \
51 || defined(__MINGW32__)
52 *mode = _mm_getcsr();
53 _mm_setcsr(*mode | 0x8040);
54 #elif defined(__PPC__)
55 *mode = fpu_control;
56 fpu_control |= _FPU_MASK_NI;
57 #elif defined(__arm__)
58 unsigned fpscr;
59 __asm__ volatile("fmrx %0, fpscr" : "=r"(fpscr));
60 *mode = fpscr;
61 __asm__ volatile("fmxr fpscr, %0" ::"r"(fpscr | (1U << 24)));
62 // Add 64 bit support
63 #elif defined(__aarch64__)
64 uint64_t fpscr;
65 __asm__ volatile("mrs %0, fpcr" : "=r"(fpscr));
66 *mode = fpscr;
67 __asm__ volatile("msr fpcr, %0" ::"r"(fpscr | (1U << 24)));
68 #else
69 #error ForceFTZ needs an implentation
70 #endif
71 }
72
73 // Disable the denorm flush to zero
DisableFTZ(FPU_mode_type * mode)74 inline void DisableFTZ(FPU_mode_type *mode)
75 {
76 #if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \
77 || defined(__MINGW32__)
78 *mode = _mm_getcsr();
79 _mm_setcsr(*mode & ~0x8040);
80 #elif defined(__PPC__)
81 *mode = fpu_control;
82 fpu_control &= ~_FPU_MASK_NI;
83 #elif defined(__arm__)
84 unsigned fpscr;
85 __asm__ volatile("fmrx %0, fpscr" : "=r"(fpscr));
86 *mode = fpscr;
87 __asm__ volatile("fmxr fpscr, %0" ::"r"(fpscr & ~(1U << 24)));
88 // Add 64 bit support
89 #elif defined(__aarch64__)
90 uint64_t fpscr;
91 __asm__ volatile("mrs %0, fpcr" : "=r"(fpscr));
92 *mode = fpscr;
93 __asm__ volatile("msr fpcr, %0" ::"r"(fpscr & ~(1U << 24)));
94 #else
95 #error DisableFTZ needs an implentation
96 #endif
97 }
98
99 // Restore the reference hardware to floating point state indicated by *mode
RestoreFPState(FPU_mode_type * mode)100 inline void RestoreFPState(FPU_mode_type *mode)
101 {
102 #if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \
103 || defined(__MINGW32__)
104 _mm_setcsr(*mode);
105 #elif defined(__PPC__)
106 fpu_control = *mode;
107 #elif defined(__arm__)
108 __asm__ volatile("fmxr fpscr, %0" ::"r"(*mode));
109 // Add 64 bit support
110 #elif defined(__aarch64__)
111 __asm__ volatile("msr fpcr, %0" ::"r"(*mode));
112 #else
113 #error RestoreFPState needs an implementation
114 #endif
115 }
116 #else
117 #error ForceFTZ and RestoreFPState need implentations
118 #endif
119
120 #endif
121