1 /* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include "tensorflow/compiler/xla/service/cpu/runtime_fp16.h"
17
18 #include <cstring>
19
20 #include "absl/base/attributes.h"
21
22 namespace {
23
// Helper class that lets us access the underlying bit representation
// of a float without breaking C++ strict aliasing.
//
// The value is stored as a uint32_t. Float views are produced by copying the
// bytes with std::memcpy rather than by casting pointers, which is the
// aliasing-safe way to reinterpret an object representation.
class AliasedFloatInt {
 public:
  static_assert(sizeof(float) == sizeof(uint32_t),
                "float and uint32_t must have the same size");

  // Builds an instance holding the bit pattern of `f`.
  static AliasedFloatInt FromFloat(float f) {
    AliasedFloatInt value;
    value.set_float(f);
    return value;
  }

  // Builds an instance holding the raw bit pattern `u`.
  static AliasedFloatInt FromUInt(uint32_t u) {
    AliasedFloatInt value;
    value.set_uint(u);
    return value;
  }

  // Overwrites the stored bits with the object representation of `f`.
  void set_float(float f) { std::memcpy(&value_, &f, sizeof(f)); }

  // Returns the stored bits reinterpreted as a float.
  float as_float() const {
    float f;
    std::memcpy(&f, &value_, sizeof(f));
    return f;
  }

  // Overwrites the stored bits with `u`.
  void set_uint(uint32_t u) { value_ = u; }

  // Returns the stored bits unchanged.
  uint32_t as_uint() const { return value_; }

 private:
  // Zero-initialized so a default-constructed instance never exposes an
  // indeterminate value.
  uint32_t value_ = 0;
};
55 } // namespace
56
57 // __gnu_f2h_ieee and __gnu_h2f_ieee are marked as weak symbols so if XLA is
58 // built with compiler-rt (that also defines these symbols) we don't get a
59 // duplicate definition linker error. Making these symbols weak also ensures
60 // that the compiler-rt definitions "win", but that isn't essential.
61
62 // Algorithm copied from Eigen.
__gnu_f2h_ieee(float float_value)63 XlaF16ABIType ABSL_ATTRIBUTE_WEAK __gnu_f2h_ieee(float float_value) {
64 AliasedFloatInt f = AliasedFloatInt::FromFloat(float_value);
65
66 const AliasedFloatInt f32infty = AliasedFloatInt::FromUInt(255 << 23);
67 const AliasedFloatInt f16max = AliasedFloatInt::FromUInt((127 + 16) << 23);
68 const AliasedFloatInt denorm_magic =
69 AliasedFloatInt::FromUInt(((127 - 15) + (23 - 10) + 1) << 23);
70 unsigned int sign_mask = 0x80000000u;
71 uint32_t o = static_cast<uint16_t>(0x0u);
72
73 unsigned int sign = f.as_uint() & sign_mask;
74 f.set_uint(f.as_uint() ^ sign);
75
76 // NOTE all the integer compares in this function can be safely
77 // compiled into signed compares since all operands are below
78 // 0x80000000. Important if you want fast straight SSE2 code
79 // (since there's no unsigned PCMPGTD).
80
81 if (f.as_uint() >=
82 f16max.as_uint()) { // result is Inf or NaN (all exponent bits set)
83 o = (f.as_uint() > f32infty.as_uint()) ? 0x7e00
84 : 0x7c00; // NaN->qNaN and Inf->Inf
85 } else { // (De)normalized number or zero
86 if (f.as_uint() < (113 << 23)) { // resulting FP16 is subnormal or zero
87 // use a magic value to align our 10 mantissa bits at the bottom of
88 // the float. as long as FP addition is round-to-nearest-even this
89 // just works.
90 f.set_float(f.as_float() + denorm_magic.as_float());
91
92 // and one integer subtract of the bias later, we have our final float!
93 o = static_cast<uint16_t>(f.as_uint() - denorm_magic.as_uint());
94 } else {
95 unsigned int mant_odd =
96 (f.as_uint() >> 13) & 1; // resulting mantissa is odd
97
98 // update exponent, rounding bias part 1
99 f.set_uint(f.as_uint() + (static_cast<unsigned int>(15 - 127) << 23) +
100 0xfff);
101 // rounding bias part 2
102 f.set_uint(f.as_uint() + mant_odd);
103 // take the bits!
104 o = static_cast<uint16_t>(f.as_uint() >> 13);
105 }
106 }
107
108 o |= static_cast<uint16_t>(sign >> 16);
109 // The output can be a float type, bitcast it from uint16_t.
110 auto ho = static_cast<uint16_t>(o);
111 XlaF16ABIType ret = 0;
112 std::memcpy(&ret, &ho, sizeof(ho));
113 return ret;
114 }
115
116 // Algorithm copied from Eigen.
__gnu_h2f_ieee(XlaF16ABIType hf)117 float ABSL_ATTRIBUTE_WEAK __gnu_h2f_ieee(XlaF16ABIType hf) {
118 const AliasedFloatInt magic = AliasedFloatInt::FromUInt(113 << 23);
119 const unsigned int shifted_exp = 0x7c00 << 13; // exponent mask after shift
120 AliasedFloatInt o;
121
122 // The input can be a float type, bitcast it to uint16_t.
123 uint16_t h;
124 std::memcpy(&h, &hf, sizeof(h));
125 o.set_uint((h & 0x7fff) << 13); // exponent/mantissa bits
126 unsigned int exp = shifted_exp & o.as_uint(); // just the exponent
127 o.set_uint(o.as_uint() + ((127 - 15) << 23)); // exponent adjust
128
129 // handle exponent special cases
130 if (exp == shifted_exp) { // Inf/NaN?
131 o.set_uint(o.as_uint() + ((128 - 16) << 23)); // extra exp adjust
132 } else if (exp == 0) { // Zero/Denormal?
133 o.set_uint(o.as_uint() + (1 << 23)); // extra exp adjust
134 o.set_float(o.as_float() - magic.as_float()); // renormalize
135 }
136
137 o.set_uint(o.as_uint() | (h & 0x8000) << 16); // sign bit
138 return o.as_float();
139 }
140
__truncdfhf2(double d)141 XlaF16ABIType ABSL_ATTRIBUTE_WEAK __truncdfhf2(double d) {
142 // This does a double rounding step, but it's precise enough for our use
143 // cases.
144 return __gnu_f2h_ieee(static_cast<float>(d));
145 }
146