1*61046927SAndroid Build Coastguard Worker /*
2*61046927SAndroid Build Coastguard Worker * Mesa 3-D graphics library
3*61046927SAndroid Build Coastguard Worker *
4*61046927SAndroid Build Coastguard Worker * Copyright (C) 1999-2007 Brian Paul All Rights Reserved.
5*61046927SAndroid Build Coastguard Worker * Copyright 2015 Philip Taylor <[email protected]>
6*61046927SAndroid Build Coastguard Worker * Copyright 2018 Advanced Micro Devices, Inc.
7*61046927SAndroid Build Coastguard Worker * Copyright (C) 2018-2019 Intel Corporation
8*61046927SAndroid Build Coastguard Worker *
9*61046927SAndroid Build Coastguard Worker * Permission is hereby granted, free of charge, to any person obtaining a
10*61046927SAndroid Build Coastguard Worker * copy of this software and associated documentation files (the "Software"),
11*61046927SAndroid Build Coastguard Worker * to deal in the Software without restriction, including without limitation
12*61046927SAndroid Build Coastguard Worker * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13*61046927SAndroid Build Coastguard Worker * and/or sell copies of the Software, and to permit persons to whom the
14*61046927SAndroid Build Coastguard Worker * Software is furnished to do so, subject to the following conditions:
15*61046927SAndroid Build Coastguard Worker *
16*61046927SAndroid Build Coastguard Worker * The above copyright notice and this permission notice shall be included
17*61046927SAndroid Build Coastguard Worker * in all copies or substantial portions of the Software.
18*61046927SAndroid Build Coastguard Worker *
19*61046927SAndroid Build Coastguard Worker * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20*61046927SAndroid Build Coastguard Worker * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21*61046927SAndroid Build Coastguard Worker * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22*61046927SAndroid Build Coastguard Worker * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
23*61046927SAndroid Build Coastguard Worker * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
24*61046927SAndroid Build Coastguard Worker * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
25*61046927SAndroid Build Coastguard Worker * OTHER DEALINGS IN THE SOFTWARE.
26*61046927SAndroid Build Coastguard Worker */
27*61046927SAndroid Build Coastguard Worker
28*61046927SAndroid Build Coastguard Worker #include <math.h>
29*61046927SAndroid Build Coastguard Worker #include <assert.h>
30*61046927SAndroid Build Coastguard Worker #include "half_float.h"
31*61046927SAndroid Build Coastguard Worker #include "rounding.h"
32*61046927SAndroid Build Coastguard Worker #include "softfloat.h"
33*61046927SAndroid Build Coastguard Worker #include "macros.h"
34*61046927SAndroid Build Coastguard Worker #include "u_math.h"
35*61046927SAndroid Build Coastguard Worker
/* Overlay used to inspect or build the raw bit pattern of a 32-bit float.
 * The float member comes first so that a brace initializer such as
 * `{val}` stores a float value.
 */
typedef union {
   float f;      /* value viewed as a float */
   int32_t i;    /* same bits viewed as a signed integer */
   uint32_t u;   /* same bits viewed as an unsigned integer */
} fi_type;
37*61046927SAndroid Build Coastguard Worker
38*61046927SAndroid Build Coastguard Worker /**
39*61046927SAndroid Build Coastguard Worker * Convert a 4-byte float to a 2-byte half float.
40*61046927SAndroid Build Coastguard Worker *
41*61046927SAndroid Build Coastguard Worker * Not all float32 values can be represented exactly as a float16 value. We
42*61046927SAndroid Build Coastguard Worker * round such intermediate float32 values to the nearest float16. When the
43*61046927SAndroid Build Coastguard Worker * float32 lies exactly between to float16 values, we round to the one with
44*61046927SAndroid Build Coastguard Worker * an even mantissa.
45*61046927SAndroid Build Coastguard Worker *
46*61046927SAndroid Build Coastguard Worker * This rounding behavior has several benefits:
47*61046927SAndroid Build Coastguard Worker * - It has no sign bias.
48*61046927SAndroid Build Coastguard Worker *
49*61046927SAndroid Build Coastguard Worker * - It reproduces the behavior of real hardware: opcode F32TO16 in Intel's
50*61046927SAndroid Build Coastguard Worker * GPU ISA.
51*61046927SAndroid Build Coastguard Worker *
52*61046927SAndroid Build Coastguard Worker * - By reproducing the behavior of the GPU (at least on Intel hardware),
53*61046927SAndroid Build Coastguard Worker * compile-time evaluation of constant packHalf2x16 GLSL expressions will
54*61046927SAndroid Build Coastguard Worker * result in the same value as if the expression were executed on the GPU.
55*61046927SAndroid Build Coastguard Worker */
56*61046927SAndroid Build Coastguard Worker uint16_t
_mesa_float_to_half_slow(float val)57*61046927SAndroid Build Coastguard Worker _mesa_float_to_half_slow(float val)
58*61046927SAndroid Build Coastguard Worker {
59*61046927SAndroid Build Coastguard Worker const fi_type fi = {val};
60*61046927SAndroid Build Coastguard Worker const int flt_m = fi.i & 0x7fffff;
61*61046927SAndroid Build Coastguard Worker const int flt_e = (fi.i >> 23) & 0xff;
62*61046927SAndroid Build Coastguard Worker const int flt_s = (fi.i >> 31) & 0x1;
63*61046927SAndroid Build Coastguard Worker int s, e, m = 0;
64*61046927SAndroid Build Coastguard Worker uint16_t result;
65*61046927SAndroid Build Coastguard Worker
66*61046927SAndroid Build Coastguard Worker /* sign bit */
67*61046927SAndroid Build Coastguard Worker s = flt_s;
68*61046927SAndroid Build Coastguard Worker
69*61046927SAndroid Build Coastguard Worker /* handle special cases */
70*61046927SAndroid Build Coastguard Worker if ((flt_e == 0) && (flt_m == 0)) {
71*61046927SAndroid Build Coastguard Worker /* zero */
72*61046927SAndroid Build Coastguard Worker /* m = 0; - already set */
73*61046927SAndroid Build Coastguard Worker e = 0;
74*61046927SAndroid Build Coastguard Worker }
75*61046927SAndroid Build Coastguard Worker else if ((flt_e == 0) && (flt_m != 0)) {
76*61046927SAndroid Build Coastguard Worker /* denorm -- denorm float maps to 0 half */
77*61046927SAndroid Build Coastguard Worker /* m = 0; - already set */
78*61046927SAndroid Build Coastguard Worker e = 0;
79*61046927SAndroid Build Coastguard Worker }
80*61046927SAndroid Build Coastguard Worker else if ((flt_e == 0xff) && (flt_m == 0)) {
81*61046927SAndroid Build Coastguard Worker /* infinity */
82*61046927SAndroid Build Coastguard Worker /* m = 0; - already set */
83*61046927SAndroid Build Coastguard Worker e = 31;
84*61046927SAndroid Build Coastguard Worker }
85*61046927SAndroid Build Coastguard Worker else if ((flt_e == 0xff) && (flt_m != 0)) {
86*61046927SAndroid Build Coastguard Worker /* Retain the top bits of a NaN to make sure that the quiet/signaling
87*61046927SAndroid Build Coastguard Worker * status stays the same.
88*61046927SAndroid Build Coastguard Worker */
89*61046927SAndroid Build Coastguard Worker m = flt_m >> 13;
90*61046927SAndroid Build Coastguard Worker if (!m)
91*61046927SAndroid Build Coastguard Worker m = 1;
92*61046927SAndroid Build Coastguard Worker e = 31;
93*61046927SAndroid Build Coastguard Worker }
94*61046927SAndroid Build Coastguard Worker else {
95*61046927SAndroid Build Coastguard Worker /* regular number */
96*61046927SAndroid Build Coastguard Worker const int new_exp = flt_e - 127;
97*61046927SAndroid Build Coastguard Worker if (new_exp < -14) {
98*61046927SAndroid Build Coastguard Worker /* The float32 lies in the range (0.0, min_normal16) and is rounded
99*61046927SAndroid Build Coastguard Worker * to a nearby float16 value. The result will be either zero, subnormal,
100*61046927SAndroid Build Coastguard Worker * or normal.
101*61046927SAndroid Build Coastguard Worker */
102*61046927SAndroid Build Coastguard Worker e = 0;
103*61046927SAndroid Build Coastguard Worker m = _mesa_lroundevenf((1 << 24) * fabsf(fi.f));
104*61046927SAndroid Build Coastguard Worker }
105*61046927SAndroid Build Coastguard Worker else if (new_exp > 15) {
106*61046927SAndroid Build Coastguard Worker /* map this value to infinity */
107*61046927SAndroid Build Coastguard Worker /* m = 0; - already set */
108*61046927SAndroid Build Coastguard Worker e = 31;
109*61046927SAndroid Build Coastguard Worker }
110*61046927SAndroid Build Coastguard Worker else {
111*61046927SAndroid Build Coastguard Worker /* The float32 lies in the range
112*61046927SAndroid Build Coastguard Worker * [min_normal16, max_normal16 + max_step16)
113*61046927SAndroid Build Coastguard Worker * and is rounded to a nearby float16 value. The result will be
114*61046927SAndroid Build Coastguard Worker * either normal or infinite.
115*61046927SAndroid Build Coastguard Worker */
116*61046927SAndroid Build Coastguard Worker e = new_exp + 15;
117*61046927SAndroid Build Coastguard Worker m = _mesa_lroundevenf(flt_m / (float) (1 << 13));
118*61046927SAndroid Build Coastguard Worker }
119*61046927SAndroid Build Coastguard Worker }
120*61046927SAndroid Build Coastguard Worker
121*61046927SAndroid Build Coastguard Worker assert(0 <= m && m <= 1024);
122*61046927SAndroid Build Coastguard Worker if (m == 1024) {
123*61046927SAndroid Build Coastguard Worker /* The float32 was rounded upwards into the range of the next exponent,
124*61046927SAndroid Build Coastguard Worker * so bump the exponent. This correctly handles the case where f32
125*61046927SAndroid Build Coastguard Worker * should be rounded up to float16 infinity.
126*61046927SAndroid Build Coastguard Worker */
127*61046927SAndroid Build Coastguard Worker ++e;
128*61046927SAndroid Build Coastguard Worker m = 0;
129*61046927SAndroid Build Coastguard Worker }
130*61046927SAndroid Build Coastguard Worker
131*61046927SAndroid Build Coastguard Worker result = (s << 15) | (e << 10) | m;
132*61046927SAndroid Build Coastguard Worker return result;
133*61046927SAndroid Build Coastguard Worker }
134*61046927SAndroid Build Coastguard Worker
/**
 * Convert a float to a 16-bit value by forwarding to
 * _mesa_float_to_half_rtz_slow(), which is defined outside this file.
 *
 * NOTE(review): "rtz" presumably stands for round-toward-zero -- confirm
 * against the callee's declaration.
 *
 * \param val  the float32 value to convert
 * \return whatever _mesa_float_to_half_rtz_slow() returns for \p val
 */
uint16_t
_mesa_float_to_float16_rtz_slow(float val)
{
   const uint16_t half_bits = _mesa_float_to_half_rtz_slow(val);

   return half_bits;
}
140*61046927SAndroid Build Coastguard Worker
/**
 * Convert a 2-byte half float to a 4-byte float.
 * Based on code from:
 * http://www.opengl.org/discussion_boards/ubb/Forum3/HTML/008786.html
 *
 * Every float16 value is exactly representable as a float32, so the
 * conversion is exact.  Zero and subnormal halves are converted with an
 * integer-to-float scale; normal halves, infinities and NaNs use the
 * "magic multiply" exponent rebias.  No float32 denormal is ever used as
 * an operand, so the result does not depend on denormal support being
 * enabled in the floating-point environment.
 *
 * \param val  the float16 bit pattern (1 sign, 5 exponent, 10 mantissa bits)
 * \return the same value as a float32; a NaN input yields a NaN carrying
 *         the input's mantissa bits in the top of the float32 mantissa
 */
float
_mesa_half_to_float_slow(uint16_t val)
{
   union half_cvt_bits { float f; uint32_t ui; } f32;
   const uint32_t magnitude = val & 0x7fffu;   /* exponent + mantissa */

   if (magnitude < 0x0400u) {
      /* Zero or subnormal half: the value is magnitude * 2^-24.  Both the
       * integer conversion (magnitude <= 1023) and the power-of-two scale
       * are exact, and the result is zero or a normal float32.
       */
      f32.f = (float)magnitude * (1.0f / (float)(1 << 24));
   }
   else {
      union half_cvt_bits magic;

      /* 2^112: rebiases the exponent from float16 (bias 15) to float32
       * (bias 127) once the half bits sit in the float32 field positions.
       */
      magic.ui = 0xefu << 23;

      /* Exponent / Mantissa */
      f32.ui = magnitude << 13;

      /* Adjust */
      f32.f *= magic.f;

      /* Inf / NaN: a half exponent of 31 ends up as a value >= 65536.0;
       * force the float32 exponent to all ones and keep the mantissa.
       */
      if (f32.f >= 65536.0f)
         f32.ui |= 0xffu << 23;
   }

   /* Sign */
   f32.ui |= (uint32_t)(val & 0x8000) << 16;

   return f32.f;
}
173*61046927SAndroid Build Coastguard Worker
/**
 * Takes a uint16_t, divides by 65536, converts the infinite-precision
 * result to fp16 with round-to-zero.  Used by the ASTC decoder.
 *
 * \param v  numerator; the value represented is v / 65536
 * \return the fp16 bit pattern of v / 65536, truncated toward zero
 */
uint16_t _mesa_uint16_div_64k_to_half(uint16_t v)
{
   /* v / 65536 below 4 / 65536 = 2^-14 is zero or an fp16 subnormal, whose
    * mantissa counts units of 2^-24: that is simply v * 2^8.
    */
   if (v < 4)
      return (uint16_t)(v << 8);

   /* Number of zero bits above the most significant set bit of the 16-bit
    * value.  v >= 4 here, so at least one bit is set.
    */
#ifdef HAVE___BUILTIN_CLZ
   const int lead_zeros = __builtin_clz(v) - 16;
#else
   int lead_zeros = 0;
   for (unsigned probe = 0x8000u; !(v & probe); probe >>= 1)
      lead_zeros++;
#endif

   /* Move the leading 1 up to bit 16 (the hidden bit), drop it with the
    * 16-bit mask, and keep the top 10 of the remaining 16 fraction bits.
    * The discarded low bits give the round-to-zero behavior.
    */
   const uint32_t aligned = (uint32_t)v << (lead_zeros + 1);
   const int mantissa = (int)((aligned & 0xffff) >> 6);

   /* (0{n} 1 X{15-n}) * 2^-16
    *   = 1.X * 2^(15-n-16)
    *   = 1.X * 2^(14-n - 15)
    * which is the FP16 form with biased exponent 14 - n.
    */
   const int exponent = 14 - lead_zeros;

   assert(exponent >= 1 && exponent <= 30);
   assert(mantissa >= 0 && mantissa < 0x400);

   return (uint16_t)((exponent << 10) | mantissa);
}
214