xref: /aosp_15_r20/external/igt-gpu-tools/lib/igt_halffloat.c (revision d83cc019efdc2edc6c4b16e9034a3ceb8d35d77c)
1*d83cc019SAndroid Build Coastguard Worker /*
2*d83cc019SAndroid Build Coastguard Worker  * Copyright (C) 1999-2007  Brian Paul   All Rights Reserved.
3*d83cc019SAndroid Build Coastguard Worker  * Copyright 2015 Philip Taylor <[email protected]>
4*d83cc019SAndroid Build Coastguard Worker  * Copyright 2018 Advanced Micro Devices, Inc.
5*d83cc019SAndroid Build Coastguard Worker  *
6*d83cc019SAndroid Build Coastguard Worker  * Permission is hereby granted, free of charge, to any person obtaining a
7*d83cc019SAndroid Build Coastguard Worker  * copy of this software and associated documentation files (the "Software"),
8*d83cc019SAndroid Build Coastguard Worker  * to deal in the Software without restriction, including without limitation
9*d83cc019SAndroid Build Coastguard Worker  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10*d83cc019SAndroid Build Coastguard Worker  * and/or sell copies of the Software, and to permit persons to whom the
11*d83cc019SAndroid Build Coastguard Worker  * Software is furnished to do so, subject to the following conditions:
12*d83cc019SAndroid Build Coastguard Worker  *
13*d83cc019SAndroid Build Coastguard Worker  * The above copyright notice and this permission notice shall be included
14*d83cc019SAndroid Build Coastguard Worker  * in all copies or substantial portions of the Software.
15*d83cc019SAndroid Build Coastguard Worker  *
16*d83cc019SAndroid Build Coastguard Worker  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17*d83cc019SAndroid Build Coastguard Worker  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18*d83cc019SAndroid Build Coastguard Worker  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19*d83cc019SAndroid Build Coastguard Worker  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20*d83cc019SAndroid Build Coastguard Worker  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21*d83cc019SAndroid Build Coastguard Worker  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22*d83cc019SAndroid Build Coastguard Worker  * OTHER DEALINGS IN THE SOFTWARE.
23*d83cc019SAndroid Build Coastguard Worker  */
24*d83cc019SAndroid Build Coastguard Worker 
25*d83cc019SAndroid Build Coastguard Worker #include <assert.h>
26*d83cc019SAndroid Build Coastguard Worker #include <math.h>
27*d83cc019SAndroid Build Coastguard Worker 
28*d83cc019SAndroid Build Coastguard Worker #include "igt_halffloat.h"
29*d83cc019SAndroid Build Coastguard Worker #include "igt_x86.h"
30*d83cc019SAndroid Build Coastguard Worker 
31*d83cc019SAndroid Build Coastguard Worker typedef union { float f; int32_t i; uint32_t u; } fi_type;
32*d83cc019SAndroid Build Coastguard Worker 
33*d83cc019SAndroid Build Coastguard Worker /**
34*d83cc019SAndroid Build Coastguard Worker  * Convert a 4-byte float to a 2-byte half float.
35*d83cc019SAndroid Build Coastguard Worker  *
36*d83cc019SAndroid Build Coastguard Worker  * Not all float32 values can be represented exactly as a float16 value. We
37*d83cc019SAndroid Build Coastguard Worker  * round such intermediate float32 values to the nearest float16. When the
38*d83cc019SAndroid Build Coastguard Worker  * float32 lies exactly between to float16 values, we round to the one with
39*d83cc019SAndroid Build Coastguard Worker  * an even mantissa.
40*d83cc019SAndroid Build Coastguard Worker  *
41*d83cc019SAndroid Build Coastguard Worker  * This rounding behavior has several benefits:
42*d83cc019SAndroid Build Coastguard Worker  *   - It has no sign bias.
43*d83cc019SAndroid Build Coastguard Worker  *
44*d83cc019SAndroid Build Coastguard Worker  *   - It reproduces the behavior of real hardware: opcode F32TO16 in Intel's
45*d83cc019SAndroid Build Coastguard Worker  *     GPU ISA.
46*d83cc019SAndroid Build Coastguard Worker  *
47*d83cc019SAndroid Build Coastguard Worker  *   - By reproducing the behavior of the GPU (at least on Intel hardware),
48*d83cc019SAndroid Build Coastguard Worker  *     compile-time evaluation of constant packHalf2x16 GLSL expressions will
49*d83cc019SAndroid Build Coastguard Worker  *     result in the same value as if the expression were executed on the GPU.
50*d83cc019SAndroid Build Coastguard Worker  */
_float_to_half(float val)51*d83cc019SAndroid Build Coastguard Worker static inline uint16_t _float_to_half(float val)
52*d83cc019SAndroid Build Coastguard Worker {
53*d83cc019SAndroid Build Coastguard Worker 	const fi_type fi = {val};
54*d83cc019SAndroid Build Coastguard Worker 	const int flt_m = fi.i & 0x7fffff;
55*d83cc019SAndroid Build Coastguard Worker 	const int flt_e = (fi.i >> 23) & 0xff;
56*d83cc019SAndroid Build Coastguard Worker 	const int flt_s = (fi.i >> 31) & 0x1;
57*d83cc019SAndroid Build Coastguard Worker 	int s, e, m = 0;
58*d83cc019SAndroid Build Coastguard Worker 	uint16_t result;
59*d83cc019SAndroid Build Coastguard Worker 
60*d83cc019SAndroid Build Coastguard Worker 	/* sign bit */
61*d83cc019SAndroid Build Coastguard Worker 	s = flt_s;
62*d83cc019SAndroid Build Coastguard Worker 
63*d83cc019SAndroid Build Coastguard Worker 	/* handle special cases */
64*d83cc019SAndroid Build Coastguard Worker 	if ((flt_e == 0) && (flt_m == 0)) {
65*d83cc019SAndroid Build Coastguard Worker 		/* zero */
66*d83cc019SAndroid Build Coastguard Worker 		/* m = 0; - already set */
67*d83cc019SAndroid Build Coastguard Worker 		e = 0;
68*d83cc019SAndroid Build Coastguard Worker 	} else if ((flt_e == 0) && (flt_m != 0)) {
69*d83cc019SAndroid Build Coastguard Worker 		/* denorm -- denorm float maps to 0 half */
70*d83cc019SAndroid Build Coastguard Worker 		/* m = 0; - already set */
71*d83cc019SAndroid Build Coastguard Worker 		e = 0;
72*d83cc019SAndroid Build Coastguard Worker 	} else if ((flt_e == 0xff) && (flt_m == 0)) {
73*d83cc019SAndroid Build Coastguard Worker 		/* infinity */
74*d83cc019SAndroid Build Coastguard Worker 		/* m = 0; - already set */
75*d83cc019SAndroid Build Coastguard Worker 		e = 31;
76*d83cc019SAndroid Build Coastguard Worker 	} else if ((flt_e == 0xff) && (flt_m != 0)) {
77*d83cc019SAndroid Build Coastguard Worker 		/* NaN */
78*d83cc019SAndroid Build Coastguard Worker 		m = 1;
79*d83cc019SAndroid Build Coastguard Worker 		e = 31;
80*d83cc019SAndroid Build Coastguard Worker 	} else {
81*d83cc019SAndroid Build Coastguard Worker 		/* regular number */
82*d83cc019SAndroid Build Coastguard Worker 		const int new_exp = flt_e - 127;
83*d83cc019SAndroid Build Coastguard Worker 		if (new_exp < -14) {
84*d83cc019SAndroid Build Coastguard Worker 			/* The float32 lies in the range (0.0, min_normal16) and
85*d83cc019SAndroid Build Coastguard Worker 			 * is rounded to a nearby float16 value. The result will
86*d83cc019SAndroid Build Coastguard Worker 			 * be either zero, subnormal, or normal.
87*d83cc019SAndroid Build Coastguard Worker 			 */
88*d83cc019SAndroid Build Coastguard Worker 			e = 0;
89*d83cc019SAndroid Build Coastguard Worker 			m = lrintf((1 << 24) * fabsf(fi.f));
90*d83cc019SAndroid Build Coastguard Worker 		} else if (new_exp > 15) {
91*d83cc019SAndroid Build Coastguard Worker 			/* map this value to infinity */
92*d83cc019SAndroid Build Coastguard Worker 			/* m = 0; - already set */
93*d83cc019SAndroid Build Coastguard Worker 			e = 31;
94*d83cc019SAndroid Build Coastguard Worker 		} else {
95*d83cc019SAndroid Build Coastguard Worker 			/* The float32 lies in the range
96*d83cc019SAndroid Build Coastguard Worker 			 *   [min_normal16, max_normal16 + max_step16)
97*d83cc019SAndroid Build Coastguard Worker 			 * and is rounded to a nearby float16 value. The result
98*d83cc019SAndroid Build Coastguard Worker 			 * will be either normal or infinite.
99*d83cc019SAndroid Build Coastguard Worker 			 */
100*d83cc019SAndroid Build Coastguard Worker 			e = new_exp + 15;
101*d83cc019SAndroid Build Coastguard Worker 			m = lrintf(flt_m / (float)(1 << 13));
102*d83cc019SAndroid Build Coastguard Worker 		}
103*d83cc019SAndroid Build Coastguard Worker 	}
104*d83cc019SAndroid Build Coastguard Worker 
105*d83cc019SAndroid Build Coastguard Worker 	assert(0 <= m && m <= 1024);
106*d83cc019SAndroid Build Coastguard Worker 	if (m == 1024) {
107*d83cc019SAndroid Build Coastguard Worker 		/* The float32 was rounded upwards into the range of the next
108*d83cc019SAndroid Build Coastguard Worker 		 * exponent, so bump the exponent. This correctly handles the
109*d83cc019SAndroid Build Coastguard Worker 		 * case where f32 should be rounded up to float16 infinity.
110*d83cc019SAndroid Build Coastguard Worker 		 */
111*d83cc019SAndroid Build Coastguard Worker 		++e;
112*d83cc019SAndroid Build Coastguard Worker 		m = 0;
113*d83cc019SAndroid Build Coastguard Worker 	}
114*d83cc019SAndroid Build Coastguard Worker 
115*d83cc019SAndroid Build Coastguard Worker 	result = (s << 15) | (e << 10) | m;
116*d83cc019SAndroid Build Coastguard Worker 	return result;
117*d83cc019SAndroid Build Coastguard Worker }
118*d83cc019SAndroid Build Coastguard Worker 
119*d83cc019SAndroid Build Coastguard Worker /**
120*d83cc019SAndroid Build Coastguard Worker  * Convert a 2-byte half float to a 4-byte float.
121*d83cc019SAndroid Build Coastguard Worker  * Based on code from:
122*d83cc019SAndroid Build Coastguard Worker  * http://www.opengl.org/discussion_boards/ubb/Forum3/HTML/008786.html
123*d83cc019SAndroid Build Coastguard Worker  */
_half_to_float(uint16_t val)124*d83cc019SAndroid Build Coastguard Worker static inline float _half_to_float(uint16_t val)
125*d83cc019SAndroid Build Coastguard Worker {
126*d83cc019SAndroid Build Coastguard Worker 	/* XXX could also use a 64K-entry lookup table */
127*d83cc019SAndroid Build Coastguard Worker 	const int m = val & 0x3ff;
128*d83cc019SAndroid Build Coastguard Worker 	const int e = (val >> 10) & 0x1f;
129*d83cc019SAndroid Build Coastguard Worker 	const int s = (val >> 15) & 0x1;
130*d83cc019SAndroid Build Coastguard Worker 	int flt_m, flt_e, flt_s;
131*d83cc019SAndroid Build Coastguard Worker 	fi_type fi;
132*d83cc019SAndroid Build Coastguard Worker 
133*d83cc019SAndroid Build Coastguard Worker 	/* sign bit */
134*d83cc019SAndroid Build Coastguard Worker 	flt_s = s;
135*d83cc019SAndroid Build Coastguard Worker 
136*d83cc019SAndroid Build Coastguard Worker 	/* handle special cases */
137*d83cc019SAndroid Build Coastguard Worker 	if ((e == 0) && (m == 0)) {
138*d83cc019SAndroid Build Coastguard Worker 		/* zero */
139*d83cc019SAndroid Build Coastguard Worker 		flt_m = 0;
140*d83cc019SAndroid Build Coastguard Worker 		flt_e = 0;
141*d83cc019SAndroid Build Coastguard Worker 	} else if ((e == 0) && (m != 0)) {
142*d83cc019SAndroid Build Coastguard Worker 		/* denorm -- denorm half will fit in non-denorm single */
143*d83cc019SAndroid Build Coastguard Worker 		const float half_denorm = 1.0f / 16384.0f; /* 2^-14 */
144*d83cc019SAndroid Build Coastguard Worker 		float mantissa = ((float) (m)) / 1024.0f;
145*d83cc019SAndroid Build Coastguard Worker 		float sign = s ? -1.0f : 1.0f;
146*d83cc019SAndroid Build Coastguard Worker 		return sign * mantissa * half_denorm;
147*d83cc019SAndroid Build Coastguard Worker 	} else if ((e == 31) && (m == 0)) {
148*d83cc019SAndroid Build Coastguard Worker 		/* infinity */
149*d83cc019SAndroid Build Coastguard Worker 		flt_e = 0xff;
150*d83cc019SAndroid Build Coastguard Worker 		flt_m = 0;
151*d83cc019SAndroid Build Coastguard Worker 	} else if ((e == 31) && (m != 0)) {
152*d83cc019SAndroid Build Coastguard Worker 		/* NaN */
153*d83cc019SAndroid Build Coastguard Worker 		flt_e = 0xff;
154*d83cc019SAndroid Build Coastguard Worker 		flt_m = 1;
155*d83cc019SAndroid Build Coastguard Worker 	} else {
156*d83cc019SAndroid Build Coastguard Worker 		/* regular */
157*d83cc019SAndroid Build Coastguard Worker 		flt_e = e + 112;
158*d83cc019SAndroid Build Coastguard Worker 		flt_m = m << 13;
159*d83cc019SAndroid Build Coastguard Worker 	}
160*d83cc019SAndroid Build Coastguard Worker 
161*d83cc019SAndroid Build Coastguard Worker 	fi.i = (flt_s << 31) | (flt_e << 23) | flt_m;
162*d83cc019SAndroid Build Coastguard Worker 	return fi.f;
163*d83cc019SAndroid Build Coastguard Worker }
164*d83cc019SAndroid Build Coastguard Worker 
165*d83cc019SAndroid Build Coastguard Worker #if defined(__x86_64__) && !defined(__clang__)
166*d83cc019SAndroid Build Coastguard Worker #pragma GCC push_options
167*d83cc019SAndroid Build Coastguard Worker #pragma GCC target("f16c")
168*d83cc019SAndroid Build Coastguard Worker 
169*d83cc019SAndroid Build Coastguard Worker #include <immintrin.h>
170*d83cc019SAndroid Build Coastguard Worker 
float_to_half_f16c(const float * f,uint16_t * h,unsigned int num)171*d83cc019SAndroid Build Coastguard Worker static void float_to_half_f16c(const float *f, uint16_t *h, unsigned int num)
172*d83cc019SAndroid Build Coastguard Worker {
173*d83cc019SAndroid Build Coastguard Worker 	for (int i = 0; i < num; i++)
174*d83cc019SAndroid Build Coastguard Worker 		h[i] = _cvtss_sh(f[i], 0);
175*d83cc019SAndroid Build Coastguard Worker }
176*d83cc019SAndroid Build Coastguard Worker 
half_to_float_f16c(const uint16_t * h,float * f,unsigned int num)177*d83cc019SAndroid Build Coastguard Worker static void half_to_float_f16c(const uint16_t *h, float *f, unsigned int num)
178*d83cc019SAndroid Build Coastguard Worker {
179*d83cc019SAndroid Build Coastguard Worker 	for (int i = 0; i < num; i++)
180*d83cc019SAndroid Build Coastguard Worker 		f[i] = _cvtsh_ss(h[i]);
181*d83cc019SAndroid Build Coastguard Worker }
182*d83cc019SAndroid Build Coastguard Worker 
183*d83cc019SAndroid Build Coastguard Worker #pragma GCC pop_options
184*d83cc019SAndroid Build Coastguard Worker 
float_to_half(const float * f,uint16_t * h,unsigned int num)185*d83cc019SAndroid Build Coastguard Worker static void float_to_half(const float *f, uint16_t *h, unsigned int num)
186*d83cc019SAndroid Build Coastguard Worker {
187*d83cc019SAndroid Build Coastguard Worker 	for (int i = 0; i < num; i++)
188*d83cc019SAndroid Build Coastguard Worker 		h[i] = _float_to_half(f[i]);
189*d83cc019SAndroid Build Coastguard Worker }
190*d83cc019SAndroid Build Coastguard Worker 
half_to_float(const uint16_t * h,float * f,unsigned int num)191*d83cc019SAndroid Build Coastguard Worker static void half_to_float(const uint16_t *h, float *f, unsigned int num)
192*d83cc019SAndroid Build Coastguard Worker {
193*d83cc019SAndroid Build Coastguard Worker 	for (int i = 0; i < num; i++)
194*d83cc019SAndroid Build Coastguard Worker 		f[i] = _half_to_float(h[i]);
195*d83cc019SAndroid Build Coastguard Worker }
196*d83cc019SAndroid Build Coastguard Worker 
resolve_float_to_half(void)197*d83cc019SAndroid Build Coastguard Worker static void (*resolve_float_to_half(void))(const float *f, uint16_t *h, unsigned int num)
198*d83cc019SAndroid Build Coastguard Worker {
199*d83cc019SAndroid Build Coastguard Worker 	if (igt_x86_features() & F16C)
200*d83cc019SAndroid Build Coastguard Worker 		return float_to_half_f16c;
201*d83cc019SAndroid Build Coastguard Worker 
202*d83cc019SAndroid Build Coastguard Worker 	return float_to_half;
203*d83cc019SAndroid Build Coastguard Worker }
204*d83cc019SAndroid Build Coastguard Worker 
205*d83cc019SAndroid Build Coastguard Worker void igt_float_to_half(const float *f, uint16_t *h, unsigned int num)
206*d83cc019SAndroid Build Coastguard Worker 	__attribute__((ifunc("resolve_float_to_half")));
207*d83cc019SAndroid Build Coastguard Worker 
resolve_half_to_float(void)208*d83cc019SAndroid Build Coastguard Worker static void (*resolve_half_to_float(void))(const uint16_t *h, float *f, unsigned int num)
209*d83cc019SAndroid Build Coastguard Worker {
210*d83cc019SAndroid Build Coastguard Worker 	if (igt_x86_features() & F16C)
211*d83cc019SAndroid Build Coastguard Worker 		return half_to_float_f16c;
212*d83cc019SAndroid Build Coastguard Worker 
213*d83cc019SAndroid Build Coastguard Worker 	return half_to_float;
214*d83cc019SAndroid Build Coastguard Worker }
215*d83cc019SAndroid Build Coastguard Worker 
216*d83cc019SAndroid Build Coastguard Worker void igt_half_to_float(const uint16_t *h, float *f, unsigned int num)
217*d83cc019SAndroid Build Coastguard Worker 	__attribute__((ifunc("resolve_half_to_float")));
218*d83cc019SAndroid Build Coastguard Worker 
219*d83cc019SAndroid Build Coastguard Worker #else
220*d83cc019SAndroid Build Coastguard Worker 
igt_float_to_half(const float * f,uint16_t * h,unsigned int num)221*d83cc019SAndroid Build Coastguard Worker void igt_float_to_half(const float *f, uint16_t *h, unsigned int num)
222*d83cc019SAndroid Build Coastguard Worker {
223*d83cc019SAndroid Build Coastguard Worker 	for (int i = 0; i < num; i++)
224*d83cc019SAndroid Build Coastguard Worker 		h[i] = _float_to_half(f[i]);
225*d83cc019SAndroid Build Coastguard Worker }
226*d83cc019SAndroid Build Coastguard Worker 
igt_half_to_float(const uint16_t * h,float * f,unsigned int num)227*d83cc019SAndroid Build Coastguard Worker void igt_half_to_float(const uint16_t *h, float *f, unsigned int num)
228*d83cc019SAndroid Build Coastguard Worker {
229*d83cc019SAndroid Build Coastguard Worker 	for (int i = 0; i < num; i++)
230*d83cc019SAndroid Build Coastguard Worker 		f[i] = _half_to_float(h[i]);
231*d83cc019SAndroid Build Coastguard Worker }
232*d83cc019SAndroid Build Coastguard Worker 
233*d83cc019SAndroid Build Coastguard Worker #endif
234*d83cc019SAndroid Build Coastguard Worker 
235