1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include <stdint.h>
17 #include <math.h>
18 #include "fplib.h"
19
// Number of significand bits in a float, counting the implicit leading 1.
// Defined here only when no earlier header has already provided it.
#if !defined(FLT_MANT_DIG)
#define FLT_MANT_DIG 24
#endif

// Reinterpret the storage of the lvalue x as a float / int64_t.
// x must be an addressable object at least as large as the target type; the
// argument is parenthesized so that any lvalue expression can be passed.
// NOTE(review): both macros type-pun through a pointer cast, so callers rely
// on the compiler tolerating this kind of aliasing.
#define as_float(x) (*((float *)&(x)))
#define as_long(x) (*((int64_t *)&(x)))
25
// Counts the number of leading zero bits in a 64-bit value.
// Returns 0 when the top bit is set and 64 when value is 0.
static uint32_t clz(uint64_t value)
{
    uint32_t count = 0;
    uint64_t probe = 0x8000000000000000ull;

    // Walk a single-bit mask down from the most significant bit until it
    // hits a set bit in value or falls off the bottom.
    while (probe != 0 && (value & probe) == 0)
    {
        probe >>= 1;
        ++count;
    }
    return count;
}
37
qcom_s64_2_f32(int64_t data,bool sat,roundingMode rnd)38 float qcom_s64_2_f32(int64_t data, bool sat, roundingMode rnd)
39 {
40 switch (rnd) {
41 case qcomRTZ: {
42 int sign = 0;
43 if (!data)
44 return 0.0f;
45 if (data < 0){
46 data = - data;
47 sign = 1;
48 }
49 uint32_t exponent = (127 + 64 - clz(data) - 1) << (FLT_MANT_DIG - 1); //add 1 for the implied 1.0 in normalized fp32 numbers
50 int mantShift = 40 - clz(data);
51 uint32_t mantissa;
52 if (mantShift >= 0)
53 mantissa = (uint32_t)((uint64_t)data >> mantShift);
54 else
55 mantissa = (uint32_t)((uint64_t)data << -mantShift);
56 mantissa &= 0x7fffff;//mask off the leading 1
57
58 uint32_t result = exponent | mantissa;
59 if (sign)
60 result |= 0x80000000;
61 return as_float(result);
62 break;
63 }
64 case qcomRTE: return (float)(data); break;
65 case qcomRTP: {
66 int sign = 0;
67 int inExact = 0;
68 uint32_t f = 0xdf000000;
69 if (!data)
70 return 0.0f;
71 if (data == 0x8000000000000000)
72 return as_float(f);
73 if (data < 0){
74 data = - data;
75 sign = 1;
76 }
77 uint32_t exponent = (127 + 64 - clz(data) - 1) << (FLT_MANT_DIG - 1); //add 1 for the implied 1.0 in normalized fp32 numbers
78 int mantShift = 40 - clz(data);
79 uint32_t mantissa;
80 if (mantShift >= 0){
81 uint64_t temp = (uint64_t)data >> mantShift;
82 if ((temp << mantShift) != data)
83 inExact = 1;
84 mantissa = (uint32_t)temp;
85 }
86 else
87 {
88 mantissa = (uint32_t)((uint64_t)data << -mantShift);
89 }
90 mantissa &= 0x7fffff;//mask off the leading 1
91
92 uint32_t result = exponent | mantissa;
93 if (sign)
94 result |= 0x80000000;
95 if (sign)
96 return as_float(result); // for negative inputs return rtz results
97 else
98 {
99 if(inExact)
100 { // for positive inputs return higher next fp
101 uint32_t high_float = 0x7f7fffff;
102 return nextafterf(as_float(result), as_float(high_float)); // could be simplified with some inc and carry operation
103 }
104 else
105 return as_float(result);
106 }
107 }
108 break;
109 case qcomRTN: {
110 int sign = 0;
111 int inExact = 0;
112 uint32_t f = 0xdf000000;
113 if (!data)
114 return 0.0f;
115 if (data == 0x8000000000000000)
116 return as_float(f);
117 if (data < 0){
118 data = - data;
119 sign = 1;
120 }
121 uint32_t exponent = (127 + 64 - clz(data) - 1) << (FLT_MANT_DIG - 1); //add 1 for the implied 1.0 in normalized fp32 numbers
122 int mantShift = 40 - clz(data);
123 uint32_t mantissa;
124 if (mantShift >= 0){
125 uint64_t temp = (uint64_t)data >> mantShift;
126 if (temp << mantShift != data)
127 inExact = 1;
128 mantissa = (uint32_t)temp;
129 }
130 else
131 mantissa = (uint32_t)((uint64_t)data << -mantShift);
132 mantissa &= 0x7fffff;//mask off the leading 1
133
134 uint32_t result = exponent | mantissa;
135 if (sign)
136 result |= 0x80000000;
137 if (!sign)
138 return as_float(result); // for positive inputs return RTZ result
139 else{
140 if(inExact){ // for negative inputs find the lower next fp number
141 uint32_t low_float = 0xff7fffff;
142 return nextafterf(as_float(result), as_float(low_float)); // could be simplified with some inc and carry operation
143 }
144 else
145 return as_float(result);
146 }
147 }
148 case qcomRoundingModeCount: {
149 break; // Avoid build error for unhandled enum value
150 }
151 }
152 return 0.0f;
153 }
154
qcom_u64_2_f32(uint64_t data,bool sat,roundingMode rnd)155 float qcom_u64_2_f32(uint64_t data, bool sat, roundingMode rnd)
156 {
157 switch (rnd) {
158 case qcomRTZ: {
159 if (!data)
160 return 0.0f;
161 uint32_t exponent = (127 + 64 - clz(data) - 1) << (FLT_MANT_DIG - 1); //add 1 for the implied 1.0 in normalized fp32 numbers
162 int mantShift = 40 - clz(data);
163 uint32_t mantissa;
164 if (mantShift >= 0)
165 mantissa = (uint32_t)(data >> mantShift);
166 else
167 mantissa = (uint32_t)(data << -mantShift);
168 mantissa &= 0x7fffff;//mask off the leading 1
169
170 uint32_t result = exponent | mantissa;
171 return as_float(result);
172 break;
173 }
174 case qcomRTE: return (float)(data); break;
175 case qcomRTP: {
176 int inExact = 0;
177 if (!data)
178 return 0.0f;
179 uint32_t exponent = (127 + 64 - clz(data) - 1) << (FLT_MANT_DIG - 1); //add 1 for the implied 1.0 in normalized fp32 numbers
180 int mantShift = 40 - clz(data);
181 uint32_t mantissa;
182 if (mantShift >= 0){
183 uint64_t temp = data >> mantShift;
184 if (temp << mantShift != data)
185 inExact = 1;
186 mantissa = (uint32_t)temp;
187 }
188 else
189 mantissa = (uint32_t)(data << -mantShift);
190 mantissa &= 0x7fffff;//mask off the leading 1
191
192 uint32_t result = exponent | mantissa;
193 if(inExact){ // for positive inputs return higher next fp
194 uint32_t high_float = 0x7f7fffff;
195 return nextafterf(as_float(result), as_float(high_float)); // could be simplified with some inc and carry operation
196 }
197 else
198 return as_float(result);
199 }
200 case qcomRTN: {
201 int inExact = 0;
202 if (!data)
203 return 0.0f;
204 uint32_t exponent = (127 + 64 - clz(data) - 1) << (FLT_MANT_DIG - 1); //add 1 for the implied 1.0 in normalized fp32 numbers
205 int mantShift = 40 - clz(data);
206 uint32_t mantissa;
207 if (mantShift >= 0){
208 uint64_t temp = (uint64_t)data >> mantShift;
209 if (temp << mantShift != data)
210 inExact = 1;
211 mantissa = (uint32_t)temp;
212 }
213 else
214 mantissa = (uint32_t)((uint64_t)data << -mantShift);
215 mantissa &= 0x7fffff;//mask off the leading 1
216
217 uint32_t result = exponent | mantissa;
218 return as_float(result); // for positive inputs return RTZ result
219 }
220 case qcomRoundingModeCount: {
221 break; // Avoid build error for unhandled enum value
222 }
223 }
224 return 0.0f;
225 }
226