/* Copyright 2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: AMD
 *
 */

#include "fixed31_32.h"
#include "calc_u64.h"

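/*
 * @brief
 * result = abs(arg)
 */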
static inline unsigned long long abs_i64(long long arg)
{
    if (arg > 0)
        return (unsigned long long)arg;
    else
        return (unsigned long long)(-arg);
}

/*
 * @brief
 * result = dividend / divisor
 * *remainder = dividend % divisor
 */
static inline unsigned long long complete_integer_division_u64(
    unsigned long long dividend, unsigned long long divisor, unsigned long long *remainder)
{
    unsigned long long result;

    VPE_ASSERT(divisor);

    result = div64_u64_rem(dividend, divisor, (uint64_t *)remainder);

    return result;
}

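/* The macros below split a raw fixed31_32 value: the low
 * FIXED31_32_BITS_PER_FRACTIONAL_PART bits (32 in this s31.32 format)
 * hold the fraction, and the remaining upper bits hold the signed
 * integer part.
 */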
#define FRACTIONAL_PART_MASK ((1ULL << FIXED31_32_BITS_PER_FRACTIONAL_PART) - 1)

#define GET_INTEGER_PART(x) ((x) >> FIXED31_32_BITS_PER_FRACTIONAL_PART)

#define GET_FRACTIONAL_PART(x) (FRACTIONAL_PART_MASK & (x))

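/*
 * @brief
 * result = numerator / denominator as a fixed31_32 value.
 *
 * The integer part comes from a 64-bit division; each fractional bit is
 * then recovered by binary long division on the remainder, and the result
 * is rounded to nearest at the LSB.
 */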
struct fixed31_32 vpe_fixpt_from_fraction(long long numerator, long long denominator)
{
    struct fixed31_32 res;

    bool arg1_negative = numerator < 0;
    bool arg2_negative = denominator < 0;

    unsigned long long arg1_value = (unsigned long long)(arg1_negative ? -numerator : numerator);
    unsigned long long arg2_value =
        (unsigned long long)(arg2_negative ? -denominator : denominator);

    unsigned long long remainder;

    /* determine integer part */

    unsigned long long res_value =
        complete_integer_division_u64(arg1_value, arg2_value, &remainder);

    VPE_ASSERT(res_value <= LONG_MAX);

    /* determine fractional part */
    {
        unsigned int i = FIXED31_32_BITS_PER_FRACTIONAL_PART;

        do {
            remainder <<= 1;

            res_value <<= 1;

            if (remainder >= arg2_value) {
                res_value |= 1;
                remainder -= arg2_value;
            }
        } while (--i != 0);
    }

    /* round up LSB */
    {
        unsigned long long summand = (remainder << 1) >= arg2_value;

        VPE_ASSERT(res_value <= LLONG_MAX - summand);

        res_value += summand;
    }

    res.value = (long long)res_value;

    if (arg1_negative ^ arg2_negative)
        res.value = -res.value;

    return res;
}

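/*
 * @brief
 * result = arg1 * arg2 as a fixed31_32 value.
 *
 * Each operand is split into integer and fractional parts and the four
 * partial products are accumulated separately:
 * (i1 + f1) * (i2 + f2) = i1*i2 + i1*f2 + i2*f1 + f1*f2,
 * with the f1*f2 term rounded to nearest and overflow checked by asserts.
 */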
struct fixed31_32 vpe_fixpt_mul(struct fixed31_32 arg1, struct fixed31_32 arg2)
{
    struct fixed31_32 res;

    bool arg1_negative = arg1.value < 0;
    bool arg2_negative = arg2.value < 0;

    unsigned long long arg1_value = (unsigned long long)(arg1_negative ? -arg1.value : arg1.value);
    unsigned long long arg2_value = (unsigned long long)(arg2_negative ? -arg2.value : arg2.value);

    unsigned long long arg1_int = GET_INTEGER_PART(arg1_value);
    unsigned long long arg2_int = GET_INTEGER_PART(arg2_value);

    unsigned long long arg1_fra = GET_FRACTIONAL_PART(arg1_value);
    unsigned long long arg2_fra = GET_FRACTIONAL_PART(arg2_value);

    unsigned long long tmp;

    res.value = (long long)(arg1_int * arg2_int);

    VPE_ASSERT(res.value <= LONG_MAX);

    res.value <<= FIXED31_32_BITS_PER_FRACTIONAL_PART;

    tmp = arg1_int * arg2_fra;

    VPE_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));

    res.value += tmp;

    tmp = arg2_int * arg1_fra;

    VPE_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));

    res.value += tmp;

    tmp = arg1_fra * arg2_fra;

    tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) +
          (tmp >= (unsigned long long)vpe_fixpt_half.value);

    VPE_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));

    res.value += tmp;

    if (arg1_negative ^ arg2_negative)
        res.value = -res.value;

    return res;
}

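/*
 * @brief
 * result = arg * arg, a specialization of vpe_fixpt_mul in which the
 * cross term int*fra is computed once and added twice:
 * (i + f)^2 = i^2 + 2*i*f + f^2.
 */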
struct fixed31_32 vpe_fixpt_sqr(struct fixed31_32 arg)
{
    struct fixed31_32 res;

    unsigned long long arg_value = abs_i64(arg.value);

    unsigned long long arg_int = GET_INTEGER_PART(arg_value);

    unsigned long long arg_fra = GET_FRACTIONAL_PART(arg_value);

    unsigned long long tmp;

    res.value = (long long)(arg_int * arg_int);

    VPE_ASSERT(res.value <= LONG_MAX);

    res.value <<= FIXED31_32_BITS_PER_FRACTIONAL_PART;

    tmp = arg_int * arg_fra;

    VPE_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));

    res.value += tmp;

    VPE_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));

    res.value += tmp;

    tmp = arg_fra * arg_fra;

    tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) +
          (tmp >= (unsigned long long)vpe_fixpt_half.value);

    VPE_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));

    res.value += tmp;

    return res;
}

struct fixed31_32 vpe_fixpt_recip(struct fixed31_32 arg)
{
    /*
     * @note
     * Newton's method would be a good way to speed this up.
     */

    VPE_ASSERT(arg.value);

    return vpe_fixpt_from_fraction(vpe_fixpt_one.value, arg.value);
}

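/*
 * @brief
 * result = sinc(arg) = sin(arg) / arg
 *
 * The argument is first reduced modulo 2*pi, then the Taylor series
 * 1 - x^2/3! + x^4/5! - ... is evaluated by Horner's method; if the
 * reduction changed the argument, the result is rescaled by
 * arg_norm / arg, since sin(arg) = sin(arg_norm).
 */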
struct fixed31_32 vpe_fixpt_sinc(struct fixed31_32 arg)
{
    struct fixed31_32 square;

    struct fixed31_32 res = vpe_fixpt_one;

    int n = 27;

    struct fixed31_32 arg_norm = arg;

    if (vpe_fixpt_le(vpe_fixpt_two_pi, vpe_fixpt_abs(arg))) {
        arg_norm =
            vpe_fixpt_sub(arg_norm, vpe_fixpt_mul_int(vpe_fixpt_two_pi,
                                        (int)div64_s64(arg_norm.value, vpe_fixpt_two_pi.value)));
    }

    square = vpe_fixpt_sqr(arg_norm);

    do {
        res = vpe_fixpt_sub(
            vpe_fixpt_one, vpe_fixpt_div_int(vpe_fixpt_mul(square, res), n * (n - 1)));

        n -= 2;
    } while (n > 2);

    if (arg.value != arg_norm.value)
        res = vpe_fixpt_div(vpe_fixpt_mul(res, arg_norm), arg);

    return res;
}

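/*
 * @brief
 * result = sin(arg) = arg * sinc(arg)
 */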
struct fixed31_32 vpe_fixpt_sin(struct fixed31_32 arg)
{
    return vpe_fixpt_mul(arg, vpe_fixpt_sinc(arg));
}

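/*
 * @brief
 * result = cos(arg), from the Taylor series 1 - x^2/2! + x^4/4! - ...
 * evaluated by Horner's method (no argument normalization yet).
 */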
struct fixed31_32 vpe_fixpt_cos(struct fixed31_32 arg)
{
    /* TODO implement argument normalization */

    const struct fixed31_32 square = vpe_fixpt_sqr(arg);

    struct fixed31_32 res = vpe_fixpt_one;

    int n = 26;

    do {
        res = vpe_fixpt_sub(
            vpe_fixpt_one, vpe_fixpt_div_int(vpe_fixpt_mul(square, res), n * (n - 1)));

        n -= 2;
    } while (n != 0);

    return res;
}

/*
 * @brief
 * result = exp(arg),
 * where abs(arg) < 1
 *
 * Calculated as a Taylor series.
 */
static struct fixed31_32 fixed31_32_exp_from_taylor_series(struct fixed31_32 arg)
{
    unsigned int n = 9;

    struct fixed31_32 res = vpe_fixpt_from_fraction(n + 2, n + 1);
    /* TODO find correct res */

    VPE_ASSERT(vpe_fixpt_lt(arg, vpe_fixpt_one));

    do
        res = vpe_fixpt_add(vpe_fixpt_one, vpe_fixpt_div_int(vpe_fixpt_mul(arg, res), n));
    while (--n != 1);

    return vpe_fixpt_add(vpe_fixpt_one, vpe_fixpt_mul(arg, res));
}

struct fixed31_32 vpe_fixpt_exp(struct fixed31_32 arg)
{
    /*
     * @brief
     * Main equation is:
     * exp(x) = exp(r + m * ln(2)) = (1 << m) * exp(r),
     * where m = round(x / ln(2)), r = x - m * ln(2)
     */

    if (vpe_fixpt_le(vpe_fixpt_ln2_div_2, vpe_fixpt_abs(arg))) {
        int m = vpe_fixpt_round(vpe_fixpt_div(arg, vpe_fixpt_ln2));

        struct fixed31_32 r = vpe_fixpt_sub(arg, vpe_fixpt_mul_int(vpe_fixpt_ln2, m));

        VPE_ASSERT(m != 0);

        VPE_ASSERT(vpe_fixpt_lt(vpe_fixpt_abs(r), vpe_fixpt_one));

        if (m > 0)
            return vpe_fixpt_shl(fixed31_32_exp_from_taylor_series(r), (unsigned char)m);
        else
            return vpe_fixpt_div_int(fixed31_32_exp_from_taylor_series(r), 1LL << -m);
    } else if (arg.value != 0)
        return fixed31_32_exp_from_taylor_series(arg);
    else
        return vpe_fixpt_one;
}

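/*
 * @brief
 * result = ln(arg), computed by Newton's method on f(y) = exp(y) - arg:
 * y_next = y - 1 + arg / exp(y), iterated until the step size is small.
 */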
struct fixed31_32 vpe_fixpt_log(struct fixed31_32 arg)
{
    struct fixed31_32 res = vpe_fixpt_neg(vpe_fixpt_one);
    /* TODO improve 1st estimation */

    struct fixed31_32 error;

    VPE_ASSERT(arg.value > 0);
    /* TODO if arg is negative, return NaN */
    /* TODO if arg is zero, return -INF */

    do {
        struct fixed31_32 res1 = vpe_fixpt_add(
            vpe_fixpt_sub(res, vpe_fixpt_one), vpe_fixpt_div(arg, vpe_fixpt_exp(res)));

        error = vpe_fixpt_sub(res, res1);

        res = res1;
        /* TODO determine max_allowed_error based on quality of exp() */
    } while (abs_i64(error.value) > 100ULL);

    return res;
}

/* This function is a generic helper that translates a fixed-point value
 * into the specified integer format, consisting of integer_bits of integer
 * part and fractional_bits of fractional part. For example, vpe_fixpt_u2d19
 * uses it to produce a 2-bit integer part and a 19-bit fractional part in
 * 32 bits. It is used in HW programming (scaler).
 */

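/* For illustration (hypothetical input): converting 1.375, stored as raw
 * fixed31_32 value 0x160000000, to U2.19 via ux_dy(value, 2, 19) gives
 * ((1 & 0x3) << 19) | (0x60000000 >> 13) = 0x80000 | 0x30000 = 0xB0000,
 * which is exactly 1.375 * 2^19.
 */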
static inline unsigned int ux_dy(
    long long value, unsigned int integer_bits, unsigned int fractional_bits)
{
    /* 1. create mask of integer part */
    unsigned int result = (1 << integer_bits) - 1;
    /* 2. mask out fractional part */
    unsigned int fractional_part = FRACTIONAL_PART_MASK & (unsigned long long)value;
    /* 3. shrink fixed point integer part to be of integer_bits width */
    result &= GET_INTEGER_PART(value);
    /* 4. make space for fractional part to be filled in after integer */
    result <<= fractional_bits;
    /* 5. shrink fixed point fractional part to be of fractional_bits width */
    fractional_part >>= FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits;
    /* 6. merge the result */
    return result | fractional_part;
}

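/*
 * @brief
 * Same as ux_dy(), but saturates to the format's maximum when value does
 * not fit in integer_bits, and never returns less than min_clamp.
 */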
static inline unsigned int clamp_ux_dy(long long value, unsigned int integer_bits,
    unsigned int fractional_bits, unsigned int min_clamp)
{
    unsigned int truncated_val = ux_dy(value, integer_bits, fractional_bits);

    if (value >= (1LL << (integer_bits + FIXED31_32_BITS_PER_FRACTIONAL_PART)))
        return (1 << (integer_bits + fractional_bits)) - 1;
    else if (truncated_val > min_clamp)
        return truncated_val;
    else
        return min_clamp;
}

unsigned int vpe_fixpt_u4d19(struct fixed31_32 arg)
{
    return ux_dy(arg.value, 4, 19);
}

unsigned int vpe_fixpt_u3d19(struct fixed31_32 arg)
{
    return ux_dy(arg.value, 3, 19);
}

unsigned int vpe_fixpt_u2d19(struct fixed31_32 arg)
{
    return ux_dy(arg.value, 2, 19);
}

unsigned int vpe_fixpt_u0d19(struct fixed31_32 arg)
{
    return ux_dy(arg.value, 0, 19);
}

unsigned int vpe_fixpt_clamp_u0d14(struct fixed31_32 arg)
{
    return clamp_ux_dy(arg.value, 0, 14, 1);
}

unsigned int vpe_fixpt_clamp_u0d10(struct fixed31_32 arg)
{
    return clamp_ux_dy(arg.value, 0, 10, 1);
}

int vpe_fixpt_s4d19(struct fixed31_32 arg)
{
    if (arg.value < 0)
        return -(int)ux_dy(vpe_fixpt_abs(arg).value, 4, 19);
    else
        return (int)ux_dy(arg.value, 4, 19);
}

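/*
 * @brief
 * Rounds value * d_pix to the nearest integer and masks it to the target
 * width. The caller presumably passes d_pix == 2^decimalBits and a mask
 * covering the destination bit width; decimalBits itself is unused here.
 */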
unsigned int vpe_to_fixed_point(
    unsigned int decimalBits, double value, unsigned int mask, double d_pix)
{
    unsigned int d_i;

    d_i = (int)((value * d_pix) + 0.5);
    d_i = d_i & mask;
    return d_i;
}