/* Copyright 2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: AMD
 *
 */

#include "fixed31_32.h"
#include "calc_u64.h"

static inline unsigned long long abs_i64(long long arg)
{
    if (arg > 0)
        return (unsigned long long)arg;
    else
        return (unsigned long long)(-arg);
}

/*
 * @brief
 * result = dividend / divisor
 * *remainder = dividend % divisor
 */
static inline unsigned long long complete_integer_division_u64(
    unsigned long long dividend, unsigned long long divisor, unsigned long long *remainder)
{
    unsigned long long result;

    VPE_ASSERT(divisor);

    result = div64_u64_rem(dividend, divisor, (uint64_t *)remainder);

    return result;
}

#define FRACTIONAL_PART_MASK ((1ULL << FIXED31_32_BITS_PER_FRACTIONAL_PART) - 1)

#define GET_INTEGER_PART(x) ((x) >> FIXED31_32_BITS_PER_FRACTIONAL_PART)

#define GET_FRACTIONAL_PART(x) (FRACTIONAL_PART_MASK & (x))

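/*
 * The integer part below comes from a 64-bit division; the 32 fractional bits are
 * then produced by binary long division, shifting the remainder left one bit at a
 * time, and the final LSB is rounded to nearest.
 * Illustrative example: vpe_fixpt_from_fraction(1, 3) divides 1 / 3 = 0 remainder 1,
 * and the refinement loop turns that remainder into the fraction 0x55555555, so
 * res.value ends up as 0x0000000055555555 (about 0.333333333).
 */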
struct fixed31_32 vpe_fixpt_from_fraction(long long numerator, long long denominator)
{
    struct fixed31_32 res;

    bool arg1_negative = numerator < 0;
    bool arg2_negative = denominator < 0;

    unsigned long long arg1_value = (unsigned long long)(arg1_negative ? -numerator : numerator);
    unsigned long long arg2_value =
        (unsigned long long)(arg2_negative ? -denominator : denominator);

    unsigned long long remainder;

    /* determine integer part */

    unsigned long long res_value =
        complete_integer_division_u64(arg1_value, arg2_value, &remainder);

    VPE_ASSERT(res_value <= LONG_MAX);

    /* determine fractional part */
    {
        unsigned int i = FIXED31_32_BITS_PER_FRACTIONAL_PART;

        do {
            remainder <<= 1;

            res_value <<= 1;

            if (remainder >= arg2_value) {
                res_value |= 1;
                remainder -= arg2_value;
            }
        } while (--i != 0);
    }

    /* round up LSB */
    {
        unsigned long long summand = (remainder << 1) >= arg2_value;

        VPE_ASSERT(res_value <= LLONG_MAX - summand);

        res_value += summand;
    }

    res.value = (long long)res_value;

    if (arg1_negative ^ arg2_negative)
        res.value = -res.value;

    return res;
}

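/*
 * The multiplication below splits each absolute value into integer and fractional
 * halves and accumulates
 *     a_int * b_int + a_int * b_fra + b_int * a_fra + a_fra * b_fra,
 * where only the fraction-by-fraction term is shifted back down by the fractional
 * bit count (with a rounding term); the sign is re-applied at the end.
 */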
struct fixed31_32 vpe_fixpt_mul(struct fixed31_32 arg1, struct fixed31_32 arg2)
{
    struct fixed31_32 res;

    bool arg1_negative = arg1.value < 0;
    bool arg2_negative = arg2.value < 0;

    unsigned long long arg1_value = (unsigned long long)(arg1_negative ? -arg1.value : arg1.value);
    unsigned long long arg2_value = (unsigned long long)(arg2_negative ? -arg2.value : arg2.value);

    unsigned long long arg1_int = GET_INTEGER_PART(arg1_value);
    unsigned long long arg2_int = GET_INTEGER_PART(arg2_value);

    unsigned long long arg1_fra = GET_FRACTIONAL_PART(arg1_value);
    unsigned long long arg2_fra = GET_FRACTIONAL_PART(arg2_value);

    unsigned long long tmp;

    res.value = (long long)(arg1_int * arg2_int);

    VPE_ASSERT(res.value <= LONG_MAX);

    res.value <<= FIXED31_32_BITS_PER_FRACTIONAL_PART;

    tmp = arg1_int * arg2_fra;

    VPE_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));

    res.value += tmp;

    tmp = arg2_int * arg1_fra;

    VPE_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));

    res.value += tmp;

    tmp = arg1_fra * arg2_fra;

    tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) +
          (tmp >= (unsigned long long)vpe_fixpt_half.value);

    VPE_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));

    res.value += tmp;

    if (arg1_negative ^ arg2_negative)
        res.value = -res.value;

    return res;
}

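/*
 * Squaring follows the same decomposition as vpe_fixpt_mul: since
 * (i + f)^2 = i*i + 2*i*f + f*f, the cross term arg_int * arg_fra is added twice.
 */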
struct fixed31_32 vpe_fixpt_sqr(struct fixed31_32 arg)
{
    struct fixed31_32 res;

    unsigned long long arg_value = abs_i64(arg.value);

    unsigned long long arg_int = GET_INTEGER_PART(arg_value);

    unsigned long long arg_fra = GET_FRACTIONAL_PART(arg_value);

    unsigned long long tmp;

    res.value = (long long)(arg_int * arg_int);

    VPE_ASSERT(res.value <= LONG_MAX);

    res.value <<= FIXED31_32_BITS_PER_FRACTIONAL_PART;

    tmp = arg_int * arg_fra;

    VPE_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));

    res.value += tmp;

    VPE_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));

    res.value += tmp;

    tmp = arg_fra * arg_fra;

    tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) +
          (tmp >= (unsigned long long)vpe_fixpt_half.value);

    VPE_ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));

    res.value += tmp;

    return res;
}

struct fixed31_32 vpe_fixpt_recip(struct fixed31_32 arg)
{
    /*
     * @note
     * Good idea to use Newton's method
     */

    VPE_ASSERT(arg.value);

    return vpe_fixpt_from_fraction(vpe_fixpt_one.value, arg.value);
}

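/*
 * sinc(x) = sin(x)/x is evaluated from the nested form of its Taylor series:
 *     sinc(x) = 1 - x^2/(2*3) * (1 - x^2/(4*5) * (... * (1 - x^2/(26*27))))
 * The argument is first reduced modulo 2*pi; when a reduction happened, the result is
 * rescaled at the end so that sin(arg)/arg is still what gets returned.
 */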
struct fixed31_32 vpe_fixpt_sinc(struct fixed31_32 arg)
{
    struct fixed31_32 square;

    struct fixed31_32 res = vpe_fixpt_one;

    int n = 27;

    struct fixed31_32 arg_norm = arg;

    if (vpe_fixpt_le(vpe_fixpt_two_pi, vpe_fixpt_abs(arg))) {
        arg_norm =
            vpe_fixpt_sub(arg_norm, vpe_fixpt_mul_int(vpe_fixpt_two_pi,
                                        (int)div64_s64(arg_norm.value, vpe_fixpt_two_pi.value)));
    }

    square = vpe_fixpt_sqr(arg_norm);

    do {
        res = vpe_fixpt_sub(
            vpe_fixpt_one, vpe_fixpt_div_int(vpe_fixpt_mul(square, res), n * (n - 1)));

        n -= 2;
    } while (n > 2);

    if (arg.value != arg_norm.value)
        res = vpe_fixpt_div(vpe_fixpt_mul(res, arg_norm), arg);

    return res;
}

struct fixed31_32 vpe_fixpt_sin(struct fixed31_32 arg)
{
    return vpe_fixpt_mul(arg, vpe_fixpt_sinc(arg));
}

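/*
 * cos(x) uses the same nested Taylor evaluation, with the even-factorial terms:
 *     cos(x) = 1 - x^2/(1*2) * (1 - x^2/(3*4) * (... * (1 - x^2/(25*26))))
 */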
struct fixed31_32 vpe_fixpt_cos(struct fixed31_32 arg)
{
    /* TODO implement argument normalization */

    const struct fixed31_32 square = vpe_fixpt_sqr(arg);

    struct fixed31_32 res = vpe_fixpt_one;

    int n = 26;

    do {
        res = vpe_fixpt_sub(
            vpe_fixpt_one, vpe_fixpt_div_int(vpe_fixpt_mul(square, res), n * (n - 1)));

        n -= 2;
    } while (n != 0);

    return res;
}

/*
 * @brief
 * result = exp(arg),
 * where abs(arg) < 1
 *
 * Calculated as Taylor series.
 */
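/*
 * The series is evaluated in nested (Horner-like) form:
 *     exp(x) ~= 1 + x * (1 + x/2 * (1 + x/3 * (... (1 + x/9 * res0))))
 * where the starting value res0 = (n + 2) / (n + 1) = 11/10 stands in for the tail of
 * the series (hence the TODO about finding a better initial res).
 */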
static struct fixed31_32 fixed31_32_exp_from_taylor_series(struct fixed31_32 arg)
{
    unsigned int n = 9;

    struct fixed31_32 res = vpe_fixpt_from_fraction(n + 2, n + 1);
    /* TODO find correct res */

    VPE_ASSERT(vpe_fixpt_lt(arg, vpe_fixpt_one));

    do
        res = vpe_fixpt_add(vpe_fixpt_one, vpe_fixpt_div_int(vpe_fixpt_mul(arg, res), n));
    while (--n != 1);

    return vpe_fixpt_add(vpe_fixpt_one, vpe_fixpt_mul(arg, res));
}

struct fixed31_32 vpe_fixpt_exp(struct fixed31_32 arg)
{
    /*
     * @brief
     * Main equation is:
     * exp(x) = exp(r + m * ln(2)) = (1 << m) * exp(r),
     * where m = round(x / ln(2)), r = x - m * ln(2)
     */
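    /*
     * Worked example (illustrative): for x = 3, m = round(3 / ln(2)) = 4 and
     * r = 3 - 4 * ln(2) ~= 0.2274, so exp(3) ~= exp(0.2274) << 4 ~= 1.2554 * 16 ~= 20.09.
     */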

    if (vpe_fixpt_le(vpe_fixpt_ln2_div_2, vpe_fixpt_abs(arg))) {
        int m = vpe_fixpt_round(vpe_fixpt_div(arg, vpe_fixpt_ln2));

        struct fixed31_32 r = vpe_fixpt_sub(arg, vpe_fixpt_mul_int(vpe_fixpt_ln2, m));

        VPE_ASSERT(m != 0);

        VPE_ASSERT(vpe_fixpt_lt(vpe_fixpt_abs(r), vpe_fixpt_one));

        if (m > 0)
            return vpe_fixpt_shl(fixed31_32_exp_from_taylor_series(r), (unsigned char)m);
        else
            return vpe_fixpt_div_int(fixed31_32_exp_from_taylor_series(r), 1LL << -m);
    } else if (arg.value != 0)
        return fixed31_32_exp_from_taylor_series(arg);
    else
        return vpe_fixpt_one;
}

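/*
 * The loop below is a Newton iteration for solving exp(y) = arg:
 *     y_next = y - (exp(y) - arg) / exp(y) = y - 1 + arg / exp(y),
 * which is exactly how res1 is formed; iteration stops once the step shrinks to about
 * 100 raw LSBs (roughly 2^-25).
 */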
struct fixed31_32 vpe_fixpt_log(struct fixed31_32 arg)
{
    struct fixed31_32 res = vpe_fixpt_neg(vpe_fixpt_one);
    /* TODO improve 1st estimation */

    struct fixed31_32 error;

    VPE_ASSERT(arg.value > 0);
    /* TODO if arg is negative, return NaN */
    /* TODO if arg is zero, return -INF */

    do {
        struct fixed31_32 res1 = vpe_fixpt_add(
            vpe_fixpt_sub(res, vpe_fixpt_one), vpe_fixpt_div(arg, vpe_fixpt_exp(res)));

        error = vpe_fixpt_sub(res, res1);

        res = res1;
        /* TODO determine max_allowed_error based on quality of exp() */
    } while (abs_i64(error.value) > 100ULL);

    return res;
}

/* This function is a generic helper that translates a fixed point value into the
 * specified integer format, consisting of integer_bits of integer part and
 * fractional_bits of fractional part. For example, vpe_fixpt_u2d19 uses it to
 * produce a 2-bit integer part and a 19-bit fractional part packed into 32 bits.
 * It is used in HW programming (scaler).
 */

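/*
 * Illustrative example: for arg = 1.75 (raw value 0x1C0000000), ux_dy(value, 2, 19)
 * keeps integer part 1, shifts the fraction 0xC0000000 right by 32 - 19 = 13 bits to
 * get 0x60000, and returns (1 << 19) | 0x60000 = 0xE0000, i.e. 1.75 in U2.19 format.
 */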
static inline unsigned int ux_dy(
    long long value, unsigned int integer_bits, unsigned int fractional_bits)
{
    /* 1. create mask of integer part */
    unsigned int result = (1 << integer_bits) - 1;
    /* 2. mask out fractional part */
    unsigned int fractional_part = FRACTIONAL_PART_MASK & (unsigned long long)value;
    /* 3. shrink fixed point integer part to be of integer_bits width */
    result &= GET_INTEGER_PART(value);
    /* 4. make space for fractional part to be filled in after integer */
    result <<= fractional_bits;
    /* 5. shrink fixed point fractional part to be of fractional_bits width */
    fractional_part >>= FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits;
    /* 6. merge the result */
    return result | fractional_part;
}

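/*
 * Same conversion as ux_dy, but saturating: values that overflow integer_bits return
 * the largest representable code, and results below min_clamp are raised to min_clamp.
 */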
static inline unsigned int clamp_ux_dy(long long value, unsigned int integer_bits,
    unsigned int fractional_bits, unsigned int min_clamp)
{
    unsigned int truncated_val = ux_dy(value, integer_bits, fractional_bits);

    if (value >= (1LL << (integer_bits + FIXED31_32_BITS_PER_FRACTIONAL_PART)))
        return (1 << (integer_bits + fractional_bits)) - 1;
    else if (truncated_val > min_clamp)
        return truncated_val;
    else
        return min_clamp;
}

unsigned int vpe_fixpt_u4d19(struct fixed31_32 arg)
{
    return ux_dy(arg.value, 4, 19);
}

unsigned int vpe_fixpt_u3d19(struct fixed31_32 arg)
{
    return ux_dy(arg.value, 3, 19);
}

unsigned int vpe_fixpt_u2d19(struct fixed31_32 arg)
{
    return ux_dy(arg.value, 2, 19);
}

unsigned int vpe_fixpt_u0d19(struct fixed31_32 arg)
{
    return ux_dy(arg.value, 0, 19);
}

unsigned int vpe_fixpt_clamp_u0d14(struct fixed31_32 arg)
{
    return clamp_ux_dy(arg.value, 0, 14, 1);
}

unsigned int vpe_fixpt_clamp_u0d10(struct fixed31_32 arg)
{
    return clamp_ux_dy(arg.value, 0, 10, 1);
}

int vpe_fixpt_s4d19(struct fixed31_32 arg)
{
    if (arg.value < 0)
        return -(int)ux_dy(vpe_fixpt_abs(arg).value, 4, 19);
    else
        return (int)ux_dy(arg.value, 4, 19);
}

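/*
 * Scales a double by d_pix, rounds by adding 0.5 and truncating, and masks the result
 * to the target field width. Note that the decimalBits parameter is currently unused.
 */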
unsigned int vpe_to_fixed_point(
    unsigned int decimalBits, double value, unsigned int mask, double d_pix)
{
    unsigned int d_i;

    d_i = (int)((value * d_pix) + 0.5);
    d_i = d_i & mask;
    return d_i;
}