xref: /aosp_15_r20/external/arm-optimized-routines/pl/math/math_config.h (revision 412f47f9e737e10ed5cc46ec6a8d7fa2264f8a14)
1 /*
2  * Configuration for math routines.
3  *
4  * Copyright (c) 2017-2024, Arm Limited.
5  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
6  */
7 
8 #ifndef _MATH_CONFIG_H
9 #define _MATH_CONFIG_H
10 
11 #include <math.h>
12 #include <stdint.h>
13 
14 #ifndef WANT_ROUNDING
15 /* If defined to 1, return correct results for special cases in non-nearest
16    rounding modes (logf (1.0f) returns 0.0f with FE_DOWNWARD rather than
17    -0.0f). This may be set to 0 if there is no fenv support or if math
18    functions only get called in round to nearest mode.  */
19 # define WANT_ROUNDING 1
20 #endif
21 #ifndef WANT_ERRNO
22 /* If defined to 1, set errno in math functions according to ISO C.  Many math
23    libraries do not set errno, so this is 0 by default.  It may need to be
24    set to 1 if math.h has (math_errhandling & MATH_ERRNO) != 0.  */
25 # define WANT_ERRNO 0
26 #endif
27 #ifndef WANT_SIMD_EXCEPT
28 /* If defined to 1, trigger fp exceptions in vector routines, consistently with
29    behaviour expected from the corresponding scalar routine.  */
30 # define WANT_SIMD_EXCEPT 0
31 #endif
32 
33 /* Compiler can inline round as a single instruction.  */
34 #ifndef HAVE_FAST_ROUND
35 # if __aarch64__
36 #  define HAVE_FAST_ROUND 1
37 # else
38 #  define HAVE_FAST_ROUND 0
39 # endif
40 #endif
41 
42 /* Compiler can inline lround, but not (long)round(x).  */
43 #ifndef HAVE_FAST_LROUND
44 # if __aarch64__ && (100 * __GNUC__ + __GNUC_MINOR__) >= 408                 \
45       && __NO_MATH_ERRNO__
46 #  define HAVE_FAST_LROUND 1
47 # else
48 #  define HAVE_FAST_LROUND 0
49 # endif
50 #endif
51 
52 /* Compiler can inline fma as a single instruction.  */
53 #ifndef HAVE_FAST_FMA
54 # if defined FP_FAST_FMA || __aarch64__
55 #  define HAVE_FAST_FMA 1
56 # else
57 #  define HAVE_FAST_FMA 0
58 # endif
59 #endif
60 
61 /* Provide *_finite symbols and some of the glibc hidden symbols
62    so libmathlib can be used with binaries compiled against glibc
63    to interpose math functions with both static and dynamic linking.  */
64 #ifndef USE_GLIBC_ABI
65 # if __GNUC__
66 #  define USE_GLIBC_ABI 1
67 # else
68 #  define USE_GLIBC_ABI 0
69 # endif
70 #endif
71 
72 /* Symbol renames to avoid libc conflicts.  */
73 #define __exp_data pl_math_exp_data
74 
75 /* Optionally used extensions.  */
76 #ifdef __GNUC__
77 # define HIDDEN __attribute__ ((__visibility__ ("hidden")))
78 # define NOINLINE __attribute__ ((noinline))
79 # define UNUSED __attribute__ ((unused))
80 # define likely(x) __builtin_expect (!!(x), 1)
81 # define unlikely(x) __builtin_expect (x, 0)
82 # if __GNUC__ >= 9
83 #  define attribute_copy(f) __attribute__ ((copy (f)))
84 # else
85 #  define attribute_copy(f)
86 # endif
87 # define strong_alias(f, a)                                                   \
88     extern __typeof (f) a __attribute__ ((alias (#f))) attribute_copy (f);
89 # define hidden_alias(f, a)                                                   \
90     extern __typeof (f) a __attribute__ ((alias (#f), visibility ("hidden"))) \
91 	attribute_copy (f);
92 #else
93 # define HIDDEN
94 # define NOINLINE
95 # define UNUSED
96 # define likely(x) (x)
97 # define unlikely(x) (x)
98 #endif
99 
100 /* Return ptr but hide its value from the compiler so accesses through it
101    cannot be optimized based on the contents.  */
102 #define ptr_barrier(ptr)                                                      \
103   ({                                                                          \
104     __typeof (ptr) __ptr = (ptr);                                             \
105     __asm("" : "+r"(__ptr));                                                  \
106     __ptr;                                                                    \
107   })
108 
109 /* Symbol renames to avoid libc conflicts.  */
110 #define __math_oflowf arm_math_oflowf
111 #define __math_uflowf arm_math_uflowf
112 #define __math_may_uflowf arm_math_may_uflowf
113 #define __math_divzerof arm_math_divzerof
114 #define __math_oflow arm_math_oflow
115 #define __math_uflow arm_math_uflow
116 #define __math_may_uflow arm_math_may_uflow
117 #define __math_divzero arm_math_divzero
118 #define __math_invalidf arm_math_invalidf
119 #define __math_invalid arm_math_invalid
120 #define __math_check_oflow arm_math_check_oflow
121 #define __math_check_uflow arm_math_check_uflow
122 #define __math_check_oflowf arm_math_check_oflowf
123 #define __math_check_uflowf arm_math_check_uflowf
124 
125 #if HAVE_FAST_ROUND
126 /* When set, the roundtoint and converttoint functions are provided with
127    the semantics documented below.  */
128 # define TOINT_INTRINSICS 1
129 
130 /* Round x to nearest int in all rounding modes, ties have to be rounded
131    consistently with converttoint so the results match.  If the result
132    would be outside of [-2^31, 2^31-1] then the semantics is unspecified.  */
133 static inline double_t
roundtoint(double_t x)134 roundtoint (double_t x)
135 {
136   return round (x);
137 }
138 
139 /* Convert x to nearest int in all rounding modes, ties have to be rounded
140    consistently with roundtoint.  If the result is not representible in an
141    int32_t then the semantics is unspecified.  */
142 static inline int32_t
converttoint(double_t x)143 converttoint (double_t x)
144 {
145 # if HAVE_FAST_LROUND
146   return lround (x);
147 # else
148   return (long) round (x);
149 # endif
150 }
151 #endif
152 
153 static inline uint32_t
asuint(float f)154 asuint (float f)
155 {
156   union
157   {
158     float f;
159     uint32_t i;
160   } u = { f };
161   return u.i;
162 }
163 
164 static inline float
asfloat(uint32_t i)165 asfloat (uint32_t i)
166 {
167   union
168   {
169     uint32_t i;
170     float f;
171   } u = { i };
172   return u.f;
173 }
174 
175 static inline uint64_t
asuint64(double f)176 asuint64 (double f)
177 {
178   union
179   {
180     double f;
181     uint64_t i;
182   } u = { f };
183   return u.i;
184 }
185 
186 static inline double
asdouble(uint64_t i)187 asdouble (uint64_t i)
188 {
189   union
190   {
191     uint64_t i;
192     double f;
193   } u = { i };
194   return u.f;
195 }
196 
197 #ifndef IEEE_754_2008_SNAN
198 # define IEEE_754_2008_SNAN 1
199 #endif
200 static inline int
issignalingf_inline(float x)201 issignalingf_inline (float x)
202 {
203   uint32_t ix = asuint (x);
204   if (!IEEE_754_2008_SNAN)
205     return (ix & 0x7fc00000) == 0x7fc00000;
206   return 2 * (ix ^ 0x00400000) > 2u * 0x7fc00000;
207 }
208 
209 static inline int
issignaling_inline(double x)210 issignaling_inline (double x)
211 {
212   uint64_t ix = asuint64 (x);
213   if (!IEEE_754_2008_SNAN)
214     return (ix & 0x7ff8000000000000) == 0x7ff8000000000000;
215   return 2 * (ix ^ 0x0008000000000000) > 2 * 0x7ff8000000000000ULL;
216 }
217 
218 #if __aarch64__ && __GNUC__
219 /* Prevent the optimization of a floating-point expression.  */
220 static inline float
opt_barrier_float(float x)221 opt_barrier_float (float x)
222 {
223   __asm__ __volatile__ ("" : "+w" (x));
224   return x;
225 }
226 static inline double
opt_barrier_double(double x)227 opt_barrier_double (double x)
228 {
229   __asm__ __volatile__ ("" : "+w" (x));
230   return x;
231 }
232 /* Force the evaluation of a floating-point expression for its side-effect.  */
233 static inline void
force_eval_float(float x)234 force_eval_float (float x)
235 {
236   __asm__ __volatile__ ("" : "+w" (x));
237 }
238 static inline void
force_eval_double(double x)239 force_eval_double (double x)
240 {
241   __asm__ __volatile__ ("" : "+w" (x));
242 }
243 #else
244 static inline float
opt_barrier_float(float x)245 opt_barrier_float (float x)
246 {
247   volatile float y = x;
248   return y;
249 }
250 static inline double
opt_barrier_double(double x)251 opt_barrier_double (double x)
252 {
253   volatile double y = x;
254   return y;
255 }
256 static inline void
force_eval_float(float x)257 force_eval_float (float x)
258 {
259   volatile float y UNUSED = x;
260 }
261 static inline void
force_eval_double(double x)262 force_eval_double (double x)
263 {
264   volatile double y UNUSED = x;
265 }
266 #endif
267 
268 /* Evaluate an expression as the specified type, normally a type
269    cast should be enough, but compilers implement non-standard
270    excess-precision handling, so when FLT_EVAL_METHOD != 0 then
271    these functions may need to be customized.  */
272 static inline float
eval_as_float(float x)273 eval_as_float (float x)
274 {
275   return x;
276 }
277 static inline double
eval_as_double(double x)278 eval_as_double (double x)
279 {
280   return x;
281 }
282 
283 /* Error handling tail calls for special cases, with a sign argument.
284    The sign of the return value is set if the argument is non-zero.  */
285 
286 /* The result overflows.  */
287 HIDDEN float __math_oflowf (uint32_t);
288 /* The result underflows to 0 in nearest rounding mode.  */
289 HIDDEN float __math_uflowf (uint32_t);
290 /* The result underflows to 0 in some directed rounding mode only.  */
291 HIDDEN float __math_may_uflowf (uint32_t);
292 /* Division by zero.  */
293 HIDDEN float __math_divzerof (uint32_t);
294 /* The result overflows.  */
295 HIDDEN double __math_oflow (uint32_t);
296 /* The result underflows to 0 in nearest rounding mode.  */
297 HIDDEN double __math_uflow (uint32_t);
298 /* The result underflows to 0 in some directed rounding mode only.  */
299 HIDDEN double __math_may_uflow (uint32_t);
300 /* Division by zero.  */
301 HIDDEN double __math_divzero (uint32_t);
302 
303 /* Error handling using input checking.  */
304 
305 /* Invalid input unless it is a quiet NaN.  */
306 HIDDEN float __math_invalidf (float);
307 /* Invalid input unless it is a quiet NaN.  */
308 HIDDEN double __math_invalid (double);
309 
310 /* Error handling using output checking, only for errno setting.  */
311 
312 /* Check if the result overflowed to infinity.  */
313 HIDDEN double __math_check_oflow (double);
314 /* Check if the result underflowed to 0.  */
315 HIDDEN double __math_check_uflow (double);
316 
317 /* Check if the result overflowed to infinity.  */
318 static inline double
check_oflow(double x)319 check_oflow (double x)
320 {
321   return WANT_ERRNO ? __math_check_oflow (x) : x;
322 }
323 
324 /* Check if the result underflowed to 0.  */
325 static inline double
check_uflow(double x)326 check_uflow (double x)
327 {
328   return WANT_ERRNO ? __math_check_uflow (x) : x;
329 }
330 
331 /* Check if the result overflowed to infinity.  */
332 HIDDEN float __math_check_oflowf (float);
333 /* Check if the result underflowed to 0.  */
334 HIDDEN float __math_check_uflowf (float);
335 
336 /* Check if the result overflowed to infinity.  */
337 static inline float
check_oflowf(float x)338 check_oflowf (float x)
339 {
340   return WANT_ERRNO ? __math_check_oflowf (x) : x;
341 }
342 
343 /* Check if the result underflowed to 0.  */
344 static inline float
check_uflowf(float x)345 check_uflowf (float x)
346 {
347   return WANT_ERRNO ? __math_check_uflowf (x) : x;
348 }
349 
350 extern const struct erff_data
351 {
352   struct
353   {
354     float erf, scale;
355   } tab[513];
356 } __erff_data HIDDEN;
357 
358 extern const struct sv_erff_data
359 {
360   float erf[513];
361   float scale[513];
362 } __sv_erff_data HIDDEN;
363 
364 extern const struct erfcf_data
365 {
366   struct
367   {
368     float erfc, scale;
369   } tab[645];
370 } __erfcf_data HIDDEN;
371 
372 /* Data for logf and log10f.  */
373 #define LOGF_TABLE_BITS 4
374 #define LOGF_POLY_ORDER 4
375 extern const struct logf_data
376 {
377   struct
378   {
379     double invc, logc;
380   } tab[1 << LOGF_TABLE_BITS];
381   double ln2;
382   double invln10;
383   double poly[LOGF_POLY_ORDER - 1]; /* First order coefficient is 1.  */
384 } __logf_data HIDDEN;
385 
386 /* Data for low accuracy log10 (with 1/ln(10) included in coefficients).  */
387 #define LOG10_TABLE_BITS 7
388 #define LOG10_POLY_ORDER 6
389 #define LOG10_POLY1_ORDER 12
390 extern const struct log10_data
391 {
392   double ln2hi;
393   double ln2lo;
394   double invln10;
395   double poly[LOG10_POLY_ORDER - 1]; /* First coefficient is 1/log(10).  */
396   double poly1[LOG10_POLY1_ORDER - 1];
397   struct
398   {
399     double invc, logc;
400   } tab[1 << LOG10_TABLE_BITS];
401 #if !HAVE_FAST_FMA
402   struct
403   {
404     double chi, clo;
405   } tab2[1 << LOG10_TABLE_BITS];
406 #endif
407 } __log10_data HIDDEN;
408 
409 #define EXP_TABLE_BITS 7
410 #define EXP_POLY_ORDER 5
411 /* Use polynomial that is optimized for a wider input range.  This may be
412    needed for good precision in non-nearest rounding and !TOINT_INTRINSICS.  */
413 #define EXP_POLY_WIDE 0
414 /* Use close to nearest rounding toint when !TOINT_INTRINSICS.  This may be
415    needed for good precision in non-nearest rouning and !EXP_POLY_WIDE.  */
416 #define EXP_USE_TOINT_NARROW 0
417 #define EXP2_POLY_ORDER 5
418 #define EXP2_POLY_WIDE 0
419 extern const struct exp_data
420 {
421   double invln2N;
422   double shift;
423   double negln2hiN;
424   double negln2loN;
425   double poly[4]; /* Last four coefficients.  */
426   double exp2_shift;
427   double exp2_poly[EXP2_POLY_ORDER];
428   uint64_t tab[2 * (1 << EXP_TABLE_BITS)];
429 } __exp_data HIDDEN;
430 
431 /* Copied from math/v_exp.h for use in vector exp_tail.  */
432 #define V_EXP_TAIL_TABLE_BITS 8
433 extern const uint64_t __v_exp_tail_data[1 << V_EXP_TAIL_TABLE_BITS] HIDDEN;
434 
435 /* Copied from math/v_exp.h for use in vector exp2.  */
436 #define V_EXP_TABLE_BITS 7
437 extern const uint64_t __v_exp_data[1 << V_EXP_TABLE_BITS] HIDDEN;
438 
439 extern const struct erf_data
440 {
441   struct
442   {
443     double erf, scale;
444   } tab[769];
445 } __erf_data HIDDEN;
446 
447 extern const struct sv_erf_data
448 {
449   double erf[769];
450   double scale[769];
451 } __sv_erf_data HIDDEN;
452 
453 extern const struct erfc_data
454 {
455   struct
456   {
457     double erfc, scale;
458   } tab[3488];
459 } __erfc_data HIDDEN;
460 
461 #define ATAN_POLY_NCOEFFS 20
462 extern const struct atan_poly_data
463 {
464   double poly[ATAN_POLY_NCOEFFS];
465 } __atan_poly_data HIDDEN;
466 
467 #define ATANF_POLY_NCOEFFS 8
468 extern const struct atanf_poly_data
469 {
470   float poly[ATANF_POLY_NCOEFFS];
471 } __atanf_poly_data HIDDEN;
472 
473 #define ASINHF_NCOEFFS 8
474 extern const struct asinhf_data
475 {
476   float coeffs[ASINHF_NCOEFFS];
477 } __asinhf_data HIDDEN;
478 
479 #define LOG_TABLE_BITS 7
480 #define LOG_POLY_ORDER 6
481 #define LOG_POLY1_ORDER 12
482 extern const struct log_data
483 {
484   double ln2hi;
485   double ln2lo;
486   double poly[LOG_POLY_ORDER - 1]; /* First coefficient is 1.  */
487   double poly1[LOG_POLY1_ORDER - 1];
488   struct
489   {
490     double invc, logc;
491   } tab[1 << LOG_TABLE_BITS];
492 #if !HAVE_FAST_FMA
493   struct
494   {
495     double chi, clo;
496   } tab2[1 << LOG_TABLE_BITS];
497 #endif
498 } __log_data HIDDEN;
499 
500 #define ASINH_NCOEFFS 18
501 extern const struct asinh_data
502 {
503   double poly[ASINH_NCOEFFS];
504 } __asinh_data HIDDEN;
505 
506 #define LOG1P_NCOEFFS 19
507 extern const struct log1p_data
508 {
509   double coeffs[LOG1P_NCOEFFS];
510 } __log1p_data HIDDEN;
511 
512 #define LOG1PF_2U5
513 #define LOG1PF_NCOEFFS 9
514 extern const struct log1pf_data
515 {
516   float coeffs[LOG1PF_NCOEFFS];
517 } __log1pf_data HIDDEN;
518 
519 #define TANF_P_POLY_NCOEFFS 6
520 /* cotan approach needs order 3 on [0, pi/4] to reach <3.5ulps.  */
521 #define TANF_Q_POLY_NCOEFFS 4
522 extern const struct tanf_poly_data
523 {
524   float poly_tan[TANF_P_POLY_NCOEFFS];
525   float poly_cotan[TANF_Q_POLY_NCOEFFS];
526 } __tanf_poly_data HIDDEN;
527 
528 #define V_LOG2_TABLE_BITS 7
529 extern const struct v_log2_data
530 {
531   double poly[5];
532   double invln2;
533   struct
534   {
535     double invc, log2c;
536   } table[1 << V_LOG2_TABLE_BITS];
537 } __v_log2_data HIDDEN;
538 
539 #define V_LOG10_TABLE_BITS 7
540 extern const struct v_log10_data
541 {
542   double poly[5];
543   double invln10, log10_2;
544   struct
545   {
546     double invc, log10c;
547   } table[1 << V_LOG10_TABLE_BITS];
548 } __v_log10_data HIDDEN;
549 
550 /* Some data for SVE powf's internal exp and log.  */
551 #define V_POWF_EXP2_TABLE_BITS 5
552 #define V_POWF_EXP2_N (1 << V_POWF_EXP2_TABLE_BITS)
553 #define V_POWF_LOG2_TABLE_BITS 5
554 #define V_POWF_LOG2_N (1 << V_POWF_LOG2_TABLE_BITS)
555 extern const struct v_powf_data
556 {
557   double invc[V_POWF_LOG2_N];
558   double logc[V_POWF_LOG2_N];
559   uint64_t scale[V_POWF_EXP2_N];
560 } __v_powf_data HIDDEN;
561 
562 #define V_LOG_POLY_ORDER 6
563 #define V_LOG_TABLE_BITS 7
564 extern const struct v_log_data
565 {
566   /* Shared data for vector log and log-derived routines (e.g. asinh).  */
567   double poly[V_LOG_POLY_ORDER - 1];
568   double ln2;
569   struct
570   {
571     double invc, logc;
572   } table[1 << V_LOG_TABLE_BITS];
573 } __v_log_data HIDDEN;
574 
575 #define EXPM1F_POLY_ORDER 5
576 extern const float __expm1f_poly[EXPM1F_POLY_ORDER] HIDDEN;
577 
578 #define EXPF_TABLE_BITS 5
579 #define EXPF_POLY_ORDER 3
580 extern const struct expf_data
581 {
582   uint64_t tab[1 << EXPF_TABLE_BITS];
583   double invln2_scaled;
584   double poly_scaled[EXPF_POLY_ORDER];
585 } __expf_data HIDDEN;
586 
587 #define EXPM1_POLY_ORDER 11
588 extern const double __expm1_poly[EXPM1_POLY_ORDER] HIDDEN;
589 
590 extern const struct cbrtf_data
591 {
592   float poly[4];
593   float table[5];
594 } __cbrtf_data HIDDEN;
595 
596 extern const struct cbrt_data
597 {
598   double poly[4];
599   double table[5];
600 } __cbrt_data HIDDEN;
601 
602 #define ASINF_POLY_ORDER 4
603 extern const float __asinf_poly[ASINF_POLY_ORDER + 1] HIDDEN;
604 
605 #define ASIN_POLY_ORDER 11
606 extern const double __asin_poly[ASIN_POLY_ORDER + 1] HIDDEN;
607 
608 /* Some data for AdvSIMD and SVE pow's internal exp and log.  */
609 #define V_POW_EXP_TABLE_BITS 8
610 extern const struct v_pow_exp_data
611 {
612   double poly[3];
613   double n_over_ln2, ln2_over_n_hi, ln2_over_n_lo, shift;
614   uint64_t sbits[1 << V_POW_EXP_TABLE_BITS];
615 } __v_pow_exp_data HIDDEN;
616 
617 #define V_POW_LOG_TABLE_BITS 7
618 extern const struct v_pow_log_data
619 {
620   double poly[7]; /* First coefficient is 1.  */
621   double ln2_hi, ln2_lo;
622   double invc[1 << V_POW_LOG_TABLE_BITS];
623   double logc[1 << V_POW_LOG_TABLE_BITS];
624   double logctail[1 << V_POW_LOG_TABLE_BITS];
625 } __v_pow_log_data HIDDEN;
626 
627 #endif
628