1 /*
2 * Configuration for math routines.
3 *
4 * Copyright (c) 2017-2024, Arm Limited.
5 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
6 */
7
8 #ifndef _MATH_CONFIG_H
9 #define _MATH_CONFIG_H
10
11 #include <math.h>
12 #include <stdint.h>
13
14 #ifndef WANT_ROUNDING
15 /* If defined to 1, return correct results for special cases in non-nearest
16 rounding modes (logf (1.0f) returns 0.0f with FE_DOWNWARD rather than
17 -0.0f). This may be set to 0 if there is no fenv support or if math
18 functions only get called in round to nearest mode. */
19 # define WANT_ROUNDING 1
20 #endif
21 #ifndef WANT_ERRNO
22 /* If defined to 1, set errno in math functions according to ISO C. Many math
23 libraries do not set errno, so this is 0 by default. It may need to be
24 set to 1 if math.h has (math_errhandling & MATH_ERRNO) != 0. */
25 # define WANT_ERRNO 0
26 #endif
27 #ifndef WANT_SIMD_EXCEPT
28 /* If defined to 1, trigger fp exceptions in vector routines, consistently with
29 behaviour expected from the corresponding scalar routine. */
30 # define WANT_SIMD_EXCEPT 0
31 #endif
32
33 /* Compiler can inline round as a single instruction. */
34 #ifndef HAVE_FAST_ROUND
35 # if __aarch64__
36 # define HAVE_FAST_ROUND 1
37 # else
38 # define HAVE_FAST_ROUND 0
39 # endif
40 #endif
41
42 /* Compiler can inline lround, but not (long)round(x). */
43 #ifndef HAVE_FAST_LROUND
44 # if __aarch64__ && (100 * __GNUC__ + __GNUC_MINOR__) >= 408 \
45 && __NO_MATH_ERRNO__
46 # define HAVE_FAST_LROUND 1
47 # else
48 # define HAVE_FAST_LROUND 0
49 # endif
50 #endif
51
52 /* Compiler can inline fma as a single instruction. */
53 #ifndef HAVE_FAST_FMA
54 # if defined FP_FAST_FMA || __aarch64__
55 # define HAVE_FAST_FMA 1
56 # else
57 # define HAVE_FAST_FMA 0
58 # endif
59 #endif
60
61 /* Provide *_finite symbols and some of the glibc hidden symbols
62 so libmathlib can be used with binaries compiled against glibc
63 to interpose math functions with both static and dynamic linking. */
64 #ifndef USE_GLIBC_ABI
65 # if __GNUC__
66 # define USE_GLIBC_ABI 1
67 # else
68 # define USE_GLIBC_ABI 0
69 # endif
70 #endif
71
72 /* Symbol renames to avoid libc conflicts. */
73 #define __exp_data pl_math_exp_data
74
75 /* Optionally used extensions. */
76 #ifdef __GNUC__
77 # define HIDDEN __attribute__ ((__visibility__ ("hidden")))
78 # define NOINLINE __attribute__ ((noinline))
79 # define UNUSED __attribute__ ((unused))
80 # define likely(x) __builtin_expect (!!(x), 1)
81 # define unlikely(x) __builtin_expect (x, 0)
82 # if __GNUC__ >= 9
83 # define attribute_copy(f) __attribute__ ((copy (f)))
84 # else
85 # define attribute_copy(f)
86 # endif
87 # define strong_alias(f, a) \
88 extern __typeof (f) a __attribute__ ((alias (#f))) attribute_copy (f);
89 # define hidden_alias(f, a) \
90 extern __typeof (f) a __attribute__ ((alias (#f), visibility ("hidden"))) \
91 attribute_copy (f);
92 #else
93 # define HIDDEN
94 # define NOINLINE
95 # define UNUSED
96 # define likely(x) (x)
97 # define unlikely(x) (x)
98 #endif
99
100 /* Return ptr but hide its value from the compiler so accesses through it
101 cannot be optimized based on the contents. */
102 #define ptr_barrier(ptr) \
103 ({ \
104 __typeof (ptr) __ptr = (ptr); \
105 __asm("" : "+r"(__ptr)); \
106 __ptr; \
107 })
108
109 /* Symbol renames to avoid libc conflicts. */
110 #define __math_oflowf arm_math_oflowf
111 #define __math_uflowf arm_math_uflowf
112 #define __math_may_uflowf arm_math_may_uflowf
113 #define __math_divzerof arm_math_divzerof
114 #define __math_oflow arm_math_oflow
115 #define __math_uflow arm_math_uflow
116 #define __math_may_uflow arm_math_may_uflow
117 #define __math_divzero arm_math_divzero
118 #define __math_invalidf arm_math_invalidf
119 #define __math_invalid arm_math_invalid
120 #define __math_check_oflow arm_math_check_oflow
121 #define __math_check_uflow arm_math_check_uflow
122 #define __math_check_oflowf arm_math_check_oflowf
123 #define __math_check_uflowf arm_math_check_uflowf
124
125 #if HAVE_FAST_ROUND
126 /* When set, the roundtoint and converttoint functions are provided with
127 the semantics documented below. */
128 # define TOINT_INTRINSICS 1
129
130 /* Round x to nearest int in all rounding modes, ties have to be rounded
131 consistently with converttoint so the results match. If the result
132 would be outside of [-2^31, 2^31-1] then the semantics is unspecified. */
133 static inline double_t
roundtoint(double_t x)134 roundtoint (double_t x)
135 {
136 return round (x);
137 }
138
139 /* Convert x to nearest int in all rounding modes, ties have to be rounded
140 consistently with roundtoint. If the result is not representible in an
141 int32_t then the semantics is unspecified. */
142 static inline int32_t
converttoint(double_t x)143 converttoint (double_t x)
144 {
145 # if HAVE_FAST_LROUND
146 return lround (x);
147 # else
148 return (long) round (x);
149 # endif
150 }
151 #endif
152
153 static inline uint32_t
asuint(float f)154 asuint (float f)
155 {
156 union
157 {
158 float f;
159 uint32_t i;
160 } u = { f };
161 return u.i;
162 }
163
164 static inline float
asfloat(uint32_t i)165 asfloat (uint32_t i)
166 {
167 union
168 {
169 uint32_t i;
170 float f;
171 } u = { i };
172 return u.f;
173 }
174
175 static inline uint64_t
asuint64(double f)176 asuint64 (double f)
177 {
178 union
179 {
180 double f;
181 uint64_t i;
182 } u = { f };
183 return u.i;
184 }
185
186 static inline double
asdouble(uint64_t i)187 asdouble (uint64_t i)
188 {
189 union
190 {
191 uint64_t i;
192 double f;
193 } u = { i };
194 return u.f;
195 }
196
197 #ifndef IEEE_754_2008_SNAN
198 # define IEEE_754_2008_SNAN 1
199 #endif
200 static inline int
issignalingf_inline(float x)201 issignalingf_inline (float x)
202 {
203 uint32_t ix = asuint (x);
204 if (!IEEE_754_2008_SNAN)
205 return (ix & 0x7fc00000) == 0x7fc00000;
206 return 2 * (ix ^ 0x00400000) > 2u * 0x7fc00000;
207 }
208
209 static inline int
issignaling_inline(double x)210 issignaling_inline (double x)
211 {
212 uint64_t ix = asuint64 (x);
213 if (!IEEE_754_2008_SNAN)
214 return (ix & 0x7ff8000000000000) == 0x7ff8000000000000;
215 return 2 * (ix ^ 0x0008000000000000) > 2 * 0x7ff8000000000000ULL;
216 }
217
218 #if __aarch64__ && __GNUC__
219 /* Prevent the optimization of a floating-point expression. */
220 static inline float
opt_barrier_float(float x)221 opt_barrier_float (float x)
222 {
223 __asm__ __volatile__ ("" : "+w" (x));
224 return x;
225 }
226 static inline double
opt_barrier_double(double x)227 opt_barrier_double (double x)
228 {
229 __asm__ __volatile__ ("" : "+w" (x));
230 return x;
231 }
232 /* Force the evaluation of a floating-point expression for its side-effect. */
233 static inline void
force_eval_float(float x)234 force_eval_float (float x)
235 {
236 __asm__ __volatile__ ("" : "+w" (x));
237 }
238 static inline void
force_eval_double(double x)239 force_eval_double (double x)
240 {
241 __asm__ __volatile__ ("" : "+w" (x));
242 }
243 #else
244 static inline float
opt_barrier_float(float x)245 opt_barrier_float (float x)
246 {
247 volatile float y = x;
248 return y;
249 }
250 static inline double
opt_barrier_double(double x)251 opt_barrier_double (double x)
252 {
253 volatile double y = x;
254 return y;
255 }
256 static inline void
force_eval_float(float x)257 force_eval_float (float x)
258 {
259 volatile float y UNUSED = x;
260 }
261 static inline void
force_eval_double(double x)262 force_eval_double (double x)
263 {
264 volatile double y UNUSED = x;
265 }
266 #endif
267
268 /* Evaluate an expression as the specified type, normally a type
269 cast should be enough, but compilers implement non-standard
270 excess-precision handling, so when FLT_EVAL_METHOD != 0 then
271 these functions may need to be customized. */
272 static inline float
eval_as_float(float x)273 eval_as_float (float x)
274 {
275 return x;
276 }
277 static inline double
eval_as_double(double x)278 eval_as_double (double x)
279 {
280 return x;
281 }
282
283 /* Error handling tail calls for special cases, with a sign argument.
284 The sign of the return value is set if the argument is non-zero. */
285
286 /* The result overflows. */
287 HIDDEN float __math_oflowf (uint32_t);
288 /* The result underflows to 0 in nearest rounding mode. */
289 HIDDEN float __math_uflowf (uint32_t);
290 /* The result underflows to 0 in some directed rounding mode only. */
291 HIDDEN float __math_may_uflowf (uint32_t);
292 /* Division by zero. */
293 HIDDEN float __math_divzerof (uint32_t);
294 /* The result overflows. */
295 HIDDEN double __math_oflow (uint32_t);
296 /* The result underflows to 0 in nearest rounding mode. */
297 HIDDEN double __math_uflow (uint32_t);
298 /* The result underflows to 0 in some directed rounding mode only. */
299 HIDDEN double __math_may_uflow (uint32_t);
300 /* Division by zero. */
301 HIDDEN double __math_divzero (uint32_t);
302
303 /* Error handling using input checking. */
304
305 /* Invalid input unless it is a quiet NaN. */
306 HIDDEN float __math_invalidf (float);
307 /* Invalid input unless it is a quiet NaN. */
308 HIDDEN double __math_invalid (double);
309
310 /* Error handling using output checking, only for errno setting. */
311
312 /* Check if the result overflowed to infinity. */
313 HIDDEN double __math_check_oflow (double);
314 /* Check if the result underflowed to 0. */
315 HIDDEN double __math_check_uflow (double);
316
317 /* Check if the result overflowed to infinity. */
318 static inline double
check_oflow(double x)319 check_oflow (double x)
320 {
321 return WANT_ERRNO ? __math_check_oflow (x) : x;
322 }
323
324 /* Check if the result underflowed to 0. */
325 static inline double
check_uflow(double x)326 check_uflow (double x)
327 {
328 return WANT_ERRNO ? __math_check_uflow (x) : x;
329 }
330
331 /* Check if the result overflowed to infinity. */
332 HIDDEN float __math_check_oflowf (float);
333 /* Check if the result underflowed to 0. */
334 HIDDEN float __math_check_uflowf (float);
335
336 /* Check if the result overflowed to infinity. */
337 static inline float
check_oflowf(float x)338 check_oflowf (float x)
339 {
340 return WANT_ERRNO ? __math_check_oflowf (x) : x;
341 }
342
343 /* Check if the result underflowed to 0. */
344 static inline float
check_uflowf(float x)345 check_uflowf (float x)
346 {
347 return WANT_ERRNO ? __math_check_uflowf (x) : x;
348 }
349
350 extern const struct erff_data
351 {
352 struct
353 {
354 float erf, scale;
355 } tab[513];
356 } __erff_data HIDDEN;
357
358 extern const struct sv_erff_data
359 {
360 float erf[513];
361 float scale[513];
362 } __sv_erff_data HIDDEN;
363
364 extern const struct erfcf_data
365 {
366 struct
367 {
368 float erfc, scale;
369 } tab[645];
370 } __erfcf_data HIDDEN;
371
372 /* Data for logf and log10f. */
373 #define LOGF_TABLE_BITS 4
374 #define LOGF_POLY_ORDER 4
375 extern const struct logf_data
376 {
377 struct
378 {
379 double invc, logc;
380 } tab[1 << LOGF_TABLE_BITS];
381 double ln2;
382 double invln10;
383 double poly[LOGF_POLY_ORDER - 1]; /* First order coefficient is 1. */
384 } __logf_data HIDDEN;
385
386 /* Data for low accuracy log10 (with 1/ln(10) included in coefficients). */
387 #define LOG10_TABLE_BITS 7
388 #define LOG10_POLY_ORDER 6
389 #define LOG10_POLY1_ORDER 12
390 extern const struct log10_data
391 {
392 double ln2hi;
393 double ln2lo;
394 double invln10;
395 double poly[LOG10_POLY_ORDER - 1]; /* First coefficient is 1/log(10). */
396 double poly1[LOG10_POLY1_ORDER - 1];
397 struct
398 {
399 double invc, logc;
400 } tab[1 << LOG10_TABLE_BITS];
401 #if !HAVE_FAST_FMA
402 struct
403 {
404 double chi, clo;
405 } tab2[1 << LOG10_TABLE_BITS];
406 #endif
407 } __log10_data HIDDEN;
408
409 #define EXP_TABLE_BITS 7
410 #define EXP_POLY_ORDER 5
411 /* Use polynomial that is optimized for a wider input range. This may be
412 needed for good precision in non-nearest rounding and !TOINT_INTRINSICS. */
413 #define EXP_POLY_WIDE 0
414 /* Use close to nearest rounding toint when !TOINT_INTRINSICS. This may be
415 needed for good precision in non-nearest rouning and !EXP_POLY_WIDE. */
416 #define EXP_USE_TOINT_NARROW 0
417 #define EXP2_POLY_ORDER 5
418 #define EXP2_POLY_WIDE 0
419 extern const struct exp_data
420 {
421 double invln2N;
422 double shift;
423 double negln2hiN;
424 double negln2loN;
425 double poly[4]; /* Last four coefficients. */
426 double exp2_shift;
427 double exp2_poly[EXP2_POLY_ORDER];
428 uint64_t tab[2 * (1 << EXP_TABLE_BITS)];
429 } __exp_data HIDDEN;
430
431 /* Copied from math/v_exp.h for use in vector exp_tail. */
432 #define V_EXP_TAIL_TABLE_BITS 8
433 extern const uint64_t __v_exp_tail_data[1 << V_EXP_TAIL_TABLE_BITS] HIDDEN;
434
435 /* Copied from math/v_exp.h for use in vector exp2. */
436 #define V_EXP_TABLE_BITS 7
437 extern const uint64_t __v_exp_data[1 << V_EXP_TABLE_BITS] HIDDEN;
438
439 extern const struct erf_data
440 {
441 struct
442 {
443 double erf, scale;
444 } tab[769];
445 } __erf_data HIDDEN;
446
447 extern const struct sv_erf_data
448 {
449 double erf[769];
450 double scale[769];
451 } __sv_erf_data HIDDEN;
452
453 extern const struct erfc_data
454 {
455 struct
456 {
457 double erfc, scale;
458 } tab[3488];
459 } __erfc_data HIDDEN;
460
461 #define ATAN_POLY_NCOEFFS 20
462 extern const struct atan_poly_data
463 {
464 double poly[ATAN_POLY_NCOEFFS];
465 } __atan_poly_data HIDDEN;
466
467 #define ATANF_POLY_NCOEFFS 8
468 extern const struct atanf_poly_data
469 {
470 float poly[ATANF_POLY_NCOEFFS];
471 } __atanf_poly_data HIDDEN;
472
473 #define ASINHF_NCOEFFS 8
474 extern const struct asinhf_data
475 {
476 float coeffs[ASINHF_NCOEFFS];
477 } __asinhf_data HIDDEN;
478
479 #define LOG_TABLE_BITS 7
480 #define LOG_POLY_ORDER 6
481 #define LOG_POLY1_ORDER 12
482 extern const struct log_data
483 {
484 double ln2hi;
485 double ln2lo;
486 double poly[LOG_POLY_ORDER - 1]; /* First coefficient is 1. */
487 double poly1[LOG_POLY1_ORDER - 1];
488 struct
489 {
490 double invc, logc;
491 } tab[1 << LOG_TABLE_BITS];
492 #if !HAVE_FAST_FMA
493 struct
494 {
495 double chi, clo;
496 } tab2[1 << LOG_TABLE_BITS];
497 #endif
498 } __log_data HIDDEN;
499
500 #define ASINH_NCOEFFS 18
501 extern const struct asinh_data
502 {
503 double poly[ASINH_NCOEFFS];
504 } __asinh_data HIDDEN;
505
506 #define LOG1P_NCOEFFS 19
507 extern const struct log1p_data
508 {
509 double coeffs[LOG1P_NCOEFFS];
510 } __log1p_data HIDDEN;
511
512 #define LOG1PF_2U5
513 #define LOG1PF_NCOEFFS 9
514 extern const struct log1pf_data
515 {
516 float coeffs[LOG1PF_NCOEFFS];
517 } __log1pf_data HIDDEN;
518
519 #define TANF_P_POLY_NCOEFFS 6
520 /* cotan approach needs order 3 on [0, pi/4] to reach <3.5ulps. */
521 #define TANF_Q_POLY_NCOEFFS 4
522 extern const struct tanf_poly_data
523 {
524 float poly_tan[TANF_P_POLY_NCOEFFS];
525 float poly_cotan[TANF_Q_POLY_NCOEFFS];
526 } __tanf_poly_data HIDDEN;
527
528 #define V_LOG2_TABLE_BITS 7
529 extern const struct v_log2_data
530 {
531 double poly[5];
532 double invln2;
533 struct
534 {
535 double invc, log2c;
536 } table[1 << V_LOG2_TABLE_BITS];
537 } __v_log2_data HIDDEN;
538
539 #define V_LOG10_TABLE_BITS 7
540 extern const struct v_log10_data
541 {
542 double poly[5];
543 double invln10, log10_2;
544 struct
545 {
546 double invc, log10c;
547 } table[1 << V_LOG10_TABLE_BITS];
548 } __v_log10_data HIDDEN;
549
550 /* Some data for SVE powf's internal exp and log. */
551 #define V_POWF_EXP2_TABLE_BITS 5
552 #define V_POWF_EXP2_N (1 << V_POWF_EXP2_TABLE_BITS)
553 #define V_POWF_LOG2_TABLE_BITS 5
554 #define V_POWF_LOG2_N (1 << V_POWF_LOG2_TABLE_BITS)
555 extern const struct v_powf_data
556 {
557 double invc[V_POWF_LOG2_N];
558 double logc[V_POWF_LOG2_N];
559 uint64_t scale[V_POWF_EXP2_N];
560 } __v_powf_data HIDDEN;
561
562 #define V_LOG_POLY_ORDER 6
563 #define V_LOG_TABLE_BITS 7
564 extern const struct v_log_data
565 {
566 /* Shared data for vector log and log-derived routines (e.g. asinh). */
567 double poly[V_LOG_POLY_ORDER - 1];
568 double ln2;
569 struct
570 {
571 double invc, logc;
572 } table[1 << V_LOG_TABLE_BITS];
573 } __v_log_data HIDDEN;
574
575 #define EXPM1F_POLY_ORDER 5
576 extern const float __expm1f_poly[EXPM1F_POLY_ORDER] HIDDEN;
577
578 #define EXPF_TABLE_BITS 5
579 #define EXPF_POLY_ORDER 3
580 extern const struct expf_data
581 {
582 uint64_t tab[1 << EXPF_TABLE_BITS];
583 double invln2_scaled;
584 double poly_scaled[EXPF_POLY_ORDER];
585 } __expf_data HIDDEN;
586
587 #define EXPM1_POLY_ORDER 11
588 extern const double __expm1_poly[EXPM1_POLY_ORDER] HIDDEN;
589
590 extern const struct cbrtf_data
591 {
592 float poly[4];
593 float table[5];
594 } __cbrtf_data HIDDEN;
595
596 extern const struct cbrt_data
597 {
598 double poly[4];
599 double table[5];
600 } __cbrt_data HIDDEN;
601
602 #define ASINF_POLY_ORDER 4
603 extern const float __asinf_poly[ASINF_POLY_ORDER + 1] HIDDEN;
604
605 #define ASIN_POLY_ORDER 11
606 extern const double __asin_poly[ASIN_POLY_ORDER + 1] HIDDEN;
607
608 /* Some data for AdvSIMD and SVE pow's internal exp and log. */
609 #define V_POW_EXP_TABLE_BITS 8
610 extern const struct v_pow_exp_data
611 {
612 double poly[3];
613 double n_over_ln2, ln2_over_n_hi, ln2_over_n_lo, shift;
614 uint64_t sbits[1 << V_POW_EXP_TABLE_BITS];
615 } __v_pow_exp_data HIDDEN;
616
617 #define V_POW_LOG_TABLE_BITS 7
618 extern const struct v_pow_log_data
619 {
620 double poly[7]; /* First coefficient is 1. */
621 double ln2_hi, ln2_lo;
622 double invc[1 << V_POW_LOG_TABLE_BITS];
623 double logc[1 << V_POW_LOG_TABLE_BITS];
624 double logctail[1 << V_POW_LOG_TABLE_BITS];
625 } __v_pow_log_data HIDDEN;
626
627 #endif
628