1 #pragma once 2 3 #include <ATen/AccumulateType.h> 4 #include <ATen/jit_macros.h> 5 #include <c10/macros/Macros.h> 6 #include <ATen/native/cuda/jit_utils.h> 7 8 namespace at { 9 namespace native { 10 // See note [Jiterator] 11 // TODO: elaborate in this comment on the structure of math.cuh 12 #if AT_USE_JITERATOR() 13 14 const auto ndtri_string = jiterator_stringify( 15 /* 16 * This function is derived from the implementation of the digamma function in the Cephes Math Library. 17 * See note [3-Clause BSD License for the Cephes Math Library]. 18 * 19 * Evaluates polynomial of degree N: 20 * 21 * 2 N 22 * y = C + C x + C x +...+ C x 23 * 0 1 2 N 24 * 25 * Coefficients are stored in reverse order: 26 * 27 * coef[0] = C , ..., coef[N] = C . 28 * N 0 29 */ 30 template <typename T> 31 T polevl(const T x, const T A[], const int len) { 32 // NOTE: This `polevl` is different from other `polevl` 33 // implementation (in PyTorch) which expect the `len` to be 34 // `len(A) - 1` instead of `len(A)`. 35 T result = 0; 36 for (int i = 0; i < len; ++i) { 37 result = result * x + A[i]; 38 } 39 return result; 40 } 41 42 /* 43 * This function is derived from the implementation of the i1e function in the Cephes Math Library. 44 * See note [3-Clause BSD License for the Cephes Math Library]. 45 * 46 * Computes the argument, x, for which the area under the Gaussian probability density function 47 * (integrated from minus infinity to x) is equal to y. 48 */ 49 template <typename T> 50 T ndtri(T y0) { 51 52 constexpr T zero = 0; 53 constexpr T one = 1; 54 55 // Handles special cases 56 if (y0 == zero) { 57 return NEG_INFINITY; 58 } 59 if (y0 == one) { 60 return POS_INFINITY; 61 } 62 if (y0 < zero || y0 > one) { 63 return NAN; 64 } 65 66 bool code = true; 67 T y = y0; 68 // Note: the constant 0.135... is equal to exp(-2) 69 if (y > one - T{0.13533528323661269189}) { 70 y = one - y; 71 code = false; 72 } 73 74 if (y > T{0.13533528323661269189}) { 75 /* approximation for 0 <= |y - 0.5| <= 3/8 */ 76 static const T P0[5] = { 77 -5.99633501014107895267E1, 78 9.80010754185999661536E1, 79 -5.66762857469070293439E1, 80 1.39312609387279679503E1, 81 -1.23916583867381258016E0, 82 }; 83 84 static const T Q0[9] = { 85 1.00000000000000000000E0, 86 1.95448858338141759834E0, 87 4.67627912898881538453E0, 88 8.63602421390890590575E1, 89 -2.25462687854119370527E2, 90 2.00260212380060660359E2, 91 -8.20372256168333339912E1, 92 1.59056225126211695515E1, 93 -1.18331621121330003142E0, 94 }; 95 96 /* sqrt(2pi) */ 97 constexpr T s2pi = 2.50662827463100050242E0; 98 99 y = y - T{0.5}; 100 const T y2 = y * y; 101 T x = y + y * (y2 * polevl(y2, P0, int{5}) / polevl(y2, Q0, int{9})); 102 return x * s2pi; 103 } 104 105 T x = sqrt(T{-2.} * log(y)); 106 const T x0 = x - (log(x) / x); 107 108 const T z = one / x; 109 T x1; 110 111 /* y > exp(-32) = 1.2664165549e-14 */ 112 if (x < T{8.0}) { 113 /* Approximation for interval z = sqrt(-2 log y ) between 2 and 8 114 * i.e., y between exp(-2) = .135 and exp(-32) = 1.27e-14. 115 */ 116 static const T P1[9] = { 117 4.05544892305962419923E0, 118 3.15251094599893866154E1, 119 5.71628192246421288162E1, 120 4.40805073893200834700E1, 121 1.46849561928858024014E1, 122 2.18663306850790267539E0, 123 -1.40256079171354495875E-1, 124 -3.50424626827848203418E-2, 125 -8.57456785154685413611E-4, 126 }; 127 128 static const T Q1[9] = { 129 1.00000000000000000000E0, 130 1.57799883256466749731E1, 131 4.53907635128879210584E1, 132 4.13172038254672030440E1, 133 1.50425385692907503408E1, 134 2.50464946208309415979E0, 135 -1.42182922854787788574E-1, 136 -3.80806407691578277194E-2, 137 -9.33259480895457427372E-4, 138 }; 139 140 x1 = z * polevl(z, P1, int{9}) / polevl(z, Q1, int{9}); 141 } else { 142 /* Approximation for interval z = sqrt(-2 log y ) between 8 and 64 143 * i.e., y between exp(-32) = 1.27e-14 and exp(-2048) = 3.67e-890. 144 */ 145 static const T P2[9] = { 146 3.23774891776946035970E0, 147 6.91522889068984211695E0, 148 3.93881025292474443415E0, 149 1.33303460815807542389E0, 150 2.01485389549179081538E-1, 151 1.23716634817820021358E-2, 152 3.01581553508235416007E-4, 153 2.65806974686737550832E-6, 154 6.23974539184983293730E-9, 155 }; 156 157 static const T Q2[9] = { 158 1.00000000000000000000E0, 159 6.02427039364742014255E0, 160 3.67983563856160859403E0, 161 1.37702099489081330271E0, 162 2.16236993594496635890E-1, 163 1.34204006088543189037E-2, 164 3.28014464682127739104E-4, 165 2.89247864745380683936E-6, 166 6.79019408009981274425E-9, 167 }; 168 169 x1 = z * polevl(z, P2, int{9}) / polevl(z, Q2, int{9}); 170 } 171 172 x = x0 - x1; 173 return (!code) ? x : -x; 174 } 175 ); // ndtri_string 176 177 const auto log_ndtr_string = jiterator_stringify( 178 template <typename T> 179 T log_ndtr(T x) { 180 constexpr T SQRT1_2{0.707106781186547524400844362104849039}; // 1/sqrt(2) 181 T t = x * SQRT1_2; 182 if (x < T{-1.0}) { 183 return log(erfcx(-t) / 2) - t * t; 184 } else { 185 return log1p(-erfc(t) / 2); 186 } 187 } 188 ); // log_ndtr_string 189 190 const auto gcd_string = jiterator_stringify( 191 template <typename T> 192 T gcd(const T a_in, const T b_in) { 193 T a = abs(a_in); 194 T b = abs(b_in); 195 196 while (a != T{0}) { 197 T c = a; 198 a = b % a; 199 b = c; 200 } 201 202 return b; 203 } 204 ); // gcd_string 205 206 const auto lcm_string = jiterator_stringify( 207 template <typename T> 208 T gcd(const T a_in, const T b_in) { 209 T a = abs(a_in); 210 T b = abs(b_in); 211 212 while (a != T{0}) { 213 T c = a; 214 a = b % a; 215 b = c; 216 } 217 218 return b; 219 } 220 221 template <typename T> 222 T lcm(const T a, const T b) { 223 T g = gcd(a, b); 224 return (g == T{0}) ? T{0} : abs(a / g * b); 225 } 226 ); // lcm_string 227 228 /* 229 * For licensing information, please refer to the cpu implementation located in "ATen/native/Math.h". 230 */ 231 // [C++ Standard Reference: Gamma Function] https://en.cppreference.com/w/cpp/numeric/math/tgamma 232 const auto digamma_string = jiterator_stringify( 233 template <typename T> 234 T digamma(T x) { 235 static const double PI_f64 = 3.14159265358979323846; 236 237 // Short-circuits if x is +/- 0 and returns -/+ ∞ per the C++ standard 238 if (x == 0) { 239 return copysign(POS_INFINITY, -x); 240 } 241 242 T result = 0; 243 if (x < 0) { 244 // Short-circuits if x is a negative integer and returns NaN 245 // per the C++ standard 246 const bool x_is_integer = (x == trunc(x)); 247 if (x_is_integer) { 248 return NAN; 249 } 250 251 // Extracts the fractional part of x as r, since tan(pi * r) is more numerically 252 // accurate than tan(pi * x). While these operations are mathematically equivalent 253 // since both x and r are in radians and tan() has a periodicity of pi, in practice 254 // the computation of pi * x is a source of error (when |x| > 1). 255 double q, r; 256 r = modf(static_cast<double>(x), &q); 257 result = - PI_f64 / tan(PI_f64 * r); 258 x = 1 - x; 259 } 260 261 while (x < T{10}) { 262 result -= T{1} / x; 263 x += T{1}; 264 } 265 266 if (x == T{10}) { 267 return result + T{2.25175258906672110764}; 268 } 269 270 T y = 0; 271 if (x < T{1.0e17}) { 272 const T A[] = { 273 8.33333333333333333333E-2, 274 -2.10927960927960927961E-2, 275 7.57575757575757575758E-3, 276 -4.16666666666666666667E-3, 277 3.96825396825396825397E-3, 278 -8.33333333333333333333E-3, 279 8.33333333333333333333E-2, 280 }; 281 282 283 T z = T{1} / (x * x); 284 285 T polevl_result = 0; 286 for (int i = 0; i <= 6; i++) { 287 polevl_result = polevl_result * z + A[i]; 288 } 289 y = z * polevl_result; 290 } 291 292 return log(x) - (T{0.5} / x) - y + result; 293 } 294 ); // digamma_string 295 296 /* 297 * This function is derived from the implementation of the zeta function in the Cephes Math Library. 298 * See note [3-Clause BSD License for the Cephes Math Library]. 299 */ 300 const auto zeta_string = jiterator_stringify( 301 template <typename T> 302 T zeta(T x, T q) { 303 const T MACHEP{1.11022302462515654042E-16}; 304 constexpr T zero{0}; 305 constexpr T half{0.5}; 306 constexpr T one{1}; 307 static const T A[] = { 308 12.0, 309 -720.0, 310 30240.0, 311 -1209600.0, 312 47900160.0, 313 -1.8924375803183791606e9, /*1.307674368e12/691*/ 314 7.47242496e10, 315 -2.950130727918164224e12, /*1.067062284288e16/3617*/ 316 1.1646782814350067249e14, /*5.109094217170944e18/43867*/ 317 -4.5979787224074726105e15, /*8.028576626982912e20/174611*/ 318 1.8152105401943546773e17, /*1.5511210043330985984e23/854513*/ 319 -7.1661652561756670113e18 /*1.6938241367317436694528e27/236364091*/ 320 }; 321 322 int i = 0; 323 T a, b, k, s, t, w; 324 325 // Short-circuits x -> +infty 326 if (x == one) { 327 return POS_INFINITY; 328 } 329 330 // Short-circuits x < 1 -> NaN 331 if (x < one) { 332 return NAN; 333 } 334 335 // Short-circuits negative q integers map to +infty, 336 // negative q non-integers map to NaN 337 if (q <= zero) { 338 if (q == floor(q)) { 339 return POS_INFINITY; 340 } 341 if (x != floor(x)) { 342 return NAN; 343 } 344 } 345 346 s = pow(q, -x); 347 a = q; 348 i = 0; 349 b = zero; 350 while ((i < 9) || (a <= T{9.0})) { 351 i += 1; 352 a += one; 353 b = pow(a, -x); 354 s += b; 355 if ((-MACHEP * s < b) && (b < MACHEP * s)) { 356 return s; 357 } 358 }; 359 360 w = a; 361 s += b * w / (x - one); 362 s -= half * b; 363 a = one; 364 k = zero; 365 for (int i = 0; i < 12; i++) { 366 a *= x + k; 367 b /= w; 368 t = a * b / A[i]; 369 s = s + t; 370 t = fabs(t / s); 371 372 if (t < MACHEP) { 373 return s; 374 } 375 376 k += one; 377 a *= x + k; 378 b /= w; 379 k += one; 380 } 381 382 return s; 383 } 384 ); // zeta_string 385 386 const auto trigamma_string = jiterator_stringify( 387 template <typename T> 388 T trigamma(T x) { 389 const T PI{3.14159265358979323846}; 390 T sign = 1; 391 T result = 0; 392 393 if (x < T{0.5}) { 394 sign = -1; 395 T sin_pi_x = sin(PI * x); 396 result -= (PI * PI) / (sin_pi_x * sin_pi_x); 397 x = 1 - x; 398 } 399 400 for (int i = 0; i < 6; ++i) { 401 result += T{1} / (x * x); 402 x += 1; 403 } 404 405 const T one{1}; 406 const T ixx = one / (x*x); 407 result += (one + one / (T{2}*x) + ixx * (one/T{6} - ixx * (one/T{30} - ixx * (one/T{42})))) / x; 408 return sign * result; 409 } 410 ); // trigamma_string 411 412 const auto lgamma_string = jiterator_stringify( 413 template <typename T> 414 T lgamma_kernel(T a) { 415 return lgamma(a); 416 } 417 ); // lgamma_string 418 419 const auto polygamma_string = zeta_string + jiterator_stringify( 420 template <typename T> 421 T polygamma(T x, int n) { 422 // already blocked if n <= 1 423 const auto one = T{1}; 424 return ((n % 2) ? one : -one) * exp(lgamma(static_cast<T>(n) + one)) * 425 zeta<T>(static_cast<T>(n + 1), x); 426 } 427 ); // polygamma_string 428 429 const auto exp2_string = jiterator_stringify( 430 template <typename T> 431 T exp2_impl(T a) { 432 return exp2(a); 433 } 434 435 namespace std { template <typename _Ty> class complex; } 436 template <typename T> 437 std::complex<T> exp2_impl(std::complex<T> x) { 438 // There is no std::exp2 overload for complex, so instead 439 // use the identity 2^x = e^(ln(2) * x) 440 const auto ln_2 = static_cast<T>(0.693147180559945309417232121458176); 441 return exp(ln_2 * x); 442 } 443 444 template <typename T> 445 T exp2_kernel(T a) { 446 return exp2_impl(a); 447 } 448 ); // exp2_string 449 450 const auto erfc_string = jiterator_stringify( 451 template <typename T> 452 T erfc_kernel(T a) { 453 return erfc(a); 454 } 455 ); // erfc_string 456 457 const auto erfinv_string = jiterator_stringify( 458 template <typename T> 459 T erfinv_kernel(T a) { 460 return erfinv(a); 461 } 462 ); // erfinv_string 463 464 const auto entr_string = jiterator_stringify( 465 template <typename T> 466 T entr(T a) { 467 if (a != a) { 468 return a; 469 } 470 471 if (a > 0) { 472 return -a * log(a); 473 } 474 475 if (a == 0) { 476 return 0; 477 } 478 479 return NEG_INFINITY; 480 } 481 ); // entr_string 482 483 // NOTE: `kaiser_window_string` depends on `i0_string` 484 // for its implementation. 485 const auto i0_string = jiterator_stringify( 486 template<typename T> 487 T chbevl(T x, const T array[], const int len) { 488 489 T b0, b1, b2; 490 491 b0 = array[0]; 492 b1 = 0; 493 494 for (int i = 1; i < len; ++i) { 495 b2 = b1; 496 b1 = b0; 497 b0 = x * b1 - b2 + array[i]; 498 } 499 500 return T{0.5} * (b0 - b2); 501 } 502 503 template<typename T> 504 T i0(T _x) { 505 T x = fabs(_x); 506 507 if (x <= T{8.0}) { 508 /* Chebyshev coefficients for exp(-x) I0(x) 509 * in the interval [0,8]. 510 * 511 * lim(x->0){ exp(-x) I0(x) } = 1. 512 */ 513 static const T A[] = { 514 -4.41534164647933937950E-18, 3.33079451882223809783E-17, 515 -2.43127984654795469359E-16, 1.71539128555513303061E-15, 516 -1.16853328779934516808E-14, 7.67618549860493561688E-14, 517 -4.85644678311192946090E-13, 2.95505266312963983461E-12, 518 -1.72682629144155570723E-11, 9.67580903537323691224E-11, 519 -5.18979560163526290666E-10, 2.65982372468238665035E-9, 520 -1.30002500998624804212E-8, 6.04699502254191894932E-8, 521 -2.67079385394061173391E-7, 1.11738753912010371815E-6, 522 -4.41673835845875056359E-6, 1.64484480707288970893E-5, 523 -5.75419501008210370398E-5, 1.88502885095841655729E-4, 524 -5.76375574538582365885E-4, 1.63947561694133579842E-3, 525 -4.32430999505057594430E-3, 1.05464603945949983183E-2, 526 -2.37374148058994688156E-2, 4.93052842396707084878E-2, 527 -9.49010970480476444210E-2, 1.71620901522208775349E-1, 528 -3.04682672343198398683E-1, 6.76795274409476084995E-1}; 529 530 T y = (x / T{2.0}) - T{2.0}; 531 return exp(x) * chbevl(y, A, int{30}); 532 } 533 534 // Handles x > 8 case 535 /* Chebyshev coefficients for exp(-x) sqrt(x) I0(x) 536 * in the inverted interval [8,infinity]. 537 * 538 * lim(x->inf){ exp(-x) sqrt(x) I0(x) } = 1/sqrt(2pi). 539 */ 540 const T B[] = { 541 -7.23318048787475395456E-18, -4.83050448594418207126E-18, 542 4.46562142029675999901E-17, 3.46122286769746109310E-17, 543 -2.82762398051658348494E-16, -3.42548561967721913462E-16, 544 1.77256013305652638360E-15, 3.81168066935262242075E-15, 545 -9.55484669882830764870E-15, -4.15056934728722208663E-14, 546 1.54008621752140982691E-14, 3.85277838274214270114E-13, 547 7.18012445138366623367E-13, -1.79417853150680611778E-12, 548 -1.32158118404477131188E-11, -3.14991652796324136454E-11, 549 1.18891471078464383424E-11, 4.94060238822496958910E-10, 550 3.39623202570838634515E-9, 2.26666899049817806459E-8, 551 2.04891858946906374183E-7, 2.89137052083475648297E-6, 552 6.88975834691682398426E-5, 3.36911647825569408990E-3, 553 8.04490411014108831608E-1}; 554 555 return (exp(x) * chbevl(T{32.0} / x - T{2.0}, B, int{25})) / sqrt(x); 556 } 557 ); // i0_string 558 559 const auto i1_string = jiterator_stringify( 560 template<typename T> 561 T chbevl(const T x, const T array[], const int len) { 562 T b0, b1, b2; 563 564 b0 = array[0]; 565 b1 = 0; 566 567 for (int i = 1; i < len; ++i) { 568 b2 = b1; 569 b1 = b0; 570 b0 = x * b1 - b2 + array[i]; 571 } 572 573 return T{0.5} * (b0 - b2); 574 } 575 576 template <typename T> 577 T i1(T _x) { 578 const T x = fabs(_x); 579 580 if (x <= T{8.0}) { 581 // Chebyshev coefficients for exp(-x) i1(x) in the internal [0, 8] 582 // lim(x->0){ exp(-x) i1(x) / x } = 1/2 583 static const T coefficients[] = { 584 2.77791411276104639959E-18, -2.11142121435816608115E-17, 585 1.55363195773620046921E-16, -1.10559694773538630805E-15, 586 7.60068429473540693410E-15, -5.04218550472791168711E-14, 587 3.22379336594557470981E-13, -1.98397439776494371520E-12, 588 1.17361862988909016308E-11, -6.66348972350202774223E-11, 589 3.62559028155211703701E-10, -1.88724975172282928790E-9, 590 9.38153738649577178388E-9, -4.44505912879632808065E-8, 591 2.00329475355213526229E-7, -8.56872026469545474066E-7, 592 3.47025130813767847674E-6, -1.32731636560394358279E-5, 593 4.78156510755005422638E-5, -1.61760815825896745588E-4, 594 5.12285956168575772895E-4, -1.51357245063125314899E-3, 595 4.15642294431288815669E-3, -1.05640848946261981558E-2, 596 2.47264490306265168283E-2, -5.29459812080949914269E-2, 597 1.02643658689847095384E-1, -1.76416518357834055153E-1, 598 2.52587186443633654823E-1}; 599 const T y = x / T{2.0} - T{2.0}; 600 const T out = exp(x) * x * chbevl(y, coefficients, int{29}); 601 return (_x < T{0.0}) ? -out : out; 602 } 603 604 // Chebyshev coefficients for exp(-x) sqrt(x) i1(x) 605 // in the inverted interval [8, infinity] 606 // lim(x->inf){ exp(-x) sqrt(x) i1(x) } = 1/sqrt(2pi) 607 static const T coefficients[] = { 608 7.51729631084210481353E-18, 4.41434832307170791151E-18, 609 -4.65030536848935832153E-17, -3.20952592199342395980E-17, 610 2.96262899764595013876E-16, 3.30820231092092828324E-16, 611 -1.88035477551078244854E-15, -3.81440307243700780478E-15, 612 1.04202769841288027642E-14, 4.27244001671195135429E-14, 613 -2.10154184277266431302E-14, -4.08355111109219731823E-13, 614 -7.19855177624590851209E-13, 2.03562854414708950722E-12, 615 1.41258074366137813316E-11, 3.25260358301548823856E-11, 616 -1.89749581235054123450E-11, -5.58974346219658380687E-10, 617 -3.83538038596423702205E-9, -2.63146884688951950684E-8, 618 -2.51223623787020892529E-7, -3.88256480887769039346E-6, 619 -1.10588938762623716291E-4, -9.76109749136146840777E-3, 620 7.78576235018280120474E-1}; 621 const T out = (exp(x) * chbevl(T{32.} / x - T{2.}, coefficients, int{25})) / sqrt(x); 622 return (_x < T{0.}) ? -out : out; 623 } 624 ); // i1_string 625 626 const auto i1e_string = jiterator_stringify( 627 template<typename T> 628 T chbevl(const T x, const T array[], const int len) { 629 T b0, b1, b2; 630 631 b0 = array[0]; 632 b1 = 0; 633 634 for (int i = 1; i < len; ++i) { 635 b2 = b1; 636 b1 = b0; 637 b0 = x * b1 - b2 + array[i]; 638 } 639 640 return T{0.5} * (b0 - b2); 641 } 642 643 // See double and float instantiations below 644 template <typename T> 645 T i1e(T _x) { } 646 647 // Double specialization (uses different coefficients than the float version) 648 template<> 649 double i1e(double _x) { 650 const double x = fabs(_x); 651 if (x <= double{8.}) { 652 // Chebyshev double coefficients for exp(-x) i1(x) in the interval [0,8]. 653 // Note: lim(x->0){ exp(-x) i1(x) / x } = 1/2. 654 static const double coefficients[] = { 655 2.77791411276104639959E-18, -2.11142121435816608115E-17, 656 1.55363195773620046921E-16, -1.10559694773538630805E-15, 657 7.60068429473540693410E-15, -5.04218550472791168711E-14, 658 3.22379336594557470981E-13, -1.98397439776494371520E-12, 659 1.17361862988909016308E-11, -6.66348972350202774223E-11, 660 3.62559028155211703701E-10, -1.88724975172282928790E-9, 661 9.38153738649577178388E-9, -4.44505912879632808065E-8, 662 2.00329475355213526229E-7, -8.56872026469545474066E-7, 663 3.47025130813767847674E-6, -1.32731636560394358279E-5, 664 4.78156510755005422638E-5, -1.61760815825896745588E-4, 665 5.12285956168575772895E-4, -1.51357245063125314899E-3, 666 4.15642294431288815669E-3, -1.05640848946261981558E-2, 667 2.47264490306265168283E-2, -5.29459812080949914269E-2, 668 1.02643658689847095384E-1, -1.76416518357834055153E-1, 669 2.52587186443633654823E-1}; 670 const double y = x / double{2.} - double{2.}; 671 const double out = chbevl(y, coefficients, int{29}) * x; 672 return (_x < 0.) ? -out : out; 673 } 674 675 // Chebyshev coefficients for exp(-x) sqrt(x) i1(x) 676 // in the inverted interval (8, infinity]. 677 // Note: lim(x->inf){ exp(-x) sqrt(x) i1(x) } = 1/sqrt(2pi). 678 // TODO: what's an "inverted interval"? Open on the left 679 // and closed on the right? 680 static const double coefficients[] = { 681 7.51729631084210481353E-18, 4.41434832307170791151E-18, 682 -4.65030536848935832153E-17, -3.20952592199342395980E-17, 683 2.96262899764595013876E-16, 3.30820231092092828324E-16, 684 -1.88035477551078244854E-15, -3.81440307243700780478E-15, 685 1.04202769841288027642E-14, 4.27244001671195135429E-14, 686 -2.10154184277266431302E-14, -4.08355111109219731823E-13, 687 -7.19855177624590851209E-13, 2.03562854414708950722E-12, 688 1.41258074366137813316E-11, 3.25260358301548823856E-11, 689 -1.89749581235054123450E-11, -5.58974346219658380687E-10, 690 -3.83538038596423702205E-9, -2.63146884688951950684E-8, 691 -2.51223623787020892529E-7, -3.88256480887769039346E-6, 692 -1.10588938762623716291E-4, -9.76109749136146840777E-3, 693 7.78576235018280120474E-1}; 694 695 const double out = chbevl(double{32.} / x - double{2.}, coefficients, int{25}) / sqrt(x); 696 return (_x < double{0.}) ? -out : out; 697 } 698 699 // Float specialization (uses different coefficients than the double version) 700 template<> 701 float i1e(float _x) { 702 const float x = fabsf(_x); 703 if (x <= float{8.}) { 704 // Chebyshev double coefficients for exp(-x) i1(x) in the interval [0,8]. 705 // Note: lim(x->0){ exp(-x) i1(x) / x } = 1/2. 706 static const float coefficients[] = { 707 9.38153738649577178388E-9f, 708 -4.44505912879632808065E-8f, 709 2.00329475355213526229E-7f, 710 -8.56872026469545474066E-7f, 711 3.47025130813767847674E-6f, 712 -1.32731636560394358279E-5f, 713 4.78156510755005422638E-5f, 714 -1.61760815825896745588E-4f, 715 5.12285956168575772895E-4f, 716 -1.51357245063125314899E-3f, 717 4.15642294431288815669E-3f, 718 -1.05640848946261981558E-2f, 719 2.47264490306265168283E-2f, 720 -5.29459812080949914269E-2f, 721 1.02643658689847095384E-1f, 722 -1.76416518357834055153E-1f, 723 2.52587186443633654823E-1f}; 724 const float y = x / float{2.} - float{2.}; 725 const float out = chbevl(y, coefficients, int{17}) * x; 726 return (_x < 0.) ? -out : out; 727 } 728 729 // Chebyshev coefficients for exp(-x) sqrt(x) i1(x) 730 // in the inverted interval (8, infinity]. 731 // Note: lim(x->inf){ exp(-x) sqrt(x) i1(x) } = 1/sqrt(2pi). 732 // TODO: what's an "inverted interval"? Open on the left 733 // and closed on the right? 734 static const float coefficients[] = { 735 -3.83538038596423702205E-9f, 736 -2.63146884688951950684E-8f, 737 -2.51223623787020892529E-7f, 738 -3.88256480887769039346E-6f, 739 -1.10588938762623716291E-4f, 740 -9.76109749136146840777E-3f, 741 7.78576235018280120474E-1f}; 742 743 const float out = chbevl(float{32.} / x - float{2.}, coefficients, int{7}) / sqrt(x); 744 return (_x < float{0.}) ? -out : out; 745 } 746 ); // i1e_string 747 748 const auto kaiser_window_string = i0_string + jiterator_stringify( 749 template <typename T> 750 T kaiser_window(T a, T inv_alpha, T beta, T inv_i0_beta) { 751 T x = a * inv_alpha - T{1}; 752 T y = max(T{0}, T{1} - x * x); 753 return i0(beta * sqrt(y)) * inv_i0_beta; 754 } 755 ); // kaiser_window_string 756 757 const auto sinc_string = jiterator_stringify( 758 template <typename T> 759 T sinc(T a) { 760 if (a == T(0)) { 761 return T(1); 762 } else { 763 constexpr T pi = T(3.14159265358979323846L); 764 T product = pi * a; 765 return std::sin(product) / product; 766 } 767 } 768 ); // sinc_string 769 770 const auto erfcx_string = jiterator_stringify( 771 /* The next function is taken from http://ab-initio.mit.edu/Faddeev */ 772 773 /* Copyright (c) 2012 Massachusetts Institute of Technology 774 * 775 * Permission is hereby granted, free of charge, to any person obtaining 776 * a copy of this software and associated documentation files (the 777 * "Software"), to deal in the Software without restriction, including 778 * without limitation the rights to use, copy, modify, merge, publish, 779 * distribute, sublicense, and/or sell copies of the Software, and to 780 * permit persons to whom the Software is furnished to do so, subject to 781 * the following conditions: 782 * 783 * The above copyright notice and this permission notice shall be 784 * included in all copies or substantial portions of the Software. 785 * 786 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 787 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 788 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 789 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 790 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 791 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 792 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 793 */ 794 795 /* erfcx(x) = exp(x^2) erfc(x) function, for real x, written by 796 Steven G. Johnson, October 2012. 797 798 This function combines a few different ideas. 799 800 First, for x > 50, it uses a continued-fraction expansion (same as 801 for the Faddeeva function, but with algebraic simplifications for z=i*x). 802 803 Second, for 0 <= x <= 50, it uses Chebyshev polynomial approximations, 804 but with two twists: 805 806 a) It maps x to y = 4 / (4+x) in [0,1]. This simple transformation, 807 inspired by a similar transformation in the octave-forge/specfun 808 erfcx by Soren Hauberg, results in much faster Chebyshev convergence 809 than other simple transformations I have examined. 810 811 b) Instead of using a single Chebyshev polynomial for the entire 812 [0,1] y interval, we break the interval up into 100 equal 813 subintervals, with a switch/lookup table, and use much lower 814 degree Chebyshev polynomials in each subinterval. This greatly 815 improves performance in my tests. 816 817 For x < 0, we use the relationship erfcx(-x) = 2 exp(x^2) - erfc(x), 818 with the usual checks for overflow etcetera. 819 820 Performance-wise, it seems to be substantially faster than either 821 the SLATEC DERFC function [or an erfcx function derived therefrom] 822 or Cody's CALERF function (from netlib.org/specfun), while 823 retaining near machine precision in accuracy. 824 */ 825 826 /* Given y100 = 100 * y, where y = 4 / (4 + x) for x >= 0, compute erfc(x). 827 828 Uses a look-up table of 100 different Chebyshev polynomials 829 for y intervals [0,0.01], [0.01,0.02], ...., [0.99,1], generated 830 with the help of Maple and a little shell script. This allows 831 the Chebyshev polynomials to be of significantly lower degree (about 1/4) 832 compared to fitting the whole [0,1] interval with a single polynomial. 833 */ 834 835 // TODO: review if this is computing in double when given a float input 836 template <typename T> 837 T erfcx_y100(T y100) { 838 switch (static_cast<int>(y100)) { 839 case 0: { 840 T t = 2*y100 - 1; 841 return 0.70878032454106438663e-3 + (0.71234091047026302958e-3 + (0.35779077297597742384e-5 + (0.17403143962587937815e-7 + (0.81710660047307788845e-10 + (0.36885022360434957634e-12 + 0.15917038551111111111e-14 * t) * t) * t) * t) * t) * t; 842 } 843 case 1: { 844 T t = 2*y100 - 3; 845 return 0.21479143208285144230e-2 + (0.72686402367379996033e-3 + (0.36843175430938995552e-5 + (0.18071841272149201685e-7 + (0.85496449296040325555e-10 + (0.38852037518534291510e-12 + 0.16868473576888888889e-14 * t) * t) * t) * t) * t) * t; 846 } 847 case 2: { 848 T t = 2*y100 - 5; 849 return 0.36165255935630175090e-2 + (0.74182092323555510862e-3 + (0.37948319957528242260e-5 + (0.18771627021793087350e-7 + (0.89484715122415089123e-10 + (0.40935858517772440862e-12 + 0.17872061464888888889e-14 * t) * t) * t) * t) * t) * t; 850 } 851 case 3: { 852 T t = 2*y100 - 7; 853 return 0.51154983860031979264e-2 + (0.75722840734791660540e-3 + (0.39096425726735703941e-5 + (0.19504168704300468210e-7 + (0.93687503063178993915e-10 + (0.43143925959079664747e-12 + 0.18939926435555555556e-14 * t) * t) * t) * t) * t) * t; 854 } 855 case 4: { 856 T t = 2*y100 - 9; 857 return 0.66457513172673049824e-2 + (0.77310406054447454920e-3 + (0.40289510589399439385e-5 + (0.20271233238288381092e-7 + (0.98117631321709100264e-10 + (0.45484207406017752971e-12 + 0.20076352213333333333e-14 * t) * t) * t) * t) * t) * t; 858 } 859 case 5: { 860 T t = 2*y100 - 11; 861 return 0.82082389970241207883e-2 + (0.78946629611881710721e-3 + (0.41529701552622656574e-5 + (0.21074693344544655714e-7 + (0.10278874108587317989e-9 + (0.47965201390613339638e-12 + 0.21285907413333333333e-14 * t) * t) * t) * t) * t) * t; 862 } 863 case 6: { 864 T t = 2*y100 - 13; 865 return 0.98039537275352193165e-2 + (0.80633440108342840956e-3 + (0.42819241329736982942e-5 + (0.21916534346907168612e-7 + (0.10771535136565470914e-9 + (0.50595972623692822410e-12 + 0.22573462684444444444e-14 * t) * t) * t) * t) * t) * t; 866 } 867 case 7: { 868 T t = 2*y100 - 15; 869 return 0.11433927298290302370e-1 + (0.82372858383196561209e-3 + (0.44160495311765438816e-5 + (0.22798861426211986056e-7 + (0.11291291745879239736e-9 + (0.53386189365816880454e-12 + 0.23944209546666666667e-14 * t) * t) * t) * t) * t) * t; 870 } 871 case 8: { 872 T t = 2*y100 - 17; 873 return 0.13099232878814653979e-1 + (0.84167002467906968214e-3 + (0.45555958988457506002e-5 + (0.23723907357214175198e-7 + (0.11839789326602695603e-9 + (0.56346163067550237877e-12 + 0.25403679644444444444e-14 * t) * t) * t) * t) * t) * t; 874 } 875 case 9: { 876 T t = 2*y100 - 19; 877 return 0.14800987015587535621e-1 + (0.86018092946345943214e-3 + (0.47008265848816866105e-5 + (0.24694040760197315333e-7 + (0.12418779768752299093e-9 + (0.59486890370320261949e-12 + 0.26957764568888888889e-14 * t) * t) * t) * t) * t) * t; 878 } 879 case 10: { 880 T t = 2*y100 - 21; 881 return 0.16540351739394069380e-1 + (0.87928458641241463952e-3 + (0.48520195793001753903e-5 + (0.25711774900881709176e-7 + (0.13030128534230822419e-9 + (0.62820097586874779402e-12 + 0.28612737351111111111e-14 * t) * t) * t) * t) * t) * t; 882 } 883 case 11: { 884 T t = 2*y100 - 23; 885 return 0.18318536789842392647e-1 + (0.89900542647891721692e-3 + (0.50094684089553365810e-5 + (0.26779777074218070482e-7 + (0.13675822186304615566e-9 + (0.66358287745352705725e-12 + 0.30375273884444444444e-14 * t) * t) * t) * t) * t) * t; 886 } 887 case 12: { 888 T t = 2*y100 - 25; 889 return 0.20136801964214276775e-1 + (0.91936908737673676012e-3 + (0.51734830914104276820e-5 + (0.27900878609710432673e-7 + (0.14357976402809042257e-9 + (0.70114790311043728387e-12 + 0.32252476000000000000e-14 * t) * t) * t) * t) * t) * t; 890 } 891 case 13: { 892 T t = 2*y100 - 27; 893 return 0.21996459598282740954e-1 + (0.94040248155366777784e-3 + (0.53443911508041164739e-5 + (0.29078085538049374673e-7 + (0.15078844500329731137e-9 + (0.74103813647499204269e-12 + 0.34251892320000000000e-14 * t) * t) * t) * t) * t) * t; 894 } 895 case 14: { 896 T t = 2*y100 - 29; 897 return 0.23898877187226319502e-1 + (0.96213386835900177540e-3 + (0.55225386998049012752e-5 + (0.30314589961047687059e-7 + (0.15840826497296335264e-9 + (0.78340500472414454395e-12 + 0.36381553564444444445e-14 * t) * t) * t) * t) * t) * t; 898 } 899 case 15: { 900 T t = 2*y100 - 31; 901 return 0.25845480155298518485e-1 + (0.98459293067820123389e-3 + (0.57082915920051843672e-5 + (0.31613782169164830118e-7 + (0.16646478745529630813e-9 + (0.82840985928785407942e-12 + 0.38649975768888888890e-14 * t) * t) * t) * t) * t) * t; 902 } 903 case 16: { 904 T t = 2*y100 - 33; 905 return 0.27837754783474696598e-1 + (0.10078108563256892757e-2 + (0.59020366493792212221e-5 + (0.32979263553246520417e-7 + (0.17498524159268458073e-9 + (0.87622459124842525110e-12 + 0.41066206488888888890e-14 * t) * t) * t) * t) * t) * t; 906 } 907 case 17: { 908 T t = 2*y100 - 35; 909 return 0.29877251304899307550e-1 + (0.10318204245057349310e-2 + (0.61041829697162055093e-5 + (0.34414860359542720579e-7 + (0.18399863072934089607e-9 + (0.92703227366365046533e-12 + 0.43639844053333333334e-14 * t) * t) * t) * t) * t) * t; 910 } 911 case 18: { 912 T t = 2*y100 - 37; 913 return 0.31965587178596443475e-1 + (0.10566560976716574401e-2 + (0.63151633192414586770e-5 + (0.35924638339521924242e-7 + (0.19353584758781174038e-9 + (0.98102783859889264382e-12 + 0.46381060817777777779e-14 * t) * t) * t) * t) * t) * t; 914 } 915 case 19: { 916 T t = 2*y100 - 39; 917 return 0.34104450552588334840e-1 + (0.10823541191350532574e-2 + (0.65354356159553934436e-5 + (0.37512918348533521149e-7 + (0.20362979635817883229e-9 + (0.10384187833037282363e-11 + 0.49300625262222222221e-14 * t) * t) * t) * t) * t) * t; 918 } 919 case 20: { 920 T t = 2*y100 - 41; 921 return 0.36295603928292425716e-1 + (0.11089526167995268200e-2 + (0.67654845095518363577e-5 + (0.39184292949913591646e-7 + (0.21431552202133775150e-9 + (0.10994259106646731797e-11 + 0.52409949102222222221e-14 * t) * t) * t) * t) * t) * t; 922 } 923 case 21: { 924 T t = 2*y100 - 43; 925 return 0.38540888038840509795e-1 + (0.11364917134175420009e-2 + (0.70058230641246312003e-5 + (0.40943644083718586939e-7 + (0.22563034723692881631e-9 + (0.11642841011361992885e-11 + 0.55721092871111111110e-14 * t) * t) * t) * t) * t) * t; 926 } 927 case 22: { 928 T t = 2*y100 - 45; 929 return 0.40842225954785960651e-1 + (0.11650136437945673891e-2 + (0.72569945502343006619e-5 + (0.42796161861855042273e-7 + (0.23761401711005024162e-9 + (0.12332431172381557035e-11 + 0.59246802364444444445e-14 * t) * t) * t) * t) * t) * t; 930 } 931 case 23: { 932 T t = 2*y100 - 47; 933 return 0.43201627431540222422e-1 + (0.11945628793917272199e-2 + (0.75195743532849206263e-5 + (0.44747364553960993492e-7 + (0.25030885216472953674e-9 + (0.13065684400300476484e-11 + 0.63000532853333333334e-14 * t) * t) * t) * t) * t) * t; 934 } 935 case 24: { 936 T t = 2*y100 - 49; 937 return 0.45621193513810471438e-1 + (0.12251862608067529503e-2 + (0.77941720055551920319e-5 + (0.46803119830954460212e-7 + (0.26375990983978426273e-9 + (0.13845421370977119765e-11 + 0.66996477404444444445e-14 * t) * t) * t) * t) * t) * t; 938 } 939 case 25: { 940 T t = 2*y100 - 51; 941 return 0.48103121413299865517e-1 + (0.12569331386432195113e-2 + (0.80814333496367673980e-5 + (0.48969667335682018324e-7 + (0.27801515481905748484e-9 + (0.14674637611609884208e-11 + 0.71249589351111111110e-14 * t) * t) * t) * t) * t) * t; 942 } 943 case 26: { 944 T t = 2*y100 - 53; 945 return 0.50649709676983338501e-1 + (0.12898555233099055810e-2 + (0.83820428414568799654e-5 + (0.51253642652551838659e-7 + (0.29312563849675507232e-9 + (0.15556512782814827846e-11 + 0.75775607822222222221e-14 * t) * t) * t) * t) * t) * t; 946 } 947 case 27: { 948 T t = 2*y100 - 55; 949 return 0.53263363664388864181e-1 + (0.13240082443256975769e-2 + (0.86967260015007658418e-5 + (0.53662102750396795566e-7 + (0.30914568786634796807e-9 + (0.16494420240828493176e-11 + 0.80591079644444444445e-14 * t) * t) * t) * t) * t) * t; 950 } 951 case 28: { 952 T t = 2*y100 - 57; 953 return 0.55946601353500013794e-1 + (0.13594491197408190706e-2 + (0.90262520233016380987e-5 + (0.56202552975056695376e-7 + (0.32613310410503135996e-9 + (0.17491936862246367398e-11 + 0.85713381688888888890e-14 * t) * t) * t) * t) * t) * t; 954 } 955 case 29: { 956 T t = 2*y100 - 59; 957 return 0.58702059496154081813e-1 + (0.13962391363223647892e-2 + (0.93714365487312784270e-5 + (0.58882975670265286526e-7 + (0.34414937110591753387e-9 + (0.18552853109751857859e-11 + 0.91160736711111111110e-14 * t) * t) * t) * t) * t) * t; 958 } 959 case 30: { 960 T t = 2*y100 - 61; 961 return 0.61532500145144778048e-1 + (0.14344426411912015247e-2 + (0.97331446201016809696e-5 + (0.61711860507347175097e-7 + (0.36325987418295300221e-9 + (0.19681183310134518232e-11 + 0.96952238400000000000e-14 * t) * t) * t) * t) * t) * t; 962 } 963 case 31: { 964 T t = 2*y100 - 63; 965 return 0.64440817576653297993e-1 + (0.14741275456383131151e-2 + (0.10112293819576437838e-4 + (0.64698236605933246196e-7 + (0.38353412915303665586e-9 + (0.20881176114385120186e-11 + 0.10310784480000000000e-13 * t) * t) * t) * t) * t) * t; 966 } 967 case 32: { 968 T t = 2*y100 - 65; 969 return 0.67430045633130393282e-1 + (0.15153655418916540370e-2 + (0.10509857606888328667e-4 + (0.67851706529363332855e-7 + (0.40504602194811140006e-9 + (0.22157325110542534469e-11 + 0.10964842115555555556e-13 * t) * t) * t) * t) * t) * t; 970 } 971 case 33: { 972 T t = 2*y100 - 67; 973 return 0.70503365513338850709e-1 + (0.15582323336495709827e-2 + (0.10926868866865231089e-4 + (0.71182482239613507542e-7 + (0.42787405890153386710e-9 + (0.23514379522274416437e-11 + 0.11659571751111111111e-13 * t) * t) * t) * t) * t) * t; 974 } 975 case 34: { 976 T t = 2*y100 - 69; 977 return 0.73664114037944596353e-1 + (0.16028078812438820413e-2 + (0.11364423678778207991e-4 + (0.74701423097423182009e-7 + (0.45210162777476488324e-9 + (0.24957355004088569134e-11 + 0.12397238257777777778e-13 * t) * t) * t) * t) * t) * t; 978 } 979 case 35: { 980 T t = 2*y100 - 71; 981 return 0.76915792420819562379e-1 + (0.16491766623447889354e-2 + (0.11823685320041302169e-4 + (0.78420075993781544386e-7 + (0.47781726956916478925e-9 + (0.26491544403815724749e-11 + 0.13180196462222222222e-13 * t) * t) * t) * t) * t) * t; 982 } 983 case 36: { 984 T t = 2*y100 - 73; 985 return 0.80262075578094612819e-1 + (0.16974279491709504117e-2 + (0.12305888517309891674e-4 + (0.82350717698979042290e-7 + (0.50511496109857113929e-9 + (0.28122528497626897696e-11 + 0.14010889635555555556e-13 * t) * t) * t) * t) * t) * t; 986 } 987 case 37: { 988 T t = 2*y100 - 75; 989 return 0.83706822008980357446e-1 + (0.17476561032212656962e-2 + (0.12812343958540763368e-4 + (0.86506399515036435592e-7 + (0.53409440823869467453e-9 + (0.29856186620887555043e-11 + 0.14891851591111111111e-13 * t) * t) * t) * t) * t) * t; 990 } 991 case 38: { 992 T t = 2*y100 - 77; 993 return 0.87254084284461718231e-1 + (0.17999608886001962327e-2 + (0.13344443080089492218e-4 + (0.90900994316429008631e-7 + (0.56486134972616465316e-9 + (0.31698707080033956934e-11 + 0.15825697795555555556e-13 * t) * t) * t) * t) * t) * t; 994 } 995 case 39: { 996 T t = 2*y100 - 79; 997 return 0.90908120182172748487e-1 + (0.18544478050657699758e-2 + (0.13903663143426120077e-4 + (0.95549246062549906177e-7 + (0.59752787125242054315e-9 + (0.33656597366099099413e-11 + 0.16815130613333333333e-13 * t) * t) * t) * t) * t) * t; 998 } 999 case 40: { 1000 T t = 2*y100 - 81; 1001 return 0.94673404508075481121e-1 + (0.19112284419887303347e-2 + (0.14491572616545004930e-4 + (0.10046682186333613697e-6 + (0.63221272959791000515e-9 + (0.35736693975589130818e-11 + 0.17862931591111111111e-13 * t) * t) * t) * t) * t) * t; 1002 } 1003 case 41: { 1004 T t = 2*y100 - 83; 1005 return 0.98554641648004456555e-1 + (0.19704208544725622126e-2 + (0.15109836875625443935e-4 + (0.10567036667675984067e-6 + (0.66904168640019354565e-9 + (0.37946171850824333014e-11 + 0.18971959040000000000e-13 * t) * t) * t) * t) * t) * t; 1006 } 1007 case 42: { 1008 T t = 2*y100 - 85; 1009 return 0.10255677889470089531e0 + (0.20321499629472857418e-2 + (0.15760224242962179564e-4 + (0.11117756071353507391e-6 + (0.70814785110097658502e-9 + (0.40292553276632563925e-11 + 0.20145143075555555556e-13 * t) * t) * t) * t) * t) * t; 1010 } 1011 case 43: { 1012 T t = 2*y100 - 87; 1013 return 0.10668502059865093318e0 + (0.20965479776148731610e-2 + (0.16444612377624983565e-4 + (0.11700717962026152749e-6 + (0.74967203250938418991e-9 + (0.42783716186085922176e-11 + 0.21385479360000000000e-13 * t) * t) * t) * t) * t) * t; 1014 } 1015 case 44: { 1016 T t = 2*y100 - 89; 1017 return 0.11094484319386444474e0 + (0.21637548491908170841e-2 + (0.17164995035719657111e-4 + (0.12317915750735938089e-6 + (0.79376309831499633734e-9 + (0.45427901763106353914e-11 + 0.22696025653333333333e-13 * t) * t) * t) * t) * t) * t; 1018 } 1019 case 45: { 1020 T t = 2*y100 - 91; 1021 return 0.11534201115268804714e0 + (0.22339187474546420375e-2 + (0.17923489217504226813e-4 + (0.12971465288245997681e-6 + (0.84057834180389073587e-9 + (0.48233721206418027227e-11 + 0.24079890062222222222e-13 * t) * t) * t) * t) * t) * t; 1022 } 1023 case 46: { 1024 T t = 2*y100 - 93; 1025 return 0.11988259392684094740e0 + (0.23071965691918689601e-2 + (0.18722342718958935446e-4 + (0.13663611754337957520e-6 + (0.89028385488493287005e-9 + (0.51210161569225846701e-11 + 0.25540227111111111111e-13 * t) * t) * t) * t) * t) * t; 1026 } 1027 case 47: { 1028 T t = 2*y100 - 95; 1029 return 0.12457298393509812907e0 + (0.23837544771809575380e-2 + (0.19563942105711612475e-4 + (0.14396736847739470782e-6 + (0.94305490646459247016e-9 + (0.54366590583134218096e-11 + 0.27080225920000000000e-13 * t) * t) * t) * t) * t) * t; 1030 } 1031 case 48: { 1032 T t = 2*y100 - 97; 1033 return 0.12941991566142438816e0 + (0.24637684719508859484e-2 + (0.20450821127475879816e-4 + (0.15173366280523906622e-6 + (0.99907632506389027739e-9 + (0.57712760311351625221e-11 + 0.28703099555555555556e-13 * t) * t) * t) * t) * t) * t; 1034 } 1035 case 49: { 1036 T t = 2*y100 - 99; 1037 return 0.13443048593088696613e0 + (0.25474249981080823877e-2 + (0.21385669591362915223e-4 + (0.15996177579900443030e-6 + (0.10585428844575134013e-8 + (0.61258809536787882989e-11 + 0.30412080142222222222e-13 * t) * t) * t) * t) * t) * t; 1038 } 1039 case 50: { 1040 T t = 2*y100 - 101; 1041 return 0.13961217543434561353e0 + (0.26349215871051761416e-2 + (0.22371342712572567744e-4 + (0.16868008199296822247e-6 + (0.11216596910444996246e-8 + (0.65015264753090890662e-11 + 0.32210394506666666666e-13 * t) * t) * t) * t) * t) * t; 1042 } 1043 case 51: { 1044 T t = 2*y100 - 103; 1045 return 0.14497287157673800690e0 + (0.27264675383982439814e-2 + (0.23410870961050950197e-4 + (0.17791863939526376477e-6 + (0.11886425714330958106e-8 + (0.68993039665054288034e-11 + 0.34101266222222222221e-13 * t) * t) * t) * t) * t) * t; 1046 } 1047 case 52: { 1048 T t = 2*y100 - 105; 1049 return 0.15052089272774618151e0 + (0.28222846410136238008e-2 + (0.24507470422713397006e-4 + (0.18770927679626136909e-6 + (0.12597184587583370712e-8 + (0.73203433049229821618e-11 + 0.36087889048888888890e-13 * t) * t) * t) * t) * t) * t; 1050 } 1051 case 53: { 1052 T t = 2*y100 - 107; 1053 return 0.15626501395774612325e0 + (0.29226079376196624949e-2 + (0.25664553693768450545e-4 + (0.19808568415654461964e-6 + (0.13351257759815557897e-8 + (0.77658124891046760667e-11 + 0.38173420035555555555e-13 * t) * t) * t) * t) * t) * t; 1054 } 1055 case 54: { 1056 T t = 2*y100 - 109; 1057 return 0.16221449434620737567e0 + (0.30276865332726475672e-2 + (0.26885741326534564336e-4 + (0.20908350604346384143e-6 + (0.14151148144240728728e-8 + (0.82369170665974313027e-11 + 0.40360957457777777779e-13 * t) * t) * t) * t) * t) * t; 1058 } 1059 case 55: { 1060 T t = 2*y100 - 111; 1061 return 0.16837910595412130659e0 + (0.31377844510793082301e-2 + (0.28174873844911175026e-4 + (0.22074043807045782387e-6 + (0.14999481055996090039e-8 + (0.87348993661930809254e-11 + 0.42653528977777777779e-13 * t) * t) * t) * t) * t) * t; 1062 } 1063 case 56: { 1064 T t = 2*y100 - 113; 1065 return 0.17476916455659369953e0 + (0.32531815370903068316e-2 + (0.29536024347344364074e-4 + (0.23309632627767074202e-6 + (0.15899007843582444846e-8 + (0.92610375235427359475e-11 + 0.45054073102222222221e-13 * t) * t) * t) * t) * t) * t; 1066 } 1067 case 57: { 1068 T t = 2*y100 - 115; 1069 return 0.18139556223643701364e0 + (0.33741744168096996041e-2 + (0.30973511714709500836e-4 + (0.24619326937592290996e-6 + (0.16852609412267750744e-8 + (0.98166442942854895573e-11 + 0.47565418097777777779e-13 * t) * t) * t) * t) * t) * t; 1070 } 1071 case 58: { 1072 T t = 2*y100 - 117; 1073 return 0.18826980194443664549e0 + (0.35010775057740317997e-2 + (0.32491914440014267480e-4 + (0.26007572375886319028e-6 + (0.17863299617388376116e-8 + (0.10403065638343878679e-10 + 0.50190265831111111110e-13 * t) * t) * t) * t) * t) * t; 1074 } 1075 case 59: { 1076 T t = 2*y100 - 119; 1077 return 0.19540403413693967350e0 + (0.36342240767211326315e-2 + (0.34096085096200907289e-4 + (0.27479061117017637474e-6 + (0.18934228504790032826e-8 + (0.11021679075323598664e-10 + 0.52931171733333333334e-13 * t) * t) * t) * t) * t) * t; 1078 } 1079 case 60: { 1080 T t = 2*y100 - 121; 1081 return 0.20281109560651886959e0 + (0.37739673859323597060e-2 + (0.35791165457592409054e-4 + (0.29038742889416172404e-6 + (0.20068685374849001770e-8 + (0.11673891799578381999e-10 + 0.55790523093333333334e-13 * t) * t) * t) * t) * t) * t; 1082 } 1083 case 61: { 1084 T t = 2*y100 - 123; 1085 return 0.21050455062669334978e0 + (0.39206818613925652425e-2 + (0.37582602289680101704e-4 + (0.30691836231886877385e-6 + (0.21270101645763677824e-8 + (0.12361138551062899455e-10 + 0.58770520160000000000e-13 * t) * t) * t) * t) * t) * t; 1086 } 1087 case 62: { 1088 T t = 2*y100 - 125; 1089 return 0.21849873453703332479e0 + (0.40747643554689586041e-2 + (0.39476163820986711501e-4 + (0.32443839970139918836e-6 + (0.22542053491518680200e-8 + (0.13084879235290858490e-10 + 0.61873153262222222221e-13 * t) * t) * t) * t) * t) * t; 1090 } 1091 case 63: { 1092 T t = 2*y100 - 127; 1093 return 0.22680879990043229327e0 + (0.42366354648628516935e-2 + (0.41477956909656896779e-4 + (0.34300544894502810002e-6 + (0.23888264229264067658e-8 + (0.13846596292818514601e-10 + 0.65100183751111111110e-13 * t) * t) * t) * t) * t) * t; 1094 } 1095 case 64: { 1096 T t = 2*y100 - 129; 1097 return 0.23545076536988703937e0 + (0.44067409206365170888e-2 + (0.43594444916224700881e-4 + (0.36268045617760415178e-6 + (0.25312606430853202748e-8 + (0.14647791812837903061e-10 + 0.68453122631111111110e-13 * t) * t) * t) * t) * t) * t; 1098 } 1099 case 65: { 1100 T t = 2*y100 - 131; 1101 return 0.24444156740777432838e0 + (0.45855530511605787178e-2 + (0.45832466292683085475e-4 + (0.38352752590033030472e-6 + (0.26819103733055603460e-8 + (0.15489984390884756993e-10 + 0.71933206364444444445e-13 * t) * t) * t) * t) * t) * t; 1102 } 1103 case 66: { 1104 T t = 2*y100 - 133; 1105 return 0.25379911500634264643e0 + (0.47735723208650032167e-2 + (0.48199253896534185372e-4 + (0.40561404245564732314e-6 + (0.28411932320871165585e-8 + (0.16374705736458320149e-10 + 0.75541379822222222221e-13 * t) * t) * t) * t) * t) * t; 1106 } 1107 case 67: { 1108 T t = 2*y100 - 135; 1109 return 0.26354234756393613032e0 + (0.49713289477083781266e-2 + (0.50702455036930367504e-4 + (0.42901079254268185722e-6 + (0.30095422058900481753e-8 + (0.17303497025347342498e-10 + 0.79278273368888888890e-13 * t) * t) * t) * t) * t) * t; 1110 } 1111 case 68: { 1112 T t = 2*y100 - 137; 1113 return 0.27369129607732343398e0 + (0.51793846023052643767e-2 + (0.53350152258326602629e-4 + (0.45379208848865015485e-6 + (0.31874057245814381257e-8 + (0.18277905010245111046e-10 + 0.83144182364444444445e-13 * t) * t) * t) * t) * t) * t; 1114 } 1115 case 69: { 1116 T t = 2*y100 - 139; 1117 return 0.28426714781640316172e0 + (0.53983341916695141966e-2 + (0.56150884865255810638e-4 + (0.48003589196494734238e-6 + (0.33752476967570796349e-8 + (0.19299477888083469086e-10 + 0.87139049137777777779e-13 * t) * t) * t) * t) * t) * t; 1118 } 1119 case 70: { 1120 T t = 2*y100 - 141; 1121 return 0.29529231465348519920e0 + (0.56288077305420795663e-2 + (0.59113671189913307427e-4 + (0.50782393781744840482e-6 + (0.35735475025851713168e-8 + (0.20369760937017070382e-10 + 0.91262442613333333334e-13 * t) * t) * t) * t) * t) * t; 1122 } 1123 case 71: { 1124 T t = 2*y100 - 143; 1125 return 0.30679050522528838613e0 + (0.58714723032745403331e-2 + (0.62248031602197686791e-4 + (0.53724185766200945789e-6 + (0.37827999418960232678e-8 + (0.21490291930444538307e-10 + 0.95513539182222222221e-13 * t) * t) * t) * t) * t) * t; 1126 } 1127 case 72: { 1128 T t = 2*y100 - 145; 1129 return 0.31878680111173319425e0 + (0.61270341192339103514e-2 + (0.65564012259707640976e-4 + (0.56837930287837738996e-6 + (0.40035151353392378882e-8 + (0.22662596341239294792e-10 + 0.99891109760000000000e-13 * t) * t) * t) * t) * t) * t; 1130 } 1131 case 73: { 1132 T t = 2*y100 - 147; 1133 return 0.33130773722152622027e0 + (0.63962406646798080903e-2 + (0.69072209592942396666e-4 + (0.60133006661885941812e-6 + (0.42362183765883466691e-8 + (0.23888182347073698382e-10 + 0.10439349811555555556e-12 * t) * t) * t) * t) * t) * t; 1134 } 1135 case 74: { 1136 T t = 2*y100 - 149; 1137 return 0.34438138658041336523e0 + (0.66798829540414007258e-2 + (0.72783795518603561144e-4 + (0.63619220443228800680e-6 + (0.44814499336514453364e-8 + (0.25168535651285475274e-10 + 0.10901861383111111111e-12 * t) * t) * t) * t) * t) * t; 1138 } 1139 case 75: { 1140 T t = 2*y100 - 151; 1141 return 0.35803744972380175583e0 + (0.69787978834882685031e-2 + (0.76710543371454822497e-4 + (0.67306815308917386747e-6 + (0.47397647975845228205e-8 + (0.26505114141143050509e-10 + 0.11376390933333333333e-12 * t) * t) * t) * t) * t) * t; 1142 } 1143 case 76: { 1144 T t = 2*y100 - 153; 1145 return 0.37230734890119724188e0 + (0.72938706896461381003e-2 + (0.80864854542670714092e-4 + (0.71206484718062688779e-6 + (0.50117323769745883805e-8 + (0.27899342394100074165e-10 + 0.11862637614222222222e-12 * t) * t) * t) * t) * t) * t; 1146 } 1147 case 77: { 1148 T t = 2*y100 - 155; 1149 return 0.38722432730555448223e0 + (0.76260375162549802745e-2 + (0.85259785810004603848e-4 + (0.75329383305171327677e-6 + (0.52979361368388119355e-8 + (0.29352606054164086709e-10 + 0.12360253370666666667e-12 * t) * t) * t) * t) * t) * t; 1150 } 1151 case 78: { 1152 T t = 2*y100 - 157; 1153 return 0.40282355354616940667e0 + (0.79762880915029728079e-2 + (0.89909077342438246452e-4 + (0.79687137961956194579e-6 + (0.55989731807360403195e-8 + (0.30866246101464869050e-10 + 0.12868841946666666667e-12 * t) * t) * t) * t) * t) * t; 1154 } 1155 case 79: { 1156 T t = 2*y100 - 159; 1157 return 0.41914223158913787649e0 + (0.83456685186950463538e-2 + (0.94827181359250161335e-4 + (0.84291858561783141014e-6 + (0.59154537751083485684e-8 + (0.32441553034347469291e-10 + 0.13387957943111111111e-12 * t) * t) * t) * t) * t) * t; 1158 } 1159 case 80: { 1160 T t = 2*y100 - 161; 1161 return 0.43621971639463786896e0 + (0.87352841828289495773e-2 + (0.10002929142066799966e-3 + (0.89156148280219880024e-6 + (0.62480008150788597147e-8 + (0.34079760983458878910e-10 + 0.13917107176888888889e-12 * t) * t) * t) * t) * t) * t; 1162 } 1163 case 81: { 1164 T t = 2*y100 - 163; 1165 return 0.45409763548534330981e0 + (0.91463027755548240654e-2 + (0.10553137232446167258e-3 + (0.94293113464638623798e-6 + (0.65972492312219959885e-8 + (0.35782041795476563662e-10 + 0.14455745872000000000e-12 * t) * t) * t) * t) * t) * t; 1166 } 1167 case 82: { 1168 T t = 2*y100 - 165; 1169 return 0.47282001668512331468e0 + (0.95799574408860463394e-2 + (0.11135019058000067469e-3 + (0.99716373005509038080e-6 + (0.69638453369956970347e-8 + (0.37549499088161345850e-10 + 0.15003280712888888889e-12 * t) * t) * t) * t) * t) * t; 1170 } 1171 case 83: { 1172 T t = 2*y100 - 167; 1173 return 0.49243342227179841649e0 + (0.10037550043909497071e-1 + (0.11750334542845234952e-3 + (0.10544006716188967172e-5 + (0.73484461168242224872e-8 + (0.39383162326435752965e-10 + 0.15559069118222222222e-12 * t) * t) * t) * t) * t) * t; 1174 } 1175 case 84: { 1176 T t = 2*y100 - 169; 1177 return 0.51298708979209258326e0 + (0.10520454564612427224e-1 + (0.12400930037494996655e-3 + (0.11147886579371265246e-5 + (0.77517184550568711454e-8 + (0.41283980931872622611e-10 + 0.16122419680000000000e-12 * t) * t) * t) * t) * t) * t; 1178 } 1179 case 85: { 1180 T t = 2*y100 - 171; 1181 return 0.53453307979101369843e0 + (0.11030120618800726938e-1 + (0.13088741519572269581e-3 + (0.11784797595374515432e-5 + (0.81743383063044825400e-8 + (0.43252818449517081051e-10 + 0.16692592640000000000e-12 * t) * t) * t) * t) * t) * t; 1182 } 1183 case 86: { 1184 T t = 2*y100 - 173; 1185 return 0.55712643071169299478e0 + (0.11568077107929735233e-1 + (0.13815797838036651289e-3 + (0.12456314879260904558e-5 + (0.86169898078969313597e-8 + (0.45290446811539652525e-10 + 0.17268801084444444444e-12 * t) * t) * t) * t) * t) * t; 1186 } 1187 case 87: { 1188 T t = 2*y100 - 175; 1189 return 0.58082532122519320968e0 + (0.12135935999503877077e-1 + (0.14584223996665838559e-3 + (0.13164068573095710742e-5 + (0.90803643355106020163e-8 + (0.47397540713124619155e-10 + 0.17850211608888888889e-12 * t) * t) * t) * t) * t) * t; 1190 } 1191 case 88: { 1192 T t = 2*y100 - 177; 1193 return 0.60569124025293375554e0 + (0.12735396239525550361e-1 + (0.15396244472258863344e-3 + (0.13909744385382818253e-5 + (0.95651595032306228245e-8 + (0.49574672127669041550e-10 + 0.18435945564444444444e-12 * t) * t) * t) * t) * t) * t; 1194 } 1195 case 89: { 1196 T t = 2*y100 - 179; 1197 return 0.63178916494715716894e0 + (0.13368247798287030927e-1 + (0.16254186562762076141e-3 + (0.14695084048334056083e-5 + (0.10072078109604152350e-7 + (0.51822304995680707483e-10 + 0.19025081422222222222e-12 * t) * t) * t) * t) * t) * t; 1198 } 1199 case 90: { 1200 T t = 2*y100 - 181; 1201 return 0.65918774689725319200e0 + (0.14036375850601992063e-1 + (0.17160483760259706354e-3 + (0.15521885688723188371e-5 + (0.10601827031535280590e-7 + (0.54140790105837520499e-10 + 0.19616655146666666667e-12 * t) * t) * t) * t) * t) * t; 1202 } 1203 case 91: { 1204 T t = 2*y100 - 183; 1205 return 0.68795950683174433822e0 + (0.14741765091365869084e-1 + (0.18117679143520433835e-3 + (0.16392004108230585213e-5 + (0.11155116068018043001e-7 + (0.56530360194925690374e-10 + 0.20209663662222222222e-12 * t) * t) * t) * t) * t) * t; 1206 } 1207 case 92: { 1208 T t = 2*y100 - 185; 1209 return 0.71818103808729967036e0 + (0.15486504187117112279e-1 + (0.19128428784550923217e-3 + (0.17307350969359975848e-5 + (0.11732656736113607751e-7 + (0.58991125287563833603e-10 + 0.20803065333333333333e-12 * t) * t) * t) * t) * t) * t; 1210 } 1211 case 93: { 1212 T t = 2*y100 - 187; 1213 return 0.74993321911726254661e0 + (0.16272790364044783382e-1 + (0.20195505163377912645e-3 + (0.18269894883203346953e-5 + (0.12335161021630225535e-7 + (0.61523068312169087227e-10 + 0.21395783431111111111e-12 * t) * t) * t) * t) * t) * t; 1214 } 1215 case 94: { 1216 T t = 2*y100 - 189; 1217 return 0.78330143531283492729e0 + (0.17102934132652429240e-1 + (0.21321800585063327041e-3 + (0.19281661395543913713e-5 + (0.12963340087354341574e-7 + (0.64126040998066348872e-10 + 0.21986708942222222222e-12 * t) * t) * t) * t) * t) * t; 1218 } 1219 case 95: { 1220 T t = 2*y100 - 191; 1221 return 0.81837581041023811832e0 + (0.17979364149044223802e-1 + (0.22510330592753129006e-3 + (0.20344732868018175389e-5 + (0.13617902941839949718e-7 + (0.66799760083972474642e-10 + 0.22574701262222222222e-12 * t) * t) * t) * t) * t) * t; 1222 } 1223 case 96: { 1224 T t = 2*y100 - 193; 1225 return 0.85525144775685126237e0 + (0.18904632212547561026e-1 + (0.23764237370371255638e-3 + (0.21461248251306387979e-5 + (0.14299555071870523786e-7 + (0.69543803864694171934e-10 + 0.23158593688888888889e-12 * t) * t) * t) * t) * t) * t; 1226 } 1227 case 97: { 1228 T t = 2*y100 - 195; 1229 return 0.89402868170849933734e0 + (0.19881418399127202569e-1 + (0.25086793128395995798e-3 + (0.22633402747585233180e-5 + (0.15008997042116532283e-7 + (0.72357609075043941261e-10 + 0.23737194737777777778e-12 * t) * t) * t) * t) * t) * t; 1230 } 1231 case 98: { 1232 T t = 2*y100 - 197; 1233 return 0.93481333942870796363e0 + (0.20912536329780368893e-1 + (0.26481403465998477969e-3 + (0.23863447359754921676e-5 + (0.15746923065472184451e-7 + (0.75240468141720143653e-10 + 0.24309291271111111111e-12 * t) * t) * t) * t) * t) * t; 1234 } 1235 case 99: { 1236 T t = 2*y100 - 199; 1237 return 0.97771701335885035464e0 + (0.22000938572830479551e-1 + (0.27951610702682383001e-3 + (0.25153688325245314530e-5 + (0.16514019547822821453e-7 + (0.78191526829368231251e-10 + 0.24873652355555555556e-12 * t) * t) * t) * t) * t) * t; 1238 } 1239 } 1240 1241 // we only get here if y = 1, i.e. |x| < 4*eps, in which case 1242 // erfcx is within 1e-15 of 1.. 1243 return 1.; 1244 } 1245 1246 template <typename T> 1247 T erfcx(T x) { 1248 // Short-circuits on NaN (returning NaN) 1249 if (x != x) { 1250 return x; 1251 } 1252 1253 if (x >= 0) { 1254 if (x > T{50}) { // continued-fraction expansion is faster 1255 const T ispi = 0.56418958354775628694807945156; // 1 / sqrt(pi) 1256 1257 if (x > T{5e7}) { // 1-term expansion, important to avoid overflow 1258 return ispi / x; 1259 } 1260 1261 /* 5-term expansion (rely on compiler for CSE), simplified from: 1262 ispi / (x+0.5/(x+1/(x+1.5/(x+2/x)))) */ 1263 return ispi * ((x*x) * (x*x+T{4.5}) + T{2}) / (x * ((x*x) * (x*x+T{5}) + T{3.75})); 1264 } 1265 1266 // x >= 0 x <= 50 1267 return erfcx_y100(T{400} / (T{4} + x)); 1268 } 1269 1270 // x < 0 1271 if (x < T{-26.7}) { 1272 return POS_INFINITY; 1273 } else if (x < T{-6.1}) { 1274 return T{2} * exp(x * x); 1275 } 1276 1277 // x < 0 and x >= -6.1 1278 return T{2} * exp(x * x) - erfcx_y100(T{400} / (T{4} - x)); 1279 } 1280 ); // erfcx_string 1281 1282 const auto airy_ai_string = jiterator_stringify( 1283 template<typename T> 1284 T airy_ai_forward(T x) { 1285 static const T AN[] = { 1286 +3.46538101525629032477e-01, 1287 +1.20075952739645805542e+01, 1288 +7.62796053615234516538e+01, 1289 +1.68089224934630576269e+02, 1290 +1.59756391350164413639e+02, 1291 +7.05360906840444183113e+01, 1292 +1.40264691163389668864e+01, 1293 +9.99999999999999995305e-01, 1294 }; 1295 1296 static const T AD[] = { 1297 +5.67594532638770212846e-01, 1298 +1.47562562584847203173e+01, 1299 +8.45138970141474626562e+01, 1300 +1.77318088145400459522e+02, 1301 +1.64234692871529701831e+02, 1302 +7.14778400825575695274e+01, 1303 +1.40959135607834029598e+01, 1304 +1.00000000000000000470e+00, 1305 }; 1306 1307 static const T AFN[] = { 1308 -1.31696323418331795333e-01, 1309 -6.26456544431912369773e-01, 1310 -6.93158036036933542233e-01, 1311 -2.79779981545119124951e-01, 1312 -4.91900132609500318020e-02, 1313 -4.06265923594885404393e-03, 1314 -1.59276496239262096340e-04, 1315 -2.77649108155232920844e-06, 1316 -1.67787698489114633780e-08, 1317 }; 1318 1319 static const T AFD[] = { 1320 +1.33560420706553243746e+01, 1321 +3.26825032795224613948e+01, 1322 +2.67367040941499554804e+01, 1323 +9.18707402907259625840e+00, 1324 +1.47529146771666414581e+00, 1325 +1.15687173795188044134e-01, 1326 +4.40291641615211203805e-03, 1327 +7.54720348287414296618e-05, 1328 +4.51850092970580378464e-07, 1329 }; 1330 1331 static const T AGN[] = { 1332 +1.97339932091685679179e-02, 1333 +3.91103029615688277255e-01, 1334 +1.06579897599595591108e+00, 1335 +9.39169229816650230044e-01, 1336 +3.51465656105547619242e-01, 1337 +6.33888919628925490927e-02, 1338 +5.85804113048388458567e-03, 1339 +2.82851600836737019778e-04, 1340 +6.98793669997260967291e-06, 1341 +8.11789239554389293311e-08, 1342 +3.41551784765923618484e-10, 1343 }; 1344 1345 static const T AGD[] = { 1346 +9.30892908077441974853e+00, 1347 +1.98352928718312140417e+01, 1348 +1.55646628932864612953e+01, 1349 +5.47686069422975497931e+00, 1350 +9.54293611618961883998e-01, 1351 +8.64580826352392193095e-02, 1352 +4.12656523824222607191e-03, 1353 +1.01259085116509135510e-04, 1354 +1.17166733214413521882e-06, 1355 +4.91834570062930015649e-09, 1356 }; 1357 1358 int domain_flag = 0; 1359 1360 T ai; 1361 1362 if (isinf(x)) { 1363 return NAN; 1364 } 1365 1366 if (x > T(103.892)) { 1367 return T(0.0); 1368 } 1369 1370 T f; 1371 T g; 1372 T k; 1373 1374 if (x < T(-2.09)) { 1375 T z = T(1.0) / (T(-2.0) * x * sqrt(-x) / T(3.0)); 1376 1377 T afn = 0.0; 1378 1379 for (uint8_t index = 0; index <= 8; index++) { 1380 afn = afn * (z * z) + AFN[index]; 1381 } 1382 1383 T afd = 0.0; 1384 1385 for (uint8_t index = 0; index <= 8; index++) { 1386 afd = afd * (z * z) + AFD[index]; 1387 } 1388 1389 T agn = 0.0; 1390 1391 for (uint8_t index = 0; index <= 10 + 0; index++) { 1392 agn = agn * (z * z) + AGN[index]; 1393 } 1394 1395 T agd = 0.0; 1396 1397 for (uint8_t index = 0; index <= 10 - 1; index++) { 1398 agd = agd * (z * z) + AGD[index]; 1399 } 1400 1401 T t = T(-2.0) * x * sqrt(-x) / T(3.0) + T(0.25) * T(3.14159265358979323846); 1402 1403 return T(5.64189583547756286948e-01) / sqrt(sqrt(-x)) * (sin(t) * (T(1.0) + z * z * afn / afd) - cos(t) * (z * agn / agd)); 1404 } 1405 1406 if (x >= T(2.09)) { 1407 domain_flag = 5; 1408 1409 T zeta = T(2.0) * x * sqrt(x) / T(3.0); 1410 1411 T an = 0.0; 1412 1413 for (uint8_t index = 0; index <= 7; index++) { 1414 an = an * (T(1.0) / zeta) + AN[index]; 1415 } 1416 1417 T ad = 0.0; 1418 1419 for (uint8_t index = 0; index <= 7; index++) { 1420 ad = ad * (T(1.0) / zeta) + AD[index]; 1421 } 1422 1423 ai = T(5.64189583547756286948e-01) * (an / ad) / (T(2.0) * sqrt(sqrt(x)) * exp(zeta)); 1424 1425 if (x > T(8.3203353)) { 1426 return ai; 1427 } 1428 } 1429 1430 f = 1.0; 1431 g = x; 1432 k = 1.0; 1433 1434 T m = 1.0; 1435 T n = x; 1436 T t = 1.0; 1437 T z = x * x * x; 1438 1439 while (t > T(1.11022302462515654042e-16)) { 1440 m *= z; 1441 k += T(1.0); 1442 m /= k; 1443 n *= z; 1444 k += T(1.0); 1445 n /= k; 1446 m /= k; 1447 f += m; 1448 k += T(1.0); 1449 n /= k; 1450 g += n; 1451 1452 t = abs(m / f); 1453 } 1454 1455 if ((domain_flag & 1) == 0) { 1456 return T(0.355028053887817239260) * f - T(0.258819403792806798405) * g; 1457 } 1458 1459 return ai; 1460 } // T airy_ai(T x) 1461 ); // airy_ai_string 1462 1463 const auto bessel_j0_string = jiterator_stringify( 1464 template<typename T> 1465 T bessel_j0_forward(T x) { 1466 static const T PP[] = { 1467 +7.96936729297347051624e-04, 1468 +8.28352392107440799803e-02, 1469 +1.23953371646414299388e+00, 1470 +5.44725003058768775090e+00, 1471 +8.74716500199817011941e+00, 1472 +5.30324038235394892183e+00, 1473 +9.99999999999999997821e-01, 1474 }; 1475 1476 static const T PQ[] = { 1477 +9.24408810558863637013e-04, 1478 +8.56288474354474431428e-02, 1479 +1.25352743901058953537e+00, 1480 +5.47097740330417105182e+00, 1481 +8.76190883237069594232e+00, 1482 +5.30605288235394617618e+00, 1483 +1.00000000000000000218e+00, 1484 }; 1485 1486 static const T QP[] = { 1487 -1.13663838898469149931e-02, 1488 -1.28252718670509318512e+00, 1489 -1.95539544257735972385e+01, 1490 -9.32060152123768231369e+01, 1491 -1.77681167980488050595e+02, 1492 -1.47077505154951170175e+02, 1493 -5.14105326766599330220e+01, 1494 -6.05014350600728481186e+00, 1495 }; 1496 1497 static const T QQ[] = { 1498 +6.43178256118178023184e+01, 1499 +8.56430025976980587198e+02, 1500 +3.88240183605401609683e+03, 1501 +7.24046774195652478189e+03, 1502 +5.93072701187316984827e+03, 1503 +2.06209331660327847417e+03, 1504 +2.42005740240291393179e+02, 1505 }; 1506 1507 static const T RP[] = { 1508 -4.79443220978201773821e+09, 1509 +1.95617491946556577543e+12, 1510 -2.49248344360967716204e+14, 1511 +9.70862251047306323952e+15, 1512 }; 1513 1514 static const T RQ[] = { 1515 +4.99563147152651017219e+02, 1516 +1.73785401676374683123e+05, 1517 +4.84409658339962045305e+07, 1518 +1.11855537045356834862e+10, 1519 +2.11277520115489217587e+12, 1520 +3.10518229857422583814e+14, 1521 +3.18121955943204943306e+16, 1522 +1.71086294081043136091e+18, 1523 }; 1524 1525 if (x < T(0)) { 1526 x = -x; 1527 } 1528 1529 if (x <= T(5.0)) { 1530 if (x < T(0.00001)) { 1531 return T(1.0) - x * x / T(4.0); 1532 } 1533 1534 T rp = 0.0; 1535 1536 for (uint8_t index = 0; index <= 3; index++) { 1537 rp = rp * (x * x) + RP[index]; 1538 } 1539 1540 T rq = 0.0; 1541 1542 for (uint8_t index = 0; index <= 7; index++) { 1543 rq = rq * (x * x) + RQ[index]; 1544 } 1545 1546 return (x * x - T(5.78318596294678452118e+00)) * (x * x - T(3.04712623436620863991e+01)) * rp / rq; 1547 } 1548 1549 T pp = 0.0; 1550 1551 for (uint8_t index = 0; index <= 6; index++) { 1552 pp = pp * (T(25.0) / (x * x)) + PP[index]; 1553 } 1554 1555 T pq = 0.0; 1556 1557 for (uint8_t index = 0; index <= 6; index++) { 1558 pq = pq * (T(25.0) / (x * x)) + PQ[index]; 1559 } 1560 1561 T qp = 0.0; 1562 1563 for (uint8_t index = 0; index <= 7; index++) { 1564 qp = qp * (T(25.0) / (x * x)) + QP[index]; 1565 } 1566 1567 T qq = 0.0; 1568 1569 for (uint8_t index = 0; index <= 6; index++) { 1570 qq = qq * (T(25.0) / (x * x)) + QQ[index]; 1571 } 1572 1573 return (pp / pq * cos(x - T(0.785398163397448309615660845819875721)) - T(5.0) / x * (qp / qq) * sin(x - T(0.785398163397448309615660845819875721))) * T(0.797884560802865355879892119868763737) / sqrt(x); 1574 } // bessel_j0_forward(T x) 1575 ); // bessel_j0_string 1576 1577 const auto bessel_y0_string = bessel_j0_string + jiterator_stringify( 1578 template<typename T> 1579 T bessel_y0_forward(T x) { 1580 static const T PP[] = { 1581 +7.96936729297347051624e-04, 1582 +8.28352392107440799803e-02, 1583 +1.23953371646414299388e+00, 1584 +5.44725003058768775090e+00, 1585 +8.74716500199817011941e+00, 1586 +5.30324038235394892183e+00, 1587 +9.99999999999999997821e-01, 1588 }; 1589 1590 static const T PQ[] = { 1591 +9.24408810558863637013e-04, 1592 +8.56288474354474431428e-02, 1593 +1.25352743901058953537e+00, 1594 +5.47097740330417105182e+00, 1595 +8.76190883237069594232e+00, 1596 +5.30605288235394617618e+00, 1597 +1.00000000000000000218e+00, 1598 }; 1599 1600 static const T QP[] = { 1601 -1.13663838898469149931e-02, 1602 -1.28252718670509318512e+00, 1603 -1.95539544257735972385e+01, 1604 -9.32060152123768231369e+01, 1605 -1.77681167980488050595e+02, 1606 -1.47077505154951170175e+02, 1607 -5.14105326766599330220e+01, 1608 -6.05014350600728481186e+00, 1609 }; 1610 1611 static const T QQ[] = { 1612 +6.43178256118178023184e+01, 1613 +8.56430025976980587198e+02, 1614 +3.88240183605401609683e+03, 1615 +7.24046774195652478189e+03, 1616 +5.93072701187316984827e+03, 1617 +2.06209331660327847417e+03, 1618 +2.42005740240291393179e+02, 1619 }; 1620 1621 static const T YP[] = { 1622 +1.55924367855235737965e+04, 1623 -1.46639295903971606143e+07, 1624 +5.43526477051876500413e+09, 1625 -9.82136065717911466409e+11, 1626 +8.75906394395366999549e+13, 1627 -3.46628303384729719441e+15, 1628 +4.42733268572569800351e+16, 1629 -1.84950800436986690637e+16, 1630 }; 1631 1632 static const T YQ[] = { 1633 +1.04128353664259848412e+03, 1634 +6.26107330137134956842e+05, 1635 +2.68919633393814121987e+08, 1636 +8.64002487103935000337e+10, 1637 +2.02979612750105546709e+13, 1638 +3.17157752842975028269e+15, 1639 +2.50596256172653059228e+17, 1640 }; 1641 1642 if (x <= T(5.0)) { 1643 if (x == T(0.0)) { 1644 return NEG_INFINITY; 1645 } 1646 1647 if (x < T(0.0)) { 1648 NAN; 1649 } 1650 1651 T yp = 0.0; 1652 1653 for (uint8_t index = 0; index <= 7; index++) { 1654 yp = yp * (x * x) + YP[index]; 1655 } 1656 1657 T yq = 0.0; 1658 1659 for (uint8_t index = 0; index <= 6; index++) { 1660 yq = yq * (x * x) + YQ[index]; 1661 } 1662 1663 return yp / yq + (T(0.636619772367581343075535053490057448) * log(x) * bessel_j0_forward(x)); 1664 } 1665 1666 T pp = 0.0; 1667 1668 for (uint8_t index = 0; index <= 6; index++) { 1669 pp = pp * (T(25.0) / (x * x)) + PP[index]; 1670 } 1671 1672 T pq = 0.0; 1673 1674 for (uint8_t index = 0; index <= 6; index++) { 1675 pq = pq * (T(25.0) / (x * x)) + PQ[index]; 1676 } 1677 1678 T qp = 0.0; 1679 1680 for (uint8_t index = 0; index <= 7; index++) { 1681 qp = qp * (T(25.0) / (x * x)) + QP[index]; 1682 } 1683 1684 T qq = 0.0; 1685 1686 for (uint8_t index = 0; index <= 6; index++) { 1687 qq = qq * (T(25.0) / (x * x)) + QQ[index]; 1688 } 1689 1690 return (pp / pq * sin(x - T(0.785398163397448309615660845819875721)) + T(5.0) / x * (qp / qq) * cos(x - T(0.785398163397448309615660845819875721))) * T(0.797884560802865355879892119868763737) / sqrt(x); 1691 } // bessel_y0_forward(T x) 1692 ); // bessel_y0_string 1693 1694 const auto bessel_j1_string = jiterator_stringify( 1695 template<typename T> 1696 T bessel_j1_forward(T x) { 1697 static const T PP[] = { 1698 +7.62125616208173112003e-04, 1699 +7.31397056940917570436e-02, 1700 +1.12719608129684925192e+00, 1701 +5.11207951146807644818e+00, 1702 +8.42404590141772420927e+00, 1703 +5.21451598682361504063e+00, 1704 +1.00000000000000000254e+00, 1705 }; 1706 1707 static const T PQ[] = { 1708 +5.71323128072548699714e-04, 1709 +6.88455908754495404082e-02, 1710 +1.10514232634061696926e+00, 1711 +5.07386386128601488557e+00, 1712 +8.39985554327604159757e+00, 1713 +5.20982848682361821619e+00, 1714 +9.99999999999999997461e-01, 1715 }; 1716 1717 static const T QP[] = { 1718 +5.10862594750176621635e-02, 1719 +4.98213872951233449420e+00, 1720 +7.58238284132545283818e+01, 1721 +3.66779609360150777800e+02, 1722 +7.10856304998926107277e+02, 1723 +5.97489612400613639965e+02, 1724 +2.11688757100572135698e+02, 1725 +2.52070205858023719784e+01, 1726 }; 1727 1728 static const T QQ[] = { 1729 +7.42373277035675149943e+01, 1730 +1.05644886038262816351e+03, 1731 +4.98641058337653607651e+03, 1732 +9.56231892404756170795e+03, 1733 +7.99704160447350683650e+03, 1734 +2.82619278517639096600e+03, 1735 +3.36093607810698293419e+02, 1736 }; 1737 1738 static const T RP[] = { 1739 -8.99971225705559398224e+08, 1740 +4.52228297998194034323e+11, 1741 -7.27494245221818276015e+13, 1742 +3.68295732863852883286e+15, 1743 }; 1744 1745 static const T RQ[] = { 1746 +6.20836478118054335476e+02, 1747 +2.56987256757748830383e+05, 1748 +8.35146791431949253037e+07, 1749 +2.21511595479792499675e+10, 1750 +4.74914122079991414898e+12, 1751 +7.84369607876235854894e+14, 1752 +8.95222336184627338078e+16, 1753 +5.32278620332680085395e+18, 1754 }; 1755 1756 if (x < T(0.0)) { 1757 return -bessel_j1_forward(-x); 1758 } 1759 1760 if (x <= T(5.0)) { 1761 T rp = 0.0; 1762 1763 for (uint8_t index = 0; index <= 3; index++) { 1764 rp = rp * (x * x) + RP[index]; 1765 } 1766 1767 T rq = 0.0; 1768 1769 for (uint8_t index = 0; index <= 7; index++) { 1770 rq = rq * (x * x) + RQ[index]; 1771 } 1772 1773 return rp / rq * x * (x * x - T(1.46819706421238932572e+01)) * (x * x - T(4.92184563216946036703e+01)); 1774 } 1775 1776 T pp = 0.0; 1777 1778 for (uint8_t index = 0; index <= 6; index++) { 1779 pp = pp * (T(5.0) / x * (T(5.0) / x)) + PP[index]; 1780 } 1781 1782 T pq = 0.0; 1783 1784 for (uint8_t index = 0; index <= 6; index++) { 1785 pq = pq * (T(5.0) / x * (T(5.0) / x)) + PQ[index]; 1786 } 1787 1788 T qp = 0.0; 1789 1790 for (uint8_t index = 0; index <= 7; index++) { 1791 qp = qp * (T(5.0) / x * (T(5.0) / x)) + QP[index]; 1792 } 1793 1794 T qq = 0.0; 1795 1796 for (uint8_t index = 0; index <= 6; index++) { 1797 qq = qq * (T(5.0) / x * (T(5.0) / x)) + QQ[index]; 1798 } 1799 1800 return (pp / pq * cos(x - T(2.356194490192344928846982537459627163)) - T(5.0) / x * (qp / qq) * sin(x - T(2.356194490192344928846982537459627163))) * T(0.797884560802865355879892119868763737) / sqrt(x); 1801 } // bessel_j1_forward(T x) 1802 ); // bessel_j1_string 1803 1804 const auto bessel_y1_string = bessel_j1_string + jiterator_stringify( 1805 template<typename T> 1806 T bessel_y1_forward(T x) { 1807 static const T PP[] = { 1808 +7.62125616208173112003e-04, 1809 +7.31397056940917570436e-02, 1810 +1.12719608129684925192e+00, 1811 +5.11207951146807644818e+00, 1812 +8.42404590141772420927e+00, 1813 +5.21451598682361504063e+00, 1814 +1.00000000000000000254e+00, 1815 }; 1816 1817 static const T PQ[] = { 1818 +5.71323128072548699714e-04, 1819 +6.88455908754495404082e-02, 1820 +1.10514232634061696926e+00, 1821 +5.07386386128601488557e+00, 1822 +8.39985554327604159757e+00, 1823 +5.20982848682361821619e+00, 1824 +9.99999999999999997461e-01, 1825 }; 1826 1827 static const T QP[] = { 1828 +5.10862594750176621635e-02, 1829 +4.98213872951233449420e+00, 1830 +7.58238284132545283818e+01, 1831 +3.66779609360150777800e+02, 1832 +7.10856304998926107277e+02, 1833 +5.97489612400613639965e+02, 1834 +2.11688757100572135698e+02, 1835 +2.52070205858023719784e+01, 1836 }; 1837 1838 static const T QQ[] = { 1839 +7.42373277035675149943e+01, 1840 +1.05644886038262816351e+03, 1841 +4.98641058337653607651e+03, 1842 +9.56231892404756170795e+03, 1843 +7.99704160447350683650e+03, 1844 +2.82619278517639096600e+03, 1845 +3.36093607810698293419e+02, 1846 }; 1847 1848 static const T YP[] = { 1849 +1.26320474790178026440e+09, 1850 -6.47355876379160291031e+11, 1851 +1.14509511541823727583e+14, 1852 -8.12770255501325109621e+15, 1853 +2.02439475713594898196e+17, 1854 -7.78877196265950026825e+17, 1855 }; 1856 1857 static const T YQ[] = { 1858 +5.94301592346128195359e+02, 1859 +2.35564092943068577943e+05, 1860 +7.34811944459721705660e+07, 1861 +1.87601316108706159478e+10, 1862 +3.88231277496238566008e+12, 1863 +6.20557727146953693363e+14, 1864 +6.87141087355300489866e+16, 1865 +3.97270608116560655612e+18, 1866 }; 1867 1868 if (x <= T(5.0)) { 1869 if (x == T(0.0)) { 1870 return NEG_INFINITY; 1871 } 1872 1873 if (x <= T(0.0)) { 1874 return NAN; 1875 } 1876 1877 T yp = 0.0; 1878 1879 for (uint8_t index = 0; index <= 5; index++) { 1880 yp = yp * (x * x) + YP[index]; 1881 } 1882 1883 T yq = 0.0; 1884 1885 for (uint8_t index = 0; index <= 7; index++) { 1886 yq = yq * (x * x) + YQ[index]; 1887 } 1888 1889 return x * (yp / yq) + (T(0.636619772367581343075535053490057448) * (bessel_j1_forward(x) * log(x) - T(1.0) / x)); 1890 } 1891 1892 T pp = 0.0; 1893 1894 for (uint8_t index = 0; index <= 6; index++) { 1895 pp = pp * (T(5.0) / x * (T(5.0) / x)) + PP[index]; 1896 } 1897 1898 T pq = 0.0; 1899 1900 for (uint8_t index = 0; index <= 6; index++) { 1901 pq = pq * (T(5.0) / x * (T(5.0) / x)) + PQ[index]; 1902 } 1903 1904 T qp = 0.0; 1905 1906 for (uint8_t index = 0; index <= 7; index++) { 1907 qp = qp * (T(5.0) / x * (T(5.0) / x)) + QP[index]; 1908 } 1909 1910 T qq = 0.0; 1911 1912 for (uint8_t index = 0; index <= 6; index++) { 1913 qq = qq * (T(5.0) / x * (T(5.0) / x)) + QQ[index]; 1914 } 1915 1916 return (pp / pq * sin(x - T(2.356194490192344928846982537459627163)) + T(5.0) / x * (qp / qq) * cos(x - T(2.356194490192344928846982537459627163))) * T(0.797884560802865355879892119868763737) / sqrt(x); 1917 } // bessel_y1_forward(T x) 1918 ); // bessel_y1_string 1919 1920 const auto chebyshev_polynomial_t_string = jiterator_stringify( 1921 template<typename T> 1922 T chebyshev_polynomial_t_forward(T x, int64_t n) { 1923 if (n < 0) { 1924 return T(0.0); 1925 } 1926 1927 if (abs(x) == T(1.0)) { 1928 if (x > T(0.0) || n % 2 == 0) { 1929 return T(1.0); 1930 } 1931 1932 return T(-1.0); 1933 } 1934 1935 if ((n > 6) && (abs(x) < T(1.0))) { 1936 return cos(n * acos(x)); 1937 } 1938 1939 if (n == 0) { 1940 return T(1.0); 1941 } 1942 1943 if (n == 1) { 1944 return x; 1945 } 1946 1947 T p = T(1.0); 1948 T q = x; 1949 T r; 1950 1951 for (int64_t k = 2; k <= n; k++) { 1952 r = (x + x) * q - p; 1953 p = q; 1954 q = r; 1955 } 1956 1957 return r; 1958 } // chebyshev_polynomial_t_forward(T x, int64_t n) 1959 1960 template<typename T> 1961 T chebyshev_polynomial_t_forward(T x, T n) { 1962 return chebyshev_polynomial_t_forward(x, static_cast<int64_t>(n)); 1963 } // chebyshev_polynomial_t_forward(T x, T n) 1964 ); // chebyshev_polynomial_t_string 1965 1966 const auto chebyshev_polynomial_u_string = jiterator_stringify( 1967 template<typename T> 1968 T chebyshev_polynomial_u_forward(T x, int64_t n) { 1969 if (n < 0) { 1970 return T(0.0); 1971 } 1972 1973 if (abs(x) == T(1.0)) { 1974 if (x > T(0.0) || n % 2 == 0) { 1975 return n + 1; 1976 } 1977 1978 return -(n + 1); 1979 } 1980 1981 if ((n > 8) && (abs(x) < T(1.0))) { 1982 if (sin(acos(x)) != T(0.0)) { 1983 return sin((n + 1) * acos(x)) / sin(acos(x)); 1984 } 1985 1986 return (n + 1) * cos((n + 1) * acos(x)) / x; 1987 } 1988 1989 if (n == 0) { 1990 return T(1.0); 1991 } 1992 1993 if (n == 1) { 1994 return x + x; 1995 } 1996 1997 T p = T(1.0); 1998 T q = x + x; 1999 T r; 2000 2001 for (int64_t k = 2; k <= n; k++) { 2002 r = (x + x) * q - p; 2003 p = q; 2004 q = r; 2005 } 2006 2007 return r; 2008 } // chebyshev_polynomial_u_forward(T x, int64_t n) 2009 2010 template<typename T> 2011 T chebyshev_polynomial_u_forward(T x, T n) { 2012 return chebyshev_polynomial_u_forward(x, static_cast<int64_t>(n)); 2013 } // chebyshev_polynomial_u_forward(T x, T n) 2014 ); // chebyshev_polynomial_u_string 2015 2016 const auto chebyshev_polynomial_v_string = jiterator_stringify( 2017 template<typename T> 2018 T chebyshev_polynomial_v_forward(T x, int64_t n) { 2019 if (n < 0) { 2020 return T(0.0); 2021 } 2022 2023 if (abs(x) == T(1.0)) { 2024 if (x > T(0.0)) { 2025 return T(1.0); 2026 } 2027 2028 if (n % 2 == 0) { 2029 return n + n + 1; 2030 } 2031 2032 return -(n + n + 1); 2033 } 2034 2035 if ((n > 8) && (abs(x) < T(1.0))) { 2036 if (sin(acos(x) / T(2.0)) != T(1.0)) { 2037 return cos((n + T(0.5)) * acos(x)) / cos(acos(x) / T(2.0)); 2038 } 2039 2040 if (n % 2 == 0) { 2041 return n + n + 1; 2042 } 2043 2044 return -(n + n + 1); 2045 } 2046 2047 if (n == 0) { 2048 return T(1.0); 2049 } 2050 2051 if (n == 1) { 2052 return x + x - T(1.0); 2053 } 2054 2055 T p = T(1.0); 2056 T q = x + x - T(1.0); 2057 T r; 2058 2059 for (int64_t k = 2; k <= n; k++) { 2060 r = (x + x) * q - p; 2061 p = q; 2062 q = r; 2063 } 2064 2065 return r; 2066 } // chebyshev_polynomial_v_forward(T x, int64_t n) 2067 2068 template<typename T> 2069 T chebyshev_polynomial_v_forward(T x, T n) { 2070 return chebyshev_polynomial_v_forward(x, static_cast<int64_t>(n)); 2071 } // chebyshev_polynomial_v_forward(T x, T n) 2072 ); // chebyshev_polynomial_v_string 2073 2074 const auto chebyshev_polynomial_w_string = jiterator_stringify( 2075 template<typename T> 2076 T chebyshev_polynomial_w_forward(T x, int64_t n) { 2077 if (n < 0) { 2078 return T(0.0); 2079 } 2080 2081 if (abs(x) == T(1.0)) { 2082 if (x > T(0.0)) { 2083 return n + n + 1; 2084 } 2085 2086 if (n % 2 == 0) { 2087 return T(1.0); 2088 } 2089 2090 return T(-1.0); 2091 } 2092 2093 if ((n > 8) && (abs(x) < T(1.0))) { 2094 if (cos(acos(x) / T(2.0)) != T(1.0)) { 2095 return sin((n + T(0.5)) * acos(x)) / sin(acos(x) / T(2.0)); 2096 } 2097 2098 if (x > T(0.0)) { 2099 return n + n + 1; 2100 } 2101 2102 if (n % 2 == 0) { 2103 return T(1.0); 2104 } 2105 2106 return T(-1.0); 2107 } 2108 2109 if (n == 0) { 2110 return T(1.0); 2111 } 2112 2113 if (n == 1) { 2114 return x + x + T(1.0); 2115 } 2116 2117 T p = T(1.0); 2118 T q = x + x + T(1.0); 2119 T r; 2120 2121 for (int64_t k = 2; k <= n; k++) { 2122 r = (x + x) * q - p; 2123 p = q; 2124 q = r; 2125 } 2126 2127 return r; 2128 } // chebyshev_polynomial_w_forward(T x, int64_t n) 2129 2130 template<typename T> 2131 T chebyshev_polynomial_w_forward(T x, T n) { 2132 return chebyshev_polynomial_w_forward(x, static_cast<int64_t>(n)); 2133 } // chebyshev_polynomial_w_forward(T x, T n) 2134 ); // chebyshev_polynomial_w_string 2135 2136 const auto hermite_polynomial_h_string = jiterator_stringify( 2137 template<typename T> 2138 T hermite_polynomial_h_forward(T x, int64_t n) { 2139 if (n < 0) { 2140 return T(0.0); 2141 } 2142 2143 if (n == 0) { 2144 return T(1.0); 2145 } 2146 2147 if (n == 1) { 2148 return x + x; 2149 } 2150 2151 T p = T(1.0); 2152 T q = x + x; 2153 T r = T(0.0); 2154 2155 for (int64_t k = 2; k < n + n; k += 2) { 2156 r = (x + x) * q - k * p; 2157 p = q; 2158 q = r; 2159 } 2160 2161 return r; 2162 } // hermite_polynomial_h_forward(T x, int64_t n) 2163 2164 template<typename T> 2165 T hermite_polynomial_h_forward(T x, T n) { 2166 return hermite_polynomial_h_forward(x, static_cast<int64_t>(n)); 2167 } // hermite_polynomial_h_forward(T x, T n) 2168 ); // hermite_polynomial_h_string 2169 2170 const auto hermite_polynomial_he_string = jiterator_stringify( 2171 template<typename T> 2172 T hermite_polynomial_he_forward(T x, int64_t n) { 2173 if (n < 0) { 2174 return T(0.0); 2175 } 2176 2177 if (n == 0) { 2178 return T(1.0); 2179 } 2180 2181 if (n == 1) { 2182 return x; 2183 } 2184 2185 T p = T(1.0); 2186 T q = x; 2187 T r; 2188 2189 for (int64_t k = 1; k < n; k++) { 2190 r = x * q - k * p; 2191 p = q; 2192 q = r; 2193 } 2194 2195 return r; 2196 } // hermite_polynomial_he_forward(T x, int64_t n) 2197 2198 template<typename T> 2199 T hermite_polynomial_he_forward(T x, T n) { 2200 return hermite_polynomial_he_forward(x, static_cast<int64_t>(n)); 2201 } // hermite_polynomial_he_forward(T x, T n) 2202 ); // hermite_polynomial_he_string 2203 2204 const auto laguerre_polynomial_l_string = jiterator_stringify( 2205 template<typename T> 2206 T laguerre_polynomial_l_forward(T x, int64_t n) { 2207 if (n < 0) { 2208 return T(0.0); 2209 } 2210 2211 if (abs(x) == T(0.0)) { 2212 return T(1.0); 2213 } 2214 2215 if (n == 0) { 2216 return T(1.0); 2217 } 2218 2219 if (n == 1) { 2220 return T(1.0) - x; 2221 } 2222 2223 T p = T(1.0); 2224 T q = T(1.0) - x; 2225 T r; 2226 2227 for (int64_t k = 1; k < n; k++) { 2228 r = (((k + k) + (T(1.0) - x)) * q - k * p) / (k + 1); 2229 p = q; 2230 q = r; 2231 } 2232 2233 return r; 2234 } // laguerre_polynomial_l_forward(T x, int64_t n) 2235 2236 template<typename T> 2237 T laguerre_polynomial_l_forward(T x, T n) { 2238 return laguerre_polynomial_l_forward(x, static_cast<int64_t>(n)); 2239 } // laguerre_polynomial_l_forward(T x, T n) 2240 ); // laguerre_polynomial_l_string 2241 2242 const auto legendre_polynomial_p_string = jiterator_stringify( 2243 template<typename T> 2244 T legendre_polynomial_p_forward(T x, int64_t n) { 2245 if (n < 0) { 2246 return T(0.0); 2247 } 2248 2249 if (abs(x) == T(1.0)) { 2250 if (x > T(0.0) || n % 2 == 0) { 2251 return T(1.0); 2252 } 2253 2254 return T(-1.0); 2255 } 2256 2257 if (n == 0) { 2258 return T(1.0); 2259 } 2260 2261 if (n == 1) { 2262 return x; 2263 } 2264 2265 T p = T(1.0); 2266 T q = x; 2267 T r; 2268 2269 for (int64_t k = 1; k < n; k++) { 2270 r = ((k + k + 1) * x * q - k * p) / (k + 1); 2271 p = q; 2272 q = r; 2273 } 2274 2275 return r; 2276 } // legendre_polynomial_p_forward(T x, int64_t n) 2277 2278 template<typename T> 2279 T legendre_polynomial_p_forward(T x, T n) { 2280 return legendre_polynomial_p_forward(x, static_cast<int64_t>(n)); 2281 } // legendre_polynomial_p_forward(T x, T n) 2282 ); // legendre_polynomial_p_string 2283 2284 const auto modified_bessel_i0_string = jiterator_stringify( 2285 template<typename T> 2286 T modified_bessel_i0_forward(T x) { 2287 static const T A[] = { 2288 -4.41534164647933937950e-18, 2289 +3.33079451882223809783e-17, 2290 -2.43127984654795469359e-16, 2291 +1.71539128555513303061e-15, 2292 -1.16853328779934516808e-14, 2293 +7.67618549860493561688e-14, 2294 -4.85644678311192946090e-13, 2295 +2.95505266312963983461e-12, 2296 -1.72682629144155570723e-11, 2297 +9.67580903537323691224e-11, 2298 -5.18979560163526290666e-10, 2299 +2.65982372468238665035e-09, 2300 -1.30002500998624804212e-08, 2301 +6.04699502254191894932e-08, 2302 -2.67079385394061173391e-07, 2303 +1.11738753912010371815e-06, 2304 -4.41673835845875056359e-06, 2305 +1.64484480707288970893e-05, 2306 -5.75419501008210370398e-05, 2307 +1.88502885095841655729e-04, 2308 -5.76375574538582365885e-04, 2309 +1.63947561694133579842e-03, 2310 -4.32430999505057594430e-03, 2311 +1.05464603945949983183e-02, 2312 -2.37374148058994688156e-02, 2313 +4.93052842396707084878e-02, 2314 -9.49010970480476444210e-02, 2315 +1.71620901522208775349e-01, 2316 -3.04682672343198398683e-01, 2317 +6.76795274409476084995e-01, 2318 }; 2319 2320 static const T B[] = { 2321 -7.23318048787475395456e-18, 2322 -4.83050448594418207126e-18, 2323 +4.46562142029675999901e-17, 2324 +3.46122286769746109310e-17, 2325 -2.82762398051658348494e-16, 2326 -3.42548561967721913462e-16, 2327 +1.77256013305652638360e-15, 2328 +3.81168066935262242075e-15, 2329 -9.55484669882830764870e-15, 2330 -4.15056934728722208663e-14, 2331 +1.54008621752140982691e-14, 2332 +3.85277838274214270114e-13, 2333 +7.18012445138366623367e-13, 2334 -1.79417853150680611778e-12, 2335 -1.32158118404477131188e-11, 2336 -3.14991652796324136454e-11, 2337 +1.18891471078464383424e-11, 2338 +4.94060238822496958910e-10, 2339 +3.39623202570838634515e-09, 2340 +2.26666899049817806459e-08, 2341 +2.04891858946906374183e-07, 2342 +2.89137052083475648297e-06, 2343 +6.88975834691682398426e-05, 2344 +3.36911647825569408990e-03, 2345 +8.04490411014108831608e-01, 2346 }; 2347 2348 T p; 2349 T q = 0.0; 2350 2351 if (abs(x) <= T(8.0)) { 2352 T a = A[0]; 2353 2354 for (uint8_t index = 1; index < 30; index++) { 2355 p = q; 2356 q = a; 2357 a = ((abs(x) / T(2.0)) - T(2.0)) * q - p + A[index]; 2358 } 2359 2360 return exp(abs(x)) * (T(0.5) * (a - p)); 2361 } 2362 2363 T b = B[0]; 2364 2365 for (uint8_t index = 1; index < 25; index++) { 2366 p = q; 2367 q = b; 2368 b = (T(32.0) / abs(x) - T(2.0)) * q - p + B[index]; 2369 } 2370 2371 return exp(abs(x)) * (T(0.5) * (b - p)) / sqrt(abs(x)); 2372 } // modified_bessel_i0_forward(T x) 2373 ); // modified_bessel_i0_string 2374 2375 const auto modified_bessel_i1_string = jiterator_stringify( 2376 template<typename T> 2377 T modified_bessel_i1_forward(T x) { 2378 static const T A[] = { 2379 +2.77791411276104639959e-18, 2380 -2.11142121435816608115e-17, 2381 +1.55363195773620046921e-16, 2382 -1.10559694773538630805e-15, 2383 +7.60068429473540693410e-15, 2384 -5.04218550472791168711e-14, 2385 +3.22379336594557470981e-13, 2386 -1.98397439776494371520e-12, 2387 +1.17361862988909016308e-11, 2388 -6.66348972350202774223e-11, 2389 +3.62559028155211703701e-10, 2390 -1.88724975172282928790e-09, 2391 +9.38153738649577178388e-09, 2392 -4.44505912879632808065e-08, 2393 +2.00329475355213526229e-07, 2394 -8.56872026469545474066e-07, 2395 +3.47025130813767847674e-06, 2396 -1.32731636560394358279e-05, 2397 +4.78156510755005422638e-05, 2398 -1.61760815825896745588e-04, 2399 +5.12285956168575772895e-04, 2400 -1.51357245063125314899e-03, 2401 +4.15642294431288815669e-03, 2402 -1.05640848946261981558e-02, 2403 +2.47264490306265168283e-02, 2404 -5.29459812080949914269e-02, 2405 +1.02643658689847095384e-01, 2406 -1.76416518357834055153e-01, 2407 +2.52587186443633654823e-01, 2408 }; 2409 2410 static const T B[] = { 2411 +7.51729631084210481353e-18, 2412 +4.41434832307170791151e-18, 2413 -4.65030536848935832153e-17, 2414 -3.20952592199342395980e-17, 2415 +2.96262899764595013876e-16, 2416 +3.30820231092092828324e-16, 2417 -1.88035477551078244854e-15, 2418 -3.81440307243700780478e-15, 2419 +1.04202769841288027642e-14, 2420 +4.27244001671195135429e-14, 2421 -2.10154184277266431302e-14, 2422 -4.08355111109219731823e-13, 2423 -7.19855177624590851209e-13, 2424 +2.03562854414708950722e-12, 2425 +1.41258074366137813316e-11, 2426 +3.25260358301548823856e-11, 2427 -1.89749581235054123450e-11, 2428 -5.58974346219658380687e-10, 2429 -3.83538038596423702205e-09, 2430 -2.63146884688951950684e-08, 2431 -2.51223623787020892529e-07, 2432 -3.88256480887769039346e-06, 2433 -1.10588938762623716291e-04, 2434 -9.76109749136146840777e-03, 2435 +7.78576235018280120474e-01, 2436 }; 2437 2438 T p; 2439 T q = 0.0; 2440 2441 if (abs(x) <= T(8.0)) { 2442 T a = A[0]; 2443 2444 for (uint8_t index = 1; index < 29; index++) { 2445 p = q; 2446 q = a; 2447 a = ((abs(x) / T(2.0)) - T(2.0)) * q - p + A[index]; 2448 } 2449 2450 if (x < T(0.0)) { 2451 return -(T(0.5) * (a - p) * abs(x) * exp(abs(x))); 2452 } 2453 2454 return T(0.5) * (a - p) * abs(x) * exp(abs(x)); 2455 } 2456 2457 T b = B[0]; 2458 2459 for (uint8_t index = 1; index < 25; index++) { 2460 p = q; 2461 q = b; 2462 b = (T(32.0) / abs(x) - T(2.0)) * q - p + B[index]; 2463 } 2464 2465 if (x < T(0.0)) { 2466 return -(exp(abs(x)) * (T(0.5) * (b - p)) / sqrt(abs(x))); 2467 } 2468 2469 return exp(abs(x)) * (T(0.5) * (b - p)) / sqrt(abs(x)); 2470 } // modified_bessel_i1_forward(T x) 2471 ); // modified_bessel_i1_string 2472 2473 const auto modified_bessel_k0_string = modified_bessel_i0_string + jiterator_stringify( 2474 template<typename T> 2475 T modified_bessel_k0_forward(T x) { 2476 static const T A[] = { 2477 +1.37446543561352307156e-16, 2478 +4.25981614279661018399e-14, 2479 +1.03496952576338420167e-11, 2480 +1.90451637722020886025e-09, 2481 +2.53479107902614945675e-07, 2482 +2.28621210311945178607e-05, 2483 +1.26461541144692592338e-03, 2484 +3.59799365153615016266e-02, 2485 +3.44289899924628486886e-01, 2486 -5.35327393233902768720e-01, 2487 }; 2488 2489 static const T B[] = { 2490 +5.30043377268626276149e-18, 2491 -1.64758043015242134646e-17, 2492 +5.21039150503902756861e-17, 2493 -1.67823109680541210385e-16, 2494 +5.51205597852431940784e-16, 2495 -1.84859337734377901440e-15, 2496 +6.34007647740507060557e-15, 2497 -2.22751332699166985548e-14, 2498 +8.03289077536357521100e-14, 2499 -2.98009692317273043925e-13, 2500 +1.14034058820847496303e-12, 2501 -4.51459788337394416547e-12, 2502 +1.85594911495471785253e-11, 2503 -7.95748924447710747776e-11, 2504 +3.57739728140030116597e-10, 2505 -1.69753450938905987466e-09, 2506 +8.57403401741422608519e-09, 2507 -4.66048989768794782956e-08, 2508 +2.76681363944501510342e-07, 2509 -1.83175552271911948767e-06, 2510 +1.39498137188764993662e-05, 2511 -1.28495495816278026384e-04, 2512 +1.56988388573005337491e-03, 2513 -3.14481013119645005427e-02, 2514 +2.44030308206595545468e+00, 2515 }; 2516 2517 if (x == T(0.0)) { 2518 return INFINITY; 2519 } 2520 2521 if (x < T(0.0)) { 2522 return NAN; 2523 } 2524 2525 T p; 2526 T q = 0.0; 2527 2528 if (x <= T(2.0)) { 2529 T a = A[0]; 2530 2531 for (uint8_t index = 1; index < 10; index++) { 2532 p = q; 2533 q = a; 2534 a = (x * x - T(2.0)) * q - p + A[index]; 2535 } 2536 2537 return T(0.5) * (a - p) - log(0.5 * x) * modified_bessel_i0_forward(x); 2538 } 2539 2540 T b = B[0]; 2541 2542 for (uint8_t index = 1; index < 25; index++) { 2543 p = q; 2544 q = b; 2545 b = (T(8.0) / x - T(2.0)) * q - p + B[index]; 2546 } 2547 2548 return exp(-x) * (T(0.5) * (b - p)) / sqrt(x); 2549 } // modified_bessel_k0_forward(T x) 2550 ); // modified_bessel_k0_string 2551 2552 const auto scaled_modified_bessel_k0_string = modified_bessel_i0_string + jiterator_stringify( 2553 template<typename T> 2554 T scaled_modified_bessel_k0_forward(T x) { 2555 static const T A[] = { 2556 +1.37446543561352307156e-16, 2557 +4.25981614279661018399e-14, 2558 +1.03496952576338420167e-11, 2559 +1.90451637722020886025e-09, 2560 +2.53479107902614945675e-07, 2561 +2.28621210311945178607e-05, 2562 +1.26461541144692592338e-03, 2563 +3.59799365153615016266e-02, 2564 +3.44289899924628486886e-01, 2565 -5.35327393233902768720e-01, 2566 }; 2567 2568 static const T B[] = { 2569 +5.30043377268626276149e-18, 2570 -1.64758043015242134646e-17, 2571 +5.21039150503902756861e-17, 2572 -1.67823109680541210385e-16, 2573 +5.51205597852431940784e-16, 2574 -1.84859337734377901440e-15, 2575 +6.34007647740507060557e-15, 2576 -2.22751332699166985548e-14, 2577 +8.03289077536357521100e-14, 2578 -2.98009692317273043925e-13, 2579 +1.14034058820847496303e-12, 2580 -4.51459788337394416547e-12, 2581 +1.85594911495471785253e-11, 2582 -7.95748924447710747776e-11, 2583 +3.57739728140030116597e-10, 2584 -1.69753450938905987466e-09, 2585 +8.57403401741422608519e-09, 2586 -4.66048989768794782956e-08, 2587 +2.76681363944501510342e-07, 2588 -1.83175552271911948767e-06, 2589 +1.39498137188764993662e-05, 2590 -1.28495495816278026384e-04, 2591 +1.56988388573005337491e-03, 2592 -3.14481013119645005427e-02, 2593 +2.44030308206595545468e+00, 2594 }; 2595 2596 if (x == T(0.0)) { 2597 return INFINITY; 2598 } 2599 2600 if (x < T(0.0)) { 2601 return NAN; 2602 } 2603 2604 T p; 2605 T q = 0.0; 2606 2607 if (x <= T(2.0)) { 2608 T a = A[0]; 2609 2610 for (uint8_t index = 1; index < 10; index++) { 2611 p = q; 2612 q = a; 2613 a = (x * x - T(2.0)) * q - p + A[index]; 2614 } 2615 2616 return (T(0.5) * (a - p) - log(T(0.5) * x) * modified_bessel_i0_forward(x)) * exp(x); 2617 } 2618 2619 T b = B[0]; 2620 2621 for (uint8_t index = 1; index < 25; index++) { 2622 p = q; 2623 q = b; 2624 b = (T(8.0) / x - T(2.0)) * q - p + B[index]; 2625 } 2626 2627 return T(0.5) * (b - p) / sqrt(x); 2628 } // T scaled_modified_bessel_k0_forward(T x) 2629 ); // scaled_modified_bessel_k0_string 2630 2631 const auto modified_bessel_k1_string = modified_bessel_i1_string + jiterator_stringify( 2632 template<typename T> 2633 T modified_bessel_k1_forward(T x) { 2634 static const T A[] = { 2635 -7.02386347938628759343e-18, 2636 -2.42744985051936593393e-15, 2637 -6.66690169419932900609e-13, 2638 -1.41148839263352776110e-10, 2639 -2.21338763073472585583e-08, 2640 -2.43340614156596823496e-06, 2641 -1.73028895751305206302e-04, 2642 -6.97572385963986435018e-03, 2643 -1.22611180822657148235e-01, 2644 -3.53155960776544875667e-01, 2645 +1.52530022733894777053e+00, 2646 }; 2647 2648 static const T B[] = { 2649 -5.75674448366501715755e-18, 2650 +1.79405087314755922667e-17, 2651 -5.68946255844285935196e-17, 2652 +1.83809354436663880070e-16, 2653 -6.05704724837331885336e-16, 2654 +2.03870316562433424052e-15, 2655 -7.01983709041831346144e-15, 2656 +2.47715442448130437068e-14, 2657 -8.97670518232499435011e-14, 2658 +3.34841966607842919884e-13, 2659 -1.28917396095102890680e-12, 2660 +5.13963967348173025100e-12, 2661 -2.12996783842756842877e-11, 2662 +9.21831518760500529508e-11, 2663 -4.19035475934189648750e-10, 2664 +2.01504975519703286596e-09, 2665 -1.03457624656780970260e-08, 2666 +5.74108412545004946722e-08, 2667 -3.50196060308781257119e-07, 2668 +2.40648494783721712015e-06, 2669 -1.93619797416608296024e-05, 2670 +1.95215518471351631108e-04, 2671 -2.85781685962277938680e-03, 2672 +1.03923736576817238437e-01, 2673 +2.72062619048444266945e+00, 2674 }; 2675 2676 if (x == T(0.0)) { 2677 return INFINITY; 2678 } 2679 2680 if (x < T(0.0)) { 2681 return NAN; 2682 } 2683 2684 T p; 2685 T q = 0.0; 2686 2687 if (x <= T(2.0)) { 2688 T a = A[0]; 2689 2690 for (uint8_t index = 1; index < 11; index++) { 2691 p = q; 2692 q = a; 2693 a = (x * x - T(2.0)) * q - p + A[index]; 2694 } 2695 2696 return log(T(0.5) * x) * modified_bessel_i1_forward(x) + T(0.5) * (a - p) / x; 2697 } 2698 2699 T b = B[0]; 2700 2701 for (uint8_t index = 1; index < 25; index++) { 2702 p = q; 2703 q = b; 2704 b = (T(8.0) / x - T(2.0)) * q - p + B[index]; 2705 } 2706 2707 return exp(-x) * (T(0.5) * (b - p)) / sqrt(x); 2708 } // modified_bessel_k1_forward(T x) 2709 ); // modified_bessel_k1_string 2710 2711 const auto scaled_modified_bessel_k1_string = modified_bessel_i1_string + jiterator_stringify( 2712 template<typename T> 2713 T scaled_modified_bessel_k1_forward(T x) { 2714 static const T A[] = { 2715 -7.02386347938628759343e-18, 2716 -2.42744985051936593393e-15, 2717 -6.66690169419932900609e-13, 2718 -1.41148839263352776110e-10, 2719 -2.21338763073472585583e-08, 2720 -2.43340614156596823496e-06, 2721 -1.73028895751305206302e-04, 2722 -6.97572385963986435018e-03, 2723 -1.22611180822657148235e-01, 2724 -3.53155960776544875667e-01, 2725 +1.52530022733894777053e+00, 2726 }; 2727 2728 static const T B[] = { 2729 -5.75674448366501715755e-18, 2730 +1.79405087314755922667e-17, 2731 -5.68946255844285935196e-17, 2732 +1.83809354436663880070e-16, 2733 -6.05704724837331885336e-16, 2734 +2.03870316562433424052e-15, 2735 -7.01983709041831346144e-15, 2736 +2.47715442448130437068e-14, 2737 -8.97670518232499435011e-14, 2738 +3.34841966607842919884e-13, 2739 -1.28917396095102890680e-12, 2740 +5.13963967348173025100e-12, 2741 -2.12996783842756842877e-11, 2742 +9.21831518760500529508e-11, 2743 -4.19035475934189648750e-10, 2744 +2.01504975519703286596e-09, 2745 -1.03457624656780970260e-08, 2746 +5.74108412545004946722e-08, 2747 -3.50196060308781257119e-07, 2748 +2.40648494783721712015e-06, 2749 -1.93619797416608296024e-05, 2750 +1.95215518471351631108e-04, 2751 -2.85781685962277938680e-03, 2752 +1.03923736576817238437e-01, 2753 +2.72062619048444266945e+00, 2754 }; 2755 2756 if (x == T(0.0)) { 2757 return INFINITY; 2758 } 2759 2760 if (x < T(0.0)) { 2761 return NAN; 2762 } 2763 2764 T p; 2765 T q = 0.0; 2766 2767 if (x <= T(2.0)) { 2768 T a = A[0]; 2769 2770 for (uint8_t index = 1; index < 11; index++) { 2771 p = q; 2772 q = a; 2773 a = (x * x - T(2.0)) * q - p + A[index]; 2774 } 2775 2776 return (log(T(0.5) * x) * modified_bessel_i1_forward(x) + T(0.5) * (a - p) / x) * exp(x); 2777 } 2778 2779 T b = B[0]; 2780 2781 for (uint8_t index = 1; index < 25; index++) { 2782 p = q; 2783 q = b; 2784 b = (T(8.0) / x - T(2.0)) * q - p + B[index]; 2785 } 2786 2787 return (T(0.5) * (b - p) / sqrt(x)); 2788 } // T scaled_modified_bessel_k1_forward(T x) 2789 ); // scaled_modified_bessel_k1_string 2790 2791 const auto shifted_chebyshev_polynomial_t_string = jiterator_stringify( 2792 template<typename T> 2793 T shifted_chebyshev_polynomial_t_forward(T x, int64_t n) { 2794 if (n < 0) { 2795 return T(0.0); 2796 } 2797 2798 if (x == T(1.0)) { 2799 return T(1.0); 2800 } 2801 2802 if (x == T(0.0)) { 2803 if (n % 2 == 0) { 2804 return T(1.0); 2805 } 2806 2807 return T(-1.0); 2808 } 2809 2810 if ((n > 6) && (abs(x + x - T(1.0)) < T(1.0))) { 2811 return cos(n * acos(x + x - T(1.0))); 2812 } 2813 2814 if (n == 0) { 2815 return T(1.0); 2816 } 2817 2818 if (n == 1) { 2819 return x + x - T(1.0); 2820 } 2821 2822 T p = T(1.0); 2823 T q = x + x - T(1.0); 2824 T r; 2825 2826 for (int64_t k = 2; k <= n; k++) { 2827 r = (x + x - T(1.0) + (x + x - T(1.0))) * q - p; 2828 p = q; 2829 q = r; 2830 } 2831 2832 return r; 2833 } // shifted_chebyshev_polynomial_t_forward(T x, int64_t n) 2834 2835 template<typename T> 2836 T shifted_chebyshev_polynomial_t_forward(T x, T n) { 2837 return shifted_chebyshev_polynomial_t_forward(x, static_cast<int64_t>(n)); 2838 } // shifted_chebyshev_polynomial_t_forward(T x, T n) 2839 ); // shifted_chebyshev_polynomial_t_string 2840 2841 const auto shifted_chebyshev_polynomial_u_string = jiterator_stringify( 2842 template<typename T> 2843 T shifted_chebyshev_polynomial_u_forward(T x, int64_t n) { 2844 if (n < 0) { 2845 return T(0.0); 2846 } 2847 2848 if (x == T(1.0)) { 2849 return n + 1; 2850 } 2851 2852 if (x == T(0.0)) { 2853 if (n % 2 == 0) { 2854 return n + 1; 2855 } 2856 2857 return -(n + 1); 2858 } 2859 2860 if ((n > 6) && (abs(x + x - T(1.0)) < T(1.0))) { 2861 if (sin(acos(x + x - T(1.0))) != T(0.0)) { 2862 return sin((n + 1) * acos(x + x - T(1.0))) / sin(acos(x + x - T(1.0))); 2863 } 2864 2865 return (n + 1) * cos((n + 1) * acos(x + x - T(1.0))) / (x + x - T(1.0)); 2866 } 2867 2868 if (n == 0) { 2869 return T(1.0); 2870 } 2871 2872 if (n == 1) { 2873 return x + x - T(1.0) + (x + x - T(1.0)); 2874 } 2875 2876 T p = T(1.0); 2877 T q = x + x - T(1.0) + (x + x - T(1.0)); 2878 T r; 2879 2880 for (int64_t k = 2; k <= n; k++) { 2881 r = (x + x - T(1.0) + (x + x - T(1.0))) * q - p; 2882 p = q; 2883 q = r; 2884 } 2885 2886 return r; 2887 } // shifted_chebyshev_polynomial_u_forward(T x, int64_t n) 2888 2889 template<typename T> 2890 T shifted_chebyshev_polynomial_u_forward(T x, T n) { 2891 return shifted_chebyshev_polynomial_u_forward(x, static_cast<int64_t>(n)); 2892 } // shifted_chebyshev_polynomial_u_forward(T x, T n) 2893 ); // shifted_chebyshev_polynomial_u_string 2894 2895 const auto shifted_chebyshev_polynomial_v_string = jiterator_stringify( 2896 template<typename T> 2897 T shifted_chebyshev_polynomial_v_forward(T x, int64_t n) { 2898 if (n < 0) { 2899 return T(0.0); 2900 } 2901 2902 if (x == T(1.0)) { 2903 return T(1.0); 2904 } 2905 2906 if (x == T(0.0)) { 2907 if (n % 2 == 0) { 2908 return (n + n + 1); 2909 } 2910 2911 return -(n + n + 1); 2912 } 2913 2914 if ((n > 6) && (abs(x + x - T(1.0)) < T(1.0))) { 2915 if (sin(acos(x + x - T(1.0)) / T(2.0)) != T(1.0)) { 2916 return cos(((n) + T(0.5)) * acos(x + x - T(1.0))) / cos(acos(x + x - T(1.0)) / T(2.0)); 2917 } 2918 2919 if (n % 2 == 0) { 2920 return n + n + 1; 2921 } 2922 2923 return -(n + n + 1); 2924 } 2925 2926 if (n == 0) { 2927 return T(1.0); 2928 } 2929 2930 if (n == 1) { 2931 return x + x - T(1.0) + (x + x - T(1.0)) - T(1.0); 2932 } 2933 2934 T p = T(1.0); 2935 T q = x + x - T(1.0) + (x + x - T(1.0)) - T(1.0); 2936 T r; 2937 2938 for (int64_t k = 2; k <= n; k++) { 2939 r = (x + x - T(1.0) + (x + x - T(1.0))) * q - p; 2940 p = q; 2941 q = r; 2942 } 2943 2944 return r; 2945 } // shifted_chebyshev_polynomial_v_forward(T x, int64_t n) 2946 2947 template<typename T> 2948 T shifted_chebyshev_polynomial_v_forward(T x, T n) { 2949 return shifted_chebyshev_polynomial_v_forward(x, static_cast<int64_t>(n)); 2950 } // shifted_chebyshev_polynomial_v_forward(T x, T n) 2951 ); // shifted_chebyshev_polynomial_v_string 2952 2953 const auto shifted_chebyshev_polynomial_w_string = jiterator_stringify( 2954 template<typename T> 2955 T shifted_chebyshev_polynomial_w_forward(T x, int64_t n) { 2956 if (n < 0) { 2957 return T(0.0); 2958 } 2959 2960 if (x == T(1.0)) { 2961 return n + n + 1; 2962 } 2963 2964 if (x == T(0.0)) { 2965 if (n % 2 == 0) { 2966 return T(1.0); 2967 } 2968 2969 return T(-1.0); 2970 } 2971 2972 if ((n > 4) && (abs(x + x - T(1.0)) < T(1.0))) { 2973 if (cos(acos(x + x - T(1.0)) / T(2.0)) != T(1.0)) { 2974 return sin((n + T(0.5)) * acos(x + x - T(1.0))) / sin(acos(x + x - T(1.0)) / T(2.0)); 2975 } 2976 2977 if (n % 2 == 0) { 2978 return T(1.0); 2979 } 2980 2981 return T(-1.0); 2982 } 2983 2984 if (n == 0) { 2985 return T(1.0); 2986 } 2987 2988 if (n == 1) { 2989 return x + x - T(1.0) + (x + x - T(1.0)) + T(1.0); 2990 } 2991 2992 T p = T(1.0); 2993 T q = x + x - T(1.0) + (x + x - T(1.0)) + T(1.0); 2994 T r; 2995 2996 for (int64_t k = 2; k <= n; k++) { 2997 r = (x + x - T(1.0) + (x + x - T(1.0))) * q - p; 2998 p = q; 2999 q = r; 3000 } 3001 3002 return r; 3003 } // shifted_chebyshev_polynomial_w_forward(T x, int64_t n) 3004 3005 template<typename T> 3006 T shifted_chebyshev_polynomial_w_forward(T x, T n) { 3007 return shifted_chebyshev_polynomial_w_forward(x, static_cast<int64_t>(n)); 3008 } // shifted_chebyshev_polynomial_w_forward(T x, T n) 3009 ); // shifted_chebyshev_polynomial_w_string 3010 3011 const auto spherical_bessel_j0_string = jiterator_stringify( 3012 template<typename T> 3013 T spherical_bessel_j0_forward(T x) { 3014 if (isinf(x)) { 3015 return T(0.0); 3016 } 3017 3018 if (abs(x) < T(0.5)) { 3019 return T(1.0) + x * x * (T(-1.0) / T(6.0) + x * x * (T(1.0) / T(120.0) + x * x * (T(-1.0) / T(5040.0) + x * x * (T(1.0) / T(362880.0) + x * x * (T(-1.0) / T(39916800.0) + x * x * (T(1.0) / T(6227020800.0))))))); 3020 } 3021 3022 return sin(x) / x; 3023 } // T spherical_bessel_j0_forward(T x) 3024 ); // spherical_bessel_j0_string 3025 3026 #else // !AT_USE_JITERATOR() -- kernels must be precompiled 3027 3028 template <typename scalar_t> 3029 static inline C10_HOST_DEVICE scalar_t calc_gcd(scalar_t a_in, scalar_t b_in) { 3030 scalar_t a = ::abs(a_in); 3031 scalar_t b = ::abs(b_in); 3032 while (a != 0) { 3033 scalar_t c = a; 3034 a = b % a; 3035 b = c; 3036 } 3037 return b; 3038 } 3039 3040 /* 3041 * For licensing information, please refer to the cpu implementation located in "ATen/native/Math.h". 3042 */ 3043 template <typename scalar_t> 3044 static inline C10_HOST_DEVICE scalar_t calc_digamma(scalar_t in) { 3045 // [C++ Standard Reference: Gamma Function] https://en.cppreference.com/w/cpp/numeric/math/tgamma 3046 using accscalar_t = at::acc_type<scalar_t, /*is_cuda=*/true>; 3047 static const double PI_f64 = 3.14159265358979323846; 3048 const accscalar_t PSI_10 = 2.25175258906672110764; 3049 const accscalar_t A[] = { 3050 8.33333333333333333333E-2, 3051 -2.10927960927960927961E-2, 3052 7.57575757575757575758E-3, 3053 -4.16666666666666666667E-3, 3054 3.96825396825396825397E-3, 3055 -8.33333333333333333333E-3, 3056 8.33333333333333333333E-2, 3057 }; 3058 3059 accscalar_t x = static_cast<accscalar_t>(in); 3060 if (x == 0) { 3061 // As per C++ standard for gamma related functions and SciPy, 3062 // If the argument is ±0, ±∞ is returned 3063 return std::copysign(static_cast<scalar_t>(INFINITY), -x); 3064 } 3065 3066 bool x_is_integer = x == ::trunc(x); 3067 accscalar_t result = 0; 3068 if (x < 0) { 3069 if (x_is_integer) { 3070 // As per C++ standard for gamma related functions and SciPy, 3071 // If the argument is a negative integer, NaN is returned 3072 return static_cast<scalar_t>(NAN); 3073 } 3074 // Extracts the fractional part of x as r, since tan(pi * r) is more numerically 3075 // accurate than tan(pi * x). While these operations are mathematically equivalent 3076 // since both x and r are in radians and tan() has a periodicity of pi, in practice 3077 // the computation of pi * x is a source of error (when |x| > 1). 3078 double q, r; 3079 r = ::modf(static_cast<double>(x), &q); 3080 result = static_cast<accscalar_t>(- PI_f64 / ::tan(PI_f64 * r)); 3081 x = 1 - x; 3082 } 3083 3084 while (x < 10) { 3085 result -= 1 / x; 3086 x += 1; 3087 } 3088 if (x == 10) { 3089 return static_cast<scalar_t>(result + PSI_10); 3090 } 3091 3092 accscalar_t y = 0; 3093 if (x < 1.0e17) { 3094 accscalar_t z = 1 / (x * x); 3095 3096 accscalar_t polevl_result = 0; 3097 for (int i = 0; i <= 6; i++) { 3098 polevl_result = polevl_result * z + A[i]; 3099 } 3100 y = z * polevl_result; 3101 } 3102 3103 return static_cast<scalar_t>(::log(x) - (static_cast<accscalar_t>(0.5) / x) - y + result); 3104 } 3105 3106 template <typename scalar_t> 3107 static inline C10_HOST_DEVICE scalar_t calc_trigamma(scalar_t in) { 3108 using accscalar_t = at::acc_type<scalar_t, /*is_cuda=*/true>; 3109 const accscalar_t PI = 3.14159265358979323846; 3110 accscalar_t x = static_cast<accscalar_t>(in); 3111 accscalar_t sign = +1; 3112 accscalar_t result = 0; 3113 if (x < 0.5f) { 3114 sign = -1; 3115 accscalar_t sin_pi_x = ::sin(PI * x); 3116 result -= (PI * PI) / (sin_pi_x * sin_pi_x); 3117 x = 1 - x; 3118 } 3119 for (int i = 0; i < 6; ++i) { 3120 result += 1 / (x * x); 3121 x += 1; 3122 } 3123 const accscalar_t one = static_cast<scalar_t>(1); 3124 const accscalar_t ixx = 1 / (x*x); 3125 result += (1 + 1 / (2*x) + ixx * (one/6 - ixx * (one/30 - ixx * (one/42)))) / x; 3126 return static_cast<scalar_t>(sign * result); 3127 } 3128 3129 /* 3130 * For licensing information and documentation, please refer to the cpu implementation located in "ATen/native/Math.h". 3131 */ 3132 template <typename scalar_t> 3133 static inline C10_HOST_DEVICE scalar_t 3134 chbevl(scalar_t _x, const scalar_t array[], size_t len) { 3135 static_assert(!std::is_same<scalar_t, Half>() && !std::is_same<scalar_t, BFloat16>(), "don't instantiate with low precision type"); 3136 3137 scalar_t b0, b1, b2; 3138 3139 b0 = array[0]; 3140 b1 = 0; 3141 3142 for (size_t i = 1; i < len; ++i) { 3143 b2 = b1; 3144 b1 = b0; 3145 b0 = _x * b1 - b2 + array[i]; 3146 } 3147 3148 return (0.5 * (b0 - b2)); 3149 } 3150 3151 /* 3152 * For licensing information and documentation, please refer to the cpu implementation located in "ATen/native/Math.h". 3153 */ 3154 template <typename T> 3155 C10_HOST_DEVICE inline std::tuple<const T*, size_t> chebyshev_coefficients_i0e_A() { 3156 /* Chebyshev coefficients for exp(-x) I0(x) 3157 * in the interval [0,8]. 3158 * 3159 * lim(x->0){ exp(-x) I0(x) } = 1. 3160 */ 3161 static const T coefficients[] = { 3162 -4.41534164647933937950E-18, 3.33079451882223809783E-17, 3163 -2.43127984654795469359E-16, 1.71539128555513303061E-15, 3164 -1.16853328779934516808E-14, 7.67618549860493561688E-14, 3165 -4.85644678311192946090E-13, 2.95505266312963983461E-12, 3166 -1.72682629144155570723E-11, 9.67580903537323691224E-11, 3167 -5.18979560163526290666E-10, 2.65982372468238665035E-9, 3168 -1.30002500998624804212E-8, 6.04699502254191894932E-8, 3169 -2.67079385394061173391E-7, 1.11738753912010371815E-6, 3170 -4.41673835845875056359E-6, 1.64484480707288970893E-5, 3171 -5.75419501008210370398E-5, 1.88502885095841655729E-4, 3172 -5.76375574538582365885E-4, 1.63947561694133579842E-3, 3173 -4.32430999505057594430E-3, 1.05464603945949983183E-2, 3174 -2.37374148058994688156E-2, 4.93052842396707084878E-2, 3175 -9.49010970480476444210E-2, 1.71620901522208775349E-1, 3176 -3.04682672343198398683E-1, 6.76795274409476084995E-1}; 3177 3178 return std::make_tuple(coefficients, 30); 3179 } 3180 3181 template <typename T> 3182 C10_HOST_DEVICE inline std::tuple<const T*, size_t> chebyshev_coefficients_i0e_B() { 3183 /* Chebyshev coefficients for exp(-x) sqrt(x) I0(x) 3184 * in the inverted interval [8,infinity]. 3185 * 3186 * lim(x->inf){ exp(-x) sqrt(x) I0(x) } = 1/sqrt(2pi). 3187 */ 3188 static const T coefficients[] = { 3189 -7.23318048787475395456E-18, -4.83050448594418207126E-18, 3190 4.46562142029675999901E-17, 3.46122286769746109310E-17, 3191 -2.82762398051658348494E-16, -3.42548561967721913462E-16, 3192 1.77256013305652638360E-15, 3.81168066935262242075E-15, 3193 -9.55484669882830764870E-15, -4.15056934728722208663E-14, 3194 1.54008621752140982691E-14, 3.85277838274214270114E-13, 3195 7.18012445138366623367E-13, -1.79417853150680611778E-12, 3196 -1.32158118404477131188E-11, -3.14991652796324136454E-11, 3197 1.18891471078464383424E-11, 4.94060238822496958910E-10, 3198 3.39623202570838634515E-9, 2.26666899049817806459E-8, 3199 2.04891858946906374183E-7, 2.89137052083475648297E-6, 3200 6.88975834691682398426E-5, 3.36911647825569408990E-3, 3201 8.04490411014108831608E-1}; 3202 3203 return std::make_tuple(coefficients, 25); 3204 } 3205 3206 template <typename scalar_t> 3207 static inline C10_HOST_DEVICE scalar_t calc_i0(scalar_t _x) { 3208 static_assert(!std::is_same<scalar_t, Half>() && !std::is_same<scalar_t, BFloat16>(), "don't instantiate with low precision type"); 3209 // Upcast input for numerical accuracy purposes 3210 // Needed for accurate results if input is bfloat16 or float16 3211 scalar_t x = ::abs(_x); 3212 3213 if (x <= scalar_t{8.0}) { 3214 auto coeff_pair = chebyshev_coefficients_i0e_A<scalar_t>(); 3215 auto A = std::get<0>(coeff_pair); 3216 auto len = std::get<1>(coeff_pair); 3217 scalar_t y = (x / scalar_t{2.0}) - scalar_t{2.0}; 3218 return (::exp(x) * chbevl(y, A, len)); 3219 } 3220 3221 auto coeff_pair = chebyshev_coefficients_i0e_B<scalar_t>(); 3222 auto B = std::get<0>(coeff_pair); 3223 auto len = std::get<1>(coeff_pair); 3224 return (::exp(x) * chbevl(scalar_t{32.0} / x - scalar_t{2.0}, B, len) / ::sqrt(x)); 3225 } 3226 3227 template <typename T> 3228 C10_HOST_DEVICE inline 3229 typename std::enable_if<std::is_same<double, T>::value, std::tuple<const T*, size_t>>::type 3230 chebyshev_coefficients_i1e_A() { 3231 /* Chebyshev coefficients for exp(-x) I1(x) 3232 * in the interval [0,8]. 3233 * 3234 * lim(x->0){ exp(-x) I1(x) / x } = 1/2. 3235 */ 3236 static const T coefficients[] = { 3237 2.77791411276104639959E-18, -2.11142121435816608115E-17, 3238 1.55363195773620046921E-16, -1.10559694773538630805E-15, 3239 7.60068429473540693410E-15, -5.04218550472791168711E-14, 3240 3.22379336594557470981E-13, -1.98397439776494371520E-12, 3241 1.17361862988909016308E-11, -6.66348972350202774223E-11, 3242 3.62559028155211703701E-10, -1.88724975172282928790E-9, 3243 9.38153738649577178388E-9, -4.44505912879632808065E-8, 3244 2.00329475355213526229E-7, -8.56872026469545474066E-7, 3245 3.47025130813767847674E-6, -1.32731636560394358279E-5, 3246 4.78156510755005422638E-5, -1.61760815825896745588E-4, 3247 5.12285956168575772895E-4, -1.51357245063125314899E-3, 3248 4.15642294431288815669E-3, -1.05640848946261981558E-2, 3249 2.47264490306265168283E-2, -5.29459812080949914269E-2, 3250 1.02643658689847095384E-1, -1.76416518357834055153E-1, 3251 2.52587186443633654823E-1}; 3252 3253 return std::make_tuple(coefficients, 29); 3254 } 3255 3256 template <typename T> 3257 C10_HOST_DEVICE inline 3258 typename std::enable_if<std::is_same<float, T>::value, std::tuple<const T*, size_t>>::type 3259 chebyshev_coefficients_i1e_A() { 3260 /* Chebyshev coefficients for exp(-x) I1(x) 3261 * in the interval [0,8]. 3262 * 3263 * lim(x->0){ exp(-x) I1(x) / x } = 1/2. 3264 */ 3265 static const T coeff[] = { 3266 9.38153738649577178388E-9f, 3267 -4.44505912879632808065E-8f, 3268 2.00329475355213526229E-7f, 3269 -8.56872026469545474066E-7f, 3270 3.47025130813767847674E-6f, 3271 -1.32731636560394358279E-5f, 3272 4.78156510755005422638E-5f, 3273 -1.61760815825896745588E-4f, 3274 5.12285956168575772895E-4f, 3275 -1.51357245063125314899E-3f, 3276 4.15642294431288815669E-3f, 3277 -1.05640848946261981558E-2f, 3278 2.47264490306265168283E-2f, 3279 -5.29459812080949914269E-2f, 3280 1.02643658689847095384E-1f, 3281 -1.76416518357834055153E-1f, 3282 2.52587186443633654823E-1f}; 3283 return std::make_tuple(coeff, 17); 3284 }; 3285 3286 template <typename T> 3287 C10_HOST_DEVICE inline 3288 typename std::enable_if<std::is_same<double, T>::value, std::tuple<const T*, size_t>>::type 3289 chebyshev_coefficients_i1e_B() { 3290 /* Chebyshev coefficients for exp(-x) sqrt(x) I1(x) 3291 * in the inverted interval [8,infinity]. 3292 * 3293 * lim(x->inf){ exp(-x) sqrt(x) I1(x) } = 1/sqrt(2pi). 3294 */ 3295 static const T coefficients[] = { 3296 7.51729631084210481353E-18, 4.41434832307170791151E-18, 3297 -4.65030536848935832153E-17, -3.20952592199342395980E-17, 3298 2.96262899764595013876E-16, 3.30820231092092828324E-16, 3299 -1.88035477551078244854E-15, -3.81440307243700780478E-15, 3300 1.04202769841288027642E-14, 4.27244001671195135429E-14, 3301 -2.10154184277266431302E-14, -4.08355111109219731823E-13, 3302 -7.19855177624590851209E-13, 2.03562854414708950722E-12, 3303 1.41258074366137813316E-11, 3.25260358301548823856E-11, 3304 -1.89749581235054123450E-11, -5.58974346219658380687E-10, 3305 -3.83538038596423702205E-9, -2.63146884688951950684E-8, 3306 -2.51223623787020892529E-7, -3.88256480887769039346E-6, 3307 -1.10588938762623716291E-4, -9.76109749136146840777E-3, 3308 7.78576235018280120474E-1}; 3309 3310 return std::make_tuple(coefficients, 25); 3311 } 3312 3313 template <typename T> 3314 C10_HOST_DEVICE inline 3315 typename std::enable_if<std::is_same<float, T>::value, std::tuple<const T*, size_t>>::type 3316 chebyshev_coefficients_i1e_B() { 3317 /* Chebyshev coefficients for exp(-x) sqrt(x) I1(x) 3318 * in the inverted interval [8,infinity]. 3319 * 3320 * lim(x->inf){ exp(-x) sqrt(x) I1(x) } = 1/sqrt(2pi). 3321 */ 3322 static const T coeff[] = { 3323 -3.83538038596423702205E-9f, 3324 -2.63146884688951950684E-8f, 3325 -2.51223623787020892529E-7f, 3326 -3.88256480887769039346E-6f, 3327 -1.10588938762623716291E-4f, 3328 -9.76109749136146840777E-3f, 3329 7.78576235018280120474E-1f}; 3330 3331 return std::make_tuple(coeff, 7); 3332 }; 3333 3334 template <typename scalar_t> 3335 static inline C10_HOST_DEVICE scalar_t calc_i1(scalar_t _x) { 3336 const auto x = ::abs(_x); 3337 if (x <= scalar_t{8.0}) { 3338 auto coeff_pair = chebyshev_coefficients_i1e_A<scalar_t>(); 3339 auto A = std::get<0>(coeff_pair); 3340 auto len = std::get<1>(coeff_pair); 3341 scalar_t y = x / scalar_t{2.0} - scalar_t{2.0}; 3342 const scalar_t out = ::exp(x) * x * chbevl(y, A, len); 3343 return (_x < scalar_t{0.0}) ? -out : out; 3344 } 3345 3346 auto coeff_pair = chebyshev_coefficients_i1e_B<scalar_t>(); 3347 auto B = std::get<0>(coeff_pair); 3348 auto len = std::get<1>(coeff_pair); 3349 const scalar_t out = (::exp(x) * chbevl(scalar_t{32.0} / x - scalar_t{2.0}, B, len)) / ::sqrt(x); 3350 return (_x < scalar_t{0.0}) ? -out : out; 3351 } 3352 3353 template <typename scalar_t> 3354 static inline C10_HOST_DEVICE scalar_t calc_i1e(scalar_t _x) { 3355 const auto x = ::abs(_x); 3356 if (x <= scalar_t{8.0}) { 3357 auto coeff_pair = chebyshev_coefficients_i1e_A<scalar_t>(); 3358 auto A = std::get<0>(coeff_pair); 3359 auto len = std::get<1>(coeff_pair); 3360 const scalar_t y = x / scalar_t{2.0} - scalar_t{2.0}; 3361 const scalar_t out = chbevl(y, A, len) * x; 3362 return (_x < scalar_t{0.0}) ? -out : out; 3363 } 3364 3365 auto coeff_pair = chebyshev_coefficients_i1e_B<scalar_t>(); 3366 auto B = std::get<0>(coeff_pair); 3367 auto len = std::get<1>(coeff_pair); 3368 const scalar_t out = chbevl(scalar_t{32.0} / x - scalar_t{2.0}, B, len) / ::sqrt(x); 3369 return (_x < scalar_t{0.0}) ? -out : out; 3370 } 3371 3372 #endif // AT_USE_JITERATOR() (this closes the "else" branch of a if/else preprocessor directive) 3373 3374 } // namespace native 3375 } // namespace at 3376