1 #pragma once
2
3 #include <ATen/AccumulateType.h>
4 #include <ATen/NumericUtils.h>
5 #include <ATen/jiterator_macros.h>
6 #include <c10/util/BFloat16.h>
7 #include <c10/util/Half.h>
8 #include <c10/util/MathConstants.h>
9 #include <cfloat>
10 #include <cmath>
11 #include <cstdint>
12 #include <cstdlib>
13 #include <limits>
14 #include <type_traits>
15
16 C10_CLANG_DIAGNOSTIC_PUSH()
17 #if C10_CLANG_HAS_WARNING("-Wimplicit-float-conversion")
18 C10_CLANG_DIAGNOSTIC_IGNORE("-Wimplicit-float-conversion")
19 #endif
20
21 /* The next function is taken from https://github.com/antelopeusersgroup/antelope_contrib/blob/master/lib/location/libgenloc/erfinv.c.
22 Below is the copyright.
23 Output was modified to be inf or -inf when input is 1 or -1. */
24
25
26 /*
27 Copyright (c) 2014 Indiana University
28 All rights reserved.
29
30 Written by Prof. Gary L. Pavlis, Dept. of Geol. Sci.,
31 Indiana University, Bloomington, IN
32
33 This software is licensed under the New BSD license:
34
35 Redistribution and use in source and binary forms,
36 with or without modification, are permitted provided
37 that the following conditions are met:
38
39 Redistributions of source code must retain the above
40 copyright notice, this list of conditions and the
41 following disclaimer.
42
43 Redistributions in binary form must reproduce the
44 above copyright notice, this list of conditions and
45 the following disclaimer in the documentation and/or
46 other materials provided with the distribution.
47
48 Neither the name of Indiana University nor
49 the names of its contributors may be used to endorse
50 or promote products derived from this software without
51 specific prior written permission.
52
53 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
54 CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
55 WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
56 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
57 PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
58 THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY
59 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
60 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
61 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
62 USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
63 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
64 IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
65 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
66 USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
67 POSSIBILITY OF SUCH DAMAGE.
68 */
69
70 namespace {
71 /*
72 * This function is derived from the implementation of the i0e function in the
73 * Cephes Math Library. See note [3-Clause BSD License for the Cephes Math
74 * Library].
75 *
76 * Computes an approximation of the exponentially scaled zeroth order modified
77 * Bessel function of the first kind. The approximation is actually two
78 * (sub)approximations, both using a Chebyshev polynomial expansion. One
79 * approximates the function over [0, 8], and the other over (8, infinity). This
80 * function takes the absolute value of all inputs to convert them into the
81 * domain of the approximation.
82 */
83 jiterator_also_stringify_as(jiterator_code(
84 template <typename T>
85 JITERATOR_HOST_DEVICE T chbevl(T x, const T array[], const int len) {
86 T b0, b1, b2;
87
88 b0 = array[0];
89 b1 = 0;
90
91 for (int i = 1; i < len; ++i) {
92 b2 = b1;
93 b1 = b0;
94 b0 = x * b1 - b2 + array[i];
95 }
96
97 return T{0.5} * (b0 - b2);
98 }
99
100 template <typename T>
101 JITERATOR_HOST_DEVICE T calc_i0e(T _x) {
102 T x = std::fabs(_x);
103
104 if (x <= T{8.0}) {
105 static const T coefficients[] = {
106 -4.41534164647933937950E-18, 3.33079451882223809783E-17,
107 -2.43127984654795469359E-16, 1.71539128555513303061E-15,
108 -1.16853328779934516808E-14, 7.67618549860493561688E-14,
109 -4.85644678311192946090E-13, 2.95505266312963983461E-12,
110 -1.72682629144155570723E-11, 9.67580903537323691224E-11,
111 -5.18979560163526290666E-10, 2.65982372468238665035E-9,
112 -1.30002500998624804212E-8, 6.04699502254191894932E-8,
113 -2.67079385394061173391E-7, 1.11738753912010371815E-6,
114 -4.41673835845875056359E-6, 1.64484480707288970893E-5,
115 -5.75419501008210370398E-5, 1.88502885095841655729E-4,
116 -5.76375574538582365885E-4, 1.63947561694133579842E-3,
117 -4.32430999505057594430E-3, 1.05464603945949983183E-2,
118 -2.37374148058994688156E-2, 4.93052842396707084878E-2,
119 -9.49010970480476444210E-2, 1.71620901522208775349E-1,
120 -3.04682672343198398683E-1, 6.76795274409476084995E-1};
121
122 T y = (x / T{2.0}) - T{2.0};
123 return chbevl(y, coefficients, int{30});
124 }
125
126 // x > 8
127 static const T coefficients[] = {
128 -7.23318048787475395456E-18, -4.83050448594418207126E-18,
129 4.46562142029675999901E-17, 3.46122286769746109310E-17,
130 -2.82762398051658348494E-16, -3.42548561967721913462E-16,
131 1.77256013305652638360E-15, 3.81168066935262242075E-15,
132 -9.55484669882830764870E-15, -4.15056934728722208663E-14,
133 1.54008621752140982691E-14, 3.85277838274214270114E-13,
134 7.18012445138366623367E-13, -1.79417853150680611778E-12,
135 -1.32158118404477131188E-11, -3.14991652796324136454E-11,
136 1.18891471078464383424E-11, 4.94060238822496958910E-10,
137 3.39623202570838634515E-9, 2.26666899049817806459E-8,
138 2.04891858946906374183E-7, 2.89137052083475648297E-6,
139 6.88975834691682398426E-5, 3.36911647825569408990E-3,
140 8.04490411014108831608E-1};
141
142 return chbevl(T{32.0} / x - T{2.0}, coefficients, int{25}) / std::sqrt(x);
143 }),
144 i0e_string); // i0e_string
145 }
146
147 #define CENTRAL_RANGE 0.7
148
149 template <typename T>
150 inline typename std::enable_if<std::is_floating_point<T>::value, T>::type
calc_erfinv(T y)151 calc_erfinv(T y) {
152 /* Function to calculate inverse error function. Rational approximation
153 is used to generate an initial approximation, which is then improved to
154 full accuracy by two steps of Newton's method. Code is a direct
155 translation of the erfinv m file in matlab version 2.0.
156 Author: Gary L. Pavlis, Indiana University
157 Date: February 1996
158 */
159 T x, z, num, dem; /*working variables */
160 /* coefficients in rational expansion */
161 T a[4] = { T(0.886226899), T(-1.645349621), T(0.914624893), T(-0.140543331) };
162 T b[4] = { T(-2.118377725), T(1.442710462), T(-0.329097515), T(0.012229801) };
163 T c[4] = { T(-1.970840454), T(-1.624906493), T(3.429567803), T(1.641345311) };
164 T d[2] = { T(3.543889200), T(1.637067800) };
165 T y_abs = std::abs(y);
166 if(y_abs > 1.0) return std::numeric_limits<T>::quiet_NaN();
167 #ifdef _WIN32
168 // error C2039: '_copysign': is not a member of 'std'
169 if(y_abs == 1.0) return copysign(std::numeric_limits<T>::infinity(), y);
170 #else
171 if(y_abs == 1.0) return std::copysign(std::numeric_limits<T>::infinity(), y);
172 #endif
173 if(y_abs <= static_cast<T>(CENTRAL_RANGE)) {
174 z = y * y;
175 num = (((a[3]*z + a[2])*z + a[1])*z + a[0]);
176 dem = ((((b[3]*z + b[2])*z + b[1])*z +b[0]) * z + static_cast<T>(1.0));
177 x = y * num / dem;
178 }
179 else{
180 z = std::sqrt(-std::log((static_cast<T>(1.0)-y_abs)/static_cast<T>(2.0)));
181 num = ((c[3]*z + c[2])*z + c[1]) * z + c[0];
182 dem = (d[1]*z + d[0])*z + static_cast<T>(1.0);
183 #ifdef _WIN32
184 // error C2039: '_copysign': is not a member of 'std'
185 x = copysign(num, y) / dem;
186 #else
187 x = std::copysign(num, y) / dem;
188 #endif
189 }
190 /* Two steps of Newton-Raphson correction */
191 x = x - (std::erf(x) - y) / ((static_cast<T>(2.0)/static_cast<T>(std::sqrt(c10::pi<double>)))*std::exp(-x*x));
192 x = x - (std::erf(x) - y) / ((static_cast<T>(2.0)/static_cast<T>(std::sqrt(c10::pi<double>)))*std::exp(-x*x));
193
194 return(x);
195 }
196
197 #undef CENTRAL_RANGE
198
199 /*
200 * Note [3-Clause BSD License for the Cephes Math Library]
201 * Code derived from implementations in the Cephes Math Library should mention its derivation and reference
202 * this note (ex. 'This function is derived from the implementation of X in the Cephes Math Library. See note
203 * [3-Clause BSD License for the Cephes Math Library]. The license is:
204 * Copyright (c) 2018, Steven Moshier
205 * All rights reserved.
206 *
207 * Redistribution and use in source and binary forms, with or without
208 * modification, are permitted provided that the following conditions are met:
209 * * Redistributions of source code must retain the above copyright
210 * notice, this list of conditions and the following disclaimer.
211 * * Redistributions in binary form must reproduce the above copyright
212 * notice, this list of conditions and the following disclaimer in the
213 * documentation and/or other materials provided with the distribution.
214 * * Neither the name of the nor the
215 * names of its contributors may be used to endorse or promote products
216 * derived from this software without specific prior written permission.
217 *
218 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
219 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
220 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
221 * DISCLAIMED. IN NO EVENT SHALL Steven Moshier BE LIABLE FOR ANY
222 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
223 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
224 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
225 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
226 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
227 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
228 */
229
230 /*
231 * This function is derived from the implementation of the zeta function in the Cephes Math Library.
232 * See note [3-Clause BSD License for the Cephes Math Library].
233 */
234 template <typename scalar_t, bool is_cuda=false>
zeta(scalar_t x,scalar_t q)235 C10_HOST_DEVICE inline scalar_t zeta(scalar_t x, scalar_t q) __ubsan_ignore_float_divide_by_zero__ {
236 using acc_t = at::acc_type<scalar_t, is_cuda>;
237 const acc_t MACHEP = acc_t{1.11022302462515654042E-16};
238 constexpr acc_t zero = acc_t{0.0};
239 constexpr acc_t half = acc_t{0.5};
240 constexpr acc_t one = acc_t{1.0};
241 static const acc_t A[] = {
242 12.0,
243 -720.0,
244 30240.0,
245 -1209600.0,
246 47900160.0,
247 -1.8924375803183791606e9, /*1.307674368e12/691*/
248 7.47242496e10,
249 -2.950130727918164224e12, /*1.067062284288e16/3617*/
250 1.1646782814350067249e14, /*5.109094217170944e18/43867*/
251 -4.5979787224074726105e15, /*8.028576626982912e20/174611*/
252 1.8152105401943546773e17, /*1.5511210043330985984e23/854513*/
253 -7.1661652561756670113e18 /*1.6938241367317436694528e27/236364091*/
254 };
255
256 int i = 0;
257 acc_t a, b, k, s, t, w;
258 if (x == one) {
259 return std::numeric_limits<scalar_t>::infinity();
260 }
261
262 if (x < one) {
263 return std::numeric_limits<scalar_t>::quiet_NaN();
264 }
265
266 if (q <= zero) {
267 if (q == std::floor(q)) {
268 return std::numeric_limits<scalar_t>::infinity();
269 }
270 if (x != std::floor(x)) {
271 return std::numeric_limits<scalar_t>::quiet_NaN();
272 }
273 }
274
275 s = std::pow(q, -x);
276 a = q;
277 i = 0;
278 b = zero;
279 while ((i < 9) || (a <= acc_t{9.0})) {
280 i += 1;
281 a += one;
282 b = ::pow(a, -x);
283 s += b;
284 if ((-MACHEP * s < b) && (b < MACHEP * s)) {
285 return static_cast<scalar_t>(s);
286 }
287 };
288
289 w = a;
290 s += b * w / (x - one);
291 s -= half * b;
292 a = one;
293 k = zero;
294 for (int i = 0; i < 12; i++) {
295 a *= x + k;
296 b /= w;
297 t = a * b / A[i];
298 s = s + t;
299 t = ::fabs(t / s);
300 if (t < MACHEP) {
301 return static_cast<scalar_t>(s);
302 }
303 k += one;
304 a *= x + k;
305 b /= w;
306 k += one;
307 }
308 return static_cast<scalar_t>(s);
309 }
310
311 /*
312 * This function is derived from the implementation of the digamma function in the Cephes Math Library.
313 * See note [3-Clause BSD License for the Cephes Math Library].
314 *
315 * Evaluates polynomial of degree N:
316 *
317 * 2 N
318 * y = C + C x + C x +...+ C x
319 * 0 1 2 N
320 *
321 * Coefficients are stored in reverse order:
322 *
323 * coef[0] = C , ..., coef[N] = C .
324 * N 0
325 */
326 template <typename T>
polevl(const T x,const T A[],size_t len)327 C10_HOST_DEVICE inline T polevl(const T x, const T A[], size_t len) {
328 T result = 0;
329 for (size_t i = 0; i <= len; i++) {
330 result = result * x + A[i];
331 }
332 return result;
333 }
334
trigamma(double x)335 inline double trigamma(double x) __ubsan_ignore_float_divide_by_zero__ {
336 double sign = +1;
337 double result = 0;
338 if (x < 0.5) {
339 sign = -1;
340 const double sin_pi_x = sin(c10::pi<double> * x);
341 result -= (c10::pi<double> * c10::pi<double>) / (sin_pi_x * sin_pi_x);
342 x = 1 - x;
343 }
344 for (int i = 0; i < 6; ++i) {
345 result += 1 / (x * x);
346 x += 1;
347 }
348 const double ixx = 1 / (x*x);
349 result += (1 + 1 / (2*x) + ixx * (1./6 - ixx * (1./30 - ixx * (1./42)))) / x;
350 return sign * result;
351 }
352
trigamma(float x)353 inline float trigamma(float x) __ubsan_ignore_float_divide_by_zero__ {
354 float sign = +1;
355 float result = 0;
356 if (x < 0.5f) {
357 sign = -1;
358 const float sin_pi_x = sinf(c10::pi<float> * x);
359 result -= (c10::pi<float> * c10::pi<float>) / (sin_pi_x * sin_pi_x);
360 x = 1 - x;
361 }
362 for (int i = 0; i < 6; ++i) {
363 result += 1 / (x * x);
364 x += 1;
365 }
366 const float ixx = 1 / (x*x);
367 result += (1 + 1 / (2*x) + ixx * (1.f/6 - ixx * (1.f/30 - ixx * (1.f/42)))) / x;
368 return sign * result;
369 }
370
371 /*
372 * This function is derived from the implementation of the digamma function in the Cephes Math Library.
373 * See note [3-Clause BSD License for the Cephes Math Library].
374 */
calc_digamma(double x)375 inline double calc_digamma(double x) {
376 // [C++ Standard Reference: Gamma Function] https://en.cppreference.com/w/cpp/numeric/math/tgamma
377 static double PSI_10 = 2.25175258906672110764;
378 if (x == 0) {
379 // As per C++ standard for gamma related functions and SciPy,
380 // If the argument is ±0, ±∞ is returned
381 return std::copysign(INFINITY, -x);
382 }
383
384 bool x_is_integer = x == trunc(x);
385 if (x < 0) {
386 if (x_is_integer) {
387 // As per C++ standard for gamma related functions and SciPy,
388 // If the argument is a negative integer, NaN is returned
389 return std::numeric_limits<double>::quiet_NaN();
390 }
391 // Extracts the fractional part of x as r, since tan(pi * r) is more numerically
392 // accurate than tan(pi * x). While these operations are mathematically equivalent
393 // since both x and r are in radians and tan() has a periodicity of pi, in practice
394 // the computation of pi * x is a source of error (when |x| > 1).
395 double q, r;
396 r = std::modf(x, &q);
397 return calc_digamma(1 - x) - c10::pi<double> / tan(c10::pi<double> * r);
398 }
399
400 // Push x to be >= 10
401 double result = 0;
402 while (x < 10) {
403 result -= 1 / x;
404 x += 1;
405 }
406 if (x == 10) {
407 return result + PSI_10;
408 }
409
410 // Compute asymptotic digamma
411 static const double A[] = {
412 8.33333333333333333333E-2,
413 -2.10927960927960927961E-2,
414 7.57575757575757575758E-3,
415 -4.16666666666666666667E-3,
416 3.96825396825396825397E-3,
417 -8.33333333333333333333E-3,
418 8.33333333333333333333E-2,
419 };
420
421 double y = 0;
422 if (x < 1.0e17) {
423 double z = 1.0 / (x * x);
424 y = z * polevl(z, A, 6);
425 }
426 return result + log(x) - (0.5 / x) - y;
427 }
428
429 /*
430 * This function is derived from the implementation of the digamma function in the Cephes Math Library.
431 * See note [3-Clause BSD License for the Cephes Math Library].
432 */
calc_digamma(float x)433 inline float calc_digamma(float x) {
434 // See [C++ Standard Reference: Gamma Function]
435 static float PSI_10 = 2.25175258906672110764f;
436 if (x == 0) {
437 // As per C++ standard for gamma related functions and SciPy,
438 // If the argument is ±0, ±∞ is returned
439 return std::copysign(INFINITY, -x);
440 }
441
442 bool x_is_integer = x == truncf(x);
443 if (x < 0) {
444 if (x_is_integer) {
445 // As per C++ standard for gamma related functions and SciPy,
446 // If the argument is a negative integer, NaN is returned
447 return std::numeric_limits<float>::quiet_NaN();
448 }
449 // Extracts the fractional part of x as r, since tan(pi * r) is more numerically
450 // accurate than tan(pi * x). While these operations are mathematically equivalent
451 // since both x and r are in radians and tan() has a periodicity of pi, in practice
452 // the computation of pi * x is a source of error (when |x| > 1).
453 double q, r;
454 r = std::modf(x, &q);
455 float pi_over_tan_pi_x = (float)(c10::pi<double> / tan(c10::pi<double> * r));
456 return calc_digamma(1 - x) - pi_over_tan_pi_x;
457 }
458
459 // Push x to be >= 10
460 float result = 0;
461 while (x < 10) {
462 result -= 1 / x;
463 x += 1;
464 }
465 if (x == 10) {
466 return result + PSI_10;
467 }
468
469 // Compute asymptotic digamma
470 static const float A[] = {
471 8.33333333333333333333E-2f,
472 -2.10927960927960927961E-2f,
473 7.57575757575757575758E-3f,
474 -4.16666666666666666667E-3f,
475 3.96825396825396825397E-3f,
476 -8.33333333333333333333E-3f,
477 8.33333333333333333333E-2f,
478 };
479
480 float y = 0;
481 if (x < 1.0e17f) {
482 float z = 1 / (x * x);
483 y = z * polevl(z, A, 6);
484 }
485 return result + logf(x) - (0.5f / x) - y;
486 }
487
calc_digamma(c10::BFloat16 a)488 inline c10::BFloat16 calc_digamma(c10::BFloat16 a) {
489 return calc_digamma(static_cast<float>(a));
490 }
491
calc_digamma(c10::Half a)492 inline c10::Half calc_digamma(c10::Half a) {
493 return calc_digamma(static_cast<float>(a));
494 }
495
496 template <typename scalar_t, bool is_cuda=false>
calc_polygamma(scalar_t x,int n)497 inline C10_HOST_DEVICE scalar_t calc_polygamma(scalar_t x, int n) {
498 // already blocked if n <= 1
499 const auto one = scalar_t{1};
500 return ((n % 2) ? one : -one) *
501 std::exp(std::lgamma(static_cast<scalar_t>(n) + one)) *
502 zeta<scalar_t, is_cuda>(static_cast<scalar_t>(n + 1), x);
503 }
504
505 // regularized lower incomplete gamma
506 // the regularized lower, upper incomplete gamma, as well as their
507 // helper functions follow SciPy's implementation
508
509 /* References
510 * [igam1] "The Digital Library of Mathematical Functions", dlmf.nist.gov
511 * [igam2] Maddock et al., "Incomplete Gamma Functions",
512 * https://www.boost.org/doc/libs/1_61_0/libs/math/doc/html/math_toolkit/sf_gamma/igamma.html
513 */
514
515 /*
516 * This implementation of the regularized incomplete gamma functions and
517 * their helper functions are derived from the implementation of SciPy's
518 * gammainc, Cephes's igam and igamc, and Boost's Lanczos approximations.
519 * See NOTICE for the licenses.
520 */
521 template <typename scalar_t>
ratevl(scalar_t x,const scalar_t num[],int64_t M,const scalar_t denom[],int64_t N)522 scalar_t ratevl(scalar_t x, const scalar_t num[], int64_t M,
523 const scalar_t denom[], int64_t N) {
524 // evaluating rational function, i.e., the ratio of two polynomials
525 // the coefficients for numerator are given by `num` while coeffs for
526 // denumerator are given by `denom`
527
528 int64_t i, dir;
529 scalar_t y, num_ans, denom_ans;
530 scalar_t absx = std::fabs(x);
531 const scalar_t *p;
532
533 if (absx > 1) {
534 /* Evaluate as a polynomial in 1/x. */
535 dir = -1;
536 p = num + M;
537 y = 1 / x;
538 }
539 else {
540 dir = 1;
541 p = num;
542 y = x;
543 }
544
545 /* Evaluate the numerator */
546 num_ans = *p;
547 p += dir;
548 for (i = 1; i <= M; i++) {
549 num_ans = num_ans * y + *p;
550 p += dir;
551 }
552 /* Evaluate the denominator */
553 if (absx > 1) {
554 p = denom + N;
555 }
556 else {
557 p = denom;
558 }
559
560 denom_ans = *p;
561 p += dir;
562 for (i = 1; i <= N; i++) {
563 denom_ans = denom_ans * y + *p;
564 p += dir;
565 }
566 if (absx > 1) {
567 i = N - M;
568 return std::pow(x, i) * num_ans / denom_ans;
569 }
570 else {
571 return num_ans / denom_ans;
572 }
573 }
574
575 // SciPy's lanczos implementation is taken from Boost
576 /* (C) Copyright John Maddock 2006.
577 * Use, modification and distribution are subject to the
578 * Boost Software License, Version 1.0. See
579 * https://www.boost.org/LICENSE_1_0.txt or see NOTICE.
580 */
581 template <typename scalar_t>
lanczos_sum_expg_scaled(scalar_t x)582 static scalar_t lanczos_sum_expg_scaled(scalar_t x) {
583 // lanczos approximation
584 static const scalar_t lanczos_sum_expg_scaled_num[13] = {
585 0.006061842346248906525783753964555936883222,
586 0.5098416655656676188125178644804694509993,
587 19.51992788247617482847860966235652136208,
588 449.9445569063168119446858607650988409623,
589 6955.999602515376140356310115515198987526,
590 75999.29304014542649875303443598909137092,
591 601859.6171681098786670226533699352302507,
592 3481712.15498064590882071018964774556468,
593 14605578.08768506808414169982791359218571,
594 43338889.32467613834773723740590533316085,
595 86363131.28813859145546927288977868422342,
596 103794043.1163445451906271053616070238554,
597 56906521.91347156388090791033559122686859
598 };
599 static const scalar_t lanczos_sum_expg_scaled_denom[13] = {
600 1.,
601 66.,
602 1925.,
603 32670.,
604 357423.,
605 2637558.,
606 13339535.,
607 45995730.,
608 105258076.,
609 150917976.,
610 120543840.,
611 39916800.,
612 0.
613 };
614 return ratevl(x, lanczos_sum_expg_scaled_num,
615 sizeof(lanczos_sum_expg_scaled_num) / sizeof(lanczos_sum_expg_scaled_num[0]) - 1,
616 lanczos_sum_expg_scaled_denom,
617 sizeof(lanczos_sum_expg_scaled_denom) / sizeof(lanczos_sum_expg_scaled_denom[0]) - 1);
618 }
619
620 template <typename scalar_t>
_igam_helper_fac(scalar_t a,scalar_t x)621 static scalar_t _igam_helper_fac(scalar_t a, scalar_t x) {
622 // compute x^a * exp(-a) / gamma(a)
623 // corrected from (15) and (16) in [igam2] by replacing exp(x - a) with
624 // exp(a - x).
625
626 scalar_t ax, fac, res, num, numfac;
627 static scalar_t MAXLOG = std::is_same<scalar_t,double>::value ?
628 7.09782712893383996843E2 : 88.72283905206835;
629 static scalar_t EXP1 = 2.718281828459045;
630 static scalar_t lanczos_g = 6.024680040776729583740234375;
631
632 if (std::fabs(a - x) > 0.4 * std::fabs(a)) {
633 ax = a * std::log(x) - x - std::lgamma(a);
634 if (ax < -MAXLOG) {
635 return 0.0;
636 }
637 return std::exp(ax);
638 }
639
640 fac = a + lanczos_g - 0.5;
641 res = std::sqrt(fac / EXP1) / lanczos_sum_expg_scaled(a);
642
643 if ((a < 200) && (x < 200)) {
644 res *= std::exp(a - x) * std::pow(x / fac, a);
645 }
646 else {
647 num = x - a - lanczos_g + 0.5;
648 numfac = num / fac;
649 res *= std::exp(a * (std::log1p(numfac) - numfac) + x * (0.5 - lanczos_g) / fac);
650 }
651 return res;
652 }
653
654 template <typename scalar_t>
_igam_helper_series(scalar_t a,scalar_t x)655 static scalar_t _igam_helper_series(scalar_t a, scalar_t x) {
656 // Compute igam using DLMF 8.11.4. [igam1]
657 static scalar_t MACHEP = std::is_same<scalar_t, double>::value ?
658 1.11022302462515654042E-16 : 5.9604644775390625E-8;
659 static int MAXITER = 2000;
660
661 int i;
662 scalar_t ans, ax, c, r;
663
664 ax = _igam_helper_fac(a, x);
665 if (ax == 0.0) {
666 return 0.0;
667 }
668
669 /* power series */
670 r = a;
671 c = 1.0;
672 ans = 1.0;
673
674 for (i = 0; i < MAXITER; i++) {
675 r += 1.0;
676 c *= x / r;
677 ans += c;
678 if (c <= MACHEP * ans) {
679 break;
680 }
681 }
682 return (ans * ax / a);
683 }
684
685 template <typename scalar_t>
_igamc_helper_series(scalar_t a,scalar_t x)686 static scalar_t _igamc_helper_series(scalar_t a, scalar_t x) {
687 // Compute igamc using DLMF 8.7.3 [igam1]. This is related to the series in
688 // _igam_helper_series but extra care is taken to avoid cancellation.
689
690 int n;
691 scalar_t fac = 1;
692 scalar_t sum = 0;
693 scalar_t term, logx;
694 static scalar_t MAXITER = 2000;
695 static scalar_t MACHEP = std::is_same<scalar_t, double>::value ?
696 1.11022302462515654042E-16 : 5.9604644775390625E-8;
697
698 for (n = 1; n < MAXITER; n++) {
699 fac *= -x / n;
700 term = fac / (a + n);
701 sum += term;
702 if (std::fabs(term) <= MACHEP * std::fabs(sum)) {
703 break;
704 }
705 }
706
707 logx = std::log(x);
708 term = -std::expm1(a * logx - std::lgamma(1+a));
709 return term - std::exp(a * logx - std::lgamma(a)) * sum;
710 }
711
712 template <typename scalar_t>
_igam_helper_asymptotic_series(scalar_t a,scalar_t x,bool igam)713 static scalar_t _igam_helper_asymptotic_series(scalar_t a, scalar_t x, bool igam) {
714 // Compute igam/igamc using DLMF 8.12.3/8.12.4 [igam1]
715 static const scalar_t d[25][25] =
716 {{-3.3333333333333333e-1, 8.3333333333333333e-2, -1.4814814814814815e-2,
717 1.1574074074074074e-3, 3.527336860670194e-4, -1.7875514403292181e-4,
718 3.9192631785224378e-5, -2.1854485106799922e-6, -1.85406221071516e-6,
719 8.296711340953086e-7, -1.7665952736826079e-7, 6.7078535434014986e-9,
720 1.0261809784240308e-8, -4.3820360184533532e-9, 9.1476995822367902e-10,
721 -2.551419399494625e-11, -5.8307721325504251e-11, 2.4361948020667416e-11,
722 -5.0276692801141756e-12, 1.1004392031956135e-13, 3.3717632624009854e-13,
723 -1.3923887224181621e-13, 2.8534893807047443e-14, -5.1391118342425726e-16,
724 -1.9752288294349443e-15},
725 {-1.8518518518518519e-3, -3.4722222222222222e-3, 2.6455026455026455e-3,
726 -9.9022633744855967e-4, 2.0576131687242798e-4, -4.0187757201646091e-7,
727 -1.8098550334489978e-5, 7.6491609160811101e-6, -1.6120900894563446e-6,
728 4.6471278028074343e-9, 1.378633446915721e-7, -5.752545603517705e-8,
729 1.1951628599778147e-8, -1.7543241719747648e-11, -1.0091543710600413e-9,
730 4.1627929918425826e-10, -8.5639070264929806e-11, 6.0672151016047586e-14,
731 7.1624989648114854e-12, -2.9331866437714371e-12, 5.9966963656836887e-13,
732 -2.1671786527323314e-16, -4.9783399723692616e-14, 2.0291628823713425e-14,
733 -4.13125571381061e-15},
734 {4.1335978835978836e-3, -2.6813271604938272e-3, 7.7160493827160494e-4,
735 2.0093878600823045e-6, -1.0736653226365161e-4, 5.2923448829120125e-5,
736 -1.2760635188618728e-5, 3.4235787340961381e-8, 1.3721957309062933e-6,
737 -6.298992138380055e-7, 1.4280614206064242e-7, -2.0477098421990866e-10,
738 -1.4092529910867521e-8, 6.228974084922022e-9, -1.3670488396617113e-9,
739 9.4283561590146782e-13, 1.2872252400089318e-10, -5.5645956134363321e-11,
740 1.1975935546366981e-11, -4.1689782251838635e-15, -1.0940640427884594e-12,
741 4.6622399463901357e-13, -9.905105763906906e-14, 1.8931876768373515e-17,
742 8.8592218725911273e-15},
743 {6.4943415637860082e-4, 2.2947209362139918e-4, -4.6918949439525571e-4,
744 2.6772063206283885e-4, -7.5618016718839764e-5, -2.3965051138672967e-7,
745 1.1082654115347302e-5, -5.6749528269915966e-6, 1.4230900732435884e-6,
746 -2.7861080291528142e-11, -1.6958404091930277e-7, 8.0994649053880824e-8,
747 -1.9111168485973654e-8, 2.3928620439808118e-12, 2.0620131815488798e-9,
748 -9.4604966618551322e-10, 2.1541049775774908e-10, -1.388823336813903e-14,
749 -2.1894761681963939e-11, 9.7909989511716851e-12, -2.1782191880180962e-12,
750 6.2088195734079014e-17, 2.126978363279737e-13, -9.3446887915174333e-14,
751 2.0453671226782849e-14},
752 {-8.618882909167117e-4, 7.8403922172006663e-4, -2.9907248030319018e-4,
753 -1.4638452578843418e-6, 6.6414982154651222e-5, -3.9683650471794347e-5,
754 1.1375726970678419e-5, 2.5074972262375328e-10, -1.6954149536558306e-6,
755 8.9075075322053097e-7, -2.2929348340008049e-7, 2.956794137544049e-11,
756 2.8865829742708784e-8, -1.4189739437803219e-8, 3.4463580499464897e-9,
757 -2.3024517174528067e-13, -3.9409233028046405e-10, 1.8602338968504502e-10,
758 -4.356323005056618e-11, 1.2786001016296231e-15, 4.6792750266579195e-12,
759 -2.1492464706134829e-12, 4.9088156148096522e-13, -6.3385914848915603e-18,
760 -5.0453320690800944e-14},
761 {-3.3679855336635815e-4, -6.9728137583658578e-5, 2.7727532449593921e-4,
762 -1.9932570516188848e-4, 6.7977804779372078e-5, 1.419062920643967e-7,
763 -1.3594048189768693e-5, 8.0184702563342015e-6, -2.2914811765080952e-6,
764 -3.252473551298454e-10, 3.4652846491085265e-7, -1.8447187191171343e-7,
765 4.8240967037894181e-8, -1.7989466721743515e-14, -6.3061945000135234e-9,
766 3.1624176287745679e-9, -7.8409242536974293e-10, 5.1926791652540407e-15,
767 9.3589442423067836e-11, -4.5134262161632782e-11, 1.0799129993116827e-11,
768 -3.661886712685252e-17, -1.210902069055155e-12, 5.6807435849905643e-13,
769 -1.3249659916340829e-13},
770 {5.3130793646399222e-4, -5.9216643735369388e-4, 2.7087820967180448e-4,
771 7.9023532326603279e-7, -8.1539693675619688e-5, 5.6116827531062497e-5,
772 -1.8329116582843376e-5, -3.0796134506033048e-9, 3.4651553688036091e-6,
773 -2.0291327396058604e-6, 5.7887928631490037e-7, 2.338630673826657e-13,
774 -8.8286007463304835e-8, 4.7435958880408128e-8, -1.2545415020710382e-8,
775 8.6496488580102925e-14, 1.6846058979264063e-9, -8.5754928235775947e-10,
776 2.1598224929232125e-10, -7.6132305204761539e-16, -2.6639822008536144e-11,
777 1.3065700536611057e-11, -3.1799163902367977e-12, 4.7109761213674315e-18,
778 3.6902800842763467e-13},
779 {3.4436760689237767e-4, 5.1717909082605922e-5, -3.3493161081142236e-4,
780 2.812695154763237e-4, -1.0976582244684731e-4, -1.2741009095484485e-7,
781 2.7744451511563644e-5, -1.8263488805711333e-5, 5.7876949497350524e-6,
782 4.9387589339362704e-10, -1.0595367014026043e-6, 6.1667143761104075e-7,
783 -1.7562973359060462e-7, -1.2974473287015439e-12, 2.695423606288966e-8,
784 -1.4578352908731271e-8, 3.887645959386175e-9, -3.8810022510194121e-17,
785 -5.3279941738772867e-10, 2.7437977643314845e-10, -6.9957960920705679e-11,
786 2.5899863874868481e-17, 8.8566890996696381e-12, -4.403168815871311e-12,
787 1.0865561947091654e-12},
788 {-6.5262391859530942e-4, 8.3949872067208728e-4, -4.3829709854172101e-4,
789 -6.969091458420552e-7, 1.6644846642067548e-4, -1.2783517679769219e-4,
790 4.6299532636913043e-5, 4.5579098679227077e-9, -1.0595271125805195e-5,
791 6.7833429048651666e-6, -2.1075476666258804e-6, -1.7213731432817145e-11,
792 3.7735877416110979e-7, -2.1867506700122867e-7, 6.2202288040189269e-8,
793 6.5977038267330006e-16, -9.5903864974256858e-9, 5.2132144922808078e-9,
794 -1.3991589583935709e-9, 5.382058999060575e-16, 1.9484714275467745e-10,
795 -1.0127287556389682e-10, 2.6077347197254926e-11, -5.0904186999932993e-18,
796 -3.3721464474854592e-12},
797 {-5.9676129019274625e-4, -7.2048954160200106e-5, 6.7823088376673284e-4,
798 -6.4014752602627585e-4, 2.7750107634328704e-4, 1.8197008380465151e-7,
799 -8.4795071170685032e-5, 6.105192082501531e-5, -2.1073920183404862e-5,
800 -8.8585890141255994e-10, 4.5284535953805377e-6, -2.8427815022504408e-6,
801 8.7082341778646412e-7, 3.6886101871706965e-12, -1.5344695190702061e-7,
802 8.862466778790695e-8, -2.5184812301826817e-8, -1.0225912098215092e-14,
803 3.8969470758154777e-9, -2.1267304792235635e-9, 5.7370135528051385e-10,
804 -1.887749850169741e-19, -8.0931538694657866e-11, 4.2382723283449199e-11,
805 -1.1002224534207726e-11},
806 {1.3324454494800656e-3, -1.9144384985654775e-3, 1.1089369134596637e-3,
807 9.932404122642299e-7, -5.0874501293093199e-4, 4.2735056665392884e-4,
808 -1.6858853767910799e-4, -8.1301893922784998e-9, 4.5284402370562147e-5,
809 -3.127053674781734e-5, 1.044986828530338e-5, 4.8435226265680926e-11,
810 -2.1482565873456258e-6, 1.329369701097492e-6, -4.0295693092101029e-7,
811 -1.7567877666323291e-13, 7.0145043163668257e-8, -4.040787734999483e-8,
812 1.1474026743371963e-8, 3.9642746853563325e-18, -1.7804938269892714e-9,
813 9.7480262548731646e-10, -2.6405338676507616e-10, 5.794875163403742e-18,
814 3.7647749553543836e-11},
815 {1.579727660730835e-3, 1.6251626278391582e-4, -2.0633421035543276e-3,
816 2.1389686185689098e-3, -1.0108559391263003e-3, -3.9912705529919201e-7,
817 3.6235025084764691e-4, -2.8143901463712154e-4, 1.0449513336495887e-4,
818 2.1211418491830297e-9, -2.5779417251947842e-5, 1.7281818956040463e-5,
819 -5.6413773872904282e-6, -1.1024320105776174e-11, 1.1223224418895175e-6,
820 -6.8693396379526735e-7, 2.0653236975414887e-7, 4.6714772409838506e-14,
821 -3.5609886164949055e-8, 2.0470855345905963e-8, -5.8091738633283358e-9,
822 -1.332821287582869e-16, 9.0354604391335133e-10, -4.9598782517330834e-10,
823 1.3481607129399749e-10},
824 {-4.0725121195140166e-3, 6.4033628338080698e-3, -4.0410161081676618e-3,
825 -2.183732802866233e-6, 2.1740441801254639e-3, -1.9700440518418892e-3,
826 8.3595469747962458e-4, 1.9445447567109655e-8, -2.5779387120421696e-4,
827 1.9009987368139304e-4, -6.7696499937438965e-5, -1.4440629666426572e-10,
828 1.5712512518742269e-5, -1.0304008744776893e-5, 3.304517767401387e-6,
829 7.9829760242325709e-13, -6.4097794149313004e-7, 3.8894624761300056e-7,
830 -1.1618347644948869e-7, -2.816808630596451e-15, 1.9878012911297093e-8,
831 -1.1407719956357511e-8, 3.2355857064185555e-9, 4.1759468293455945e-20,
832 -5.0423112718105824e-10},
833 {-5.9475779383993003e-3, -5.4016476789260452e-4, 8.7910413550767898e-3,
834 -9.8576315587856125e-3, 5.0134695031021538e-3, 1.2807521786221875e-6,
835 -2.0626019342754683e-3, 1.7109128573523058e-3, -6.7695312714133799e-4,
836 -6.9011545676562133e-9, 1.8855128143995902e-4, -1.3395215663491969e-4,
837 4.6263183033528039e-5, 4.0034230613321351e-11, -1.0255652921494033e-5,
838 6.612086372797651e-6, -2.0913022027253008e-6, -2.0951775649603837e-13,
839 3.9756029041993247e-7, -2.3956211978815887e-7, 7.1182883382145864e-8,
840 8.925574873053455e-16, -1.2101547235064676e-8, 6.9350618248334386e-9,
841 -1.9661464453856102e-9},
842 {1.7402027787522711e-2, -2.9527880945699121e-2, 2.0045875571402799e-2,
843 7.0289515966903407e-6, -1.2375421071343148e-2, 1.1976293444235254e-2,
844 -5.4156038466518525e-3, -6.3290893396418616e-8, 1.8855118129005065e-3,
845 -1.473473274825001e-3, 5.5515810097708387e-4, 5.2406834412550662e-10,
846 -1.4357913535784836e-4, 9.9181293224943297e-5, -3.3460834749478311e-5,
847 -3.5755837291098993e-12, 7.1560851960630076e-6, -4.5516802628155526e-6,
848 1.4236576649271475e-6, 1.8803149082089664e-14, -2.6623403898929211e-7,
849 1.5950642189595716e-7, -4.7187514673841102e-8, -6.5107872958755177e-17,
850 7.9795091026746235e-9},
851 {3.0249124160905891e-2, 2.4817436002649977e-3, -4.9939134373457022e-2,
852 5.9915643009307869e-2, -3.2483207601623391e-2, -5.7212968652103441e-6,
853 1.5085251778569354e-2, -1.3261324005088445e-2, 5.5515262632426148e-3,
854 3.0263182257030016e-8, -1.7229548406756723e-3, 1.2893570099929637e-3,
855 -4.6845138348319876e-4, -1.830259937893045e-10, 1.1449739014822654e-4,
856 -7.7378565221244477e-5, 2.5625836246985201e-5, 1.0766165333192814e-12,
857 -5.3246809282422621e-6, 3.349634863064464e-6, -1.0381253128684018e-6,
858 -5.608909920621128e-15, 1.9150821930676591e-7, -1.1418365800203486e-7,
859 3.3654425209171788e-8},
860 {-9.9051020880159045e-2, 1.7954011706123486e-1, -1.2989606383463778e-1,
861 -3.1478872752284357e-5, 9.0510635276848131e-2, -9.2828824411184397e-2,
862 4.4412112839877808e-2, 2.7779236316835888e-7, -1.7229543805449697e-2,
863 1.4182925050891573e-2, -5.6214161633747336e-3, -2.39598509186381e-9,
864 1.6029634366079908e-3, -1.1606784674435773e-3, 4.1001337768153873e-4,
865 1.8365800754090661e-11, -9.5844256563655903e-5, 6.3643062337764708e-5,
866 -2.076250624489065e-5, -1.1806020912804483e-13, 4.2131808239120649e-6,
867 -2.6262241337012467e-6, 8.0770620494930662e-7, 6.0125912123632725e-16,
868 -1.4729737374018841e-7},
869 {-1.9994542198219728e-1, -1.5056113040026424e-2, 3.6470239469348489e-1,
870 -4.6435192311733545e-1, 2.6640934719197893e-1, 3.4038266027147191e-5,
871 -1.3784338709329624e-1, 1.276467178337056e-1, -5.6213828755200985e-2,
872 -1.753150885483011e-7, 1.9235592956768113e-2, -1.5088821281095315e-2,
873 5.7401854451350123e-3, 1.0622382710310225e-9, -1.5335082692563998e-3,
874 1.0819320643228214e-3, -3.7372510193945659e-4, -6.6170909729031985e-12,
875 8.4263617380909628e-5, -5.5150706827483479e-5, 1.7769536448348069e-5,
876 3.8827923210205533e-14, -3.53513697488768e-6, 2.1865832130045269e-6,
877 -6.6812849447625594e-7},
878 {7.2438608504029431e-1, -1.3918010932653375, 1.0654143352413968,
879 1.876173868950258e-4, -8.2705501176152696e-1, 8.9352433347828414e-1,
880 -4.4971003995291339e-1, -1.6107401567546652e-6, 1.9235590165271091e-1,
881 -1.6597702160042609e-1, 6.8882222681814333e-2, 1.3910091724608687e-8,
882 -2.146911561508663e-2, 1.6228980898865892e-2, -5.9796016172584256e-3,
883 -1.1287469112826745e-10, 1.5167451119784857e-3, -1.0478634293553899e-3,
884 3.5539072889126421e-4, 8.1704322111801517e-13, -7.7773013442452395e-5,
885 5.0291413897007722e-5, -1.6035083867000518e-5, 1.2469354315487605e-14,
886 3.1369106244517615e-6},
887 {1.6668949727276811, 1.165462765994632e-1, -3.3288393225018906,
888 4.4692325482864037, -2.6977693045875807, -2.600667859891061e-4,
889 1.5389017615694539, -1.4937962361134612, 6.8881964633233148e-1,
890 1.3077482004552385e-6, -2.5762963325596288e-1, 2.1097676102125449e-1,
891 -8.3714408359219882e-2, -7.7920428881354753e-9, 2.4267923064833599e-2,
892 -1.7813678334552311e-2, 6.3970330388900056e-3, 4.9430807090480523e-11,
893 -1.5554602758465635e-3, 1.0561196919903214e-3, -3.5277184460472902e-4,
894 9.3002334645022459e-14, 7.5285855026557172e-5, -4.8186515569156351e-5,
895 1.5227271505597605e-5},
896 {-6.6188298861372935, 1.3397985455142589e+1, -1.0789350606845146e+1,
897 -1.4352254537875018e-3, 9.2333694596189809, -1.0456552819547769e+1,
898 5.5105526029033471, 1.2024439690716742e-5, -2.5762961164755816,
899 2.3207442745387179, -1.0045728797216284, -1.0207833290021914e-7,
900 3.3975092171169466e-1, -2.6720517450757468e-1, 1.0235252851562706e-1,
901 8.4329730484871625e-10, -2.7998284958442595e-2, 2.0066274144976813e-2,
902 -7.0554368915086242e-3, 1.9402238183698188e-12, 1.6562888105449611e-3,
903 -1.1082898580743683e-3, 3.654545161310169e-4, -5.1290032026971794e-11,
904 -7.6340103696869031e-5},
905 {-1.7112706061976095e+1, -1.1208044642899116, 3.7131966511885444e+1,
906 -5.2298271025348962e+1, 3.3058589696624618e+1, 2.4791298976200222e-3,
907 -2.061089403411526e+1, 2.088672775145582e+1, -1.0045703956517752e+1,
908 -1.2238783449063012e-5, 4.0770134274221141, -3.473667358470195,
909 1.4329352617312006, 7.1359914411879712e-8, -4.4797257159115612e-1,
910 3.4112666080644461e-1, -1.2699786326594923e-1, -2.8953677269081528e-10,
911 3.3125776278259863e-2, -2.3274087021036101e-2, 8.0399993503648882e-3,
912 -1.177805216235265e-9, -1.8321624891071668e-3, 1.2108282933588665e-3,
913 -3.9479941246822517e-4},
914 {7.389033153567425e+1, -1.5680141270402273e+2, 1.322177542759164e+2,
915 1.3692876877324546e-2, -1.2366496885920151e+2, 1.4620689391062729e+2,
916 -8.0365587724865346e+1, -1.1259851148881298e-4, 4.0770132196179938e+1,
917 -3.8210340013273034e+1, 1.719522294277362e+1, 9.3519707955168356e-7,
918 -6.2716159907747034, 5.1168999071852637, -2.0319658112299095,
919 -4.9507215582761543e-9, 5.9626397294332597e-1, -4.4220765337238094e-1,
920 1.6079998700166273e-1, -2.4733786203223402e-8, -4.0307574759979762e-2,
921 2.7849050747097869e-2, -9.4751858992054221e-3, 6.419922235909132e-6,
922 2.1250180774699461e-3},
923 {2.1216837098382522e+2, 1.3107863022633868e+1, -4.9698285932871748e+2,
924 7.3121595266969204e+2, -4.8213821720890847e+2, -2.8817248692894889e-2,
925 3.2616720302947102e+2, -3.4389340280087117e+2, 1.7195193870816232e+2,
926 1.4038077378096158e-4, -7.52594195897599e+1, 6.651969984520934e+1,
927 -2.8447519748152462e+1, -7.613702615875391e-7, 9.5402237105304373,
928 -7.5175301113311376, 2.8943997568871961, -4.6612194999538201e-7,
929 -8.0615149598794088e-1, 5.8483006570631029e-1, -2.0845408972964956e-1,
930 1.4765818959305817e-4, 5.1000433863753019e-2, -3.3066252141883665e-2,
931 1.5109265210467774e-2},
932 {-9.8959643098322368e+2, 2.1925555360905233e+3, -1.9283586782723356e+3,
933 -1.5925738122215253e-1, 1.9569985945919857e+3, -2.4072514765081556e+3,
934 1.3756149959336496e+3, 1.2920735237496668e-3, -7.525941715948055e+2,
935 7.3171668742208716e+2, -3.4137023466220065e+2, -9.9857390260608043e-6,
936 1.3356313181291573e+2, -1.1276295161252794e+2, 4.6310396098204458e+1,
937 -7.9237387133614756e-6, -1.4510726927018646e+1, 1.1111771248100563e+1,
938 -4.1690817945270892, 3.1008219800117808e-3, 1.1220095449981468,
939 -7.6052379926149916e-1, 3.6262236505085254e-1, 2.216867741940747e-1,
940 4.8683443692930507e-1}};
941
942 int k, n, sgn;
943 int maxpow = 0;
944 static scalar_t MACHEP = std::is_same<scalar_t, double>::value ?
945 1.11022302462515654042E-16 : 5.9604644775390625E-8;
946 scalar_t lambda = x / a;
947 scalar_t sigma = (x - a) / a;
948 scalar_t eta, res, ck, ckterm, term, absterm;
949 scalar_t absoldterm = INFINITY;
950 scalar_t etapow[25] = {1};
951 scalar_t sum = 0;
952 scalar_t afac = 1;
953
954 if (igam) {
955 sgn = -1;
956 }
957 else {
958 sgn = 1;
959 }
960
961 if (lambda > 1) {
962 eta = std::sqrt(-2 * (std::log1p(sigma) - sigma));
963 }
964 else if (lambda < 1) {
965 eta = -std::sqrt(-2 * (std::log1p(sigma) - sigma));
966 }
967 else {
968 eta = 0;
969 }
970 res = 0.5 * std::erfc(sgn * eta * std::sqrt(a / 2));
971
972 for (k = 0; k < 25; k++) {
973 ck = d[k][0];
974 for (n = 1; n < 25; n++) {
975 if (n > maxpow) {
976 etapow[n] = eta * etapow[n-1];
977 maxpow += 1;
978 }
979 ckterm = d[k][n]*etapow[n];
980 ck += ckterm;
981 if (std::fabs(ckterm) < MACHEP * std::fabs(ck)) {
982 break;
983 }
984 }
985 term = ck * afac;
986 absterm = std::fabs(term);
987 if (absterm > absoldterm) {
988 break;
989 }
990 sum += term;
991 if (absterm < MACHEP * std::fabs(sum)) {
992 break;
993 }
994 absoldterm = absterm;
995 afac /= a;
996 }
997 res += sgn * std::exp(-0.5 * a * eta * eta) * sum / std::sqrt(2 * c10::pi<float> * a);
998
999 return res;
1000 }
1001
1002 template <typename scalar_t>
_igamc_helper_continued_fraction(scalar_t a,scalar_t x)1003 static scalar_t _igamc_helper_continued_fraction(scalar_t a, scalar_t x) {
1004 // Compute igamc using DLMF 8.9.2. [igam1]
1005 int i;
1006 scalar_t ans, ax, c, yc, r, t, y, z;
1007 scalar_t pk, pkm1, pkm2, qk, qkm1, qkm2;
1008 int MAXITER = 2000;
1009 static scalar_t MACHEP = std::is_same<scalar_t, double>::value ?
1010 1.11022302462515654042E-16 : 5.9604644775390625E-8;
1011 static scalar_t BIG = std::is_same<scalar_t,double>::value ?
1012 4.503599627370496e15 : 16777216.;
1013 static scalar_t BIGINV = std::is_same<scalar_t,double>::value ?
1014 2.22044604925031308085e-16 : 5.9604644775390625E-8;
1015
1016 ax = _igam_helper_fac(a, x);
1017 if (ax == 0.0) {
1018 return 0.0;
1019 }
1020
1021 /* continued fraction */
1022 y = 1.0 - a;
1023 z = x + y + 1.0;
1024 c = 0.0;
1025 pkm2 = 1.0;
1026 qkm2 = x;
1027 pkm1 = x + 1.0;
1028 qkm1 = z * x;
1029 ans = pkm1 / qkm1;
1030
1031 for (i = 0; i < MAXITER; i++) {
1032 c += 1.0;
1033 y += 1.0;
1034 z += 2.0;
1035 yc = y * c;
1036 pk = pkm1 * z - pkm2 * yc;
1037 qk = qkm1 * z - qkm2 * yc;
1038 if (qk != 0) {
1039 r = pk / qk;
1040 t = std::fabs((ans - r) / r);
1041 ans = r;
1042 }
1043 else {
1044 t = 1.0;
1045 }
1046 pkm2 = pkm1;
1047 pkm1 = pk;
1048 qkm2 = qkm1;
1049 qkm1 = qk;
1050 if (std::fabs(pk) > BIG) {
1051 pkm2 *= BIGINV;
1052 pkm1 *= BIGINV;
1053 qkm2 *= BIGINV;
1054 qkm1 *= BIGINV;
1055 }
1056 if (t <= MACHEP) {
1057 break;
1058 }
1059 }
1060 return ans * ax;
1061 }
1062
1063 template <typename scalar_t>
calc_igammac(scalar_t a,scalar_t x)1064 inline scalar_t calc_igammac(scalar_t a, scalar_t x) {
1065 /* the calculation of the regularized upper incomplete gamma function
1066 * is done differently based on the values of a and x:
1067 * - if x and/or a is at the boundary of defined region, then assign the
1068 * result at the boundary
1069 * - if a is large and a ~ x, then using Uniform Asymptotic Expansions for
1070 * Large Parameter (see DLMF 8.12.4 [igam1])
1071 * - if x > 1.1 and x < a, using the substraction from the regularized lower
1072 * incomplete gamma
1073 * - otherwise, calculate the series from [igam2] eq (5)
1074 */
1075 scalar_t absxma_a;
1076
1077 static scalar_t SMALL = 20.0;
1078 static scalar_t LARGE = 200.0;
1079 static scalar_t SMALLRATIO = 0.3;
1080 static scalar_t LARGERATIO = 4.5;
1081
1082 // note that in SciPy, a and x are non-negative, with exclusive 0s (i.e.,
1083 // at most 1 of them can be 0), where igammac(0, x) = 0.0 iff x > 0.
1084 if ((x < 0) || (a < 0)) {
1085 // out of defined-region of the function
1086 return std::numeric_limits<scalar_t>::quiet_NaN();
1087 }
1088 else if (a == 0) {
1089 if (x > 0) {
1090 return 0.0;
1091 }
1092 else {
1093 return std::numeric_limits<scalar_t>::quiet_NaN();
1094 }
1095 }
1096 else if (x == 0) {
1097 return 1.0;
1098 }
1099 else if (std::isinf(a)) {
1100 if (std::isinf(x)) {
1101 return std::numeric_limits<scalar_t>::quiet_NaN();
1102 }
1103 return 1.0;
1104 }
1105 else if (std::isinf(x)) {
1106 return 0.0;
1107 }
1108
1109 absxma_a = std::fabs(x - a) / a;
1110 if ((a > SMALL) && (a < LARGE) && (absxma_a < SMALLRATIO)) {
1111 return _igam_helper_asymptotic_series(a, x, 0);
1112 }
1113 else if ((a > LARGE) && (absxma_a < LARGERATIO / std::sqrt(a))) {
1114 return _igam_helper_asymptotic_series(a, x, 0);
1115 }
1116
1117 if (x > 1.1) {
1118 if (x < a) {
1119 return 1.0 - _igam_helper_series(a, x);
1120 }
1121 else {
1122 return _igamc_helper_continued_fraction(a, x);
1123 }
1124 }
1125 else if (x <= 0.5) {
1126 if (-0.4 / std::log(x) < a) {
1127 return 1.0 - _igam_helper_series(a, x);
1128 }
1129 else {
1130 return _igamc_helper_series(a, x);
1131 }
1132 }
1133 else {
1134 if (x * 1.1 < a) {
1135 return 1.0 - _igam_helper_series(a, x);
1136 }
1137 else {
1138 return _igamc_helper_series(a, x);
1139 }
1140 }
1141 }
1142
1143 template <typename scalar_t>
calc_igamma(scalar_t a,scalar_t x)1144 scalar_t calc_igamma(scalar_t a, scalar_t x) {
1145 /* the calculation of the regularized lower incomplete gamma function
1146 * is done differently based on the values of a and x:
1147 * - if x and/or a is at the boundary of defined region, then assign the
1148 * result at the boundary
1149 * - if a is large and a ~ x, then using Uniform Asymptotic Expansions for
1150 * Large Parameter (see DLMF 8.12.3 [igam1])
1151 * - if x > 1 and x > a, using the substraction from the regularized upper
1152 * incomplete gamma
1153 * - otherwise, calculate the series from [igam2] eq (4)
1154 */
1155 scalar_t absxma_a;
1156 static scalar_t SMALL = 20.0;
1157 static scalar_t LARGE = 200.0;
1158 static scalar_t SMALLRATIO = 0.3;
1159 static scalar_t LARGERATIO = 4.5;
1160
1161 // boundary values following SciPy
1162 // note that in SciPy, a and x are non-negative, with exclusive 0s (i.e.,
1163 // at most 1 of them can be 0), where igamma(0, x) = 1.0 iff x > 0.
1164 if ((x < 0) || (a < 0)) {
1165 // out of defined-region of the function
1166 return std::numeric_limits<scalar_t>::quiet_NaN();
1167 }
1168 else if (a == 0) {
1169 if (x > 0) {
1170 return 1.0;
1171 }
1172 else {
1173 return std::numeric_limits<scalar_t>::quiet_NaN();
1174 }
1175 }
1176 else if (x == 0) {
1177 return 0.0; // zero integration limit
1178 }
1179 else if (std::isinf(a)) {
1180 if (std::isinf(x)) {
1181 return std::numeric_limits<scalar_t>::quiet_NaN();
1182 }
1183 return 0.0;
1184 }
1185 else if (std::isinf(x)) {
1186 return 1.0;
1187 }
1188
1189 /* Asymptotic regime where a ~ x. See [igam2] */
1190 absxma_a = std::fabs(x - a) / a;
1191 if ((a > SMALL) && (a < LARGE) && (absxma_a < SMALLRATIO)) {
1192 return _igam_helper_asymptotic_series(a, x, 1);
1193 }
1194 else if ((a > LARGE) && (absxma_a < LARGERATIO / std::sqrt(a))) {
1195 return _igam_helper_asymptotic_series(a, x, 1);
1196 }
1197
1198 if ((x > 1.0) && (x > a)) {
1199 return 1.0 - calc_igammac(a, x);
1200 }
1201
1202 return _igam_helper_series(a, x);
1203 }
1204
1205 template <>
1206 C10_UNUSED inline c10::BFloat16 calc_igamma<c10::BFloat16>(c10::BFloat16 a, c10::BFloat16 x) {
1207 return calc_igamma<float>(float(a), float(x));
1208 }
1209
1210 template <>
1211 C10_UNUSED inline c10::Half calc_igamma<c10::Half>(c10::Half a, c10::Half x) {
1212 return calc_igamma<float>(float(a), float(x));
1213 }
1214
1215 template <>
1216 C10_UNUSED inline c10::BFloat16 calc_igammac<c10::BFloat16>(c10::BFloat16 a, c10::BFloat16 x) {
1217 return calc_igammac<float>(float(a), float(x));
1218 }
1219
1220 template <>
1221 C10_UNUSED inline c10::Half calc_igammac<c10::Half>(c10::Half a, c10::Half x) {
1222 return calc_igammac<float>(float(a), float(x));
1223 }
1224
calc_erfinv(c10::BFloat16 a)1225 inline c10::BFloat16 calc_erfinv(c10::BFloat16 a) { return calc_erfinv(float(a)); }
1226
1227 template <typename T>
abs_impl(T v)1228 inline T abs_impl(T v) {
1229 return std::abs(v);
1230 }
1231
1232 template <>
abs_impl(uint8_t v)1233 C10_UNUSED inline uint8_t abs_impl(uint8_t v) {
1234 return v;
1235 }
1236
1237 template <typename T>
1238 inline typename std::enable_if<std::is_integral<T>::value, T>::type
calc_gcd(T a,T b)1239 calc_gcd(T a, T b) {
1240 a = abs_impl(a);
1241 b = abs_impl(b);
1242 while (a != 0) {
1243 T c = a;
1244 a = b % a;
1245 b = c;
1246 }
1247 return b;
1248 }
1249
1250 template <typename T>
exp2_impl(T x)1251 C10_HOST_DEVICE T exp2_impl(T x) {
1252 return std::exp2(x);
1253 }
1254
1255 template <typename T>
exp2_impl(c10::complex<T> x)1256 C10_HOST_DEVICE c10::complex<T> exp2_impl(c10::complex<T> x) {
1257 // There is no std::exp2 overload for complex, so instead
1258 // use the identity 2^x = e^(ln(2) * x)
1259 constexpr auto ln2 = c10::ln_2<T>;
1260 return std::exp(ln2 * x);
1261 }
1262
1263 /*
1264 * This function is derived from the implementation of the chbevl function in the Cephes Math Library.
1265 * See note [3-Clause BSD License for the Cephes Math Library].
1266 *
1267 * Evaluates the series
1268 *
1269 * len-1
1270 * - '
1271 * y = > array[i] T (x/2)
1272 * - i
1273 * i=0
1274 *
1275 * of Chebyshev polynomials Ti at argument x/2.
1276 *
1277 * Coefficients are stored in reverse order, i.e. the zero order term is last in the array. Note len is the number of
1278 * coefficients, not the order.
1279 *
1280 * If coefficients are for the interval a to b, x must have been transformed to x -> 2(2x - b - a)/(b-a) before
1281 * entering the routine. This maps x from (a, b) to (-1, 1), over which the Chebyshev polynomials are defined.
1282 *
1283 * If the coefficients are for the inverted interval, in which (a, b) is mapped to (1/b, 1/a), the transformation
1284 * required is x -> 2(2ab/x - b - a)/(b-a). If b is infinity, this becomes x -> 4a/x - 1.
1285 */
1286 template <typename T>
1287 inline typename std::enable_if<std::is_floating_point<T>::value, T>::type
chbevl(const T x,const T array[],size_t len)1288 chbevl(const T x, const T array[], size_t len) {
1289 T b0, b1, b2;
1290
1291 b0 = array[0];
1292 b1 = static_cast<T>(0.0);
1293
1294 for (size_t i = 1; i < len; ++i) {
1295 b2 = b1;
1296 b1 = b0;
1297 b0 = x * b1 - b2 + array[i];
1298 }
1299
1300 return (static_cast<T>(0.5) * (b0 - b2));
1301 }
1302
1303 /*
1304 * This function is derived from the implementation of the i0 function in the Cephes Math Library.
1305 * See note [3-Clause BSD License for the Cephes Math Library].
1306 *
1307 * Computes an approximation of the zeroth order modified Bessel function of the first kind.
1308 * The approximation is actually two (sub)approximations, both using a Chebyshev polynomial expansion.
1309 * One approximates the function over [0, 8], and the other over (8, infinity). This function takes the absolute value
1310 * of all inputs to convert them into the domain of the approximation.
1311 */
1312 template <typename T>
chebyshev_coefficients_i0e_A()1313 inline std::tuple<const T*, size_t> chebyshev_coefficients_i0e_A() {
1314 /* Chebyshev coefficients for exp(-x) I0(x)
1315 * in the interval [0,8].
1316 *
1317 * lim(x->0){ exp(-x) I0(x) } = 1.
1318 */
1319 static const T coeff[] = {
1320 -4.41534164647933937950E-18, 3.33079451882223809783E-17,
1321 -2.43127984654795469359E-16, 1.71539128555513303061E-15,
1322 -1.16853328779934516808E-14, 7.67618549860493561688E-14,
1323 -4.85644678311192946090E-13, 2.95505266312963983461E-12,
1324 -1.72682629144155570723E-11, 9.67580903537323691224E-11,
1325 -5.18979560163526290666E-10, 2.65982372468238665035E-9,
1326 -1.30002500998624804212E-8, 6.04699502254191894932E-8,
1327 -2.67079385394061173391E-7, 1.11738753912010371815E-6,
1328 -4.41673835845875056359E-6, 1.64484480707288970893E-5,
1329 -5.75419501008210370398E-5, 1.88502885095841655729E-4,
1330 -5.76375574538582365885E-4, 1.63947561694133579842E-3,
1331 -4.32430999505057594430E-3, 1.05464603945949983183E-2,
1332 -2.37374148058994688156E-2, 4.93052842396707084878E-2,
1333 -9.49010970480476444210E-2, 1.71620901522208775349E-1,
1334 -3.04682672343198398683E-1, 6.76795274409476084995E-1};
1335 return std::make_tuple(coeff, 30);
1336 };
1337
1338 template <typename T>
chebyshev_coefficients_i0e_B()1339 inline std::tuple<const T*, size_t> chebyshev_coefficients_i0e_B() {
1340 /* Chebyshev coefficients for exp(-x) sqrt(x) I0(x)
1341 * in the inverted interval [8,infinity].
1342 *
1343 * lim(x->inf){ exp(-x) sqrt(x) I0(x) } = 1/sqrt(2pi).
1344 */
1345 static const T coeff[] = {
1346 -7.23318048787475395456E-18, -4.83050448594418207126E-18,
1347 4.46562142029675999901E-17, 3.46122286769746109310E-17,
1348 -2.82762398051658348494E-16, -3.42548561967721913462E-16,
1349 1.77256013305652638360E-15, 3.81168066935262242075E-15,
1350 -9.55484669882830764870E-15, -4.15056934728722208663E-14,
1351 1.54008621752140982691E-14, 3.85277838274214270114E-13,
1352 7.18012445138366623367E-13, -1.79417853150680611778E-12,
1353 -1.32158118404477131188E-11, -3.14991652796324136454E-11,
1354 1.18891471078464383424E-11, 4.94060238822496958910E-10,
1355 3.39623202570838634515E-9, 2.26666899049817806459E-8,
1356 2.04891858946906374183E-7, 2.89137052083475648297E-6,
1357 6.88975834691682398426E-5, 3.36911647825569408990E-3,
1358 8.04490411014108831608E-1};
1359
1360 return std::make_tuple(coeff, 25);
1361 };
1362
1363 template <typename T>
1364 inline typename std::enable_if<std::is_same<double, T>::value, std::tuple<const T*, size_t>>::type
chebyshev_coefficients_i1e_A()1365 chebyshev_coefficients_i1e_A() {
1366 /* Chebyshev coefficients for exp(-x) I1(x)
1367 * in the interval [0,8].
1368 *
1369 * lim(x->0){ exp(-x) I1(x) / x } = 1/2.
1370 */
1371 static const T coeff[] = {
1372 2.77791411276104639959E-18, -2.11142121435816608115E-17,
1373 1.55363195773620046921E-16, -1.10559694773538630805E-15,
1374 7.60068429473540693410E-15, -5.04218550472791168711E-14,
1375 3.22379336594557470981E-13, -1.98397439776494371520E-12,
1376 1.17361862988909016308E-11, -6.66348972350202774223E-11,
1377 3.62559028155211703701E-10, -1.88724975172282928790E-9,
1378 9.38153738649577178388E-9, -4.44505912879632808065E-8,
1379 2.00329475355213526229E-7, -8.56872026469545474066E-7,
1380 3.47025130813767847674E-6, -1.32731636560394358279E-5,
1381 4.78156510755005422638E-5, -1.61760815825896745588E-4,
1382 5.12285956168575772895E-4, -1.51357245063125314899E-3,
1383 4.15642294431288815669E-3, -1.05640848946261981558E-2,
1384 2.47264490306265168283E-2, -5.29459812080949914269E-2,
1385 1.02643658689847095384E-1, -1.76416518357834055153E-1,
1386 2.52587186443633654823E-1};
1387 return std::make_tuple(coeff, 29);
1388 };
1389
1390 template <typename T>
1391 inline typename std::enable_if<std::is_same<float, T>::value, std::tuple<const T*, size_t>>::type
chebyshev_coefficients_i1e_A()1392 chebyshev_coefficients_i1e_A() {
1393 /* Chebyshev coefficients for exp(-x) I1(x)
1394 * in the interval [0,8].
1395 *
1396 * lim(x->0){ exp(-x) I1(x) / x } = 1/2.
1397 */
1398 static const T coeff[] = {
1399 9.38153738649577178388E-9f,
1400 -4.44505912879632808065E-8f,
1401 2.00329475355213526229E-7f,
1402 -8.56872026469545474066E-7f,
1403 3.47025130813767847674E-6f,
1404 -1.32731636560394358279E-5f,
1405 4.78156510755005422638E-5f,
1406 -1.61760815825896745588E-4f,
1407 5.12285956168575772895E-4f,
1408 -1.51357245063125314899E-3f,
1409 4.15642294431288815669E-3f,
1410 -1.05640848946261981558E-2f,
1411 2.47264490306265168283E-2f,
1412 -5.29459812080949914269E-2f,
1413 1.02643658689847095384E-1f,
1414 -1.76416518357834055153E-1f,
1415 2.52587186443633654823E-1f};
1416 return std::make_tuple(coeff, 17);
1417 };
1418
1419 template <typename T>
1420 inline typename std::enable_if<std::is_same<double, T>::value, std::tuple<const T*, size_t>>::type
chebyshev_coefficients_i1e_B()1421 chebyshev_coefficients_i1e_B() {
1422 /* Chebyshev coefficients for exp(-x) sqrt(x) I1(x)
1423 * in the inverted interval [8,infinity].
1424 *
1425 * lim(x->inf){ exp(-x) sqrt(x) I1(x) } = 1/sqrt(2pi).
1426 */
1427 static const T coeff[] = {
1428 7.51729631084210481353E-18, 4.41434832307170791151E-18,
1429 -4.65030536848935832153E-17, -3.20952592199342395980E-17,
1430 2.96262899764595013876E-16, 3.30820231092092828324E-16,
1431 -1.88035477551078244854E-15, -3.81440307243700780478E-15,
1432 1.04202769841288027642E-14, 4.27244001671195135429E-14,
1433 -2.10154184277266431302E-14, -4.08355111109219731823E-13,
1434 -7.19855177624590851209E-13, 2.03562854414708950722E-12,
1435 1.41258074366137813316E-11, 3.25260358301548823856E-11,
1436 -1.89749581235054123450E-11, -5.58974346219658380687E-10,
1437 -3.83538038596423702205E-9, -2.63146884688951950684E-8,
1438 -2.51223623787020892529E-7, -3.88256480887769039346E-6,
1439 -1.10588938762623716291E-4, -9.76109749136146840777E-3,
1440 7.78576235018280120474E-1};
1441
1442 return std::make_tuple(coeff, 25);
1443 };
1444
1445 template <typename T>
1446 inline typename std::enable_if<std::is_same<float, T>::value, std::tuple<const T*, size_t>>::type
chebyshev_coefficients_i1e_B()1447 chebyshev_coefficients_i1e_B() {
1448 /* Chebyshev coefficients for exp(-x) sqrt(x) I1(x)
1449 * in the inverted interval [8,infinity].
1450 *
1451 * lim(x->inf){ exp(-x) sqrt(x) I1(x) } = 1/sqrt(2pi).
1452 */
1453 static const T coeff[] = {
1454 -3.83538038596423702205E-9f,
1455 -2.63146884688951950684E-8f,
1456 -2.51223623787020892529E-7f,
1457 -3.88256480887769039346E-6f,
1458 -1.10588938762623716291E-4f,
1459 -9.76109749136146840777E-3f,
1460 7.78576235018280120474E-1f};
1461
1462 return std::make_tuple(coeff, 7);
1463 };
1464
1465 template <typename T>
1466 inline typename std::enable_if<std::is_floating_point<T>::value, T>::type
calc_i0(T _x)1467 calc_i0(T _x) {
1468 T x = std::abs(_x);
1469
1470 if (x <= T{8.0}) {
1471 auto coeff_pair = chebyshev_coefficients_i0e_A<T>();
1472 auto A = std::get<0>(coeff_pair);
1473 auto len = std::get<1>(coeff_pair);
1474 T y = (x / T{2.0}) - T{2.0};
1475 return static_cast<T>(std::exp(x) * chbevl(y, A, len));
1476 }
1477 auto coeff_pair = chebyshev_coefficients_i0e_B<T>();
1478 auto B = std::get<0>(coeff_pair);
1479 auto len = std::get<1>(coeff_pair);
1480 return std::exp(x) * chbevl(T{32.0} / x - T{2.0}, B, len) / std::sqrt(x);
1481 }
1482
1483 // Upcast bfloat16 input to float for numerical accuracy purposes
calc_i0(c10::BFloat16 a)1484 inline c10::BFloat16 calc_i0(c10::BFloat16 a) { return calc_i0(static_cast<float>(a)); }
1485
1486 /*
1487 * This function is derived from the implementation of the i1 function in the Cephes Math Library.
1488 * See note [3-Clause BSD License for the Cephes Math Library].
1489 *
1490 * Computes an approximation of the first order modified Bessel function of the first kind.
1491 * The approximation is actually two (sub)approximations, both using a Chebyshev polynomial expansion.
1492 * One approximates the function over [0, 8], and the other over (8, infinity). This function takes the absolute value
1493 * of all inputs to convert them into the domain of the approximation.
1494 */
1495 template <typename T>
1496 inline typename std::enable_if<std::is_floating_point<T>::value, T>::type
calc_i1(T _x)1497 calc_i1(T _x) {
1498 T x = std::abs(_x);
1499
1500 if (x <= T{8.0}) {
1501 auto coeff_pair = chebyshev_coefficients_i1e_A<T>();
1502 auto A = std::get<0>(coeff_pair);
1503 auto len = std::get<1>(coeff_pair);
1504 T y = (x / T{2.0}) - T{2.0};
1505 const T out = std::exp(x) * x * chbevl(y, A, len);
1506 return (_x < T{0.0}) ? -out : out;
1507 }
1508 auto coeff_pair = chebyshev_coefficients_i1e_B<T>();
1509 auto B = std::get<0>(coeff_pair);
1510 auto len = std::get<1>(coeff_pair);
1511 const T out = (std::exp(x) * chbevl(T{32.0} / x - T{2.0}, B, len)) / std::sqrt(x);
1512 return (_x < T{0.0}) ? -out : out;
1513 }
1514
1515 /*
1516 * This function is derived from the implementation of the i1e function in the Cephes Math Library.
1517 * See note [3-Clause BSD License for the Cephes Math Library].
1518 *
1519 * Computes an approximation of the exponentially scaled first order modified Bessel function of the first kind.
1520 * The approximation is actually two (sub)approximations, both using a Chebyshev polynomial expansion.
1521 * One approximates the function over [0, 8], and the other over (8, infinity). This function takes the absolute value
1522 * of all inputs to convert them into the domain of the approximation.
1523 */
1524 template <typename T>
1525 inline typename std::enable_if<std::is_floating_point<T>::value, T>::type
calc_i1e(T _x)1526 calc_i1e(T _x) {
1527 T x = std::abs(_x);
1528
1529 if (x <= T{8.0}) {
1530 auto coeff_pair = chebyshev_coefficients_i1e_A<T>();
1531 auto A = std::get<0>(coeff_pair);
1532 auto len = std::get<1>(coeff_pair);
1533 T y = (x / T{2.0}) - T{2.0};
1534 const T out = chbevl(y, A, len) * x;
1535 return (_x < T{0.0}) ? -out : out;
1536 }
1537 auto coeff_pair = chebyshev_coefficients_i1e_B<T>();
1538 auto B = std::get<0>(coeff_pair);
1539 auto len = std::get<1>(coeff_pair);
1540 const auto out = chbevl(T{32.0} / x - T{2.0}, B, len) / std::sqrt(x);
1541 return (_x < T{0.0}) ? -out : out;
1542 }
1543
1544 /*
1545 * This function is derived from the implementation of the i1e function in the Cephes Math Library.
1546 * See note [3-Clause BSD License for the Cephes Math Library].
1547 *
1548 * Computes the argument, x, for which the area under the Gaussian probability density function
1549 * (integrated from minus infinity to x) is equal to y.
1550 */
1551 template <typename T>
calc_ndtri(T y0)1552 inline C10_HOST_DEVICE T calc_ndtri(T y0) {
1553
1554 /* sqrt(2pi) */
1555 constexpr T s2pi = 2.50662827463100050242E0;
1556 constexpr T one = 1;
1557 constexpr T zero = 0;
1558
1559 /* approximation for 0 <= |y - 0.5| <= 3/8 */
1560 static const T P0[5] = {
1561 -5.99633501014107895267E1,
1562 9.80010754185999661536E1,
1563 -5.66762857469070293439E1,
1564 1.39312609387279679503E1,
1565 -1.23916583867381258016E0,
1566 };
1567
1568 static const T Q0[9] = {
1569 1.00000000000000000000E0,
1570 1.95448858338141759834E0,
1571 4.67627912898881538453E0,
1572 8.63602421390890590575E1,
1573 -2.25462687854119370527E2,
1574 2.00260212380060660359E2,
1575 -8.20372256168333339912E1,
1576 1.59056225126211695515E1,
1577 -1.18331621121330003142E0,
1578 };
1579
1580 /* Approximation for interval z = sqrt(-2 log y ) between 2 and 8
1581 * i.e., y between exp(-2) = .135 and exp(-32) = 1.27e-14.
1582 */
1583 static const T P1[9] = {
1584 4.05544892305962419923E0,
1585 3.15251094599893866154E1,
1586 5.71628192246421288162E1,
1587 4.40805073893200834700E1,
1588 1.46849561928858024014E1,
1589 2.18663306850790267539E0,
1590 -1.40256079171354495875E-1,
1591 -3.50424626827848203418E-2,
1592 -8.57456785154685413611E-4,
1593 };
1594
1595 static const T Q1[9] = {
1596 1.00000000000000000000E0,
1597 1.57799883256466749731E1,
1598 4.53907635128879210584E1,
1599 4.13172038254672030440E1,
1600 1.50425385692907503408E1,
1601 2.50464946208309415979E0,
1602 -1.42182922854787788574E-1,
1603 -3.80806407691578277194E-2,
1604 -9.33259480895457427372E-4,
1605 };
1606
1607 /* Approximation for interval z = sqrt(-2 log y ) between 8 and 64
1608 * i.e., y between exp(-32) = 1.27e-14 and exp(-2048) = 3.67e-890.
1609 */
1610
1611 static const T P2[9] = {
1612 3.23774891776946035970E0,
1613 6.91522889068984211695E0,
1614 3.93881025292474443415E0,
1615 1.33303460815807542389E0,
1616 2.01485389549179081538E-1,
1617 1.23716634817820021358E-2,
1618 3.01581553508235416007E-4,
1619 2.65806974686737550832E-6,
1620 6.23974539184983293730E-9,
1621 };
1622
1623 static const T Q2[9] = {
1624 1.00000000000000000000E0,
1625 6.02427039364742014255E0,
1626 3.67983563856160859403E0,
1627 1.37702099489081330271E0,
1628 2.16236993594496635890E-1,
1629 1.34204006088543189037E-2,
1630 3.28014464682127739104E-4,
1631 2.89247864745380683936E-6,
1632 6.79019408009981274425E-9,
1633 };
1634
1635 if (y0 == zero) {
1636 return -std::numeric_limits<T>::infinity();
1637 }
1638 if (y0 == one) {
1639 return std::numeric_limits<T>::infinity();
1640 }
1641 if (y0 < zero || y0 > one) {
1642 return std::numeric_limits<T>::quiet_NaN();
1643 }
1644 bool code = true;
1645 T y = y0;
1646 if (y > one - T{0.13533528323661269189}) { /* 0.135... = exp(-2) */
1647 y = one - y;
1648 code = false;
1649 }
1650
1651 if (y > T{0.13533528323661269189}) {
1652 y = y - T{0.5};
1653 const T y2 = y * y;
1654 T x = y + y * (y2 * polevl(y2, P0, 4) / polevl(y2, Q0, 8));
1655 return (x * s2pi);
1656 }
1657
1658 T x = ::sqrt(T{-2.0} * ::log(y));
1659 const T x0 = x - ::log(x) / x;
1660
1661 const T z = one / x;
1662 T x1;
1663 if (x < T{8.0}) /* y > exp(-32) = 1.2664165549e-14 */
1664 {
1665 x1 = z * polevl(z, P1, 8) / polevl(z, Q1, 8);
1666 } else {
1667 x1 = z * polevl(z, P2, 8) / polevl(z, Q2, 8);
1668 }
1669 x = x0 - x1;
1670 if (code) {
1671 x = -x;
1672 }
1673 return x;
1674 }
1675
1676 /* The next function is taken from http://ab-initio.mit.edu/Faddeev */
1677
1678 /* Copyright (c) 2012 Massachusetts Institute of Technology
1679 *
1680 * Permission is hereby granted, free of charge, to any person obtaining
1681 * a copy of this software and associated documentation files (the
1682 * "Software"), to deal in the Software without restriction, including
1683 * without limitation the rights to use, copy, modify, merge, publish,
1684 * distribute, sublicense, and/or sell copies of the Software, and to
1685 * permit persons to whom the Software is furnished to do so, subject to
1686 * the following conditions:
1687 *
1688 * The above copyright notice and this permission notice shall be
1689 * included in all copies or substantial portions of the Software.
1690 *
1691 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
1692 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
1693 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
1694 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
1695 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
1696 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
1697 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
1698 */
1699
1700 /* erfcx(x) = exp(x^2) erfc(x) function, for real x, written by
1701 Steven G. Johnson, October 2012.
1702
1703 This function combines a few different ideas.
1704
1705 First, for x > 50, it uses a continued-fraction expansion (same as
1706 for the Faddeeva function, but with algebraic simplifications for z=i*x).
1707
1708 Second, for 0 <= x <= 50, it uses Chebyshev polynomial approximations,
1709 but with two twists:
1710
1711 a) It maps x to y = 4 / (4+x) in [0,1]. This simple transformation,
1712 inspired by a similar transformation in the octave-forge/specfun
1713 erfcx by Soren Hauberg, results in much faster Chebyshev convergence
1714 than other simple transformations I have examined.
1715
1716 b) Instead of using a single Chebyshev polynomial for the entire
1717 [0,1] y interval, we break the interval up into 100 equal
1718 subintervals, with a switch/lookup table, and use much lower
1719 degree Chebyshev polynomials in each subinterval. This greatly
1720 improves performance in my tests.
1721
1722 For x < 0, we use the relationship erfcx(-x) = 2 exp(x^2) - erfc(x),
1723 with the usual checks for overflow etcetera.
1724
1725 Performance-wise, it seems to be substantially faster than either
1726 the SLATEC DERFC function [or an erfcx function derived therefrom]
1727 or Cody's CALERF function (from netlib.org/specfun), while
1728 retaining near machine precision in accuracy. */
1729
1730 /* Given y100=100*y, where y = 4/(4+x) for x >= 0, compute erfc(x).
1731
1732 Uses a look-up table of 100 different Chebyshev polynomials
1733 for y intervals [0,0.01], [0.01,0.02], ...., [0.99,1], generated
1734 with the help of Maple and a little shell script. This allows
1735 the Chebyshev polynomials to be of significantly lower degree (about 1/4)
1736 compared to fitting the whole [0,1] interval with a single polynomial. */
1737
1738
1739 template <typename T>
1740 C10_HOST_DEVICE inline typename std::enable_if<std::is_floating_point<T>::value, T>::type
erfcx_y100(T y100)1741 erfcx_y100(T y100)
1742 {
1743 switch (static_cast<int>(y100)) {
1744 case 0: {
1745 T t = 2*y100 - 1;
1746 return 0.70878032454106438663e-3 + (0.71234091047026302958e-3 + (0.35779077297597742384e-5 + (0.17403143962587937815e-7 + (0.81710660047307788845e-10 + (0.36885022360434957634e-12 + 0.15917038551111111111e-14 * t) * t) * t) * t) * t) * t;
1747 }
1748 case 1: {
1749 T t = 2*y100 - 3;
1750 return 0.21479143208285144230e-2 + (0.72686402367379996033e-3 + (0.36843175430938995552e-5 + (0.18071841272149201685e-7 + (0.85496449296040325555e-10 + (0.38852037518534291510e-12 + 0.16868473576888888889e-14 * t) * t) * t) * t) * t) * t;
1751 }
1752 case 2: {
1753 T t = 2*y100 - 5;
1754 return 0.36165255935630175090e-2 + (0.74182092323555510862e-3 + (0.37948319957528242260e-5 + (0.18771627021793087350e-7 + (0.89484715122415089123e-10 + (0.40935858517772440862e-12 + 0.17872061464888888889e-14 * t) * t) * t) * t) * t) * t;
1755 }
1756 case 3: {
1757 T t = 2*y100 - 7;
1758 return 0.51154983860031979264e-2 + (0.75722840734791660540e-3 + (0.39096425726735703941e-5 + (0.19504168704300468210e-7 + (0.93687503063178993915e-10 + (0.43143925959079664747e-12 + 0.18939926435555555556e-14 * t) * t) * t) * t) * t) * t;
1759 }
1760 case 4: {
1761 T t = 2*y100 - 9;
1762 return 0.66457513172673049824e-2 + (0.77310406054447454920e-3 + (0.40289510589399439385e-5 + (0.20271233238288381092e-7 + (0.98117631321709100264e-10 + (0.45484207406017752971e-12 + 0.20076352213333333333e-14 * t) * t) * t) * t) * t) * t;
1763 }
1764 case 5: {
1765 T t = 2*y100 - 11;
1766 return 0.82082389970241207883e-2 + (0.78946629611881710721e-3 + (0.41529701552622656574e-5 + (0.21074693344544655714e-7 + (0.10278874108587317989e-9 + (0.47965201390613339638e-12 + 0.21285907413333333333e-14 * t) * t) * t) * t) * t) * t;
1767 }
1768 case 6: {
1769 T t = 2*y100 - 13;
1770 return 0.98039537275352193165e-2 + (0.80633440108342840956e-3 + (0.42819241329736982942e-5 + (0.21916534346907168612e-7 + (0.10771535136565470914e-9 + (0.50595972623692822410e-12 + 0.22573462684444444444e-14 * t) * t) * t) * t) * t) * t;
1771 }
1772 case 7: {
1773 T t = 2*y100 - 15;
1774 return 0.11433927298290302370e-1 + (0.82372858383196561209e-3 + (0.44160495311765438816e-5 + (0.22798861426211986056e-7 + (0.11291291745879239736e-9 + (0.53386189365816880454e-12 + 0.23944209546666666667e-14 * t) * t) * t) * t) * t) * t;
1775 }
1776 case 8: {
1777 T t = 2*y100 - 17;
1778 return 0.13099232878814653979e-1 + (0.84167002467906968214e-3 + (0.45555958988457506002e-5 + (0.23723907357214175198e-7 + (0.11839789326602695603e-9 + (0.56346163067550237877e-12 + 0.25403679644444444444e-14 * t) * t) * t) * t) * t) * t;
1779 }
1780 case 9: {
1781 T t = 2*y100 - 19;
1782 return 0.14800987015587535621e-1 + (0.86018092946345943214e-3 + (0.47008265848816866105e-5 + (0.24694040760197315333e-7 + (0.12418779768752299093e-9 + (0.59486890370320261949e-12 + 0.26957764568888888889e-14 * t) * t) * t) * t) * t) * t;
1783 }
1784 case 10: {
1785 T t = 2*y100 - 21;
1786 return 0.16540351739394069380e-1 + (0.87928458641241463952e-3 + (0.48520195793001753903e-5 + (0.25711774900881709176e-7 + (0.13030128534230822419e-9 + (0.62820097586874779402e-12 + 0.28612737351111111111e-14 * t) * t) * t) * t) * t) * t;
1787 }
1788 case 11: {
1789 T t = 2*y100 - 23;
1790 return 0.18318536789842392647e-1 + (0.89900542647891721692e-3 + (0.50094684089553365810e-5 + (0.26779777074218070482e-7 + (0.13675822186304615566e-9 + (0.66358287745352705725e-12 + 0.30375273884444444444e-14 * t) * t) * t) * t) * t) * t;
1791 }
1792 case 12: {
1793 T t = 2*y100 - 25;
1794 return 0.20136801964214276775e-1 + (0.91936908737673676012e-3 + (0.51734830914104276820e-5 + (0.27900878609710432673e-7 + (0.14357976402809042257e-9 + (0.70114790311043728387e-12 + 0.32252476000000000000e-14 * t) * t) * t) * t) * t) * t;
1795 }
1796 case 13: {
1797 T t = 2*y100 - 27;
1798 return 0.21996459598282740954e-1 + (0.94040248155366777784e-3 + (0.53443911508041164739e-5 + (0.29078085538049374673e-7 + (0.15078844500329731137e-9 + (0.74103813647499204269e-12 + 0.34251892320000000000e-14 * t) * t) * t) * t) * t) * t;
1799 }
1800 case 14: {
1801 T t = 2*y100 - 29;
1802 return 0.23898877187226319502e-1 + (0.96213386835900177540e-3 + (0.55225386998049012752e-5 + (0.30314589961047687059e-7 + (0.15840826497296335264e-9 + (0.78340500472414454395e-12 + 0.36381553564444444445e-14 * t) * t) * t) * t) * t) * t;
1803 }
1804 case 15: {
1805 T t = 2*y100 - 31;
1806 return 0.25845480155298518485e-1 + (0.98459293067820123389e-3 + (0.57082915920051843672e-5 + (0.31613782169164830118e-7 + (0.16646478745529630813e-9 + (0.82840985928785407942e-12 + 0.38649975768888888890e-14 * t) * t) * t) * t) * t) * t;
1807 }
1808 case 16: {
1809 T t = 2*y100 - 33;
1810 return 0.27837754783474696598e-1 + (0.10078108563256892757e-2 + (0.59020366493792212221e-5 + (0.32979263553246520417e-7 + (0.17498524159268458073e-9 + (0.87622459124842525110e-12 + 0.41066206488888888890e-14 * t) * t) * t) * t) * t) * t;
1811 }
1812 case 17: {
1813 T t = 2*y100 - 35;
1814 return 0.29877251304899307550e-1 + (0.10318204245057349310e-2 + (0.61041829697162055093e-5 + (0.34414860359542720579e-7 + (0.18399863072934089607e-9 + (0.92703227366365046533e-12 + 0.43639844053333333334e-14 * t) * t) * t) * t) * t) * t;
1815 }
1816 case 18: {
1817 T t = 2*y100 - 37;
1818 return 0.31965587178596443475e-1 + (0.10566560976716574401e-2 + (0.63151633192414586770e-5 + (0.35924638339521924242e-7 + (0.19353584758781174038e-9 + (0.98102783859889264382e-12 + 0.46381060817777777779e-14 * t) * t) * t) * t) * t) * t;
1819 }
1820 case 19: {
1821 T t = 2*y100 - 39;
1822 return 0.34104450552588334840e-1 + (0.10823541191350532574e-2 + (0.65354356159553934436e-5 + (0.37512918348533521149e-7 + (0.20362979635817883229e-9 + (0.10384187833037282363e-11 + 0.49300625262222222221e-14 * t) * t) * t) * t) * t) * t;
1823 }
1824 case 20: {
1825 T t = 2*y100 - 41;
1826 return 0.36295603928292425716e-1 + (0.11089526167995268200e-2 + (0.67654845095518363577e-5 + (0.39184292949913591646e-7 + (0.21431552202133775150e-9 + (0.10994259106646731797e-11 + 0.52409949102222222221e-14 * t) * t) * t) * t) * t) * t;
1827 }
1828 case 21: {
1829 T t = 2*y100 - 43;
1830 return 0.38540888038840509795e-1 + (0.11364917134175420009e-2 + (0.70058230641246312003e-5 + (0.40943644083718586939e-7 + (0.22563034723692881631e-9 + (0.11642841011361992885e-11 + 0.55721092871111111110e-14 * t) * t) * t) * t) * t) * t;
1831 }
1832 case 22: {
1833 T t = 2*y100 - 45;
1834 return 0.40842225954785960651e-1 + (0.11650136437945673891e-2 + (0.72569945502343006619e-5 + (0.42796161861855042273e-7 + (0.23761401711005024162e-9 + (0.12332431172381557035e-11 + 0.59246802364444444445e-14 * t) * t) * t) * t) * t) * t;
1835 }
1836 case 23: {
1837 T t = 2*y100 - 47;
1838 return 0.43201627431540222422e-1 + (0.11945628793917272199e-2 + (0.75195743532849206263e-5 + (0.44747364553960993492e-7 + (0.25030885216472953674e-9 + (0.13065684400300476484e-11 + 0.63000532853333333334e-14 * t) * t) * t) * t) * t) * t;
1839 }
1840 case 24: {
1841 T t = 2*y100 - 49;
1842 return 0.45621193513810471438e-1 + (0.12251862608067529503e-2 + (0.77941720055551920319e-5 + (0.46803119830954460212e-7 + (0.26375990983978426273e-9 + (0.13845421370977119765e-11 + 0.66996477404444444445e-14 * t) * t) * t) * t) * t) * t;
1843 }
1844 case 25: {
1845 T t = 2*y100 - 51;
1846 return 0.48103121413299865517e-1 + (0.12569331386432195113e-2 + (0.80814333496367673980e-5 + (0.48969667335682018324e-7 + (0.27801515481905748484e-9 + (0.14674637611609884208e-11 + 0.71249589351111111110e-14 * t) * t) * t) * t) * t) * t;
1847 }
1848 case 26: {
1849 T t = 2*y100 - 53;
1850 return 0.50649709676983338501e-1 + (0.12898555233099055810e-2 + (0.83820428414568799654e-5 + (0.51253642652551838659e-7 + (0.29312563849675507232e-9 + (0.15556512782814827846e-11 + 0.75775607822222222221e-14 * t) * t) * t) * t) * t) * t;
1851 }
1852 case 27: {
1853 T t = 2*y100 - 55;
1854 return 0.53263363664388864181e-1 + (0.13240082443256975769e-2 + (0.86967260015007658418e-5 + (0.53662102750396795566e-7 + (0.30914568786634796807e-9 + (0.16494420240828493176e-11 + 0.80591079644444444445e-14 * t) * t) * t) * t) * t) * t;
1855 }
1856 case 28: {
1857 T t = 2*y100 - 57;
1858 return 0.55946601353500013794e-1 + (0.13594491197408190706e-2 + (0.90262520233016380987e-5 + (0.56202552975056695376e-7 + (0.32613310410503135996e-9 + (0.17491936862246367398e-11 + 0.85713381688888888890e-14 * t) * t) * t) * t) * t) * t;
1859 }
1860 case 29: {
1861 T t = 2*y100 - 59;
1862 return 0.58702059496154081813e-1 + (0.13962391363223647892e-2 + (0.93714365487312784270e-5 + (0.58882975670265286526e-7 + (0.34414937110591753387e-9 + (0.18552853109751857859e-11 + 0.91160736711111111110e-14 * t) * t) * t) * t) * t) * t;
1863 }
1864 case 30: {
1865 T t = 2*y100 - 61;
1866 return 0.61532500145144778048e-1 + (0.14344426411912015247e-2 + (0.97331446201016809696e-5 + (0.61711860507347175097e-7 + (0.36325987418295300221e-9 + (0.19681183310134518232e-11 + 0.96952238400000000000e-14 * t) * t) * t) * t) * t) * t;
1867 }
1868 case 31: {
1869 T t = 2*y100 - 63;
1870 return 0.64440817576653297993e-1 + (0.14741275456383131151e-2 + (0.10112293819576437838e-4 + (0.64698236605933246196e-7 + (0.38353412915303665586e-9 + (0.20881176114385120186e-11 + 0.10310784480000000000e-13 * t) * t) * t) * t) * t) * t;
1871 }
1872 case 32: {
1873 T t = 2*y100 - 65;
1874 return 0.67430045633130393282e-1 + (0.15153655418916540370e-2 + (0.10509857606888328667e-4 + (0.67851706529363332855e-7 + (0.40504602194811140006e-9 + (0.22157325110542534469e-11 + 0.10964842115555555556e-13 * t) * t) * t) * t) * t) * t;
1875 }
1876 case 33: {
1877 T t = 2*y100 - 67;
1878 return 0.70503365513338850709e-1 + (0.15582323336495709827e-2 + (0.10926868866865231089e-4 + (0.71182482239613507542e-7 + (0.42787405890153386710e-9 + (0.23514379522274416437e-11 + 0.11659571751111111111e-13 * t) * t) * t) * t) * t) * t;
1879 }
1880 case 34: {
1881 T t = 2*y100 - 69;
1882 return 0.73664114037944596353e-1 + (0.16028078812438820413e-2 + (0.11364423678778207991e-4 + (0.74701423097423182009e-7 + (0.45210162777476488324e-9 + (0.24957355004088569134e-11 + 0.12397238257777777778e-13 * t) * t) * t) * t) * t) * t;
1883 }
1884 case 35: {
1885 T t = 2*y100 - 71;
1886 return 0.76915792420819562379e-1 + (0.16491766623447889354e-2 + (0.11823685320041302169e-4 + (0.78420075993781544386e-7 + (0.47781726956916478925e-9 + (0.26491544403815724749e-11 + 0.13180196462222222222e-13 * t) * t) * t) * t) * t) * t;
1887 }
1888 case 36: {
1889 T t = 2*y100 - 73;
1890 return 0.80262075578094612819e-1 + (0.16974279491709504117e-2 + (0.12305888517309891674e-4 + (0.82350717698979042290e-7 + (0.50511496109857113929e-9 + (0.28122528497626897696e-11 + 0.14010889635555555556e-13 * t) * t) * t) * t) * t) * t;
1891 }
1892 case 37: {
1893 T t = 2*y100 - 75;
1894 return 0.83706822008980357446e-1 + (0.17476561032212656962e-2 + (0.12812343958540763368e-4 + (0.86506399515036435592e-7 + (0.53409440823869467453e-9 + (0.29856186620887555043e-11 + 0.14891851591111111111e-13 * t) * t) * t) * t) * t) * t;
1895 }
1896 case 38: {
1897 T t = 2*y100 - 77;
1898 return 0.87254084284461718231e-1 + (0.17999608886001962327e-2 + (0.13344443080089492218e-4 + (0.90900994316429008631e-7 + (0.56486134972616465316e-9 + (0.31698707080033956934e-11 + 0.15825697795555555556e-13 * t) * t) * t) * t) * t) * t;
1899 }
1900 case 39: {
1901 T t = 2*y100 - 79;
1902 return 0.90908120182172748487e-1 + (0.18544478050657699758e-2 + (0.13903663143426120077e-4 + (0.95549246062549906177e-7 + (0.59752787125242054315e-9 + (0.33656597366099099413e-11 + 0.16815130613333333333e-13 * t) * t) * t) * t) * t) * t;
1903 }
1904 case 40: {
1905 T t = 2*y100 - 81;
1906 return 0.94673404508075481121e-1 + (0.19112284419887303347e-2 + (0.14491572616545004930e-4 + (0.10046682186333613697e-6 + (0.63221272959791000515e-9 + (0.35736693975589130818e-11 + 0.17862931591111111111e-13 * t) * t) * t) * t) * t) * t;
1907 }
1908 case 41: {
1909 T t = 2*y100 - 83;
1910 return 0.98554641648004456555e-1 + (0.19704208544725622126e-2 + (0.15109836875625443935e-4 + (0.10567036667675984067e-6 + (0.66904168640019354565e-9 + (0.37946171850824333014e-11 + 0.18971959040000000000e-13 * t) * t) * t) * t) * t) * t;
1911 }
1912 case 42: {
1913 T t = 2*y100 - 85;
1914 return 0.10255677889470089531e0 + (0.20321499629472857418e-2 + (0.15760224242962179564e-4 + (0.11117756071353507391e-6 + (0.70814785110097658502e-9 + (0.40292553276632563925e-11 + 0.20145143075555555556e-13 * t) * t) * t) * t) * t) * t;
1915 }
1916 case 43: {
1917 T t = 2*y100 - 87;
1918 return 0.10668502059865093318e0 + (0.20965479776148731610e-2 + (0.16444612377624983565e-4 + (0.11700717962026152749e-6 + (0.74967203250938418991e-9 + (0.42783716186085922176e-11 + 0.21385479360000000000e-13 * t) * t) * t) * t) * t) * t;
1919 }
1920 case 44: {
1921 T t = 2*y100 - 89;
1922 return 0.11094484319386444474e0 + (0.21637548491908170841e-2 + (0.17164995035719657111e-4 + (0.12317915750735938089e-6 + (0.79376309831499633734e-9 + (0.45427901763106353914e-11 + 0.22696025653333333333e-13 * t) * t) * t) * t) * t) * t;
1923 }
1924 case 45: {
1925 T t = 2*y100 - 91;
1926 return 0.11534201115268804714e0 + (0.22339187474546420375e-2 + (0.17923489217504226813e-4 + (0.12971465288245997681e-6 + (0.84057834180389073587e-9 + (0.48233721206418027227e-11 + 0.24079890062222222222e-13 * t) * t) * t) * t) * t) * t;
1927 }
1928 case 46: {
1929 T t = 2*y100 - 93;
1930 return 0.11988259392684094740e0 + (0.23071965691918689601e-2 + (0.18722342718958935446e-4 + (0.13663611754337957520e-6 + (0.89028385488493287005e-9 + (0.51210161569225846701e-11 + 0.25540227111111111111e-13 * t) * t) * t) * t) * t) * t;
1931 }
1932 case 47: {
1933 T t = 2*y100 - 95;
1934 return 0.12457298393509812907e0 + (0.23837544771809575380e-2 + (0.19563942105711612475e-4 + (0.14396736847739470782e-6 + (0.94305490646459247016e-9 + (0.54366590583134218096e-11 + 0.27080225920000000000e-13 * t) * t) * t) * t) * t) * t;
1935 }
1936 case 48: {
1937 T t = 2*y100 - 97;
1938 return 0.12941991566142438816e0 + (0.24637684719508859484e-2 + (0.20450821127475879816e-4 + (0.15173366280523906622e-6 + (0.99907632506389027739e-9 + (0.57712760311351625221e-11 + 0.28703099555555555556e-13 * t) * t) * t) * t) * t) * t;
1939 }
1940 case 49: {
1941 T t = 2*y100 - 99;
1942 return 0.13443048593088696613e0 + (0.25474249981080823877e-2 + (0.21385669591362915223e-4 + (0.15996177579900443030e-6 + (0.10585428844575134013e-8 + (0.61258809536787882989e-11 + 0.30412080142222222222e-13 * t) * t) * t) * t) * t) * t;
1943 }
1944 case 50: {
1945 T t = 2*y100 - 101;
1946 return 0.13961217543434561353e0 + (0.26349215871051761416e-2 + (0.22371342712572567744e-4 + (0.16868008199296822247e-6 + (0.11216596910444996246e-8 + (0.65015264753090890662e-11 + 0.32210394506666666666e-13 * t) * t) * t) * t) * t) * t;
1947 }
1948 case 51: {
1949 T t = 2*y100 - 103;
1950 return 0.14497287157673800690e0 + (0.27264675383982439814e-2 + (0.23410870961050950197e-4 + (0.17791863939526376477e-6 + (0.11886425714330958106e-8 + (0.68993039665054288034e-11 + 0.34101266222222222221e-13 * t) * t) * t) * t) * t) * t;
1951 }
1952 case 52: {
1953 T t = 2*y100 - 105;
1954 return 0.15052089272774618151e0 + (0.28222846410136238008e-2 + (0.24507470422713397006e-4 + (0.18770927679626136909e-6 + (0.12597184587583370712e-8 + (0.73203433049229821618e-11 + 0.36087889048888888890e-13 * t) * t) * t) * t) * t) * t;
1955 }
1956 case 53: {
1957 T t = 2*y100 - 107;
1958 return 0.15626501395774612325e0 + (0.29226079376196624949e-2 + (0.25664553693768450545e-4 + (0.19808568415654461964e-6 + (0.13351257759815557897e-8 + (0.77658124891046760667e-11 + 0.38173420035555555555e-13 * t) * t) * t) * t) * t) * t;
1959 }
1960 case 54: {
1961 T t = 2*y100 - 109;
1962 return 0.16221449434620737567e0 + (0.30276865332726475672e-2 + (0.26885741326534564336e-4 + (0.20908350604346384143e-6 + (0.14151148144240728728e-8 + (0.82369170665974313027e-11 + 0.40360957457777777779e-13 * t) * t) * t) * t) * t) * t;
1963 }
1964 case 55: {
1965 T t = 2*y100 - 111;
1966 return 0.16837910595412130659e0 + (0.31377844510793082301e-2 + (0.28174873844911175026e-4 + (0.22074043807045782387e-6 + (0.14999481055996090039e-8 + (0.87348993661930809254e-11 + 0.42653528977777777779e-13 * t) * t) * t) * t) * t) * t;
1967 }
1968 case 56: {
1969 T t = 2*y100 - 113;
1970 return 0.17476916455659369953e0 + (0.32531815370903068316e-2 + (0.29536024347344364074e-4 + (0.23309632627767074202e-6 + (0.15899007843582444846e-8 + (0.92610375235427359475e-11 + 0.45054073102222222221e-13 * t) * t) * t) * t) * t) * t;
1971 }
1972 case 57: {
1973 T t = 2*y100 - 115;
1974 return 0.18139556223643701364e0 + (0.33741744168096996041e-2 + (0.30973511714709500836e-4 + (0.24619326937592290996e-6 + (0.16852609412267750744e-8 + (0.98166442942854895573e-11 + 0.47565418097777777779e-13 * t) * t) * t) * t) * t) * t;
1975 }
1976 case 58: {
1977 T t = 2*y100 - 117;
1978 return 0.18826980194443664549e0 + (0.35010775057740317997e-2 + (0.32491914440014267480e-4 + (0.26007572375886319028e-6 + (0.17863299617388376116e-8 + (0.10403065638343878679e-10 + 0.50190265831111111110e-13 * t) * t) * t) * t) * t) * t;
1979 }
1980 case 59: {
1981 T t = 2*y100 - 119;
1982 return 0.19540403413693967350e0 + (0.36342240767211326315e-2 + (0.34096085096200907289e-4 + (0.27479061117017637474e-6 + (0.18934228504790032826e-8 + (0.11021679075323598664e-10 + 0.52931171733333333334e-13 * t) * t) * t) * t) * t) * t;
1983 }
1984 case 60: {
1985 T t = 2*y100 - 121;
1986 return 0.20281109560651886959e0 + (0.37739673859323597060e-2 + (0.35791165457592409054e-4 + (0.29038742889416172404e-6 + (0.20068685374849001770e-8 + (0.11673891799578381999e-10 + 0.55790523093333333334e-13 * t) * t) * t) * t) * t) * t;
1987 }
1988 case 61: {
1989 T t = 2*y100 - 123;
1990 return 0.21050455062669334978e0 + (0.39206818613925652425e-2 + (0.37582602289680101704e-4 + (0.30691836231886877385e-6 + (0.21270101645763677824e-8 + (0.12361138551062899455e-10 + 0.58770520160000000000e-13 * t) * t) * t) * t) * t) * t;
1991 }
1992 case 62: {
1993 T t = 2*y100 - 125;
1994 return 0.21849873453703332479e0 + (0.40747643554689586041e-2 + (0.39476163820986711501e-4 + (0.32443839970139918836e-6 + (0.22542053491518680200e-8 + (0.13084879235290858490e-10 + 0.61873153262222222221e-13 * t) * t) * t) * t) * t) * t;
1995 }
1996 case 63: {
1997 T t = 2*y100 - 127;
1998 return 0.22680879990043229327e0 + (0.42366354648628516935e-2 + (0.41477956909656896779e-4 + (0.34300544894502810002e-6 + (0.23888264229264067658e-8 + (0.13846596292818514601e-10 + 0.65100183751111111110e-13 * t) * t) * t) * t) * t) * t;
1999 }
2000 case 64: {
2001 T t = 2*y100 - 129;
2002 return 0.23545076536988703937e0 + (0.44067409206365170888e-2 + (0.43594444916224700881e-4 + (0.36268045617760415178e-6 + (0.25312606430853202748e-8 + (0.14647791812837903061e-10 + 0.68453122631111111110e-13 * t) * t) * t) * t) * t) * t;
2003 }
2004 case 65: {
2005 T t = 2*y100 - 131;
2006 return 0.24444156740777432838e0 + (0.45855530511605787178e-2 + (0.45832466292683085475e-4 + (0.38352752590033030472e-6 + (0.26819103733055603460e-8 + (0.15489984390884756993e-10 + 0.71933206364444444445e-13 * t) * t) * t) * t) * t) * t;
2007 }
2008 case 66: {
2009 T t = 2*y100 - 133;
2010 return 0.25379911500634264643e0 + (0.47735723208650032167e-2 + (0.48199253896534185372e-4 + (0.40561404245564732314e-6 + (0.28411932320871165585e-8 + (0.16374705736458320149e-10 + 0.75541379822222222221e-13 * t) * t) * t) * t) * t) * t;
2011 }
2012 case 67: {
2013 T t = 2*y100 - 135;
2014 return 0.26354234756393613032e0 + (0.49713289477083781266e-2 + (0.50702455036930367504e-4 + (0.42901079254268185722e-6 + (0.30095422058900481753e-8 + (0.17303497025347342498e-10 + 0.79278273368888888890e-13 * t) * t) * t) * t) * t) * t;
2015 }
2016 case 68: {
2017 T t = 2*y100 - 137;
2018 return 0.27369129607732343398e0 + (0.51793846023052643767e-2 + (0.53350152258326602629e-4 + (0.45379208848865015485e-6 + (0.31874057245814381257e-8 + (0.18277905010245111046e-10 + 0.83144182364444444445e-13 * t) * t) * t) * t) * t) * t;
2019 }
2020 case 69: {
2021 T t = 2*y100 - 139;
2022 return 0.28426714781640316172e0 + (0.53983341916695141966e-2 + (0.56150884865255810638e-4 + (0.48003589196494734238e-6 + (0.33752476967570796349e-8 + (0.19299477888083469086e-10 + 0.87139049137777777779e-13 * t) * t) * t) * t) * t) * t;
2023 }
2024 case 70: {
2025 T t = 2*y100 - 141;
2026 return 0.29529231465348519920e0 + (0.56288077305420795663e-2 + (0.59113671189913307427e-4 + (0.50782393781744840482e-6 + (0.35735475025851713168e-8 + (0.20369760937017070382e-10 + 0.91262442613333333334e-13 * t) * t) * t) * t) * t) * t;
2027 }
2028 case 71: {
2029 T t = 2*y100 - 143;
2030 return 0.30679050522528838613e0 + (0.58714723032745403331e-2 + (0.62248031602197686791e-4 + (0.53724185766200945789e-6 + (0.37827999418960232678e-8 + (0.21490291930444538307e-10 + 0.95513539182222222221e-13 * t) * t) * t) * t) * t) * t;
2031 }
2032 case 72: {
2033 T t = 2*y100 - 145;
2034 return 0.31878680111173319425e0 + (0.61270341192339103514e-2 + (0.65564012259707640976e-4 + (0.56837930287837738996e-6 + (0.40035151353392378882e-8 + (0.22662596341239294792e-10 + 0.99891109760000000000e-13 * t) * t) * t) * t) * t) * t;
2035 }
2036 case 73: {
2037 T t = 2*y100 - 147;
2038 return 0.33130773722152622027e0 + (0.63962406646798080903e-2 + (0.69072209592942396666e-4 + (0.60133006661885941812e-6 + (0.42362183765883466691e-8 + (0.23888182347073698382e-10 + 0.10439349811555555556e-12 * t) * t) * t) * t) * t) * t;
2039 }
2040 case 74: {
2041 T t = 2*y100 - 149;
2042 return 0.34438138658041336523e0 + (0.66798829540414007258e-2 + (0.72783795518603561144e-4 + (0.63619220443228800680e-6 + (0.44814499336514453364e-8 + (0.25168535651285475274e-10 + 0.10901861383111111111e-12 * t) * t) * t) * t) * t) * t;
2043 }
2044 case 75: {
2045 T t = 2*y100 - 151;
2046 return 0.35803744972380175583e0 + (0.69787978834882685031e-2 + (0.76710543371454822497e-4 + (0.67306815308917386747e-6 + (0.47397647975845228205e-8 + (0.26505114141143050509e-10 + 0.11376390933333333333e-12 * t) * t) * t) * t) * t) * t;
2047 }
2048 case 76: {
2049 T t = 2*y100 - 153;
2050 return 0.37230734890119724188e0 + (0.72938706896461381003e-2 + (0.80864854542670714092e-4 + (0.71206484718062688779e-6 + (0.50117323769745883805e-8 + (0.27899342394100074165e-10 + 0.11862637614222222222e-12 * t) * t) * t) * t) * t) * t;
2051 }
2052 case 77: {
2053 T t = 2*y100 - 155;
2054 return 0.38722432730555448223e0 + (0.76260375162549802745e-2 + (0.85259785810004603848e-4 + (0.75329383305171327677e-6 + (0.52979361368388119355e-8 + (0.29352606054164086709e-10 + 0.12360253370666666667e-12 * t) * t) * t) * t) * t) * t;
2055 }
2056 case 78: {
2057 T t = 2*y100 - 157;
2058 return 0.40282355354616940667e0 + (0.79762880915029728079e-2 + (0.89909077342438246452e-4 + (0.79687137961956194579e-6 + (0.55989731807360403195e-8 + (0.30866246101464869050e-10 + 0.12868841946666666667e-12 * t) * t) * t) * t) * t) * t;
2059 }
2060 case 79: {
2061 T t = 2*y100 - 159;
2062 return 0.41914223158913787649e0 + (0.83456685186950463538e-2 + (0.94827181359250161335e-4 + (0.84291858561783141014e-6 + (0.59154537751083485684e-8 + (0.32441553034347469291e-10 + 0.13387957943111111111e-12 * t) * t) * t) * t) * t) * t;
2063 }
2064 case 80: {
2065 T t = 2*y100 - 161;
2066 return 0.43621971639463786896e0 + (0.87352841828289495773e-2 + (0.10002929142066799966e-3 + (0.89156148280219880024e-6 + (0.62480008150788597147e-8 + (0.34079760983458878910e-10 + 0.13917107176888888889e-12 * t) * t) * t) * t) * t) * t;
2067 }
2068 case 81: {
2069 T t = 2*y100 - 163;
2070 return 0.45409763548534330981e0 + (0.91463027755548240654e-2 + (0.10553137232446167258e-3 + (0.94293113464638623798e-6 + (0.65972492312219959885e-8 + (0.35782041795476563662e-10 + 0.14455745872000000000e-12 * t) * t) * t) * t) * t) * t;
2071 }
2072 case 82: {
2073 T t = 2*y100 - 165;
2074 return 0.47282001668512331468e0 + (0.95799574408860463394e-2 + (0.11135019058000067469e-3 + (0.99716373005509038080e-6 + (0.69638453369956970347e-8 + (0.37549499088161345850e-10 + 0.15003280712888888889e-12 * t) * t) * t) * t) * t) * t;
2075 }
2076 case 83: {
2077 T t = 2*y100 - 167;
2078 return 0.49243342227179841649e0 + (0.10037550043909497071e-1 + (0.11750334542845234952e-3 + (0.10544006716188967172e-5 + (0.73484461168242224872e-8 + (0.39383162326435752965e-10 + 0.15559069118222222222e-12 * t) * t) * t) * t) * t) * t;
2079 }
2080 case 84: {
2081 T t = 2*y100 - 169;
2082 return 0.51298708979209258326e0 + (0.10520454564612427224e-1 + (0.12400930037494996655e-3 + (0.11147886579371265246e-5 + (0.77517184550568711454e-8 + (0.41283980931872622611e-10 + 0.16122419680000000000e-12 * t) * t) * t) * t) * t) * t;
2083 }
2084 case 85: {
2085 T t = 2*y100 - 171;
2086 return 0.53453307979101369843e0 + (0.11030120618800726938e-1 + (0.13088741519572269581e-3 + (0.11784797595374515432e-5 + (0.81743383063044825400e-8 + (0.43252818449517081051e-10 + 0.16692592640000000000e-12 * t) * t) * t) * t) * t) * t;
2087 }
2088 case 86: {
2089 T t = 2*y100 - 173;
2090 return 0.55712643071169299478e0 + (0.11568077107929735233e-1 + (0.13815797838036651289e-3 + (0.12456314879260904558e-5 + (0.86169898078969313597e-8 + (0.45290446811539652525e-10 + 0.17268801084444444444e-12 * t) * t) * t) * t) * t) * t;
2091 }
2092 case 87: {
2093 T t = 2*y100 - 175;
2094 return 0.58082532122519320968e0 + (0.12135935999503877077e-1 + (0.14584223996665838559e-3 + (0.13164068573095710742e-5 + (0.90803643355106020163e-8 + (0.47397540713124619155e-10 + 0.17850211608888888889e-12 * t) * t) * t) * t) * t) * t;
2095 }
2096 case 88: {
2097 T t = 2*y100 - 177;
2098 return 0.60569124025293375554e0 + (0.12735396239525550361e-1 + (0.15396244472258863344e-3 + (0.13909744385382818253e-5 + (0.95651595032306228245e-8 + (0.49574672127669041550e-10 + 0.18435945564444444444e-12 * t) * t) * t) * t) * t) * t;
2099 }
2100 case 89: {
2101 T t = 2*y100 - 179;
2102 return 0.63178916494715716894e0 + (0.13368247798287030927e-1 + (0.16254186562762076141e-3 + (0.14695084048334056083e-5 + (0.10072078109604152350e-7 + (0.51822304995680707483e-10 + 0.19025081422222222222e-12 * t) * t) * t) * t) * t) * t;
2103 }
2104 case 90: {
2105 T t = 2*y100 - 181;
2106 return 0.65918774689725319200e0 + (0.14036375850601992063e-1 + (0.17160483760259706354e-3 + (0.15521885688723188371e-5 + (0.10601827031535280590e-7 + (0.54140790105837520499e-10 + 0.19616655146666666667e-12 * t) * t) * t) * t) * t) * t;
2107 }
2108 case 91: {
2109 T t = 2*y100 - 183;
2110 return 0.68795950683174433822e0 + (0.14741765091365869084e-1 + (0.18117679143520433835e-3 + (0.16392004108230585213e-5 + (0.11155116068018043001e-7 + (0.56530360194925690374e-10 + 0.20209663662222222222e-12 * t) * t) * t) * t) * t) * t;
2111 }
2112 case 92: {
2113 T t = 2*y100 - 185;
2114 return 0.71818103808729967036e0 + (0.15486504187117112279e-1 + (0.19128428784550923217e-3 + (0.17307350969359975848e-5 + (0.11732656736113607751e-7 + (0.58991125287563833603e-10 + 0.20803065333333333333e-12 * t) * t) * t) * t) * t) * t;
2115 }
2116 case 93: {
2117 T t = 2*y100 - 187;
2118 return 0.74993321911726254661e0 + (0.16272790364044783382e-1 + (0.20195505163377912645e-3 + (0.18269894883203346953e-5 + (0.12335161021630225535e-7 + (0.61523068312169087227e-10 + 0.21395783431111111111e-12 * t) * t) * t) * t) * t) * t;
2119 }
2120 case 94: {
2121 T t = 2*y100 - 189;
2122 return 0.78330143531283492729e0 + (0.17102934132652429240e-1 + (0.21321800585063327041e-3 + (0.19281661395543913713e-5 + (0.12963340087354341574e-7 + (0.64126040998066348872e-10 + 0.21986708942222222222e-12 * t) * t) * t) * t) * t) * t;
2123 }
2124 case 95: {
2125 T t = 2*y100 - 191;
2126 return 0.81837581041023811832e0 + (0.17979364149044223802e-1 + (0.22510330592753129006e-3 + (0.20344732868018175389e-5 + (0.13617902941839949718e-7 + (0.66799760083972474642e-10 + 0.22574701262222222222e-12 * t) * t) * t) * t) * t) * t;
2127 }
2128 case 96: {
2129 T t = 2*y100 - 193;
2130 return 0.85525144775685126237e0 + (0.18904632212547561026e-1 + (0.23764237370371255638e-3 + (0.21461248251306387979e-5 + (0.14299555071870523786e-7 + (0.69543803864694171934e-10 + 0.23158593688888888889e-12 * t) * t) * t) * t) * t) * t;
2131 }
2132 case 97: {
2133 T t = 2*y100 - 195;
2134 return 0.89402868170849933734e0 + (0.19881418399127202569e-1 + (0.25086793128395995798e-3 + (0.22633402747585233180e-5 + (0.15008997042116532283e-7 + (0.72357609075043941261e-10 + 0.23737194737777777778e-12 * t) * t) * t) * t) * t) * t;
2135 }
2136 case 98: {
2137 T t = 2*y100 - 197;
2138 return 0.93481333942870796363e0 + (0.20912536329780368893e-1 + (0.26481403465998477969e-3 + (0.23863447359754921676e-5 + (0.15746923065472184451e-7 + (0.75240468141720143653e-10 + 0.24309291271111111111e-12 * t) * t) * t) * t) * t) * t;
2139 }
2140 case 99: {
2141 T t = 2*y100 - 199;
2142 return 0.97771701335885035464e0 + (0.22000938572830479551e-1 + (0.27951610702682383001e-3 + (0.25153688325245314530e-5 + (0.16514019547822821453e-7 + (0.78191526829368231251e-10 + 0.24873652355555555556e-12 * t) * t) * t) * t) * t) * t;
2143 }
2144 }
2145 // we only get here if y = 1, i.e. |x| < 4*eps, in which case
2146 // erfcx is within 1e-15 of 1..
2147 return 1.0;
2148 }
2149
2150 template <typename T>
2151 C10_HOST_DEVICE inline typename std::enable_if<std::is_floating_point<T>::value, T>::type
calc_erfcx(T x)2152 calc_erfcx(T x)
2153 {
2154 if (at::_isnan(x)) {
2155 return x;
2156 }
2157
2158 if (x >= 0) {
2159 if (x > 50) { // continued-fraction expansion is faster
2160 const T ispi = 0.56418958354775628694807945156; // 1 / sqrt(pi)
2161 if (x > 5e7) { // 1-term expansion, important to avoid overflow
2162 return ispi / x;
2163 }
2164 /* 5-term expansion (rely on compiler for CSE), simplified from:
2165 ispi / (x+0.5/(x+1/(x+1.5/(x+2/x)))) */
2166 return ispi*((x*x) * (x*x+4.5) + 2) / (x * ((x*x) * (x*x+5) + 3.75));
2167 }
2168 return erfcx_y100(400/(4+x));
2169 }
2170 else {
2171 if (x < -26.7) {
2172 return std::numeric_limits<T>::infinity();
2173 }
2174 else if (x < -6.1) {
2175 return 2*exp(x*x);
2176 }
2177 else {
2178 return 2*exp(x*x) - erfcx_y100(400/(4-x));
2179 }
2180 }
2181 }
2182
2183 /*
2184 * Logarithm of Gaussian cumulative distribution function.
2185
2186 * This implementation of log_ndtr and its helper functions
2187 * follow SciPy's implementation
2188 * See NOTICE for the licenses.
2189 */
2190 template <typename T>
calc_log_ndtr(T x)2191 inline C10_HOST_DEVICE T calc_log_ndtr(T x) {
2192 T t = x * c10::frac_sqrt_2<T>;
2193 if (x < T{-1.0}) {
2194 return std::log(calc_erfcx(-t) / 2) - t * t;
2195 } else {
2196 return std::log1p(-std::erfc(t) / 2);
2197 }
2198 }
2199
2200 template<typename T>
airy_ai_forward(T x)2201 inline C10_HOST_DEVICE T airy_ai_forward(T x) {
2202 static const T AN[] = {
2203 +3.46538101525629032477e-01,
2204 +1.20075952739645805542e+01,
2205 +7.62796053615234516538e+01,
2206 +1.68089224934630576269e+02,
2207 +1.59756391350164413639e+02,
2208 +7.05360906840444183113e+01,
2209 +1.40264691163389668864e+01,
2210 +9.99999999999999995305e-01,
2211 };
2212
2213 static const T AD[] = {
2214 +5.67594532638770212846e-01,
2215 +1.47562562584847203173e+01,
2216 +8.45138970141474626562e+01,
2217 +1.77318088145400459522e+02,
2218 +1.64234692871529701831e+02,
2219 +7.14778400825575695274e+01,
2220 +1.40959135607834029598e+01,
2221 +1.00000000000000000470e+00,
2222 };
2223
2224 static const T AFN[] = {
2225 -1.31696323418331795333e-01,
2226 -6.26456544431912369773e-01,
2227 -6.93158036036933542233e-01,
2228 -2.79779981545119124951e-01,
2229 -4.91900132609500318020e-02,
2230 -4.06265923594885404393e-03,
2231 -1.59276496239262096340e-04,
2232 -2.77649108155232920844e-06,
2233 -1.67787698489114633780e-08,
2234 };
2235
2236 static const T AFD[] = {
2237 +1.33560420706553243746e+01,
2238 +3.26825032795224613948e+01,
2239 +2.67367040941499554804e+01,
2240 +9.18707402907259625840e+00,
2241 +1.47529146771666414581e+00,
2242 +1.15687173795188044134e-01,
2243 +4.40291641615211203805e-03,
2244 +7.54720348287414296618e-05,
2245 +4.51850092970580378464e-07,
2246 };
2247
2248 static const T AGN[] = {
2249 +1.97339932091685679179e-02,
2250 +3.91103029615688277255e-01,
2251 +1.06579897599595591108e+00,
2252 +9.39169229816650230044e-01,
2253 +3.51465656105547619242e-01,
2254 +6.33888919628925490927e-02,
2255 +5.85804113048388458567e-03,
2256 +2.82851600836737019778e-04,
2257 +6.98793669997260967291e-06,
2258 +8.11789239554389293311e-08,
2259 +3.41551784765923618484e-10,
2260 };
2261
2262 static const T AGD[] = {
2263 +9.30892908077441974853e+00,
2264 +1.98352928718312140417e+01,
2265 +1.55646628932864612953e+01,
2266 +5.47686069422975497931e+00,
2267 +9.54293611618961883998e-01,
2268 +8.64580826352392193095e-02,
2269 +4.12656523824222607191e-03,
2270 +1.01259085116509135510e-04,
2271 +1.17166733214413521882e-06,
2272 +4.91834570062930015649e-09,
2273 };
2274
2275 int domain_flag = 0;
2276
2277 T ai;
2278
2279 if (std::isinf(x)) {
2280 return std::numeric_limits<T>::quiet_NaN();
2281 }
2282
2283 if (x > T(103.892)) {
2284 return T(0.0);
2285 }
2286
2287 T f;
2288 T g;
2289 T k;
2290
2291 if (x < T(-2.09)) {
2292 T z = T(1.0) / (T(-2.0) * x * std::sqrt(-x) / T(3.0));
2293
2294 T afn = 0.0;
2295
2296 for (uint8_t index = 0; index <= 8; index++) {
2297 afn = afn * (z * z) + AFN[index];
2298 }
2299
2300 T afd = 0.0;
2301
2302 for (uint8_t index = 0; index <= 8; index++) {
2303 afd = afd * (z * z) + AFD[index];
2304 }
2305
2306 T agn = 0.0;
2307
2308 for (uint8_t index = 0; index <= 10 + 0; index++) {
2309 agn = agn * (z * z) + AGN[index];
2310 }
2311
2312 T agd = 0.0;
2313
2314 for (uint8_t index = 0; index <= 10 - 1; index++) {
2315 agd = agd * (z * z) + AGD[index];
2316 }
2317
2318 T t = T(-2.0) * x * std::sqrt(-x) / T(3.0) + T(0.25) * c10::pi<T>;
2319
2320 return T(5.64189583547756286948e-01) / std::sqrt(std::sqrt(-x)) * (std::sin(t) * (T(1.0) + z * z * afn / afd) - std::cos(t) * (z * agn / agd));
2321 }
2322
2323 if (x >= T(2.09)) {
2324 domain_flag = 5;
2325
2326 T zeta = T(2.0) * x * std::sqrt(x) / T(3.0);
2327
2328 T an = 0.0;
2329
2330 for (uint8_t index = 0; index <= 7; index++) {
2331 an = an * (T(1.0) / zeta) + AN[index];
2332 }
2333
2334 T ad = 0.0;
2335
2336 for (uint8_t index = 0; index <= 7; index++) {
2337 ad = ad * (T(1.0) / zeta) + AD[index];
2338 }
2339
2340 ai = T(5.64189583547756286948e-01) * (an / ad) / (T(2.0) * std::sqrt(std::sqrt(x)) * std::exp(zeta));
2341
2342 if (x > T(8.3203353)) {
2343 return ai;
2344 }
2345 }
2346
2347 f = 1.0;
2348 g = x;
2349 k = 1.0;
2350
2351 T m = 1.0;
2352 T n = x;
2353 T t = 1.0;
2354 T z = x * x * x;
2355
2356 while (t > std::numeric_limits<T>::epsilon()) {
2357 m *= z;
2358 k += T(1.0);
2359 m /= k;
2360 n *= z;
2361 k += T(1.0);
2362 n /= k;
2363 m /= k;
2364 f += m;
2365 k += T(1.0);
2366 n /= k;
2367 g += n;
2368
2369 t = std::abs(m / f);
2370 }
2371
2372 if ((domain_flag & 1) == 0) {
2373 return T(0.355028053887817239260) * f - T(0.258819403792806798405) * g;
2374 }
2375
2376 return ai;
2377 } // T airy_ai(T x)
2378
2379 template<typename T>
bessel_j0_forward(T x)2380 inline C10_HOST_DEVICE T bessel_j0_forward(T x) {
2381 static const T PP[] = {
2382 +7.96936729297347051624e-04,
2383 +8.28352392107440799803e-02,
2384 +1.23953371646414299388e+00,
2385 +5.44725003058768775090e+00,
2386 +8.74716500199817011941e+00,
2387 +5.30324038235394892183e+00,
2388 +9.99999999999999997821e-01,
2389 };
2390
2391 static const T PQ[] = {
2392 +9.24408810558863637013e-04,
2393 +8.56288474354474431428e-02,
2394 +1.25352743901058953537e+00,
2395 +5.47097740330417105182e+00,
2396 +8.76190883237069594232e+00,
2397 +5.30605288235394617618e+00,
2398 +1.00000000000000000218e+00,
2399 };
2400
2401 static const T QP[] = {
2402 -1.13663838898469149931e-02,
2403 -1.28252718670509318512e+00,
2404 -1.95539544257735972385e+01,
2405 -9.32060152123768231369e+01,
2406 -1.77681167980488050595e+02,
2407 -1.47077505154951170175e+02,
2408 -5.14105326766599330220e+01,
2409 -6.05014350600728481186e+00,
2410 };
2411
2412 static const T QQ[] = {
2413 +6.43178256118178023184e+01,
2414 +8.56430025976980587198e+02,
2415 +3.88240183605401609683e+03,
2416 +7.24046774195652478189e+03,
2417 +5.93072701187316984827e+03,
2418 +2.06209331660327847417e+03,
2419 +2.42005740240291393179e+02,
2420 };
2421
2422 static const T RP[] = {
2423 -4.79443220978201773821e+09,
2424 +1.95617491946556577543e+12,
2425 -2.49248344360967716204e+14,
2426 +9.70862251047306323952e+15,
2427 };
2428
2429 static const T RQ[] = {
2430 +4.99563147152651017219e+02,
2431 +1.73785401676374683123e+05,
2432 +4.84409658339962045305e+07,
2433 +1.11855537045356834862e+10,
2434 +2.11277520115489217587e+12,
2435 +3.10518229857422583814e+14,
2436 +3.18121955943204943306e+16,
2437 +1.71086294081043136091e+18,
2438 };
2439
2440 if (x < T(0)) {
2441 x = -x;
2442 }
2443
2444 if (x <= T(5.0)) {
2445 if (x < T(0.00001)) {
2446 return T(1.0) - x * x / T(4.0);
2447 }
2448
2449 T rp = 0.0;
2450
2451 for (uint8_t index = 0; index <= 3; index++) {
2452 rp = rp * (x * x) + RP[index];
2453 }
2454
2455 T rq = 0.0;
2456
2457 for (uint8_t index = 0; index <= 7; index++) {
2458 rq = rq * (x * x) + RQ[index];
2459 }
2460
2461 return (x * x - T(5.78318596294678452118e+00)) * (x * x - T(3.04712623436620863991e+01)) * rp / rq;
2462 }
2463
2464 T pp = 0.0;
2465
2466 for (uint8_t index = 0; index <= 6; index++) {
2467 pp = pp * (T(25.0) / (x * x)) + PP[index];
2468 }
2469
2470 T pq = 0.0;
2471
2472 for (uint8_t index = 0; index <= 6; index++) {
2473 pq = pq * (T(25.0) / (x * x)) + PQ[index];
2474 }
2475
2476 T qp = 0.0;
2477
2478 for (uint8_t index = 0; index <= 7; index++) {
2479 qp = qp * (T(25.0) / (x * x)) + QP[index];
2480 }
2481
2482 T qq = 0.0;
2483
2484 for (uint8_t index = 0; index <= 6; index++) {
2485 qq = qq * (T(25.0) / (x * x)) + QQ[index];
2486 }
2487
2488 return (pp / pq * std::cos(x - T(0.785398163397448309615660845819875721)) - T(5.0) / x * (qp / qq) * std::sin(x - T(0.785398163397448309615660845819875721))) * T(0.797884560802865355879892119868763737) / std::sqrt(x);
2489 } // bessel_j0_forward(T x)
2490
2491 template<typename T>
bessel_j1_forward(T x)2492 inline C10_HOST_DEVICE T bessel_j1_forward(T x) {
2493 static const T PP[] = {
2494 +7.62125616208173112003e-04,
2495 +7.31397056940917570436e-02,
2496 +1.12719608129684925192e+00,
2497 +5.11207951146807644818e+00,
2498 +8.42404590141772420927e+00,
2499 +5.21451598682361504063e+00,
2500 +1.00000000000000000254e+00,
2501 };
2502
2503 static const T PQ[] = {
2504 +5.71323128072548699714e-04,
2505 +6.88455908754495404082e-02,
2506 +1.10514232634061696926e+00,
2507 +5.07386386128601488557e+00,
2508 +8.39985554327604159757e+00,
2509 +5.20982848682361821619e+00,
2510 +9.99999999999999997461e-01,
2511 };
2512
2513 static const T QP[] = {
2514 +5.10862594750176621635e-02,
2515 +4.98213872951233449420e+00,
2516 +7.58238284132545283818e+01,
2517 +3.66779609360150777800e+02,
2518 +7.10856304998926107277e+02,
2519 +5.97489612400613639965e+02,
2520 +2.11688757100572135698e+02,
2521 +2.52070205858023719784e+01,
2522 };
2523
2524 static const T QQ[] = {
2525 +7.42373277035675149943e+01,
2526 +1.05644886038262816351e+03,
2527 +4.98641058337653607651e+03,
2528 +9.56231892404756170795e+03,
2529 +7.99704160447350683650e+03,
2530 +2.82619278517639096600e+03,
2531 +3.36093607810698293419e+02,
2532 };
2533
2534 static const T RP[] = {
2535 -8.99971225705559398224e+08,
2536 +4.52228297998194034323e+11,
2537 -7.27494245221818276015e+13,
2538 +3.68295732863852883286e+15,
2539 };
2540
2541 static const T RQ[] = {
2542 +6.20836478118054335476e+02,
2543 +2.56987256757748830383e+05,
2544 +8.35146791431949253037e+07,
2545 +2.21511595479792499675e+10,
2546 +4.74914122079991414898e+12,
2547 +7.84369607876235854894e+14,
2548 +8.95222336184627338078e+16,
2549 +5.32278620332680085395e+18,
2550 };
2551
2552 if (x < T(0.0)) {
2553 return -bessel_j1_forward(-x);
2554 }
2555
2556 if (x <= T(5.0)) {
2557 T rp = 0.0;
2558
2559 for (uint8_t index = 0; index <= 3; index++) {
2560 rp = rp * (x * x) + RP[index];
2561 }
2562
2563 T rq = 0.0;
2564
2565 for (uint8_t index = 0; index <= 7; index++) {
2566 rq = rq * (x * x) + RQ[index];
2567 }
2568
2569 return rp / rq * x * (x * x - T(1.46819706421238932572e+01)) * (x * x - T(4.92184563216946036703e+01));
2570 }
2571
2572 T pp = 0.0;
2573
2574 for (uint8_t index = 0; index <= 6; index++) {
2575 pp = pp * (T(5.0) / x * (T(5.0) / x)) + PP[index];
2576 }
2577
2578 T pq = 0.0;
2579
2580 for (uint8_t index = 0; index <= 6; index++) {
2581 pq = pq * (T(5.0) / x * (T(5.0) / x)) + PQ[index];
2582 }
2583
2584 T qp = 0.0;
2585
2586 for (uint8_t index = 0; index <= 7; index++) {
2587 qp = qp * (T(5.0) / x * (T(5.0) / x)) + QP[index];
2588 }
2589
2590 T qq = 0.0;
2591
2592 for (uint8_t index = 0; index <= 6; index++) {
2593 qq = qq * (T(5.0) / x * (T(5.0) / x)) + QQ[index];
2594 }
2595
2596 return (pp / pq * std::cos(x - T(2.356194490192344928846982537459627163)) - T(5.0) / x * (qp / qq) * std::sin(x - T(2.356194490192344928846982537459627163))) * T(0.797884560802865355879892119868763737) / std::sqrt(x);
2597 } // bessel_j1_forward(T x)
2598
2599 template<typename T>
bessel_y0_forward(T x)2600 inline C10_HOST_DEVICE T bessel_y0_forward(T x) {
2601 static const T PP[] = {
2602 +7.96936729297347051624e-04,
2603 +8.28352392107440799803e-02,
2604 +1.23953371646414299388e+00,
2605 +5.44725003058768775090e+00,
2606 +8.74716500199817011941e+00,
2607 +5.30324038235394892183e+00,
2608 +9.99999999999999997821e-01,
2609 };
2610
2611 static const T PQ[] = {
2612 +9.24408810558863637013e-04,
2613 +8.56288474354474431428e-02,
2614 +1.25352743901058953537e+00,
2615 +5.47097740330417105182e+00,
2616 +8.76190883237069594232e+00,
2617 +5.30605288235394617618e+00,
2618 +1.00000000000000000218e+00,
2619 };
2620
2621 static const T QP[] = {
2622 -1.13663838898469149931e-02,
2623 -1.28252718670509318512e+00,
2624 -1.95539544257735972385e+01,
2625 -9.32060152123768231369e+01,
2626 -1.77681167980488050595e+02,
2627 -1.47077505154951170175e+02,
2628 -5.14105326766599330220e+01,
2629 -6.05014350600728481186e+00,
2630 };
2631
2632 static const T QQ[] = {
2633 +6.43178256118178023184e+01,
2634 +8.56430025976980587198e+02,
2635 +3.88240183605401609683e+03,
2636 +7.24046774195652478189e+03,
2637 +5.93072701187316984827e+03,
2638 +2.06209331660327847417e+03,
2639 +2.42005740240291393179e+02,
2640 };
2641
2642 static const T YP[] = {
2643 +1.55924367855235737965e+04,
2644 -1.46639295903971606143e+07,
2645 +5.43526477051876500413e+09,
2646 -9.82136065717911466409e+11,
2647 +8.75906394395366999549e+13,
2648 -3.46628303384729719441e+15,
2649 +4.42733268572569800351e+16,
2650 -1.84950800436986690637e+16,
2651 };
2652
2653 static const T YQ[] = {
2654 +1.04128353664259848412e+03,
2655 +6.26107330137134956842e+05,
2656 +2.68919633393814121987e+08,
2657 +8.64002487103935000337e+10,
2658 +2.02979612750105546709e+13,
2659 +3.17157752842975028269e+15,
2660 +2.50596256172653059228e+17,
2661 };
2662
2663 if (x <= T(5.0)) {
2664 if (x == T(0.0)) {
2665 return -std::numeric_limits<T>::infinity();
2666 }
2667
2668 if (x < T(0.0)) {
2669 return std::numeric_limits<T>::quiet_NaN();
2670 }
2671
2672 T yp = 0.0;
2673
2674 for (uint8_t index = 0; index <= 7; index++) {
2675 yp = yp * (x * x) + YP[index];
2676 }
2677
2678 T yq = 0.0;
2679
2680 for (uint8_t index = 0; index <= 6; index++) {
2681 yq = yq * (x * x) + YQ[index];
2682 }
2683
2684 return yp / yq + (T(0.636619772367581343075535053490057448) * std::log(x) * bessel_j0_forward(x));
2685 }
2686
2687 T pp = 0.0;
2688
2689 for (uint8_t index = 0; index <= 6; index++) {
2690 pp = pp * (T(25.0) / (x * x)) + PP[index];
2691 }
2692
2693 T pq = 0.0;
2694
2695 for (uint8_t index = 0; index <= 6; index++) {
2696 pq = pq * (T(25.0) / (x * x)) + PQ[index];
2697 }
2698
2699 T qp = 0.0;
2700
2701 for (uint8_t index = 0; index <= 7; index++) {
2702 qp = qp * (T(25.0) / (x * x)) + QP[index];
2703 }
2704
2705 T qq = 0.0;
2706
2707 for (uint8_t index = 0; index <= 6; index++) {
2708 qq = qq * (T(25.0) / (x * x)) + QQ[index];
2709 }
2710
2711 return (pp / pq * std::sin(x - T(0.785398163397448309615660845819875721)) + T(5.0) / x * (qp / qq) * std::cos(x - T(0.785398163397448309615660845819875721))) * T(0.797884560802865355879892119868763737) / std::sqrt(x);
2712 } // bessel_y0_forward(T x)
2713
2714 template<typename T>
bessel_y1_forward(T x)2715 inline C10_HOST_DEVICE T bessel_y1_forward(T x) {
2716 static const T PP[] = {
2717 +7.62125616208173112003e-04,
2718 +7.31397056940917570436e-02,
2719 +1.12719608129684925192e+00,
2720 +5.11207951146807644818e+00,
2721 +8.42404590141772420927e+00,
2722 +5.21451598682361504063e+00,
2723 +1.00000000000000000254e+00,
2724 };
2725
2726 static const T PQ[] = {
2727 +5.71323128072548699714e-04,
2728 +6.88455908754495404082e-02,
2729 +1.10514232634061696926e+00,
2730 +5.07386386128601488557e+00,
2731 +8.39985554327604159757e+00,
2732 +5.20982848682361821619e+00,
2733 +9.99999999999999997461e-01,
2734 };
2735
2736 static const T QP[] = {
2737 +5.10862594750176621635e-02,
2738 +4.98213872951233449420e+00,
2739 +7.58238284132545283818e+01,
2740 +3.66779609360150777800e+02,
2741 +7.10856304998926107277e+02,
2742 +5.97489612400613639965e+02,
2743 +2.11688757100572135698e+02,
2744 +2.52070205858023719784e+01,
2745 };
2746
2747 static const T QQ[] = {
2748 +7.42373277035675149943e+01,
2749 +1.05644886038262816351e+03,
2750 +4.98641058337653607651e+03,
2751 +9.56231892404756170795e+03,
2752 +7.99704160447350683650e+03,
2753 +2.82619278517639096600e+03,
2754 +3.36093607810698293419e+02,
2755 };
2756
2757 static const T YP[] = {
2758 +1.26320474790178026440e+09,
2759 -6.47355876379160291031e+11,
2760 +1.14509511541823727583e+14,
2761 -8.12770255501325109621e+15,
2762 +2.02439475713594898196e+17,
2763 -7.78877196265950026825e+17,
2764 };
2765
2766 static const T YQ[] = {
2767 +5.94301592346128195359e+02,
2768 +2.35564092943068577943e+05,
2769 +7.34811944459721705660e+07,
2770 +1.87601316108706159478e+10,
2771 +3.88231277496238566008e+12,
2772 +6.20557727146953693363e+14,
2773 +6.87141087355300489866e+16,
2774 +3.97270608116560655612e+18,
2775 };
2776
2777 if (x <= T(5.0)) {
2778 if (x == T(0.0)) {
2779 return -std::numeric_limits<T>::infinity();
2780 }
2781
2782 if (x <= T(0.0)) {
2783 return std::numeric_limits<T>::quiet_NaN();
2784 }
2785
2786 T yp = 0.0;
2787
2788 for (uint8_t index = 0; index <= 5; index++) {
2789 yp = yp * (x * x) + YP[index];
2790 }
2791
2792 T yq = 0.0;
2793
2794 for (uint8_t index = 0; index <= 7; index++) {
2795 yq = yq * (x * x) + YQ[index];
2796 }
2797
2798 return x * (yp / yq) + (T(0.636619772367581343075535053490057448) * (bessel_j1_forward(x) * std::log(x) - T(1.0) / x));
2799 }
2800
2801 T pp = 0.0;
2802
2803 for (uint8_t index = 0; index <= 6; index++) {
2804 pp = pp * (T(5.0) / x * (T(5.0) / x)) + PP[index];
2805 }
2806
2807 T pq = 0.0;
2808
2809 for (uint8_t index = 0; index <= 6; index++) {
2810 pq = pq * (T(5.0) / x * (T(5.0) / x)) + PQ[index];
2811 }
2812
2813 T qp = 0.0;
2814
2815 for (uint8_t index = 0; index <= 7; index++) {
2816 qp = qp * (T(5.0) / x * (T(5.0) / x)) + QP[index];
2817 }
2818
2819 T qq = 0.0;
2820
2821 for (uint8_t index = 0; index <= 6; index++) {
2822 qq = qq * (T(5.0) / x * (T(5.0) / x)) + QQ[index];
2823 }
2824
2825 return (pp / pq * std::sin(x - T(2.356194490192344928846982537459627163)) + T(5.0) / x * (qp / qq) * std::cos(x - T(2.356194490192344928846982537459627163))) * T(0.797884560802865355879892119868763737) / std::sqrt(x);
2826 } // bessel_y1_forward(T x)
2827
2828 template<typename T>
chebyshev_polynomial_t_forward(T x,int64_t n)2829 inline C10_HOST_DEVICE T chebyshev_polynomial_t_forward(T x, int64_t n) {
2830 if (n < 0) {
2831 return T(0.0);
2832 }
2833
2834 if (std::abs(x) == T(1.0)) {
2835 if (x > T(0.0) || n % 2 == 0) {
2836 return T(1.0);
2837 }
2838
2839 return T(-1.0);
2840 }
2841
2842 if ((n > 6) && (std::abs(x) < T(1.0))) {
2843 return std::cos(n * std::acos(x));
2844 }
2845
2846 if (n == 0) {
2847 return T(1.0);
2848 }
2849
2850 if (n == 1) {
2851 return x;
2852 }
2853
2854 T p = T(1.0);
2855 T q = x;
2856 T r;
2857
2858 for (int64_t k = 2; k <= n; k++) {
2859 r = (x + x) * q - p;
2860 p = q;
2861 q = r;
2862 }
2863
2864 return r;
2865 } // chebyshev_polynomial_t_forward(T x, int64_t n)
2866
2867 template<typename T, bool is_cuda=false>
chebyshev_polynomial_t_forward(T x,T n)2868 inline C10_HOST_DEVICE T chebyshev_polynomial_t_forward(T x, T n) {
2869 return chebyshev_polynomial_t_forward(x, static_cast<int64_t>(n));
2870 } // chebyshev_polynomial_t_forward(T x, T n)
2871
2872 template<typename T>
chebyshev_polynomial_u_forward(T x,int64_t n)2873 inline C10_HOST_DEVICE T chebyshev_polynomial_u_forward(T x, int64_t n) {
2874 if (n < 0) {
2875 return T(0.0);
2876 }
2877
2878 if (std::abs(x) == T(1.0)) {
2879 if (x > T(0.0) || n % 2 == 0) {
2880 return n + 1;
2881 }
2882
2883 return -(n + 1);
2884 }
2885
2886 if ((n > 8) && (std::abs(x) < T(1.0))) {
2887 if (std::sin(std::acos(x)) != T(0.0)) {
2888 return std::sin((n + 1) * std::acos(x)) / std::sin(std::acos(x));
2889 }
2890
2891 return (n + 1) * std::cos((n + 1) * std::acos(x)) / x;
2892 }
2893
2894 if (n == 0) {
2895 return T(1.0);
2896 }
2897
2898 if (n == 1) {
2899 return x + x;
2900 }
2901
2902 T p = T(1.0);
2903 T q = x + x;
2904 T r;
2905
2906 for (int64_t k = 2; k <= n; k++) {
2907 r = (x + x) * q - p;
2908 p = q;
2909 q = r;
2910 }
2911
2912 return r;
2913 } // chebyshev_polynomial_u_forward(T x, int64_t n)
2914
2915 template<typename T, bool is_cuda=false>
chebyshev_polynomial_u_forward(T x,T n)2916 inline C10_HOST_DEVICE T chebyshev_polynomial_u_forward(T x, T n) {
2917 return chebyshev_polynomial_u_forward(x, static_cast<int64_t>(n));
2918 } // chebyshev_polynomial_u_forward(T x, T n)
2919
2920 template<typename T>
chebyshev_polynomial_v_forward(T x,int64_t n)2921 inline C10_HOST_DEVICE T chebyshev_polynomial_v_forward(T x, int64_t n) {
2922 if (n < 0) {
2923 return T(0.0);
2924 }
2925
2926 if (std::abs(x) == T(1.0)) {
2927 if (x > T(0.0)) {
2928 return T(1.0);
2929 }
2930
2931 if (n % 2 == 0) {
2932 return n + n + 1;
2933 }
2934
2935 return -(n + n + 1);
2936 }
2937
2938 if ((n > 8) && (std::abs(x) < T(1.0))) {
2939 if (std::sin(std::acos(x) / T(2.0)) != T(1.0)) {
2940 return std::cos((n + T(0.5)) * std::acos(x)) / std::cos(std::acos(x) / T(2.0));
2941 }
2942
2943 if (n % 2 == 0) {
2944 return n + n + 1;
2945 }
2946
2947 return -(n + n + 1);
2948 }
2949
2950 if (n == 0) {
2951 return T(1.0);
2952 }
2953
2954 if (n == 1) {
2955 return x + x - T(1.0);
2956 }
2957
2958 T p = T(1.0);
2959 T q = x + x - T(1.0);
2960 T r;
2961
2962 for (int64_t k = 2; k <= n; k++) {
2963 r = (x + x) * q - p;
2964 p = q;
2965 q = r;
2966 }
2967
2968 return r;
2969 } // chebyshev_polynomial_v_forward(T x, int64_t n)
2970
2971 template<typename T, bool is_cuda=false>
chebyshev_polynomial_v_forward(T x,T n)2972 inline C10_HOST_DEVICE T chebyshev_polynomial_v_forward(T x, T n) {
2973 return chebyshev_polynomial_v_forward(x, static_cast<int64_t>(n));
2974 } // chebyshev_polynomial_v_forward(T x, T n)
2975
2976 template<typename T>
chebyshev_polynomial_w_forward(T x,int64_t n)2977 inline C10_HOST_DEVICE T chebyshev_polynomial_w_forward(T x, int64_t n) {
2978 if (n < 0) {
2979 return T(0.0);
2980 }
2981
2982 if (std::abs(x) == T(1.0)) {
2983 if (x > T(0.0)) {
2984 return n + n + 1;
2985 }
2986
2987 if (n % 2 == 0) {
2988 return T(1.0);
2989 }
2990
2991 return T(-1.0);
2992 }
2993
2994 if ((n > 8) && (std::abs(x) < T(1.0))) {
2995 if (std::cos(std::acos(x) / T(2.0)) != T(1.0)) {
2996 return std::sin((n + T(0.5)) * std::acos(x)) / std::sin(std::acos(x) / T(2.0));
2997 }
2998
2999 if (x > T(0.0)) {
3000 return n + n + 1;
3001 }
3002
3003 if (n % 2 == 0) {
3004 return T(1.0);
3005 }
3006
3007 return T(-1.0);
3008 }
3009
3010 if (n == 0) {
3011 return T(1.0);
3012 }
3013
3014 if (n == 1) {
3015 return x + x + T(1.0);
3016 }
3017
3018 T p = T(1.0);
3019 T q = x + x + T(1.0);
3020 T r;
3021
3022 for (int64_t k = 2; k <= n; k++) {
3023 r = (x + x) * q - p;
3024 p = q;
3025 q = r;
3026 }
3027
3028 return r;
3029 } // chebyshev_polynomial_w_forward(T x, int64_t n)
3030
3031 template<typename T, bool is_cuda=false>
chebyshev_polynomial_w_forward(T x,T n)3032 inline C10_HOST_DEVICE T chebyshev_polynomial_w_forward(T x, T n) {
3033 return chebyshev_polynomial_w_forward(x, static_cast<int64_t>(n));
3034 } // chebyshev_polynomial_w_forward(T x, T n)
3035
3036 template<typename T>
hermite_polynomial_h_forward(T x,int64_t n)3037 inline C10_HOST_DEVICE T hermite_polynomial_h_forward(T x, int64_t n) {
3038 if (n < 0) {
3039 return T(0.0);
3040 }
3041
3042 if (n == 0) {
3043 return T(1.0);
3044 }
3045
3046 if (n == 1) {
3047 return x + x;
3048 }
3049
3050 T p = T(1.0);
3051 T q = x + x;
3052 T r = T(0.0);
3053
3054 for (int64_t k = 2; k < n + n; k += 2) {
3055 r = (x + x) * q - k * p;
3056 p = q;
3057 q = r;
3058 }
3059
3060 return r;
3061 } // hermite_polynomial_h_forward(T x, int64_t n)
3062
3063 template<typename T, bool is_cuda=false, std::enable_if_t<!std::is_floating_point<T>::value, int> = 0>
hermite_polynomial_h_forward(T x,T n)3064 inline C10_HOST_DEVICE T hermite_polynomial_h_forward(T x, T n) {
3065 return hermite_polynomial_h_forward(x, static_cast<int64_t>(n));
3066 } // hermite_polynomial_h_forward(T x, T n)
3067
3068 template<typename T, bool is_cuda=false, std::enable_if_t<std::is_floating_point<T>::value, int> = 0>
hermite_polynomial_h_forward(T x,T n)3069 inline C10_HOST_DEVICE T hermite_polynomial_h_forward(T x, T n) {
3070 return hermite_polynomial_h_forward(x, ((!std::isinf(n)) && (!std::isnan(n))) ? static_cast<int64_t>(n) : static_cast<int64_t>(-1));
3071 } // hermite_polynomial_h_forward(T x, T n)
3072
3073 template<typename T>
hermite_polynomial_he_forward(T x,int64_t n)3074 inline C10_HOST_DEVICE T hermite_polynomial_he_forward(T x, int64_t n) {
3075 if (n < 0) {
3076 return T(0.0);
3077 }
3078
3079 if (n == 0) {
3080 return T(1.0);
3081 }
3082
3083 if (n == 1) {
3084 return x;
3085 }
3086
3087 T p = T(1.0);
3088 T q = x;
3089 T r;
3090
3091 for (int64_t k = 1; k < n; k++) {
3092 r = x * q - k * p;
3093 p = q;
3094 q = r;
3095 }
3096
3097 return r;
3098 } // hermite_polynomial_he_forward(T x, int64_t n)
3099
3100 template<typename T, bool is_cuda=false>
hermite_polynomial_he_forward(T x,T n)3101 inline C10_HOST_DEVICE T hermite_polynomial_he_forward(T x, T n) {
3102 return hermite_polynomial_he_forward(x, static_cast<int64_t>(n));
3103 } // hermite_polynomial_he_forward(T x, T n)
3104
3105 template<typename T>
laguerre_polynomial_l_forward(T x,int64_t n)3106 inline C10_HOST_DEVICE T laguerre_polynomial_l_forward(T x, int64_t n) {
3107 if (n < 0) {
3108 return T(0.0);
3109 }
3110
3111 if (std::abs(x) == T(0.0)) {
3112 return T(1.0);
3113 }
3114
3115 if (n == 0) {
3116 return T(1.0);
3117 }
3118
3119 if (n == 1) {
3120 return T(1.0) - x;
3121 }
3122
3123 T p = T(1.0);
3124 T q = T(1.0) - x;
3125 T r;
3126
3127 for (int64_t k = 1; k < n; k++) {
3128 r = (((k + k) + (T(1.0) - x)) * q - k * p) / (k + 1);
3129 p = q;
3130 q = r;
3131 }
3132
3133 return r;
3134 } // laguerre_polynomial_l_forward(T x, int64_t n)
3135
3136 template<typename T, bool is_cuda=false>
laguerre_polynomial_l_forward(T x,T n)3137 inline C10_HOST_DEVICE T laguerre_polynomial_l_forward(T x, T n) {
3138 return laguerre_polynomial_l_forward(x, static_cast<int64_t>(n));
3139 } // laguerre_polynomial_l_forward(T x, T n)
3140
3141 template<typename T>
legendre_polynomial_p_forward(T x,int64_t n)3142 inline C10_HOST_DEVICE T legendre_polynomial_p_forward(T x, int64_t n) {
3143 if (n < 0) {
3144 return T(0.0);
3145 }
3146
3147 if (std::abs(x) == T(1.0)) {
3148 if (x > T(0.0) || n % 2 == 0) {
3149 return T(1.0);
3150 }
3151
3152 return T(-1.0);
3153 }
3154
3155 if (n == 0) {
3156 return T(1.0);
3157 }
3158
3159 if (n == 1) {
3160 return x;
3161 }
3162
3163 T p = T(1.0);
3164 T q = x;
3165 T r;
3166
3167 for (int64_t k = 1; k < n; k++) {
3168 r = ((k + k + 1) * x * q - k * p) / (k + 1);
3169 p = q;
3170 q = r;
3171 }
3172
3173 return r;
3174 } // legendre_polynomial_p_forward(T x, int64_t n)
3175
3176 template<typename T, bool is_cuda=false>
legendre_polynomial_p_forward(T x,T n)3177 inline C10_HOST_DEVICE T legendre_polynomial_p_forward(T x, T n) {
3178 return legendre_polynomial_p_forward(x, static_cast<int64_t>(n));
3179 } // legendre_polynomial_p_forward(T x, T n)
3180
3181 template<typename T>
modified_bessel_i0_forward(T x)3182 inline C10_HOST_DEVICE T modified_bessel_i0_forward(T x) {
3183 static const T A[] = {
3184 -4.41534164647933937950e-18,
3185 +3.33079451882223809783e-17,
3186 -2.43127984654795469359e-16,
3187 +1.71539128555513303061e-15,
3188 -1.16853328779934516808e-14,
3189 +7.67618549860493561688e-14,
3190 -4.85644678311192946090e-13,
3191 +2.95505266312963983461e-12,
3192 -1.72682629144155570723e-11,
3193 +9.67580903537323691224e-11,
3194 -5.18979560163526290666e-10,
3195 +2.65982372468238665035e-09,
3196 -1.30002500998624804212e-08,
3197 +6.04699502254191894932e-08,
3198 -2.67079385394061173391e-07,
3199 +1.11738753912010371815e-06,
3200 -4.41673835845875056359e-06,
3201 +1.64484480707288970893e-05,
3202 -5.75419501008210370398e-05,
3203 +1.88502885095841655729e-04,
3204 -5.76375574538582365885e-04,
3205 +1.63947561694133579842e-03,
3206 -4.32430999505057594430e-03,
3207 +1.05464603945949983183e-02,
3208 -2.37374148058994688156e-02,
3209 +4.93052842396707084878e-02,
3210 -9.49010970480476444210e-02,
3211 +1.71620901522208775349e-01,
3212 -3.04682672343198398683e-01,
3213 +6.76795274409476084995e-01,
3214 };
3215
3216 static const T B[] = {
3217 -7.23318048787475395456e-18,
3218 -4.83050448594418207126e-18,
3219 +4.46562142029675999901e-17,
3220 +3.46122286769746109310e-17,
3221 -2.82762398051658348494e-16,
3222 -3.42548561967721913462e-16,
3223 +1.77256013305652638360e-15,
3224 +3.81168066935262242075e-15,
3225 -9.55484669882830764870e-15,
3226 -4.15056934728722208663e-14,
3227 +1.54008621752140982691e-14,
3228 +3.85277838274214270114e-13,
3229 +7.18012445138366623367e-13,
3230 -1.79417853150680611778e-12,
3231 -1.32158118404477131188e-11,
3232 -3.14991652796324136454e-11,
3233 +1.18891471078464383424e-11,
3234 +4.94060238822496958910e-10,
3235 +3.39623202570838634515e-09,
3236 +2.26666899049817806459e-08,
3237 +2.04891858946906374183e-07,
3238 +2.89137052083475648297e-06,
3239 +6.88975834691682398426e-05,
3240 +3.36911647825569408990e-03,
3241 +8.04490411014108831608e-01,
3242 };
3243
3244 T p;
3245 T q = 0.0;
3246
3247 if (std::abs(x) <= T(8.0)) {
3248 T a = A[0];
3249
3250 for (uint8_t index = 1; index < 30; index++) {
3251 p = q;
3252 q = a;
3253 a = ((std::abs(x) / T(2.0)) - T(2.0)) * q - p + A[index];
3254 }
3255
3256 return std::exp(std::abs(x)) * (T(0.5) * (a - p));
3257 }
3258
3259 T b = B[0];
3260
3261 for (uint8_t index = 1; index < 25; index++) {
3262 p = q;
3263 q = b;
3264 b = (T(32.0) / std::abs(x) - T(2.0)) * q - p + B[index];
3265 }
3266
3267 return std::exp(std::abs(x)) * (T(0.5) * (b - p)) / std::sqrt(std::abs(x));
3268 } // modified_bessel_i0_forward(T x)
3269
3270 template<typename T>
modified_bessel_i1_forward(T x)3271 inline C10_HOST_DEVICE T modified_bessel_i1_forward(T x) {
3272 static const T A[] = {
3273 +2.77791411276104639959e-18,
3274 -2.11142121435816608115e-17,
3275 +1.55363195773620046921e-16,
3276 -1.10559694773538630805e-15,
3277 +7.60068429473540693410e-15,
3278 -5.04218550472791168711e-14,
3279 +3.22379336594557470981e-13,
3280 -1.98397439776494371520e-12,
3281 +1.17361862988909016308e-11,
3282 -6.66348972350202774223e-11,
3283 +3.62559028155211703701e-10,
3284 -1.88724975172282928790e-09,
3285 +9.38153738649577178388e-09,
3286 -4.44505912879632808065e-08,
3287 +2.00329475355213526229e-07,
3288 -8.56872026469545474066e-07,
3289 +3.47025130813767847674e-06,
3290 -1.32731636560394358279e-05,
3291 +4.78156510755005422638e-05,
3292 -1.61760815825896745588e-04,
3293 +5.12285956168575772895e-04,
3294 -1.51357245063125314899e-03,
3295 +4.15642294431288815669e-03,
3296 -1.05640848946261981558e-02,
3297 +2.47264490306265168283e-02,
3298 -5.29459812080949914269e-02,
3299 +1.02643658689847095384e-01,
3300 -1.76416518357834055153e-01,
3301 +2.52587186443633654823e-01,
3302 };
3303
3304 static const T B[] = {
3305 +7.51729631084210481353e-18,
3306 +4.41434832307170791151e-18,
3307 -4.65030536848935832153e-17,
3308 -3.20952592199342395980e-17,
3309 +2.96262899764595013876e-16,
3310 +3.30820231092092828324e-16,
3311 -1.88035477551078244854e-15,
3312 -3.81440307243700780478e-15,
3313 +1.04202769841288027642e-14,
3314 +4.27244001671195135429e-14,
3315 -2.10154184277266431302e-14,
3316 -4.08355111109219731823e-13,
3317 -7.19855177624590851209e-13,
3318 +2.03562854414708950722e-12,
3319 +1.41258074366137813316e-11,
3320 +3.25260358301548823856e-11,
3321 -1.89749581235054123450e-11,
3322 -5.58974346219658380687e-10,
3323 -3.83538038596423702205e-09,
3324 -2.63146884688951950684e-08,
3325 -2.51223623787020892529e-07,
3326 -3.88256480887769039346e-06,
3327 -1.10588938762623716291e-04,
3328 -9.76109749136146840777e-03,
3329 +7.78576235018280120474e-01,
3330 };
3331
3332 T p;
3333 T q = 0.0;
3334
3335 if (std::abs(x) <= T(8.0)) {
3336 T a = A[0];
3337
3338 for (uint8_t index = 1; index < 29; index++) {
3339 p = q;
3340 q = a;
3341 a = ((std::abs(x) / T(2.0)) - T(2.0)) * q - p + A[index];
3342 }
3343
3344 if (x < T(0.0)) {
3345 return -(T(0.5) * (a - p) * std::abs(x) * std::exp(std::abs(x)));
3346 }
3347
3348 return T(0.5) * (a - p) * std::abs(x) * std::exp(std::abs(x));
3349 }
3350
3351 T b = B[0];
3352
3353 for (uint8_t index = 1; index < 25; index++) {
3354 p = q;
3355 q = b;
3356 b = (T(32.0) / std::abs(x) - T(2.0)) * q - p + B[index];
3357 }
3358
3359 if (x < T(0.0)) {
3360 return -(std::exp(std::abs(x)) * (T(0.5) * (b - p)) / std::sqrt(std::abs(x)));
3361 }
3362
3363 return std::exp(std::abs(x)) * (T(0.5) * (b - p)) / std::sqrt(std::abs(x));
3364 } // modified_bessel_i1_forward(T x)
3365
3366 template<typename T>
modified_bessel_k0_forward(T x)3367 inline C10_HOST_DEVICE T modified_bessel_k0_forward(T x) {
3368 static const T A[] = {
3369 +1.37446543561352307156e-16,
3370 +4.25981614279661018399e-14,
3371 +1.03496952576338420167e-11,
3372 +1.90451637722020886025e-09,
3373 +2.53479107902614945675e-07,
3374 +2.28621210311945178607e-05,
3375 +1.26461541144692592338e-03,
3376 +3.59799365153615016266e-02,
3377 +3.44289899924628486886e-01,
3378 -5.35327393233902768720e-01,
3379 };
3380
3381 static const T B[] = {
3382 +5.30043377268626276149e-18,
3383 -1.64758043015242134646e-17,
3384 +5.21039150503902756861e-17,
3385 -1.67823109680541210385e-16,
3386 +5.51205597852431940784e-16,
3387 -1.84859337734377901440e-15,
3388 +6.34007647740507060557e-15,
3389 -2.22751332699166985548e-14,
3390 +8.03289077536357521100e-14,
3391 -2.98009692317273043925e-13,
3392 +1.14034058820847496303e-12,
3393 -4.51459788337394416547e-12,
3394 +1.85594911495471785253e-11,
3395 -7.95748924447710747776e-11,
3396 +3.57739728140030116597e-10,
3397 -1.69753450938905987466e-09,
3398 +8.57403401741422608519e-09,
3399 -4.66048989768794782956e-08,
3400 +2.76681363944501510342e-07,
3401 -1.83175552271911948767e-06,
3402 +1.39498137188764993662e-05,
3403 -1.28495495816278026384e-04,
3404 +1.56988388573005337491e-03,
3405 -3.14481013119645005427e-02,
3406 +2.44030308206595545468e+00,
3407 };
3408
3409 if (x == T(0.0)) {
3410 return std::numeric_limits<T>::infinity();
3411 }
3412
3413 if (x < T(0.0)) {
3414 return std::numeric_limits<T>::quiet_NaN();
3415 }
3416
3417 T p;
3418 T q = 0.0;
3419
3420 if (x <= T(2.0)) {
3421 T a = A[0];
3422
3423 for (uint8_t index = 1; index < 10; index++) {
3424 p = q;
3425 q = a;
3426 a = (x * x - T(2.0)) * q - p + A[index];
3427 }
3428
3429 return T(0.5) * (a - p) - std::log(0.5 * x) * modified_bessel_i0_forward(x);
3430 }
3431
3432 T b = B[0];
3433
3434 for (uint8_t index = 1; index < 25; index++) {
3435 p = q;
3436 q = b;
3437 b = (T(8.0) / x - T(2.0)) * q - p + B[index];
3438 }
3439
3440 return std::exp(-x) * (T(0.5) * (b - p)) / std::sqrt(x);
3441 } // modified_bessel_k0_forward(T x)
3442
3443 template<typename T>
modified_bessel_k1_forward(T x)3444 inline C10_HOST_DEVICE T modified_bessel_k1_forward(T x) {
3445 static const T A[] = {
3446 -7.02386347938628759343e-18,
3447 -2.42744985051936593393e-15,
3448 -6.66690169419932900609e-13,
3449 -1.41148839263352776110e-10,
3450 -2.21338763073472585583e-08,
3451 -2.43340614156596823496e-06,
3452 -1.73028895751305206302e-04,
3453 -6.97572385963986435018e-03,
3454 -1.22611180822657148235e-01,
3455 -3.53155960776544875667e-01,
3456 +1.52530022733894777053e+00,
3457 };
3458
3459 static const T B[] = {
3460 -5.75674448366501715755e-18,
3461 +1.79405087314755922667e-17,
3462 -5.68946255844285935196e-17,
3463 +1.83809354436663880070e-16,
3464 -6.05704724837331885336e-16,
3465 +2.03870316562433424052e-15,
3466 -7.01983709041831346144e-15,
3467 +2.47715442448130437068e-14,
3468 -8.97670518232499435011e-14,
3469 +3.34841966607842919884e-13,
3470 -1.28917396095102890680e-12,
3471 +5.13963967348173025100e-12,
3472 -2.12996783842756842877e-11,
3473 +9.21831518760500529508e-11,
3474 -4.19035475934189648750e-10,
3475 +2.01504975519703286596e-09,
3476 -1.03457624656780970260e-08,
3477 +5.74108412545004946722e-08,
3478 -3.50196060308781257119e-07,
3479 +2.40648494783721712015e-06,
3480 -1.93619797416608296024e-05,
3481 +1.95215518471351631108e-04,
3482 -2.85781685962277938680e-03,
3483 +1.03923736576817238437e-01,
3484 +2.72062619048444266945e+00,
3485 };
3486
3487 if (x == T(0.0)) {
3488 return std::numeric_limits<T>::infinity();
3489 }
3490
3491 if (x < T(0.0)) {
3492 return std::numeric_limits<T>::quiet_NaN();
3493 }
3494
3495 T p;
3496 T q = 0.0;
3497
3498 if (x <= T(2.0)) {
3499 T a = A[0];
3500
3501 for (uint8_t index = 1; index < 11; index++) {
3502 p = q;
3503 q = a;
3504 a = (x * x - T(2.0)) * q - p + A[index];
3505 }
3506
3507 return std::log(T(0.5) * x) * modified_bessel_i1_forward(x) + T(0.5) * (a - p) / x;
3508 }
3509
3510 T b = B[0];
3511
3512 for (uint8_t index = 1; index < 25; index++) {
3513 p = q;
3514 q = b;
3515 b = (T(8.0) / x - T(2.0)) * q - p + B[index];
3516 }
3517
3518 return std::exp(-x) * (T(0.5) * (b - p)) / std::sqrt(x);
3519 } // modified_bessel_k1_forward(T x)
3520
3521 template<typename T>
scaled_modified_bessel_k0_forward(T x)3522 inline C10_HOST_DEVICE T scaled_modified_bessel_k0_forward(T x) {
3523 static const T A[] = {
3524 +1.37446543561352307156e-16,
3525 +4.25981614279661018399e-14,
3526 +1.03496952576338420167e-11,
3527 +1.90451637722020886025e-09,
3528 +2.53479107902614945675e-07,
3529 +2.28621210311945178607e-05,
3530 +1.26461541144692592338e-03,
3531 +3.59799365153615016266e-02,
3532 +3.44289899924628486886e-01,
3533 -5.35327393233902768720e-01,
3534 };
3535
3536 static const T B[] = {
3537 +5.30043377268626276149e-18,
3538 -1.64758043015242134646e-17,
3539 +5.21039150503902756861e-17,
3540 -1.67823109680541210385e-16,
3541 +5.51205597852431940784e-16,
3542 -1.84859337734377901440e-15,
3543 +6.34007647740507060557e-15,
3544 -2.22751332699166985548e-14,
3545 +8.03289077536357521100e-14,
3546 -2.98009692317273043925e-13,
3547 +1.14034058820847496303e-12,
3548 -4.51459788337394416547e-12,
3549 +1.85594911495471785253e-11,
3550 -7.95748924447710747776e-11,
3551 +3.57739728140030116597e-10,
3552 -1.69753450938905987466e-09,
3553 +8.57403401741422608519e-09,
3554 -4.66048989768794782956e-08,
3555 +2.76681363944501510342e-07,
3556 -1.83175552271911948767e-06,
3557 +1.39498137188764993662e-05,
3558 -1.28495495816278026384e-04,
3559 +1.56988388573005337491e-03,
3560 -3.14481013119645005427e-02,
3561 +2.44030308206595545468e+00,
3562 };
3563
3564 if (x == T(0.0)) {
3565 return std::numeric_limits<T>::infinity();
3566 }
3567
3568 if (x < T(0.0)) {
3569 return std::numeric_limits<T>::quiet_NaN();
3570 }
3571
3572 T p;
3573 T q = 0.0;
3574
3575 if (x <= T(2.0)) {
3576 T a = A[0];
3577
3578 for (uint64_t index = 1; index < 10; index++) {
3579 p = q;
3580 q = a;
3581 a = (x * x - T(2.0)) * q - p + A[index];
3582 }
3583
3584 return (T(0.5) * (a - p) - std::log(T(0.5) * x) * modified_bessel_i0_forward(x)) * std::exp(x);
3585 }
3586
3587 T b = B[0];
3588
3589 for (uint64_t index = 1; index < 25; index++) {
3590 p = q;
3591 q = b;
3592 b = (T(8.0) / x - T(2.0)) * q - p + B[index];
3593 }
3594
3595 return T(0.5) * (b - p) / std::sqrt(x);
3596 } // T scaled_modified_bessel_k0_forward(T x)
3597
3598 template<typename T>
scaled_modified_bessel_k1_forward(T x)3599 inline C10_HOST_DEVICE T scaled_modified_bessel_k1_forward(T x) {
3600 static const T A[] = {
3601 -7.02386347938628759343e-18,
3602 -2.42744985051936593393e-15,
3603 -6.66690169419932900609e-13,
3604 -1.41148839263352776110e-10,
3605 -2.21338763073472585583e-08,
3606 -2.43340614156596823496e-06,
3607 -1.73028895751305206302e-04,
3608 -6.97572385963986435018e-03,
3609 -1.22611180822657148235e-01,
3610 -3.53155960776544875667e-01,
3611 +1.52530022733894777053e+00,
3612 };
3613
3614 static const T B[] = {
3615 -5.75674448366501715755e-18,
3616 +1.79405087314755922667e-17,
3617 -5.68946255844285935196e-17,
3618 +1.83809354436663880070e-16,
3619 -6.05704724837331885336e-16,
3620 +2.03870316562433424052e-15,
3621 -7.01983709041831346144e-15,
3622 +2.47715442448130437068e-14,
3623 -8.97670518232499435011e-14,
3624 +3.34841966607842919884e-13,
3625 -1.28917396095102890680e-12,
3626 +5.13963967348173025100e-12,
3627 -2.12996783842756842877e-11,
3628 +9.21831518760500529508e-11,
3629 -4.19035475934189648750e-10,
3630 +2.01504975519703286596e-09,
3631 -1.03457624656780970260e-08,
3632 +5.74108412545004946722e-08,
3633 -3.50196060308781257119e-07,
3634 +2.40648494783721712015e-06,
3635 -1.93619797416608296024e-05,
3636 +1.95215518471351631108e-04,
3637 -2.85781685962277938680e-03,
3638 +1.03923736576817238437e-01,
3639 +2.72062619048444266945e+00,
3640 };
3641
3642 if (x == T(0.0)) {
3643 return std::numeric_limits<T>::infinity();
3644 }
3645
3646 if (x < T(0.0)) {
3647 return std::numeric_limits<T>::quiet_NaN();
3648 }
3649
3650 T p;
3651 T q = 0.0;
3652
3653 if (x <= T(2.0)) {
3654 T a = A[0];
3655
3656 for (uint64_t index = 1; index < 11; index++) {
3657 p = q;
3658 q = a;
3659 a = (x * x - T(2.0)) * q - p + A[index];
3660 }
3661
3662 return (std::log(T(0.5) * x) * modified_bessel_i1_forward(x) + T(0.5) * (a - p) / x) * std::exp(x);
3663 }
3664
3665 T b = B[0];
3666
3667 for (uint64_t index = 1; index < 25; index++) {
3668 p = q;
3669 q = b;
3670 b = (T(8.0) / x - T(2.0)) * q - p + B[index];
3671 }
3672
3673 return (T(0.5) * (b - p) / std::sqrt(x));
3674 } // T scaled_modified_bessel_k1_forward(T x)
3675
3676 template<typename T>
shifted_chebyshev_polynomial_t_forward(T x,int64_t n)3677 inline C10_HOST_DEVICE T shifted_chebyshev_polynomial_t_forward(T x, int64_t n) {
3678 if (n < 0) {
3679 return T(0.0);
3680 }
3681
3682 if (x == T(1.0)) {
3683 return T(1.0);
3684 }
3685
3686 if (x == T(0.0)) {
3687 if (n % 2 == 0) {
3688 return T(1.0);
3689 }
3690
3691 return T(-1.0);
3692 }
3693
3694 if ((n > 6) && (std::abs(x + x - T(1.0)) < T(1.0))) {
3695 return std::cos(n * std::acos(x + x - T(1.0)));
3696 }
3697
3698 if (n == 0) {
3699 return T(1.0);
3700 }
3701
3702 if (n == 1) {
3703 return x + x - T(1.0);
3704 }
3705
3706 T p = T(1.0);
3707 T q = x + x - T(1.0);
3708 T r;
3709
3710 for (int64_t k = 2; k <= n; k++) {
3711 r = (x + x - T(1.0) + (x + x - T(1.0))) * q - p;
3712 p = q;
3713 q = r;
3714 }
3715
3716 return r;
3717 } // shifted_chebyshev_polynomial_t_forward(T x, int64_t n)
3718
3719 template<typename T, bool is_cuda=false>
shifted_chebyshev_polynomial_t_forward(T x,T n)3720 inline C10_HOST_DEVICE T shifted_chebyshev_polynomial_t_forward(T x, T n) {
3721 return shifted_chebyshev_polynomial_t_forward(x, static_cast<int64_t>(n));
3722 } // shifted_chebyshev_polynomial_t_forward(T x, T n)
3723
3724 template<typename T>
shifted_chebyshev_polynomial_u_forward(T x,int64_t n)3725 inline C10_HOST_DEVICE T shifted_chebyshev_polynomial_u_forward(T x, int64_t n) {
3726 if (n < 0) {
3727 return T(0.0);
3728 }
3729
3730 if (x == T(1.0)) {
3731 return n + 1;
3732 }
3733
3734 if (x == T(0.0)) {
3735 if (n % 2 == 0) {
3736 return n + 1;
3737 }
3738
3739 return -(n + 1);
3740 }
3741
3742 if ((n > 6) && (std::abs(x + x - T(1.0)) < T(1.0))) {
3743 if (std::sin(std::acos(x + x - T(1.0))) != T(0.0)) {
3744 return std::sin((n + 1) * std::acos(x + x - T(1.0))) / std::sin(std::acos(x + x - T(1.0)));
3745 }
3746
3747 return (n + 1) * std::cos((n + 1) * std::acos(x + x - T(1.0))) / (x + x - T(1.0));
3748 }
3749
3750 if (n == 0) {
3751 return T(1.0);
3752 }
3753
3754 if (n == 1) {
3755 return x + x - T(1.0) + (x + x - T(1.0));
3756 }
3757
3758 T p = T(1.0);
3759 T q = x + x - T(1.0) + (x + x - T(1.0));
3760 T r;
3761
3762 for (int64_t k = 2; k <= n; k++) {
3763 r = (x + x - T(1.0) + (x + x - T(1.0))) * q - p;
3764 p = q;
3765 q = r;
3766 }
3767
3768 return r;
3769 } // shifted_chebyshev_polynomial_u_forward(T x, int64_t n)
3770
3771 template<typename T, bool is_cuda=false>
shifted_chebyshev_polynomial_u_forward(T x,T n)3772 inline C10_HOST_DEVICE T shifted_chebyshev_polynomial_u_forward(T x, T n) {
3773 return shifted_chebyshev_polynomial_u_forward(x, static_cast<int64_t>(n));
3774 } // shifted_chebyshev_polynomial_u_forward(T x, T n)
3775
3776 template<typename T>
shifted_chebyshev_polynomial_v_forward(T x,int64_t n)3777 inline C10_HOST_DEVICE T shifted_chebyshev_polynomial_v_forward(T x, int64_t n) {
3778 if (n < 0) {
3779 return T(0.0);
3780 }
3781
3782 if (x == T(1.0)) {
3783 return T(1.0);
3784 }
3785
3786 if (x == T(0.0)) {
3787 if (n % 2 == 0) {
3788 return (n + n + 1);
3789 }
3790
3791 return -(n + n + 1);
3792 }
3793
3794 if ((n > 6) && (std::abs(x + x - T(1.0)) < T(1.0))) {
3795 if (std::sin(std::acos(x + x - T(1.0)) / T(2.0)) != T(1.0)) {
3796 return std::cos(((n) + T(0.5)) * std::acos(x + x - T(1.0))) / std::cos(std::acos(x + x - T(1.0)) / T(2.0));
3797 }
3798
3799 if (n % 2 == 0) {
3800 return n + n + 1;
3801 }
3802
3803 return -(n + n + 1);
3804 }
3805
3806 if (n == 0) {
3807 return T(1.0);
3808 }
3809
3810 if (n == 1) {
3811 return x + x - T(1.0) + (x + x - T(1.0)) - T(1.0);
3812 }
3813
3814 T p = T(1.0);
3815 T q = x + x - T(1.0) + (x + x - T(1.0)) - T(1.0);
3816 T r;
3817
3818 for (int64_t k = 2; k <= n; k++) {
3819 r = (x + x - T(1.0) + (x + x - T(1.0))) * q - p;
3820 p = q;
3821 q = r;
3822 }
3823
3824 return r;
3825 } // shifted_chebyshev_polynomial_v_forward(T x, int64_t n)
3826
3827 template<typename T, bool is_cuda=false>
shifted_chebyshev_polynomial_v_forward(T x,T n)3828 inline C10_HOST_DEVICE T shifted_chebyshev_polynomial_v_forward(T x, T n) {
3829 return shifted_chebyshev_polynomial_v_forward(x, static_cast<int64_t>(n));
3830 } // shifted_chebyshev_polynomial_v_forward(T x, T n)
3831
3832 template<typename T>
shifted_chebyshev_polynomial_w_forward(T x,int64_t n)3833 inline C10_HOST_DEVICE T shifted_chebyshev_polynomial_w_forward(T x, int64_t n) {
3834 if (n < 0) {
3835 return T(0.0);
3836 }
3837
3838 if (x == T(1.0)) {
3839 return n + n + 1;
3840 }
3841
3842 if (x == T(0.0)) {
3843 if (n % 2 == 0) {
3844 return T(1.0);
3845 }
3846
3847 return T(-1.0);
3848 }
3849
3850 if ((n > 4) && (std::abs(x + x - T(1.0)) < T(1.0))) {
3851 if (std::cos(std::acos(x + x - T(1.0)) / T(2.0)) != T(1.0)) {
3852 return std::sin((n + T(0.5)) * std::acos(x + x - T(1.0))) / std::sin(std::acos(x + x - T(1.0)) / T(2.0));
3853 }
3854
3855 if (n % 2 == 0) {
3856 return T(1.0);
3857 }
3858
3859 return T(-1.0);
3860 }
3861
3862 if (n == 0) {
3863 return T(1.0);
3864 }
3865
3866 if (n == 1) {
3867 return x + x - T(1.0) + (x + x - T(1.0)) + T(1.0);
3868 }
3869
3870 T p = T(1.0);
3871 T q = x + x - T(1.0) + (x + x - T(1.0)) + T(1.0);
3872 T r;
3873
3874 for (int64_t k = 2; k <= n; k++) {
3875 r = (x + x - T(1.0) + (x + x - T(1.0))) * q - p;
3876 p = q;
3877 q = r;
3878 }
3879
3880 return r;
3881 } // shifted_chebyshev_polynomial_w_forward(T x, int64_t n)
3882
3883 template<typename T, bool is_cuda=false>
shifted_chebyshev_polynomial_w_forward(T x,T n)3884 inline C10_HOST_DEVICE T shifted_chebyshev_polynomial_w_forward(T x, T n) {
3885 return shifted_chebyshev_polynomial_w_forward(x, static_cast<int64_t>(n));
3886 } // shifted_chebyshev_polynomial_w_forward(T x, T n)
3887
3888 template<typename T>
spherical_bessel_j0_forward(T x)3889 inline C10_HOST_DEVICE T spherical_bessel_j0_forward(T x) {
3890 if (std::isinf(x)) {
3891 return T(0.0);
3892 }
3893
3894 if (std::abs(x) < T(0.5)) {
3895 return T(1.0) + x * x * (T(-1.0) / T(6.0) + x * x * (T(1.0) / T(120.0) + x * x * (T(-1.0) / T(5040.0) + x * x * (T(1.0) / T(362880.0) + x * x * (T(-1.0) / T(39916800.0) + x * x * (T(1.0) / T(6227020800.0)))))));
3896 }
3897
3898 return std::sin(x) / x;
3899 } // T spherical_bessel_j0_forward(T x)
3900
3901 C10_CLANG_DIAGNOSTIC_POP()
3902