xref: /aosp_15_r20/external/pytorch/aten/src/ATen/core/TransformationHelper.h (revision da0073e96a02ea20f0ac840b70461e3646d07c45)
1 #include <ATen/NumericUtils.h>
2 #include <c10/macros/Macros.h>
3 #include <c10/util/Half.h>
4 #include <c10/util/BFloat16.h>
5 #include <c10/util/MathConstants.h>
6 #include <cmath>
7 #include <cstdint>
8 #include <cassert>
9 #include <limits>
10 #include <type_traits>
11 
12 namespace at {
13 
14 // Using DistAccumType in accumulate types for distributions.
15 // Note: Ideally we'd be using ATen/AccumulateType.h but looks
16 // like the there is some inconsistency in how accumulate types
17 // are mapped currently, e.g. for the cpu side, float is mapped
18 // to double.
19 template <typename T>
20 struct DistAccumType {  };
21 
22 #if defined(__CUDACC__) || defined(__HIPCC__)
23 template <> struct DistAccumType<half> { using type = float; };
24 #endif
25 template <> struct DistAccumType<BFloat16> { using type = float; };
26 template <> struct DistAccumType<Half> { using type = float; };
27 template <> struct DistAccumType<float> { using type = float; };
28 template <> struct DistAccumType<double> { using type = double; };
29 
30 template <typename T>
31 using dist_acctype = typename DistAccumType<T>::type;
32 
33 namespace transformation {
34 
/**
 * A transformation function for `torch.Tensor.random_()`, when both `from` and `to` are specified.
 * `range` is `to - from`
 * `base` is `from`
 *
 * `val` is reduced modulo `range` and then shifted by `base`. The sum is computed in
 * unsigned 64-bit arithmetic (so it wraps instead of overflowing) and reinterpreted as
 * `int64_t` before the final conversion to `T`.
 * `range` is used as a modulus and therefore must be non-zero.
 */
template <typename T, typename V>
C10_HOST_DEVICE inline T uniform_int_from_to(V val, uint64_t range, int64_t base) {
  // Offset of the sample inside [0, range).
  const uint64_t offset = val % range;
  // Adding a signed base in unsigned arithmetic wraps modulo 2^64.
  const uint64_t shifted = offset + static_cast<uint64_t>(base);
  const int64_t as_signed = static_cast<int64_t>(shifted);
  return static_cast<T>(as_signed);
}
44 
/**
 * A transformation function for `torch.Tensor.random_()`, when `from=min_value(int64_t)` and to=None
 *
 * No reduction is applied: `val` is converted to `int64_t` and then to `T`.
 */
template <typename T, typename V>
C10_HOST_DEVICE inline T uniform_int_full_range(V val) {
  const int64_t as_signed = static_cast<int64_t>(val);
  return static_cast<T>(as_signed);
}
52 
/**
 * A transformation function for `torch.Tensor.random_()`, when used without specifying `from` and `to`.
 * In order to prevent compiler warnings reported in GitHub issue 46391, T can't be float or double
 * in this overloaded version
 *
 * Result by target type `T`:
 *  - `bool`: the lowest bit of `val`.
 *  - `at::Half` / `at::BFloat16`: `val` modulo `2^digits + 1`, where `digits` is
 *    `std::numeric_limits<T>::digits`.
 *  - integral types: `val` modulo `max(T) + 1`. When `max(T)` equals the maximum of
 *    `uint64_t`, `max(T) + 1` wraps to zero in 64-bit arithmetic, so `val` is returned
 *    unreduced instead of taking a remainder by zero.
 *  - any other type: asserts at runtime and returns 0.
 */
template <typename T, typename V>
C10_HOST_DEVICE inline std::enable_if_t<!(std::is_floating_point_v<T>), T> uniform_int(V val) {
  if constexpr (std::is_same_v<T, bool>) {
    return static_cast<bool>(val & 1);
  } else if constexpr (std::is_same_v<T, at::Half> || std::is_same_v<T, at::BFloat16>) {
    return static_cast<T>(val % static_cast<uint64_t>((1ULL << std::numeric_limits<T>::digits) + 1));
  } else if constexpr (std::is_integral_v<T>) {
    // Covers int64_t as well: it previously had its own branch with the same expression.
    constexpr uint64_t max_value = static_cast<uint64_t>(std::numeric_limits<T>::max());
    if constexpr (max_value == std::numeric_limits<uint64_t>::max()) {
      // max_value + 1 would wrap to 0 and `val % 0` is undefined behavior;
      // every 64-bit pattern is already a valid value of T.
      return static_cast<T>(val);
    } else {
      return static_cast<T>(val % (max_value + 1));
    }
  } else {
    assert(false);
    return 0;
  }
}
73 
/**
 * An overloaded transformation function for `torch.Tensor.random_()`, when used without specifying `from` and `to`,
 * added to fix compiler warnings reported in GitHub issue 46391. T is either float or double in this version.
 *
 * Returns `val` modulo `2^digits + 1`, converted to `T`, where `digits` is
 * `std::numeric_limits<T>::digits`.
 */
template <typename T, typename V>
C10_HOST_DEVICE inline std::enable_if_t<std::is_floating_point_v<T>, T> uniform_int(V val) {
  // One more than 2^digits, so the remainder spans [0, 2^digits].
  const uint64_t modulus = static_cast<uint64_t>((1ULL << std::numeric_limits<T>::digits) + 1);
  const auto reduced = val % modulus;
  return static_cast<T>(reduced);
}
82 
83 template <typename T, typename V>
84 C10_HOST_DEVICE inline dist_acctype<T> uniform_real(V val, T from, T to) {
85   constexpr auto MASK = static_cast<V>((static_cast<uint64_t>(1) << std::numeric_limits<T>::digits) - 1);
86   constexpr auto DIVISOR = static_cast<dist_acctype<T>>(1) / (static_cast<uint64_t>(1) << std::numeric_limits<T>::digits);
87   dist_acctype<T> x = (val & MASK) * DIVISOR;
88   return (x * (to - from) + from);
89 }
90 
/**
 * Transforms normally distributed `val` with mean 0.0 and standard deviation 1.0 to
 * normally distributed with `mean` and standard deviation `std`.
 *
 * Computed as `val * std + mean`.
 */
template <typename T>
C10_HOST_DEVICE inline T normal(T val, T mean, T std) {
  // `auto` keeps whatever type the multiplication yields before the shift is applied.
  const auto scaled = val * std;
  return scaled + mean;
}
99 
100 /**
101  * Transforms uniformly distributed `val` between 0.0 and 1.0 to
102  * Cauchy distribution with location parameter `median` and scale parameter `sigma`.
103  */
104 template <typename T>
105 C10_HOST_DEVICE inline T cauchy(T val, T median, T sigma) {
106   // https://en.wikipedia.org/wiki/Cauchy_distribution#Cumulative_distribution_function
107   // __tanf overflows and returns `inf/-inf` when (val > 1 - eps) or (val < 0 + eps),
108   // thus we clip those values.
109   constexpr T eps = std::numeric_limits<T>::epsilon();
110   constexpr T one_minus_eps = 1 - eps;
111   constexpr T zero_plus_eps = 0 + eps;
112   val = (val > one_minus_eps ? one_minus_eps : val);
113   val = (val < zero_plus_eps ? zero_plus_eps : val);
114   return median + sigma * at::tan(c10::pi<T> * (val - static_cast<T>(0.5)));
115 }
116 
117 template <>
118 C10_HOST_DEVICE inline double cauchy(double val, double median, double sigma) {
119   // https://en.wikipedia.org/wiki/Cauchy_distribution#Cumulative_distribution_function
120   return median + sigma * at::tan(c10::pi<double> * (val - static_cast<double>(0.5)));
121 }
122 
123 /**
124  * Transforms uniformly distributed `val` between 0.0 and 1.0 to
125  * exponentially distributed with `lambda` parameter of the distribution.
126  */
127 template <typename T>
128 C10_HOST_DEVICE inline T exponential(T val, T lambda) {
129   // https://en.wikipedia.org/wiki/Exponential_distribution#Generating_exponential_variates
130   // Different implementations for CUDA and CPU to preserve original logic
131   // TODO: must be investigated and unified!!!
132   // https://github.com/pytorch/pytorch/issues/38662
133 #if defined(__CUDACC__) || defined(__HIPCC__)
134       // BEFORE TOUCHING THIS CODE READ: https://github.com/pytorch/pytorch/issues/16706
135       // curand_uniform has (0,1] bounds. log(1) is 0 and exponential excludes 0.
136       // we need log to be not 0, and not underflow when converted to half
137       // fast __logf approximation can underflow, so set log to -epsilon/2 for 1 or close to 1 args
138   auto log = val >= static_cast<T>(1.) - std::numeric_limits<T>::epsilon() / 2
139       ? -std::numeric_limits<T>::epsilon() / 2
140       : at::log(val);
141   return static_cast<T>(-1.0) / lambda * log;
142 #else
143   return static_cast<T>(-1.0) / lambda * at::log1p(-val);
144 #endif
145 }
146 
147 /**
148  * Transforms uniformly distributed `val` between 0.0 and 1.0 to
149  * geometrically distributed with success probability `p`.
150  */
151 template <typename T>
152 C10_HOST_DEVICE inline T geometric(T val, T p) {
153   // https://en.wikipedia.org/wiki/Geometric_distribution#Related_distributions
154   return static_cast<T>(::ceil(at::log(val) / at::log1p(-p)));
155 }
156 
157 /**
158  * Transforms normally distributed `val` to log-normally distributed.
159  */
160 template <typename T>
161 C10_HOST_DEVICE inline T log_normal(T val) {
162   // https://en.wikipedia.org/wiki/Log-normal_distribution#Mode,_median,_quantiles
163   return at::exp(val);
164 }
165 
/**
 * Transforms uniformly distributed `val` between 0.0 and 1.0 to
 * bernoulli distributed with success probability `p`.
 *
 * Yields the comparison `val < p` converted to `T` (1 on success, 0 otherwise).
 */
template <typename T>
C10_HOST_DEVICE inline T bernoulli(T val, T p) {
  const bool success = val < p;
  return success;
}
174 
175 }} // namespace at::transformation
176