#pragma once

#include <ATen/NumericUtils.h>
#include <c10/macros/Macros.h>
#include <c10/util/Half.h>
#include <c10/util/BFloat16.h>
#include <c10/util/MathConstants.h>
#include <cmath>
#include <cstdint>
#include <cassert>
#include <limits>
#include <type_traits>

namespace at {

// DistAccumType selects the accumulation type used for distributions.
// Note: Ideally we'd be using ATen/AccumulateType.h, but it looks
// like there is some inconsistency in how accumulate types are
// mapped currently, e.g. on the CPU side, float is mapped to double.
template <typename T>
struct DistAccumType { };

#if defined(__CUDACC__) || defined(__HIPCC__)
template <> struct DistAccumType<half> { using type = float; };
#endif
template <> struct DistAccumType<BFloat16> { using type = float; };
template <> struct DistAccumType<Half> { using type = float; };
template <> struct DistAccumType<float> { using type = float; };
template <> struct DistAccumType<double> { using type = double; };

template <typename T>
using dist_acctype = typename DistAccumType<T>::type;

namespace transformation {

/**
 * A transformation function for `torch.Tensor.random_()`, when both `from` and `to` are specified.
 * `range` is `to - from`
 * `base` is `from`
 */
template <typename T, typename V>
C10_HOST_DEVICE inline T uniform_int_from_to(V val, uint64_t range, int64_t base) {
  return static_cast<T>(static_cast<int64_t>((val % range) + base));
}

/**
 * A transformation function for `torch.Tensor.random_()`, when `from=min_value(int64_t)` and `to=None`.
 */
template <typename T, typename V>
C10_HOST_DEVICE inline T uniform_int_full_range(V val) {
  return static_cast<T>(static_cast<int64_t>(val));
}

/**
 * A transformation function for `torch.Tensor.random_()`, when used without specifying `from` and `to`.
 * To prevent the compiler warnings reported in GitHub issue 46391, T can't be float or double
 * in this overload.
 */
template <typename T, typename V>
C10_HOST_DEVICE inline std::enable_if_t<!std::is_floating_point_v<T>, T> uniform_int(V val) {
  if constexpr (std::is_same_v<T, bool>) {
    return static_cast<bool>(val & 1);
  } else if constexpr (std::is_same_v<T, int64_t>) {
    return static_cast<T>(val % (static_cast<uint64_t>(std::numeric_limits<T>::max()) + 1));
  } else if constexpr (std::is_same_v<T, at::Half> || std::is_same_v<T, at::BFloat16>) {
    // Restrict to [0, 2^digits], the integers exactly representable by T,
    // where `digits` is the width of T's mantissa.
    return static_cast<T>(val % static_cast<uint64_t>((1ULL << std::numeric_limits<T>::digits) + 1));
  } else if constexpr (std::is_integral_v<T>) {
    return static_cast<T>(val % (static_cast<uint64_t>(std::numeric_limits<T>::max()) + 1));
  } else {
    assert(false);
    return 0;
  }
}
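
// Usage sketch (illustrative only): the bit pattern below is an arbitrary,
// hypothetical stand-in for raw output of a random engine; this header does
// not generate randomness itself, it only transforms it.
//
//   uint64_t rand_bits = 0x9E3779B97F4A7C15ULL;  // hypothetical raw 64 random bits
//   // Sample an int32_t in [from, to) = [-5, 5): range = to - from = 10, base = from = -5.
//   int32_t sample = uniform_int_from_to<int32_t>(rand_bits, /*range=*/10, /*base=*/-5);
//   // (rand_bits % 10) lies in [0, 9], so `sample` lies in [-5, 4].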

/**
 * An overloaded transformation function for `torch.Tensor.random_()`, when used without specifying `from` and `to`,
 * added to fix the compiler warnings reported in GitHub issue 46391. T is either float or double in this version.
 */
template <typename T, typename V>
C10_HOST_DEVICE inline std::enable_if_t<std::is_floating_point_v<T>, T> uniform_int(V val) {
  return static_cast<T>(val % static_cast<uint64_t>((1ULL << std::numeric_limits<T>::digits) + 1));
}

template <typename T, typename V>
C10_HOST_DEVICE inline dist_acctype<T> uniform_real(V val, T from, T to) {
  constexpr auto MASK = static_cast<V>((static_cast<uint64_t>(1) << std::numeric_limits<T>::digits) - 1);
  constexpr auto DIVISOR = static_cast<dist_acctype<T>>(1) / (static_cast<uint64_t>(1) << std::numeric_limits<T>::digits);
  dist_acctype<T> x = (val & MASK) * DIVISOR;
  return (x * (to - from) + from);
}

/**
 * Transforms normally distributed `val` with mean 0.0 and standard deviation 1.0 to
 * normally distributed with mean `mean` and standard deviation `std`.
 */
template <typename T>
C10_HOST_DEVICE inline T normal(T val, T mean, T std) {
  return val * std + mean;
}

/**
 * Transforms uniformly distributed `val` between 0.0 and 1.0 to
 * Cauchy distributed with location parameter `median` and scale parameter `sigma`.
 */
template <typename T>
C10_HOST_DEVICE inline T cauchy(T val, T median, T sigma) {
  // https://en.wikipedia.org/wiki/Cauchy_distribution#Cumulative_distribution_function
  // __tanf overflows and returns `inf/-inf` when (val > 1 - eps) or (val < 0 + eps),
  // thus we clip those values.
  constexpr T eps = std::numeric_limits<T>::epsilon();
  constexpr T one_minus_eps = 1 - eps;
  constexpr T zero_plus_eps = 0 + eps;
  val = (val > one_minus_eps ? one_minus_eps : val);
  val = (val < zero_plus_eps ? zero_plus_eps : val);
  return median + sigma * at::tan(c10::pi<T> * (val - static_cast<T>(0.5)));
}

template <>
C10_HOST_DEVICE inline double cauchy(double val, double median, double sigma) {
  // https://en.wikipedia.org/wiki/Cauchy_distribution#Cumulative_distribution_function
  // No clipping is needed here: the double path does not use the fast __tanf approximation.
  return median + sigma * at::tan(c10::pi<double> * (val - static_cast<double>(0.5)));
}

/**
 * Transforms uniformly distributed `val` between 0.0 and 1.0 to
 * exponentially distributed with rate parameter `lambda`.
 */
template <typename T>
C10_HOST_DEVICE inline T exponential(T val, T lambda) {
  // https://en.wikipedia.org/wiki/Exponential_distribution#Generating_exponential_variates
  // Different implementations for CUDA and CPU to preserve original logic
  // TODO: must be investigated and unified!!!
  // https://github.com/pytorch/pytorch/issues/38662
#if defined(__CUDACC__) || defined(__HIPCC__)
  // BEFORE TOUCHING THIS CODE READ: https://github.com/pytorch/pytorch/issues/16706
  // curand_uniform has (0,1] bounds. log(1) is 0 and the exponential distribution excludes 0.
  // We need log to be non-zero, and to not underflow when converted to half.
  // The fast __logf approximation can underflow, so set log to -epsilon/2 for args that are 1 or close to 1.
  auto log = val >= static_cast<T>(1.) - std::numeric_limits<T>::epsilon() / 2
      ? -std::numeric_limits<T>::epsilon() / 2
      : at::log(val);
  return static_cast<T>(-1.0) / lambda * log;
#else
  return static_cast<T>(-1.0) / lambda * at::log1p(-val);
#endif
}
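
// Worked inverse-CDF example for the CPU branch above (a sketch; the numeric
// value is hypothetical): for U uniform on (0, 1), inverting the exponential
// CDF F(x) = 1 - exp(-lambda * x) gives x = -log(1 - U) / lambda, which is the
// log1p form used above. The CUDA branch computes -log(U) / lambda instead,
// which has the same distribution because U and 1 - U are both uniform.
//
//   double u = 0.25;                            // hypothetical uniform draw
//   double x = exponential(u, /*lambda=*/2.0);  // CPU: -log1p(-0.25) / 2 ≈ 0.1438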

/**
 * Transforms uniformly distributed `val` between 0.0 and 1.0 to
 * geometrically distributed with success probability `p`.
 */
template <typename T>
C10_HOST_DEVICE inline T geometric(T val, T p) {
  // https://en.wikipedia.org/wiki/Geometric_distribution#Related_distributions
  return static_cast<T>(::ceil(at::log(val) / at::log1p(-p)));
}

/**
 * Transforms normally distributed `val` to log-normally distributed.
 */
template <typename T>
C10_HOST_DEVICE inline T log_normal(T val) {
  // https://en.wikipedia.org/wiki/Log-normal_distribution#Mode,_median,_quantiles
  return at::exp(val);
}

/**
 * Transforms uniformly distributed `val` between 0.0 and 1.0 to
 * Bernoulli distributed with success probability `p`.
 */
template <typename T>
C10_HOST_DEVICE inline T bernoulli(T val, T p) {
  return val < p;
}

}} // namespace at::transformation
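
// End-to-end sketch of composing these transformations (illustrative only;
// the bit pattern is an arbitrary, hypothetical stand-in for the output of a
// random engine):
//
//   uint64_t raw = 0xDEADBEEFCAFEF00DULL;
//   // uniform_real keeps the low 24 mantissa bits and maps them into [from, to) = [0, 1):
//   float u = at::transformation::uniform_real<float>(raw, 0.0f, 1.0f);
//   // Turn the uniform draw into a Cauchy sample with median 0 and sigma 1:
//   float c = at::transformation::cauchy<float>(u, 0.0f, 1.0f);
//   // Or into a Bernoulli sample with success probability 0.3:
//   float b = at::transformation::bernoulli<float>(u, 0.3f);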