1 /* 2 * Copyright (c) Meta Platforms, Inc. and affiliates. 3 * All rights reserved. 4 * 5 * This source code is licensed under the BSD-style license found in the 6 * LICENSE file in the root directory of this source tree. 7 */ 8 9 //===-- llvm/Support/MathExtras.h - Useful math functions -------*- C++ -*-===// 10 // 11 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 12 // See https://llvm.org/LICENSE.txt for license information. 13 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 14 // 15 //===----------------------------------------------------------------------===// 16 // 17 // This file contains some functions that are useful for math stuff. 18 // 19 //===----------------------------------------------------------------------===// 20 21 #pragma once 22 23 #include <algorithm> 24 #include <cassert> 25 #include <climits> 26 #include <cmath> 27 #include <cstdint> 28 #include <cstring> 29 #include <limits> 30 #include <type_traits> 31 32 #ifdef __ANDROID_NDK__ 33 #include <android/api-level.h> 34 #endif 35 36 #ifndef __has_builtin 37 #define __has_builtin(x) 0 38 #endif 39 40 #ifndef LLVM_GNUC_PREREQ 41 #if defined(__GNUC__) && defined(__GNUC_MINOR__) && defined(__GNUC_PATCHLEVEL__) 42 #define LLVM_GNUC_PREREQ(maj, min, patch) \ 43 ((__GNUC__ << 20) + (__GNUC_MINOR__ << 10) + __GNUC_PATCHLEVEL__ >= \ 44 ((maj) << 20) + ((min) << 10) + (patch)) 45 #elif defined(__GNUC__) && defined(__GNUC_MINOR__) 46 #define LLVM_GNUC_PREREQ(maj, min, patch) \ 47 ((__GNUC__ << 20) + (__GNUC_MINOR__ << 10) >= ((maj) << 20) + ((min) << 10)) 48 #else 49 #define LLVM_GNUC_PREREQ(maj, min, patch) 0 50 #endif 51 #endif 52 53 #ifdef _MSC_VER 54 // Declare these intrinsics manually rather including intrin.h. It's very 55 // expensive, and MathExtras.h is popular. 
// #include <intrin.h>
// Hand-declared MSVC bit-scan intrinsics (see the comment above); declaring
// them directly avoids the heavyweight <intrin.h> include.
extern "C" {
unsigned char _BitScanForward(unsigned long* _Index, unsigned long _Mask);
unsigned char _BitScanForward64(unsigned long* _Index, unsigned __int64 _Mask);
unsigned char _BitScanReverse(unsigned long* _Index, unsigned long _Mask);
unsigned char _BitScanReverse64(unsigned long* _Index, unsigned __int64 _Mask);
}
#endif

namespace executorch {
namespace llvm {
/// The behavior an operation has on an input of 0.
enum ZeroBehavior {
  /// The returned value is undefined.
  ZB_Undefined,
  /// The returned value is numeric_limits<T>::max()
  ZB_Max,
  /// The returned value is numeric_limits<T>::digits
  ZB_Width
};

namespace detail {
// Portable fallback used when no builtin/intrinsic specialization below
// matches the type's size.
template <typename T, std::size_t SizeOfT>
struct TrailingZerosCounter {
  static std::size_t count(T Val, ZeroBehavior) {
    if (!Val)
      return std::numeric_limits<T>::digits;
    if (Val & 0x1)
      return 0;

    // Bisection method.
    // Halve the search window each step, accumulating the width of every
    // all-zero low half that gets shifted away.
    std::size_t ZeroBits = 0;
    T Shift = std::numeric_limits<T>::digits >> 1;
    T Mask = std::numeric_limits<T>::max() >> Shift;
    while (Shift) {
      if ((Val & Mask) == 0) {
        Val >>= Shift;
        ZeroBits |= Shift;
      }
      Shift >>= 1;
      Mask >>= Shift;
    }
    return ZeroBits;
  }
};

#if (defined(__GNUC__) && __GNUC__ >= 4) || defined(_MSC_VER)
// 4-byte specialization: use the compiler's count-trailing-zeros primitive.
template <typename T>
struct TrailingZerosCounter<T, 4> {
  static std::size_t count(T Val, ZeroBehavior ZB) {
    if (ZB != ZB_Undefined && Val == 0)
      return 32;

#if __has_builtin(__builtin_ctz) || LLVM_GNUC_PREREQ(4, 0, 0)
    return __builtin_ctz(Val);
#elif defined(_MSC_VER)
    unsigned long Index;
    _BitScanForward(&Index, Val);
    return Index;
#endif
  }
};

// The 64-bit _BitScan*64 intrinsics only exist on 64-bit MSVC targets.
#if !defined(_MSC_VER) || defined(_M_X64)
template <typename T>
struct TrailingZerosCounter<T, 8> {
  static std::size_t count(T Val, ZeroBehavior ZB) {
    if (ZB != ZB_Undefined && Val == 0)
      return 64;

#if __has_builtin(__builtin_ctzll) || LLVM_GNUC_PREREQ(4, 0, 0)
    return __builtin_ctzll(Val);
#elif defined(_MSC_VER)
    unsigned long Index;
    _BitScanForward64(&Index, Val);
    return Index;
#endif
  }
};
#endif
#endif
} // namespace detail

/// Count number of 0's from the least significant bit to the most
/// stopping at the first 1.
///
/// Only unsigned integral types are allowed.
///
/// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
/// valid arguments.
template <typename T>
std::size_t countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
  static_assert(
      std::numeric_limits<T>::is_integer && !std::numeric_limits<T>::is_signed,
      "Only unsigned integral types are allowed.");
  return llvm::detail::TrailingZerosCounter<T, sizeof(T)>::count(Val, ZB);
}

namespace detail {
// Portable fallback mirroring TrailingZerosCounter above.
template <typename T, std::size_t SizeOfT>
struct LeadingZerosCounter {
  static std::size_t count(T Val, ZeroBehavior) {
    if (!Val)
      return std::numeric_limits<T>::digits;

    // Bisection method.
    // Keep the half that contains the highest set bit, accumulating the
    // width of every discarded all-zero high half.
    std::size_t ZeroBits = 0;
    for (T Shift = std::numeric_limits<T>::digits >> 1; Shift; Shift >>= 1) {
      T Tmp = Val >> Shift;
      if (Tmp)
        Val = Tmp;
      else
        ZeroBits |= Shift;
    }
    return ZeroBits;
  }
};

#if (defined(__GNUC__) && __GNUC__ >= 4) || defined(_MSC_VER)
template <typename T>
struct LeadingZerosCounter<T, 4> {
  static std::size_t count(T Val, ZeroBehavior ZB) {
    if (ZB != ZB_Undefined && Val == 0)
      return 32;

#if __has_builtin(__builtin_clz) || LLVM_GNUC_PREREQ(4, 0, 0)
    return __builtin_clz(Val);
#elif defined(_MSC_VER)
    unsigned long Index;
    // _BitScanReverse returns the index of the highest set bit; XOR with 31
    // converts that index into a leading-zero count.
    _BitScanReverse(&Index, Val);
    return Index ^ 31;
#endif
  }
};

#if !defined(_MSC_VER) || defined(_M_X64)
template <typename T>
struct LeadingZerosCounter<T, 8> {
  static std::size_t count(T Val, ZeroBehavior ZB) {
    if (ZB != ZB_Undefined && Val == 0)
      return 64;

#if __has_builtin(__builtin_clzll) || LLVM_GNUC_PREREQ(4, 0, 0)
    return __builtin_clzll(Val);
#elif defined(_MSC_VER)
    unsigned long Index;
    _BitScanReverse64(&Index, Val);
    return Index ^ 63;
#endif
  }
};
#endif
#endif
} // namespace detail

/// Count number of 0's from the most significant bit to the least
/// stopping at the first 1.
///
/// Only unsigned integral types are allowed.
///
/// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
/// valid arguments.
template <typename T>
std::size_t countLeadingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
  static_assert(
      std::numeric_limits<T>::is_integer && !std::numeric_limits<T>::is_signed,
      "Only unsigned integral types are allowed.");
  return llvm::detail::LeadingZerosCounter<T, sizeof(T)>::count(Val, ZB);
}

/// Get the index of the first set bit starting from the least
/// significant bit.
///
/// Only unsigned integral types are allowed.
230 /// 231 /// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are 232 /// valid arguments. 233 template <typename T> 234 T findFirstSet(T Val, ZeroBehavior ZB = ZB_Max) { 235 if (ZB == ZB_Max && Val == 0) 236 return std::numeric_limits<T>::max(); 237 238 return countTrailingZeros(Val, ZB_Undefined); 239 } 240 241 /// Create a bitmask with the N right-most bits set to 1, and all other 242 /// bits set to 0. Only unsigned types are allowed. 243 template <typename T> 244 T maskTrailingOnes(unsigned N) { 245 static_assert(std::is_unsigned<T>::value, "Invalid type!"); 246 const unsigned Bits = CHAR_BIT * sizeof(T); 247 assert(N <= Bits && "Invalid bit index"); 248 return N == 0 ? 0 : (T(-1) >> (Bits - N)); 249 } 250 251 /// Create a bitmask with the N left-most bits set to 1, and all other 252 /// bits set to 0. Only unsigned types are allowed. 253 template <typename T> 254 T maskLeadingOnes(unsigned N) { 255 return ~maskTrailingOnes<T>(CHAR_BIT * sizeof(T) - N); 256 } 257 258 /// Create a bitmask with the N right-most bits set to 0, and all other 259 /// bits set to 1. Only unsigned types are allowed. 260 template <typename T> 261 T maskTrailingZeros(unsigned N) { 262 return maskLeadingOnes<T>(CHAR_BIT * sizeof(T) - N); 263 } 264 265 /// Create a bitmask with the N left-most bits set to 0, and all other 266 /// bits set to 1. Only unsigned types are allowed. 267 template <typename T> 268 T maskLeadingZeros(unsigned N) { 269 return maskTrailingOnes<T>(CHAR_BIT * sizeof(T) - N); 270 } 271 272 /// Get the index of the last set bit starting from the least 273 /// significant bit. 274 /// 275 /// Only unsigned integral types are allowed. 276 /// 277 /// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are 278 /// valid arguments. 
279 template <typename T> 280 T findLastSet(T Val, ZeroBehavior ZB = ZB_Max) { 281 if (ZB == ZB_Max && Val == 0) 282 return std::numeric_limits<T>::max(); 283 284 // Use ^ instead of - because both gcc and llvm can remove the associated ^ 285 // in the __builtin_clz intrinsic on x86. 286 return countLeadingZeros(Val, ZB_Undefined) ^ 287 (std::numeric_limits<T>::digits - 1); 288 } 289 290 /// Macro compressed bit reversal table for 256 bits. 291 /// 292 /// http://graphics.stanford.edu/~seander/bithacks.html#BitReverseTable 293 static const unsigned char BitReverseTable256[256] = { 294 #define R2(n) n, n + 2 * 64, n + 1 * 64, n + 3 * 64 295 #define R4(n) R2(n), R2(n + 2 * 16), R2(n + 1 * 16), R2(n + 3 * 16) 296 #define R6(n) R4(n), R4(n + 2 * 4), R4(n + 1 * 4), R4(n + 3 * 4) 297 R6(0), 298 R6(2), 299 R6(1), 300 R6(3) 301 #undef R2 302 #undef R4 303 #undef R6 304 }; 305 306 /// Reverse the bits in \p Val. 307 template <typename T> 308 T reverseBits(T Val) { 309 unsigned char in[sizeof(Val)]; 310 unsigned char out[sizeof(Val)]; 311 std::memcpy(in, &Val, sizeof(Val)); 312 for (unsigned i = 0; i < sizeof(Val); ++i) 313 out[(sizeof(Val) - i) - 1] = BitReverseTable256[in[i]]; 314 std::memcpy(&Val, out, sizeof(Val)); 315 return Val; 316 } 317 318 // NOTE: The following support functions use the _32/_64 extensions instead of 319 // type overloading so that signed and unsigned integers can be used without 320 // ambiguity. 321 322 /// Return the high 32 bits of a 64 bit value. 323 constexpr inline uint32_t Hi_32(uint64_t Value) { 324 return static_cast<uint32_t>(Value >> 32); 325 } 326 327 /// Return the low 32 bits of a 64 bit value. 328 constexpr inline uint32_t Lo_32(uint64_t Value) { 329 return static_cast<uint32_t>(Value); 330 } 331 332 /// Make a 64-bit integer from a high / low pair of 32-bit integers. 
333 constexpr inline uint64_t Make_64(uint32_t High, uint32_t Low) { 334 return ((uint64_t)High << 32) | (uint64_t)Low; 335 } 336 337 /// Checks if an integer fits into the given bit width. 338 template <unsigned N> 339 constexpr inline bool isInt(int64_t x) { 340 return N >= 64 || 341 (-(INT64_C(1) << (N - 1)) <= x && x < (INT64_C(1) << (N - 1))); 342 } 343 // Template specializations to get better code for common cases. 344 template <> 345 constexpr inline bool isInt<8>(int64_t x) { 346 return static_cast<int8_t>(x) == x; 347 } 348 template <> 349 constexpr inline bool isInt<16>(int64_t x) { 350 return static_cast<int16_t>(x) == x; 351 } 352 template <> 353 constexpr inline bool isInt<32>(int64_t x) { 354 return static_cast<int32_t>(x) == x; 355 } 356 357 /// Checks if a signed integer is an N bit number shifted left by S. 358 template <unsigned N, unsigned S> 359 constexpr inline bool isShiftedInt(int64_t x) { 360 static_assert( 361 N > 0, "isShiftedInt<0> doesn't make sense (refers to a 0-bit number."); 362 static_assert(N + S <= 64, "isShiftedInt<N, S> with N + S > 64 is too wide."); 363 return isInt<N + S>(x) && (x % (UINT64_C(1) << S) == 0); 364 } 365 366 /// Checks if an unsigned integer fits into the given bit width. 367 /// 368 /// This is written as two functions rather than as simply 369 /// 370 /// return N >= 64 || X < (UINT64_C(1) << N); 371 /// 372 /// to keep MSVC from (incorrectly) warning on isUInt<64> that we're shifting 373 /// left too many places. 374 template <unsigned N> 375 constexpr inline typename std::enable_if<(N < 64), bool>::type isUInt( 376 uint64_t X) { 377 static_assert(N > 0, "isUInt<0> doesn't make sense"); 378 return X < (UINT64_C(1) << (N)); 379 } 380 template <unsigned N> 381 constexpr inline typename std::enable_if<N >= 64, bool>::type isUInt( 382 uint64_t /*X*/) { 383 return true; 384 } 385 386 // Template specializations to get better code for common cases. 
387 template <> 388 constexpr inline bool isUInt<8>(uint64_t x) { 389 return static_cast<uint8_t>(x) == x; 390 } 391 template <> 392 constexpr inline bool isUInt<16>(uint64_t x) { 393 return static_cast<uint16_t>(x) == x; 394 } 395 template <> 396 constexpr inline bool isUInt<32>(uint64_t x) { 397 return static_cast<uint32_t>(x) == x; 398 } 399 400 /// Checks if a unsigned integer is an N bit number shifted left by S. 401 template <unsigned N, unsigned S> 402 constexpr inline bool isShiftedUInt(uint64_t x) { 403 static_assert( 404 N > 0, "isShiftedUInt<0> doesn't make sense (refers to a 0-bit number)"); 405 static_assert( 406 N + S <= 64, "isShiftedUInt<N, S> with N + S > 64 is too wide."); 407 // Per the two static_asserts above, S must be strictly less than 64. So 408 // 1 << S is not undefined behavior. 409 return isUInt<N + S>(x) && (x % (UINT64_C(1) << S) == 0); 410 } 411 412 /// Gets the maximum value for a N-bit unsigned integer. 413 inline uint64_t maxUIntN(uint64_t N) { 414 assert(N > 0 && N <= 64 && "integer width out of range"); 415 416 // uint64_t(1) << 64 is undefined behavior, so we can't do 417 // (uint64_t(1) << N) - 1 418 // without checking first that N != 64. But this works and doesn't have a 419 // branch. 420 return UINT64_MAX >> (64 - N); 421 } 422 423 // Ignore the false warning "Arithmetic overflow" for MSVC 424 #ifdef _MSC_VER 425 #pragma warning(push) 426 #pragma warning(disable : 4146) 427 #endif 428 429 /// Gets the minimum value for a N-bit signed integer. 430 inline int64_t minIntN(int64_t N) { 431 assert(N > 0 && N <= 64 && "integer width out of range"); 432 433 return -(UINT64_C(1) << (N - 1)); 434 } 435 436 #ifdef _MSC_VER 437 #pragma warning(pop) 438 #endif 439 440 /// Gets the maximum value for a N-bit signed integer. 
441 inline int64_t maxIntN(int64_t N) { 442 assert(N > 0 && N <= 64 && "integer width out of range"); 443 444 // This relies on two's complement wraparound when N == 64, so we convert to 445 // int64_t only at the very end to avoid UB. 446 return (UINT64_C(1) << (N - 1)) - 1; 447 } 448 449 /// Checks if an unsigned integer fits into the given (dynamic) bit width. 450 inline bool isUIntN(unsigned N, uint64_t x) { 451 return N >= 64 || x <= maxUIntN(N); 452 } 453 454 /// Checks if an signed integer fits into the given (dynamic) bit width. 455 inline bool isIntN(unsigned N, int64_t x) { 456 return N >= 64 || (minIntN(N) <= x && x <= maxIntN(N)); 457 } 458 459 /// Return true if the argument is a non-empty sequence of ones starting at the 460 /// least significant bit with the remainder zero (32 bit version). 461 /// Ex. isMask_32(0x0000FFFFU) == true. 462 constexpr inline bool isMask_32(uint32_t Value) { 463 return Value && ((Value + 1) & Value) == 0; 464 } 465 466 /// Return true if the argument is a non-empty sequence of ones starting at the 467 /// least significant bit with the remainder zero (64 bit version). 468 constexpr inline bool isMask_64(uint64_t Value) { 469 return Value && ((Value + 1) & Value) == 0; 470 } 471 472 /// Return true if the argument contains a non-empty sequence of ones with the 473 /// remainder zero (32 bit version.) Ex. isShiftedMask_32(0x0000FF00U) == true. 474 constexpr inline bool isShiftedMask_32(uint32_t Value) { 475 return Value && isMask_32((Value - 1) | Value); 476 } 477 478 /// Return true if the argument contains a non-empty sequence of ones with the 479 /// remainder zero (64 bit version.) 480 constexpr inline bool isShiftedMask_64(uint64_t Value) { 481 return Value && isMask_64((Value - 1) | Value); 482 } 483 484 /// Return true if the argument is a power of two > 0. 485 /// Ex. isPowerOf2_32(0x00100000U) == true (32 bit edition.) 
constexpr inline bool isPowerOf2_32(uint32_t Value) {
  // A power of two has exactly one set bit, so clearing the lowest set bit
  // (Value & (Value - 1)) must leave zero; the leading test rejects zero.
  return Value && !(Value & (Value - 1));
}

/// Return true if the argument is a power of two > 0 (64 bit edition.)
constexpr inline bool isPowerOf2_64(uint64_t Value) {
  return Value && !(Value & (Value - 1));
}

/// Count the number of ones from the most significant bit to the first
/// zero bit.
///
/// Ex. countLeadingOnes(0xFF0FFF00) == 8.
/// Only unsigned integral types are allowed.
///
/// \param ZB the behavior on an input of all ones. Only ZB_Width and
/// ZB_Undefined are valid arguments.
template <typename T>
std::size_t countLeadingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
  static_assert(
      std::numeric_limits<T>::is_integer && !std::numeric_limits<T>::is_signed,
      "Only unsigned integral types are allowed.");
  // Leading ones of Value are the leading zeros of its complement.
  return countLeadingZeros<T>(~Value, ZB);
}

/// Count the number of ones from the least significant bit to the first
/// zero bit.
///
/// Ex. countTrailingOnes(0x00FF00FF) == 8.
/// Only unsigned integral types are allowed.
///
/// \param ZB the behavior on an input of all ones. Only ZB_Width and
/// ZB_Undefined are valid arguments.
template <typename T>
std::size_t countTrailingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
  static_assert(
      std::numeric_limits<T>::is_integer && !std::numeric_limits<T>::is_signed,
      "Only unsigned integral types are allowed.");
  // Trailing ones of Value are the trailing zeros of its complement.
  return countTrailingZeros<T>(~Value, ZB);
}

namespace detail {
// Popcount for types of at most 4 bytes; 8-byte types use the
// specialization below.
template <typename T, std::size_t SizeOfT>
struct PopulationCounter {
  static unsigned count(T Value) {
    // Generic version, forward to 32 bits.
    static_assert(SizeOfT <= 4, "Not implemented!");
#if defined(__GNUC__) && __GNUC__ >= 4
    return __builtin_popcount(Value);
#else
    // Parallel bit count (Stanford bithacks): sum bits in 2-bit, then 4-bit
    // groups, then accumulate the per-byte sums via the multiply.
    uint32_t v = Value;
    v = v - ((v >> 1) & 0x55555555);
    v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
    return ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24;
#endif
  }
};

template <typename T>
struct PopulationCounter<T, 8> {
  static unsigned count(T Value) {
#if defined(__GNUC__) && __GNUC__ >= 4
    return __builtin_popcountll(Value);
#else
    // 64-bit variant of the parallel bit count above.
    uint64_t v = Value;
    v = v - ((v >> 1) & 0x5555555555555555ULL);
    v = (v & 0x3333333333333333ULL) + ((v >> 2) & 0x3333333333333333ULL);
    v = (v + (v >> 4)) & 0x0F0F0F0F0F0F0F0FULL;
    return unsigned((uint64_t)(v * 0x0101010101010101ULL) >> 56);
#endif
  }
};
} // namespace detail

/// Count the number of set bits in a value.
/// Ex. countPopulation(0xF000F000) = 8
/// Returns 0 if the word is zero.
template <typename T>
inline unsigned countPopulation(T Value) {
  static_assert(
      std::numeric_limits<T>::is_integer && !std::numeric_limits<T>::is_signed,
      "Only unsigned integral types are allowed.");
  return detail::PopulationCounter<T, sizeof(T)>::count(Value);
}

/// Return the log base 2 of the specified value.
inline double Log2(double Value) {
#if defined(__ANDROID_API__) && __ANDROID_API__ < 18
  // log2() is unavailable before Android API level 18; derive it from log().
  return __builtin_log(Value) / __builtin_log(2.0);
#else
  return log2(Value);
#endif
}

/// Return the floor log base 2 of the specified value, -1 if the value is zero.
/// (32 bit edition.)
/// Ex. Log2_32(32) == 5, Log2_32(1) == 0, Log2_32(0) == -1, Log2_32(6) == 2
inline unsigned Log2_32(uint32_t Value) {
  // Floor log2 is the index of the highest set bit.
  return static_cast<unsigned>(31 - countLeadingZeros(Value));
}

/// Return the floor log base 2 of the specified value, -1 if the value is zero.
/// (64 bit edition.)
589 inline unsigned Log2_64(uint64_t Value) { 590 return static_cast<unsigned>(63 - countLeadingZeros(Value)); 591 } 592 593 /// Return the ceil log base 2 of the specified value, 32 if the value is zero. 594 /// (32 bit edition). 595 /// Ex. Log2_32_Ceil(32) == 5, Log2_32_Ceil(1) == 0, Log2_32_Ceil(6) == 3 596 inline unsigned Log2_32_Ceil(uint32_t Value) { 597 return static_cast<unsigned>(32 - countLeadingZeros(Value - 1)); 598 } 599 600 /// Return the ceil log base 2 of the specified value, 64 if the value is zero. 601 /// (64 bit edition.) 602 inline unsigned Log2_64_Ceil(uint64_t Value) { 603 return static_cast<unsigned>(64 - countLeadingZeros(Value - 1)); 604 } 605 606 /// Return the greatest common divisor of the values using Euclid's algorithm. 607 inline uint64_t GreatestCommonDivisor64(uint64_t A, uint64_t B) { 608 while (B) { 609 uint64_t T = B; 610 B = A % B; 611 A = T; 612 } 613 return A; 614 } 615 616 /// This function takes a 64-bit integer and returns the bit equivalent double. 617 inline double BitsToDouble(uint64_t Bits) { 618 double D; 619 static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes"); 620 memcpy(&D, &Bits, sizeof(Bits)); 621 return D; 622 } 623 624 /// This function takes a 32-bit integer and returns the bit equivalent float. 625 inline float BitsToFloat(uint32_t Bits) { 626 // TODO: Use bit_cast once C++20 becomes available. 627 float F; 628 static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes"); 629 memcpy(&F, &Bits, sizeof(Bits)); 630 return F; 631 } 632 633 /// This function takes a double and returns the bit equivalent 64-bit integer. 634 /// Note that copying doubles around changes the bits of NaNs on some hosts, 635 /// notably x86, so this routine cannot be used if these bits are needed. 
636 inline uint64_t DoubleToBits(double Double) { 637 uint64_t Bits; 638 static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes"); 639 memcpy(&Bits, &Double, sizeof(Double)); 640 return Bits; 641 } 642 643 /// This function takes a float and returns the bit equivalent 32-bit integer. 644 /// Note that copying floats around changes the bits of NaNs on some hosts, 645 /// notably x86, so this routine cannot be used if these bits are needed. 646 inline uint32_t FloatToBits(float Float) { 647 uint32_t Bits; 648 static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes"); 649 memcpy(&Bits, &Float, sizeof(Float)); 650 return Bits; 651 } 652 653 /// A and B are either alignments or offsets. Return the minimum alignment that 654 /// may be assumed after adding the two together. 655 constexpr inline uint64_t MinAlign(uint64_t A, uint64_t B) { 656 // The largest power of 2 that divides both A and B. 657 // 658 // Replace "-Value" by "1+~Value" in the following commented code to avoid 659 // MSVC warning C4146 660 // return (A | B) & -(A | B); 661 return (A | B) & (1 + ~(A | B)); 662 } 663 664 /// Aligns \c Addr to \c Alignment bytes, rounding up. 665 /// 666 /// Alignment should be a power of two. This method rounds up, so 667 /// alignAddr(7, 4) == 8 and alignAddr(8, 4) == 8. 668 inline uintptr_t alignAddr(const void* Addr, size_t Alignment) { 669 assert( 670 Alignment && isPowerOf2_64((uint64_t)Alignment) && 671 "Alignment is not a power of two!"); 672 673 assert((uintptr_t)Addr + Alignment - 1 >= (uintptr_t)Addr); 674 675 return (((uintptr_t)Addr + Alignment - 1) & ~(uintptr_t)(Alignment - 1)); 676 } 677 678 /// Returns the necessary adjustment for aligning \c Ptr to \c Alignment 679 /// bytes, rounding up. 680 inline size_t alignmentAdjustment(const void* Ptr, size_t Alignment) { 681 return alignAddr(Ptr, Alignment) - (uintptr_t)Ptr; 682 } 683 684 /// Returns the next power of two (in 64-bits) that is strictly greater than A. 
685 /// Returns zero on overflow. 686 inline uint64_t NextPowerOf2(uint64_t A) { 687 A |= (A >> 1); 688 A |= (A >> 2); 689 A |= (A >> 4); 690 A |= (A >> 8); 691 A |= (A >> 16); 692 A |= (A >> 32); 693 return A + 1; 694 } 695 696 /// Returns the power of two which is less than or equal to the given value. 697 /// Essentially, it is a floor operation across the domain of powers of two. 698 inline uint64_t PowerOf2Floor(uint64_t A) { 699 if (!A) 700 return 0; 701 return 1ull << (63 - countLeadingZeros(A, ZB_Undefined)); 702 } 703 704 /// Returns the power of two which is greater than or equal to the given value. 705 /// Essentially, it is a ceil operation across the domain of powers of two. 706 inline uint64_t PowerOf2Ceil(uint64_t A) { 707 if (!A) 708 return 0; 709 return NextPowerOf2(A - 1); 710 } 711 712 /// Returns the next integer (mod 2**64) that is greater than or equal to 713 /// \p Value and is a multiple of \p Align. \p Align must be non-zero. 714 /// 715 /// If non-zero \p Skew is specified, the return value will be a minimal 716 /// integer that is greater than or equal to \p Value and equal to 717 /// \p Align * N + \p Skew for some integer N. If \p Skew is larger than 718 /// \p Align, its value is adjusted to '\p Skew mod \p Align'. 719 /// 720 /// Examples: 721 /// \code 722 /// alignTo(5, 8) = 8 723 /// alignTo(17, 8) = 24 724 /// alignTo(~0LL, 8) = 0 725 /// alignTo(321, 255) = 510 726 /// 727 /// alignTo(5, 8, 7) = 7 728 /// alignTo(17, 8, 1) = 17 729 /// alignTo(~0LL, 8, 3) = 3 730 /// alignTo(321, 255, 42) = 552 731 /// \endcode 732 inline uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew = 0) { 733 assert(Align != 0u && "Align can't be 0."); 734 Skew %= Align; 735 return (Value + Align - 1 - Skew) / Align * Align + Skew; 736 } 737 738 /// Returns the next integer (mod 2**64) that is greater than or equal to 739 /// \p Value and is a multiple of \c Align. \c Align must be non-zero. 
740 template <uint64_t Align> 741 constexpr inline uint64_t alignTo(uint64_t Value) { 742 static_assert(Align != 0u, "Align must be non-zero"); 743 return (Value + Align - 1) / Align * Align; 744 } 745 746 /// Returns the integer ceil(Numerator / Denominator). 747 inline uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator) { 748 return alignTo(Numerator, Denominator) / Denominator; 749 } 750 751 /// \c alignTo for contexts where a constant expression is required. 752 /// \sa alignTo 753 /// 754 /// \todo FIXME: remove when \c constexpr becomes really \c constexpr 755 template <uint64_t Align> 756 struct AlignTo { 757 static_assert(Align != 0u, "Align must be non-zero"); 758 template <uint64_t Value> 759 struct from_value { 760 static const uint64_t value = (Value + Align - 1) / Align * Align; 761 }; 762 }; 763 764 /// Returns the largest uint64_t less than or equal to \p Value and is 765 /// \p Skew mod \p Align. \p Align must be non-zero 766 inline uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew = 0) { 767 assert(Align != 0u && "Align can't be 0."); 768 Skew %= Align; 769 return (Value - Skew) / Align * Align + Skew; 770 } 771 772 /// Returns the offset to the next integer (mod 2**64) that is greater than 773 /// or equal to \p Value and is a multiple of \p Align. \p Align must be 774 /// non-zero. 775 inline uint64_t OffsetToAlignment(uint64_t Value, uint64_t Align) { 776 return alignTo(Value, Align) - Value; 777 } 778 779 /// Sign-extend the number in the bottom B bits of X to a 32-bit integer. 780 /// Requires 0 < B <= 32. 781 template <unsigned B> 782 constexpr inline int32_t SignExtend32(uint32_t X) { 783 static_assert(B > 0, "Bit width can't be 0."); 784 static_assert(B <= 32, "Bit width out of range."); 785 return int32_t(X << (32 - B)) >> (32 - B); 786 } 787 788 /// Sign-extend the number in the bottom B bits of X to a 32-bit integer. 789 /// Requires 0 < B < 32. 
inline int32_t SignExtend32(uint32_t X, unsigned B) {
  assert(B > 0 && "Bit width can't be 0.");
  assert(B <= 32 && "Bit width out of range.");
  // Shift the B-bit field to the top of the word, then arithmetic-shift it
  // back down so its sign bit fills the upper bits.
  const unsigned Shift = 32 - B;
  return int32_t(X << Shift) >> Shift;
}

/// Sign-extend the number in the bottom B bits of X to a 64-bit integer.
/// Requires 0 < B < 64.
template <unsigned B>
constexpr inline int64_t SignExtend64(uint64_t x) {
  static_assert(B > 0, "Bit width can't be 0.");
  static_assert(B <= 64, "Bit width out of range.");
  return int64_t(x << (64 - B)) >> (64 - B);
}

/// Sign-extend the number in the bottom B bits of X to a 64-bit integer.
/// Requires 0 < B < 64.
inline int64_t SignExtend64(uint64_t X, unsigned B) {
  assert(B > 0 && "Bit width can't be 0.");
  assert(B <= 64 && "Bit width out of range.");
  const unsigned Shift = 64 - B;
  return int64_t(X << Shift) >> Shift;
}

/// Subtract two unsigned integers, X and Y, of type T and return the absolute
/// value of the result.
template <typename T>
typename std::enable_if<std::is_unsigned<T>::value, T>::type AbsoluteDifference(
    T X,
    T Y) {
  // Subtracting the smaller from the larger keeps unsigned math in range.
  return X > Y ? X - Y : Y - X;
}

/// Add two unsigned integers, X and Y, of type T. Clamp the result to the
/// maximum representable value of T on overflow. ResultOverflowed indicates if
/// the result is larger than the maximum representable value of type T.
template <typename T>
typename std::enable_if<std::is_unsigned<T>::value, T>::type SaturatingAdd(
    T X,
    T Y,
    bool* ResultOverflowed = nullptr) {
  bool Scratch;
  bool& Overflowed = ResultOverflowed ? *ResultOverflowed : Scratch;
  // Hacker's Delight, p. 29: an unsigned addition wrapped around iff the
  // sum is smaller than either operand.
  const T Sum = X + Y;
  Overflowed = (Sum < X || Sum < Y);
  return Overflowed ? std::numeric_limits<T>::max() : Sum;
}

/// Multiply two unsigned integers, X and Y, of type T. Clamp the result to the
/// maximum representable value of T on overflow.
/// ResultOverflowed indicates if
/// the result is larger than the maximum representable value of type T.
template <typename T>
typename std::enable_if<std::is_unsigned<T>::value, T>::type SaturatingMultiply(
    T X,
    T Y,
    bool* ResultOverflowed = nullptr) {
  bool Dummy;
  bool& Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;

  // Hacker's Delight, p. 30 has a different algorithm, but we don't use that
  // because it fails for uint16_t (where multiplication can have undefined
  // behavior due to promotion to int), and requires a division in addition
  // to the multiplication.

  Overflowed = false;

  // Log2(Z) would be either Log2Z or Log2Z + 1.
  // Special case: if X or Y is 0, Log2_64 gives -1, and Log2Z
  // will necessarily be less than Log2Max as desired.
  int Log2Z = Log2_64(X) + Log2_64(Y);
  const T Max = std::numeric_limits<T>::max();
  int Log2Max = Log2_64(Max);
  if (Log2Z < Log2Max) {
    // The product's log2 is strictly below the type's, so the multiply
    // cannot overflow.
    return X * Y;
  }
  if (Log2Z > Log2Max) {
    // The product's log2 exceeds the type's; saturate.
    Overflowed = true;
    return Max;
  }

  // We're going to use the top bit, and maybe overflow one
  // bit past it. Multiply all but the bottom bit then add
  // that on at the end.
  T Z = (X >> 1) * Y;
  if (Z & ~(Max >> 1)) {
    // Doubling Z below would shift out a set top bit; saturate.
    Overflowed = true;
    return Max;
  }
  Z <<= 1;
  if (X & 1)
    return SaturatingAdd(Z, Y, ResultOverflowed);

  return Z;
}

/// Multiply two unsigned integers, X and Y, and add the unsigned integer, A to
/// the product. Clamp the result to the maximum representable value of T on
/// overflow. ResultOverflowed indicates if the result is larger than the
/// maximum representable value of type T.
template <typename T>
typename std::enable_if<std::is_unsigned<T>::value, T>::type
SaturatingMultiplyAdd(T X, T Y, T A, bool* ResultOverflowed = nullptr) {
  bool Dummy;
  bool& Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;

  // Saturate (and report overflow) if either the multiply or the add
  // overflows.
  T Product = SaturatingMultiply(X, Y, &Overflowed);
  if (Overflowed)
    return Product;

  return SaturatingAdd(A, Product, &Overflowed);
}

/// Use this rather than HUGE_VALF; the latter causes warnings on MSVC.
extern const float huge_valf;
} // namespace llvm
} // namespace executorch