#pragma once
#ifndef FP16_BITCASTS_H
#define FP16_BITCASTS_H

/*
 * Bit-level reinterpretation helpers between floating-point values and
 * same-width unsigned integers. Each helper selects a compiler- or
 * platform-specific intrinsic when one of the checked macros is defined and
 * otherwise falls back to reading the other member of a two-member union.
 */

#if defined(__cplusplus) && (__cplusplus >= 201103L)
	#include <cstdint>
#elif !defined(__OPENCL_VERSION__)
	#include <stdint.h>
#endif

#if defined(__INTEL_COMPILER)
	#include <immintrin.h>
#endif

#if defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_ARM64))
	#include <intrin.h>
#endif


/*
 * Reinterpret the 32-bit pattern w as a float.
 *
 * @param w  Bit pattern to reinterpret; no numeric conversion is performed.
 * @return   The float whose object representation is w.
 */
static inline float fp32_from_bits(uint32_t w) {
#if defined(__OPENCL_VERSION__)
	return as_float(w);
#elif defined(__CUDA_ARCH__)
	return __uint_as_float((unsigned int) w);
#elif defined(__INTEL_COMPILER)
	return _castu32_f32(w);
#elif defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_ARM64))
	return _CopyFloatFromInt32((__int32) w);
#else
	/* Generic fallback: store through the integer member, read the float member. */
	union {
		uint32_t as_bits;
		float as_value;
	} fp32 = { w };
	return fp32.as_value;
#endif
}

/*
 * Return the 32-bit pattern that represents the float f.
 *
 * @param f  Value whose bits are extracted; no numeric conversion is performed.
 * @return   The object representation of f as an unsigned 32-bit integer.
 */
static inline uint32_t fp32_to_bits(float f) {
#if defined(__OPENCL_VERSION__)
	return as_uint(f);
#elif defined(__CUDA_ARCH__)
	return (uint32_t) __float_as_uint(f);
#elif defined(__INTEL_COMPILER)
	return _castf32_u32(f);
#elif defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_ARM64))
	return (uint32_t) _CopyInt32FromFloat(f);
#else
	/* Generic fallback: store through the float member, read the integer member. */
	union {
		float as_value;
		uint32_t as_bits;
	} fp32 = { f };
	return fp32.as_bits;
#endif
}

/*
 * Reinterpret the 64-bit pattern w as a double.
 *
 * @param w  Bit pattern to reinterpret; no numeric conversion is performed.
 * @return   The double whose object representation is w.
 */
static inline double fp64_from_bits(uint64_t w) {
#if defined(__OPENCL_VERSION__)
	return as_double(w);
#elif defined(__CUDA_ARCH__)
	return __longlong_as_double((long long) w);
#elif defined(__INTEL_COMPILER)
	return _castu64_f64(w);
#elif defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_ARM64))
	return _CopyDoubleFromInt64((__int64) w);
#else
	/* Generic fallback: store through the integer member, read the double member. */
	union {
		uint64_t as_bits;
		double as_value;
	} fp64 = { w };
	return fp64.as_value;
#endif
}

/*
 * Return the 64-bit pattern that represents the double f.
 *
 * @param f  Value whose bits are extracted; no numeric conversion is performed.
 * @return   The object representation of f as an unsigned 64-bit integer.
 */
static inline uint64_t fp64_to_bits(double f) {
#if defined(__OPENCL_VERSION__)
	return as_ulong(f);
#elif defined(__CUDA_ARCH__)
	return (uint64_t) __double_as_longlong(f);
#elif defined(__INTEL_COMPILER)
	return _castf64_u64(f);
#elif defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_ARM64))
	return (uint64_t) _CopyInt64FromDouble(f);
#else
	/* Generic fallback: store through the double member, read the integer member. */
	union {
		double as_value;
		uint64_t as_bits;
	} fp64 = { f };
	return fp64.as_bits;
#endif
}

#endif /* FP16_BITCASTS_H */