//===-- Shared memory RPC client / server utilities -------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SHARED_RPC_UTIL_H
#define LLVM_LIBC_SHARED_RPC_UTIL_H

#include <stddef.h>
#include <stdint.h>

#if defined(__NVPTX__) || defined(__AMDGPU__)
#include <gpuintrin.h>
#define RPC_TARGET_IS_GPU
#endif

// Workaround for missing __has_builtin in < GCC 10.
#ifndef __has_builtin
#define __has_builtin(x) 0
#endif

#ifndef RPC_INLINE
#define RPC_INLINE inline
#endif

namespace rpc {

/// Tag used to select the placement allocation function declared below. It
/// lets this header construct objects in existing storage without depending
/// on the standard <new> header.
struct placement_new_tag {};

} // namespace rpc

/// Placement allocation function private to the RPC utilities. It performs no
/// allocation and simply hands back \p ptr so that a new-expression constructs
/// the object in caller-provided storage.
RPC_INLINE void *operator new(size_t, void *ptr,
                              rpc::placement_new_tag) noexcept {
  return ptr;
}

/// Matching placement deallocation function. Nothing was allocated, so there
/// is nothing to release.
RPC_INLINE void operator delete(void *, void *,
                                rpc::placement_new_tag) noexcept {}

namespace rpc {

/// Freestanding equivalent of std::type_identity.
template <typename T> struct type_identity {
  using type = T;
};

/// Freestanding equivalent of std::integral_constant.
template <class T, T v> struct type_constant {
  static inline constexpr T value = v;
};

/// Freestanding equivalent of std::remove_reference.
template <class T> struct remove_reference : type_identity<T> {};
template <class T> struct remove_reference<T &> : type_identity<T> {};
template <class T> struct remove_reference<T &&> : type_identity<T> {};

/// Freestanding equivalent of std::is_const.
template <class T> struct is_const : type_constant<bool, false> {};
template <class T> struct is_const<const T> : type_constant<bool, true> {};

/// Freestanding implementation of std::move.
template <class T>
RPC_INLINE constexpr typename remove_reference<T>::type &&move(T &&t) {
  return static_cast<typename remove_reference<T>::type &&>(t);
}

/// Freestanding implementation of std::forward.
template <typename T>
RPC_INLINE constexpr T &&forward(typename remove_reference<T>::type &value) {
  return static_cast<T &&>(value);
}
template <typename T>
RPC_INLINE constexpr T &&forward(typename remove_reference<T>::type &&value) {
  return static_cast<T &&>(value);
}

/// Tag requesting in-place construction of the contained value.
struct in_place_t {
  RPC_INLINE constexpr explicit in_place_t() = default;
};

/// Tag representing an empty optional.
struct nullopt_t {
  RPC_INLINE constexpr explicit nullopt_t() = default;
};

constexpr inline in_place_t in_place{};
constexpr inline nullopt_t nullopt{};

/// Freestanding and minimal implementation of std::optional.
///
/// The accessors (value(), operator*, operator->) do not check whether a value
/// is present; callers must test has_value() / operator bool first.
template <typename T> class optional {
  /// Storage for a possibly-absent value of type \p U together with the flag
  /// recording whether the value is currently alive.
  template <typename U> struct OptionalStorage {
    union {
      char empty;
      U stored_value;
    };

    bool in_use = false;

    RPC_INLINE ~OptionalStorage() { reset(); }

    /// Creates empty storage.
    RPC_INLINE constexpr OptionalStorage() : empty() {}

    /// Constructs the value in place from \p args and marks the storage as
    /// holding a value.
    template <typename... Args>
    RPC_INLINE constexpr explicit OptionalStorage(in_place_t, Args &&...args)
        : stored_value(rpc::forward<Args>(args)...), in_use(true) {}

    /// Stores \p v: assigns over the existing value if there is one,
    /// otherwise constructs a new value in the raw storage.
    template <typename V> RPC_INLINE constexpr void assign(V &&v) {
      if (in_use) {
        stored_value = rpc::forward<V>(v);
        return;
      }
      // No object is alive yet, so plain assignment would be invalid for
      // non-trivial types; begin the lifetime with placement new instead.
      ::new (static_cast<void *>(&stored_value), placement_new_tag{})
          U(rpc::forward<V>(v));
      in_use = true;
    }

    /// Destroys the contained value, if any, leaving the storage empty.
    RPC_INLINE constexpr void reset() {
      if (in_use)
        stored_value.~U();
      in_use = false;
    }
  };

  OptionalStorage<T> storage;

public:
  RPC_INLINE constexpr optional() = default;
  RPC_INLINE constexpr optional(nullopt_t) {}

  RPC_INLINE constexpr optional(const T &t) : storage(in_place, t) {}
  RPC_INLINE constexpr optional(const optional &) = default;

  RPC_INLINE constexpr optional(T &&t) : storage(in_place, rpc::move(t)) {}
  RPC_INLINE constexpr optional(optional &&O) = default;

  /// Replaces the contents with \p t; the optional holds a value afterwards.
  RPC_INLINE constexpr optional &operator=(T &&t) {
    storage.assign(rpc::move(t));
    return *this;
  }
  RPC_INLINE constexpr optional &operator=(optional &&) = default;

  /// Replaces the contents with a copy of \p t; the optional holds a value
  /// afterwards.
  RPC_INLINE constexpr optional &operator=(const T &t) {
    storage.assign(t);
    return *this;
  }
  RPC_INLINE constexpr optional &operator=(const optional &) = default;

  /// Destroys the contained value, if any.
  RPC_INLINE constexpr void reset() { storage.reset(); }

  RPC_INLINE constexpr const T &value() const & { return storage.stored_value; }

  RPC_INLINE constexpr T &value() & { return storage.stored_value; }

  RPC_INLINE constexpr explicit operator bool() const { return storage.in_use; }
  RPC_INLINE constexpr bool has_value() const { return storage.in_use; }
  RPC_INLINE constexpr const T *operator->() const {
    return &storage.stored_value;
  }
  RPC_INLINE constexpr T *operator->() { return &storage.stored_value; }
  RPC_INLINE constexpr const T &operator*() const & {
    return storage.stored_value;
  }
  RPC_INLINE constexpr T &operator*() & { return storage.stored_value; }

  RPC_INLINE constexpr T &&value() && {
    return rpc::move(storage.stored_value);
  }
  RPC_INLINE constexpr T &&operator*() && {
    return rpc::move(storage.stored_value);
  }
};

/// Suspend the thread briefly to assist the thread scheduler during busy loops.
RPC_INLINE void sleep_briefly() {
#if defined(__NVPTX__)
  if (__nvvm_reflect("__CUDA_ARCH") >= 700)
    asm("nanosleep.u32 64;" ::: "memory");
#elif defined(__AMDGPU__)
  __builtin_amdgcn_s_sleep(2);
#elif __has_builtin(__builtin_ia32_pause)
  __builtin_ia32_pause();
#elif __has_builtin(__builtin_arm_isb)
  __builtin_arm_isb(0xf);
#else
  // Simply do nothing if sleeping isn't supported on this platform.
#endif
}

/// Conditional to indicate if this process is running on the GPU.
RPC_INLINE constexpr bool is_process_gpu() {
#ifdef RPC_TARGET_IS_GPU
  return true;
#else
  return false;
#endif
}

/// Wait for all lanes in the group to complete. This is a no-op on the CPU.
RPC_INLINE void sync_lane(uint64_t lane_mask) {
#ifdef RPC_TARGET_IS_GPU
  return __gpu_sync_lane(lane_mask);
#endif
}

/// Copies the value from the first active thread to the rest. On the CPU \p x
/// is returned unchanged.
RPC_INLINE uint32_t broadcast_value(uint64_t lane_mask, uint32_t x) {
#ifdef RPC_TARGET_IS_GPU
  return __gpu_read_first_lane_u32(lane_mask, x);
#else
  return x;
#endif
}

/// Returns the number of lanes that participate in the RPC interface. This is
/// always 1 on the CPU.
RPC_INLINE uint32_t get_num_lanes() {
#ifdef RPC_TARGET_IS_GPU
  return __gpu_num_lanes();
#else
  return 1;
#endif
}

/// Returns the lane mask of the executing group as reported by
/// __gpu_lane_mask() on the GPU. The CPU reports a single lane (mask 1).
RPC_INLINE uint64_t get_lane_mask() {
#ifdef RPC_TARGET_IS_GPU
  return __gpu_lane_mask();
#else
  return 1;
#endif
}

/// Returns the id of the calling thread inside of its group of lanes. This is
/// always 0 on the CPU.
RPC_INLINE uint32_t get_lane_id() {
#ifdef RPC_TARGET_IS_GPU
  return __gpu_lane_id();
#else
  return 0;
#endif
}

/// Conditional that is only true for a single thread in a lane. Always true on
/// the CPU.
RPC_INLINE bool is_first_lane(uint64_t lane_mask) {
#ifdef RPC_TARGET_IS_GPU
  return __gpu_is_first_in_lane(lane_mask);
#else
  return true;
#endif
}

/// Returns a bitmask of threads in the current lane for which \p x is true. On
/// the CPU this is simply \p x converted to an integer.
RPC_INLINE uint64_t ballot(uint64_t lane_mask, bool x) {
#ifdef RPC_TARGET_IS_GPU
  return __gpu_ballot(lane_mask, x);
#else
  return x;
#endif
}

/// Return \p val aligned "upwards" according to \p align.
template <typename V, typename A>
RPC_INLINE constexpr V align_up(V val, A align) {
  return ((val + V(align) - 1) / V(align)) * V(align);
}

/// Utility to provide a unified interface between the CPU and GPU's memory
/// model. On the GPU stack variables are always private to a lane so we can
/// simply use the variable passed in. On the CPU we need to allocate enough
/// space for the whole lane and index into it.
template <typename V> RPC_INLINE V &lane_value(V *val, uint32_t id) {
  if constexpr (is_process_gpu())
    return *val;
  else
    return val[id];
}

/// Advance \p ptr by \p bytes, preserving the constness of the pointee.
template <typename T, typename U> RPC_INLINE T *advance(T *ptr, U bytes) {
  if constexpr (is_const<T>::value)
    return reinterpret_cast<T *>(reinterpret_cast<const uint8_t *>(ptr) +
                                 bytes);
  else
    return reinterpret_cast<T *>(reinterpret_cast<uint8_t *>(ptr) + bytes);
}

/// Wrapper around the optimal memory copy implementation for the target.
RPC_INLINE void rpc_memcpy(void *dst, const void *src, size_t count) {
  __builtin_memcpy(dst, src, count);
}

/// Returns the larger of \p a and \p b; \p a is returned when they compare
/// equal.
template <class T> RPC_INLINE constexpr const T &max(const T &a, const T &b) {
  return (a < b) ? b : a;
}

} // namespace rpc

#endif // LLVM_LIBC_SHARED_RPC_UTIL_H