xref: /aosp_15_r20/external/llvm-libc/libc/shared/rpc_util.h (revision 71db0c75aadcf003ffe3238005f61d7618a3fead)
1 //===-- Shared memory RPC client / server utilities -------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_LIBC_SHARED_RPC_UTIL_H
10 #define LLVM_LIBC_SHARED_RPC_UTIL_H
11 
12 #include <stddef.h>
13 #include <stdint.h>
14 
15 #if defined(__NVPTX__) || defined(__AMDGPU__)
16 #include <gpuintrin.h>
17 #define RPC_TARGET_IS_GPU
18 #endif
19 
20 // Workaround for missing __has_builtin in < GCC 10.
21 #ifndef __has_builtin
22 #define __has_builtin(x) 0
23 #endif
24 
25 #ifndef RPC_INLINE
26 #define RPC_INLINE inline
27 #endif
28 
29 namespace rpc {
30 
/// Metafunction that maps a type to itself; used as the common base for the
/// traits below (mirrors C++20 std::type_identity).
template <typename T> struct type_identity {
  using type = T;
};
34 
/// Compile-time constant wrapper; a minimal stand-in for
/// std::integral_constant.
template <class T, T v> struct type_constant {
  static inline constexpr T value = v;
};
38 
/// Strips `&` / `&&` from a type (freestanding std::remove_reference).
template <class T> struct remove_reference : type_identity<T> {};
template <class T> struct remove_reference<T &> : type_identity<T> {};
template <class T> struct remove_reference<T &&> : type_identity<T> {};
42 
/// Detects top-level const qualification (freestanding std::is_const).
template <class T> struct is_const : type_constant<bool, false> {};
template <class T> struct is_const<const T> : type_constant<bool, true> {};
45 
/// Freestanding implementation of std::move: unconditionally casts the
/// argument to an rvalue reference so it may be moved from.
template <class T>
RPC_INLINE constexpr typename remove_reference<T>::type &&move(T &&t) {
  return static_cast<typename remove_reference<T>::type &&>(t);
}
51 
/// Freestanding implementation of std::forward. The first overload forwards
/// lvalues as either lvalues or rvalues depending on T; the second forwards
/// rvalues as rvalues.
template <typename T>
RPC_INLINE constexpr T &&forward(typename remove_reference<T>::type &value) {
  return static_cast<T &&>(value);
}
template <typename T>
RPC_INLINE constexpr T &&forward(typename remove_reference<T>::type &&value) {
  return static_cast<T &&>(value);
}
61 
/// Tag type requesting in-place construction of an optional's value (mirrors
/// std::in_place_t). The constructor is explicit so a bare `{}` cannot
/// implicitly convert to it, and constexpr for consistency with nullopt_t and
/// with the constexpr tag object declared below.
struct in_place_t {
  RPC_INLINE constexpr explicit in_place_t() = default;
};
65 
/// Tag type representing "no value" for optional (mirrors std::nullopt_t).
struct nullopt_t {
  // Explicit so a bare `{}` cannot implicitly convert to nullopt_t.
  RPC_INLINE constexpr explicit nullopt_t() = default;
};
69 
/// Global tag objects, analogous to std::in_place and std::nullopt.
constexpr inline in_place_t in_place{};
constexpr inline nullopt_t nullopt{};
72 
73 /// Freestanding and minimal implementation of std::optional.
74 template <typename T> class optional {
75   template <typename U> struct OptionalStorage {
76     union {
77       char empty;
78       U stored_value;
79     };
80 
81     bool in_use = false;
82 
83     RPC_INLINE ~OptionalStorage() { reset(); }
84 
85     RPC_INLINE constexpr OptionalStorage() : empty() {}
86 
87     template <typename... Args>
88     RPC_INLINE constexpr explicit OptionalStorage(in_place_t, Args &&...args)
89         : stored_value(forward<Args>(args)...) {}
90 
91     RPC_INLINE constexpr void reset() {
92       if (in_use)
93         stored_value.~U();
94       in_use = false;
95     }
96   };
97 
98   OptionalStorage<T> storage;
99 
100 public:
101   RPC_INLINE constexpr optional() = default;
102   RPC_INLINE constexpr optional(nullopt_t) {}
103 
104   RPC_INLINE constexpr optional(const T &t) : storage(in_place, t) {
105     storage.in_use = true;
106   }
107   RPC_INLINE constexpr optional(const optional &) = default;
108 
109   RPC_INLINE constexpr optional(T &&t) : storage(in_place, move(t)) {
110     storage.in_use = true;
111   }
112   RPC_INLINE constexpr optional(optional &&O) = default;
113 
114   RPC_INLINE constexpr optional &operator=(T &&t) {
115     storage = move(t);
116     return *this;
117   }
118   RPC_INLINE constexpr optional &operator=(optional &&) = default;
119 
120   RPC_INLINE constexpr optional &operator=(const T &t) {
121     storage = t;
122     return *this;
123   }
124   RPC_INLINE constexpr optional &operator=(const optional &) = default;
125 
126   RPC_INLINE constexpr void reset() { storage.reset(); }
127 
128   RPC_INLINE constexpr const T &value() const & { return storage.stored_value; }
129 
130   RPC_INLINE constexpr T &value() & { return storage.stored_value; }
131 
132   RPC_INLINE constexpr explicit operator bool() const { return storage.in_use; }
133   RPC_INLINE constexpr bool has_value() const { return storage.in_use; }
134   RPC_INLINE constexpr const T *operator->() const {
135     return &storage.stored_value;
136   }
137   RPC_INLINE constexpr T *operator->() { return &storage.stored_value; }
138   RPC_INLINE constexpr const T &operator*() const & {
139     return storage.stored_value;
140   }
141   RPC_INLINE constexpr T &operator*() & { return storage.stored_value; }
142 
143   RPC_INLINE constexpr T &&value() && { return move(storage.stored_value); }
144   RPC_INLINE constexpr T &&operator*() && { return move(storage.stored_value); }
145 };
146 
/// Suspend the thread briefly to assist the thread scheduler during busy loops.
RPC_INLINE void sleep_briefly() {
#if defined(__NVPTX__)
  // The guard restricts nanosleep to sm_70 and newer; __nvvm_reflect is
  // resolved by the NVPTX toolchain to the targeted architecture.
  if (__nvvm_reflect("__CUDA_ARCH") >= 700)
    asm("nanosleep.u32 64;" ::: "memory");
#elif defined(__AMDGPU__)
  __builtin_amdgcn_s_sleep(2);
#elif __has_builtin(__builtin_ia32_pause)
  // x86 PAUSE: spin-wait hint to the core.
  __builtin_ia32_pause();
#elif __has_builtin(__builtin_arm_isb)
  // ARM ISB: instruction barrier used here as a lightweight spin hint.
  __builtin_arm_isb(0xf);
#else
  // Simply do nothing if sleeping isn't supported on this platform.
#endif
}
162 
/// Conditional to indicate if this process is running on the GPU.
RPC_INLINE constexpr bool is_process_gpu() {
#ifdef RPC_TARGET_IS_GPU
  constexpr bool on_gpu = true;
#else
  constexpr bool on_gpu = false;
#endif
  return on_gpu;
}
171 
/// Wait for all lanes in the group to complete.
RPC_INLINE void sync_lane(uint64_t lane_mask) {
#ifdef RPC_TARGET_IS_GPU
  __gpu_sync_lane(lane_mask);
#else
  // Single lane on the CPU; there is nothing to synchronize with.
  (void)lane_mask;
#endif
}
178 
/// Copies the value from the first active thread to the rest.
RPC_INLINE uint32_t broadcast_value(uint64_t lane_mask, uint32_t x) {
#ifndef RPC_TARGET_IS_GPU
  // A single CPU lane already holds the value.
  (void)lane_mask;
  return x;
#else
  return __gpu_read_first_lane_u32(lane_mask, x);
#endif
}
187 
/// Returns the number of lanes that participate in the RPC interface.
RPC_INLINE uint32_t get_num_lanes() {
#ifndef RPC_TARGET_IS_GPU
  // The CPU side always operates as a single lane.
  return 1;
#else
  return __gpu_num_lanes();
#endif
}
196 
/// Returns the bitmask of lanes currently executing together in this group
/// (warp/wavefront). On the CPU the single lane is always bit zero.
RPC_INLINE uint64_t get_lane_mask() {
#ifndef RPC_TARGET_IS_GPU
  return 1;
#else
  return __gpu_lane_mask();
#endif
}
205 
/// Returns this thread's index within the lanes executing together
/// (warp/wavefront); always zero on the CPU.
RPC_INLINE uint32_t get_lane_id() {
#ifndef RPC_TARGET_IS_GPU
  return 0;
#else
  return __gpu_lane_id();
#endif
}
214 
/// Conditional that is only true for a single thread in a lane.
RPC_INLINE bool is_first_lane(uint64_t lane_mask) {
#ifndef RPC_TARGET_IS_GPU
  // The lone CPU lane is trivially the first one.
  (void)lane_mask;
  return true;
#else
  return __gpu_is_first_in_lane(lane_mask);
#endif
}
223 
/// Returns a bitmask of threads in the current lane for which \p x is true.
RPC_INLINE uint64_t ballot(uint64_t lane_mask, bool x) {
#ifndef RPC_TARGET_IS_GPU
  // One CPU lane: the result is simply bit zero set iff x is true.
  (void)lane_mask;
  return x;
#else
  return __gpu_ballot(lane_mask, x);
#endif
}
232 
/// Return \p val aligned "upwards" according to \p align, i.e. the smallest
/// multiple of \p align that is >= \p val.
template <typename V, typename A>
RPC_INLINE constexpr V align_up(V val, A align) {
  const V alignment = static_cast<V>(align);
  return ((val + alignment - 1) / alignment) * alignment;
}
238 
239 /// Utility to provide a unified interface between the CPU and GPU's memory
240 /// model. On the GPU stack variables are always private to a lane so we can
241 /// simply use the variable passed in. On the CPU we need to allocate enough
242 /// space for the whole lane and index into it.
243 template <typename V> RPC_INLINE V &lane_value(V *val, uint32_t id) {
244   if constexpr (is_process_gpu())
245     return *val;
246   return val[id];
247 }
248 
249 /// Advance the \p p by \p bytes.
250 template <typename T, typename U> RPC_INLINE T *advance(T *ptr, U bytes) {
251   if constexpr (is_const<T>::value)
252     return reinterpret_cast<T *>(reinterpret_cast<const uint8_t *>(ptr) +
253                                  bytes);
254   else
255     return reinterpret_cast<T *>(reinterpret_cast<uint8_t *>(ptr) + bytes);
256 }
257 
/// Wrapper around the optimal memory copy implementation for the target.
RPC_INLINE void rpc_memcpy(void *dst, const void *src, size_t count) {
  // The builtin lets the compiler emit its best inline sequence without this
  // freestanding header needing a <string.h> dependency.
  __builtin_memcpy(dst, src, count);
}
262 
/// Freestanding implementation of std::max: returns the larger of \p a and
/// \p b, preferring \p a when the two compare equal.
template <class T> RPC_INLINE constexpr const T &max(const T &a, const T &b) {
  if (a < b)
    return b;
  return a;
}
266 
267 } // namespace rpc
268 
269 #endif // LLVM_LIBC_SHARED_RPC_UTIL_H
270