1*67e74705SXin Li /*===---- cuda_builtin_vars.h - CUDA built-in variables ---------------------=== 2*67e74705SXin Li * 3*67e74705SXin Li * Permission is hereby granted, free of charge, to any person obtaining a copy 4*67e74705SXin Li * of this software and associated documentation files (the "Software"), to deal 5*67e74705SXin Li * in the Software without restriction, including without limitation the rights 6*67e74705SXin Li * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7*67e74705SXin Li * copies of the Software, and to permit persons to whom the Software is 8*67e74705SXin Li * furnished to do so, subject to the following conditions: 9*67e74705SXin Li * 10*67e74705SXin Li * The above copyright notice and this permission notice shall be included in 11*67e74705SXin Li * all copies or substantial portions of the Software. 12*67e74705SXin Li * 13*67e74705SXin Li * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14*67e74705SXin Li * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15*67e74705SXin Li * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16*67e74705SXin Li * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17*67e74705SXin Li * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18*67e74705SXin Li * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19*67e74705SXin Li * THE SOFTWARE. 20*67e74705SXin Li * 21*67e74705SXin Li *===-----------------------------------------------------------------------=== 22*67e74705SXin Li */ 23*67e74705SXin Li 24*67e74705SXin Li #ifndef __CUDA_BUILTIN_VARS_H 25*67e74705SXin Li #define __CUDA_BUILTIN_VARS_H 26*67e74705SXin Li 27*67e74705SXin Li // Forward declares from vector_types.h. 28*67e74705SXin Li struct uint3; 29*67e74705SXin Li struct dim3; 30*67e74705SXin Li 31*67e74705SXin Li // The file implements built-in CUDA variables using __declspec(property). 32*67e74705SXin Li // https://msdn.microsoft.com/en-us/library/yhfk0thd.aspx 33*67e74705SXin Li // All read accesses of built-in variable fields get converted into calls to a 34*67e74705SXin Li // getter function which in turn calls the appropriate builtin to fetch the 35*67e74705SXin Li // value. 36*67e74705SXin Li // 37*67e74705SXin Li // Example: 38*67e74705SXin Li // int x = threadIdx.x; 39*67e74705SXin Li // IR output: 40*67e74705SXin Li // %0 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() #3 41*67e74705SXin Li // PTX output: 42*67e74705SXin Li // mov.u32 %r2, %tid.x; 43*67e74705SXin Li 44*67e74705SXin Li #define __CUDA_DEVICE_BUILTIN(FIELD, INTRINSIC) \ 45*67e74705SXin Li __declspec(property(get = __fetch_builtin_##FIELD)) unsigned int FIELD; \ 46*67e74705SXin Li static inline __attribute__((always_inline)) \ 47*67e74705SXin Li __attribute__((device)) unsigned int __fetch_builtin_##FIELD(void) { \ 48*67e74705SXin Li return INTRINSIC; \ 49*67e74705SXin Li } 50*67e74705SXin Li 51*67e74705SXin Li #if __cplusplus >= 201103L 52*67e74705SXin Li #define __DELETE =delete 53*67e74705SXin Li #else 54*67e74705SXin Li #define __DELETE 55*67e74705SXin Li #endif 56*67e74705SXin Li 57*67e74705SXin Li // Make sure nobody can create instances of the special varible types. nvcc 58*67e74705SXin Li // also disallows taking address of special variables, so we disable address-of 59*67e74705SXin Li // operator as well. 60*67e74705SXin Li #define __CUDA_DISALLOW_BUILTINVAR_ACCESS(TypeName) \ 61*67e74705SXin Li __attribute__((device)) TypeName() __DELETE; \ 62*67e74705SXin Li __attribute__((device)) TypeName(const TypeName &) __DELETE; \ 63*67e74705SXin Li __attribute__((device)) void operator=(const TypeName &) const __DELETE; \ 64*67e74705SXin Li __attribute__((device)) TypeName *operator&() const __DELETE 65*67e74705SXin Li 66*67e74705SXin Li struct __cuda_builtin_threadIdx_t { 67*67e74705SXin Li __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_tid_x()); 68*67e74705SXin Li __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_tid_y()); 69*67e74705SXin Li __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_tid_z()); 70*67e74705SXin Li // threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a 71*67e74705SXin Li // uint3). This function is defined after we pull in vector_types.h. 72*67e74705SXin Li __attribute__((device)) operator uint3() const; 73*67e74705SXin Li private: 74*67e74705SXin Li __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_threadIdx_t); 75*67e74705SXin Li }; 76*67e74705SXin Li 77*67e74705SXin Li struct __cuda_builtin_blockIdx_t { 78*67e74705SXin Li __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_ctaid_x()); 79*67e74705SXin Li __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_ctaid_y()); 80*67e74705SXin Li __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ctaid_z()); 81*67e74705SXin Li // blockIdx should be convertible to uint3 (in fact in nvcc, it *is* a 82*67e74705SXin Li // uint3). This function is defined after we pull in vector_types.h. 83*67e74705SXin Li __attribute__((device)) operator uint3() const; 84*67e74705SXin Li private: 85*67e74705SXin Li __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockIdx_t); 86*67e74705SXin Li }; 87*67e74705SXin Li 88*67e74705SXin Li struct __cuda_builtin_blockDim_t { 89*67e74705SXin Li __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_ntid_x()); 90*67e74705SXin Li __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_ntid_y()); 91*67e74705SXin Li __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ntid_z()); 92*67e74705SXin Li // blockDim should be convertible to dim3 (in fact in nvcc, it *is* a 93*67e74705SXin Li // dim3). This function is defined after we pull in vector_types.h. 94*67e74705SXin Li __attribute__((device)) operator dim3() const; 95*67e74705SXin Li private: 96*67e74705SXin Li __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockDim_t); 97*67e74705SXin Li }; 98*67e74705SXin Li 99*67e74705SXin Li struct __cuda_builtin_gridDim_t { 100*67e74705SXin Li __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_nctaid_x()); 101*67e74705SXin Li __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_nctaid_y()); 102*67e74705SXin Li __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_nctaid_z()); 103*67e74705SXin Li // gridDim should be convertible to dim3 (in fact in nvcc, it *is* a 104*67e74705SXin Li // dim3). This function is defined after we pull in vector_types.h. 105*67e74705SXin Li __attribute__((device)) operator dim3() const; 106*67e74705SXin Li private: 107*67e74705SXin Li __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_gridDim_t); 108*67e74705SXin Li }; 109*67e74705SXin Li 110*67e74705SXin Li #define __CUDA_BUILTIN_VAR \ 111*67e74705SXin Li extern const __attribute__((device)) __attribute__((weak)) 112*67e74705SXin Li __CUDA_BUILTIN_VAR __cuda_builtin_threadIdx_t threadIdx; 113*67e74705SXin Li __CUDA_BUILTIN_VAR __cuda_builtin_blockIdx_t blockIdx; 114*67e74705SXin Li __CUDA_BUILTIN_VAR __cuda_builtin_blockDim_t blockDim; 115*67e74705SXin Li __CUDA_BUILTIN_VAR __cuda_builtin_gridDim_t gridDim; 116*67e74705SXin Li 117*67e74705SXin Li // warpSize should translate to read of %WARP_SZ but there's currently no 118*67e74705SXin Li // builtin to do so. According to PTX v4.2 docs 'to date, all target 119*67e74705SXin Li // architectures have a WARP_SZ value of 32'. 120*67e74705SXin Li __attribute__((device)) const int warpSize = 32; 121*67e74705SXin Li 122*67e74705SXin Li #undef __CUDA_DEVICE_BUILTIN 123*67e74705SXin Li #undef __CUDA_BUILTIN_VAR 124*67e74705SXin Li #undef __CUDA_DISALLOW_BUILTINVAR_ACCESS 125*67e74705SXin Li 126*67e74705SXin Li #endif /* __CUDA_BUILTIN_VARS_H */ 127