// Test for linking with CUDA's libdevice as outlined in
// http://llvm.org/docs/NVPTXUsage.html#linking-with-libdevice
//
// REQUIRES: nvptx-registered-target
//
// Prepare bitcode file to link with
// RUN: %clang_cc1 -triple nvptx-unknown-cuda -emit-llvm-bc \
// RUN:    -disable-llvm-passes -o %t.bc %S/Inputs/device-code.ll
// RUN: %clang_cc1 -triple nvptx-unknown-cuda -emit-llvm-bc \
// RUN:    -disable-llvm-passes -o %t-2.bc %S/Inputs/device-code-2.ll
//
// Make sure function in device-code gets linked in and internalized.
// RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \
// RUN:    -mlink-cuda-bitcode %t.bc -emit-llvm \
// RUN:    -disable-llvm-passes -o - %s \
// RUN:    | FileCheck %s -check-prefix CHECK-IR
//
// Make sure we can link two bitcode files.
// RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \
// RUN:    -mlink-cuda-bitcode %t.bc -mlink-cuda-bitcode %t-2.bc \
// RUN:    -emit-llvm -disable-llvm-passes -o - %s \
// RUN:    | FileCheck %s -check-prefix CHECK-IR -check-prefix CHECK-IR-2
//
// Make sure function in device-code gets linked but is not internalized
// when it is linked with -mlink-bitcode-file instead of -mlink-cuda-bitcode.
// RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \
// RUN:    -mlink-bitcode-file %t.bc -emit-llvm \
// RUN:    -disable-llvm-passes -o - %s \
// RUN:    | FileCheck %s -check-prefix CHECK-IR-NLD
//
// Make sure NVVMReflect pass is enabled in NVPTX back-end.
// RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \
// RUN:    -mlink-cuda-bitcode %t.bc -S -o /dev/null %s \
// RUN:    -backend-option -debug-pass=Structure 2>&1 \
// RUN:    | FileCheck %s -check-prefix CHECK-REFLECT

#include "Inputs/cuda.h"

// Declarations only: no definitions for these appear in this file. The checks
// further down expect them to be present in the IR once the bitcode files
// named by the commands in the file header have been linked in.
__device__ float device_mul_or_add(float a, float b);
extern "C" __device__ double __nv_sin(double x);
extern "C" __device__ double __nv_exp(double x);

// NOTE(review): no command in this file hands the CHECK-PTX prefix to
// FileCheck, so the two CHECK-PTX lines below are currently inactive.

// Device function defined in this translation unit; its definition must stay
// externally visible (no "internal" linkage) in the emitted IR.
// CHECK-IR-LABEL: define void @_Z26should_not_be_internalizedPf(
// CHECK-PTX-LABEL: .visible .func _Z26should_not_be_internalizedPf(
__device__ void should_not_be_internalized(float *data) {}

// Make sure kernel call has not been internalized.
// CHECK-IR-LABEL: define void @_Z6kernelPfS_
// CHECK-PTX-LABEL: .visible .entry _Z6kernelPfS_(
__global__ __attribute__((used)) void kernel(float *out, float *in) {
  // Call every externally declared function, plus the local device function,
  // so that each one is referenced from the kernel.
  *out = device_mul_or_add(in[0], in[1]);
  *out += __nv_exp(__nv_sin(*out));
  should_not_be_internalized(out);
}

// Make sure device_mul_or_add() is present in IR, is internal and
// calls __nvvm_reflect().
// CHECK-IR-LABEL: define internal float @_Z17device_mul_or_addff(
// CHECK-IR-NLD-LABEL: define float @_Z17device_mul_or_addff(
// CHECK-IR: call i32 @__nvvm_reflect
// CHECK-IR: ret float

// Make sure we've linked in and internalized only needed functions
// from the second bitcode file.
// CHECK-IR-2-LABEL: define internal double @__nv_sin
// CHECK-IR-2-LABEL: define internal double @__nv_exp
// CHECK-IR-2-NOT: double @__unused

// Verify that NVVMReflect pass is among the passes run by NVPTX back-end.
// CHECK-REFLECT: Replace occurrences of __nvvm_reflect() calls with 0/1