// Origin: /aosp_15_r20/external/clang/test/CodeGenCUDA/link-device-bitcode.cu (revision 67e74705e28f6214e480b399dd47ea732279e315)
// Test for linking with CUDA's libdevice as outlined in
// http://llvm.org/docs/NVPTXUsage.html#linking-with-libdevice
//
// REQUIRES: nvptx-registered-target
//
// Prepare the bitcode files to link with.
// RUN: %clang_cc1 -triple nvptx-unknown-cuda -emit-llvm-bc \
// RUN:    -disable-llvm-passes -o %t.bc %S/Inputs/device-code.ll
// RUN: %clang_cc1 -triple nvptx-unknown-cuda -emit-llvm-bc \
// RUN:    -disable-llvm-passes -o %t-2.bc %S/Inputs/device-code-2.ll
//
// Make sure function in device-code gets linked in and internalized.
// RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \
// RUN:    -mlink-cuda-bitcode %t.bc  -emit-llvm \
// RUN:    -disable-llvm-passes -o - %s \
// RUN:    | FileCheck %s -check-prefix CHECK-IR
//
// Make sure we can link two bitcode files.
// RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \
// RUN:    -mlink-cuda-bitcode %t.bc -mlink-cuda-bitcode %t-2.bc \
// RUN:    -emit-llvm -disable-llvm-passes -o - %s \
// RUN:    | FileCheck %s -check-prefix CHECK-IR -check-prefix CHECK-IR-2
//
// Make sure function in device-code gets linked but is not internalized
// when it is linked with -mlink-bitcode-file instead of -mlink-cuda-bitcode.
// RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \
// RUN:    -mlink-bitcode-file %t.bc -emit-llvm \
// RUN:    -disable-llvm-passes -o - %s \
// RUN:    | FileCheck %s -check-prefix CHECK-IR-NLD
//
// Make sure NVVMReflect pass is enabled in NVPTX back-end.
// RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \
// RUN:    -mlink-cuda-bitcode %t.bc -S -o /dev/null %s \
// RUN:    -backend-option -debug-pass=Structure 2>&1 \
// RUN:    | FileCheck %s -check-prefix CHECK-REFLECT

#include "Inputs/cuda.h"

// Prototypes only: no definitions for these three functions appear in this
// file. The checks further down expect their definitions to show up in the
// emitted IR, i.e. to be supplied by the bitcode given on the command line.
__device__ float device_mul_or_add(float a, float b);
extern "C" __device__ double __nv_sin(double x);
extern "C" __device__ double __nv_exp(double x);

// Device function with an empty body that is defined in this translation
// unit. The IR check below expects a plain `define void`, i.e. the function
// must not be marked internal.
// NOTE(review): no RUN line visible in this file enables the PTX check
// prefix, so the PTX label directive below appears to be inert -- confirm.
// CHECK-IR-LABEL: define void @_Z26should_not_be_internalizedPf(
// CHECK-PTX-LABEL: .visible .func _Z26should_not_be_internalizedPf(
__device__ void should_not_be_internalized(float *data) {}

// Make sure the kernel has not been internalized.
// NOTE(review): no RUN line visible in this file enables the PTX check
// prefix, so the PTX label directive below appears to be inert -- confirm.
// CHECK-IR-LABEL: define void @_Z6kernelPfS_
// CHECK-PTX-LABEL: .visible .entry _Z6kernelPfS_(
__global__ __attribute__((used)) void kernel(float *out, float *in) {
  // Reference device_mul_or_add() so its definition is needed from the
  // linked bitcode.
  *out = device_mul_or_add(in[0], in[1]);
  // Reference __nv_sin() and __nv_exp(); the double result is added to the
  // float *out.
  *out += __nv_exp(__nv_sin(*out));
  // Call the function defined in this file as well.
  should_not_be_internalized(out);
}

// Make sure device_mul_or_add() is present in IR and calls __nvvm_reflect().
// It is internal when linked with -mlink-cuda-bitcode and is not marked
// internal when linked with -mlink-bitcode-file.
// CHECK-IR-LABEL: define internal float @_Z17device_mul_or_addff(
// CHECK-IR-NLD-LABEL: define float @_Z17device_mul_or_addff(
// CHECK-IR: call i32 @__nvvm_reflect
// CHECK-IR: ret float

// Make sure we've linked in and internalized only needed functions
// from the second bitcode file.
// CHECK-IR-2-LABEL: define internal double @__nv_sin
// CHECK-IR-2-LABEL: define internal double @__nv_exp
// CHECK-IR-2-NOT: double @__unused

// Verify that NVVMReflect pass is among the passes run by NVPTX back-end.
// CHECK-REFLECT: Replace occurrences of __nvvm_reflect() calls with 0/1