; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix PTX
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix PTX
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -nvptx-use-infer-addrspace | FileCheck %s --check-prefix PTX
; RUN: opt < %s -S -nvptx-favor-non-generic -dce | FileCheck %s --check-prefix IR
; RUN: opt < %s -S -nvptx-infer-addrspace | FileCheck %s --check-prefix IR --check-prefix IR-WITH-LOOP

; Test fixtures. @array and @scalar live in address space 3 (accessed with
; ld.shared/st.shared in the expected PTX below); @generic_scalar carries no
; address space qualifier.
@array = internal addrspace(3) global [10 x float] zeroinitializer, align 4
@scalar = internal addrspace(3) global float 0.000000e+00, align 4
@generic_scalar = internal global float 0.000000e+00, align 4

; Casts in the opposite direction (generic pointer to addrspace(3)) and loads
; through the result. This function carries no FileCheck directives of its own.
define float @ld_from_shared() {
  %1 = addrspacecast float* @generic_scalar to float addrspace(3)*
  %2 = load float, float addrspace(3)* %1
  ret float %2
}

; Verifies nvptx-favor-non-generic correctly optimizes generic address space
; usage to non-generic address space usage for the patterns we claim to handle:
; 1. load cast
; 2. store cast
; 3. load gep cast
; 4. store gep cast
; gep and cast can be an instruction or a constant expression. This function
; tries all possible combinations.
define void @ld_st_shared_f32(i32 %i, float %v) {
; IR-LABEL: @ld_st_shared_f32
; IR-NOT: addrspacecast
; PTX-LABEL: ld_st_shared_f32(
  ; load cast
  %1 = load float, float* addrspacecast (float addrspace(3)* @scalar to float*), align 4
  call void @use(float %1)
; PTX: ld.shared.f32 %f{{[0-9]+}}, [scalar];
  ; store cast
  store float %v, float* addrspacecast (float addrspace(3)* @scalar to float*), align 4
; PTX: st.shared.f32 [scalar], %f{{[0-9]+}};
  ; use syncthreads to disable optimizations across components
  call void @llvm.nvvm.barrier0()
; PTX: bar.sync 0;

  ; cast; load
  %2 = addrspacecast float addrspace(3)* @scalar to float*
  %3 = load float, float* %2, align 4
  call void @use(float %3)
; PTX: ld.shared.f32 %f{{[0-9]+}}, [scalar];
  ; cast; store
  store float %v, float* %2, align 4
; PTX: st.shared.f32 [scalar], %f{{[0-9]+}};
  call void @llvm.nvvm.barrier0()
; PTX: bar.sync 0;

  ; load gep cast
  %4 = load float, float* getelementptr inbounds ([10 x float], [10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i32 0, i32 5), align 4
  call void @use(float %4)
; PTX: ld.shared.f32 %f{{[0-9]+}}, [array+20];
  ; store gep cast
  store float %v, float* getelementptr inbounds ([10 x float], [10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i32 0, i32 5), align 4
; PTX: st.shared.f32 [array+20], %f{{[0-9]+}};
  call void @llvm.nvvm.barrier0()
; PTX: bar.sync 0;

  ; gep cast; load
  %5 = getelementptr inbounds [10 x float], [10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i32 0, i32 5
  %6 = load float, float* %5, align 4
  call void @use(float %6)
; PTX: ld.shared.f32 %f{{[0-9]+}}, [array+20];
  ; gep cast; store
  store float %v, float* %5, align 4
; PTX: st.shared.f32 [array+20], %f{{[0-9]+}};
  call void @llvm.nvvm.barrier0()
; PTX: bar.sync 0;

  ; cast; gep; load
  ; (the index is the runtime argument %i, so the expected address is a
  ; register rather than a symbol+offset)
  %7 = addrspacecast [10 x float] addrspace(3)* @array to [10 x float]*
  %8 = getelementptr inbounds [10 x float], [10 x float]* %7, i32 0, i32 %i
  %9 = load float, float* %8, align 4
  call void @use(float %9)
; PTX: ld.shared.f32 %f{{[0-9]+}}, [%{{(r|rl|rd)[0-9]+}}];
  ; cast; gep; store
  store float %v, float* %8, align 4
; PTX: st.shared.f32 [%{{(r|rl|rd)[0-9]+}}], %f{{[0-9]+}};
  call void @llvm.nvvm.barrier0()
; PTX: bar.sync 0;

  ret void
}

; When hoisting an addrspacecast between different pointer types, replace the
; addrspacecast with a bitcast.
define i32 @ld_int_from_float() {
; IR-LABEL: @ld_int_from_float
; IR: load i32, i32 addrspace(3)* bitcast (float addrspace(3)* @scalar to i32 addrspace(3)*)
; PTX-LABEL: ld_int_from_float(
; PTX: ld.shared.u{{(32|64)}}
  %1 = load i32, i32* addrspacecast(float addrspace(3)* @scalar to i32*), align 4
  ret i32 %1
}

; The addrspacecast of an addrspace(1) argument feeds a chain of two geps and a
; bitcast before the load; every link of the chain is expected to be rewritten
; to addrspace(1).
define i32 @ld_int_from_global_float(float addrspace(1)* %input, i32 %i, i32 %j) {
; IR-LABEL: @ld_int_from_global_float(
; PTX-LABEL: ld_int_from_global_float(
  %1 = addrspacecast float addrspace(1)* %input to float*
  %2 = getelementptr float, float* %1, i32 %i
; IR-NEXT: getelementptr float, float addrspace(1)* %input, i32 %i
  %3 = getelementptr float, float* %2, i32 %j
; IR-NEXT: getelementptr float, float addrspace(1)* {{%[^,]+}}, i32 %j
  %4 = bitcast float* %3 to i32*
; IR-NEXT: bitcast float addrspace(1)* {{%[^ ]+}} to i32 addrspace(1)*
  %5 = load i32, i32* %4
; IR-NEXT: load i32, i32 addrspace(1)* {{%.+}}
  ; Matched as an ordinary check: the pattern is an instruction inside this
  ; function's body, not a label that is unique in the output.
; PTX: ld.global
  ret i32 %5
}

; The store address is a constant expression nested three levels deep.
define void @nested_const_expr() {
; PTX-LABEL: nested_const_expr(
  ; store 1 to bitcast(gep(addrspacecast(array), 0, 1))
  store i32 1, i32* bitcast (float* getelementptr ([10 x float], [10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i64 0, i64 1) to i32*), align 4
; PTX: mov.u32 %r1, 1;
; PTX-NEXT: st.shared.u32 [array+4], %r1;
  ret void
}

; %addr is used by both a load and a store; both users are expected to end up
; on the addrspace(1) pointer and the addrspacecast is expected to disappear.
define void @rauw(float addrspace(1)* %input) {
  %generic_input = addrspacecast float addrspace(1)* %input to float*
  %addr = getelementptr float, float* %generic_input, i64 10
  %v = load float, float* %addr
  store float %v, float* %addr
  ret void
; IR-LABEL: @rauw(
; IR-NEXT: %addr = getelementptr float, float addrspace(1)* %input, i64 10
; IR-NEXT: %v = load float, float addrspace(1)* %addr
; IR-NEXT: store float %v, float addrspace(1)* %addr
; IR-NEXT: ret void
}

; Pointer induction variable carried through a phi. The checks in this function
; use the IR-WITH-LOOP prefix, which is only enabled by the
; -nvptx-infer-addrspace invocation of opt.
define void @loop() {
; IR-WITH-LOOP-LABEL: @loop(
entry:
  %p = addrspacecast [10 x float] addrspace(3)* @array to float*
  %end = getelementptr float, float* %p, i64 10
  br label %loop

loop:
  %i = phi float* [ %p, %entry ], [ %i2, %loop ]
; IR-WITH-LOOP: phi float addrspace(3)* [ %p, %entry ], [ %i2, %loop ]
  %v = load float, float* %i
; IR-WITH-LOOP: %v = load float, float addrspace(3)* %i
  call void @use(float %v)
  %i2 = getelementptr float, float* %i, i64 1
; IR-WITH-LOOP: %i2 = getelementptr float, float addrspace(3)* %i, i64 1
  %exit_cond = icmp eq float* %i2, %end
  br i1 %exit_cond, label %exit, label %loop

exit:
  ret void
}

@generic_end = external global float*

; Same loop, but the bound is a generic pointer loaded from @generic_end. The
; phi, load and gep are still expected in addrspace(3), with an addrspacecast
; back to a generic pointer inserted for the comparison against %end.
define void @loop_with_generic_bound() {
; IR-WITH-LOOP-LABEL: @loop_with_generic_bound(
entry:
  %p = addrspacecast [10 x float] addrspace(3)* @array to float*
  %end = load float*, float** @generic_end
  br label %loop

loop:
  %i = phi float* [ %p, %entry ], [ %i2, %loop ]
; IR-WITH-LOOP: phi float addrspace(3)* [ %p, %entry ], [ %i2, %loop ]
  %v = load float, float* %i
; IR-WITH-LOOP: %v = load float, float addrspace(3)* %i
  call void @use(float %v)
  %i2 = getelementptr float, float* %i, i64 1
; IR-WITH-LOOP: %i2 = getelementptr float, float addrspace(3)* %i, i64 1
  %exit_cond = icmp eq float* %i2, %end
; IR-WITH-LOOP: addrspacecast float addrspace(3)* %i2 to float*
; IR-WITH-LOOP: icmp eq float* %{{[0-9]+}}, %end
  br i1 %exit_cond, label %exit, label %loop

exit:
  ret void
}

declare void @llvm.nvvm.barrier0() #3

declare void @use(float)

attributes #3 = { noduplicate nounwind }