; NVPTX (64-bit) module containing two SAXPY kernels and an inlinable expf
; wrapper.
;
;   expf          - forwards its argument to @__nv_expf.
;   cuda_saxpy    - y[i] = a[0] * x[i] + y[i] for the thread's index i,
;                   guarded by i < n[0].
;   cuda_saxpy_s  - same computation, with a call to @llvm.cuda.syncthreads()
;                   placed before the bounds check.
;
; Both kernels are registered as kernels through !nvvm.annotations at the end
; of the file.

target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-unknown-unknown"

; Three-i32 aggregates used for the thread/block coordinate globals below.
%struct.uint3 = type { i32, i32, i32 }
%struct.dim3 = type { i32, i32, i32 }

; Thread/block coordinates are modelled as external globals in address space 1.
; The kernels read field 0 of each through an addrspacecast to a generic
; pointer (field 0 is presumably the x component -- confirm against the
; front end that produced this file).
@blockIdx = external addrspace(1) global %struct.uint3
@blockDim = external addrspace(1) global %struct.dim3
@threadIdx = external addrspace(1) global %struct.uint3

; Thin wrapper: returns @__nv_expf(%f). Marked alwaysinline via attribute
; group #0. The alloca/store/load round trip of %f is unoptimized front-end
; output and is kept as-is.
; Function Attrs: alwaysinline nounwind readnone
define float @expf(float %f) #0 {
entry:
  %f.addr = alloca float, align 4
  store float %f, float* %f.addr, align 4
  %0 = load float, float* %f.addr, align 4
  %call = call float @__nv_expf(float %0)
  ret float %call
}

; External exponential routine called by @expf; its definition is not part of
; this module.
declare float @__nv_expf(float) #1

; SAXPY kernel.
;
; Parameters:
;   %n - pointer to the element count; only element 0 is read.
;   %a - pointer to the scale factor; only element 0 is read.
;   %x - input vector, read at index i.
;   %y - input/output vector, read and written at index i.
;
; Each invocation computes i = blockIdx[0] * blockDim[0] + threadIdx[0] and,
; when i < n[0] (signed compare), stores a[0] * x[i] + y[i] into y[i].
; Invocations with i >= n[0] fall through to if.end without touching x or y.
; Function Attrs: nounwind
define void @cuda_saxpy(i32* %n, float* %a, float* %x, float* %y) #2 {
entry:
  ; Spill the arguments to stack slots (unoptimized front-end output).
  %n.addr = alloca i32*, align 8
  %a.addr = alloca float*, align 8
  %x.addr = alloca float*, align 8
  %y.addr = alloca float*, align 8
  %i = alloca i32, align 4
  store i32* %n, i32** %n.addr, align 8
  store float* %a, float** %a.addr, align 8
  store float* %x, float** %x.addr, align 8
  store float* %y, float** %y.addr, align 8
  ; i = blockIdx[0] * blockDim[0] + threadIdx[0]
  %0 = load i32, i32* getelementptr inbounds (%struct.uint3, %struct.uint3* addrspacecast (%struct.uint3 addrspace(1)* @blockIdx to %struct.uint3*), i32 0, i32 0), align 4
  %1 = load i32, i32* getelementptr inbounds (%struct.dim3, %struct.dim3* addrspacecast (%struct.dim3 addrspace(1)* @blockDim to %struct.dim3*), i32 0, i32 0), align 4
  %mul = mul i32 %0, %1
  %2 = load i32, i32* getelementptr inbounds (%struct.uint3, %struct.uint3* addrspacecast (%struct.uint3 addrspace(1)* @threadIdx to %struct.uint3*), i32 0, i32 0), align 4
  %add = add i32 %mul, %2
  store i32 %add, i32* %i, align 4
  ; Bounds check: only indices below n[0] do any work.
  %3 = load i32, i32* %i, align 4
  %4 = load i32*, i32** %n.addr, align 8
  %arrayidx = getelementptr inbounds i32, i32* %4, i64 0
  %5 = load i32, i32* %arrayidx, align 4
  %cmp = icmp slt i32 %3, %5
  br i1 %cmp, label %if.then, label %if.end

if.then:                                          ; preds = %entry
  ; %7 = a[0]
  %6 = load float*, float** %a.addr, align 8
  %arrayidx1 = getelementptr inbounds float, float* %6, i64 0
  %7 = load float, float* %arrayidx1, align 4
  ; %10 = x[i]
  %8 = load i32, i32* %i, align 4
  %idxprom = sext i32 %8 to i64
  %9 = load float*, float** %x.addr, align 8
  %arrayidx2 = getelementptr inbounds float, float* %9, i64 %idxprom
  %10 = load float, float* %arrayidx2, align 4
  %mul3 = fmul float %7, %10
  ; %13 = y[i]
  %11 = load i32, i32* %i, align 4
  %idxprom4 = sext i32 %11 to i64
  %12 = load float*, float** %y.addr, align 8
  %arrayidx5 = getelementptr inbounds float, float* %12, i64 %idxprom4
  %13 = load float, float* %arrayidx5, align 4
  %add6 = fadd float %mul3, %13
  ; y[i] = a[0] * x[i] + y[i]
  %14 = load i32, i32* %i, align 4
  %idxprom7 = sext i32 %14 to i64
  %15 = load float*, float** %y.addr, align 8
  %arrayidx8 = getelementptr inbounds float, float* %15, i64 %idxprom7
  store float %add6, float* %arrayidx8, align 4
  br label %if.end

if.end:                                           ; preds = %if.then, %entry
  ret void
}

; SAXPY kernel with a barrier call.
;
; Same parameters and computation as a plain SAXPY: with
; i = blockIdx[0] * blockDim[0] + threadIdx[0], stores a[0] * x[i] + y[i]
; into y[i] when i < n[0] (signed compare). The only difference is the call to
; @llvm.cuda.syncthreads() in the entry block, after the index is computed
; and before the bounds check, so the call is executed on every path through
; the function.
; Function Attrs: nounwind
define void @cuda_saxpy_s(i32* %n, float* %a, float* %x, float* %y) #2 {
entry:
  ; Spill the arguments to stack slots (unoptimized front-end output).
  %n.addr = alloca i32*, align 8
  %a.addr = alloca float*, align 8
  %x.addr = alloca float*, align 8
  %y.addr = alloca float*, align 8
  %i = alloca i32, align 4
  store i32* %n, i32** %n.addr, align 8
  store float* %a, float** %a.addr, align 8
  store float* %x, float** %x.addr, align 8
  store float* %y, float** %y.addr, align 8
  ; i = blockIdx[0] * blockDim[0] + threadIdx[0]
  %0 = load i32, i32* getelementptr inbounds (%struct.uint3, %struct.uint3* addrspacecast (%struct.uint3 addrspace(1)* @blockIdx to %struct.uint3*), i32 0, i32 0), align 4
  %1 = load i32, i32* getelementptr inbounds (%struct.dim3, %struct.dim3* addrspacecast (%struct.dim3 addrspace(1)* @blockDim to %struct.dim3*), i32 0, i32 0), align 4
  %mul = mul i32 %0, %1
  %2 = load i32, i32* getelementptr inbounds (%struct.uint3, %struct.uint3* addrspacecast (%struct.uint3 addrspace(1)* @threadIdx to %struct.uint3*), i32 0, i32 0), align 4
  %add = add i32 %mul, %2
  store i32 %add, i32* %i, align 4
  ; Barrier call sits ahead of the conditional branch below.
  call void @llvm.cuda.syncthreads()
  ; Bounds check: only indices below n[0] do any work.
  %3 = load i32, i32* %i, align 4
  %4 = load i32*, i32** %n.addr, align 8
  %arrayidx = getelementptr inbounds i32, i32* %4, i64 0
  %5 = load i32, i32* %arrayidx, align 4
  %cmp = icmp slt i32 %3, %5
  br i1 %cmp, label %if.then, label %if.end

if.then:                                          ; preds = %entry
  ; %7 = a[0]
  %6 = load float*, float** %a.addr, align 8
  %arrayidx1 = getelementptr inbounds float, float* %6, i64 0
  %7 = load float, float* %arrayidx1, align 4
  ; %10 = x[i]
  %8 = load i32, i32* %i, align 4
  %idxprom = sext i32 %8 to i64
  %9 = load float*, float** %x.addr, align 8
  %arrayidx2 = getelementptr inbounds float, float* %9, i64 %idxprom
  %10 = load float, float* %arrayidx2, align 4
  %mul3 = fmul float %7, %10
  ; %13 = y[i]
  %11 = load i32, i32* %i, align 4
  %idxprom4 = sext i32 %11 to i64
  %12 = load float*, float** %y.addr, align 8
  %arrayidx5 = getelementptr inbounds float, float* %12, i64 %idxprom4
  %13 = load float, float* %arrayidx5, align 4
  %add6 = fadd float %mul3, %13
  ; y[i] = a[0] * x[i] + y[i]
  %14 = load i32, i32* %i, align 4
  %idxprom7 = sext i32 %14 to i64
  %15 = load float*, float** %y.addr, align 8
  %arrayidx8 = getelementptr inbounds float, float* %15, i64 %idxprom7
  store float %add6, float* %arrayidx8, align 4
  br label %if.end

if.end:                                           ; preds = %if.then, %entry
  ret void
}

; Barrier intrinsic called by @cuda_saxpy_s.
; Function Attrs: nounwind
declare void @llvm.cuda.syncthreads() #3

; Attribute groups: #0 -> @expf, #1 -> @__nv_expf, #2 -> both kernels,
; #3 -> @llvm.cuda.syncthreads.
attributes #0 = { alwaysinline nounwind readnone "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #3 = { nounwind }

; !0 and !1 tag @cuda_saxpy and @cuda_saxpy_s with the "kernel" annotation.
!nvvm.annotations = !{!0, !1}
!llvm.ident = !{!2}

!0 = !{void (i32*, float*, float*, float*)* @cuda_saxpy, !"kernel", i32 1}
!1 = !{void (i32*, float*, float*, float*)* @cuda_saxpy_s, !"kernel", i32 1}
!2 = !{!"clang version xla-trunk (trunk r203011)"}