; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse2 < %s | FileCheck %s --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s --check-prefix=AVX

; Verify we fold loads into unary sse intrinsics only when optimizing for size
;
; Every function below has the same shape: load a scalar through the pointer
; argument, insert it into lane 0 of an otherwise-undef vector, call one unary
; scalar intrinsic (rcp.ss, rsqrt.ss, sqrt.ss or sqrt.sd), and return lane 0 of
; the result. The file is compiled twice, once with +sse2 and once with +avx,
; and each configuration is matched against its own check prefix.
;
; The first four functions carry no size attribute; their expected output is a
; separate scalar load (movss/movsd) followed by the register-to-register form
; of the operation.
; NOTE(review): keeping the load separate presumably avoids a false dependency
; on the previous contents of the destination register - confirm against the
; X86 backend before relying on this explanation.
;
; The four "_size" functions are identical except for the optsize attribute;
; their expected output is the single memory-operand form of the operation.
;
; The check directives are generated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

; Reciprocal estimate, single precision, without optsize: the load is expected
; to stay a separate instruction.
define float @rcpss(float* %a) {
; SSE-LABEL: rcpss:
; SSE:       # BB#0:
; SSE-NEXT:    movss (%rdi), %xmm0
; SSE-NEXT:    rcpss %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: rcpss:
; AVX:       # BB#0:
; AVX-NEXT:    vmovss (%rdi), %xmm0
; AVX-NEXT:    vrcpss %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %ld = load float, float* %a
  %ins = insertelement <4 x float> undef, float %ld, i32 0
  %res = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %ins)
  %ext = extractelement <4 x float> %res, i32 0
  ret float %ext
}

; Reciprocal square-root estimate, single precision, without optsize: the load
; is expected to stay a separate instruction.
define float @rsqrtss(float* %a) {
; SSE-LABEL: rsqrtss:
; SSE:       # BB#0:
; SSE-NEXT:    movss (%rdi), %xmm0
; SSE-NEXT:    rsqrtss %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: rsqrtss:
; AVX:       # BB#0:
; AVX-NEXT:    vmovss (%rdi), %xmm0
; AVX-NEXT:    vrsqrtss %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %ld = load float, float* %a
  %ins = insertelement <4 x float> undef, float %ld, i32 0
  %res = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %ins)
  %ext = extractelement <4 x float> %res, i32 0
  ret float %ext
}

; Square root, single precision, without optsize: the load is expected to stay
; a separate instruction.
define float @sqrtss(float* %a) {
; SSE-LABEL: sqrtss:
; SSE:       # BB#0:
; SSE-NEXT:    movss (%rdi), %xmm0
; SSE-NEXT:    sqrtss %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sqrtss:
; AVX:       # BB#0:
; AVX-NEXT:    vmovss (%rdi), %xmm0
; AVX-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %ld = load float, float* %a
  %ins = insertelement <4 x float> undef, float %ld, i32 0
  %res = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %ins)
  %ext = extractelement <4 x float> %res, i32 0
  ret float %ext
}

; Square root, double precision, without optsize: the load is expected to stay
; a separate instruction.
define double @sqrtsd(double* %a) {
; SSE-LABEL: sqrtsd:
; SSE:       # BB#0:
; SSE-NEXT:    movsd (%rdi), %xmm0
; SSE-NEXT:    sqrtsd %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sqrtsd:
; AVX:       # BB#0:
; AVX-NEXT:    vmovsd (%rdi), %xmm0
; AVX-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %ld = load double, double* %a
  %ins = insertelement <2 x double> undef, double %ld, i32 0
  %res = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %ins)
  %ext = extractelement <2 x double> %res, i32 0
  ret double %ext
}

; Same IR as @rcpss plus optsize: the load is expected to be folded into the
; memory-operand form of the instruction.
define float @rcpss_size(float* %a) optsize {
; SSE-LABEL: rcpss_size:
; SSE:       # BB#0:
; SSE-NEXT:    rcpss (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: rcpss_size:
; AVX:       # BB#0:
; AVX-NEXT:    vrcpss (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %ld = load float, float* %a
  %ins = insertelement <4 x float> undef, float %ld, i32 0
  %res = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %ins)
  %ext = extractelement <4 x float> %res, i32 0
  ret float %ext
}

; Same IR as @rsqrtss plus optsize: the load is expected to be folded into the
; memory-operand form of the instruction.
define float @rsqrtss_size(float* %a) optsize {
; SSE-LABEL: rsqrtss_size:
; SSE:       # BB#0:
; SSE-NEXT:    rsqrtss (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: rsqrtss_size:
; AVX:       # BB#0:
; AVX-NEXT:    vrsqrtss (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %ld = load float, float* %a
  %ins = insertelement <4 x float> undef, float %ld, i32 0
  %res = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %ins)
  %ext = extractelement <4 x float> %res, i32 0
  ret float %ext
}

; Same IR as @sqrtss plus optsize: the load is expected to be folded into the
; memory-operand form of the instruction.
define float @sqrtss_size(float* %a) optsize {
; SSE-LABEL: sqrtss_size:
; SSE:       # BB#0:
; SSE-NEXT:    sqrtss (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sqrtss_size:
; AVX:       # BB#0:
; AVX-NEXT:    vsqrtss (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %ld = load float, float* %a
  %ins = insertelement <4 x float> undef, float %ld, i32 0
  %res = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %ins)
  %ext = extractelement <4 x float> %res, i32 0
  ret float %ext
}

; Same IR as @sqrtsd plus optsize: the load is expected to be folded into the
; memory-operand form of the instruction.
define double @sqrtsd_size(double* %a) optsize {
; SSE-LABEL: sqrtsd_size:
; SSE:       # BB#0:
; SSE-NEXT:    sqrtsd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sqrtsd_size:
; AVX:       # BB#0:
; AVX-NEXT:    vsqrtsd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %ld = load double, double* %a
  %ins = insertelement <2 x double> undef, double %ld, i32 0
  %res = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %ins)
  %ext = extractelement <2 x double> %res, i32 0
  ret double %ext
}

; Declarations of the scalar intrinsics exercised above.
declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone
declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone