; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding | FileCheck %s

; Tests for AVX-512 (KNL) scalar/vector move instruction selection, checking
; both the mnemonic and the exact EVEX encoding bytes emitted by the MC layer.
; test1-test15:  scalar GPR<->XMM moves and scalar load/store (vmovd/vmovq/vmovss/vmovsd).
; test16-test31: 512-bit aligned/unaligned integer and FP loads/stores.
; test32-test47: masked (blend) and zero-masked 512-bit loads.

define i32 @test1(float %x) {
; CHECK-LABEL: test1:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovd %xmm0, %eax ## encoding: [0x62,0xf1,0x7d,0x08,0x7e,0xc0]
; CHECK-NEXT:    retq ## encoding: [0xc3]
   %res = bitcast float %x to i32
   ret i32 %res
}

define <4 x i32> @test2(i32 %x) {
; CHECK-LABEL: test2:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovd %edi, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0xc7]
; CHECK-NEXT:    retq ## encoding: [0xc3]
   %res = insertelement <4 x i32>undef, i32 %x, i32 0
   ret <4 x i32>%res
}

define <2 x i64> @test3(i64 %x) {
; CHECK-LABEL: test3:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovq %rdi, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6e,0xc7]
; CHECK-NEXT:    retq ## encoding: [0xc3]
   %res = insertelement <2 x i64>undef, i64 %x, i32 0
   ret <2 x i64>%res
}

define <4 x i32> @test4(i32* %x) {
; CHECK-LABEL: test4:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovd (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0x07]
; CHECK-NEXT:    ## xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    retq ## encoding: [0xc3]
   %y = load i32, i32* %x
   %res = insertelement <4 x i32>undef, i32 %y, i32 0
   ret <4 x i32>%res
}

define void @test5(float %x, float* %y) {
; CHECK-LABEL: test5:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovss %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7e,0x08,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
   store float %x, float* %y, align 4
   ret void
}

define void @test6(double %x, double* %y) {
; CHECK-LABEL: test6:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovsd %xmm0, (%rdi) ## encoding: [0x62,0xf1,0xff,0x08,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
   store double %x, double* %y, align 8
   ret void
}

define float @test7(i32* %x) {
; CHECK-LABEL: test7:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovss (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x10,0x07]
; CHECK-NEXT:    ## xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    retq ## encoding: [0xc3]
   %y = load i32, i32* %x
   %res = bitcast i32 %y to float
   ret float %res
}

define i32 @test8(<4 x i32> %x) {
; CHECK-LABEL: test8:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovd %xmm0, %eax ## encoding: [0x62,0xf1,0x7d,0x08,0x7e,0xc0]
; CHECK-NEXT:    retq ## encoding: [0xc3]
   %res = extractelement <4 x i32> %x, i32 0
   ret i32 %res
}

define i64 @test9(<2 x i64> %x) {
; CHECK-LABEL: test9:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovq %xmm0, %rax ## encoding: [0x62,0xf1,0xfd,0x08,0x7e,0xc0]
; CHECK-NEXT:    retq ## encoding: [0xc3]
   %res = extractelement <2 x i64> %x, i32 0
   ret i64 %res
}

define <4 x i32> @test10(i32* %x) {
; CHECK-LABEL: test10:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovd (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0x07]
; CHECK-NEXT:    ## xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    retq ## encoding: [0xc3]
   %y = load i32, i32* %x, align 4
   %res = insertelement <4 x i32>zeroinitializer, i32 %y, i32 0
   ret <4 x i32>%res
}

define <4 x float> @test11(float* %x) {
; CHECK-LABEL: test11:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovss (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x10,0x07]
; CHECK-NEXT:    ## xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    retq ## encoding: [0xc3]
   %y = load float, float* %x, align 4
   %res = insertelement <4 x float>zeroinitializer, float %y, i32 0
   ret <4 x float>%res
}

define <2 x double> @test12(double* %x) {
; CHECK-LABEL: test12:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovsd (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xff,0x08,0x10,0x07]
; CHECK-NEXT:    ## xmm0 = mem[0],zero
; CHECK-NEXT:    retq ## encoding: [0xc3]
   %y = load double, double* %x, align 8
   %res = insertelement <2 x double>zeroinitializer, double %y, i32 0
   ret <2 x double>%res
}

define <2 x i64> @test13(i64 %x) {
; CHECK-LABEL: test13:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovq %rdi, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6e,0xc7]
; CHECK-NEXT:    retq ## encoding: [0xc3]
   %res = insertelement <2 x i64>zeroinitializer, i64 %x, i32 0
   ret <2 x i64>%res
}

define <4 x i32> @test14(i32 %x) {
; CHECK-LABEL: test14:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovd %edi, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0xc7]
; CHECK-NEXT:    retq ## encoding: [0xc3]
   %res = insertelement <4 x i32>zeroinitializer, i32 %x, i32 0
   ret <4 x i32>%res
}

define <4 x i32> @test15(i32* %x) {
; CHECK-LABEL: test15:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovd (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0x07]
; CHECK-NEXT:    ## xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    retq ## encoding: [0xc3]
   %y = load i32, i32* %x, align 4
   %res = insertelement <4 x i32>zeroinitializer, i32 %y, i32 0
   ret <4 x i32>%res
}

define <16 x i32> @test16(i8 * %addr) {
; CHECK-LABEL: test16:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqu32 (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7e,0x48,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <16 x i32>*
  %res = load <16 x i32>, <16 x i32>* %vaddr, align 1
  ret <16 x i32>%res
}

define <16 x i32> @test17(i8 * %addr) {
; CHECK-LABEL: test17:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqa32 (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <16 x i32>*
  %res = load <16 x i32>, <16 x i32>* %vaddr, align 64
  ret <16 x i32>%res
}

define void @test18(i8 * %addr, <8 x i64> %data) {
; CHECK-LABEL: test18:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqa64 %zmm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x48,0x7f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x i64>*
  store <8 x i64>%data, <8 x i64>* %vaddr, align 64
  ret void
}

define void @test19(i8 * %addr, <16 x i32> %data) {
; CHECK-LABEL: test19:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqu32 %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7e,0x48,0x7f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <16 x i32>*
  store <16 x i32>%data, <16 x i32>* %vaddr, align 1
  ret void
}

define void @test20(i8 * %addr, <16 x i32> %data) {
; CHECK-LABEL: test20:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqa32 %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7d,0x48,0x7f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <16 x i32>*
  store <16 x i32>%data, <16 x i32>* %vaddr, align 64
  ret void
}

define <8 x i64> @test21(i8 * %addr) {
; CHECK-LABEL: test21:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqa64 (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x i64>*
  %res = load <8 x i64>, <8 x i64>* %vaddr, align 64
  ret <8 x i64>%res
}

define void @test22(i8 * %addr, <8 x i64> %data) {
; CHECK-LABEL: test22:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqu64 %zmm0, (%rdi) ## encoding: [0x62,0xf1,0xfe,0x48,0x7f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x i64>*
  store <8 x i64>%data, <8 x i64>* %vaddr, align 1
  ret void
}

define <8 x i64> @test23(i8 * %addr) {
; CHECK-LABEL: test23:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqu64 (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x i64>*
  %res = load <8 x i64>, <8 x i64>* %vaddr, align 1
  ret <8 x i64>%res
}

define void @test24(i8 * %addr, <8 x double> %data) {
; CHECK-LABEL: test24:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovapd %zmm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x48,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x double>*
  store <8 x double>%data, <8 x double>* %vaddr, align 64
  ret void
}

define <8 x double> @test25(i8 * %addr) {
; CHECK-LABEL: test25:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovapd (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x double>*
  %res = load <8 x double>, <8 x double>* %vaddr, align 64
  ret <8 x double>%res
}

define void @test26(i8 * %addr, <16 x float> %data) {
; CHECK-LABEL: test26:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovaps %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x48,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <16 x float>*
  store <16 x float>%data, <16 x float>* %vaddr, align 64
  ret void
}

define <16 x float> @test27(i8 * %addr) {
; CHECK-LABEL: test27:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovaps (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <16 x float>*
  %res = load <16 x float>, <16 x float>* %vaddr, align 64
  ret <16 x float>%res
}

define void @test28(i8 * %addr, <8 x double> %data) {
; CHECK-LABEL: test28:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovupd %zmm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x48,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x double>*
  store <8 x double>%data, <8 x double>* %vaddr, align 1
  ret void
}

define <8 x double> @test29(i8 * %addr) {
; CHECK-LABEL: test29:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovupd (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x double>*
  %res = load <8 x double>, <8 x double>* %vaddr, align 1
  ret <8 x double>%res
}

define void @test30(i8 * %addr, <16 x float> %data) {
; CHECK-LABEL: test30:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovups %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <16 x float>*
  store <16 x float>%data, <16 x float>* %vaddr, align 1
  ret void
}

define <16 x float> @test31(i8 * %addr) {
; CHECK-LABEL: test31:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovups (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <16 x float>*
  %res = load <16 x float>, <16 x float>* %vaddr, align 1
  ret <16 x float>%res
}

define <16 x i32> @test32(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) {
; CHECK-LABEL: test32:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2]
; CHECK-NEXT:    vpcmpneqd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x48,0x1f,0xca,0x04]
; CHECK-NEXT:    vpblendmd (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x64,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x i32>*
  %r = load <16 x i32>, <16 x i32>* %vaddr, align 64
  %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> %old
  ret <16 x i32>%res
}

define <16 x i32> @test33(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) {
; CHECK-LABEL: test33:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2]
; CHECK-NEXT:    vpcmpneqd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x48,0x1f,0xca,0x04]
; CHECK-NEXT:    vpblendmd (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x64,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x i32>*
  %r = load <16 x i32>, <16 x i32>* %vaddr, align 1
  %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> %old
  ret <16 x i32>%res
}

define <16 x i32> @test34(i8 * %addr, <16 x i32> %mask1) {
; CHECK-LABEL: test34:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm1, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovdqa32 (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x i32>*
  %r = load <16 x i32>, <16 x i32>* %vaddr, align 64
  %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> zeroinitializer
  ret <16 x i32>%res
}

define <16 x i32> @test35(i8 * %addr, <16 x i32> %mask1) {
; CHECK-LABEL: test35:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm1, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovdqu32 (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xc9,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x i32>*
  %r = load <16 x i32>, <16 x i32>* %vaddr, align 1
  %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> zeroinitializer
  ret <16 x i32>%res
}

define <8 x i64> @test36(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) {
; CHECK-LABEL: test36:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2]
; CHECK-NEXT:    vpcmpneqq %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x48,0x1f,0xca,0x04]
; CHECK-NEXT:    vpblendmq (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x64,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x i64>*
  %r = load <8 x i64>, <8 x i64>* %vaddr, align 64
  %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> %old
  ret <8 x i64>%res
}

define <8 x i64> @test37(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) {
; CHECK-LABEL: test37:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2]
; CHECK-NEXT:    vpcmpneqq %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x48,0x1f,0xca,0x04]
; CHECK-NEXT:    vpblendmq (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x64,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x i64>*
  %r = load <8 x i64>, <8 x i64>* %vaddr, align 1
  %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> %old
  ret <8 x i64>%res
}

define <8 x i64> @test38(i8 * %addr, <8 x i64> %mask1) {
; CHECK-LABEL: test38:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm1, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovdqa64 (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x i64>*
  %r = load <8 x i64>, <8 x i64>* %vaddr, align 64
  %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> zeroinitializer
  ret <8 x i64>%res
}

define <8 x i64> @test39(i8 * %addr, <8 x i64> %mask1) {
; CHECK-LABEL: test39:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm1, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovdqu64 (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfe,0xc9,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x i64>*
  %r = load <8 x i64>, <8 x i64>* %vaddr, align 1
  %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> zeroinitializer
  ret <8 x i64>%res
}

define <16 x float> @test40(i8 * %addr, <16 x float> %old, <16 x float> %mask1) {
; CHECK-LABEL: test40:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2]
; CHECK-NEXT:    vcmpordps %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0x74,0x48,0xc2,0xca,0x07]
; CHECK-NEXT:    vcmpneqps %zmm2, %zmm1, %k1 {%k1} ## encoding: [0x62,0xf1,0x74,0x49,0xc2,0xca,0x04]
; CHECK-NEXT:    vblendmps (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x65,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <16 x float> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x float>*
  %r = load <16 x float>, <16 x float>* %vaddr, align 64
  %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> %old
  ret <16 x float>%res
}

define <16 x float> @test41(i8 * %addr, <16 x float> %old, <16 x float> %mask1) {
; CHECK-LABEL: test41:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2]
; CHECK-NEXT:    vcmpordps %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0x74,0x48,0xc2,0xca,0x07]
; CHECK-NEXT:    vcmpneqps %zmm2, %zmm1, %k1 {%k1} ## encoding: [0x62,0xf1,0x74,0x49,0xc2,0xca,0x04]
; CHECK-NEXT:    vblendmps (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x65,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <16 x float> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x float>*
  %r = load <16 x float>, <16 x float>* %vaddr, align 1
  %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> %old
  ret <16 x float>%res
}

define <16 x float> @test42(i8 * %addr, <16 x float> %mask1) {
; CHECK-LABEL: test42:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm1, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xef,0xc9]
; CHECK-NEXT:    vcmpordps %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xc9,0x07]
; CHECK-NEXT:    vcmpneqps %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0xc2,0xc9,0x04]
; CHECK-NEXT:    vmovaps (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <16 x float> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x float>*
  %r = load <16 x float>, <16 x float>* %vaddr, align 64
  %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> zeroinitializer
  ret <16 x float>%res
}

define <16 x float> @test43(i8 * %addr, <16 x float> %mask1) {
; CHECK-LABEL: test43:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm1, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xef,0xc9]
; CHECK-NEXT:    vcmpordps %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xc9,0x07]
; CHECK-NEXT:    vcmpneqps %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0xc2,0xc9,0x04]
; CHECK-NEXT:    vmovups (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <16 x float> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x float>*
  %r = load <16 x float>, <16 x float>* %vaddr, align 1
  %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> zeroinitializer
  ret <16 x float>%res
}

define <8 x double> @test44(i8 * %addr, <8 x double> %old, <8 x double> %mask1) {
; CHECK-LABEL: test44:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2]
; CHECK-NEXT:    vcmpordpd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0xf5,0x48,0xc2,0xca,0x07]
; CHECK-NEXT:    vcmpneqpd %zmm2, %zmm1, %k1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0xc2,0xca,0x04]
; CHECK-NEXT:    vblendmpd (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x65,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <8 x double> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x double>*
  %r = load <8 x double>, <8 x double>* %vaddr, align 64
  %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> %old
  ret <8 x double>%res
}

define <8 x double> @test45(i8 * %addr, <8 x double> %old, <8 x double> %mask1) {
; CHECK-LABEL: test45:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2]
; CHECK-NEXT:    vcmpordpd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0xf5,0x48,0xc2,0xca,0x07]
; CHECK-NEXT:    vcmpneqpd %zmm2, %zmm1, %k1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0xc2,0xca,0x04]
; CHECK-NEXT:    vblendmpd (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x65,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <8 x double> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x double>*
  %r = load <8 x double>, <8 x double>* %vaddr, align 1
  %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> %old
  ret <8 x double>%res
}

define <8 x double> @test46(i8 * %addr, <8 x double> %mask1) {
; CHECK-LABEL: test46:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm1, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xef,0xc9]
; CHECK-NEXT:    vcmpordpd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc9,0x07]
; CHECK-NEXT:    vcmpneqpd %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xc2,0xc9,0x04]
; CHECK-NEXT:    vmovapd (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <8 x double> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x double>*
  %r = load <8 x double>, <8 x double>* %vaddr, align 64
  %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> zeroinitializer
  ret <8 x double>%res
}

define <8 x double> @test47(i8 * %addr, <8 x double> %mask1) {
; CHECK-LABEL: test47:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm1, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xef,0xc9]
; CHECK-NEXT:    vcmpordpd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc9,0x07]
; CHECK-NEXT:    vcmpneqpd %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xc2,0xc9,0x04]
; CHECK-NEXT:    vmovupd (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <8 x double> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x double>*
  %r = load <8 x double>, <8 x double>* %vaddr, align 1
  %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> zeroinitializer
  ret <8 x double>%res
}