// RUN: %clang_cc1 -fopenmp -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86
// RUN: %clang_cc1 -fopenmp -triple x86_64-unknown-unknown -target-feature +avx -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86-AVX
// RUN: %clang_cc1 -fopenmp -triple x86_64-unknown-unknown -target-feature +avx512f -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86-AVX512
// RUN: %clang_cc1 -fopenmp -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86
// RUN: %clang_cc1 -fopenmp -triple i386-unknown-unknown -target-feature +avx -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86-AVX
// RUN: %clang_cc1 -fopenmp -triple i386-unknown-unknown -target-feature +avx512f -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86-AVX512
// RUN: %clang_cc1 -fopenmp -triple powerpc64-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=PPC
// RUN: %clang_cc1 -fopenmp -triple powerpc64-unknown-unknown -target-abi elfv1-qpx -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=PPC-QPX

// Codegen test for '#pragma omp simd' applied to three consecutive loops
// that differ only in their safelen/simdlen clauses.
//
// For every loop the FileCheck patterns expect an alignment assumption
// (ptrtoint / and / icmp eq 0 / llvm.assume) for each of c, a and b.
// 'c' uses the explicit 32-byte alignment (mask 31); 'a' and 'b' name no
// alignment, so the expected mask depends on the target configuration
// selected by the per-target check prefix.
//
// The first two loops carry a safelen clause, so the store must not be
// tagged with parallel_loop_access metadata; the third loop has no safelen
// and the store is expected to carry that metadata.
//
// Note: 't' is never reset between the loops, so if this function were
// executed, 'b' would be indexed up to 3 * size - 1.
void h1(float *c, float *a, double b[], int size)
{
// CHECK-LABEL: define void @h1
  int t = 0;
  // Loop 1: safelen(16) only.
#pragma omp simd safelen(16) linear(t) aligned(c:32) aligned(a,b)
// CHECK: [[C_PTRINT:%.+]] = ptrtoint
// CHECK-NEXT: [[C_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[C_PTRINT]], 31
// CHECK-NEXT: [[C_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[C_MASKEDPTR]], 0
// CHECK-NEXT: call void @llvm.assume(i1 [[C_MASKCOND]])
// CHECK: [[A_PTRINT:%.+]] = ptrtoint

// X86-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15
// X86-AVX-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 31
// X86-AVX512-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 63
// PPC-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15
// PPC-QPX-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15

// CHECK-NEXT: [[A_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[A_MASKEDPTR]], 0
// CHECK-NEXT: call void @llvm.assume(i1 [[A_MASKCOND]])
// CHECK: [[B_PTRINT:%.+]] = ptrtoint

// X86-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 15
// X86-AVX-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 31
// X86-AVX512-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 63
// PPC-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 15
// PPC-QPX-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 31

// CHECK-NEXT: [[B_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[B_MASKEDPTR]], 0
// CHECK-NEXT: call void @llvm.assume(i1 [[B_MASKCOND]])
  for (int i = 0; i < size; ++i) {
    c[i] = a[i] * a[i] + b[i] * b[t];
    ++t;
  }
// do not emit parallel_loop_access metadata due to usage of safelen clause.
// CHECK-NOT: store float {{.+}}, float* {{.+}}, align {{.+}}, !llvm.mem.parallel_loop_access {{![0-9]+}}
  // Loop 2: safelen(16) combined with simdlen(8).
#pragma omp simd safelen(16) linear(t) aligned(c:32) aligned(a,b) simdlen(8)
// CHECK: [[C_PTRINT:%.+]] = ptrtoint
// CHECK-NEXT: [[C_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[C_PTRINT]], 31
// CHECK-NEXT: [[C_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[C_MASKEDPTR]], 0
// CHECK-NEXT: call void @llvm.assume(i1 [[C_MASKCOND]])
// CHECK: [[A_PTRINT:%.+]] = ptrtoint

// X86-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15
// X86-AVX-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 31
// X86-AVX512-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 63
// PPC-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15
// PPC-QPX-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15

// CHECK-NEXT: [[A_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[A_MASKEDPTR]], 0
// CHECK-NEXT: call void @llvm.assume(i1 [[A_MASKCOND]])
// CHECK: [[B_PTRINT:%.+]] = ptrtoint

// X86-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 15
// X86-AVX-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 31
// X86-AVX512-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 63
// PPC-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 15
// PPC-QPX-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 31

// CHECK-NEXT: [[B_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[B_MASKEDPTR]], 0
// CHECK-NEXT: call void @llvm.assume(i1 [[B_MASKCOND]])
  for (int i = 0; i < size; ++i) {
    c[i] = a[i] * a[i] + b[i] * b[t];
    ++t;
  }
// do not emit parallel_loop_access metadata due to usage of safelen clause.
// CHECK-NOT: store float {{.+}}, float* {{.+}}, align {{.+}}, !llvm.mem.parallel_loop_access {{![0-9]+}}
  // Loop 3: simdlen(8) without safelen.
#pragma omp simd linear(t) aligned(c:32) aligned(a,b) simdlen(8)
// CHECK: [[C_PTRINT:%.+]] = ptrtoint
// CHECK-NEXT: [[C_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[C_PTRINT]], 31
// CHECK-NEXT: [[C_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[C_MASKEDPTR]], 0
// CHECK-NEXT: call void @llvm.assume(i1 [[C_MASKCOND]])
// CHECK: [[A_PTRINT:%.+]] = ptrtoint

// X86-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15
// X86-AVX-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 31
// X86-AVX512-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 63
// PPC-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15
// PPC-QPX-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15

// CHECK-NEXT: [[A_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[A_MASKEDPTR]], 0
// CHECK-NEXT: call void @llvm.assume(i1 [[A_MASKCOND]])
// CHECK: [[B_PTRINT:%.+]] = ptrtoint

// X86-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 15
// X86-AVX-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 31
// X86-AVX512-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 63
// PPC-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 15
// PPC-QPX-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 31

// CHECK-NEXT: [[B_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[B_MASKEDPTR]], 0
// CHECK-NEXT: call void @llvm.assume(i1 [[B_MASKCOND]])
  for (int i = 0; i < size; ++i) {
    c[i] = a[i] * a[i] + b[i] * b[t];
    ++t;
// CHECK: store float {{.+}}, float* {{.+}}, align {{.+}}, !llvm.mem.parallel_loop_access {{![0-9]+}}
  }
}

// Codegen test for a plain '#pragma omp simd linear(t)' loop with no safelen
// clause.  The store inside the loop is expected to be tagged with
// parallel_loop_access metadata; the matched node is captured as
// LOOP_H2_HEADER for use by later patterns.
void h2(float *c, float *a, float *b, int size)
{
// CHECK-LABEL: define void @h2
  int t = 0;
#pragma omp simd linear(t)
  for (int i = 0; i < size; ++i) {
    c[i] = a[i] * a[i] + b[i] * b[t];
    ++t;
// CHECK: store float {{.+}}, float* {{.+}}, align {{.+}}, !llvm.mem.parallel_loop_access [[LOOP_H2_HEADER:![0-9]+]]
  }
}

// Codegen test for '#pragma omp simd' on an outer loop that contains a
// nested inner loop.  The store lives in the inner loop, and the pattern
// after the loop nest requires that it is not tagged with
// parallel_loop_access metadata.
void h3(float *c, float *a, float *b, int size)
{
// CHECK-LABEL: define void @h3
#pragma omp simd
  for (int i = 0; i < size; ++i) {
    for (int j = 0; j < size; ++j) {
      c[j*i] = a[i] * b[j];
    }
  }
// do not emit parallel_loop_access for nested loop.
// CHECK-NOT: store float {{.+}}, float* {{.+}}, align {{.+}}, !llvm.mem.parallel_loop_access {{![0-9]+}}
}

// Metadata for h1:
// CHECK: [[LOOP_H1_HEADER:![0-9]+]] = distinct !{[[LOOP_H1_HEADER]], [[LOOP_WIDTH_16:![0-9]+]], [[LOOP_VEC_ENABLE:![0-9]+]]}
// CHECK: [[LOOP_WIDTH_16]] = !{!"llvm.loop.vectorize.width", i32 16}
// CHECK: [[LOOP_VEC_ENABLE]] = !{!"llvm.loop.vectorize.enable", i1 true}
// CHECK: [[LOOP_H1_HEADER:![0-9]+]] = distinct !{[[LOOP_H1_HEADER]], [[LOOP_WIDTH_8:![0-9]+]], [[LOOP_VEC_ENABLE]]}
// CHECK: [[LOOP_WIDTH_8]] = !{!"llvm.loop.vectorize.width", i32 8}
// CHECK: [[LOOP_H1_HEADER:![0-9]+]] = distinct !{[[LOOP_H1_HEADER]], [[LOOP_WIDTH_8]], [[LOOP_VEC_ENABLE]]}
//
// Metadata for h2:
// CHECK: [[LOOP_H2_HEADER]] = distinct !{[[LOOP_H2_HEADER]], [[LOOP_VEC_ENABLE]]}
//
// Metadata for h3:
// CHECK: [[LOOP_H3_HEADER:![0-9]+]] = distinct !{[[LOOP_H3_HEADER]], [[LOOP_VEC_ENABLE]]}
//