xref: /aosp_15_r20/external/llvm/test/CodeGen/X86/MergeConsecutiveStores.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
1*9880d681SAndroid Build Coastguard Worker; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx -fixup-byte-word-insts=1 < %s | FileCheck -check-prefix=CHECK -check-prefix=BWON %s
2*9880d681SAndroid Build Coastguard Worker; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx -fixup-byte-word-insts=0 < %s | FileCheck -check-prefix=CHECK -check-prefix=BWOFF %s
3*9880d681SAndroid Build Coastguard Worker; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx -addr-sink-using-gep=1 < %s | FileCheck -check-prefix=CHECK -check-prefix=BWON %s
4*9880d681SAndroid Build Coastguard Worker
5*9880d681SAndroid Build Coastguard Worker%struct.A = type { i8, i8, i8, i8, i8, i8, i8, i8 }
6*9880d681SAndroid Build Coastguard Worker%struct.B = type { i32, i32, i32, i32, i32, i32, i32, i32 }
7*9880d681SAndroid Build Coastguard Worker
8*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: merge_const_store:
9*9880d681SAndroid Build Coastguard Worker; Save the bytes 1,2,...,8 as one 64-bit integer (0x0807060504030201).
10*9880d681SAndroid Build Coastguard Worker; CHECK: movabsq $578437695752307201
11*9880d681SAndroid Build Coastguard Worker; CHECK: ret
; Test input: a counted loop that stores the byte constants 1..8 to the eight
; consecutive i8 fields of the current %struct.A, then steps to the next struct.
; The loop is skipped entirely when %count <= 0.
12*9880d681SAndroid Build Coastguard Workerdefine void @merge_const_store(i32 %count, %struct.A* nocapture %p) nounwind uwtable noinline ssp {
13*9880d681SAndroid Build Coastguard Worker  %1 = icmp sgt i32 %count, 0
14*9880d681SAndroid Build Coastguard Worker  br i1 %1, label %.lr.ph, label %._crit_edge
15*9880d681SAndroid Build Coastguard Worker.lr.ph:
16*9880d681SAndroid Build Coastguard Worker  %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ] ; iteration counter, starts at 0
17*9880d681SAndroid Build Coastguard Worker  %.01 = phi %struct.A* [ %11, %.lr.ph ], [ %p, %0 ] ; struct written this iteration
18*9880d681SAndroid Build Coastguard Worker  %2 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 0
19*9880d681SAndroid Build Coastguard Worker  store i8 1, i8* %2, align 1
20*9880d681SAndroid Build Coastguard Worker  %3 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 1
21*9880d681SAndroid Build Coastguard Worker  store i8 2, i8* %3, align 1
22*9880d681SAndroid Build Coastguard Worker  %4 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 2
23*9880d681SAndroid Build Coastguard Worker  store i8 3, i8* %4, align 1
24*9880d681SAndroid Build Coastguard Worker  %5 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 3
25*9880d681SAndroid Build Coastguard Worker  store i8 4, i8* %5, align 1
26*9880d681SAndroid Build Coastguard Worker  %6 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 4
27*9880d681SAndroid Build Coastguard Worker  store i8 5, i8* %6, align 1
28*9880d681SAndroid Build Coastguard Worker  %7 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 5
29*9880d681SAndroid Build Coastguard Worker  store i8 6, i8* %7, align 1
30*9880d681SAndroid Build Coastguard Worker  %8 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 6
31*9880d681SAndroid Build Coastguard Worker  store i8 7, i8* %8, align 1
32*9880d681SAndroid Build Coastguard Worker  %9 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 7
33*9880d681SAndroid Build Coastguard Worker  store i8 8, i8* %9, align 1
34*9880d681SAndroid Build Coastguard Worker  %10 = add nsw i32 %i.02, 1
35*9880d681SAndroid Build Coastguard Worker  %11 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 1 ; next %struct.A
36*9880d681SAndroid Build Coastguard Worker  %exitcond = icmp eq i32 %10, %count
37*9880d681SAndroid Build Coastguard Worker  br i1 %exitcond, label %._crit_edge, label %.lr.ph
38*9880d681SAndroid Build Coastguard Worker._crit_edge:
39*9880d681SAndroid Build Coastguard Worker  ret void
40*9880d681SAndroid Build Coastguard Worker}
41*9880d681SAndroid Build Coastguard Worker
42*9880d681SAndroid Build Coastguard Worker; No vectors because we use noimplicitfloat
43*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: merge_const_store_no_vec:
44*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: vmovups
45*9880d681SAndroid Build Coastguard Worker; CHECK: ret
; Test input: a counted loop that stores i32 0 to all eight i32 fields of the
; current %struct.B, then steps to the next struct. The function is marked
; noimplicitfloat; its body otherwise matches @merge_const_store_vec.
46*9880d681SAndroid Build Coastguard Workerdefine void @merge_const_store_no_vec(i32 %count, %struct.B* nocapture %p) noimplicitfloat{
47*9880d681SAndroid Build Coastguard Worker  %1 = icmp sgt i32 %count, 0
48*9880d681SAndroid Build Coastguard Worker  br i1 %1, label %.lr.ph, label %._crit_edge
49*9880d681SAndroid Build Coastguard Worker.lr.ph:
50*9880d681SAndroid Build Coastguard Worker  %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ] ; iteration counter, starts at 0
51*9880d681SAndroid Build Coastguard Worker  %.01 = phi %struct.B* [ %11, %.lr.ph ], [ %p, %0 ] ; struct written this iteration
52*9880d681SAndroid Build Coastguard Worker  %2 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 0
53*9880d681SAndroid Build Coastguard Worker  store i32 0, i32* %2, align 4
54*9880d681SAndroid Build Coastguard Worker  %3 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 1
55*9880d681SAndroid Build Coastguard Worker  store i32 0, i32* %3, align 4
56*9880d681SAndroid Build Coastguard Worker  %4 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 2
57*9880d681SAndroid Build Coastguard Worker  store i32 0, i32* %4, align 4
58*9880d681SAndroid Build Coastguard Worker  %5 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 3
59*9880d681SAndroid Build Coastguard Worker  store i32 0, i32* %5, align 4
60*9880d681SAndroid Build Coastguard Worker  %6 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 4
61*9880d681SAndroid Build Coastguard Worker  store i32 0, i32* %6, align 4
62*9880d681SAndroid Build Coastguard Worker  %7 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 5
63*9880d681SAndroid Build Coastguard Worker  store i32 0, i32* %7, align 4
64*9880d681SAndroid Build Coastguard Worker  %8 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 6
65*9880d681SAndroid Build Coastguard Worker  store i32 0, i32* %8, align 4
66*9880d681SAndroid Build Coastguard Worker  %9 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 7
67*9880d681SAndroid Build Coastguard Worker  store i32 0, i32* %9, align 4
68*9880d681SAndroid Build Coastguard Worker  %10 = add nsw i32 %i.02, 1
69*9880d681SAndroid Build Coastguard Worker  %11 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 1 ; next %struct.B
70*9880d681SAndroid Build Coastguard Worker  %exitcond = icmp eq i32 %10, %count
71*9880d681SAndroid Build Coastguard Worker  br i1 %exitcond, label %._crit_edge, label %.lr.ph
72*9880d681SAndroid Build Coastguard Worker._crit_edge:
73*9880d681SAndroid Build Coastguard Worker  ret void
74*9880d681SAndroid Build Coastguard Worker}
75*9880d681SAndroid Build Coastguard Worker
76*9880d681SAndroid Build Coastguard Worker; Move the constants using a single vector store.
77*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: merge_const_store_vec:
78*9880d681SAndroid Build Coastguard Worker; CHECK: vmovups
79*9880d681SAndroid Build Coastguard Worker; CHECK: ret
; Test input: a counted loop that stores i32 0 to all eight i32 fields of the
; current %struct.B (32 contiguous bytes), then steps to the next struct.
; Unlike @merge_const_store_no_vec, this function is not marked noimplicitfloat.
80*9880d681SAndroid Build Coastguard Workerdefine void @merge_const_store_vec(i32 %count, %struct.B* nocapture %p) nounwind uwtable noinline ssp {
81*9880d681SAndroid Build Coastguard Worker  %1 = icmp sgt i32 %count, 0
82*9880d681SAndroid Build Coastguard Worker  br i1 %1, label %.lr.ph, label %._crit_edge
83*9880d681SAndroid Build Coastguard Worker.lr.ph:
84*9880d681SAndroid Build Coastguard Worker  %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ] ; iteration counter, starts at 0
85*9880d681SAndroid Build Coastguard Worker  %.01 = phi %struct.B* [ %11, %.lr.ph ], [ %p, %0 ] ; struct written this iteration
86*9880d681SAndroid Build Coastguard Worker  %2 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 0
87*9880d681SAndroid Build Coastguard Worker  store i32 0, i32* %2, align 4
88*9880d681SAndroid Build Coastguard Worker  %3 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 1
89*9880d681SAndroid Build Coastguard Worker  store i32 0, i32* %3, align 4
90*9880d681SAndroid Build Coastguard Worker  %4 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 2
91*9880d681SAndroid Build Coastguard Worker  store i32 0, i32* %4, align 4
92*9880d681SAndroid Build Coastguard Worker  %5 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 3
93*9880d681SAndroid Build Coastguard Worker  store i32 0, i32* %5, align 4
94*9880d681SAndroid Build Coastguard Worker  %6 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 4
95*9880d681SAndroid Build Coastguard Worker  store i32 0, i32* %6, align 4
96*9880d681SAndroid Build Coastguard Worker  %7 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 5
97*9880d681SAndroid Build Coastguard Worker  store i32 0, i32* %7, align 4
98*9880d681SAndroid Build Coastguard Worker  %8 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 6
99*9880d681SAndroid Build Coastguard Worker  store i32 0, i32* %8, align 4
100*9880d681SAndroid Build Coastguard Worker  %9 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 7
101*9880d681SAndroid Build Coastguard Worker  store i32 0, i32* %9, align 4
102*9880d681SAndroid Build Coastguard Worker  %10 = add nsw i32 %i.02, 1
103*9880d681SAndroid Build Coastguard Worker  %11 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 1 ; next %struct.B
104*9880d681SAndroid Build Coastguard Worker  %exitcond = icmp eq i32 %10, %count
105*9880d681SAndroid Build Coastguard Worker  br i1 %exitcond, label %._crit_edge, label %.lr.ph
106*9880d681SAndroid Build Coastguard Worker._crit_edge:
107*9880d681SAndroid Build Coastguard Worker  ret void
108*9880d681SAndroid Build Coastguard Worker}
109*9880d681SAndroid Build Coastguard Worker
110*9880d681SAndroid Build Coastguard Worker; Move the first 4 constants as a single 32-bit integer store (0x04030201). Move the rest as scalar byte stores.
111*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: merge_nonconst_store:
112*9880d681SAndroid Build Coastguard Worker; CHECK: movl $67305985
113*9880d681SAndroid Build Coastguard Worker; CHECK: movb
114*9880d681SAndroid Build Coastguard Worker; CHECK: movb
115*9880d681SAndroid Build Coastguard Worker; CHECK: movb
116*9880d681SAndroid Build Coastguard Worker; CHECK: movb
117*9880d681SAndroid Build Coastguard Worker; CHECK: ret
; Test input: same loop shape as @merge_const_store, except that field 4 of
; each %struct.A receives the non-constant argument %zz. Fields 0..3 get the
; constants 1..4 and fields 5..7 get the constants 6..8.
118*9880d681SAndroid Build Coastguard Workerdefine void @merge_nonconst_store(i32 %count, i8 %zz, %struct.A* nocapture %p) nounwind uwtable noinline ssp {
119*9880d681SAndroid Build Coastguard Worker  %1 = icmp sgt i32 %count, 0
120*9880d681SAndroid Build Coastguard Worker  br i1 %1, label %.lr.ph, label %._crit_edge
121*9880d681SAndroid Build Coastguard Worker.lr.ph:
122*9880d681SAndroid Build Coastguard Worker  %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ] ; iteration counter, starts at 0
123*9880d681SAndroid Build Coastguard Worker  %.01 = phi %struct.A* [ %11, %.lr.ph ], [ %p, %0 ] ; struct written this iteration
124*9880d681SAndroid Build Coastguard Worker  %2 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 0
125*9880d681SAndroid Build Coastguard Worker  store i8 1, i8* %2, align 1
126*9880d681SAndroid Build Coastguard Worker  %3 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 1
127*9880d681SAndroid Build Coastguard Worker  store i8 2, i8* %3, align 1
128*9880d681SAndroid Build Coastguard Worker  %4 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 2
129*9880d681SAndroid Build Coastguard Worker  store i8 3, i8* %4, align 1
130*9880d681SAndroid Build Coastguard Worker  %5 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 3
131*9880d681SAndroid Build Coastguard Worker  store i8 4, i8* %5, align 1
132*9880d681SAndroid Build Coastguard Worker  %6 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 4
133*9880d681SAndroid Build Coastguard Worker  store i8 %zz, i8* %6, align 1                     ;  <----------- Not a const; splits the run of constant stores
134*9880d681SAndroid Build Coastguard Worker  %7 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 5
135*9880d681SAndroid Build Coastguard Worker  store i8 6, i8* %7, align 1
136*9880d681SAndroid Build Coastguard Worker  %8 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 6
137*9880d681SAndroid Build Coastguard Worker  store i8 7, i8* %8, align 1
138*9880d681SAndroid Build Coastguard Worker  %9 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 7
139*9880d681SAndroid Build Coastguard Worker  store i8 8, i8* %9, align 1
140*9880d681SAndroid Build Coastguard Worker  %10 = add nsw i32 %i.02, 1
141*9880d681SAndroid Build Coastguard Worker  %11 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 1 ; next %struct.A
142*9880d681SAndroid Build Coastguard Worker  %exitcond = icmp eq i32 %10, %count
143*9880d681SAndroid Build Coastguard Worker  br i1 %exitcond, label %._crit_edge, label %.lr.ph
144*9880d681SAndroid Build Coastguard Worker._crit_edge:
145*9880d681SAndroid Build Coastguard Worker  ret void
146*9880d681SAndroid Build Coastguard Worker}
147*9880d681SAndroid Build Coastguard Worker
148*9880d681SAndroid Build Coastguard Worker
149*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: merge_loads_i16:
150*9880d681SAndroid Build Coastguard Worker;  load:
151*9880d681SAndroid Build Coastguard Worker; BWON:  movzwl
152*9880d681SAndroid Build Coastguard Worker; BWOFF: movw
153*9880d681SAndroid Build Coastguard Worker;  store:
154*9880d681SAndroid Build Coastguard Worker; CHECK: movw
155*9880d681SAndroid Build Coastguard Worker; CHECK: ret
; Test input: each iteration loads the first two i8 fields of %q and stores
; them to the first two i8 fields of the current destination struct. Both loads
; come before both stores, and %q / %p are declared noalias.
156*9880d681SAndroid Build Coastguard Workerdefine void @merge_loads_i16(i32 %count, %struct.A* noalias nocapture %q, %struct.A* noalias nocapture %p) nounwind uwtable noinline ssp {
157*9880d681SAndroid Build Coastguard Worker  %1 = icmp sgt i32 %count, 0
158*9880d681SAndroid Build Coastguard Worker  br i1 %1, label %.lr.ph, label %._crit_edge
159*9880d681SAndroid Build Coastguard Worker
160*9880d681SAndroid Build Coastguard Worker.lr.ph:                                           ; preds = %0
161*9880d681SAndroid Build Coastguard Worker  %2 = getelementptr inbounds %struct.A, %struct.A* %q, i64 0, i32 0 ; source addresses are loop-invariant
162*9880d681SAndroid Build Coastguard Worker  %3 = getelementptr inbounds %struct.A, %struct.A* %q, i64 0, i32 1
163*9880d681SAndroid Build Coastguard Worker  br label %4
164*9880d681SAndroid Build Coastguard Worker
165*9880d681SAndroid Build Coastguard Worker; <label>:4                                       ; preds = %4, %.lr.ph
166*9880d681SAndroid Build Coastguard Worker  %i.02 = phi i32 [ 0, %.lr.ph ], [ %9, %4 ]
167*9880d681SAndroid Build Coastguard Worker  %.01 = phi %struct.A* [ %p, %.lr.ph ], [ %10, %4 ] ; destination struct for this iteration
168*9880d681SAndroid Build Coastguard Worker  %5 = load i8, i8* %2, align 1
169*9880d681SAndroid Build Coastguard Worker  %6 = load i8, i8* %3, align 1
170*9880d681SAndroid Build Coastguard Worker  %7 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 0
171*9880d681SAndroid Build Coastguard Worker  store i8 %5, i8* %7, align 1
172*9880d681SAndroid Build Coastguard Worker  %8 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 1
173*9880d681SAndroid Build Coastguard Worker  store i8 %6, i8* %8, align 1
174*9880d681SAndroid Build Coastguard Worker  %9 = add nsw i32 %i.02, 1
175*9880d681SAndroid Build Coastguard Worker  %10 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 1 ; next destination struct
176*9880d681SAndroid Build Coastguard Worker  %exitcond = icmp eq i32 %9, %count
177*9880d681SAndroid Build Coastguard Worker  br i1 %exitcond, label %._crit_edge, label %4
178*9880d681SAndroid Build Coastguard Worker
179*9880d681SAndroid Build Coastguard Worker._crit_edge:                                      ; preds = %4, %0
180*9880d681SAndroid Build Coastguard Worker  ret void
181*9880d681SAndroid Build Coastguard Worker}
182*9880d681SAndroid Build Coastguard Worker
183*9880d681SAndroid Build Coastguard Worker; The loads and the stores are interleaved. Can't merge them.
184*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: no_merge_loads:
185*9880d681SAndroid Build Coastguard Worker; BWON:  movzbl
186*9880d681SAndroid Build Coastguard Worker; BWOFF: movb
187*9880d681SAndroid Build Coastguard Worker; CHECK: movb
188*9880d681SAndroid Build Coastguard Worker; BWON:  movzbl
189*9880d681SAndroid Build Coastguard Worker; BWOFF: movb
190*9880d681SAndroid Build Coastguard Worker; CHECK: movb
191*9880d681SAndroid Build Coastguard Worker; CHECK: ret
; Test input: copies the same two bytes as @merge_loads_i16, but in the order
; load, store, load, store, so the second load follows the first store.
192*9880d681SAndroid Build Coastguard Workerdefine void @no_merge_loads(i32 %count, %struct.A* noalias nocapture %q, %struct.A* noalias nocapture %p) nounwind uwtable noinline ssp {
193*9880d681SAndroid Build Coastguard Worker  %1 = icmp sgt i32 %count, 0
194*9880d681SAndroid Build Coastguard Worker  br i1 %1, label %.lr.ph, label %._crit_edge
195*9880d681SAndroid Build Coastguard Worker
196*9880d681SAndroid Build Coastguard Worker.lr.ph:                                           ; preds = %0
197*9880d681SAndroid Build Coastguard Worker  %2 = getelementptr inbounds %struct.A, %struct.A* %q, i64 0, i32 0 ; source addresses are loop-invariant
198*9880d681SAndroid Build Coastguard Worker  %3 = getelementptr inbounds %struct.A, %struct.A* %q, i64 0, i32 1
199*9880d681SAndroid Build Coastguard Worker  br label %a4
200*9880d681SAndroid Build Coastguard Worker
201*9880d681SAndroid Build Coastguard Workera4:                                       ; preds = %a4, %.lr.ph
202*9880d681SAndroid Build Coastguard Worker  %i.02 = phi i32 [ 0, %.lr.ph ], [ %a9, %a4 ]
203*9880d681SAndroid Build Coastguard Worker  %.01 = phi %struct.A* [ %p, %.lr.ph ], [ %a10, %a4 ] ; destination struct for this iteration
204*9880d681SAndroid Build Coastguard Worker  %a5 = load i8, i8* %2, align 1
205*9880d681SAndroid Build Coastguard Worker  %a7 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 0
206*9880d681SAndroid Build Coastguard Worker  store i8 %a5, i8* %a7, align 1
207*9880d681SAndroid Build Coastguard Worker  %a8 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 1
208*9880d681SAndroid Build Coastguard Worker  %a6 = load i8, i8* %3, align 1 ; second load is issued after the first store
209*9880d681SAndroid Build Coastguard Worker  store i8 %a6, i8* %a8, align 1
210*9880d681SAndroid Build Coastguard Worker  %a9 = add nsw i32 %i.02, 1
211*9880d681SAndroid Build Coastguard Worker  %a10 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 1 ; next destination struct
212*9880d681SAndroid Build Coastguard Worker  %exitcond = icmp eq i32 %a9, %count
213*9880d681SAndroid Build Coastguard Worker  br i1 %exitcond, label %._crit_edge, label %a4
214*9880d681SAndroid Build Coastguard Worker
215*9880d681SAndroid Build Coastguard Worker._crit_edge:                                      ; preds = %a4, %0
216*9880d681SAndroid Build Coastguard Worker  ret void
217*9880d681SAndroid Build Coastguard Worker}
218*9880d681SAndroid Build Coastguard Worker
219*9880d681SAndroid Build Coastguard Worker
220*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: merge_loads_integer:
221*9880d681SAndroid Build Coastguard Worker;  load:
222*9880d681SAndroid Build Coastguard Worker; CHECK: movq
223*9880d681SAndroid Build Coastguard Worker;  store:
224*9880d681SAndroid Build Coastguard Worker; CHECK: movq
225*9880d681SAndroid Build Coastguard Worker; CHECK: ret
; Test input: each iteration loads the first two i32 fields of %q and stores
; them to the first two i32 fields of the current destination struct (8
; contiguous bytes). The loads and stores carry no explicit alignment.
226*9880d681SAndroid Build Coastguard Workerdefine void @merge_loads_integer(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
227*9880d681SAndroid Build Coastguard Worker  %1 = icmp sgt i32 %count, 0
228*9880d681SAndroid Build Coastguard Worker  br i1 %1, label %.lr.ph, label %._crit_edge
229*9880d681SAndroid Build Coastguard Worker
230*9880d681SAndroid Build Coastguard Worker.lr.ph:                                           ; preds = %0
231*9880d681SAndroid Build Coastguard Worker  %2 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 0 ; source addresses are loop-invariant
232*9880d681SAndroid Build Coastguard Worker  %3 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 1
233*9880d681SAndroid Build Coastguard Worker  br label %4
234*9880d681SAndroid Build Coastguard Worker
235*9880d681SAndroid Build Coastguard Worker; <label>:4                                       ; preds = %4, %.lr.ph
236*9880d681SAndroid Build Coastguard Worker  %i.02 = phi i32 [ 0, %.lr.ph ], [ %9, %4 ]
237*9880d681SAndroid Build Coastguard Worker  %.01 = phi %struct.B* [ %p, %.lr.ph ], [ %10, %4 ] ; destination struct for this iteration
238*9880d681SAndroid Build Coastguard Worker  %5 = load i32, i32* %2
239*9880d681SAndroid Build Coastguard Worker  %6 = load i32, i32* %3
240*9880d681SAndroid Build Coastguard Worker  %7 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 0
241*9880d681SAndroid Build Coastguard Worker  store i32 %5, i32* %7
242*9880d681SAndroid Build Coastguard Worker  %8 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 1
243*9880d681SAndroid Build Coastguard Worker  store i32 %6, i32* %8
244*9880d681SAndroid Build Coastguard Worker  %9 = add nsw i32 %i.02, 1
245*9880d681SAndroid Build Coastguard Worker  %10 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 1 ; next destination struct
246*9880d681SAndroid Build Coastguard Worker  %exitcond = icmp eq i32 %9, %count
247*9880d681SAndroid Build Coastguard Worker  br i1 %exitcond, label %._crit_edge, label %4
248*9880d681SAndroid Build Coastguard Worker
249*9880d681SAndroid Build Coastguard Worker._crit_edge:                                      ; preds = %4, %0
250*9880d681SAndroid Build Coastguard Worker  ret void
251*9880d681SAndroid Build Coastguard Worker}
252*9880d681SAndroid Build Coastguard Worker
253*9880d681SAndroid Build Coastguard Worker
254*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: merge_loads_vector:
255*9880d681SAndroid Build Coastguard Worker;  load:
256*9880d681SAndroid Build Coastguard Worker; CHECK: movups
257*9880d681SAndroid Build Coastguard Worker;  store:
258*9880d681SAndroid Build Coastguard Worker; CHECK: movups
259*9880d681SAndroid Build Coastguard Worker; CHECK: ret
; Test input: each iteration loads the first four i32 fields of %q and stores
; them to the first four i32 fields of the current destination struct (16
; contiguous bytes). All four loads come before the four stores, and none of
; them carries an explicit alignment.
260*9880d681SAndroid Build Coastguard Workerdefine void @merge_loads_vector(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
261*9880d681SAndroid Build Coastguard Worker  %a1 = icmp sgt i32 %count, 0
262*9880d681SAndroid Build Coastguard Worker  br i1 %a1, label %.lr.ph, label %._crit_edge
263*9880d681SAndroid Build Coastguard Worker
264*9880d681SAndroid Build Coastguard Worker.lr.ph:                                           ; preds = %0
265*9880d681SAndroid Build Coastguard Worker  %a2 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 0 ; source addresses are loop-invariant
266*9880d681SAndroid Build Coastguard Worker  %a3 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 1
267*9880d681SAndroid Build Coastguard Worker  %a4 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 2
268*9880d681SAndroid Build Coastguard Worker  %a5 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 3
269*9880d681SAndroid Build Coastguard Worker  br label %block4
270*9880d681SAndroid Build Coastguard Worker
271*9880d681SAndroid Build Coastguard Workerblock4:                                       ; preds = %block4, %.lr.ph
272*9880d681SAndroid Build Coastguard Worker  %i.02 = phi i32 [ 0, %.lr.ph ], [ %c9, %block4 ]
273*9880d681SAndroid Build Coastguard Worker  %.01 = phi %struct.B* [ %p, %.lr.ph ], [ %c10, %block4 ] ; destination struct for this iteration
274*9880d681SAndroid Build Coastguard Worker  %a7 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 0
275*9880d681SAndroid Build Coastguard Worker  %a8 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 1
276*9880d681SAndroid Build Coastguard Worker  %a9 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 2
277*9880d681SAndroid Build Coastguard Worker  %a10 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 3
278*9880d681SAndroid Build Coastguard Worker  %b1 = load i32, i32* %a2
279*9880d681SAndroid Build Coastguard Worker  %b2 = load i32, i32* %a3
280*9880d681SAndroid Build Coastguard Worker  %b3 = load i32, i32* %a4
281*9880d681SAndroid Build Coastguard Worker  %b4 = load i32, i32* %a5
282*9880d681SAndroid Build Coastguard Worker  store i32 %b1, i32* %a7
283*9880d681SAndroid Build Coastguard Worker  store i32 %b2, i32* %a8
284*9880d681SAndroid Build Coastguard Worker  store i32 %b3, i32* %a9
285*9880d681SAndroid Build Coastguard Worker  store i32 %b4, i32* %a10
286*9880d681SAndroid Build Coastguard Worker  %c9 = add nsw i32 %i.02, 1
287*9880d681SAndroid Build Coastguard Worker  %c10 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 1 ; next destination struct
288*9880d681SAndroid Build Coastguard Worker  %exitcond = icmp eq i32 %c9, %count
289*9880d681SAndroid Build Coastguard Worker  br i1 %exitcond, label %._crit_edge, label %block4
290*9880d681SAndroid Build Coastguard Worker
291*9880d681SAndroid Build Coastguard Worker._crit_edge:                                      ; preds = %block4, %0
292*9880d681SAndroid Build Coastguard Worker  ret void
293*9880d681SAndroid Build Coastguard Worker}
294*9880d681SAndroid Build Coastguard Worker
295*9880d681SAndroid Build Coastguard Worker;; On x86, even unaligned copies should be merged to vector ops.
296*9880d681SAndroid Build Coastguard Worker;; TODO: however, this cannot happen at the moment, due to brokenness
297*9880d681SAndroid Build Coastguard Worker;; in MergeConsecutiveStores. See UseAA FIXME in DAGCombiner.cpp
298*9880d681SAndroid Build Coastguard Worker;; visitSTORE.
299*9880d681SAndroid Build Coastguard Worker
300*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: merge_loads_no_align:
301*9880d681SAndroid Build Coastguard Worker;  load:
302*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: vmovups ;; TODO
303*9880d681SAndroid Build Coastguard Worker;  store:
304*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: vmovups ;; TODO
305*9880d681SAndroid Build Coastguard Worker; CHECK: ret
; Test input: the same four-i32 copy as @merge_loads_vector, except that every
; load and store is explicitly marked align 1.
306*9880d681SAndroid Build Coastguard Workerdefine void @merge_loads_no_align(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
307*9880d681SAndroid Build Coastguard Worker  %a1 = icmp sgt i32 %count, 0
308*9880d681SAndroid Build Coastguard Worker  br i1 %a1, label %.lr.ph, label %._crit_edge
309*9880d681SAndroid Build Coastguard Worker
310*9880d681SAndroid Build Coastguard Worker.lr.ph:                                           ; preds = %0
311*9880d681SAndroid Build Coastguard Worker  %a2 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 0 ; source addresses are loop-invariant
312*9880d681SAndroid Build Coastguard Worker  %a3 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 1
313*9880d681SAndroid Build Coastguard Worker  %a4 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 2
314*9880d681SAndroid Build Coastguard Worker  %a5 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 3
315*9880d681SAndroid Build Coastguard Worker  br label %block4
316*9880d681SAndroid Build Coastguard Worker
317*9880d681SAndroid Build Coastguard Workerblock4:                                       ; preds = %block4, %.lr.ph
318*9880d681SAndroid Build Coastguard Worker  %i.02 = phi i32 [ 0, %.lr.ph ], [ %c9, %block4 ]
319*9880d681SAndroid Build Coastguard Worker  %.01 = phi %struct.B* [ %p, %.lr.ph ], [ %c10, %block4 ] ; destination struct for this iteration
320*9880d681SAndroid Build Coastguard Worker  %a7 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 0
321*9880d681SAndroid Build Coastguard Worker  %a8 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 1
322*9880d681SAndroid Build Coastguard Worker  %a9 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 2
323*9880d681SAndroid Build Coastguard Worker  %a10 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 3
324*9880d681SAndroid Build Coastguard Worker  %b1 = load i32, i32* %a2, align 1
325*9880d681SAndroid Build Coastguard Worker  %b2 = load i32, i32* %a3, align 1
326*9880d681SAndroid Build Coastguard Worker  %b3 = load i32, i32* %a4, align 1
327*9880d681SAndroid Build Coastguard Worker  %b4 = load i32, i32* %a5, align 1
328*9880d681SAndroid Build Coastguard Worker  store i32 %b1, i32* %a7, align 1
329*9880d681SAndroid Build Coastguard Worker  store i32 %b2, i32* %a8, align 1
330*9880d681SAndroid Build Coastguard Worker  store i32 %b3, i32* %a9, align 1
331*9880d681SAndroid Build Coastguard Worker  store i32 %b4, i32* %a10, align 1
332*9880d681SAndroid Build Coastguard Worker  %c9 = add nsw i32 %i.02, 1
333*9880d681SAndroid Build Coastguard Worker  %c10 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 1 ; next destination struct
334*9880d681SAndroid Build Coastguard Worker  %exitcond = icmp eq i32 %c9, %count
335*9880d681SAndroid Build Coastguard Worker  br i1 %exitcond, label %._crit_edge, label %block4
336*9880d681SAndroid Build Coastguard Worker
337*9880d681SAndroid Build Coastguard Worker._crit_edge:                                      ; preds = %block4, %0
338*9880d681SAndroid Build Coastguard Worker  ret void
339*9880d681SAndroid Build Coastguard Worker}
340*9880d681SAndroid Build Coastguard Worker
341*9880d681SAndroid Build Coastguard Worker; Make sure that we merge the consecutive load/store sequence below and use a
342*9880d681SAndroid Build Coastguard Worker; word (16 bit) instead of a byte copy.
343*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: MergeLoadStoreBaseIndexOffset:
344*9880d681SAndroid Build Coastguard Worker; BWON: movzwl   (%{{.*}},%{{.*}}), %e[[REG:[a-z]+]]
345*9880d681SAndroid Build Coastguard Worker; BWOFF: movw    (%{{.*}},%{{.*}}), %[[REG:[a-z]+]]
346*9880d681SAndroid Build Coastguard Worker; CHECK: movw    %[[REG]], (%{{.*}})
; Test input: each iteration reads an i64 index from %a, loads the two adjacent
; bytes %c[index] and %c[index + 1], and stores them to two adjacent bytes of
; %b. %a advances by one i64 and %b by two bytes per iteration; %n counts down
; to zero. There is no entry guard, so the body runs at least once.
347*9880d681SAndroid Build Coastguard Workerdefine void @MergeLoadStoreBaseIndexOffset(i64* %a, i8* %b, i8* %c, i32 %n) {
348*9880d681SAndroid Build Coastguard Worker  br label %1
349*9880d681SAndroid Build Coastguard Worker
350*9880d681SAndroid Build Coastguard Worker; <label>:1
351*9880d681SAndroid Build Coastguard Worker  %.09 = phi i32 [ %n, %0 ], [ %11, %1 ] ; remaining iteration count
352*9880d681SAndroid Build Coastguard Worker  %.08 = phi i8* [ %b, %0 ], [ %10, %1 ] ; current destination pointer
353*9880d681SAndroid Build Coastguard Worker  %.0 = phi i64* [ %a, %0 ], [ %2, %1 ] ; current index pointer
354*9880d681SAndroid Build Coastguard Worker  %2 = getelementptr inbounds i64, i64* %.0, i64 1
355*9880d681SAndroid Build Coastguard Worker  %3 = load i64, i64* %.0, align 1
356*9880d681SAndroid Build Coastguard Worker  %4 = getelementptr inbounds i8, i8* %c, i64 %3
357*9880d681SAndroid Build Coastguard Worker  %5 = load i8, i8* %4, align 1
358*9880d681SAndroid Build Coastguard Worker  %6 = add i64 %3, 1
359*9880d681SAndroid Build Coastguard Worker  %7 = getelementptr inbounds i8, i8* %c, i64 %6
360*9880d681SAndroid Build Coastguard Worker  %8 = load i8, i8* %7, align 1
361*9880d681SAndroid Build Coastguard Worker  store i8 %5, i8* %.08, align 1
362*9880d681SAndroid Build Coastguard Worker  %9 = getelementptr inbounds i8, i8* %.08, i64 1
363*9880d681SAndroid Build Coastguard Worker  store i8 %8, i8* %9, align 1
364*9880d681SAndroid Build Coastguard Worker  %10 = getelementptr inbounds i8, i8* %.08, i64 2
365*9880d681SAndroid Build Coastguard Worker  %11 = add nsw i32 %.09, -1
366*9880d681SAndroid Build Coastguard Worker  %12 = icmp eq i32 %11, 0
367*9880d681SAndroid Build Coastguard Worker  br i1 %12, label %13, label %1
368*9880d681SAndroid Build Coastguard Worker
369*9880d681SAndroid Build Coastguard Worker; <label>:13
370*9880d681SAndroid Build Coastguard Worker  ret void
371*9880d681SAndroid Build Coastguard Worker}
372*9880d681SAndroid Build Coastguard Worker
373*9880d681SAndroid Build Coastguard Worker; Make sure that we merge the consecutive load/store sequence below and use a
374*9880d681SAndroid Build Coastguard Worker; word (16 bit) instead of a byte copy even if there are intermediate sign
375*9880d681SAndroid Build Coastguard Worker; extensions.
376*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: MergeLoadStoreBaseIndexOffsetSext:
377*9880d681SAndroid Build Coastguard Worker; BWON: movzwl   (%{{.*}},%{{.*}}), %e[[REG:[a-z]+]]
378*9880d681SAndroid Build Coastguard Worker; BWOFF: movw    (%{{.*}},%{{.*}}), %[[REG:[a-z]+]]
379*9880d681SAndroid Build Coastguard Worker; CHECK: movw    %[[REG]], (%{{.*}})
; Test input: same copy loop as @MergeLoadStoreBaseIndexOffset, except that the
; index is an i8 read from %a and sign-extended to i64 before it is used to
; address %c. %a therefore advances by one byte per iteration.
380*9880d681SAndroid Build Coastguard Workerdefine void @MergeLoadStoreBaseIndexOffsetSext(i8* %a, i8* %b, i8* %c, i32 %n) {
381*9880d681SAndroid Build Coastguard Worker  br label %1
382*9880d681SAndroid Build Coastguard Worker
383*9880d681SAndroid Build Coastguard Worker; <label>:1
384*9880d681SAndroid Build Coastguard Worker  %.09 = phi i32 [ %n, %0 ], [ %12, %1 ] ; remaining iteration count
385*9880d681SAndroid Build Coastguard Worker  %.08 = phi i8* [ %b, %0 ], [ %11, %1 ] ; current destination pointer
386*9880d681SAndroid Build Coastguard Worker  %.0 = phi i8* [ %a, %0 ], [ %2, %1 ] ; current index pointer
387*9880d681SAndroid Build Coastguard Worker  %2 = getelementptr inbounds i8, i8* %.0, i64 1
388*9880d681SAndroid Build Coastguard Worker  %3 = load i8, i8* %.0, align 1
389*9880d681SAndroid Build Coastguard Worker  %4 = sext i8 %3 to i64 ; the one sign extension; both %c addresses derive from it
390*9880d681SAndroid Build Coastguard Worker  %5 = getelementptr inbounds i8, i8* %c, i64 %4
391*9880d681SAndroid Build Coastguard Worker  %6 = load i8, i8* %5, align 1
392*9880d681SAndroid Build Coastguard Worker  %7 = add i64 %4, 1
393*9880d681SAndroid Build Coastguard Worker  %8 = getelementptr inbounds i8, i8* %c, i64 %7
394*9880d681SAndroid Build Coastguard Worker  %9 = load i8, i8* %8, align 1
395*9880d681SAndroid Build Coastguard Worker  store i8 %6, i8* %.08, align 1
396*9880d681SAndroid Build Coastguard Worker  %10 = getelementptr inbounds i8, i8* %.08, i64 1
397*9880d681SAndroid Build Coastguard Worker  store i8 %9, i8* %10, align 1
398*9880d681SAndroid Build Coastguard Worker  %11 = getelementptr inbounds i8, i8* %.08, i64 2
399*9880d681SAndroid Build Coastguard Worker  %12 = add nsw i32 %.09, -1
400*9880d681SAndroid Build Coastguard Worker  %13 = icmp eq i32 %12, 0
401*9880d681SAndroid Build Coastguard Worker  br i1 %13, label %14, label %1
402*9880d681SAndroid Build Coastguard Worker
403*9880d681SAndroid Build Coastguard Worker; <label>:14
404*9880d681SAndroid Build Coastguard Worker  ret void
405*9880d681SAndroid Build Coastguard Worker}
406*9880d681SAndroid Build Coastguard Worker
407*9880d681SAndroid Build Coastguard Worker; However, we can only ignore sign extensions when merging if they are applied
408*9880d681SAndroid Build Coastguard Worker; to all of the memory address computations.
409*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: loadStoreBaseIndexOffsetSextNoSex:
410*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: movw    (%{{.*}},%{{.*}}), [[REG:%[a-z]+]]
411*9880d681SAndroid Build Coastguard Worker; CHECK-NOT: movw    [[REG]], (%{{.*}})
; Test body: same loop shape as MergeLoadStoreBaseIndexOffsetSext, except that
; the second index is built as sext(i8 add): the +1 is performed in 8 bits
; (%7) and only then extended (%wrap.4). That i8 add carries no nsw flag, so
; it may wrap, and the second load address is therefore not guaranteed to be
; one byte past the first.
; NOTE(review): the two negative patterns above bind and then reuse a pattern
; variable; whether the second one can ever match depends on FileCheck's
; variable handling -- confirm they still guard against the unwanted merge.
412*9880d681SAndroid Build Coastguard Workerdefine void @loadStoreBaseIndexOffsetSextNoSex(i8* %a, i8* %b, i8* %c, i32 %n) {
413*9880d681SAndroid Build Coastguard Worker  br label %1
414*9880d681SAndroid Build Coastguard Worker
415*9880d681SAndroid Build Coastguard Worker; <label>:1
  ; Loop state: %.09 = countdown starting at %n, %.08 = destination cursor
  ; (starts at %b), %.0 = index-byte cursor (starts at %a).
416*9880d681SAndroid Build Coastguard Worker  %.09 = phi i32 [ %n, %0 ], [ %12, %1 ]
417*9880d681SAndroid Build Coastguard Worker  %.08 = phi i8* [ %b, %0 ], [ %11, %1 ]
418*9880d681SAndroid Build Coastguard Worker  %.0 = phi i8* [ %a, %0 ], [ %2, %1 ]
419*9880d681SAndroid Build Coastguard Worker  %2 = getelementptr inbounds i8, i8* %.0, i64 1
420*9880d681SAndroid Build Coastguard Worker  %3 = load i8, i8* %.0, align 1
  ; First index: sign-extended byte.
421*9880d681SAndroid Build Coastguard Worker  %4 = sext i8 %3 to i64
422*9880d681SAndroid Build Coastguard Worker  %5 = getelementptr inbounds i8, i8* %c, i64 %4
423*9880d681SAndroid Build Coastguard Worker  %6 = load i8, i8* %5, align 1
  ; Second index: add in i8 first, extend afterwards (may wrap in 8 bits).
424*9880d681SAndroid Build Coastguard Worker  %7 = add i8 %3, 1
425*9880d681SAndroid Build Coastguard Worker  %wrap.4 = sext i8 %7 to i64
426*9880d681SAndroid Build Coastguard Worker  %8 = getelementptr inbounds i8, i8* %c, i64 %wrap.4
427*9880d681SAndroid Build Coastguard Worker  %9 = load i8, i8* %8, align 1
  ; The stores themselves still target two consecutive destination bytes.
428*9880d681SAndroid Build Coastguard Worker  store i8 %6, i8* %.08, align 1
429*9880d681SAndroid Build Coastguard Worker  %10 = getelementptr inbounds i8, i8* %.08, i64 1
430*9880d681SAndroid Build Coastguard Worker  store i8 %9, i8* %10, align 1
431*9880d681SAndroid Build Coastguard Worker  %11 = getelementptr inbounds i8, i8* %.08, i64 2
  ; Decrement the counter and leave the loop once it reaches zero.
432*9880d681SAndroid Build Coastguard Worker  %12 = add nsw i32 %.09, -1
433*9880d681SAndroid Build Coastguard Worker  %13 = icmp eq i32 %12, 0
434*9880d681SAndroid Build Coastguard Worker  br i1 %13, label %14, label %1
435*9880d681SAndroid Build Coastguard Worker
436*9880d681SAndroid Build Coastguard Worker; <label>:14
437*9880d681SAndroid Build Coastguard Worker  ret void
438*9880d681SAndroid Build Coastguard Worker}
439*9880d681SAndroid Build Coastguard Worker
440*9880d681SAndroid Build Coastguard Worker; PR21711 ( http://llvm.org/bugs/show_bug.cgi?id=21711 )
; Extracts all eight lanes of %v and stores them, lane i to ptr[i], as eight
; scalar float stores with 4-byte alignment. The expected output listed at the
; end of the body is a single unaligned vector store (vmovups) immediately
; followed by vzeroupper and the return.
441*9880d681SAndroid Build Coastguard Workerdefine void @merge_vec_element_store(<8 x float> %v, float* %ptr) {
442*9880d681SAndroid Build Coastguard Worker  %vecext0 = extractelement <8 x float> %v, i32 0
443*9880d681SAndroid Build Coastguard Worker  %vecext1 = extractelement <8 x float> %v, i32 1
444*9880d681SAndroid Build Coastguard Worker  %vecext2 = extractelement <8 x float> %v, i32 2
445*9880d681SAndroid Build Coastguard Worker  %vecext3 = extractelement <8 x float> %v, i32 3
446*9880d681SAndroid Build Coastguard Worker  %vecext4 = extractelement <8 x float> %v, i32 4
447*9880d681SAndroid Build Coastguard Worker  %vecext5 = extractelement <8 x float> %v, i32 5
448*9880d681SAndroid Build Coastguard Worker  %vecext6 = extractelement <8 x float> %v, i32 6
449*9880d681SAndroid Build Coastguard Worker  %vecext7 = extractelement <8 x float> %v, i32 7
  ; Element 0 is stored through %ptr directly, so only indices 1..7 need a GEP.
450*9880d681SAndroid Build Coastguard Worker  %arrayidx1 = getelementptr inbounds float, float* %ptr, i64 1
451*9880d681SAndroid Build Coastguard Worker  %arrayidx2 = getelementptr inbounds float, float* %ptr, i64 2
452*9880d681SAndroid Build Coastguard Worker  %arrayidx3 = getelementptr inbounds float, float* %ptr, i64 3
453*9880d681SAndroid Build Coastguard Worker  %arrayidx4 = getelementptr inbounds float, float* %ptr, i64 4
454*9880d681SAndroid Build Coastguard Worker  %arrayidx5 = getelementptr inbounds float, float* %ptr, i64 5
455*9880d681SAndroid Build Coastguard Worker  %arrayidx6 = getelementptr inbounds float, float* %ptr, i64 6
456*9880d681SAndroid Build Coastguard Worker  %arrayidx7 = getelementptr inbounds float, float* %ptr, i64 7
  ; Eight consecutive scalar stores, in ascending address order.
457*9880d681SAndroid Build Coastguard Worker  store float %vecext0, float* %ptr, align 4
458*9880d681SAndroid Build Coastguard Worker  store float %vecext1, float* %arrayidx1, align 4
459*9880d681SAndroid Build Coastguard Worker  store float %vecext2, float* %arrayidx2, align 4
460*9880d681SAndroid Build Coastguard Worker  store float %vecext3, float* %arrayidx3, align 4
461*9880d681SAndroid Build Coastguard Worker  store float %vecext4, float* %arrayidx4, align 4
462*9880d681SAndroid Build Coastguard Worker  store float %vecext5, float* %arrayidx5, align 4
463*9880d681SAndroid Build Coastguard Worker  store float %vecext6, float* %arrayidx6, align 4
464*9880d681SAndroid Build Coastguard Worker  store float %vecext7, float* %arrayidx7, align 4
465*9880d681SAndroid Build Coastguard Worker  ret void
466*9880d681SAndroid Build Coastguard Worker
467*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: merge_vec_element_store
468*9880d681SAndroid Build Coastguard Worker; CHECK: vmovups
469*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: vzeroupper
470*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: retq
471*9880d681SAndroid Build Coastguard Worker}
472*9880d681SAndroid Build Coastguard Worker
473*9880d681SAndroid Build Coastguard Worker; PR21711 - Merge vector stores into wider vector stores.
474*9880d681SAndroid Build Coastguard Worker; These should be merged into 32-byte stores.
; Splits %v1 and %v2 into their low and high <4 x float> halves and stores the
; four halves to slots 3..6 of %ptr. A <4 x float> slot is 16 bytes, so those
; slots sit at byte offsets 48, 64, 80 and 96. The expected output listed at
; the end of the body is two 32-byte ymm stores, at offsets 48 and 80.
475*9880d681SAndroid Build Coastguard Workerdefine void @merge_vec_extract_stores(<8 x float> %v1, <8 x float> %v2, <4 x float>* %ptr) {
476*9880d681SAndroid Build Coastguard Worker  %idx0 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 3
477*9880d681SAndroid Build Coastguard Worker  %idx1 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 4
478*9880d681SAndroid Build Coastguard Worker  %idx2 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 5
479*9880d681SAndroid Build Coastguard Worker  %idx3 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 6
  ; Lanes 0-3 select the low half of the source vector, lanes 4-7 the high half.
480*9880d681SAndroid Build Coastguard Worker  %shuffle0 = shufflevector <8 x float> %v1, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
481*9880d681SAndroid Build Coastguard Worker  %shuffle1 = shufflevector <8 x float> %v1, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
482*9880d681SAndroid Build Coastguard Worker  %shuffle2 = shufflevector <8 x float> %v2, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
483*9880d681SAndroid Build Coastguard Worker  %shuffle3 = shufflevector <8 x float> %v2, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  ; Low/high halves of the same source land in adjacent 16-byte slots.
484*9880d681SAndroid Build Coastguard Worker  store <4 x float> %shuffle0, <4 x float>* %idx0, align 16
485*9880d681SAndroid Build Coastguard Worker  store <4 x float> %shuffle1, <4 x float>* %idx1, align 16
486*9880d681SAndroid Build Coastguard Worker  store <4 x float> %shuffle2, <4 x float>* %idx2, align 16
487*9880d681SAndroid Build Coastguard Worker  store <4 x float> %shuffle3, <4 x float>* %idx3, align 16
488*9880d681SAndroid Build Coastguard Worker  ret void
489*9880d681SAndroid Build Coastguard Worker
490*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: merge_vec_extract_stores
491*9880d681SAndroid Build Coastguard Worker; CHECK:      vmovups %ymm0, 48(%rdi)
492*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: vmovups %ymm1, 80(%rdi)
493*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: vzeroupper
494*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: retq
495*9880d681SAndroid Build Coastguard Worker}
496*9880d681SAndroid Build Coastguard Worker
497*9880d681SAndroid Build Coastguard Worker; Merging vector stores when sourced from vector loads is not currently handled.
; Copies two adjacent <4 x float> values from %v[0..1] to %ptr[0..1]. The
; loads carry no explicit alignment; the stores are 16-byte aligned. The
; expected output listed at the end of the body is four separate vmovaps
; instructions (presumably two loads and two stores; the patterns do not pin
; the operands), i.e. the accesses are not combined into 32-byte ones.
498*9880d681SAndroid Build Coastguard Workerdefine void @merge_vec_stores_from_loads(<4 x float>* %v, <4 x float>* %ptr) {
499*9880d681SAndroid Build Coastguard Worker  %load_idx0 = getelementptr inbounds <4 x float>, <4 x float>* %v, i64 0
500*9880d681SAndroid Build Coastguard Worker  %load_idx1 = getelementptr inbounds <4 x float>, <4 x float>* %v, i64 1
501*9880d681SAndroid Build Coastguard Worker  %v0 = load <4 x float>, <4 x float>* %load_idx0
502*9880d681SAndroid Build Coastguard Worker  %v1 = load <4 x float>, <4 x float>* %load_idx1
503*9880d681SAndroid Build Coastguard Worker  %store_idx0 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 0
504*9880d681SAndroid Build Coastguard Worker  %store_idx1 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 1
505*9880d681SAndroid Build Coastguard Worker  store <4 x float> %v0, <4 x float>* %store_idx0, align 16
506*9880d681SAndroid Build Coastguard Worker  store <4 x float> %v1, <4 x float>* %store_idx1, align 16
507*9880d681SAndroid Build Coastguard Worker  ret void
508*9880d681SAndroid Build Coastguard Worker
509*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: merge_vec_stores_from_loads
510*9880d681SAndroid Build Coastguard Worker; CHECK:      vmovaps
511*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: vmovaps
512*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: vmovaps
513*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: vmovaps
514*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: retq
515*9880d681SAndroid Build Coastguard Worker}
516*9880d681SAndroid Build Coastguard Worker
517*9880d681SAndroid Build Coastguard Worker; Merging vector stores when sourced from a constant vector is not currently handled.
; Stores an all-zero <4 x i32> to slots 3 and 4 of %ptr, which are adjacent
; 16-byte slots. The expected output listed at the end of the body is one
; vxorps followed by two separate vmovaps instructions, i.e. the two stores
; are not combined into a single 32-byte store.
518*9880d681SAndroid Build Coastguard Workerdefine void @merge_vec_stores_of_constants(<4 x i32>* %ptr) {
519*9880d681SAndroid Build Coastguard Worker  %idx0 = getelementptr inbounds <4 x i32>, <4 x i32>* %ptr, i64 3
520*9880d681SAndroid Build Coastguard Worker  %idx1 = getelementptr inbounds <4 x i32>, <4 x i32>* %ptr, i64 4
521*9880d681SAndroid Build Coastguard Worker  store <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32>* %idx0, align 16
522*9880d681SAndroid Build Coastguard Worker  store <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32>* %idx1, align 16
523*9880d681SAndroid Build Coastguard Worker  ret void
524*9880d681SAndroid Build Coastguard Worker
525*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: merge_vec_stores_of_constants
526*9880d681SAndroid Build Coastguard Worker; CHECK:      vxorps
527*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: vmovaps
528*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: vmovaps
529*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: retq
530*9880d681SAndroid Build Coastguard Worker}
531*9880d681SAndroid Build Coastguard Worker
532*9880d681SAndroid Build Coastguard Worker; This is a minimized test based on real code that was failing.
533*9880d681SAndroid Build Coastguard Worker; We could merge stores (and loads) like this...
534*9880d681SAndroid Build Coastguard Worker
; Copies array[0] to array[4] with a scalar i64 load/store, then copies
; array[1] to array[5] by loading a <2 x i64> that starts at array[1] and
; extracting its element 0. The expected output listed at the end of the body
; is two independent 8-byte load/store pairs (offsets 0 -> 32 and 8 -> 40).
535*9880d681SAndroid Build Coastguard Workerdefine void @merge_vec_element_and_scalar_load([6 x i64]* %array) {
536*9880d681SAndroid Build Coastguard Worker  %idx0 = getelementptr inbounds [6 x i64], [6 x i64]* %array, i64 0, i64 0
537*9880d681SAndroid Build Coastguard Worker  %idx1 = getelementptr inbounds [6 x i64], [6 x i64]* %array, i64 0, i64 1
538*9880d681SAndroid Build Coastguard Worker  %idx4 = getelementptr inbounds [6 x i64], [6 x i64]* %array, i64 0, i64 4
539*9880d681SAndroid Build Coastguard Worker  %idx5 = getelementptr inbounds [6 x i64], [6 x i64]* %array, i64 0, i64 5
540*9880d681SAndroid Build Coastguard Worker
  ; Scalar copy: array[4] = array[0].
541*9880d681SAndroid Build Coastguard Worker  %a0 = load i64, i64* %idx0, align 8
542*9880d681SAndroid Build Coastguard Worker  store i64 %a0, i64* %idx4, align 8
543*9880d681SAndroid Build Coastguard Worker
  ; Vector load covering array[1..2]; only lane 0 (array[1]) is used.
544*9880d681SAndroid Build Coastguard Worker  %b = bitcast i64* %idx1 to <2 x i64>*
545*9880d681SAndroid Build Coastguard Worker  %v = load <2 x i64>, <2 x i64>* %b, align 8
546*9880d681SAndroid Build Coastguard Worker  %a1 = extractelement <2 x i64> %v, i32 0
547*9880d681SAndroid Build Coastguard Worker  store i64 %a1, i64* %idx5, align 8
548*9880d681SAndroid Build Coastguard Worker  ret void
549*9880d681SAndroid Build Coastguard Worker
550*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: merge_vec_element_and_scalar_load
551*9880d681SAndroid Build Coastguard Worker; CHECK:      movq	(%rdi), %rax
552*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movq	%rax, 32(%rdi)
553*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movq	8(%rdi), %rax
554*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: movq	%rax, 40(%rdi)
555*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT: retq
556*9880d681SAndroid Build Coastguard Worker}
557