; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s

; First, a simple example from Clang. The registers could plausibly be
; different, but probably won't be.
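;
; A plausible C source for @from_clang (reconstructed for illustration; the
; field names are invented, only the first-byte layout is implied by the
; 0x87/0x78 masks in the IR below):
;
;   struct foo {
;     char lo  : 3;  /* kept: bits 0-2 of the 0x87 mask */
;     char val : 4;  /* set:  bits 3-6 (0x78)           */
;     char hi  : 1;  /* kept: bit 7 of the 0x87 mask    */
;     char other[3]; /* the remaining i8/[2 x i8]/i8 bytes */
;   };
;
;   struct foo set_val(struct foo f, int n) { f.val = n; return f; }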

%struct.foo = type { i8, [2 x i8], i8 }

define [1 x i64] @from_clang([1 x i64] %f.coerce, i32 %n) nounwind readnone {
; CHECK-LABEL: from_clang:
; CHECK: bfi {{w[0-9]+}}, {{w[0-9]+}}, #3, #4

entry:
  %f.coerce.fca.0.extract = extractvalue [1 x i64] %f.coerce, 0
  %tmp.sroa.0.0.extract.trunc = trunc i64 %f.coerce.fca.0.extract to i32
  %bf.value = shl i32 %n, 3
  %0 = and i32 %bf.value, 120
  %f.sroa.0.0.insert.ext.masked = and i32 %tmp.sroa.0.0.extract.trunc, 135
  %1 = or i32 %f.sroa.0.0.insert.ext.masked, %0
  %f.sroa.0.0.extract.trunc = zext i32 %1 to i64
  %tmp1.sroa.1.1.insert.insert = and i64 %f.coerce.fca.0.extract, 4294967040
  %tmp1.sroa.0.0.insert.insert = or i64 %f.sroa.0.0.extract.trunc, %tmp1.sroa.1.1.insert.insert
  %.fca.0.insert = insertvalue [1 x i64] undef, i64 %tmp1.sroa.0.0.insert.insert, 0
  ret [1 x i64] %.fca.0.insert
}

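; BFI Wd, Wn, #lsb, #width leaves Wd untouched except for bits
; [lsb+width-1:lsb], which it replaces with bits [width-1:0] of Wn. In C the
; pattern matched by test_whole32 below is roughly (names as in the IR):
;
;   *existing = (*existing & ~(0x1fu << 26)) | ((*new & 0x1fu) << 26);
;
; where ~(0x1f << 26) is exactly the 0x83ffffff keep-mask.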
define void @test_whole32(i32* %existing, i32* %new) {
; CHECK-LABEL: test_whole32:

; CHECK: bfi {{w[0-9]+}}, {{w[0-9]+}}, #26, #5

  %oldval = load volatile i32, i32* %existing
  %oldval_keep = and i32 %oldval, 2214592511 ; = 0x83ffffff

  %newval = load volatile i32, i32* %new
  %newval_shifted = shl i32 %newval, 26
  %newval_masked = and i32 %newval_shifted, 2080374784 ; = 0x7c000000

  %combined = or i32 %oldval_keep, %newval_masked
  store volatile i32 %combined, i32* %existing

  ret void
}

define void @test_whole64(i64* %existing, i64* %new) {
; CHECK-LABEL: test_whole64:
; CHECK: bfi {{x[0-9]+}}, {{x[0-9]+}}, #26, #14
; CHECK-NOT: and
; CHECK: ret

  %oldval = load volatile i64, i64* %existing
  %oldval_keep = and i64 %oldval, 18446742974265032703 ; = 0xffffff0003ffffff

  %newval = load volatile i64, i64* %new
  %newval_shifted = shl i64 %newval, 26
  %newval_masked = and i64 %newval_shifted, 1099444518912 ; = 0xfffc000000

  %combined = or i64 %oldval_keep, %newval_masked
  store volatile i64 %combined, i64* %existing

  ret void
}

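; BFXIL Xd, Xn, #lsb, #width replaces bits [width-1:0] of Xd with bits
; [lsb+width-1:lsb] of Xn. With lsb = 0 it simply copies the low `width` bits
; across, which is what the 0xffff0000/0xffff mask pair below reduces to.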
define void @test_whole32_from64(i64* %existing, i64* %new) {
; CHECK-LABEL: test_whole32_from64:

; CHECK: bfxil {{x[0-9]+}}, {{x[0-9]+}}, #0, #16

; CHECK: ret

  %oldval = load volatile i64, i64* %existing
  %oldval_keep = and i64 %oldval, 4294901760 ; = 0xffff0000

  %newval = load volatile i64, i64* %new
  %newval_masked = and i64 %newval, 65535 ; = 0xffff

  %combined = or i64 %oldval_keep, %newval_masked
  store volatile i64 %combined, i64* %existing

  ret void
}

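; A BFI on its own preserves every bit outside the inserted field, but the
; keep-mask 0x87 below also clears bits 8-31, so a separate AND is still
; required before the insert.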
define void @test_32bit_masked(i32 *%existing, i32 *%new) {
; CHECK-LABEL: test_32bit_masked:

; CHECK: and
; CHECK: bfi [[INSERT:w[0-9]+]], {{w[0-9]+}}, #3, #4

  %oldval = load volatile i32, i32* %existing
  %oldval_keep = and i32 %oldval, 135 ; = 0x87

  %newval = load volatile i32, i32* %new
  %newval_shifted = shl i32 %newval, 3
  %newval_masked = and i32 %newval_shifted, 120 ; = 0x78

  %combined = or i32 %oldval_keep, %newval_masked
  store volatile i32 %combined, i32* %existing

  ret void
}

define void @test_64bit_masked(i64 *%existing, i64 *%new) {
; CHECK-LABEL: test_64bit_masked:
; CHECK: and
; CHECK: bfi [[INSERT:x[0-9]+]], {{x[0-9]+}}, #40, #8

  %oldval = load volatile i64, i64* %existing
  %oldval_keep = and i64 %oldval, 1095216660480 ; = 0xff_0000_0000

  %newval = load volatile i64, i64* %new
  %newval_shifted = shl i64 %newval, 40
  %newval_masked = and i64 %newval_shifted, 280375465082880 ; = 0xff00_0000_0000

  %combined = or i64 %newval_masked, %oldval_keep
  store volatile i64 %combined, i64* %existing

  ret void
}

; Mask is too complicated for literal ANDwwi, make sure other avenues are tried.
define void @test_32bit_complexmask(i32 *%existing, i32 *%new) {
; CHECK-LABEL: test_32bit_complexmask:

; CHECK: and
; CHECK: bfi {{w[0-9]+}}, {{w[0-9]+}}, #3, #4

  %oldval = load volatile i32, i32* %existing
  %oldval_keep = and i32 %oldval, 647 ; = 0x287

  %newval = load volatile i32, i32* %new
  %newval_shifted = shl i32 %newval, 3
  %newval_masked = and i32 %newval_shifted, 120 ; = 0x78

  %combined = or i32 %oldval_keep, %newval_masked
  store volatile i32 %combined, i32* %existing

  ret void
}

; Neither mask is a contiguous set of 1s, so BFI can't be used.
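; For example, 0x278 = 0b10_0111_1000 sets bits 3-6 and bit 9 with a gap at
; bits 7-8, so no single (lsb, width) pair describes it.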
define void @test_32bit_badmask(i32 *%existing, i32 *%new) {
; CHECK-LABEL: test_32bit_badmask:
; CHECK-NOT: bfi
; CHECK-NOT: bfm
; CHECK: ret

  %oldval = load volatile i32, i32* %existing
  %oldval_keep = and i32 %oldval, 135 ; = 0x87

  %newval = load volatile i32, i32* %new
  %newval_shifted = shl i32 %newval, 3
  %newval_masked = and i32 %newval_shifted, 632 ; = 0x278

  %combined = or i32 %oldval_keep, %newval_masked
  store volatile i32 %combined, i32* %existing

  ret void
}

; Ditto
define void @test_64bit_badmask(i64 *%existing, i64 *%new) {
; CHECK-LABEL: test_64bit_badmask:
; CHECK-NOT: bfi
; CHECK-NOT: bfm
; CHECK: ret

  %oldval = load volatile i64, i64* %existing
  %oldval_keep = and i64 %oldval, 135 ; = 0x87

  %newval = load volatile i64, i64* %new
  %newval_shifted = shl i64 %newval, 3
  %newval_masked = and i64 %newval_shifted, 664 ; = 0x298

  %combined = or i64 %oldval_keep, %newval_masked
  store volatile i64 %combined, i64* %existing

  ret void
}

; Bitfield insert where there's a left-over shr needed at the beginning
; (e.g. result of str.bf1 = str.bf2)
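; Here the field is inserted at bit 26 but the IR only shifts left by 12, so
; the source bits are %newval[18:14] (26 - 12 = 14) and codegen needs an
; LSR #14 to bring them down to bit 0 before the BFI.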
define void @test_32bit_with_shr(i32* %existing, i32* %new) {
; CHECK-LABEL: test_32bit_with_shr:

  %oldval = load volatile i32, i32* %existing
  %oldval_keep = and i32 %oldval, 2214592511 ; = 0x83ffffff

  %newval = load i32, i32* %new
  %newval_shifted = shl i32 %newval, 12
  %newval_masked = and i32 %newval_shifted, 2080374784 ; = 0x7c000000

  %combined = or i32 %oldval_keep, %newval_masked
  store volatile i32 %combined, i32* %existing
; CHECK: lsr [[BIT:w[0-9]+]], {{w[0-9]+}}, #14
; CHECK: bfi {{w[0-9]+}}, [[BIT]], #26, #5

  ret void
}

; Bitfield insert where the second operand of the OR is the better match to be
; folded into the BFM.
define void @test_32bit_opnd1_better(i32* %existing, i32* %new) {
; CHECK-LABEL: test_32bit_opnd1_better:

  %oldval = load volatile i32, i32* %existing
  %oldval_keep = and i32 %oldval, 65535 ; 0x0000ffff

  %newval = load i32, i32* %new
  %newval_shifted = shl i32 %newval, 16
  %newval_masked = and i32 %newval_shifted, 16711680 ; 0x00ff0000

  %combined = or i32 %oldval_keep, %newval_masked
  store volatile i32 %combined, i32* %existing
; CHECK: and [[BIT:w[0-9]+]], {{w[0-9]+}}, #0xffff
; CHECK: bfi [[BIT]], {{w[0-9]+}}, #16, #8

  ret void
}

; Test a case where none of the bits from one operand end up useful.
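; All four bytes of %b are eventually shifted out, so the result is built from
; copies of %a alone; in effect it computes (a * 0x01010101) << 8.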
define i32 @test_nouseful_bits(i8 %a, i32 %b) {
; CHECK-LABEL: test_nouseful_bits:
; CHECK: bfi
; CHECK: bfi
; CHECK: bfi
; CHECK-NOT: bfi
; CHECK-NOT: or
; CHECK: lsl
  %conv = zext i8 %a to i32     ;   0  0  0  A
  %shl = shl i32 %b, 8          ;   B2 B1 B0 0
  %or = or i32 %conv, %shl      ;   B2 B1 B0 A
  %shl.1 = shl i32 %or, 8       ;   B1 B0 A  0
  %or.1 = or i32 %conv, %shl.1  ;   B1 B0 A  A
  %shl.2 = shl i32 %or.1, 8     ;   B0 A  A  0
  %or.2 = or i32 %conv, %shl.2  ;   B0 A  A  A
  %shl.3 = shl i32 %or.2, 8     ;   A  A  A  0
  %or.3 = or i32 %conv, %shl.3  ;   A  A  A  A
  %shl.4 = shl i32 %or.3, 8     ;   A  A  A  0
  ret i32 %shl.4
}

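; For the narrow stores below only the low 8 or 16 bits of %or survive the
; trunc, and the BFXIL already overwrites the low field bits, so the masking
; AND (-8/-16) is dead and must not be emitted.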
define void @test_nouseful_strb(i32* %ptr32, i8* %ptr8, i32 %x)  {
entry:
; CHECK-LABEL: @test_nouseful_strb
; CHECK: ldr [[REG1:w[0-9]+]],
; CHECK-NOT:  and {{w[0-9]+}}, {{w[0-9]+}}, #0xf8
; CHECK-NEXT: bfxil [[REG1]], w2, #16, #3
; CHECK-NEXT: strb [[REG1]],
; CHECK-NEXT: ret
  %0 = load i32, i32* %ptr32, align 8
  %and = and i32 %0, -8
  %shr = lshr i32 %x, 16
  %and1 = and i32 %shr, 7
  %or = or i32 %and, %and1
  %trunc = trunc i32 %or to i8
  store i8 %trunc, i8* %ptr8
  ret void
}

define void @test_nouseful_strh(i32* %ptr32, i16* %ptr16, i32 %x)  {
entry:
; CHECK-LABEL: @test_nouseful_strh
; CHECK: ldr [[REG1:w[0-9]+]],
; CHECK-NOT:  and {{w[0-9]+}}, {{w[0-9]+}}, #0xfff0
; CHECK-NEXT: bfxil [[REG1]], w2, #16, #4
; CHECK-NEXT: strh [[REG1]],
; CHECK-NEXT: ret
  %0 = load i32, i32* %ptr32, align 8
  %and = and i32 %0, -16
  %shr = lshr i32 %x, 16
  %and1 = and i32 %shr, 15
  %or = or i32 %and, %and1
  %trunc = trunc i32 %or to i16
  store i16 %trunc, i16* %ptr16
  ret void
}

define void @test_nouseful_sturb(i32* %ptr32, i8* %ptr8, i32 %x)  {
entry:
; CHECK-LABEL: @test_nouseful_sturb
; CHECK: ldr [[REG1:w[0-9]+]],
; CHECK-NOT:  and {{w[0-9]+}}, {{w[0-9]+}}, #0xf8
; CHECK-NEXT: bfxil [[REG1]], w2, #16, #3
; CHECK-NEXT: sturb [[REG1]],
; CHECK-NEXT: ret
  %0 = load i32, i32* %ptr32, align 8
  %and = and i32 %0, -8
  %shr = lshr i32 %x, 16
  %and1 = and i32 %shr, 7
  %or = or i32 %and, %and1
  %trunc = trunc i32 %or to i8
  %gep = getelementptr i8, i8* %ptr8, i64 -1
  store i8 %trunc, i8* %gep
  ret void
}

define void @test_nouseful_sturh(i32* %ptr32, i16* %ptr16, i32 %x)  {
entry:
; CHECK-LABEL: @test_nouseful_sturh
; CHECK: ldr [[REG1:w[0-9]+]],
; CHECK-NOT:  and {{w[0-9]+}}, {{w[0-9]+}}, #0xfff0
; CHECK-NEXT: bfxil [[REG1]], w2, #16, #4
; CHECK-NEXT: sturh [[REG1]],
; CHECK-NEXT: ret
  %0 = load i32, i32* %ptr32, align 8
  %and = and i32 %0, -16
  %shr = lshr i32 %x, 16
  %and1 = and i32 %shr, 15
  %or = or i32 %and, %and1
  %trunc = trunc i32 %or to i16
  %gep = getelementptr i16, i16* %ptr16, i64 -1
  store i16 %trunc, i16* %gep
  ret void
}

; The next set of tests generate a BFXIL from 'or (and X, Mask0Imm),
; (and Y, Mask1Imm)' iff Mask0Imm and ~Mask1Imm are equivalent and one of the
; MaskImms is a shifted mask (e.g., 0x000ffff0).
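;
; In C the first case below is roughly:
;
;   unsigned f(unsigned a, unsigned b) {
;     return (a & 0xffff000fu) | (b & 0x0000fff0u);
;   }
;
; which lowers to a single LSR feeding a BFI rather than two ANDs and an ORR.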

; CHECK-LABEL: @test_or_and_and1
; CHECK: lsr w8, w1, #4
; CHECK: bfi w0, w8, #4, #12
define i32 @test_or_and_and1(i32 %a, i32 %b) {
entry:
  %and = and i32 %a, -65521 ; 0xffff000f
  %and1 = and i32 %b, 65520 ; 0x0000fff0
  %or = or i32 %and1, %and
  ret i32 %or
}

; CHECK-LABEL: @test_or_and_and2
; CHECK: lsr w8, w0, #4
; CHECK: bfi w1, w8, #4, #12
define i32 @test_or_and_and2(i32 %a, i32 %b) {
entry:
  %and = and i32 %a, 65520   ; 0x0000fff0
  %and1 = and i32 %b, -65521 ; 0xffff000f
  %or = or i32 %and1, %and
  ret i32 %or
}

; CHECK-LABEL: @test_or_and_and3
; CHECK: lsr x8, x1, #16
; CHECK: bfi x0, x8, #16, #32
define i64 @test_or_and_and3(i64 %a, i64 %b) {
entry:
  %and = and i64 %a, -281474976645121 ; 0xffff00000000ffff
  %and1 = and i64 %b, 281474976645120 ; 0x0000ffffffff0000
  %or = or i64 %and1, %and
  ret i64 %or
}

; Don't convert 'and' with multiple uses.
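; (Here %and feeds both the OR and a store, so folding it into a bitfield
; insert would discard a value that is still needed.)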
; CHECK-LABEL: @test_or_and_and4
; CHECK: and w8, w0, #0xffff000f
; CHECK: and w9, w1, #0xfff0
; CHECK: orr w0, w9, w8
; CHECK: str w8, [x2
define i32 @test_or_and_and4(i32 %a, i32 %b, i32* %ptr) {
entry:
  %and = and i32 %a, -65521
  store i32 %and, i32* %ptr, align 4
  %and2 = and i32 %b, 65520
  %or = or i32 %and2, %and
  ret i32 %or
}

; Don't convert 'and' with multiple uses.
; CHECK-LABEL: @test_or_and_and5
; CHECK: and w8, w1, #0xfff0
; CHECK: and w9, w0, #0xffff000f
; CHECK: orr w0, w8, w9
; CHECK: str w8, [x2]
define i32 @test_or_and_and5(i32 %a, i32 %b, i32* %ptr) {
entry:
  %and = and i32 %b, 65520
  store i32 %and, i32* %ptr, align 4
  %and1 = and i32 %a, -65521
  %or = or i32 %and, %and1
  ret i32 %or
}

; CHECK-LABEL: @test1
; CHECK: mov [[REG:w[0-9]+]], #5
; CHECK: bfxil w0, [[REG]], #0, #4
define i32 @test1(i32 %a) {
  %1 = and i32 %a, -16 ; 0xfffffff0
  %2 = or i32 %1, 5    ; 0x00000005
  ret i32 %2
}

; CHECK-LABEL: @test2
; CHECK: mov [[REG:w[0-9]+]], #10
; CHECK: bfi w0, [[REG]], #22, #4
define i32 @test2(i32 %a) {
  %1 = and i32 %a, -62914561 ; 0xfc3fffff
  %2 = or i32 %1, 41943040   ; 0x02800000
  ret i32 %2
}

; CHECK-LABEL: @test3
; CHECK: mov [[REG:x[0-9]+]], #5
; CHECK: bfxil x0, [[REG]], #0, #3
define i64 @test3(i64 %a) {
  %1 = and i64 %a, -8 ; 0xfffffffffffffff8
  %2 = or i64 %1, 5   ; 0x0000000000000005
  ret i64 %2
}

; CHECK-LABEL: @test4
; CHECK: mov [[REG:x[0-9]+]], #9
; CHECK: bfi x0, [[REG]], #1, #7
define i64 @test4(i64 %a) {
  %1 = and i64 %a, -255 ; 0xffffffffffffff01
  %2 = or i64 %1,  18   ; 0x0000000000000012
  ret i64 %2
}

; Don't generate BFI/BFXIL if the immediate can be encoded in the ORR.
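; (0x6 is two contiguous set bits and therefore a valid logical immediate, so
; AND plus ORR is already two instructions with no constant to materialize.)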
; CHECK-LABEL: @test5
; CHECK: and [[REG:w[0-9]+]], w0, #0xfffffff0
; CHECK: orr w0, [[REG]], #0x6
define i32 @test5(i32 %a) {
  %1 = and i32 %a, 4294967280 ; 0xfffffff0
  %2 = or i32 %1, 6           ; 0x00000006
  ret i32 %2
}

; BFXIL will use the same constant as the ORR, so we don't care how the constant
; is materialized (it's an equal cost either way).
; CHECK-LABEL: @test6
; CHECK: mov [[REG:w[0-9]+]], #720896
; CHECK: movk [[REG]], #23250
; CHECK: bfxil w0, [[REG]], #0, #20
define i32 @test6(i32 %a) {
  %1 = and i32 %a, 4293918720 ; 0xfff00000
  %2 = or i32 %1, 744146      ; 0x000b5ad2
  ret i32 %2
}

; BFIs that require the same number of instructions to materialize the constant
; as the original ORR are okay.
; CHECK-LABEL: @test7
; CHECK: mov [[REG:w[0-9]+]], #327680
; CHECK: movk [[REG]], #44393
; CHECK: bfi w0, [[REG]], #1, #19
define i32 @test7(i32 %a) {
  %1 = and i32 %a, 4293918721 ; 0xfff00001
  %2 = or i32 %1, 744146      ; 0x000b5ad2
  ret i32 %2
}

; BFIs that require more instructions to materialize the constant as compared
; to the original ORR are not okay.  In this case we would be replacing the
; 'and' with a 'movk', which would decrease ILP while using the same number of
; instructions.
; CHECK-LABEL: @test8
; CHECK: mov [[REG2:x[0-9]+]], #157599529959424
; CHECK: and [[REG1:x[0-9]+]], x0, #0xff000000000000ff
; CHECK: movk [[REG2]], #31059, lsl #16
; CHECK: orr x0, [[REG1]], [[REG2]]
define i64 @test8(i64 %a) {
  %1 = and i64 %a, -72057594037927681 ; 0xff000000000000ff
  %2 = or i64 %1, 157601565442048     ; 0x00008f5679530000
  ret i64 %2
}

; This test exposed an issue with an overly aggressive assert.  The bit of code
; that is expected to catch this case is unable to deal with the trunc, which
; results in a failing check due to a mismatch between the BFI opcode and
; the expected value type of the OR.
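; (%f keeps bits 22:0 of the truncated shift result and %g keeps bits 31:23 of
; %e, so the OR reduces to a 9-bit insert at bit 23 of a w-register, even
; though the first operand started life as an i64.)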
; CHECK-LABEL: @test9
; CHECK: lsr x0, x0, #12
; CHECK: lsr [[REG:w[0-9]+]], w1, #23
; CHECK: bfi w0, [[REG]], #23, #9
define i32 @test9(i64 %b, i32 %e) {
  %c = lshr i64 %b, 12
  %d = trunc i64 %c to i32
  %f = and i32 %d, 8388607
  %g = and i32 %e, -8388608
  %h = or i32 %g, %f
  ret i32 %h
}